[PATCH 07 of 14 eac] Implementing <maintenanceAgency>, some refacto

Guillaume Vandevelde guillaume.vandevelde at logilab.fr
Tue Jul 2 14:53:56 CEST 2019


# HG changeset patch
# User Guillaume Vandevelde <gvandevelde at logilab.fr>
# Date 1561130652 -7200
#      Fri Jun 21 17:24:12 2019 +0200
# Node ID 56aa0f4aa6eda5018e0d9c1a8190d3ec4fe44293
# Parent  81fab457faebf1851f7d5e7cc0826684f64fba1f
# Available At http://hg.logilab.org/review/cubes/eac
#              hg pull http://hg.logilab.org/review/cubes/eac -r 56aa0f4aa6ed
Implementing <maintenanceAgency>, some refactoring.

Start adding support for the <maintenanceAgency> tag via the `MaintenanceAg` entity. Also took the time to do some refactoring:

The repeated `for` loops have been condensed into a data/logic structure: a tuple of (xpath, builder) pairs declares what will be handled, and a single loop then defines how it is handled, instead of repeating the same pattern for each tag.

Tried some of Python's functional capabilities by defining the `values_from_xpaths` method. Its goal is to automate the:
`if foo is not None and foo.text:
    values['var_foo'] = set([text_type(foo.text)])`
pattern, so you can build a `values` dict with fewer lines and less duplicated code.

If my use of `partial` is obscure, there is more information in the Phabricator review, and you can always ask me questions.

Differential Revision: https://phab.logilab.fr/D3554

diff -r 81fab457faeb -r 56aa0f4aa6ed cubicweb_eac/dataimport.py
--- a/cubicweb_eac/dataimport.py	Thu Jun 20 16:00:22 2019 +0200
+++ b/cubicweb_eac/dataimport.py	Fri Jun 21 17:24:12 2019 +0200
@@ -19,7 +19,7 @@
 
 from collections import deque
 import datetime
-from functools import wraps
+from functools import wraps, partial
 import inspect
 import logging
 from uuid import uuid4
@@ -40,8 +40,8 @@
 TYPE_MAPPING['human'] = u'person'
 
 ETYPES_ORDER_HINT = ('AgentKind', 'PhoneNumber', 'PostalAddress', 'AuthorityRecord',
-                     'Convention', 'LanguageDec', 'AgentPlace', 'Mandate', 'LegalStatus',
-                     'History', 'Event', 'Structure', 'AgentFunction', 'Occupation',
+                     'Convention', 'LanguageDec', 'AgentPlace', 'MaintenanceAg', 'Mandate',
+                     'LegalStatus', 'History', 'Event', 'Structure', 'AgentFunction', 'Occupation',
                      'GeneralContext', 'AssociationRelation', 'ChronologicalRelation',
                      'HierarchicalRelation', 'EACResourceRelation', 'ExternalUri',
                      'EACSource', 'Activity')
@@ -64,6 +64,17 @@
         self.tag_parent = tag_parent
 
 
+def safe_append(finder, dct, var_tuple):
+    var_name, var_xpath = var_tuple
+    var = finder(var_xpath)
+    if var is not None and var.text:
+        new_dct = dct.copy()
+        new_dct[var_name] = set([text_type(var.text)])
+        return new_dct
+    else:
+        return dct
+
+
 def external_uri(uri):
     values = [text_type(uri)]
     return ExtEntity('ExternalUri', uri, {'uri': set(values), 'cwuri': set(values)})
@@ -291,6 +302,12 @@
             return trace_extentity(self)(attr)
         return attr
 
+    def values_from_xpaths(self, elem, name_path_tuples, values={}):
+        """build a `values` dict from xpath requests"""
+        finder = partial(self._elem_find, elem)
+        append_func = partial(safe_append, finder)
+        return reduce(append_func, name_path_tuples, values)
+
     def record_visited(self, elem, extentity):
         assert extentity.extid, extentity
         self._visited.setdefault(elem, set([])).add(extentity.extid)
@@ -879,22 +896,18 @@
                 extentity = ExtEntity('EACOtherRecordId', self._gen_extid(), values)
                 self.record_visited(other_record_id, extentity)
                 yield extentity
-        for elem in control.xpath('eac:sources/eac:source',
-                                  namespaces=self.namespaces):
-            for extentity in self.build_source(elem):
-                yield extentity
-        for elem in control.xpath('eac:maintenanceHistory/eac:maintenanceEvent',
-                                  namespaces=self.namespaces):
-            for extentity in self.build_maintenance_event(elem):
-                yield extentity
-        for elem in control.xpath('eac:languageDeclaration',
-                                  namespaces=self.namespaces):
-            for extentity in self.build_language_declaration(elem):
-                yield extentity
-        for elem in control.xpath('eac:conventionDeclaration',
-                                  namespaces=self.namespaces):
-            for extentity in self.build_convention(elem):
-                yield extentity
+        maintenance_agency = self._elem_find(control, 'eac:maintenanceAgency')
+        if maintenance_agency is not None:
+            yield next(self.build_maintenance_agency(maintenance_agency))
+        builders = (('eac:sources/eac:source', self.build_source),
+                    ('eac:maintenanceHistory/eac:maintenanceEvent', self.build_maintenance_event),
+                    ('eac:languageDeclaration', self.build_language_declaration),
+                    ('eac:conventionDeclaration', self.build_convention))
+        for xpath_str, builder in builders:
+            for elem in control.xpath(xpath_str,
+                                      namespaces=self.namespaces):
+                for extentity in builder(elem):
+                    yield extentity
 
     def build_maintenance_event(self, elem):
         """Parse a `maintenanceEvent` tag, yielding a prov:Activity external
@@ -923,6 +936,20 @@
             values['agent'] = set([text_type(agent.text)])
         yield ExtEntity('Activity', self._gen_extid(), values)
 
+    @relate_to_record_through('MaintenanceAg', 'agency_of')
+    @filter_empty
+    @elem_maybe_none
+    def build_maintenance_agency(self, elem):
+        """Build a `MaintenanceAg` external entity"""
+        desc_value = self.parse_tag_description(elem)
+        values = self.values_from_xpaths(
+            elem,
+            (('agency_code', 'eac:agencyCode'),
+             ('agency_name', 'eac:agencyName'),
+             ('other_agency_code', 'eac:otherAgencyCode')),
+            desc_value)
+        yield ExtEntity('MaintenanceAg', self._gen_extid(), values)
+
     @relate_to_record_through('LanguageDec', 'language_declaration_of')
     @filter_empty
     @elem_maybe_none
diff -r 81fab457faeb -r 56aa0f4aa6ed cubicweb_eac/migration/0.9.0_Any.py
--- a/cubicweb_eac/migration/0.9.0_Any.py	Thu Jun 20 16:00:22 2019 +0200
+++ b/cubicweb_eac/migration/0.9.0_Any.py	Fri Jun 21 17:24:12 2019 +0200
@@ -2,3 +2,4 @@
 add_entity_type('Event')
 add_entity_type('Convention')
 add_entity_type('LanguageDec')
+add_entity_type('MaintenanceAg')
diff -r 81fab457faeb -r 56aa0f4aa6ed cubicweb_eac/schema.py
--- a/cubicweb_eac/schema.py	Thu Jun 20 16:00:22 2019 +0200
+++ b/cubicweb_eac/schema.py	Fri Jun 21 17:24:12 2019 +0200
@@ -193,6 +193,14 @@
     composite = 'object'
 
 
+class agency_of(RelationDefinition):
+    subject = 'MaintenanceAg'
+    object = 'AuthorityRecord'
+    cardinality = '1?'
+    composite = 'object'
+    description = _('Agency name and code linked to an AuthorityRecord')
+
+
 class _agent_relation(RelationDefinition):
     """Abstract relation between authority record"""
     subject = None
@@ -279,6 +287,13 @@
     description = RichString(fulltextindexed=True)
 
 
+class MaintenanceAg(EntityType):
+    agency_code = String(fulltextindexed=True)
+    agency_name = String(fulltextindexed=True)
+    description = RichString(fulltextindexed=True)
+    other_agency_code = String(fulltextindexed=True)
+
+
 class Convention(EntityType):
     "Rules or conventions applied in creating the EAC-CPF instance"
     abbrev = String(fulltextindexed=True)
diff -r 81fab457faeb -r 56aa0f4aa6ed test/test_dataimport.py
--- a/test/test_dataimport.py	Thu Jun 20 16:00:22 2019 +0200
+++ b/test/test_dataimport.py	Fri Jun 21 17:24:12 2019 +0200
@@ -73,16 +73,8 @@
         return importer.external_entities()
 
     def test_parse_FRAD033_EAC_00001(self):
-        _gen_extid = map(str, (x for x in count() if x != 2)).next
+        _gen_extid = map(str, (x for x in count() if x != 3)).next
         expected = [
-            ('AuthorityRecord', 'FRAD033_EAC_00001',
-             {'isni': set([u'22330001300016']),
-              'start_date': set([datetime.date(1800, 1, 1)]),
-              'end_date': set([datetime.date(2099, 1, 1)]),
-              'agent_kind': set(['agentkind/authority']),
-              'record_id': set(['FRAD033_EAC_00001']),
-             },
-            ),
             ('EACOtherRecordId', _gen_extid(),
              {'eac_other_record_id_of': set(['FRAD033_EAC_00001']),
               'value': set([u'1234']),
@@ -94,6 +86,11 @@
               'local_type': set([u'letters']),
              },
             ),
+            ('MaintenanceAg', _gen_extid(),
+             {'agency_of': ['FRAD033_EAC_00001'],
+              'agency_name': set([u'Gironde. Archives d\xe9partementales']),
+              'agency_code': set([u'FR-AD033'])},
+            ),
             ('EACSource', _gen_extid(),
              {'source_agent': set(['FRAD033_EAC_00001']),
               'title': set([u'1. Ouvrages imprimés...']),
@@ -133,7 +130,7 @@
             ('Convention', _gen_extid(),
              {'convention_of': ['FRAD033_EAC_00001'],
               'abbrev': set([u'ISAAR(CPF)']),
-              'has_citation': ['9'],
+              'has_citation': ['10'],
               'description_format': set([u'text/html']),
               'description': set([u'<p xmlns="urn:isbn:1-931666-33-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink">Norme ISAAR(CPF) du Conseil international des archives, 2e \xe9dition, 1996.</p>']), # noqa
              },
@@ -178,7 +175,7 @@
              {'name': set([u'Bordeaux (Gironde, France)']),
               'role': set([u'siege']),
               'place_agent': set(['FRAD033_EAC_00001']),
-              'place_address': set(['14']),
+              'place_address': set(['15']),
               'equivalent_concept': set(['http://catalogue.bnf.fr/ark:/12148/cb152418385']),
              },
             ),
@@ -221,8 +218,8 @@
               ]),
               'text_format': set([u'text/html']),
               'history_agent': set(['FRAD033_EAC_00001']),
-              'has_citation': set(['21', '22']),
-              'has_event': set(['24', '23']),
+              'has_citation': set(['23', '22']),
+              'has_event': set(['24', '25']),
              },
             ),
             ('Citation', _gen_extid(),
@@ -278,7 +275,7 @@
               'description': set([u'Organisation des réunions ...']),
               'description_format': set([u'text/plain']),
               'occupation_agent': set(['FRAD033_EAC_00001']),
-              'has_citation': set(['30']),
+              'has_citation': set(['31']),
               'equivalent_concept': set(['http://pifgadget.com']),
              },
             ),
@@ -291,7 +288,7 @@
                               u'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
                               u'xmlns:xlink="http://www.w3.org/1999/xlink">very famous</p>']),
               'content_format': set([u'text/html']),
-              'has_citation': set(['32']),
+              'has_citation': set(['33']),
               'general_context_of': set(['FRAD033_EAC_00001']),
               }
             ),
@@ -380,6 +377,14 @@
              {'uri': set([u'http://pifgadget.com']),
               'cwuri': set([u'http://pifgadget.com'])},
             ),
+            ('AuthorityRecord', 'FRAD033_EAC_00001',
+             {'isni': set([u'22330001300016']),
+              'start_date': set([datetime.date(1800, 1, 1)]),
+              'end_date': set([datetime.date(2099, 1, 1)]),
+              'agent_kind': set(['agentkind/authority']),
+              'record_id': set(['FRAD033_EAC_00001']),
+             },
+            ),
         ]
         expected = [ExtEntity(*vals) for vals in expected]
         fpath = self.datapath('FRAD033_EAC_00001_simplified.xml')
@@ -401,7 +406,6 @@
         self.assertEqual(not_visited,
                          {'maintenanceStatus': set([12]),
                           'publicationStatus': set([14]),
-                          'maintenanceAgency': set([16]),
                           'localControl': set([54]),
                           'source': set([76]),  # empty.
                           'structureOrGenealogy': set([189]),  # empty.
@@ -503,7 +507,7 @@
                                cwuri=u'http://data.culture.fr/thesaurus/page/ark:/67717/T1-1074')
             cnx.commit()
             created, updated = testutils.eac_import(cnx, fpath)
-            self.assertEqual(len(created), 46)
+            self.assertEqual(len(created), 47)
             self.assertEqual(updated, set())
             rset = cnx.find('AuthorityRecord', isni=u'22330001300016')
             self.assertEqual(len(rset), 1)
diff -r 81fab457faeb -r 56aa0f4aa6ed test/test_schema.py
--- a/test/test_schema.py	Thu Jun 20 16:00:22 2019 +0200
+++ b/test/test_schema.py	Fri Jun 21 17:24:12 2019 +0200
@@ -114,6 +114,7 @@
             'GeneralContext': {('general_context_of', 'subject'): set(['AuthorityRecord'])},
             'History': {('history_agent', 'subject'): set(['AuthorityRecord'])},
             'LegalStatus': {('legal_status_agent', 'subject'): set(['AuthorityRecord'])},
+            'MaintenanceAg': {('agency_of', 'subject'): set(['AuthorityRecord'])},
             'Mandate': {('mandate_agent', 'subject'): set(['AuthorityRecord'])},
             'NameEntry': {('name_entry_for', 'subject'): set(['AuthorityRecord'])},
             'Occupation': {('occupation_agent', 'subject'): set(['AuthorityRecord'])},


More information about the saem-devel mailing list