[PATCH 11 of 14 eac] Add support for <nameEntryParallel> tag

Guillaume Vandevelde guillaume.vandevelde at logilab.fr
Tue Jul 2 14:54:00 CEST 2019


# HG changeset patch
# User Guillaume Vandevelde <gvandevelde at logilab.fr>
# Date 1561631533 -7200
#      Thu Jun 27 12:32:13 2019 +0200
# Node ID 4e7a62a1630f3e0e254c36640d267744ad9276cc
# Parent  87d6331393a7e8c37291cd670a6b4eb2ccb6c61a
# Available At http://hg.logilab.org/review/cubes/eac
#              hg pull http://hg.logilab.org/review/cubes/eac -r 4e7a62a1630f
Add support for <nameEntryParallel> tag

Add support for a new tag, modify some functions to avoid side effects, and start adapting the date entity so that it can be used as a child entity, which allows it to be used for lists of elements.

Differential Revision: https://phab.logilab.fr/D3631

diff -r 87d6331393a7 -r 4e7a62a1630f cubicweb_eac/dataimport.py
--- a/cubicweb_eac/dataimport.py	Tue Jun 25 18:02:18 2019 +0200
+++ b/cubicweb_eac/dataimport.py	Thu Jun 27 12:32:13 2019 +0200
@@ -44,7 +44,7 @@
                      'Mandate', 'LegalStatus', 'History', 'Event', 'Structure', 'AgentFunction',
                      'Occupation', 'GeneralContext', 'AssociationRelation', 'ChronologicalRelation',
                      'HierarchicalRelation', 'EACResourceRelation', 'EACFunctionRelation',
-                     'ExternalUri', 'EACSource', 'Activity')
+                     'ParallelNames', 'ExternalUri', 'EACSource', 'Activity')
 
 
 class InvalidEAC(RuntimeError):
@@ -64,7 +64,7 @@
         self.tag_parent = tag_parent
 
 
-def safe_append(finder, dct, var_tuple):
+def safe_append_text(finder, dct, var_tuple):
     var_name, var_xpath = var_tuple
     var = finder(var_xpath)
     if var is not None and var.text:
@@ -75,6 +75,17 @@
         return dct
 
 
+def safe_append_attrib(elem, dct, attrib_tuple):
+    var_name, attrib_name = attrib_tuple
+    attrib_value = elem.attrib.get(attrib_name)
+    if attrib_value:
+        new_dct = dct.copy()
+        new_dct[var_name] = set([text_type(attrib_value)])
+        return new_dct
+    else:
+        return dct
+
+
 def external_uri(uri):
     values = [text_type(uri)]
     return ExtEntity('ExternalUri', uri, {'uri': set(values), 'cwuri': set(values)})
@@ -165,14 +176,16 @@
 
 
 def add_child_for(etype, relation):
-    """Handle import of citation tag for `etype` ExtEntity that is yielded by
+    """Handle import of child tag for `etype` ExtEntity that is yielded by
     decorated method.
     """
     def decorator(func):
         @wraps(func)
         def wrapper(self, elem):
             builders = {'has_citation': self.build_citation,
-                        'has_event': self.build_event}
+                        'has_event': self.build_event,
+                        'simple_name_relation': self.build_parallel_name_entry,
+                        'date_relation': self.build_parallel_date_entry}
             build_child = builders[relation]
             for extentity in func(self, elem):
                 if extentity.etype == etype:
@@ -306,15 +319,21 @@
             return trace_extentity(self)(attr)
         return attr
 
-    def values_from_xpaths(self, elem, name_path_tuples, values={}):
+    def values_from_xpaths(self, elem, name_path_tuples, values=None):
         """build a `values` dict from xpath requests
         partial is used for building a function with the signature
         append_func :: dict -> (varname, xpath) -> dict
         and use it in the reduce"""
+        values = values or {}
         finder = partial(self._elem_find, elem)
-        append_func = partial(safe_append, finder)
+        append_func = partial(safe_append_text, finder)
         return reduce(append_func, name_path_tuples, values)
 
+    def values_from_attrib(self, elem, name_attrib_tuples, values=None):
+        values = values or {}
+        append_func = partial(safe_append_attrib, elem)
+        return reduce(append_func, name_attrib_tuples, values)
+
     def record_visited(self, elem, extentity):
         assert extentity.extid, extentity
         self._visited.setdefault(elem, set([])).add(extentity.extid)
@@ -418,6 +437,42 @@
             raise MissingTag('nameEntry', 'identity')
         for name_entry in name_entries:
             yield self.build_name_entry(name_entry)
+        parallel_name_entries = self._elem_findall(identity, 'eac:nameEntryParallel')
+        for parallel_name_entry in parallel_name_entries:
+            for extentity in self.build_parallel(parallel_name_entry):
+                yield extentity
+
+    @relate_to_record_through('ParallelNames', 'parallel_names_of')
+    @add_child_for('ParallelNames', 'simple_name_relation')
+    @add_child_for('ParallelNames', 'date_relation')
+    def build_parallel(self, elem):
+        values = self.values_from_xpaths(elem, (('authorized_form', 'eac:authorizedForm'),
+                                                ('alternative_form', 'eac:alternativeForm')))
+        yield ExtEntity('ParallelNames', self._gen_extid(), values)
+
+    @filter_empty
+    @filter_none
+    def build_parallel_date_entry(self, elem):
+        for date in self.find_nested(elem, 'eac:date', 'eac:useDates'):
+            yield ExtEntity('DateEntity', self._gen_extid(),
+                            {'start_date': set([self.parse_date(date)]),
+                             'end_date': set([self.parse_date(date)])})
+        for date_range in self.find_nested(elem, 'eac:dateRange', 'eac:useDates'):
+            yield ExtEntity('DateEntity', self._gen_extid(), self.parse_daterange(date_range))
+
+    def build_parallel_name_entry(self, elem):
+        for elem in self._elem_findall(elem, 'eac:nameEntry'):
+            values = self.values_from_attrib(elem, (('language', 'lang'),
+                                                    ('script_code', 'scriptCode')))
+            values.update(self.values_from_xpaths(elem,
+                                                  (('preferred_form', 'eac:preferredForm'),
+                                                   ('alternative_form', 'eac:alternativeForm'),
+                                                   ('authorized_form', 'eac:authorizedForm'))))
+            parts = self._elem_findall(elem, 'eac:part')
+            if not parts:
+                raise MissingTag('part', 'nameEntry')
+            values.update({'parts': set([u', '.join(text_type(p.text) for p in parts)])})
+            yield ExtEntity('NameEntryS', self._gen_extid(), values)
 
     @filter_none
     def parse_description(self, description):
diff -r 87d6331393a7 -r 4e7a62a1630f cubicweb_eac/migration/0.9.0_Any.py
--- a/cubicweb_eac/migration/0.9.0_Any.py	Tue Jun 25 18:02:18 2019 +0200
+++ b/cubicweb_eac/migration/0.9.0_Any.py	Thu Jun 27 12:32:13 2019 +0200
@@ -3,8 +3,18 @@
 add_entity_type('Convention')
 add_entity_type('Language')
 add_entity_type('LanguageDec')
+add_entity_type('ParallelNames')
+add_entity_type('EACFunctionRelation')
 add_entity_type('MaintenanceAg')
 add_attribute('MaintenanceAg', 'maintenance_status')
 add_attribute('MaintenanceAg', 'publication_status')
 add_entity_type('EACFunctionRelation')
 add_attribute('EACResourceRelation', 'attributes')
+add_attribute('EACFunctionRelation', 'attributes')
+add_entity_type('NameEntryS')
+add_attribute('NameEntry', 'language')
+add_attribute('NameEntry', 'preferred_form')
+add_attribute('NameEntry', 'alternative_form')
+add_attribute('NameEntry', 'authorized_form')
+add_attribute('NameEntry', 'script_code')
+add_entity_type('DateEntity')
diff -r 87d6331393a7 -r 4e7a62a1630f cubicweb_eac/schema.py
--- a/cubicweb_eac/schema.py	Tue Jun 25 18:02:18 2019 +0200
+++ b/cubicweb_eac/schema.py	Thu Jun 27 12:32:13 2019 +0200
@@ -60,8 +60,13 @@
                   description=_('International Standard Name Identifier'))
 
 
-class NameEntry(EntityType):
-    """Represent a nameEntry tag of an EAC-CPF document."""
+class NameEntryS(EntityType):
+    """Represent a simple nameEntry tag of an EAC-CPF document."""
+    language = String(fulltextindexed=True)
+    preferred_form = String(fulltextindexed=True)
+    alternative_form = String(fulltextindexed=True)
+    authorized_form = String(fulltextindexed=True)
+    script_code = String(fulltextindexed=True)
     parts = String(
         required=True, fulltextindexed=True,
         description=_('concatenation of part tags within a nameEntry'))
@@ -69,6 +74,12 @@
                           vocabulary=[_('authorized'), _('alternative')])
 
 
+class NameEntry(NameEntryS):
+    """Represent a nameEntry with attributes it can have outside of
+     a nameEntryParallel"""
+    pass
+
+
 class name_entry_for(RelationDefinition):
     subject = 'NameEntry'
     object = 'AuthorityRecord'
@@ -78,6 +89,35 @@
     inlined = True
 
 
+class ParallelNames(EntityType):
+    authorized_form = String(fulltextindexed=True)
+    alternative_form = String(fulltextindexed=True)
+
+
+class date_relation(RelationDefinition):
+    subject = 'ParallelNames'
+    object = 'DateEntity'
+    cardinality = '*1'
+    composite = 'subject'
+    fulltext_container = 'subject'
+
+
+class simple_name_relation(RelationDefinition):
+    subject = 'ParallelNames'
+    object = 'NameEntryS'
+    cardinality = '*1'
+    composite = 'subject'
+    fulltext_container = 'subject'
+
+
+class parallel_names_of(RelationDefinition):
+    subject = 'ParallelNames'
+    object = 'AuthorityRecord'
+    cardinality = '1*'
+    composite = 'object'
+    fulltext_container = 'object'
+
+
 class EACOtherRecordId(EntityType):
     value = String(required=True, fulltextindexed=True, indexed=True)
     local_type = String(indexed=True)
@@ -470,6 +510,12 @@
     inlined = True
 
 
+@dated_entity_type
+class DateEntity(EntityType):
+    """Represent either a date tag or a dateRange tag
+    for the representation of dateSets"""
+
+
 @xml_wrap
 class EACSource(EntityType):
     """A source used to establish the description of an AuthorityRecord"""
diff -r 87d6331393a7 -r 4e7a62a1630f test/data/FRAD033_EAC_00001_simplified.xml
--- a/test/data/FRAD033_EAC_00001_simplified.xml	Tue Jun 25 18:02:18 2019 +0200
+++ b/test/data/FRAD033_EAC_00001_simplified.xml	Thu Jun 27 12:32:13 2019 +0200
@@ -119,7 +119,51 @@
           <alternativeForm>conventionDeclaration</alternativeForm>
 
       </nameEntry>
+      <nameEntryParallel>
+	<nameEntry lang="fr" scriptCode="Latn">
+	  <part>Institut international des droits de
+	  l'homme
+	  </part>
+	  <preferredForm>AFNOR_Z44-060
+	  </preferredForm>
+	</nameEntry>
+	<nameEntry lang="en" scriptCode="Latn">
+	  <part>International institute of human
+	  rights
+	  </part>
+	</nameEntry>
+	<authorizedForm>AFNOR_Z44-060
+	</authorizedForm>
+      </nameEntryParallel>
+      <nameEntryParallel>
+	<nameEntry localType="authorized">
+	  <part lang="eng"
+		localType="corpname">Federal Chancellery
+	  of Germany
+	  </part>
+	</nameEntry>
+	<nameEntry localType="authorized">
+	  <part lang="fre"
+		localType="corpname">Chancellerie fédérale
+	  d'Allemagne
+	  </part>
+	</nameEntry>
+	<nameEntry localType="abbreviation">
+	  <part lang="ger"
+		localType="corpname">BK
+	  </part>
+	</nameEntry>
 
+	<useDates>
+	  <dateRange localType="open">
+	    <fromDate
+		standardDate="1949">1949
+	    </fromDate>
+	    <toDate>open
+	    </toDate>
+	  </dateRange>
+	  </useDates>
+	</nameEntryParallel>
     </identity>
 
     <description>
diff -r 87d6331393a7 -r 4e7a62a1630f test/test_dataimport.py
--- a/test/test_dataimport.py	Tue Jun 25 18:02:18 2019 +0200
+++ b/test/test_dataimport.py	Thu Jun 27 12:32:13 2019 +0200
@@ -167,6 +167,47 @@
               'name_entry_for': set(['FRAD033_EAC_00001']),
              },
             ),
+            ('ParallelNames', _gen_extid(),
+             {'parallel_names_of': set(['FRAD033_EAC_00001']),
+              'simple_name_relation': set(['17', '16']),
+              'authorized_form': set([u'AFNOR_Z44-060\n\t'])
+             },
+            ),
+            ('NameEntryS', _gen_extid(),
+             {'script_code': set([u'Latn']),
+              'preferred_form': [u'AFNOR_Z44-060\n\t  '],
+              'parts': set([u"Institut international des droits de\n\t  l'homme\n\t  "]),
+              'language': set([u'fr'])
+             },
+             ),
+            ('NameEntryS', _gen_extid(),
+             {'script_code': set([u'Latn']),
+              'parts': set([u'International institute of human\n\t  rights\n\t  ']),
+              'language': set([u'en'])
+             },
+            ),
+            ('ParallelNames', _gen_extid(),
+             {'parallel_names_of': set(['FRAD033_EAC_00001']),
+              'date_relation': set(['19']),
+              'simple_name_relation': set(['20', '21', '22'])
+             },
+            ),
+            ('DateEntity', _gen_extid(),
+             {'start_date': set([datetime.date(1949, 1, 1)])
+             },
+            ),
+            ('NameEntryS', _gen_extid(),
+             {'parts': [u'Federal Chancellery\n\t  of Germany\n\t  ']
+             },
+            ),
+            ('NameEntryS', _gen_extid(),
+             {'parts': [u"Chancellerie f\xe9d\xe9rale\n\t  d'Allemagne\n\t  "]
+             },
+            ),
+            ('NameEntryS', _gen_extid(),
+             {'parts': set([u'BK\n\t  '])
+             },
+            ),
             ('PostalAddress', _gen_extid(),
              {'street': set([u'1 Esplanade Charles de Gaulle']),
               'postalcode': set([u'33074']),
@@ -177,7 +218,7 @@
              {'name': set([u'Bordeaux (Gironde, France)']),
               'role': set([u'siege']),
               'place_agent': set(['FRAD033_EAC_00001']),
-              'place_address': set(['15']),
+              'place_address': set(['23']),
               'equivalent_concept': set(['http://catalogue.bnf.fr/ark:/12148/cb152418385']),
              },
             ),
@@ -232,8 +273,8 @@
               ]),
               'text_format': set([u'text/html']),
               'history_agent': set(['FRAD033_EAC_00001']),
-              'has_citation': set(['25', '24']),
-              'has_event': set(['26', '27']),
+              'has_citation': set(['32', '33']),
+              'has_event': set(['35', '34']),
              },
             ),
             ('Citation', _gen_extid(),
@@ -289,7 +330,7 @@
               'description': set([u'Organisation des réunions ...']),
               'description_format': set([u'text/plain']),
               'occupation_agent': set(['FRAD033_EAC_00001']),
-              'has_citation': set(['33']),
+              'has_citation': set(['41']),
               'equivalent_concept': set(['http://pifgadget.com']),
              },
             ),
@@ -302,7 +343,7 @@
                               u'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
                               u'xmlns:xlink="http://www.w3.org/1999/xlink">very famous</p>']),
               'content_format': set([u'text/html']),
-              'has_citation': set(['35']),
+              'has_citation': set(['43']),
               'general_context_of': set(['FRAD033_EAC_00001']),
               }
             ),
@@ -457,8 +498,8 @@
         self.assertEqual(not_visited,
                          {'localControl': set([54]),
                           'source': set([76]),  # empty.
-                          'structureOrGenealogy': set([203]),  # empty.
-                          'biogHist': set([243, 246]),  # empty.
+                          'structureOrGenealogy': set([247]),  # empty.
+                          'biogHist': set([287, 290]),  # empty.
                           })
 
     def test_mandate_under_mandates(self):
@@ -556,7 +597,7 @@
                                cwuri=u'http://data.culture.fr/thesaurus/page/ark:/67717/T1-1074')
             cnx.commit()
             created, updated = testutils.eac_import(cnx, fpath)
-            self.assertEqual(len(created), 52)
+            self.assertEqual(len(created), 60)
             self.assertEqual(updated, set())
             rset = cnx.find('AuthorityRecord', isni=u'22330001300016')
             self.assertEqual(len(rset), 1)
@@ -579,7 +620,8 @@
             self.assertEqual(len(record.reverse_function_agent), 3)
             for related in ('structure', 'history', 'mandate', 'occupation',
                             'generalcontext', 'legal_status', 'eac_relations',
-                            'equivalent_concept', 'control'):
+                            'equivalent_concept', 'control', 'parallel_relations',
+                            'maintenance_agency', 'convention', 'language_declaration',):
                 with self.subTest(related=related):
                     checker = getattr(self, '_check_' + related)
                     checker(cnx, record)
@@ -591,31 +633,27 @@
                                                     format=u'text/plain').strip(),
                          u'Pour accomplir ses missions ...')
 
-    def check_maintenance_agency(self, cnx, record):
-        rset = cnx.find('MaintenanceAg', structure_agent=record)
-        self.assertEqual(len(rset), 1)
+    def _check_maintenance_agency(self, cnx, record):
+        rset = cnx.find('MaintenanceAg', agency_of=record)
         self.assertEqual(rset.one().printable_value('agency_name',
                                                     format=u'text/plain').strip(),
-                         set([u'Gironde. Archives d\xe9partementales']))
+                         u'Gironde. Archives d\xe9partementales')
         self.assertEqual(rset.one().printable_value('agency_code',
                                                     format=u'text/plain').strip(),
-                         set([u'FR-AD033']))
+                         u'FR-AD033')
         self.assertEqual(rset.one().printable_value('maintenance_status',
                                                     format=u'text/plain').strip(),
-                         set([u'new']))
+                         u'new')
         self.assertEqual(rset.one().printable_value('publication_status',
                                                     format=u'text/plain').strip(),
-                         set([u'approved']))
+                         u'approved')
 
     def _check_convention(self, cnx, record):
-        rset = cnx.find('History', convention_of=record)
+        rset = cnx.find('Convention', convention_of=record).sorted_rset(lambda x: x.eid)
         self.assertEqual(len(rset), 3)
-        self.assertEqual(rset.one().printable_value('description', format=u'text/plain').strip(),
-                         u'<p xmlns="urn:isbn:1-931666-33-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink">Norme ISAAR(CPF) du Conseil international des archives, 2e \xe9dition, 1996.</p>') # noqa
-        self.assertEqual(rset.one().printable_value('description', format=u'text/plain').strip(),
-                         u'<p xmlns="urn:isbn:1-931666-33-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink">AFNOR NF Z 44-060, octobre 1983, Catalogue d\u2019auteurs et d\u2019anonymes : forme et\n          structure des vedettes des collectivit\xe9s auteurs.</p>') # noqa
-        self.assertEqual(rset.one().printable_value('description', format=u'text/plain').strip(),
-                         u'<p xmlns="urn:isbn:1-931666-33-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink">Norme ISO 8601 :2004 \xc9l\xe9ments de donn\xe9es et formats d\u2019\xe9change -- \xc9change\n          d\u2019information -- Repr\xe9sentation de la date et de l\u2019heure.</p>') # noqa
+        self.assertEqual(rset.get_entity(0, 0)
+                         .printable_value('description', format=u'text/plain').strip(),
+                         u'Norme ISAAR(CPF) du Conseil international des archives, 2e \xe9dition, 1996.') # noqa
 
     def _check_language_declaration(self, cnx, record):
         rset = cnx.find('LanguageDec', language_declaration_of=record)
@@ -633,19 +671,8 @@
         self.assertEqual(rset.one().printable_value('text',
                                                     format=u'text/plain').strip(),
                          u"La loi du 22 décembre 1789, en divisant ...\n\nL'inspecteur Canardo")
-
-    def _check_event(self, cnx, record):
-        rset = cnx.find('Event', record)
-        self.assertEqual(len(rset), 2)
-        self.assertEqual(rset[0].printable_value('text',
-                                                 format=u'text/plain').strip(),
-                         u'Left Mer and moved to the mainland.'
-                         u'Worked at various jobs including canecutter'
-                         u'and railway labourer.')
-        self.assertEqual(rset[1].printable_value('text',
-                                                 format=u'text/plain').strip(),
-                         u'Union representative, Townsville-'
-                         u'Mount Isa rail construction project.')
+        events = rset.one().has_event
+        self.assertEqual(len(events), 2)
 
     def _check_mandate(self, cnx, record):
         rset = cnx.find('Mandate', mandate_agent=record)
@@ -722,6 +749,18 @@
         self.assertEqual(func_relation.end_date,
                          datetime.date(2001, 1, 1))
 
+    def _check_parallel_relations(self, cnx, record):
+        rset = cnx.find('ParallelNames', parallel_names_of=record).sorted_rset(lambda x: x.eid)
+        self.assertEqual(len(rset), 2)
+        p_entity = rset.get_entity(0, 0)
+        self.assertEqual(p_entity.parallel_names_of[0], record)
+        self.assertEqual(len(p_entity.simple_name_relation), 2)
+        self.assertEqual(len(p_entity.date_relation), 0)
+        p_entity = rset.get_entity(1, 0)
+        self.assertEqual(p_entity.parallel_names_of[0], record)
+        self.assertEqual(len(p_entity.simple_name_relation), 3)
+        self.assertEqual(len(p_entity.date_relation), 1)
+
     def _check_equivalent_concept(self, cnx, record):
         functions = dict((f.name, f) for f in record.reverse_function_agent)
         self.assertEqual(functions['action sociale'].equivalent_concept[0].cwuri,
diff -r 87d6331393a7 -r 4e7a62a1630f test/test_schema.py
--- a/test/test_schema.py	Tue Jun 25 18:02:18 2019 +0200
+++ b/test/test_schema.py	Thu Jun 27 12:32:13 2019 +0200
@@ -109,6 +109,7 @@
                                  set(['AuthorityRecord'])},
             'EACResourceRelation': {('resource_relation_agent', 'subject'):
                                     set(['AuthorityRecord'])},
+            'DateEntity': {('date_relation', 'object'): set(['ParallelNames'])},
             'Convention': {('convention_of', 'subject'): set(['AuthorityRecord'])},
             'LanguageDec': {('language_declaration_of', 'subject'): set(['AuthorityRecord'])},
             'Language': {('language_used_in', 'subject'): set(['AuthorityRecord'])},
@@ -123,6 +124,8 @@
             'Occupation': {('occupation_agent', 'subject'): set(['AuthorityRecord'])},
             'PostalAddress': {('place_address', 'object'): set(['AgentPlace'])},
             'Structure': {('structure_agent', 'subject'): set(['AuthorityRecord'])},
+            'ParallelNames': {('parallel_names_of', 'subject'): set(['AuthorityRecord'])},
+            'NameEntryS': {('simple_name_relation', 'object'): set(['ParallelNames'])},
         }
         struct = dict(
             (k, dict((rel, set(targets)) for rel, targets in v.items()))


More information about the saem-devel mailing list