[PATCH 10 of 14 eac] Add support for <languagesUsed> and <languageUsed> tags

Guillaume Vandevelde guillaume.vandevelde at logilab.fr
Tue Jul 2 14:53:59 CEST 2019


# HG changeset patch
# User Guillaume Vandevelde <gvandevelde at logilab.fr>
# Date 1561478538 -7200
#      Tue Jun 25 18:02:18 2019 +0200
# Node ID 87d6331393a7e8c37291cd670a6b4eb2ccb6c61a
# Parent  045bcf5cd1aa7bbdf0821fad2099c344288db05c
# Available At http://hg.logilab.org/review/cubes/eac
#              hg pull http://hg.logilab.org/review/cubes/eac -r 87d6331393a7
Add support for <languagesUsed> and <languageUsed> tags

Add support for the language tags. Two entity types with the same attributes were chosen to distinguish the two categories of language tag.

Also abandoned the functional-style Python (`reduce` with the `removekey` helper) in favour of a more natural, Pythonic declarative structure (a dict comprehension), out of concern that non-explicit side effects in Python could silently break the purity of functions.

Differential Revision: https://phab.logilab.fr/D3605

diff -r 045bcf5cd1aa -r 87d6331393a7 cubicweb_eac/dataimport.py
--- a/cubicweb_eac/dataimport.py	Tue Jun 25 14:05:50 2019 +0200
+++ b/cubicweb_eac/dataimport.py	Tue Jun 25 18:02:18 2019 +0200
@@ -40,9 +40,9 @@
 TYPE_MAPPING['human'] = u'person'
 
 ETYPES_ORDER_HINT = ('AgentKind', 'PhoneNumber', 'PostalAddress', 'AuthorityRecord',
-                     'Convention', 'LanguageDec', 'AgentPlace', 'MaintenanceAg', 'Mandate',
-                     'LegalStatus', 'History', 'Event', 'Structure', 'AgentFunction', 'Occupation',
-                     'GeneralContext', 'AssociationRelation', 'ChronologicalRelation',
+                     'Convention', 'LanguageDec', 'Language', 'AgentPlace', 'MaintenanceAg',
+                     'Mandate', 'LegalStatus', 'History', 'Event', 'Structure', 'AgentFunction',
+                     'Occupation', 'GeneralContext', 'AssociationRelation', 'ChronologicalRelation',
                      'HierarchicalRelation', 'EACResourceRelation', 'EACFunctionRelation',
                      'ExternalUri', 'EACSource', 'Activity')
 
@@ -75,12 +75,6 @@
         return dct
 
 
-def removekey(d, key):
-    r = dict(d)
-    del r[key]
-    return r
-
-
 def external_uri(uri):
     values = [text_type(uri)]
     return ExtEntity('ExternalUri', uri, {'uri': set(values), 'cwuri': set(values)})
@@ -452,6 +446,10 @@
         for mandate in self.find_nested(description, 'eac:mandate', 'eac:mandates'):
             for extentity in self.build_mandate(mandate):
                 yield extentity
+        # LanguagesUsed
+        for language in self.find_nested(description, 'eac:languageUsed', 'eac:languagesUsed'):
+            for extentity in self.build_language_used(language):
+                yield extentity
         # history
         for history in self._elem_findall(description, 'eac:biogHist'):
             for extentity in self.build_history(history):
@@ -860,6 +858,7 @@
     @add_xml_wrap_for('EACFunctionRelation')
     def build_function_relation(self, elem):
         """Build a relation between function entities
+
         yield an ExternalUri object, and an EACFunctionRelation
         object that make the link between the ExternalUri and
         the AuthorityRecord object"""
@@ -892,10 +891,10 @@
             elem,
             (('place_entry', 'eac:placeEntry'),
              ('relation_entry', 'eac:relationEntry'))))
-        attrib = {k: v for (k, v) in elem.attrib.items()}
-        attrib = reduce(removekey, ('functionRelationType',
-                                    '{%(xlink)s}href' % self.namespaces),
-                        attrib)
+        attrib = {k: v for (k, v) in dict(elem.attrib).items() if k not in {
+            'functionRelationType',
+            '{%(xlink)s}href' % self.namespaces
+        }}
         if attrib:
             values.update({'attributes': set([text_type(attrib)])})
         yield ExtEntity('EACFunctionRelation', self._gen_extid(), values)
@@ -926,6 +925,13 @@
         if dates:
             values.update(dates)
         values.update(self.parse_tag_description(elem))
+        attrib = {k: v for (k, v) in dict(elem.attrib).items() if k not in {
+            'relationRelationType',
+            '{%(xlink)s}role' % self.namespaces,
+            '{%(xlink)s}href' % self.namespaces,
+        }}
+        if attrib:
+            values.update({'attributes': set([text_type(attrib)])})
         yield ExtEntity('EACResourceRelation', self._gen_extid(), values)
 
     @filter_none
@@ -953,13 +959,16 @@
                 yield extentity
         publication_status = self._elem_find(control, 'eac:publicationStatus')
         p_status = publication_status.text.strip()
+        self.record_visited(publication_status, self.record)
         maintenance_status = self._elem_find(control, 'eac:maintenanceStatus')
         if maintenance_status is None:
             raise MissingTag('maintenanceStatus', 'control')
         m_status = maintenance_status.text.strip()
+        self.record_visited(maintenance_status, self.record)
         maintenance_agency = self._elem_find(control, 'eac:maintenanceAgency')
         if maintenance_agency is None:
             raise MissingTag('maintenanceAgency', 'control')
+        self.record_visited(maintenance_agency, self.record)
         if maintenance_agency is not None:
             yield next(self.build_maintenance_agency(maintenance_agency, m_status, p_status))
         builders = (('eac:sources/eac:source', self.build_source),
@@ -1017,11 +1026,20 @@
             values.update({'publication_status': set([text_type(p_status)])})
         yield ExtEntity('MaintenanceAg', self._gen_extid(), values)
 
+    @relate_to_record_through('Language', 'language_used_in')
+    def build_language_used(self, elem):
+        for e in self.build_language_based_entity(elem):
+            yield e
+
     @relate_to_record_through('LanguageDec', 'language_declaration_of')
+    def build_language_declaration(self, elem):
+        for e in self.build_language_based_entity(elem, extid="LanguageDec"):
+            yield e
+
     @filter_empty
     @elem_maybe_none
-    def build_language_declaration(self, elem):
-        """Build a `LanguageDec` external entity"""
+    def build_language_based_entity(self, elem, extid="Language"):
+        """Build a `Language` based external entity"""
         values = self.parse_tag_description(elem)
         language = self._elem_find(elem, 'eac:language')
         script = self._elem_find(elem, 'eac:script')
@@ -1029,7 +1047,7 @@
             values['language'] = set([text_type(etree.tostring(language).strip())])
         if script is not None and script.text:
             values['script'] = set([text_type(etree.tostring(script).strip())])
-        yield ExtEntity('LanguageDec', self._gen_extid(), values)
+        yield ExtEntity(extid, self._gen_extid(), values)
 
     @relate_to_record_through('Convention', 'convention_of')
     @add_child_for('Convention', 'has_citation')
diff -r 045bcf5cd1aa -r 87d6331393a7 cubicweb_eac/migration/0.9.0_Any.py
--- a/cubicweb_eac/migration/0.9.0_Any.py	Tue Jun 25 14:05:50 2019 +0200
+++ b/cubicweb_eac/migration/0.9.0_Any.py	Tue Jun 25 18:02:18 2019 +0200
@@ -1,8 +1,10 @@
 add_attribute('History', 'abstract')
 add_entity_type('Event')
 add_entity_type('Convention')
+add_entity_type('Language')
 add_entity_type('LanguageDec')
 add_entity_type('MaintenanceAg')
 add_attribute('MaintenanceAg', 'maintenance_status')
 add_attribute('MaintenanceAg', 'publication_status')
 add_entity_type('EACFunctionRelation')
+add_attribute('EACResourceRelation', 'attributes')
diff -r 045bcf5cd1aa -r 87d6331393a7 cubicweb_eac/schema.py
--- a/cubicweb_eac/schema.py	Tue Jun 25 14:05:50 2019 +0200
+++ b/cubicweb_eac/schema.py	Tue Jun 25 18:02:18 2019 +0200
@@ -193,6 +193,13 @@
     composite = 'object'
 
 
+class language_used_in(RelationDefinition):
+    subject = 'Language'
+    object = 'AuthorityRecord'
+    cardinality = '1*'
+    composite = 'object'
+
+
 class agency_of(RelationDefinition):
     subject = 'MaintenanceAg'
     object = 'AuthorityRecord'
@@ -281,12 +288,16 @@
     description = RichString(fulltextindexed=True)
 
 
-class LanguageDec(EntityType):
+class Language(EntityType):
     language = String(fulltextindexed=True)
     script = String(fulltextindexed=True)
     description = RichString(fulltextindexed=True)
 
 
+class LanguageDec(Language):
+    pass
+
+
 class MaintenanceAg(EntityType):
     agency_code = String(fulltextindexed=True)
     agency_name = String(fulltextindexed=True)
@@ -440,6 +451,7 @@
     resource_role = String(description=_('type or nature of the remote resource'),
                            internationalizable=True)
     description = RichString(fulltextindexed=True)
+    attributes = String(fulltextindexed=True)
 
 
 class resource_relation_agent(RelationDefinition):
diff -r 045bcf5cd1aa -r 87d6331393a7 test/data/FRAD033_EAC_00001_simplified.xml
--- a/test/data/FRAD033_EAC_00001_simplified.xml	Tue Jun 25 14:05:50 2019 +0200
+++ b/test/data/FRAD033_EAC_00001_simplified.xml	Tue Jun 25 18:02:18 2019 +0200
@@ -125,6 +125,20 @@
     <description>
       <existDates><dateRange><fromDate>1800</fromDate><toDate>2099</toDate></dateRange>
       </existDates>
+	<languageUsed>
+	  <language
+	      languageCode="eng">English
+	  </language>
+	  <script scriptCode="Latn">Latin
+	  </script>
+	</languageUsed>
+	<languageUsed>
+	  <language
+	      languageCode="spa">Spanish
+	  </language>
+	  <script scriptCode="Latn">Latin
+	  </script>
+	</languageUsed>
       <place>
           <placeRole>siege</placeRole>
           <placeEntry vocabularySource="http://catalogue.bnf.fr/ark:/12148/cb152418385">Bordeaux (Gironde, France)</placeEntry>
diff -r 045bcf5cd1aa -r 87d6331393a7 test/test_dataimport.py
--- a/test/test_dataimport.py	Tue Jun 25 14:05:50 2019 +0200
+++ b/test/test_dataimport.py	Tue Jun 25 18:02:18 2019 +0200
@@ -209,6 +209,18 @@
               'mandate_agent': set(['FRAD033_EAC_00001']),
              },
             ),
+            ('Language', _gen_extid(),
+             {'language_used_in': ['FRAD033_EAC_00001'],
+              'language': set([u'<language xmlns="urn:isbn:1-931666-33-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" languageCode="eng">English\n\t  </language>']), # noqa
+              'script': set([u'<script xmlns="urn:isbn:1-931666-33-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" scriptCode="Latn">Latin\n\t  </script>']) # noqa
+             },
+            ),
+            ('Language', _gen_extid(),
+             {'language_used_in': ['FRAD033_EAC_00001'],
+              'language': set([u'<language xmlns="urn:isbn:1-931666-33-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" languageCode="spa">Spanish\n\t  </language>']), # noqa
+              'script': set([u'<script xmlns="urn:isbn:1-931666-33-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" scriptCode="Latn">Latin\n\t  </script>']) # noqa
+             },
+            ),
             ('History', _gen_extid(),
              {'abstract': set([u'Test of an abstract element']),
               'text': set(["\n".join((
@@ -220,8 +232,8 @@
               ]),
               'text_format': set([u'text/html']),
               'history_agent': set(['FRAD033_EAC_00001']),
-              'has_citation': set(['23', '22']),
-              'has_event': set(['24', '25']),
+              'has_citation': set(['25', '24']),
+              'has_event': set(['26', '27']),
              },
             ),
             ('Citation', _gen_extid(),
@@ -277,7 +289,7 @@
               'description': set([u'Organisation des réunions ...']),
               'description_format': set([u'text/plain']),
               'occupation_agent': set(['FRAD033_EAC_00001']),
-              'has_citation': set(['31']),
+              'has_citation': set(['33']),
               'equivalent_concept': set(['http://pifgadget.com']),
              },
             ),
@@ -290,7 +302,7 @@
                               u'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
                               u'xmlns:xlink="http://www.w3.org/1999/xlink">very famous</p>']),
               'content_format': set([u'text/html']),
-              'has_citation': set(['33']),
+              'has_citation': set(['35']),
               'general_context_of': set(['FRAD033_EAC_00001']),
               }
             ),
@@ -347,6 +359,7 @@
             ),
             ('EACResourceRelation', _gen_extid(),
              {'agent_role': set([u'creatorOf']),
+              'attributes': set([u"{'{http://www.w3.org/1999/xlink}show': 'new', '{http://www.w3.org/1999/xlink}actuate': 'onRequest', '{http://www.w3.org/1999/xlink}type': 'simple', 'resourceRelationType': 'creatorOf'}"]), # noqa
               'resource_role': set([u'Fonds d\'archives']),
               'resource_relation_resource': set([
                   'http://gael.gironde.fr/ead.html?id=FRAD033_IR_N']),
@@ -442,12 +455,10 @@
         for tagname, sourceline in importer.not_visited():
             not_visited.setdefault(tagname, set([])).add(sourceline)
         self.assertEqual(not_visited,
-                         {'maintenanceStatus': set([12]),
-                          'publicationStatus': set([14]),
-                          'localControl': set([54]),
+                         {'localControl': set([54]),
                           'source': set([76]),  # empty.
-                          'structureOrGenealogy': set([189]),  # empty.
-                          'biogHist': set([229, 232]),  # empty.
+                          'structureOrGenealogy': set([203]),  # empty.
+                          'biogHist': set([243, 246]),  # empty.
                           })
 
     def test_mandate_under_mandates(self):
@@ -545,7 +556,7 @@
                                cwuri=u'http://data.culture.fr/thesaurus/page/ark:/67717/T1-1074')
             cnx.commit()
             created, updated = testutils.eac_import(cnx, fpath)
-            self.assertEqual(len(created), 50)
+            self.assertEqual(len(created), 52)
             self.assertEqual(updated, set())
             rset = cnx.find('AuthorityRecord', isni=u'22330001300016')
             self.assertEqual(len(rset), 1)
@@ -690,7 +701,6 @@
         self.assertEqual(rrelation.xml_wrap.getvalue(),
                          '<he xmlns="urn:isbn:1-931666-33-4" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">joe</he>')  # noqa
         rset = cnx.find('EACFunctionRelation', r_type=u'performs')
-        self.assertEqual(len(rset), 1)
         func_relation = rset.one()
         self.assertEqual(func_relation.attributes,
                          u"{'{http://www.w3.org/1999/xlink}actuate': 'onLoad', '{http://www.w3.org/1999/xlink}arcrole': 'http://test_arcrole.lol.com', '{http://www.w3.org/1999/xlink}role': 'http://test_role.lmao.com'}") # noqa
@@ -703,7 +713,6 @@
         self.assertEqual(func_relation.function_relation_function[0].uri,
                          u'http://gael.gironde.fr/ead.html?id=FRAD033_IR_N')
         rset = cnx.find('EACFunctionRelation', r_type=u'controls')
-        self.assertEqual(len(rset), 1)
         func_relation = rset.one()
         self.assertEqual(func_relation.function_relation_agent[0], record)
         self.assertEqual(func_relation.function_relation_function[0].uri,
@@ -757,12 +766,15 @@
             self.assertEqual(cnx.find('AuthorityRecord').one().agent_kind[0].name,
                              'unknown-agent-kind')
 
-    def test_no_maintenance_tags(self):
+    def test_no_maintenance_agency_tag(self):
         with self.admin_access.repo_cnx() as cnx:
             with self.assertRaises(dataimport.MissingTag) as cm:
                 testutils.eac_import(cnx, self.datapath('no_maintenance_agency.xml'))
             self.assertEqual(cm.exception.tag, 'maintenanceAgency')
             self.assertEqual(cm.exception.tag_parent, 'control')
+
+    def test_no_maintenance_status_tag(self):
+        with self.admin_access.repo_cnx() as cnx:
             with self.assertRaises(dataimport.MissingTag) as cm:
                 testutils.eac_import(cnx, self.datapath('no_maintenance_status.xml'))
             self.assertEqual(cm.exception.tag, 'maintenanceStatus')
diff -r 045bcf5cd1aa -r 87d6331393a7 test/test_schema.py
--- a/test/test_schema.py	Tue Jun 25 14:05:50 2019 +0200
+++ b/test/test_schema.py	Tue Jun 25 18:02:18 2019 +0200
@@ -111,6 +111,7 @@
                                     set(['AuthorityRecord'])},
             'Convention': {('convention_of', 'subject'): set(['AuthorityRecord'])},
             'LanguageDec': {('language_declaration_of', 'subject'): set(['AuthorityRecord'])},
+            'Language': {('language_used_in', 'subject'): set(['AuthorityRecord'])},
             'EACSource': {('source_agent', 'subject'): set(['AuthorityRecord'])},
             'Event': {('has_event', 'object'): set(['History'])},
             'GeneralContext': {('general_context_of', 'subject'): set(['AuthorityRecord'])},


More information about the saem-devel mailing list