[PATCH 10 of 10 eac] Add the changes requested and implement the handling of the <languagesUsed> and <languageUsed> tags

Guillaume Vandevelde guillaume.vandevelde at logilab.fr
Thu Jun 27 10:12:16 CEST 2019


# HG changeset patch
# User Guillaume Vandevelde <gvandevelde at logilab.fr>
# Date 1561478538 -7200
#      Tue Jun 25 18:02:18 2019 +0200
# Node ID 4fcb51b0d9207d2683b3d1749a61b3c786794e12
# Parent  dd2a299dac0876226f65b6007559898d80831020
# Available At http://hg.logilab.org/review/cubes/eac
#              hg pull http://hg.logilab.org/review/cubes/eac -r 4fcb51b0d920
Add the changes requested and implement the handling of the <languagesUsed> and <languageUsed> tags

Differential Revision: https://phab.logilab.fr/D3605

diff -r dd2a299dac08 -r 4fcb51b0d920 cubicweb_eac/dataimport.py
--- a/cubicweb_eac/dataimport.py	Tue Jun 25 14:05:50 2019 +0200
+++ b/cubicweb_eac/dataimport.py	Tue Jun 25 18:02:18 2019 +0200
@@ -75,12 +75,6 @@
         return dct
 
 
-def removekey(d, key):
-    r = dict(d)
-    del r[key]
-    return r
-
-
 def external_uri(uri):
     values = [text_type(uri)]
     return ExtEntity('ExternalUri', uri, {'uri': set(values), 'cwuri': set(values)})
@@ -452,6 +446,10 @@
         for mandate in self.find_nested(description, 'eac:mandate', 'eac:mandates'):
             for extentity in self.build_mandate(mandate):
                 yield extentity
+        # LanguagesUsed
+        for language in self.find_nested(description, 'eac:languageUsed', 'eac:languagesUsed'):
+            for extentity in self.build_language_used(language):
+                yield extentity
         # history
         for history in self._elem_findall(description, 'eac:biogHist'):
             for extentity in self.build_history(history):
@@ -860,6 +858,7 @@
     @add_xml_wrap_for('EACFunctionRelation')
     def build_function_relation(self, elem):
         """Build a relation between function entities
+
         yield an ExternalUri object, and an EACFunctionRelation
         object that make the link between the ExternalUri and
         the AuthorityRecord object"""
@@ -892,10 +891,10 @@
             elem,
             (('place_entry', 'eac:placeEntry'),
              ('relation_entry', 'eac:relationEntry'))))
-        attrib = {k: v for (k, v) in elem.attrib.items()}
-        attrib = reduce(removekey, ('functionRelationType',
-                                    '{%(xlink)s}href' % self.namespaces),
-                        attrib)
+        attrib = {k: v for (k, v) in dict(elem.attrib).items() if k not in {
+            'functionRelationType',
+            '{%(xlink)s}href' % self.namespaces
+        }}
         if attrib:
             values.update({'attributes': set([text_type(attrib)])})
         yield ExtEntity('EACFunctionRelation', self._gen_extid(), values)
@@ -926,6 +925,13 @@
         if dates:
             values.update(dates)
         values.update(self.parse_tag_description(elem))
+        attrib = {k: v for (k, v) in dict(elem.attrib).items() if k not in {
+            'relationRelationType',
+            '{%(xlink)s}role' % self.namespaces,
+            '{%(xlink)s}href' % self.namespaces,
+        }}
+        if attrib:
+            values.update({'attributes': set([text_type(attrib)])})
         yield ExtEntity('EACResourceRelation', self._gen_extid(), values)
 
     @filter_none
@@ -953,13 +959,16 @@
                 yield extentity
         publication_status = self._elem_find(control, 'eac:publicationStatus')
         p_status = publication_status.text.strip()
+        self.record_visited(publication_status, self.record)
         maintenance_status = self._elem_find(control, 'eac:maintenanceStatus')
         if maintenance_status is None:
             raise MissingTag('maintenanceStatus', 'control')
         m_status = maintenance_status.text.strip()
+        self.record_visited(maintenance_status, self.record)
         maintenance_agency = self._elem_find(control, 'eac:maintenanceAgency')
         if maintenance_agency is None:
             raise MissingTag('maintenanceAgency', 'control')
+        self.record_visited(maintenance_agency, self.record)
         if maintenance_agency is not None:
             yield next(self.build_maintenance_agency(maintenance_agency, m_status, p_status))
         builders = (('eac:sources/eac:source', self.build_source),
@@ -1017,11 +1026,20 @@
             values.update({'publication_status': set([text_type(p_status)])})
         yield ExtEntity('MaintenanceAg', self._gen_extid(), values)
 
+    @relate_to_record_through('Language', 'language_used_in')
+    def build_language_used(self, elem):
+        for e in self.build_language_based_entity(elem):
+            yield e
+
     @relate_to_record_through('LanguageDec', 'language_declaration_of')
+    def build_language_declaration(self, elem):
+        for e in self.build_language_based_entity(elem, extid="LanguageDec"):
+            yield e
+
     @filter_empty
     @elem_maybe_none
-    def build_language_declaration(self, elem):
-        """Build a `LanguageDec` external entity"""
+    def build_language_based_entity(self, elem, extid="Language"):
+        """Build a `Language` based external entity"""
         values = self.parse_tag_description(elem)
         language = self._elem_find(elem, 'eac:language')
         script = self._elem_find(elem, 'eac:script')
@@ -1029,7 +1047,7 @@
             values['language'] = set([text_type(etree.tostring(language).strip())])
         if script is not None and script.text:
             values['script'] = set([text_type(etree.tostring(script).strip())])
-        yield ExtEntity('LanguageDec', self._gen_extid(), values)
+        yield ExtEntity(extid, self._gen_extid(), values)
 
     @relate_to_record_through('Convention', 'convention_of')
     @add_child_for('Convention', 'has_citation')
diff -r dd2a299dac08 -r 4fcb51b0d920 cubicweb_eac/schema.py
--- a/cubicweb_eac/schema.py	Tue Jun 25 14:05:50 2019 +0200
+++ b/cubicweb_eac/schema.py	Tue Jun 25 18:02:18 2019 +0200
@@ -193,6 +193,13 @@
     composite = 'object'
 
 
+class language_used_in(RelationDefinition):
+    subject = 'Language'
+    object = 'AuthorityRecord'
+    cardinality = '1*'
+    composite = 'object'
+
+
 class agency_of(RelationDefinition):
     subject = 'MaintenanceAg'
     object = 'AuthorityRecord'
@@ -281,12 +288,16 @@
     description = RichString(fulltextindexed=True)
 
 
-class LanguageDec(EntityType):
+class Language(EntityType):
     language = String(fulltextindexed=True)
     script = String(fulltextindexed=True)
     description = RichString(fulltextindexed=True)
 
 
+class LanguageDec(Language):
+    pass
+
+
 class MaintenanceAg(EntityType):
     agency_code = String(fulltextindexed=True)
     agency_name = String(fulltextindexed=True)
@@ -440,6 +451,7 @@
     resource_role = String(description=_('type or nature of the remote resource'),
                            internationalizable=True)
     description = RichString(fulltextindexed=True)
+    attributes = String(fulltextindexed=True)
 
 
 class resource_relation_agent(RelationDefinition):
diff -r dd2a299dac08 -r 4fcb51b0d920 test/data/FRAD033_EAC_00001_simplified.xml
--- a/test/data/FRAD033_EAC_00001_simplified.xml	Tue Jun 25 14:05:50 2019 +0200
+++ b/test/data/FRAD033_EAC_00001_simplified.xml	Tue Jun 25 18:02:18 2019 +0200
@@ -125,6 +125,20 @@
     <description>
       <existDates><dateRange><fromDate>1800</fromDate><toDate>2099</toDate></dateRange>
       </existDates>
+	<languageUsed>
+	  <language
+	      languageCode="eng">English
+	  </language>
+	  <script scriptCode="Latn">Latin
+	  </script>
+	</languageUsed>
+	<languageUsed>
+	  <language
+	      languageCode="spa">Spanish
+	  </language>
+	  <script scriptCode="Latn">Latin
+	  </script>
+	</languageUsed>
       <place>
           <placeRole>siege</placeRole>
           <placeEntry vocabularySource="http://catalogue.bnf.fr/ark:/12148/cb152418385">Bordeaux (Gironde, France)</placeEntry>
diff -r dd2a299dac08 -r 4fcb51b0d920 test/test_dataimport.py
--- a/test/test_dataimport.py	Tue Jun 25 14:05:50 2019 +0200
+++ b/test/test_dataimport.py	Tue Jun 25 18:02:18 2019 +0200
@@ -209,6 +209,18 @@
               'mandate_agent': set(['FRAD033_EAC_00001']),
              },
             ),
+            ('Language', _gen_extid(),
+             {'language_used_in': ['FRAD033_EAC_00001'],
+              'language': set([u'<language xmlns="urn:isbn:1-931666-33-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" languageCode="eng">English\n\t  </language>']), # noqa
+              'script': set([u'<script xmlns="urn:isbn:1-931666-33-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" scriptCode="Latn">Latin\n\t  </script>']) # noqa
+             },
+            ),
+            ('Language', _gen_extid(),
+             {'language_used_in': ['FRAD033_EAC_00001'],
+              'language': set([u'<language xmlns="urn:isbn:1-931666-33-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" languageCode="spa">Spanish\n\t  </language>']), # noqa
+              'script': set([u'<script xmlns="urn:isbn:1-931666-33-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" scriptCode="Latn">Latin\n\t  </script>']) # noqa
+             },
+            ),
             ('History', _gen_extid(),
              {'abstract': set([u'Test of an abstract element']),
               'text': set(["\n".join((
@@ -220,8 +232,8 @@
               ]),
               'text_format': set([u'text/html']),
               'history_agent': set(['FRAD033_EAC_00001']),
-              'has_citation': set(['23', '22']),
-              'has_event': set(['24', '25']),
+              'has_citation': set(['25', '24']),
+              'has_event': set(['26', '27']),
              },
             ),
             ('Citation', _gen_extid(),
@@ -277,7 +289,7 @@
               'description': set([u'Organisation des réunions ...']),
               'description_format': set([u'text/plain']),
               'occupation_agent': set(['FRAD033_EAC_00001']),
-              'has_citation': set(['31']),
+              'has_citation': set(['33']),
               'equivalent_concept': set(['http://pifgadget.com']),
              },
             ),
@@ -290,7 +302,7 @@
                               u'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
                               u'xmlns:xlink="http://www.w3.org/1999/xlink">very famous</p>']),
               'content_format': set([u'text/html']),
-              'has_citation': set(['33']),
+              'has_citation': set(['35']),
               'general_context_of': set(['FRAD033_EAC_00001']),
               }
             ),
@@ -347,6 +359,7 @@
             ),
             ('EACResourceRelation', _gen_extid(),
              {'agent_role': set([u'creatorOf']),
+              'attributes': set([u"{'{http://www.w3.org/1999/xlink}show': 'new', '{http://www.w3.org/1999/xlink}actuate': 'onRequest', '{http://www.w3.org/1999/xlink}type': 'simple', 'resourceRelationType': 'creatorOf'}"]), # noqa
               'resource_role': set([u'Fonds d\'archives']),
               'resource_relation_resource': set([
                   'http://gael.gironde.fr/ead.html?id=FRAD033_IR_N']),
@@ -442,12 +455,10 @@
         for tagname, sourceline in importer.not_visited():
             not_visited.setdefault(tagname, set([])).add(sourceline)
         self.assertEqual(not_visited,
-                         {'maintenanceStatus': set([12]),
-                          'publicationStatus': set([14]),
-                          'localControl': set([54]),
+                         {'localControl': set([54]),
                           'source': set([76]),  # empty.
-                          'structureOrGenealogy': set([189]),  # empty.
-                          'biogHist': set([229, 232]),  # empty.
+                          'structureOrGenealogy': set([203]),  # empty.
+                          'biogHist': set([243, 246]),  # empty.
                           })
 
     def test_mandate_under_mandates(self):
@@ -545,7 +556,7 @@
                                cwuri=u'http://data.culture.fr/thesaurus/page/ark:/67717/T1-1074')
             cnx.commit()
             created, updated = testutils.eac_import(cnx, fpath)
-            self.assertEqual(len(created), 50)
+            self.assertEqual(len(created), 52)
             self.assertEqual(updated, set())
             rset = cnx.find('AuthorityRecord', isni=u'22330001300016')
             self.assertEqual(len(rset), 1)
@@ -690,7 +701,6 @@
         self.assertEqual(rrelation.xml_wrap.getvalue(),
                          '<he xmlns="urn:isbn:1-931666-33-4" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">joe</he>')  # noqa
         rset = cnx.find('EACFunctionRelation', r_type=u'performs')
-        self.assertEqual(len(rset), 1)
         func_relation = rset.one()
         self.assertEqual(func_relation.attributes,
                          u"{'{http://www.w3.org/1999/xlink}actuate': 'onLoad', '{http://www.w3.org/1999/xlink}arcrole': 'http://test_arcrole.lol.com', '{http://www.w3.org/1999/xlink}role': 'http://test_role.lmao.com'}") # noqa
@@ -703,7 +713,6 @@
         self.assertEqual(func_relation.function_relation_function[0].uri,
                          u'http://gael.gironde.fr/ead.html?id=FRAD033_IR_N')
         rset = cnx.find('EACFunctionRelation', r_type=u'controls')
-        self.assertEqual(len(rset), 1)
         func_relation = rset.one()
         self.assertEqual(func_relation.function_relation_agent[0], record)
         self.assertEqual(func_relation.function_relation_function[0].uri,
@@ -757,12 +766,15 @@
             self.assertEqual(cnx.find('AuthorityRecord').one().agent_kind[0].name,
                              'unknown-agent-kind')
 
-    def test_no_maintenance_tags(self):
+    def test_no_maintenance_agency_tag(self):
         with self.admin_access.repo_cnx() as cnx:
             with self.assertRaises(dataimport.MissingTag) as cm:
                 testutils.eac_import(cnx, self.datapath('no_maintenance_agency.xml'))
             self.assertEqual(cm.exception.tag, 'maintenanceAgency')
             self.assertEqual(cm.exception.tag_parent, 'control')
+
+    def test_no_maintenance_status_tag(self):
+        with self.admin_access.repo_cnx() as cnx:
             with self.assertRaises(dataimport.MissingTag) as cm:
                 testutils.eac_import(cnx, self.datapath('no_maintenance_status.xml'))
             self.assertEqual(cm.exception.tag, 'maintenanceStatus')
diff -r dd2a299dac08 -r 4fcb51b0d920 test/test_schema.py
--- a/test/test_schema.py	Tue Jun 25 14:05:50 2019 +0200
+++ b/test/test_schema.py	Tue Jun 25 18:02:18 2019 +0200
@@ -111,6 +111,7 @@
                                     set(['AuthorityRecord'])},
             'Convention': {('convention_of', 'subject'): set(['AuthorityRecord'])},
             'LanguageDec': {('language_declaration_of', 'subject'): set(['AuthorityRecord'])},
+            'Language': {('language_used_in', 'subject'): set(['AuthorityRecord'])},
             'EACSource': {('source_agent', 'subject'): set(['AuthorityRecord'])},
             'Event': {('has_event', 'object'): set(['History'])},
             'GeneralContext': {('general_context_of', 'subject'): set(['AuthorityRecord'])},


More information about the saem-devel mailing list