[PATCH 12 of 14 eac] Add support for <list>, <item> and <dateList>

Guillaume Vandevelde guillaume.vandevelde at logilab.fr
Tue Jul 2 14:54:01 CEST 2019


# HG changeset patch
# User Guillaume Vandevelde <gvandevelde at logilab.fr>
# Date 1561650369 -7200
#      Thu Jun 27 17:46:09 2019 +0200
# Node ID 907de60ffc8a32d6d3d0dea1207c456edce52ae9
# Parent  4e7a62a1630f3e0e254c36640d267744ad9276cc
# Available At http://hg.logilab.org/review/cubes/eac
#              hg pull http://hg.logilab.org/review/cubes/eac -r 907de60ffc8a
Add support for <list>, <item> and <dateList>

Support the new tags, imported as children; the next step is to include them in every parent that supports them.

Differential Revision: https://phab.logilab.fr/D3636

diff -r 4e7a62a1630f -r 907de60ffc8a cubicweb_eac/dataimport.py
--- a/cubicweb_eac/dataimport.py	Thu Jun 27 12:32:13 2019 +0200
+++ b/cubicweb_eac/dataimport.py	Thu Jun 27 17:46:09 2019 +0200
@@ -41,7 +41,7 @@
 
 ETYPES_ORDER_HINT = ('AgentKind', 'PhoneNumber', 'PostalAddress', 'AuthorityRecord',
                      'Convention', 'LanguageDec', 'Language', 'AgentPlace', 'MaintenanceAg',
-                     'Mandate', 'LegalStatus', 'History', 'Event', 'Structure', 'AgentFunction',
+                     'Mandate', 'LegalStatus', 'History', 'Structure', 'AgentFunction',
                      'Occupation', 'GeneralContext', 'AssociationRelation', 'ChronologicalRelation',
                      'HierarchicalRelation', 'EACResourceRelation', 'EACFunctionRelation',
                      'ParallelNames', 'ExternalUri', 'EACSource', 'Activity')
@@ -185,7 +185,8 @@
             builders = {'has_citation': self.build_citation,
                         'has_event': self.build_event,
                         'simple_name_relation': self.build_parallel_name_entry,
-                        'date_relation': self.build_parallel_date_entry}
+                        'date_relation': self.build_parallel_date_entry,
+                        'has_item': self.build_item}
             build_child = builders[relation]
             for extentity in func(self, elem):
                 if extentity.etype == etype:
@@ -446,21 +447,32 @@
     @add_child_for('ParallelNames', 'simple_name_relation')
     @add_child_for('ParallelNames', 'date_relation')
     def build_parallel(self, elem):
+        """For each nameEntryParallel, build a ParallelNames entity linked to
+        the EAC-CPF document and get the relations for all its children."""
         values = self.values_from_xpaths(elem, (('authorized_form', 'eac:authorizedForm'),
                                                 ('alternative_form', 'eac:alternativeForm')))
         yield ExtEntity('ParallelNames', self._gen_extid(), values)
 
+    def extract_dates_from(self, elem, tag):  # yield DateEntity ext-entities for dates under elem
+        for date in self.find_nested(elem, 'eac:date', tag):
+            yield ExtEntity('DateEntity', self._gen_extid(),
+                            {'start_date': set([self.parse_date(date)]),  # a single date is both
+                             'end_date': set([self.parse_date(date)])})  # the start and the end
+        for date_range in self.find_nested(elem, 'eac:dateRange', tag):
+            yield ExtEntity('DateEntity', self._gen_extid(), self.parse_daterange(date_range))
+
     @filter_empty
     @filter_none
     def build_parallel_date_entry(self, elem):
-        for date in self.find_nested(elem, 'eac:date', 'eac:useDates'):
-            yield ExtEntity('DateEntity', self._gen_extid(),
-                            {'start_date': set([self.parse_date(date)]),
-                             'end_date': set([self.parse_date(date)])})
-        for date_range in self.find_nested(elem, 'eac:dateRange', 'eac:useDates'):
-            yield ExtEntity('DateEntity', self._gen_extid(), self.parse_daterange(date_range))
+        """Build the DateEntity entities linked to a ParallelNames entity."""
+        for extentity in self.extract_dates_from(elem, 'eac:useDates'):
+            yield extentity
+        for date_set in self._elem_findall(elem, 'eac:dateSet'):
+            for extentity in self.extract_dates_from(date_set, 'eac:dateSet'):  # bound call: no extra self
+                yield extentity
 
     def build_parallel_name_entry(self, elem):
+        """Build NameEntryS linked to a ParallelNames entity"""
         for elem in self._elem_findall(elem, 'eac:nameEntry'):
             values = self.values_from_attrib(elem, (('language', 'lang'),
                                                     ('script_code', 'scriptCode')))
@@ -525,6 +537,16 @@
             for extentity in self.build_generalcontext(context):
                 yield extentity
 
+    def build_item(self, elem):  # yield one Item per eac:item found by find_nested under elem
+        for item in self.find_nested(elem, 'eac:item', 'eac:list'):
+            values = {}
+            if item.text and item.text.strip():  # text is None when the item starts with a child
+                values['content'] = set([text_type(item.text.strip())])
+            span = self._elem_find(item, 'eac:span')
+            if span is not None:
+                values['span'] = set([text_type(etree.tostring(span).strip())])
+            yield ExtEntity('Item', self._gen_extid(), values)
+
     def find_nested(self, elem, tagname, innertag):
         """Return a list of element with `tagname` within `element` possibly
         nested within `innertag`.
@@ -669,6 +691,7 @@
             yield ExtEntity('Citation', self._gen_extid(), values)
 
     @relate_to_record_through('History', 'history_agent')
+    @add_child_for('History', 'has_item')
     @add_child_for('History', 'has_event')
     @add_child_for('History', 'has_citation')
     @elem_maybe_none
@@ -1058,9 +1081,8 @@
                     values['start'] = set([event_date])
                     values['end'] = set([event_date])
         values.update(self.parse_tag_description(elem, 'eac:eventDescription'))
-        agent = self._elem_find(elem, 'eac:agent')
-        if agent is not None and agent.text:
-            values['agent'] = set([text_type(agent.text)])
+        values.update(self.values_from_xpaths(elem, (('agent', 'eac:agent'),
+                                                     ('agent_type', 'eac:agentType'))))
         yield ExtEntity('Activity', self._gen_extid(), values)
 
     @relate_to_record_through('MaintenanceAg', 'agency_of')
diff -r 4e7a62a1630f -r 907de60ffc8a cubicweb_eac/migration/0.9.0_Any.py
--- a/cubicweb_eac/migration/0.9.0_Any.py	Thu Jun 27 12:32:13 2019 +0200
+++ b/cubicweb_eac/migration/0.9.0_Any.py	Thu Jun 27 17:46:09 2019 +0200
@@ -4,7 +4,6 @@
 add_entity_type('Language')
 add_entity_type('LanguageDec')
 add_entity_type('ParallelNames')
-add_entity_type('EACFunctionRelation')
 add_entity_type('MaintenanceAg')
 add_attribute('MaintenanceAg', 'maintenance_status')
 add_attribute('MaintenanceAg', 'publication_status')
@@ -18,3 +17,5 @@
 add_attribute('NameEntry', 'authorized_form')
 add_attribute('NameEntry', 'script_code')
 add_entity_type('DateEntity')
+add_entity_type('Item')
+add_attribute('Activity', 'agent_type')
diff -r 4e7a62a1630f -r 907de60ffc8a cubicweb_eac/schema.py
--- a/cubicweb_eac/schema.py	Thu Jun 27 12:32:13 2019 +0200
+++ b/cubicweb_eac/schema.py	Thu Jun 27 17:46:09 2019 +0200
@@ -51,6 +51,8 @@
 
 Activity.add_relation(String(description=_('the agent responsible for this activity'),
                              indexed=True, fulltextindexed=True), name='agent')
+Activity.add_relation(String(description=_('the type of the agent linked to the activity'),
+                             indexed=True, fulltextindexed=True), name='agent_type')
 
 
 @dated_entity_type
@@ -89,6 +91,19 @@
     inlined = True
 
 
+class Item(EntityType):  # one <item> of an EAC <list>, as built by the importer's build_item
+    content = RichString(fulltextindexed=True)  # stripped leading text of the <item>
+    span = RichString(fulltextindexed=True)  # serialized <span> child of the <item>, if any
+
+
+class has_item(RelationDefinition):  # History -> Item relation, filled by the importer's build_item
+    subject = 'History'
+    object = 'Item'
+    cardinality = '*1'
+    composite = 'subject'
+    fulltext_container = 'subject'
+
+
 class ParallelNames(EntityType):
     authorized_form = String(fulltextindexed=True)
     alternative_form = String(fulltextindexed=True)
diff -r 4e7a62a1630f -r 907de60ffc8a test/data/FRAD033_EAC_00001_simplified.xml
--- a/test/data/FRAD033_EAC_00001_simplified.xml	Thu Jun 27 12:32:13 2019 +0200
+++ b/test/data/FRAD033_EAC_00001_simplified.xml	Thu Jun 27 17:46:09 2019 +0200
@@ -162,7 +162,7 @@
 	    <toDate>open
 	    </toDate>
 	  </dateRange>
-	  </useDates>
+	  </useDates>	  
 	</nameEntryParallel>
     </identity>
 
@@ -281,6 +281,26 @@
 	  </chronList>
 	  <chronList></chronList>
 	  <chronList><chronItem></chronItem></chronList>
+	  <list>
+	    <item>
+	      <span style="font-
+			   style:italic">1450-1950
+	      </span>
+	      (1929)
+	    </item>
+	    <item>
+	      <span style="font-style:italic">Globe
+	      Gliding
+	      </span>
+	      (1930)
+	    </item>
+	    <item>
+	      <span style="font-
+			   style:italic">Gems
+	      </span>
+	      (1931)
+	    </item>
+	  </list>
       </biogHist>
 
       <!--empty biogHist-->
diff -r 4e7a62a1630f -r 907de60ffc8a test/test_dataimport.py
--- a/test/test_dataimport.py	Thu Jun 27 12:32:13 2019 +0200
+++ b/test/test_dataimport.py	Thu Jun 27 17:46:09 2019 +0200
@@ -108,6 +108,7 @@
             ),
             ('Activity', _gen_extid(),
              {'type': set([u'create']),
+              'agent_type': [u'human'],
               'generated': set(['FRAD033_EAC_00001']),
               'start': set([datetime.datetime(2013, 4, 24, 5, 34, 41)]),
               'end': set([datetime.datetime(2013, 4, 24, 5, 34, 41)]),
@@ -118,6 +119,7 @@
             ('Activity', _gen_extid(),
              {'generated': set(['FRAD033_EAC_00001']),
               'type': set([u'modify']),
+              'agent_type': [u'human'],
               'start': set([datetime.datetime(2015, 1, 15, 7, 16, 33)]),
               'end': set([datetime.datetime(2015, 1, 15, 7, 16, 33)]),
               'agent': set([u'Delphine Jamet'])
@@ -152,7 +154,7 @@
               'description_format': set([u'text/html']), 'description': set([u'<p xmlns="urn:isbn:1-931666-33-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink">Norme ISO 8601 :2004 \xc9l\xe9ments de donn\xe9es et formats d\u2019\xe9change -- \xc9change\n          d\u2019information -- Repr\xe9sentation de la date et de l\u2019heure.</p>']), # noqa
              },
             ),
-            ('AgentKind', 'agentkind/authority',
+            ('AgentKind', u'agentkind/authority',
              {'name': set([u'authority'])},
             ),
             ('NameEntry', _gen_extid(),
@@ -275,6 +277,7 @@
               'history_agent': set(['FRAD033_EAC_00001']),
               'has_citation': set(['32', '33']),
               'has_event': set(['35', '34']),
+              'has_item': set(['38', '37', '36'])
              },
             ),
             ('Citation', _gen_extid(),
@@ -293,6 +296,18 @@
               'end_date': set([datetime.date(1961, 1, 1)]),
               'start_date': set([datetime.date(1960, 1, 1)])}
             ),
+            ('Item', _gen_extid(),
+             {'span': set([u'<span xmlns="urn:isbn:1-931666-33-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" style="font-       style:italic">1450-1950\n\t      </span>\n\t      (1929)']) # noqa
+              },
+            ),
+            ('Item', _gen_extid(),
+             {'span': set([u'<span xmlns="urn:isbn:1-931666-33-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" style="font-style:italic">Globe\n\t      Gliding\n\t      </span>\n\t      (1930)']) # noqa
+             },
+            ),
+            ('Item', _gen_extid(),
+             {'span': set([u'<span xmlns="urn:isbn:1-931666-33-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" style="font-       style:italic">Gems\n\t      </span>\n\t      (1931)']) # noqa
+             },
+            ),
             ('Structure', _gen_extid(),
              {'description': set([u'<p xmlns="urn:isbn:1-931666-33-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink">Pour accomplir ses missions ...</p>']),  # noqa
               'description_format': set([u'text/html']),
@@ -330,7 +345,7 @@
               'description': set([u'Organisation des réunions ...']),
               'description_format': set([u'text/plain']),
               'occupation_agent': set(['FRAD033_EAC_00001']),
-              'has_citation': set(['41']),
+              'has_citation': set(['44']),
               'equivalent_concept': set(['http://pifgadget.com']),
              },
             ),
@@ -343,7 +358,7 @@
                               u'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
                               u'xmlns:xlink="http://www.w3.org/1999/xlink">very famous</p>']),
               'content_format': set([u'text/html']),
-              'has_citation': set(['43']),
+              'has_citation': set(['46']),
               'general_context_of': set(['FRAD033_EAC_00001']),
               }
             ),
@@ -499,7 +514,7 @@
                          {'localControl': set([54]),
                           'source': set([76]),  # empty.
                           'structureOrGenealogy': set([247]),  # empty.
-                          'biogHist': set([287, 290]),  # empty.
+                          'biogHist': set([307, 310]),  # empty.
                           })
 
     def test_mandate_under_mandates(self):
@@ -597,7 +612,7 @@
                                cwuri=u'http://data.culture.fr/thesaurus/page/ark:/67717/T1-1074')
             cnx.commit()
             created, updated = testutils.eac_import(cnx, fpath)
-            self.assertEqual(len(created), 60)
+            self.assertEqual(len(created), 63)
             self.assertEqual(updated, set())
             rset = cnx.find('AuthorityRecord', isni=u'22330001300016')
             self.assertEqual(len(rset), 1)
diff -r 4e7a62a1630f -r 907de60ffc8a test/test_schema.py
--- a/test/test_schema.py	Thu Jun 27 12:32:13 2019 +0200
+++ b/test/test_schema.py	Thu Jun 27 17:46:09 2019 +0200
@@ -126,6 +126,7 @@
             'Structure': {('structure_agent', 'subject'): set(['AuthorityRecord'])},
             'ParallelNames': {('parallel_names_of', 'subject'): set(['AuthorityRecord'])},
             'NameEntryS': {('simple_name_relation', 'object'): set(['ParallelNames'])},
+            'Item': {('has_item', 'object'): set(['History'])}
         }
         struct = dict(
             (k, dict((rel, set(targets)) for rel, targets in v.items()))


More information about the saem-devel mailing list