[PATCH 2 of 3 eac] Add support for <chronList> under <biogHist> tag

Guillaume Vandevelde guillaume.vandevelde at logilab.fr
Thu Jul 4 11:21:53 CEST 2019


# HG changeset patch
# User Guillaume Vandevelde <gvandevelde at logilab.fr>
# Date 1560521034 -7200
#      Fri Jun 14 16:03:54 2019 +0200
# Node ID 7049863207c0a5bd19efa5d4291d59229243473b
# Parent  fbf72d6e87720d71bda8b13ad629e088b83009ab
# Available At http://hg.logilab.org/review/cubes/eac
#              hg pull http://hg.logilab.org/review/cubes/eac -r 7049863207c0
Add support for <chronList> under <biogHist> tag

Build a new Event entities as child of the History entity.

Added as a new entity for handling the case of multiples events under a <biogHist> tag.

The implementation will change in later commits. Replaced the filters and maps by list comprehension and modified the child support so it can be handled like the `Citation` entity.

Differential Revision: https://phab.logilab.fr/D3445

diff -r fbf72d6e8772 -r 7049863207c0 cubicweb_eac/dataimport.py
--- a/cubicweb_eac/dataimport.py	Thu Jun 13 11:49:59 2019 +0200
+++ b/cubicweb_eac/dataimport.py	Fri Jun 14 16:03:54 2019 +0200
@@ -40,7 +40,7 @@
 TYPE_MAPPING['human'] = u'person'
 
 ETYPES_ORDER_HINT = ('AgentKind', 'PhoneNumber', 'PostalAddress', 'AuthorityRecord',
-                     'AgentPlace', 'Mandate', 'LegalStatus', 'History',
+                     'AgentPlace', 'Mandate', 'LegalStatus', 'History', 'Event',
                      'Structure', 'AgentFunction', 'Occupation', 'GeneralContext',
                      'AssociationRelation', 'ChronologicalRelation', 'HierarchicalRelation',
                      'EACResourceRelation', 'ExternalUri', 'EACSource',
@@ -594,7 +594,40 @@
                       'text_format': set([desc_format])}
             if abstract is not None and abstract.text:
                 values['abstract'] = set([text_type(abstract.text)])
-            yield ExtEntity('History', self._gen_extid(), values)
+            history_obj = ExtEntity('History', self._gen_extid(), values)
+            # Handle <chronItem>
+            events = self._elem_findall(elem, './/eac:chronItem')
+            if events is not None:
+                events_ent = filter(lambda x: x is not None and x.values,
+                                    [self.build_event(e) for e in events])
+                r_etypes = [e.extid for e in events_ent]
+                map(history_obj.values.setdefault(
+                    'has_event', set()).add, r_etypes)
+                yield history_obj
+                for e in events_ent:
+                    yield e
+            else:
+                yield history_obj
+
+    def build_event(self, elem):
+        """Build en `Event` external entity."""
+        values = {}
+        date = self._elem_find(elem, 'eac:date')
+        date_range = self._elem_find(elem, 'eac:dateRange')
+        event = self._elem_find(elem, 'eac:event')
+        if event is not None and event.text:
+            sentences = filter((lambda x: x != ''),
+                               map(text_type,
+                               (map(str.strip, event.text.split('\n')))))
+            values['event'] = set(["".join(sentences)])
+        if date is None and date_range:
+            date_range = self.parse_daterange(date_range)
+            values.update(date_range)
+        elif date is not None:
+            values.update({'start_date': set([self.parse_date(date)]),
+                           'end_date': set([self.parse_date(date)])})
+        if values:
+            return ExtEntity('Event', self._gen_extid(), values)
 
     @elem_maybe_none
     @relate_to_record_through('Structure', 'structure_agent')
diff -r fbf72d6e8772 -r 7049863207c0 cubicweb_eac/migration/0.9.0_Any.py
--- a/cubicweb_eac/migration/0.9.0_Any.py	Thu Jun 13 11:49:59 2019 +0200
+++ b/cubicweb_eac/migration/0.9.0_Any.py	Fri Jun 14 16:03:54 2019 +0200
@@ -1,1 +1,2 @@
 add_attribute('History', 'abstract')
+add_entity_type('Event')
diff -r fbf72d6e8772 -r 7049863207c0 cubicweb_eac/schema.py
--- a/cubicweb_eac/schema.py	Thu Jun 13 11:49:59 2019 +0200
+++ b/cubicweb_eac/schema.py	Fri Jun 14 16:03:54 2019 +0200
@@ -265,6 +265,22 @@
     text = RichString(fulltextindexed=True)
 
 
+ at dated_entity_type
+class Event(EntityType):
+    """Number of dates and event description linked to an History object"""
+    event = RichString(fulltextindexed=True)
+    place_entry = String(fulltextindexed=True)
+
+
+class has_event(RelationDefinition):
+    subject = 'History'
+    object = 'Event'
+    cardinality = '*1'
+    composite = 'subject'
+    fulltext_container = 'subject'
+    description = _('Event.s with date.s for describing an historical event')
+
+
 class Structure(EntityType):
     """Information about the structure of an authority"""
     description = RichString(fulltextindexed=True)
diff -r fbf72d6e8772 -r 7049863207c0 test/data/FRAD033_EAC_00001_simplified.xml
--- a/test/data/FRAD033_EAC_00001_simplified.xml	Thu Jun 13 11:49:59 2019 +0200
+++ b/test/data/FRAD033_EAC_00001_simplified.xml	Fri Jun 14 16:03:54 2019 +0200
@@ -199,6 +199,30 @@
           <p>L'inspecteur Canardo</p>
           <citation xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.assemblee-nationale.fr/histoire/images-decentralisation/decentralisation/loi-du-22-decembre-1789-.pdf" xlink:type="simple"></citation>
           <citation xlink:href="http://pifgadget">Voir aussi pifgadget</citation>
+	  <chronList>
+	    <chronItem>
+	      <date standardDate="1957">1957
+	      </date>
+	      <event>Left Mer and moved to the mainland.
+	      Worked at various jobs including canecutter
+	      and railway labourer.
+	      </event>
+	    </chronItem>
+	    <chronItem>
+	      <dateRange>
+		<fromDate
+		    standardDate="1960">1960
+		</fromDate>
+		<toDate standardDate="1961">1961
+		</toDate>
+	      </dateRange>
+	      <event>Union representative, Townsville-
+	      Mount Isa rail construction project.
+	      </event>
+	    </chronItem>
+	  </chronList>
+	  <chronList></chronList>
+	  <chronList><chronItem></chronItem></chronList>
       </biogHist>
 
       <!--empty biogHist-->
diff -r fbf72d6e8772 -r 7049863207c0 test/test_dataimport.py
--- a/test/test_dataimport.py	Thu Jun 13 11:49:59 2019 +0200
+++ b/test/test_dataimport.py	Fri Jun 14 16:03:54 2019 +0200
@@ -182,7 +182,8 @@
              },
             ),
             ('History', _gen_extid(),
-             {'text': set(["\n".join((
+             {'abstract': set([u'Test of an abstract element']),
+              'text': set(["\n".join((
                      u'<p xmlns="urn:isbn:1-931666-33-4" '
                      u'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
                      u'xmlns:xlink="http://www.w3.org/1999/xlink">{0}</p>'
@@ -191,10 +192,23 @@
               ]),
               'text_format': set([u'text/html']),
               'history_agent': set(['FRAD033_EAC_00001']),
-              'has_citation': set(['16', '17']),
-              'abstract': set([u'Test of an abstract element']),
+              'has_citation': set(['18', '19']),
+              'has_event': set(['16', '17']),
              },
             ),
+            ('Event', _gen_extid(),
+             {'event': [u'Left Mer and moved to the mainland.'
+                        u'Worked at various jobs including canecutter'
+                        u'and railway labourer.'],
+              'end_date': set([datetime.date(1957, 1, 1)]),
+              'start_date': set([datetime.date(1957, 1, 1)])}
+            ),
+            ('Event', _gen_extid(),
+             {'event': set([u'Union representative, Townsville-'
+                            u'Mount Isa rail construction project.']),
+              'end_date': set([datetime.date(1961, 1, 1)]),
+              'start_date': set([datetime.date(1960, 1, 1)])}
+            ),
             ('Citation', _gen_extid(),
              {'uri': set(['http://www.assemblee-nationale.fr/histoire/images-decentralisation/decentralisation/loi-du-22-decembre-1789-.pdf'])},  # noqa
             ),
@@ -238,7 +252,7 @@
               'description': set([u'Organisation des réunions ...']),
               'description_format': set([u'text/plain']),
               'occupation_agent': set(['FRAD033_EAC_00001']),
-              'has_citation': set(['23']),
+              'has_citation': set(['25']),
               'equivalent_concept': set(['http://pifgadget.com']),
              },
             ),
@@ -251,7 +265,7 @@
                               u'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
                               u'xmlns:xlink="http://www.w3.org/1999/xlink">very famous</p>']),
               'content_format': set([u'text/html']),
-              'has_citation': set(['25']),
+              'has_citation': set(['27']),
               'general_context_of': set(['FRAD033_EAC_00001']),
               }
             ),
@@ -367,7 +381,7 @@
                           'localControl': set([54]),
                           'source': set([76]),  # empty.
                           'structureOrGenealogy': set([189]),  # empty.
-                          'biogHist': set([205, 208]),  # empty.
+                          'biogHist': set([229, 232]),  # empty.
                           })
 
     def test_mandate_under_mandates(self):
@@ -465,7 +479,7 @@
                                cwuri=u'http://data.culture.fr/thesaurus/page/ark:/67717/T1-1074')
             cnx.commit()
             created, updated = testutils.eac_import(cnx, fpath)
-            self.assertEqual(len(created), 39)
+            self.assertEqual(len(created), 41)
             self.assertEqual(updated, set())
             rset = cnx.find('AuthorityRecord', isni=u'22330001300016')
             self.assertEqual(len(rset), 1)
@@ -509,6 +523,18 @@
                                                     format=u'text/plain').strip(),
                          u"La loi du 22 décembre 1789, en divisant ...\n\nL'inspecteur Canardo")
 
+    def _check_event(self, cnx, record):
+        rset = cnx.find('Event', record)
+        self.assertEqual(len(rset), 2)
+        self.assertEqual(rset[0].printable_value('text',
+                                                 format=u'text/plain').strip(),
+                         u'Left Mer and moved to the mainland.'
+                         u'Worked at various jobs including canecutter'
+                         u'and railway labourer.')
+        self.assertEqual(rset[1].printable_value('text',
+                                                 format=u'text/plain').strip(),
+                         u'Union representative, Townsville-\nMount Isa rail construction project.')
+
     def _check_mandate(self, cnx, record):
         rset = cnx.find('Mandate', mandate_agent=record)
         self.assertEqual(len(rset), 1)
diff -r fbf72d6e8772 -r 7049863207c0 test/test_schema.py
--- a/test/test_schema.py	Thu Jun 13 11:49:59 2019 +0200
+++ b/test/test_schema.py	Fri Jun 14 16:03:54 2019 +0200
@@ -108,6 +108,7 @@
             'EACResourceRelation': {('resource_relation_agent', 'subject'):
                                     set(['AuthorityRecord'])},
             'EACSource': {('source_agent', 'subject'): set(['AuthorityRecord'])},
+            'Event': {('has_event', 'object'): set(['History'])},
             'GeneralContext': {('general_context_of', 'subject'): set(['AuthorityRecord'])},
             'History': {('history_agent', 'subject'): set(['AuthorityRecord'])},
             'LegalStatus': {('legal_status_agent', 'subject'): set(['AuthorityRecord'])},


More information about the saem-devel mailing list