[PATCH eac V2] Add support for <chronList> under <biogHist> tag

Guillaume Vandevelde guillaume.vandevelde at logilab.fr
Wed Jul 10 10:08:08 CEST 2019


# HG changeset patch
# User Guillaume Vandevelde <gvandevelde at logilab.fr>
# Date 1560762819 -7200
#      Mon Jun 17 11:13:39 2019 +0200
# Node ID 8f92f8fb2ede33e2c0ec2956fc9d435a5987188c
# Parent  c6719ba14ca67516c6f2bd932fe562c2a593e376
# Available At http://hg.logilab.org/review/cubes/eac
#              hg pull http://hg.logilab.org/review/cubes/eac -r 8f92f8fb2ede
Add support for <chronList> under <biogHist> tag

Build new HistoricalEvent entities as children of the History entity.

HistoricalEvent is added as a new entity type to handle the case of multiple events under a <biogHist> tag.
Modified the `add_citations_for` decorator (now `add_child_for`) so it can handle appending any kind of child.
Modified the History child creation and concatenation so it can use this system.

diff -r c6719ba14ca6 -r 8f92f8fb2ede cubicweb_eac/dataimport.py
--- a/cubicweb_eac/dataimport.py	Thu Jul 04 15:10:27 2019 +0200
+++ b/cubicweb_eac/dataimport.py	Mon Jun 17 11:13:39 2019 +0200
@@ -19,7 +19,7 @@
 
 from collections import deque
 import datetime
-from functools import wraps
+from functools import wraps, partial
 import inspect
 import logging
 from uuid import uuid4
@@ -40,7 +40,7 @@
 TYPE_MAPPING['human'] = u'person'
 
 ETYPES_ORDER_HINT = ('AgentKind', 'PhoneNumber', 'PostalAddress', 'AuthorityRecord',
-                     'AgentPlace', 'Mandate', 'LegalStatus', 'History',
+                     'AgentPlace', 'Mandate', 'LegalStatus', 'History', 'HistoricalEvent',
                      'Structure', 'AgentFunction', 'Occupation', 'GeneralContext',
                      'AssociationRelation', 'ChronologicalRelation', 'HierarchicalRelation',
                      'EACResourceRelation', 'ExternalUri', 'EACSource',
@@ -153,24 +153,29 @@
     return decorator
 
 
-def add_citations_for(etype):
+def add_child_for(etype, relation, builder):
     """Handle import of citation tag for `etype` ExtEntity that is yielded by
     decorated method.
     """
     def decorator(func):
         @wraps(func)
         def wrapper(self, elem):
+            build_child = getattr(self, builder)
             for extentity in func(self, elem):
                 if extentity.etype == etype:
-                    for citation in self.build_citation(elem):
+                    for child in build_child(elem):
                         extentity.values.setdefault(
-                            'has_citation', set()).add(citation.extid)
-                        yield citation
+                            relation, set()).add(child.extid)
+                        yield child
                 yield extentity
         return wrapper
     return decorator
 
 
+add_citation = partial(add_child_for, relation='has_citation', builder='build_citation')
+add_event = partial(add_child_for, relation='has_event', builder='build_event')
+
+
 def require_tag(tagname):
     """Method decorator handling a mandatory tag within a XML element."""
     def warn(self, elem):
@@ -522,7 +527,7 @@
     @elem_maybe_none
     @relate_to_record_through('LegalStatus', 'legal_status_agent')
     @filter_empty
-    @add_citations_for('LegalStatus')
+    @add_citation('LegalStatus')
     @equivalent_concept('eac:term', 'LegalStatus')
     def build_legal_status(self, elem, **kwargs):
         """Build a `LegalStatus` external entity.
@@ -545,7 +550,7 @@
     @elem_maybe_none
     @relate_to_record_through('Mandate', 'mandate_agent')
     @filter_empty
-    @add_citations_for('Mandate')
+    @add_citation('Mandate')
     @equivalent_concept('eac:term', 'Mandate')
     def build_mandate(self, elem, **kwargs):
         """Build a `Mandate` external entity.
@@ -583,7 +588,8 @@
             yield ExtEntity('Citation', self._gen_extid(), values)
 
     @relate_to_record_through('History', 'history_agent')
-    @add_citations_for('History')
+    @add_event('History')
+    @add_citation('History')
     @elem_maybe_none
     def build_history(self, elem):
         """Build a `History` external entity."""
@@ -596,6 +602,30 @@
                 values['abstract'] = set([text_type(abstract.text)])
             yield ExtEntity('History', self._gen_extid(), values)
 
+    @filter_none
+    @filter_empty
+    @elem_maybe_none
+    def build_event(self, elem):
+        """Build en `HistoricalEvent` external entity."""
+        citems = self._elem_findall(elem, './/eac:chronItem')
+        if citems is not None:
+            for citem in citems:
+                values = {}
+                date = self._elem_find(citem, 'eac:date')
+                date_range = self._elem_find(citem, 'eac:dateRange')
+                event = self._elem_find(citem, 'eac:event')
+                if event is not None and event.text:
+                    sentences = [text_type(line.strip()) for line in event.text.split('\n')]
+                    filtered = [e for e in sentences if e != '']
+                    values['event'] = set(["".join(filtered)])
+                    if date is None and date_range:
+                        date_range = self.parse_daterange(date_range)
+                        values.update(date_range)
+                    elif date is not None:
+                        values.update({'start_date': set([self.parse_date(date)]),
+                                       'end_date': set([self.parse_date(date)])})
+                    yield ExtEntity('HistoricalEvent', self._gen_extid(), values)
+
     @elem_maybe_none
     @relate_to_record_through('Structure', 'structure_agent')
     def build_structure(self, elem):
@@ -608,7 +638,7 @@
 
     @relate_to_record_through('AgentPlace', 'place_agent')
     @filter_empty
-    @add_citations_for('AgentPlace')
+    @add_citation('AgentPlace')
     @equivalent_concept('eac:placeEntry', 'AgentPlace')
     def build_place(self, elem):
         """Build a AgentPlace external entity"""
@@ -639,7 +669,7 @@
 
     @relate_to_record_through('AgentFunction', 'function_agent')
     @filter_empty
-    @add_citations_for('AgentFunction')
+    @add_citation('AgentFunction')
     @equivalent_concept('eac:term', 'AgentFunction')
     def build_function(self, elem):
         """Build a `AgentFunction`s external entities"""
@@ -651,7 +681,7 @@
 
     @relate_to_record_through('Occupation', 'occupation_agent')
     @filter_empty
-    @add_citations_for('Occupation')
+    @add_citation('Occupation')
     @equivalent_concept('eac:term', 'Occupation')
     def build_occupation(self, elem):
         """Build a `Occupation`s external entities"""
@@ -666,7 +696,7 @@
         yield ExtEntity('Occupation', self._gen_extid(), values)
 
     @relate_to_record_through('GeneralContext', 'general_context_of')
-    @add_citations_for('GeneralContext')
+    @add_citation('GeneralContext')
     def build_generalcontext(self, elem):
         """Build a `GeneralContext` external entity"""
         content, content_format = self.parse_tag_content(elem)
diff -r c6719ba14ca6 -r 8f92f8fb2ede cubicweb_eac/migration/0.9.0_Any.py
--- a/cubicweb_eac/migration/0.9.0_Any.py	Thu Jul 04 15:10:27 2019 +0200
+++ b/cubicweb_eac/migration/0.9.0_Any.py	Mon Jun 17 11:13:39 2019 +0200
@@ -1,1 +1,2 @@
 add_attribute('History', 'abstract')
+add_entity_type('HistoricalEvent')
diff -r c6719ba14ca6 -r 8f92f8fb2ede cubicweb_eac/schema.py
--- a/cubicweb_eac/schema.py	Thu Jul 04 15:10:27 2019 +0200
+++ b/cubicweb_eac/schema.py	Mon Jun 17 11:13:39 2019 +0200
@@ -265,6 +265,22 @@
     text = RichString(fulltextindexed=True)
 
 
+@dated_entity_type
+class HistoricalEvent(EntityType):
+    """Events linked to an History object"""
+    event = RichString(fulltextindexed=True)
+    place_entry = String(fulltextindexed=True)
+
+
+class has_event(RelationDefinition):
+    subject = 'History'
+    object = 'HistoricalEvent'
+    cardinality = '*1'
+    composite = 'subject'
+    fulltext_container = 'subject'
+    description = _('HistoricalEvent with date for describing an historical event')
+
+
 class Structure(EntityType):
     """Information about the structure of an authority"""
     description = RichString(fulltextindexed=True)
diff -r c6719ba14ca6 -r 8f92f8fb2ede test/data/FRAD033_EAC_00001_simplified.xml
--- a/test/data/FRAD033_EAC_00001_simplified.xml	Thu Jul 04 15:10:27 2019 +0200
+++ b/test/data/FRAD033_EAC_00001_simplified.xml	Mon Jun 17 11:13:39 2019 +0200
@@ -199,6 +199,30 @@
           <p>L'inspecteur Canardo</p>
           <citation xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="http://www.assemblee-nationale.fr/histoire/images-decentralisation/decentralisation/loi-du-22-decembre-1789-.pdf" xlink:type="simple"></citation>
           <citation xlink:href="http://pifgadget">Voir aussi pifgadget</citation>
+	  <chronList>
+	    <chronItem>
+	      <date standardDate="1957">1957
+	      </date>
+	      <event>Left Mer and moved to the mainland.
+	      Worked at various jobs including canecutter
+	      and railway labourer.
+	      </event>
+	    </chronItem>
+	    <chronItem>
+	      <dateRange>
+		<fromDate
+		    standardDate="1960">1960
+		</fromDate>
+		<toDate standardDate="1961">1961
+		</toDate>
+	      </dateRange>
+	      <event>Union representative, Townsville-
+	      Mount Isa rail construction project.
+	      </event>
+	    </chronItem>
+	  </chronList>
+	  <chronList></chronList>
+	  <chronList><chronItem></chronItem></chronList>
       </biogHist>
 
       <!--empty biogHist-->
diff -r c6719ba14ca6 -r 8f92f8fb2ede test/test_dataimport.py
--- a/test/test_dataimport.py	Thu Jul 04 15:10:27 2019 +0200
+++ b/test/test_dataimport.py	Mon Jun 17 11:13:39 2019 +0200
@@ -182,7 +182,8 @@
              },
             ),
             ('History', _gen_extid(),
-             {'text': set(["\n".join((
+             {'abstract': set([u'Test of an abstract element']),
+              'text': set(["\n".join((
                      u'<p xmlns="urn:isbn:1-931666-33-4" '
                      u'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
                      u'xmlns:xlink="http://www.w3.org/1999/xlink">{0}</p>'
@@ -192,7 +193,7 @@
               'text_format': set([u'text/html']),
               'history_agent': set(['FRAD033_EAC_00001']),
               'has_citation': set(['16', '17']),
-              'abstract': set([u'Test of an abstract element']),
+              'has_event': set(['18', '19']),
              },
             ),
             ('Citation', _gen_extid(),
@@ -201,6 +202,19 @@
             ('Citation', _gen_extid(),
              {'uri': set(['http://pifgadget']), 'note': set(['Voir aussi pifgadget'])},
             ),
+            ('HistoricalEvent', _gen_extid(),
+             {'event': [u'Left Mer and moved to the mainland.'
+                        u'Worked at various jobs including canecutter'
+                        u'and railway labourer.'],
+              'end_date': set([datetime.date(1957, 1, 1)]),
+              'start_date': set([datetime.date(1957, 1, 1)])}
+            ),
+            ('HistoricalEvent', _gen_extid(),
+             {'event': set([u'Union representative, Townsville-'
+                            u'Mount Isa rail construction project.']),
+              'end_date': set([datetime.date(1961, 1, 1)]),
+              'start_date': set([datetime.date(1960, 1, 1)])}
+            ),
             ('Structure', _gen_extid(),
              {'description': set([u'<p xmlns="urn:isbn:1-931666-33-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink">Pour accomplir ses missions ...</p>']),  # noqa
               'description_format': set([u'text/html']),
@@ -238,7 +252,7 @@
               'description': set([u'Organisation des réunions ...']),
               'description_format': set([u'text/plain']),
               'occupation_agent': set(['FRAD033_EAC_00001']),
-              'has_citation': set(['23']),
+              'has_citation': set(['25']),
               'equivalent_concept': set(['http://pifgadget.com']),
              },
             ),
@@ -251,7 +265,7 @@
                               u'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
                               u'xmlns:xlink="http://www.w3.org/1999/xlink">very famous</p>']),
               'content_format': set([u'text/html']),
-              'has_citation': set(['25']),
+              'has_citation': set(['27']),
               'general_context_of': set(['FRAD033_EAC_00001']),
               }
             ),
@@ -335,7 +349,7 @@
             ('ExternalUri', 'http://catalogue.bnf.fr/ark:/12148/cb152418385',
              {'uri': set([u'http://catalogue.bnf.fr/ark:/12148/cb152418385']),
               'cwuri': set([u'http://catalogue.bnf.fr/ark:/12148/cb152418385'])},
-            ),
+           ),
             ('ExternalUri', 'http://pifgadget.com',
              {'uri': set([u'http://pifgadget.com']),
               'cwuri': set([u'http://pifgadget.com'])},
@@ -367,7 +381,7 @@
                           'localControl': set([54]),
                           'source': set([76]),  # empty.
                           'structureOrGenealogy': set([189]),  # empty.
-                          'biogHist': set([205, 208]),  # empty.
+                          'biogHist': set([229, 232]),  # empty.
                           })
 
     def test_mandate_under_mandates(self):
@@ -465,7 +479,7 @@
                                cwuri=u'http://data.culture.fr/thesaurus/page/ark:/67717/T1-1074')
             cnx.commit()
             created, updated = testutils.eac_import(cnx, fpath)
-            self.assertEqual(len(created), 39)
+            self.assertEqual(len(created), 41)
             self.assertEqual(updated, set())
             rset = cnx.find('AuthorityRecord', isni=u'22330001300016')
             self.assertEqual(len(rset), 1)
@@ -509,6 +523,8 @@
         self.assertEqual(entity.printable_value('text',
                                                 format=u'text/plain').strip(),
                          u"La loi du 22 décembre 1789, en divisant ...\n\nL'inspecteur Canardo")
+        events = rset.one().has_event
+        self.assertEqual(len(events), 2)
 
     def _check_mandate(self, cnx, record):
         rset = cnx.find('Mandate', mandate_agent=record)
diff -r c6719ba14ca6 -r 8f92f8fb2ede test/test_schema.py
--- a/test/test_schema.py	Thu Jul 04 15:10:27 2019 +0200
+++ b/test/test_schema.py	Mon Jun 17 11:13:39 2019 +0200
@@ -108,6 +108,7 @@
             'EACResourceRelation': {('resource_relation_agent', 'subject'):
                                     set(['AuthorityRecord'])},
             'EACSource': {('source_agent', 'subject'): set(['AuthorityRecord'])},
+            'HistoricalEvent': {('has_event', 'object'): set(['History'])},
             'GeneralContext': {('general_context_of', 'subject'): set(['AuthorityRecord'])},
             'History': {('history_agent', 'subject'): set(['AuthorityRecord'])},
             'LegalStatus': {('legal_status_agent', 'subject'): set(['AuthorityRecord'])},


More information about the saem-devel mailing list