[PATCH 2 of 3 eac] Add support for <chronList> under <biogHist> tag

Denis Laxalde denis.laxalde at logilab.fr
Thu Jul 4 15:56:13 CEST 2019


Guillaume Vandevelde a écrit :
> # HG changeset patch
> # User Guillaume Vandevelde <gvandevelde at logilab.fr>
> # Date 1560521034 -7200
> #      Fri Jun 14 16:03:54 2019 +0200
> # Node ID 7049863207c0a5bd19efa5d4291d59229243473b
> # Parent  fbf72d6e87720d71bda8b13ad629e088b83009ab
> # Available At http://hg.logilab.org/review/cubes/eac
> #              hg pull http://hg.logilab.org/review/cubes/eac -r 7049863207c0
> Add support for <chronList> under <biogHist> tag
> 
> Build a new Event entities as child of the History entity.
> 
> Added as a new entity for handling the case of multiples events under a <biogHist> tag.
> 
> The implementation will change in later commits. Replaced the filters and maps by list comprehension and modified the child support so it can be handled like the `Citation` entity.
> 
> Differential Revision: https://phab.logilab.fr/D3445

I get a test failure:

  __________________________________________ EACExportTC.test_richstring_rest ___________________________________________
  
  self = <test_entities.EACExportTC testMethod=test_richstring_rest>
  
      def test_richstring_rest(self):
          with self.admin_access.cnx() as cnx:
              alice = testutils.authority_record(cnx, u'Alice')
              desc = u'`pif <http://gadget.com>`_ is *red*'
              desc_html = (
                  u'<a class="reference" href="http://gadget.com">pif</a> '
                  u'is <em>red</em>'
              )
              mandate = cnx.create_entity(
                  'Mandate', term=u'w',
                  description=desc, description_format=u'text/rest',
                  mandate_agent=alice)
              cnx.commit()
              serializer = alice.cw_adapt_to('EAC-CPF')
              ptag, = serializer._eac_richstring_paragraph_elements(
  >               mandate, 'description')
  E           ValueError: need more than 0 values to unpack
  
  test/test_entities.py:113: ValueError

Also, can you run i18ncube and update messages catalog?

See other remarks below.

> diff -r fbf72d6e8772 -r 7049863207c0 cubicweb_eac/dataimport.py
> --- a/cubicweb_eac/dataimport.py	Thu Jun 13 11:49:59 2019 +0200
> +++ b/cubicweb_eac/dataimport.py	Fri Jun 14 16:03:54 2019 +0200
> @@ -40,7 +40,7 @@
>  TYPE_MAPPING['human'] = u'person'
>  
>  ETYPES_ORDER_HINT = ('AgentKind', 'PhoneNumber', 'PostalAddress', 'AuthorityRecord',
> -                     'AgentPlace', 'Mandate', 'LegalStatus', 'History',
> +                     'AgentPlace', 'Mandate', 'LegalStatus', 'History', 'Event',
>                       'Structure', 'AgentFunction', 'Occupation', 'GeneralContext',
>                       'AssociationRelation', 'ChronologicalRelation', 'HierarchicalRelation',
>                       'EACResourceRelation', 'ExternalUri', 'EACSource',
> @@ -594,7 +594,40 @@
>                        'text_format': set([desc_format])}
>              if abstract is not None and abstract.text:
>                  values['abstract'] = set([text_type(abstract.text)])
> -            yield ExtEntity('History', self._gen_extid(), values)
> +            history_obj = ExtEntity('History', self._gen_extid(), values)
> +            # Handle <chronItem>
> +            events = self._elem_findall(elem, './/eac:chronItem')
> +            if events is not None:
> +                events_ent = filter(lambda x: x is not None and x.values,
> +                                    [self.build_event(e) for e in events])
> +                r_etypes = [e.extid for e in events_ent]
> +                map(history_obj.values.setdefault(
> +                    'has_event', set()).add, r_etypes)

That's too much map/filter, please refactor to use common python
patterns.

> +                yield history_obj
> +                for e in events_ent:
> +                    yield e
> +            else:
> +                yield history_obj

I'm not sure if the order in which objects are yielded matters, can't we
yield history_obj at the end in both cases? (If so, the "else:" can be
dropped.)

> +
> +    def build_event(self, elem):
> +        """Build en `Event` external entity."""
> +        values = {}
> +        date = self._elem_find(elem, 'eac:date')
> +        date_range = self._elem_find(elem, 'eac:dateRange')
> +        event = self._elem_find(elem, 'eac:event')
> +        if event is not None and event.text:
> +            sentences = filter((lambda x: x != ''),
> +                               map(text_type,
> +                               (map(str.strip, event.text.split('\n')))))

Same here: too many map/filter.

> +            values['event'] = set(["".join(sentences)])
> +        if date is None and date_range:
> +            date_range = self.parse_daterange(date_range)
> +            values.update(date_range)
> +        elif date is not None:
> +            values.update({'start_date': set([self.parse_date(date)]),
> +                           'end_date': set([self.parse_date(date)])})
> +        if values:
> +            return ExtEntity('Event', self._gen_extid(), values)
>  
>      @elem_maybe_none
>      @relate_to_record_through('Structure', 'structure_agent')

> diff -r fbf72d6e8772 -r 7049863207c0 cubicweb_eac/schema.py
> --- a/cubicweb_eac/schema.py	Thu Jun 13 11:49:59 2019 +0200
> +++ b/cubicweb_eac/schema.py	Fri Jun 14 16:03:54 2019 +0200
> @@ -265,6 +265,22 @@
>      text = RichString(fulltextindexed=True)
>  
>  
> + at dated_entity_type
> +class Event(EntityType):
> +    """Number of dates and event description linked to an History object"""

I don't understand this docstring, can you rephrase it? IIUC, this is
supposed to be equivalent to the chronItem tag. Also, Event is perhaps a
bit too generic (and can cause conflict with other schemata), maybe HistoricalEvent?

> +    event = RichString(fulltextindexed=True)
> +    place_entry = String(fulltextindexed=True)
> +
> +
> +class has_event(RelationDefinition):
> +    subject = 'History'
> +    object = 'Event'
> +    cardinality = '*1'
> +    composite = 'subject'
> +    fulltext_container = 'subject'
> +    description = _('Event.s with date.s for describing an historical event')

These ".s" are unusual, can you remove them and conform to the existing
phrasing style?

> +
> +
>  class Structure(EntityType):
>      """Information about the structure of an authority"""
>      description = RichString(fulltextindexed=True)

> diff -r fbf72d6e8772 -r 7049863207c0 test/test_dataimport.py
> --- a/test/test_dataimport.py	Thu Jun 13 11:49:59 2019 +0200
> +++ b/test/test_dataimport.py	Fri Jun 14 16:03:54 2019 +0200
> @@ -182,7 +182,8 @@
>               },
>              ),
>              ('History', _gen_extid(),
> -             {'text': set(["\n".join((
> +             {'abstract': set([u'Test of an abstract element']),

This diff line is strange because it's just a move. Can you keep it at
its original place?

> +              'text': set(["\n".join((
>                       u'<p xmlns="urn:isbn:1-931666-33-4" '
>                       u'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
>                       u'xmlns:xlink="http://www.w3.org/1999/xlink">{0}</p>'
> @@ -191,10 +192,23 @@
>                ]),
>                'text_format': set([u'text/html']),
>                'history_agent': set(['FRAD033_EAC_00001']),
> -              'has_citation': set(['16', '17']),
> -              'abstract': set([u'Test of an abstract element']),

(^ the original place)

> +              'has_citation': set(['18', '19']),
> +              'has_event': set(['16', '17']),
>               },
>              ),
> +            ('Event', _gen_extid(),
> +             {'event': [u'Left Mer and moved to the mainland.'
> +                        u'Worked at various jobs including canecutter'
> +                        u'and railway labourer.'],
> +              'end_date': set([datetime.date(1957, 1, 1)]),
> +              'start_date': set([datetime.date(1957, 1, 1)])}
> +            ),
> +            ('Event', _gen_extid(),
> +             {'event': set([u'Union representative, Townsville-'
> +                            u'Mount Isa rail construction project.']),
> +              'end_date': set([datetime.date(1961, 1, 1)]),
> +              'start_date': set([datetime.date(1960, 1, 1)])}
> +            ),
>              ('Citation', _gen_extid(),
>               {'uri': set(['http://www.assemblee-nationale.fr/histoire/images-decentralisation/decentralisation/loi-du-22-decembre-1789-.pdf'])},  # noqa
>              ),
> @@ -238,7 +252,7 @@
>                'description': set([u'Organisation des réunions ...']),
>                'description_format': set([u'text/plain']),
>                'occupation_agent': set(['FRAD033_EAC_00001']),
> -              'has_citation': set(['23']),
> +              'has_citation': set(['25']),
>                'equivalent_concept': set(['http://pifgadget.com']),
>               },
>              ),
> @@ -251,7 +265,7 @@
>                                u'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
>                                u'xmlns:xlink="http://www.w3.org/1999/xlink">very famous</p>']),
>                'content_format': set([u'text/html']),
> -              'has_citation': set(['25']),
> +              'has_citation': set(['27']),
>                'general_context_of': set(['FRAD033_EAC_00001']),
>                }
>              ),
> @@ -367,7 +381,7 @@
>                            'localControl': set([54]),
>                            'source': set([76]),  # empty.
>                            'structureOrGenealogy': set([189]),  # empty.
> -                          'biogHist': set([205, 208]),  # empty.
> +                          'biogHist': set([229, 232]),  # empty.
>                            })
>  
>      def test_mandate_under_mandates(self):
> @@ -465,7 +479,7 @@
>                                 cwuri=u'http://data.culture.fr/thesaurus/page/ark:/67717/T1-1074')
>              cnx.commit()
>              created, updated = testutils.eac_import(cnx, fpath)
> -            self.assertEqual(len(created), 39)
> +            self.assertEqual(len(created), 41)
>              self.assertEqual(updated, set())
>              rset = cnx.find('AuthorityRecord', isni=u'22330001300016')
>              self.assertEqual(len(rset), 1)
> @@ -509,6 +523,18 @@
>                                                      format=u'text/plain').strip(),
>                           u"La loi du 22 décembre 1789, en divisant ...\n\nL'inspecteur Canardo")
>  
> +    def _check_event(self, cnx, record):

This method does not appear to be called anywhere. (In fact, I think
it's buggy ;)).

> +        rset = cnx.find('Event', record)
> +        self.assertEqual(len(rset), 2)
> +        self.assertEqual(rset[0].printable_value('text',
> +                                                 format=u'text/plain').strip(),
> +                         u'Left Mer and moved to the mainland.'
> +                         u'Worked at various jobs including canecutter'
> +                         u'and railway labourer.')
> +        self.assertEqual(rset[1].printable_value('text',
> +                                                 format=u'text/plain').strip(),
> +                         u'Union representative, Townsville-\nMount Isa rail construction project.')
> +
>      def _check_mandate(self, cnx, record):
>          rset = cnx.find('Mandate', mandate_agent=record)
>          self.assertEqual(len(rset), 1)



More information about the saem-devel mailing list