[PATCH eac] Add `values_from_xpath` function

Denis Laxalde denis.laxalde at logilab.fr
Fri Aug 9 11:01:35 CEST 2019


Katia Saurfelt a écrit :
> # HG changeset patch
> # User Guillaume Vandevelde <gvandevelde at logilab.fr>
> # Date 1564500696 -7200
> #      Tue Jul 30 17:31:36 2019 +0200
> # Node ID 40c725208339f1ec2d3086149d33bb8d6ab17096
> # Parent  fb45891c263f42f74b33c971ed39bfe5c4972fd6
> # Available At http://hg.logilab.org/review/cubes/eac
> #              hg pull http://hg.logilab.org/review/cubes/eac -r 40c725208339
> Add `values_from_xpath` function
> 
> This function automate the recurent check of the values.

I don't see here how this would help. So in this case, it would be
clearer to have another patch where this new helper is getting used.

> diff --git a/cubicweb_eac/dataimport.py b/cubicweb_eac/dataimport.py
> --- a/cubicweb_eac/dataimport.py
> +++ b/cubicweb_eac/dataimport.py
> @@ -19,7 +19,7 @@ Context -- Corporate Bodies, Persons, an
>  
>  from collections import deque
>  import datetime
> -from functools import wraps, partial
> +from functools import wraps, partial, reduce
>  import inspect
>  import logging
>  from uuid import uuid4
> @@ -64,6 +64,17 @@ class MissingTag(RuntimeError):
>          self.tag_parent = tag_parent
>  
>  
> +def safe_append(finder, dct, var_tuple):
> +    var_name, var_xpath = var_tuple
> +    var = finder(var_xpath)
> +    if var is not None and var.text:
> +        new_dct = dct.copy()

Why .copy()?

> +        new_dct[var_name] = set([text_type(var.text)])
> +        return new_dct
> +    else:
> +        return dct
> +
> +
>  def external_uri(uri):
>      values = [text_type(uri)]
>      return ExtEntity('ExternalUri', uri, {'uri': set(values), 'cwuri': set(values)})
> @@ -293,6 +304,16 @@ class EACCPFImporter(object):
>              return trace_extentity(self)(attr)
>          return attr
>  
> +    def values_from_xpaths(self, elem, name_path_tuples, values=None):
> +        """build a `values` dict from xpath requests
> +        partial is used for building a function with the signature
> +        append_func :: dict -> (varname, xpath) -> dict
> +        and use it in the reduce"""
> +        values = values or {}
> +        finder = partial(self._elem_find, elem)
> +        append_func = partial(safe_append, finder)
> +        return reduce(append_func, name_path_tuples, values)

This is totally cryptic to me. I'd suggest to avoid functional patterns
that are unidiomatic in python.

> +
>      def record_visited(self, elem, extentity):
>          assert extentity.extid, extentity
>          self._visited.setdefault(elem, set([])).add(extentity.extid)
> diff --git a/test/test_dataimport.py b/test/test_dataimport.py
> --- a/test/test_dataimport.py
> +++ b/test/test_dataimport.py
> @@ -23,6 +23,7 @@ from os.path import join, dirname
>  import sys
>  import unittest
>  
> +from lxml import etree
>  from six import reraise
>  from six.moves import map
>  
> @@ -32,6 +33,16 @@ from cubicweb.devtools.testlib import Cu
>  
>  from cubicweb_eac import dataimport, testutils
>  
> +XML_TEST = """
> +<note>
> +<to>Tove</to>
> +<from>Jani</from>
> +<heading>Reminder</heading>
> +<body>Hey!</body>
> +<empty></empty>
> +</note>
> +"""
> +
>  
>  def mock_(string):
>      return string
> @@ -431,6 +442,31 @@ class EACXMLParserTC(unittest.TestCase):
>                            'biogHist': set([229, 232]),  # empty.
>                            })
>  
> +    def test_values_from_xpath(self):
> +        fname = "FRAD033_EAC_00001_simplified.xml"
> +        fpath = self.datapath(fname)
> +        self.root = etree.fromstring(XML_TEST)
> +        import_log = SimpleImportLog(fpath)
> +        extid_generator = map(str, count()).next
> +        importer = dataimport.EACCPFImporter(fpath, import_log, mock_,
> +                                             extid_generator=extid_generator)
> +        values = importer.values_from_xpaths(
> +            self.root,
> +            (('to_value', 'to'),
> +             ('from_value', 'from'),
> +             ('heading_value', 'heading'),
> +             ('body_value', 'body'),
> +             ('empty_value', 'empty'))
> +        )
> +        print(11, values)

print

> +        self.assertEquals(

Deprecated method.

> +            values,
> +            {'to_value': set([u'Tove']),
> +             'from_value': set([u'Jani']),
> +             'heading_value': set([u'Reminder']),
> +             'body_value': set([u'Hey!'])}
> +        )
> +
>      def test_mandate_under_mandates(self):
>          """In FRAD033_EAC_00003.xml, <mandate> element are within <mandates>."""
>          entities = list(self.file_extentities('FRAD033_EAC_00003.xml'))
> 

-- 
Denis Laxalde
Logilab         https://www.logilab.fr



More information about the saem-devel mailing list