[PATCH eac] Add `values_from_xpath` function

gvandevelde guillaume.vandevelde at logilab.fr
Fri Aug 9 11:37:24 CEST 2019


On 8/9/19 11:01 AM, Denis Laxalde wrote:
> Katia Saurfelt a écrit :
>> # HG changeset patch
>> # User Guillaume Vandevelde <gvandevelde at logilab.fr>
>> # Date 1564500696 -7200
>> #      Tue Jul 30 17:31:36 2019 +0200
>> # Node ID 40c725208339f1ec2d3086149d33bb8d6ab17096
>> # Parent  fb45891c263f42f74b33c971ed39bfe5c4972fd6
>> # Available At http://hg.logilab.org/review/cubes/eac
>> #              hg pull http://hg.logilab.org/review/cubes/eac -r 40c725208339
>> Add `values_from_xpath` function
>>
>> This function automate the recurent check of the values.
> I don't see here how this would help. So in this case, it would be
> clearer to have another patch where this new helper is getting used.

It wanted to avoir the redundant `if x is not None and x.text`

and just have a way to fill the `values` dictionnary by declaring

which attribute name correspond to which xpath value.


You want me to merge this changeset with the next where it is used ?

>> diff --git a/cubicweb_eac/dataimport.py b/cubicweb_eac/dataimport.py
>> --- a/cubicweb_eac/dataimport.py
>> +++ b/cubicweb_eac/dataimport.py
>> @@ -19,7 +19,7 @@ Context -- Corporate Bodies, Persons, an
>>   
>>   from collections import deque
>>   import datetime
>> -from functools import wraps, partial
>> +from functools import wraps, partial, reduce
>>   import inspect
>>   import logging
>>   from uuid import uuid4
>> @@ -64,6 +64,17 @@ class MissingTag(RuntimeError):
>>           self.tag_parent = tag_parent
>>   
>>   
>> +def safe_append(finder, dct, var_tuple):
>> +    var_name, var_xpath = var_tuple
>> +    var = finder(var_xpath)
>> +    if var is not None and var.text:
>> +        new_dct = dct.copy()
> Why .copy()?

To be sure that my function is pure.

Not really necessary for my current usage of this function.

I wanted to remove side effects like if i wanted to use this function in an

unstacking recursive process, the dictionnary

at a given depth would not be modified by earlier calls on the stack.

>> +        new_dct[var_name] = set([text_type(var.text)])
>> +        return new_dct
>> +    else:
>> +        return dct
>> +
>> +
>>   def external_uri(uri):
>>       values = [text_type(uri)]
>>       return ExtEntity('ExternalUri', uri, {'uri': set(values), 'cwuri': set(values)})
>> @@ -293,6 +304,16 @@ class EACCPFImporter(object):
>>               return trace_extentity(self)(attr)
>>           return attr
>>   
>> +    def values_from_xpaths(self, elem, name_path_tuples, values=None):
>> +        """build a `values` dict from xpath requests
>> +        partial is used for building a function with the signature
>> +        append_func :: dict -> (varname, xpath) -> dict
>> +        and use it in the reduce"""
>> +        values = values or {}
>> +        finder = partial(self._elem_find, elem)
>> +        append_func = partial(safe_append, finder)
>> +        return reduce(append_func, name_path_tuples, values)
> This is totally cryptic to me. I'd suggest to avoid functional patterns
> that are unidiomatic in python.

Okay,

I wanted to explore the possibility in python, and I wasn't really 
convinced.

>> +
>>       def record_visited(self, elem, extentity):
>>           assert extentity.extid, extentity
>>           self._visited.setdefault(elem, set([])).add(extentity.extid)
>> diff --git a/test/test_dataimport.py b/test/test_dataimport.py
>> --- a/test/test_dataimport.py
>> +++ b/test/test_dataimport.py
>> @@ -23,6 +23,7 @@ from os.path import join, dirname
>>   import sys
>>   import unittest
>>   
>> +from lxml import etree
>>   from six import reraise
>>   from six.moves import map
>>   
>> @@ -32,6 +33,16 @@ from cubicweb.devtools.testlib import Cu
>>   
>>   from cubicweb_eac import dataimport, testutils
>>   
>> +XML_TEST = """
>> +<note>
>> +<to>Tove</to>
>> +<from>Jani</from>
>> +<heading>Reminder</heading>
>> +<body>Hey!</body>
>> +<empty></empty>
>> +</note>
>> +"""
>> +
>>   
>>   def mock_(string):
>>       return string
>> @@ -431,6 +442,31 @@ class EACXMLParserTC(unittest.TestCase):
>>                             'biogHist': set([229, 232]),  # empty.
>>                             })
>>   
>> +    def test_values_from_xpath(self):
>> +        fname = "FRAD033_EAC_00001_simplified.xml"
>> +        fpath = self.datapath(fname)
>> +        self.root = etree.fromstring(XML_TEST)
>> +        import_log = SimpleImportLog(fpath)
>> +        extid_generator = map(str, count()).next
>> +        importer = dataimport.EACCPFImporter(fpath, import_log, mock_,
>> +                                             extid_generator=extid_generator)
>> +        values = importer.values_from_xpaths(
>> +            self.root,
>> +            (('to_value', 'to'),
>> +             ('from_value', 'from'),
>> +             ('heading_value', 'heading'),
>> +             ('body_value', 'body'),
>> +             ('empty_value', 'empty'))
>> +        )
>> +        print(11, values)
> print
Oups
>> +        self.assertEquals(
> Deprecated method.
Okay
>> +            values,
>> +            {'to_value': set([u'Tove']),
>> +             'from_value': set([u'Jani']),
>> +             'heading_value': set([u'Reminder']),
>> +             'body_value': set([u'Hey!'])}
>> +        )
>> +
>>       def test_mandate_under_mandates(self):
>>           """In FRAD033_EAC_00003.xml, <mandate> element are within <mandates>."""
>>           entities = list(self.file_extentities('FRAD033_EAC_00003.xml'))
>>



More information about the saem-devel mailing list