[PATCH eac] Add `values_from_xpath` function

Katia Saurfelt katia.saurfelt at logilab.fr
Fri Aug 9 10:29:10 CEST 2019


# HG changeset patch
# User Guillaume Vandevelde <gvandevelde at logilab.fr>
# Date 1564500696 -7200
#      Tue Jul 30 17:31:36 2019 +0200
# Node ID 40c725208339f1ec2d3086149d33bb8d6ab17096
# Parent  fb45891c263f42f74b33c971ed39bfe5c4972fd6
# Available At http://hg.logilab.org/review/cubes/eac
#              hg pull http://hg.logilab.org/review/cubes/eac -r 40c725208339
Add `values_from_xpath` function

This function automate the recurent check of the values.

diff --git a/cubicweb_eac/dataimport.py b/cubicweb_eac/dataimport.py
--- a/cubicweb_eac/dataimport.py
+++ b/cubicweb_eac/dataimport.py
@@ -19,7 +19,7 @@ Context -- Corporate Bodies, Persons, an
 
 from collections import deque
 import datetime
-from functools import wraps, partial
+from functools import wraps, partial, reduce
 import inspect
 import logging
 from uuid import uuid4
@@ -64,6 +64,17 @@ class MissingTag(RuntimeError):
         self.tag_parent = tag_parent
 
 
+def safe_append(finder, dct, var_tuple):
+    var_name, var_xpath = var_tuple
+    var = finder(var_xpath)
+    if var is not None and var.text:
+        new_dct = dct.copy()
+        new_dct[var_name] = set([text_type(var.text)])
+        return new_dct
+    else:
+        return dct
+
+
 def external_uri(uri):
     values = [text_type(uri)]
     return ExtEntity('ExternalUri', uri, {'uri': set(values), 'cwuri': set(values)})
@@ -293,6 +304,16 @@ class EACCPFImporter(object):
             return trace_extentity(self)(attr)
         return attr
 
+    def values_from_xpaths(self, elem, name_path_tuples, values=None):
+        """build a `values` dict from xpath requests
+        partial is used for building a function with the signature
+        append_func :: dict -> (varname, xpath) -> dict
+        and use it in the reduce"""
+        values = values or {}
+        finder = partial(self._elem_find, elem)
+        append_func = partial(safe_append, finder)
+        return reduce(append_func, name_path_tuples, values)
+
     def record_visited(self, elem, extentity):
         assert extentity.extid, extentity
         self._visited.setdefault(elem, set([])).add(extentity.extid)
diff --git a/test/test_dataimport.py b/test/test_dataimport.py
--- a/test/test_dataimport.py
+++ b/test/test_dataimport.py
@@ -23,6 +23,7 @@ from os.path import join, dirname
 import sys
 import unittest
 
+from lxml import etree
 from six import reraise
 from six.moves import map
 
@@ -32,6 +33,16 @@ from cubicweb.devtools.testlib import Cu
 
 from cubicweb_eac import dataimport, testutils
 
+XML_TEST = """
+<note>
+<to>Tove</to>
+<from>Jani</from>
+<heading>Reminder</heading>
+<body>Hey!</body>
+<empty></empty>
+</note>
+"""
+
 
 def mock_(string):
     return string
@@ -431,6 +442,31 @@ class EACXMLParserTC(unittest.TestCase):
                           'biogHist': set([229, 232]),  # empty.
                           })
 
+    def test_values_from_xpath(self):
+        fname = "FRAD033_EAC_00001_simplified.xml"
+        fpath = self.datapath(fname)
+        self.root = etree.fromstring(XML_TEST)
+        import_log = SimpleImportLog(fpath)
+        extid_generator = map(str, count()).next
+        importer = dataimport.EACCPFImporter(fpath, import_log, mock_,
+                                             extid_generator=extid_generator)
+        values = importer.values_from_xpaths(
+            self.root,
+            (('to_value', 'to'),
+             ('from_value', 'from'),
+             ('heading_value', 'heading'),
+             ('body_value', 'body'),
+             ('empty_value', 'empty'))
+        )
+        print(11, values)
+        self.assertEquals(
+            values,
+            {'to_value': set([u'Tove']),
+             'from_value': set([u'Jani']),
+             'heading_value': set([u'Reminder']),
+             'body_value': set([u'Hey!'])}
+        )
+
     def test_mandate_under_mandates(self):
         """In FRAD033_EAC_00003.xml, <mandate> element are within <mandates>."""
         entities = list(self.file_extentities('FRAD033_EAC_00003.xml'))



More information about the saem-devel mailing list