[PATCH eac V2] Remove namespaces in the string generated for items attribute

Frank Bessou frank.bessou at logilab.fr
Thu Sep 5 16:15:24 CEST 2019


Applied, thanks :)

On 05/09/2019 11:00, Frank Bessou wrote:
> # HG changeset patch
> # User Guillaume Vandevelde <gvandevelde at logilab.fr>
> # Date 1567671556 -7200
> #      Thu Sep 05 10:19:16 2019 +0200
> # Node ID 9665e8aad5fd778deb605c88e378bf8cfdb9d8cd
> # Parent  268a347314f17a04559a48d3bd851863cb92e158
> # Available At http://hg.logilab.org/review/cubes/eac
> #              hg pull http://hg.logilab.org/review/cubes/eac -r 9665e8aad5fd
> Remove namespaces in the string generated for items attribute
> 
> diff --git a/cubicweb_eac/dataimport.py b/cubicweb_eac/dataimport.py
> --- a/cubicweb_eac/dataimport.py
> +++ b/cubicweb_eac/dataimport.py
> @@ -21,6 +21,7 @@ from collections import deque
>   import copy
>   import json
>   import datetime
> +import re
>   from functools import wraps, partial
>   import inspect
>   import logging
> @@ -344,12 +345,15 @@ class EACCPFImporter(object):
>                   child.tag = 'li'
>               values.append(list_elem)
>           return u"\n".join(
> -            etree.tostring(
> -                item, encoding=text_type,
> -                method='html').strip()
> +            self._tostring_without_namespace(item)
>               for item in values
>               if len(item) != 0 or item.text)
>   
> +    def _tostring_without_namespace(self, elem):
> +        # FIXME: This is a hack to remove the xmlns namespace when serializing an xml element
> +        result = etree.tostring(elem, encoding=text_type, method='html').strip()
> +        return re.sub(r'xmlns\S*\"', "", result)
> +
>       def record_visited(self, elem, extentity):
>           assert extentity.extid, extentity
>           self._visited.setdefault(elem, set([])).add(extentity.extid)
> diff --git a/test/test_dataimport.py b/test/test_dataimport.py
> --- a/test/test_dataimport.py
> +++ b/test/test_dataimport.py
> @@ -316,16 +316,14 @@ class EACXMLParserTC(unittest.TestCase):
>                                             u"L'inspecteur Canardo"])
>                 ]),
>                 'text_format': set([u'text/html']),
> -              'items': set([u'<ul xmlns="urn:isbn:1-931666-33-4" '
> -                            u'xmlns:xlink="http://www.w3.org/1999'
> -                            u'/xlink">\n\t    <li>\n\t      <span style="font-       '
> -                            u'style:italic">1450-1950\n\t      </span>\n\t      '
> -                            u'(1929)\n\t    </li>\n\t    <li>\n\t      <span '
> -                            u'style="font-style:italic">Globe\n\t      '
> -                            u'Gliding\n\t      </span>\n\t      (1930)\n\t    '
> -                            u'</li>\n\t    <li>\n\t      <span style="font-'
> -                            u'       style:italic">Gems\n\t      </span>\n\t'
> -                            u'      (1931)\n\t    </li>\n\t  </ul>']),
> +              'items': set([u'<ul  >\n\t    <li>\n\t      <span style="font-'
> +                            u'       style:italic">1450-1950\n\t      </span'
> +                            u'>\n\t      (1929)\n\t    </li>\n\t    <li>\n\t'
> +                            u'      <span style="font-style:italic">Globe\n\t'
> +                            u'      Gliding\n\t      </span>\n\t      (1930)\n\t'
> +                            u'    </li>\n\t    <li>\n\t      <span style="fo'
> +                            u'nt-       style:italic">Gems\n\t      </span'
> +                            u'>\n\t      (1931)\n\t    </li>\n\t  </ul>']),
>                 'items_format': set([u'text/html']),
>                 'history_agent': set(['authorityrecord-FRAD033_EAC_00001']),
>                 },
> 

-- 
Frank Bessou
Logilab         https://www.logilab.fr



More information about the saem-devel mailing list