[PATCH 2 of 2 seda] [profile gen] Avoid duplicated format ids / mime types in enum

Sylvain Thenault sylvain.thenault at logilab.fr
Thu Dec 14 14:34:59 CET 2017


# HG changeset patch
# User Sylvain Thénault <sylvain.thenault at logilab.fr>
# Date 1513258452 -3600
#      Thu Dec 14 14:34:12 2017 +0100
# Node ID 4e02b3b72a8eb094e8afaa1de861161f725698cb
# Parent  87199226a0916ac69652ef2e51ac193e15128544
# Available At https://hg.logilab.org/review/cubes/seda
#              hg pull https://hg.logilab.org/review/cubes/seda -r 4e02b3b72a8e
[profile gen] Avoid duplicated format ids / mime types in enum

There may be cases where several concepts use the same label. This mostly happens
with format identifiers, but do the same thing for mime types as a safety belt.

diff --git a/cubicweb_seda/entities/profile_generation.py b/cubicweb_seda/entities/profile_generation.py
--- a/cubicweb_seda/entities/profile_generation.py
+++ b/cubicweb_seda/entities/profile_generation.py
@@ -889,18 +889,18 @@ class SEDA1XSDExport(SEDA2ExportAdapter)
     def xsd_attachment(self, parent, data_object):
         _safe_concept = partial(_safe_concept_value, concepts_language=self.concepts_language)
 
         format_id = data_object.format_id
         if format_id is not None:
-            format_ids = [_concept_value(concept, self.concepts_language)
-                          for concept in format_id.concepts]
+            format_ids = sorted(set(_concept_value(concept, self.concepts_language)
+                                    for concept in format_id.concepts))
         else:
             format_ids = []
         mime_type = data_object.mime_type
         if mime_type is not None:
-            mime_types = [_concept_value(concept, self.concepts_language)
-                          for concept in mime_type.concepts]
+            mime_types = sorted(set(_concept_value(concept, self.concepts_language)
+                                    for concept in mime_type.concepts))
         else:
             mime_types = []
         encoding = data_object.encoding
         self.element_schema(parent, 'Attachment', 'qdt:ArchivesBinaryObjectType',
                             xsd_attributes=[
diff --git a/test/test_profile_generation.py b/test/test_profile_generation.py
--- a/test/test_profile_generation.py
+++ b/test/test_profile_generation.py
@@ -1016,10 +1016,28 @@ class OldSEDAExportTC(RelaxNGTestMixin, 
             root = etree.Element('test-root')
             adapter.xsd_children(root, unit)
             self.assertEqual([node.attrib['name'] for node in root],
                              ['ArchiveObject', 'Document'])
 
+    def test_duplicated_format_id(self):
+        with self.admin_access.cnx() as cnx:
+            scheme = testutils.scheme_for_type(cnx, 'file_category', None,
+                                               u'fmt/123', u'fmt/123')
+            concepts = scheme.reverse_in_scheme
+
+            transfer = cnx.create_entity('SEDAArchiveTransfer', title=u'my profile',
+                                         simplified_profile=True)
+            bdo = testutils.create_data_object(transfer)
+            bdo.format_id.cw_set(seda_format_id_to=concepts)
+
+            adapter = transfer.cw_adapt_to('SEDA-1.0.rng')
+            root = etree.Element('test-root')
+            adapter.xsd_attachment(root, bdo)
+
+            self.assertEqual([node.text for node in self.xpath(root, '//rng:value')],
+                             ['fmt/123'])
+
 
 class SEDAExportUnitTest(unittest.TestCase):
 
     def test_concepts_languages(self):
         self.assertEqual(pg.SEDA1XSDExport.concepts_language, 'seda-1')


More information about the saem-devel mailing list