[PATCH 4 of 7 saem_ref] [ark] Add a regexp for "external" ARK identifiers

Denis Laxalde denis.laxalde at logilab.fr
Wed Feb 21 12:03:29 CET 2018


# HG changeset patch
# User Denis Laxalde <denis.laxalde at logilab.fr>
# Date 1519202767 -3600
#      Wed Feb 21 09:46:07 2018 +0100
# Node ID 606373c1a4aaabe08e676b389ab05ca32dd0c59a
# Parent  b8dd5a269fd5d090c7f371fe74b4d87be9185355
# Available At http://hg.logilab.org/review/cubes/saem_ref
#              hg pull http://hg.logilab.org/review/cubes/saem_ref -r 606373c1a4aa
# EXP-Topic ark/unique-overall
[ark] Add a regexp for "external" ARK identifiers

We will soon need to insert ARK identifiers from an external source
(i.e. not generated by ourselves). Those identifiers won't satisfy the
constraints we impose on internal identifiers (length, prefix, control
character), so add a regexp to match them.

diff --git a/cubicweb_saem_ref/ark.py b/cubicweb_saem_ref/ark.py
--- a/cubicweb_saem_ref/ark.py
+++ b/cubicweb_saem_ref/ark.py
@@ -34,8 +34,18 @@ ARK_RGX = re.compile(
     ),
 )
 
+# Regexp for "external" ARK identifiers (i.e. not subject to the length,
+# prefix or control character constraints of internal identifiers).
+EXT_ARK_RGX = re.compile(
+    r'^(ark:/)?(?P<naan>\d+)/(?P<name>\w+)(/(?P<qualifier>\w+))?$',
+)
 
-match = ARK_RGX.match
+
+def match(string, external=False, **kwargs):
+    if external:
+        return EXT_ARK_RGX.match(string, **kwargs)
+    else:
+        return ARK_RGX.match(string, **kwargs)
 
 
 def generate_ark(cnx, naan):
diff --git a/test/test_ark.py b/test/test_ark.py
--- a/test/test_ark.py
+++ b/test/test_ark.py
@@ -50,12 +50,24 @@ class ArkRgxTC(TestCase):
         self.assertEqual(match.group('naan'), '12345')
         self.assertEqual(match.group('name'), 'rfqqqqqqqg')
 
+    def test_ext(self):
+        match = ark_match('ark:/12345/name', external=True)
+        self.assertTrue(match)
+        self.assertEqual(match.group('naan'), '12345')
+        self.assertEqual(match.group('name'), 'name')
+
     def test_no_scheme(self):
         match = ark_match('12345/rf%sg' % ('q' * 7))
         self.assertTrue(match)
         self.assertEqual(match.group('naan'), '12345')
         self.assertEqual(match.group('name'), 'rfqqqqqqqg')
 
+    def test_ext_no_scheme(self):
+        match = ark_match('12345/name', external=True)
+        self.assertTrue(match)
+        self.assertEqual(match.group('naan'), '12345')
+        self.assertEqual(match.group('name'), 'name')
+
     def test_qualified(self):
         ark = '0/rfr8hps6ng/h4050n4j02'
         match = ark_match(ark)
@@ -64,6 +76,14 @@ class ArkRgxTC(TestCase):
         self.assertEqual(match.group('name'), 'rfr8hps6ng')
         self.assertEqual(match.group('qualifier'), 'h4050n4j02')
 
+    def test_ext_qualified(self):
+        ark = '666/name/qualifier'
+        match = ark_match(ark, external=True)
+        self.assertTrue(match)
+        self.assertEqual(match.group('naan'), '666')
+        self.assertEqual(match.group('name'), 'name')
+        self.assertEqual(match.group('qualifier'), 'qualifier')
+
     def test_tesutils_match_ark_qualified(self):
         ark = '0/rfr8hps6ng/h4050n4j02'
 



More information about the saem-devel mailing list