[PATCH 1 of 3 saem_ref v3] [ark] Introduce data declaration for "ark" table and PL/pgSQL function for identifier generation

Denis Laxalde denis.laxalde at logilab.fr
Wed Feb 14 16:22:12 CET 2018


# HG changeset patch
# User Denis Laxalde <denis.laxalde at logilab.fr>
# Date 1518430204 -3600
#      Mon Feb 12 11:10:04 2018 +0100
# Node ID 1697d9a202be0649a03f71d078ff81733779545b
# Parent  357ae924c8b9228ff7fd135174e8e472e6ea3954
# Available At http://hg.logilab.org/review/cubes/saem_ref
#              hg pull http://hg.logilab.org/review/cubes/saem_ref -r 1697d9a202be
# EXP-Topic ark/postgres-generated
[ark] Introduce data declaration for "ark" table and  PL/pgSQL function for identifier generation

We add two SQL scripts in schema/ directory, one to install the
"pgcrypto" extension (needed for random()) and one containing "ark"
table definition and a couple of function to generate unique and random
identifiers:

* `gen_ark_name` cares about the "name" part of ARK identifiers and
  returns a random text value ensuring structural constraints are
  fulfilled (no vowels, no more than 3 consecutive letters, possibly a
  prefix and a trailing control character)
* `random_char` is a simpler helper producing a random string from a set
  of characters
* `gen_ark` is the main function that will be used to generate records
  in the "ark" table; it loops by trying to INSERT a record with
  randomly generated column values (only the "name" part for now) and
  will return once there is no unique constraint violation

Python tests added in test_ark.py. As noted in comments of the test case
class, we use CubicWebTC despite we don't need access to a CubicWeb
database (nor schema): this is only by convenience to get the postgresql
cluster setup (and teardown) at the cost for more expensive test
runtime.

Related to extranet #46881387.

diff --git a/MANIFEST.in b/MANIFEST.in
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,6 +1,7 @@
 recursive-include cubicweb_saem_ref/data *.gif *.jpg *.png *.ico *.css *.js *.xml
 recursive-include test/data *.xml *.xsd *.rdf *.csv
 include test/*.py test/data/bootstrap_cubes
+include cubicweb_saem_ref/schema/*.sql
 include cubicweb_saem_ref/i18n/*.po
 include cubicweb_saem_ref/i18n/*.pot
 include cubicweb_saem_ref/migration/*.py
diff --git a/cubicweb_saem_ref/schema/_ext.postgres.sql b/cubicweb_saem_ref/schema/_ext.postgres.sql
new file mode 100644
--- /dev/null
+++ b/cubicweb_saem_ref/schema/_ext.postgres.sql
@@ -0,0 +1,1 @@
+CREATE EXTENSION IF NOT EXISTS pgcrypto;
diff --git a/cubicweb_saem_ref/schema/ark.postgres.sql b/cubicweb_saem_ref/schema/ark.postgres.sql
new file mode 100644
--- /dev/null
+++ b/cubicweb_saem_ref/schema/ark.postgres.sql
@@ -0,0 +1,73 @@
+CREATE TABLE ark (
+    name TEXT NOT NULL,
+    qualifier TEXT NOT NULL DEFAULT '',
+    CONSTRAINT qualified_name PRIMARY KEY (name, qualifier)
+);
+
+-- Return a part of an ARK identifier of `len` length with `prefix` prefix and
+-- `control_char` trailing character.
+CREATE OR REPLACE
+FUNCTION gen_ark_name(len INTEGER, prefix TEXT, control_char TEXT)
+RETURNS text AS $$
+DECLARE
+    ark TEXT;
+    letters TEXT;
+    numbers TEXT;
+    characters TEXT;
+    next_char TEXT;
+    tail TEXT;
+BEGIN
+    ASSERT len > length(prefix) + length(control_char);
+    letters := 'bcdfghjkmnpqrstvwxz';
+    numbers := '0123456789';
+    characters := letters || numbers;
+    -- Prefix ARK identifier.
+    ark := prefix;
+    LOOP
+        tail := substring(ark, length(ark) - 2, 3);
+        -- No more than 3 consecutive consonants.
+        IF tail ~ '([a-z]){3}' THEN
+            next_char := random_char(numbers);
+        ELSE
+            next_char := random_char(characters);
+        END IF;
+        ark := ark || next_char;
+        IF length(ark) = len - 1 THEN
+            EXIT;
+        END IF;
+    END LOOP;
+    -- Add control character.
+    RETURN ark || control_char;
+END;
+$$ language 'plpgsql';
+
+-- Return an random character within given string
+CREATE OR REPLACE FUNCTION random_char(chars TEXT) RETURNS text AS $$
+BEGIN
+    RETURN substr(chars, (random() * (length(chars) -1) + 1)::INTEGER, 1);
+END;
+$$ language 'plpgsql';
+
+-- Insert a record in "ark" table ensuring uniqueness constraints are
+-- fulfilled.
+CREATE OR REPLACE
+FUNCTION gen_ark(len INTEGER, prefix TEXT, control_char TEXT)
+RETURNS TEXT AS $$
+DECLARE
+    ark_name TEXT;
+BEGIN
+    LOOP
+        BEGIN
+            INSERT INTO ark
+                VALUES (gen_ark_name(len, prefix, control_char), DEFAULT)
+                RETURNING name INTO ark_name;
+            RETURN ark_name;
+        EXCEPTION
+            WHEN unique_violation THEN
+                -- Continue and try with another "name".
+                RAISE WARNING 'unique_violation';
+                NULL;
+        END;
+    END LOOP;
+END;
+$$ LANGUAGE plpgsql;
diff --git a/dev-requirements.txt b/dev-requirements.txt
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,3 +1,4 @@
 mock
+psycopg2
 pytest
 webtest
diff --git a/test/test_ark.py b/test/test_ark.py
new file mode 100644
--- /dev/null
+++ b/test/test_ark.py
@@ -0,0 +1,91 @@
+# copyright 2018 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# contact http://www.logilab.fr -- mailto:contact at logilab.fr
+#
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as publishged by the Free
+# Software Foundation, either version 2.1 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Lesser General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+"""cubicweb-saem-ref tests for ARK identifier generation."""
+
+import psycopg2
+
+from cubicweb.devtools import (
+    PostgresApptestConfiguration,
+    testlib,
+)
+
+import testutils
+
+
+def setUpModule():
+    testutils.startpgcluster(__file__)
+
+
+class ArkServiceTC(testlib.CubicWebTC):
+    # This could be a plain unittest.TestCase as we don't use CubicWeb's
+    # database. But on the other hand, CubicWebTC makes it easier to get a
+    # postgres configuration handling setup/teardown.
+
+    configcls = PostgresApptestConfiguration
+
+    def connect(self):
+        return self.repo.system_source.get_connection()
+
+    def test_unique_generated(self):
+        """We should be able to produce 20 ARK identifiers of length 4 with a
+        2-characters prefix and 1 control character.
+        """
+        n = 20
+        arks = []
+        with self.connect() as conn:
+            with conn.cursor() as cu:
+                for _ in range(n):
+                    cu.execute("SELECT * from gen_ark(%s, %s, %s);",
+                               (4, 'rf', 'x'))
+                    arks.append(cu.fetchone()[0])
+                    conn.commit()
+                cu.execute("SELECT * from ark")
+                r = cu.fetchall()
+        self.assertEqual(len(set(r)), n, len(r))
+        self.assertCountEqual([ark for ark, _ in r], arks)
+
+    def test_qualifier(self):
+        def insert(qualifier=None):
+            with self.connect() as conn:
+                with conn.cursor() as cu:
+                    cu.execute("INSERT INTO ark VALUES (%s, %s);",
+                               ("he", qualifier))
+        insert("ah")
+        with self.assertRaises(psycopg2.IntegrityError) as cm:
+            insert("ah")
+        self.assertIn(
+            'duplicate key value violates unique constraint "qualified_name"',
+            str(cm.exception),
+        )
+        insert("he")
+
+    def test_dup(self):
+        def insert():
+            with self.connect() as conn:
+                with conn.cursor() as cu:
+                    cu.execute("INSERT INTO ark VALUES ('he', 'aha');")
+        insert()
+        with self.assertRaises(psycopg2.IntegrityError) as cm:
+            insert()
+        self.assertIn(
+            'duplicate key value violates unique constraint "qualified_name"',
+            str(cm.exception),
+        )
+
+
+if __name__ == '__main__':
+    import unittest
+    unittest.main()



More information about the saem-devel mailing list