[PATCH 1 of 3 saem_ref v2] [ark] Introduce data declaration for "ark" table and PL/pgSQL function for identifier generation

Philippe Pepiot philippe.pepiot at logilab.fr
Wed Feb 14 16:14:05 CET 2018


On 14/02/2018, Denis Laxalde wrote:
> # HG changeset patch
> # User Denis Laxalde <denis.laxalde at logilab.fr>
> # Date 1518430204 -3600
> #      Mon Feb 12 11:10:04 2018 +0100
> # Node ID 85c64e5dd3da4a4a2dd59057abc8d3554b6bcba1
> # Parent  357ae924c8b9228ff7fd135174e8e472e6ea3954
> # Available At http://hg.logilab.org/review/cubes/saem_ref
> #              hg pull http://hg.logilab.org/review/cubes/saem_ref -r 85c64e5dd3da
> # EXP-Topic ark/postgres-generated
> [ark] Introduce data declaration for "ark" table and  PL/pgSQL function for identifier generation
> 
> We add two SQL scripts in schema/ directory, one to install the
> "pgcrypto" extension (needed for random()) and one containing "ark"
> table definition and a couple of function to generate unique and random
> identifiers:
> 
> * `gen_ark_name` cares about the "name" part of ARK identifiers and
>   returns a random text value ensuring structural constraints are
>   fulfilled (no vowels, no more than 3 consecutive letters, possibly a
>   prefix and a trailing control character)
> * `random_char` is a simpler helper producing a random string from a set
>   of characters
> * `gen_ark` is the main function that will be used to generate records
>   in the "ark" table; it loops by trying to INSERT a record with
>   randomly generated column values (only the "name" part for now) and
>   will return once there is no unique constraint violation
> 
> Python tests added in test_ark.py. As noted in comments of the test case
> class, we use CubicWebTC despite we don't need access to a CubicWeb
> database (nor schema): this is only by convenience to get the postgresql
> cluster setup (and teardown) at the cost for more expensive test
> runtime.
> 
> Related to extranet #46881387.
> 
> diff --git a/MANIFEST.in b/MANIFEST.in
> --- a/MANIFEST.in
> +++ b/MANIFEST.in
> @@ -1,6 +1,7 @@
>  recursive-include cubicweb_saem_ref/data *.gif *.jpg *.png *.ico *.css *.js *.xml
>  recursive-include test/data *.xml *.xsd *.rdf *.csv
>  include test/*.py test/data/bootstrap_cubes
> +include cubicweb_saem_ref/schema/*.sql
>  include cubicweb_saem_ref/i18n/*.po
>  include cubicweb_saem_ref/i18n/*.pot
>  include cubicweb_saem_ref/migration/*.py
> diff --git a/cubicweb_saem_ref/schema/_ext.postgres.sql b/cubicweb_saem_ref/schema/_ext.postgres.sql
> new file mode 100644
> --- /dev/null
> +++ b/cubicweb_saem_ref/schema/_ext.postgres.sql
> @@ -0,0 +1,1 @@
> +CREATE EXTENSION IF NOT EXISTS pgcrypto;
> diff --git a/cubicweb_saem_ref/schema/ark.postgres.sql b/cubicweb_saem_ref/schema/ark.postgres.sql
> new file mode 100644
> --- /dev/null
> +++ b/cubicweb_saem_ref/schema/ark.postgres.sql
> @@ -0,0 +1,73 @@
> +CREATE TABLE ark (
> +    name TEXT NOT NULL,
> +    qualifier TEXT NOT NULL DEFAULT '',
> +    CONSTRAINT qualified_name PRIMARY KEY (name, qualifier)
> +);
> +
> +-- Return a part of an ARK identifier of `len` length with `prefix` prefix and
> +-- `control_char` trailing character.
> +CREATE OR REPLACE
> +FUNCTION gen_ark_name(len INTEGER, prefix TEXT, control_char TEXT)
> +RETURNS text AS $$
> +DECLARE
> +    ark TEXT;
> +    letters TEXT;
> +    numbers TEXT;
> +    characters TEXT;
> +    next_char TEXT;
> +    tail TEXT;
> +BEGIN
> +    ASSERT len > length(prefix) + length(control_char);
> +    letters := 'bcdfghjkmnpqrstvwxz';
> +    numbers := '0123456789';
> +    characters := letters || numbers;
> +    -- Prefix ARK identifier.
> +    ark := prefix;
> +    LOOP
> +        tail := substring(ark, length(ark) - 2, 3);
> +        -- No more than 3 consecutive consonants.
> +        IF tail ~ '([a-z]){3}' THEN
> +            next_char := random_char(numbers);
> +        ELSE
> +            next_char := random_char(characters);
> +        END IF;
> +        ark := ark || next_char;
> +        IF length(ark) = len - 1 THEN
> +            EXIT;
> +        END IF;
> +    END LOOP;
> +    -- Add control character.
> +    RETURN ark || control_char;
> +END;
> +$$ language 'plpgsql';
> +
> +-- Return an random character within given string
> +CREATE OR REPLACE FUNCTION random_char(chars TEXT) RETURNS text AS $$
> +BEGIN
> +    RETURN substr(chars, (random() * (length(chars) -1) + 1)::INTEGER, 1);
> +END;
> +$$ language 'plpgsql';
> +
> +-- Insert a record in "ark" table ensuring uniqueness constraints are
> +-- fulfilled.
> +CREATE OR REPLACE
> +FUNCTION gen_ark(len INTEGER, prefix TEXT, control_char TEXT)
> +RETURNS TEXT AS $$
> +DECLARE
> +    ark_name TEXT;
> +BEGIN
> +    LOOP
> +        BEGIN
> +            INSERT INTO ark
> +                VALUES (gen_ark_name(len, prefix, control_char), DEFAULT)
> +                RETURNING name INTO ark_name;
> +            RETURN ark_name;
> +        EXCEPTION
> +            WHEN unique_violation THEN
> +                -- Continue and try with another "name".
> +                RAISE WARNING 'unique_violation';
> +                NULL;
> +        END;
> +    END LOOP;
> +END;
> +$$ LANGUAGE plpgsql;
> diff --git a/dev-requirements.txt b/dev-requirements.txt
> --- a/dev-requirements.txt
> +++ b/dev-requirements.txt
> @@ -1,3 +1,4 @@
>  mock
> +psycopg2
>  pytest
>  webtest
> diff --git a/test/test_ark.py b/test/test_ark.py
> new file mode 100644
> --- /dev/null
> +++ b/test/test_ark.py
> @@ -0,0 +1,89 @@
> +# copyright 2018 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
> +# contact http://www.logilab.fr -- mailto:contact at logilab.fr
> +#
> +# This program is free software: you can redistribute it and/or modify it under
> +# the terms of the GNU Lesser General Public License as publishged by the Free
> +# Software Foundation, either version 2.1 of the License, or (at your option)
> +# any later version.
> +#
> +# This program is distributed in the hope that it will be useful, but WITHOUT
> +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
> +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
> +# details.
> +#
> +# You should have received a copy of the GNU Lesser General Public License along
> +# with this program. If not, see <http://www.gnu.org/licenses/>.
> +"""cubicweb-saem-ref tests for ARK identifier generation."""
> +
> +from cubicweb.devtools import (
> +    PostgresApptestConfiguration,
> +    testlib,
> +)
> +
> +import testutils
> +
> +
> +def setUpModule():
> +    testutils.startpgcluster(__file__)
> +
> +
> +class ArkServiceTC(testlib.CubicWebTC):
> +    # This could be a plain unittest.TestCase as we don't use CubicWeb's
> +    # database. But on the other hand, CubicWebTC makes it easier to get a
> +    # postgres configuration handling setup/teardown.
> +
> +    configcls = PostgresApptestConfiguration
> +
> +    def connect(self):
> +        return self.repo.system_source.get_connection()
> +
> +    def test_unique_generated(self):
> +        """We should be able to produce 20 ARK identifiers of length 4 with a
> +        2-characters prefix and 1 control character.
> +        """
> +        n = 20
> +        arks = []
> +        with self.connect() as conn:
> +            with conn.cursor() as cu:
> +                for _ in range(n):
> +                    cu.execute("SELECT * from gen_ark(%s, %s, %s);",
> +                               (4, 'rf', 'x'))
> +                    arks.append(cu.fetchone()[0])
> +                    conn.commit()
> +                cu.execute("SELECT * from ark")
> +                r = cu.fetchall()
> +        self.assertEqual(len(set(r)), n, len(r))
> +        self.assertCountEqual([ark for ark, _ in r], arks)
> +
> +    def test_qualifier(self):
> +        def insert(qualifier=None):
> +            with self.connect() as conn:
> +                with conn.cursor() as cu:
> +                    cu.execute("INSERT INTO ark VALUES (%s, %s);",
> +                               ("he", qualifier))
> +        insert("ah")
> +        with self.assertRaises(psycopg2.IntegrityError) as cm:

Missing import of psycopg2

> +            insert("ah")
> +        self.assertIn(
> +            'duplicate key value violates unique constraint "qualified_name"',
> +            str(cm.exception),
> +        )
> +        insert("he")
> +
> +    def test_dup(self):
> +        def insert():
> +            with self.connect() as conn:
> +                with conn.cursor() as cu:
> +                    cu.execute("INSERT INTO ark VALUES ('he', 'aha');")
> +        insert()
> +        with self.assertRaises(psycopg2.IntegrityError) as cm:
> +            insert()
> +        self.assertIn(
> +            'duplicate key value violates unique constraint "qualified_name"',
> +            str(cm.exception),
> +        )
> +
> +
> +if __name__ == '__main__':
> +    import unittest
> +    unittest.main()
> 



More information about the saem-devel mailing list