[saem-devel] [PATCH V2] [views] switch to pyramid views

Philippe Pepiot philippe.pepiot at logilab.fr
Tue Feb 14 13:43:11 CET 2017


# HG changeset patch
# User Philippe Pepiot <philippe.pepiot at logilab.fr>
# Date 1487075346 -3600
#      Tue Feb 14 13:29:06 2017 +0100
# Node ID 5af13fdab86b0d3bc3b40a9090c7758b34e09c3c
# Parent  7c62d81ae3c3ff7d1215063ee7766eacae355d66
# Available At https://hg.logilab.org/review/cubes/oaipmh
#              hg pull https://hg.logilab.org/review/cubes/oaipmh -r 5af13fdab86b
# Tested at https://jenkins.logilab.org/job/cubicweb-oaipmh/6/
[views] switch to pyramid views

Use pyramid views instead of cubicweb views.

Use a single predicate to match the verb and handle errors in errors(); if
errors() doesn't return None, verb_content() is not called.

Fix some tests where metadataPrefix wasn't set explicitly; it was working
before since req.form keeps old values across requests.

Depend on cubicweb[pyramid] >= 3.24.5

Related to extranet #11855076

diff --git a/__init__.py b/__init__.py
--- a/__init__.py
+++ b/__init__.py
@@ -90,3 +90,7 @@ class ResumptionToken(object):
         """Return string encoding state of the resumption token."""
         if self:
             return base64.urlsafe_b64encode(str(self))
+
+
+def includeme(config):
+    config.include('.pviews')
diff --git a/__pkginfo__.py b/__pkginfo__.py
--- a/__pkginfo__.py
+++ b/__pkginfo__.py
@@ -19,7 +19,7 @@ description = 'OAI-PMH server for CubicW
 web = 'http://www.cubicweb.org/project/%s' % distname
 
 __depends__ = {
-    'cubicweb': '>= 3.22.1',
+    'cubicweb[pyramid]': '>= 3.24.5',
     'six': '>= 1.4.0',
     'python-dateutil': None,
     'isodate': None,
diff --git a/entities.py b/entities.py
--- a/entities.py
+++ b/entities.py
@@ -263,13 +263,17 @@ class OAIPMHRecordAdapter(EntityAdapter)
         """
         return datetime_isoformat(self.date)
 
-    def metadata(self, prefix):
+    def metadata(self, prefix, request=None):
         """Return an XML-formatted representation of adapted entity."""
         try:
             _, vid = self.metadata_formats[prefix]
         except KeyError:
             raise NoRecordsMatch('unsupported metadata prefix "{0}"'.format(prefix))
-        data = self._cw.view(vid, w=None, rset=self.entity.as_rset())
+        if request is not None:
+            from cubicweb.pyramid.core import render_view
+            data = render_view(request, vid, w=None, rset=self.entity.as_rset())
+        else:
+            data = self._cw.view(vid, w=None, rset=self.entity.as_rset())
         if isinstance(data, unicode):
             # Underlying view may be 'binary' or not.
             data = data.encode('utf-8')
diff --git a/pviews.py b/pviews.py
new file mode 100644
--- /dev/null
+++ b/pviews.py
@@ -0,0 +1,524 @@
+# copyright 2017 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# contact http://www.logilab.fr -- mailto:contact@logilab.fr
+#
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 2.1 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Lesser General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+"""cubicweb-oaipmh pyramid views for OAI-PMH export
+
+Set hierarchy specification
+---------------------------
+
+Sets_ are an optional construct for selective harvesting. The scheme used to
+define the syntax of sets is usually specific to the community using the
+OAI-PMH exchange protocol. This OAI-PMH implementation exposes the following
+set hierarchy.
+
+- The first level of hierarchy refers to the entity type to perform the
+  selective request on, e.g. for a `ListIdentifiers` verb:
+
+      <baseurl>/oai?verb=ListIdentifiers&set=agent
+
+  would return the identifiers of entities of type Agent found in the repository.
+
+- The second level of hierarchy refers to a filtering criterion on selected
+  entity type, usually an attribute with respect to the application schema,
+  and is tied to a value of this criterion (attribute) to filter entities on.
+  For instance:
+
+      <baseurl>/oai?verb=ListIdentifiers&set=agent:kind:person
+
+  would return the identifiers of entities of type Agent of kind 'person'.
+
+.. _Set:
+.. _Sets: http://www.openarchives.org/OAI/openarchivesprotocol.html#Set
+"""
+
+from datetime import datetime, timedelta
+
+from isodate import datetime_isoformat
+import dateutil.parser
+from lxml import etree
+from lxml.builder import E, ElementMaker
+import pytz
+
+from pyramid.response import Response
+from pyramid.view import view_config
+
+from cubes.oaipmh import utcnow, OAIError, ResumptionToken
+
+
+def utcparse(timestr):
+    """Parse a date/time string as an UTC datetime."""
+    date = dateutil.parser.parse(timestr)
+    if date.tzinfo is None:
+        if 'T' in timestr:
+            raise ValueError('cannot parse a date with time but no timezone')
+        else:
+            # No time, assume UTC.
+            return date.replace(tzinfo=pytz.utc)
+    else:
+        # Convert to UTC.
+        return date.astimezone(pytz.utc)
+
+
+def filter_none(mapping):
+    """Return a dict from `mapping` with None values filtered out."""
+    out = {}
+    for key, val in mapping.iteritems():
+        if val is not None:
+            out[key] = val
+    return out
+
+
+def oai_records(rset):
+    """Yield OAIRecord items from a result set."""
+    for entity in rset.entities():
+        record = entity.cw_adapt_to('IOAIPMHRecord')
+        if not record:
+            continue
+        yield OAIRecord(record)
+
+
+def xml_metadataformat(prefix, mformat):
+    """Return the XML representation of a MetadataFormat object."""
+    return E.metadataFormat(
+        E.metadataPrefix(prefix),
+        E.schema(mformat.schema),
+        E.metadataNamespace(mformat.namespace),
+    )
+
+
+class IdDoesNotExist(OAIError):
+    """The value of the identifier argument is unknown or illegal in this
+    repository.
+    """
+
+    def __init__(self, identifier):
+        msg = 'no entity with OAI identifier {0} in repository'.format(
+            identifier)
+        errors = {'idDoesNotExist': msg}
+        super(IdDoesNotExist, self).__init__(errors)
+
+
+class OAIRequest(object):
+    """Represent an OAI-PMH request."""
+
+    @classmethod
+    def from_request(cls, baseurl, request):
+        form = request.params
+        return cls(
+            baseurl,
+            setspec=form.get('set'),
+            verb=form.get('verb'),
+            identifier=form.get('identifier'),
+            from_date=form.get('from'),
+            until_date=form.get('until'),
+            resumption_token=form.get('resumptionToken'),
+            metadata_prefix=form.get('metadataPrefix'),
+        )
+
+    def __init__(self, baseurl, verb=None, setspec=None, identifier=None,
+                 from_date=None, until_date=None, resumption_token=None,
+                 metadata_prefix=None):
+        self.baseurl = baseurl
+        self.verb = verb
+        self.setspec = setspec
+        self.identifier = identifier
+        self.errors = {}
+        self.resumption_token = ResumptionToken.parse(resumption_token)
+        # Parse "from" and "until" dates, which are, by specification
+        # expressed in UTC.
+        from_date = self.resumption_token.from_date or from_date
+        if from_date is not None:
+            from_date = utcparse(from_date)
+        until_date = self.resumption_token.until_date or until_date
+        if until_date is not None:
+            until_date = utcparse(until_date)
+        if (from_date is not None and until_date is not None and
+                from_date > until_date):
+            msg = 'the from argument must be less than or equal to the until argument'
+            self.errors['badArgument'] = msg
+        self.from_date = from_date
+        self.until_date = until_date
+        self.metadata_prefix = self.resumption_token.metadata_prefix or metadata_prefix
+
+    def __repr__(self):
+        return etree.tostring(self.to_xml())
+
+    def rset_from_identifier(self, cnx):
+        """Return a ResultSet corresponding to request identifier."""
+        oai = cnx.vreg['components'].select('oai', cnx)
+        return oai.match_identifier(self.identifier)
+
+    def rset(self, cnx):
+        """Return a result set with a resumptionToken information from OAI-PMH
+        request by using any provided set specifier, from/until dates and
+        previous token.
+        """
+        dates = {'from_date': self.from_date,
+                 'until_date': self.until_date}
+        oai = cnx.vreg['components'].select('oai', cnx)
+        rset, next_eid = oai.match(
+            self.setspec, metadata_prefix=self.metadata_prefix,
+            from_eid=self.resumption_token.eid, **dates)
+        if not rset:
+            if self.resumption_token:
+                raise OAIError(
+                    {'badResumptionToken': ('The value of the resumptionToken '
+                                            'argument is invalid or expired.')})
+            raise OAIError(
+                {'noRecordsMatch': ('The combination of the values of the '
+                                    'from, until, and set arguments results '
+                                    'in an empty list.')})
+        return rset, next_eid
+
+    def new_token(self, eid):
+        """Return a resumptionToken XML element or None built from request
+        parameters and the `eid` of the next entity to return upon
+        continuation of the request.
+
+        * return a resumptionToken with a value if there are more result to be
+        fetched;
+        * return an empty resumptionToken if this response completes and the
+        request contains a resumptionToken;
+        * None otherwise.
+        """
+        if eid is not None:
+            if self.resumption_token:
+                # Reuse previous token, just updating "eid" field.
+                token = self.resumption_token
+                token.eid = eid
+            else:
+                token = ResumptionToken(eid, self.setspec, self.from_date,
+                                        self.until_date, self.metadata_prefix)
+            expire = datetime_isoformat(
+                datetime.now(pytz.utc) + timedelta(hours=1))
+            return E.resumptionToken(token.encode(), expirationDate=expire)
+        elif self.resumption_token:
+            return E.resumptionToken()
+
+    def to_xml(self, errors=None):
+        if not errors:
+            # In cases where the request that generated this response resulted
+            # in a badVerb or badArgument error condition, the repository must
+            # return the base URL of the protocol request only. Attributes
+            # must not be provided in these cases.
+            attributes = {
+                'verb': self.verb,
+                'identifier': self.identifier,
+                'set': self.setspec,
+            }
+            if self.from_date:
+                attributes['from'] = datetime_isoformat(self.from_date)
+            if self.until_date:
+                attributes['until'] = datetime_isoformat(self.until_date)
+            if self.metadata_prefix:
+                attributes['metadataPrefix'] = self.metadata_prefix
+            if self.resumption_token:
+                attributes['resumptionToken'] = self.resumption_token.encode()
+        else:
+            attributes = {}
+        attributes = filter_none(attributes)
+        return E.request(self.baseurl, **attributes)
+
+
+class OAIResponse(object):
+    """Represent an OAI-PMH response."""
+
+    def __init__(self, oai_request):
+        self.oai_request = oai_request
+
+    @staticmethod
+    def _build_errors(errors):
+        """Return a list of <error> tag from `errors` dict."""
+        return [E.error(msg, code=code)
+                for code, msg in errors.iteritems()]
+
+    def body(self, content=None, errors=None):
+        """Return a list of body items of the OAI-PMH response."""
+        def check_content(content):
+            assert content is not None, (
+                'unexpected empty content while no error got reported')
+
+        oai_request = self.oai_request
+        if errors:
+            return self._build_errors(errors)
+        check_content(content)
+        try:
+            return [E(oai_request.verb, *content)]
+        except OAIError as exc:
+            return self._build_errors(exc.errors)
+        except TypeError:
+            # Usually something wrong with `content` generator, try to unpack
+            # it to get a meaningful error.
+            content = list(content)
+            check_content(content)
+            return [E(oai_request.verb, *content)]
+
+    def to_xml(self, content=None, errors=()):
+        date = E.responseDate(datetime_isoformat(utcnow()))
+        request = self.oai_request.to_xml(errors)
+        body_elems = self.body(content, errors=errors)
+        nsmap = {None: 'http://www.openarchives.org/OAI/2.0/',
+                 'xsi': 'http://www.w3.org/2001/XMLSchema-instance'}
+        maker = ElementMaker(nsmap=nsmap)
+        attributes = {
+            '{%s}schemaLocation' % nsmap['xsi']: ' '.join([
+                'http://www.openarchives.org/OAI/2.0/',
+                'http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd'
+            ])
+        }
+        return maker('OAI-PMH', date, request, *body_elems, **attributes)
+
+
+class OAIRecord(object):
+    """Represent an OAI record built from an entity adapted as IOAIPMHRecord.
+    """
+
+    def __init__(self, record):
+        self.record = record
+
+    def header(self, prefix):
+        """The <header> part of an OAI-PMH record.
+
+        See http://www.openarchives.org/OAI/openarchivesprotocol.html#header
+        for a description of elements of a "header".
+        """
+        if prefix not in self.record.metadata_formats:
+            raise OAIError(
+                {'noRecordsMatch': 'unsupported metadata prefix "{0}"'.format(prefix)})
+        # TODO: add setSpec tag for each record.
+        tags = []
+        for tag in ('identifier', 'datestamp'):
+            value = getattr(self.record, tag)
+            if value:
+                tags.append(E(tag, value))
+        attrs = {}
+        if self.record.deleted:
+            attrs['status'] = 'deleted'
+        return E.header(*tags, **attrs)
+
+    def metadata(self, prefix, request=None):
+        """The <metadata> part of an OAI-PMH record."""
+        if self.record.deleted:
+            return None
+        metadata = self.record.metadata(prefix, request=request)
+        if metadata:
+            return E.metadata(etree.XML(metadata))
+        return None
+
+    def metadata_formats(self):
+        for prefix, (fmt, _) in self.record.metadata_formats.iteritems():
+            yield xml_metadataformat(prefix, fmt)
+
+    def to_xml(self, prefix, request=None):
+        """Return the <record> XML element."""
+        elems = [self.header(prefix)]
+        metadata = self.metadata(prefix, request=request)
+        if metadata is not None:  # deleted record
+            elems.append(metadata)
+        return E.record(*elems)
+
+
+class OAIView(object):
+    """Base class for any OAI view, subclasses should either implement
+    `errors` or `verb_content` methods.
+    """
+
+    @staticmethod
+    def verb_content():
+        return
+
+    @staticmethod
+    def errors():
+        """Return the errors of the OAI-PMH request."""
+        return
+
+    def __init__(self, request):
+        self.request = request
+        self._cw = request.cw_cnx
+        baseurl = request.cw_cnx.build_url('oai')
+        self.oai_request = OAIRequest.from_request(
+            baseurl, request)
+
+    def __call__(self):
+        encoding = self._cw.encoding
+        assert encoding == 'UTF-8', 'unexpected encoding {0}'.format(encoding)
+        content = '<?xml version="1.0" encoding="%s"?>\n' % encoding
+        oai_response = OAIResponse(self.oai_request)
+        # combine errors coming from view selection with those of request
+        # processing.
+        errors = self.errors() or {}
+        verb_content = self.verb_content() if not errors else None
+        errors.update(self.oai_request.errors)
+        response_elem = oai_response.to_xml(verb_content, errors=errors)
+        content += etree.tostring(response_elem, encoding='utf-8')
+        return Response(content)
+
+
+@view_config(route_name='oai')
+class OAIBaseView(OAIView):
+    """Base view for OAI-PMH request with no or bad "verb" specified.
+
+    `verb` request parameter is necessary in our implementation.
+    """
+
+    def errors(self):
+        verb = self.request.params.get('verb')
+        if verb:
+            return {'badVerb': 'illegal verb "{0}"'.format(self.oai_request.verb)}
+        else:
+            return {'badVerb': 'no verb specified'}
+
+
+@view_config(route_name='oai', verb='Identify')
+class OAIIdentifyView(OAIView):
+    """View handling verb="Identify" requests."""
+
+    def errors(self):
+        if set(self.request.params) - set(['verb', 'vid']):
+            return {'badArgument': 'Identify accepts no argument'}
+
+    def verb_content(self):
+        oai = self._cw.vreg['components'].select('oai', self._cw)
+        yield E('repositoryName', self._cw.property_value('ui.site-title'))
+        # XXX Should match the URL rewrite rule.
+        yield E('baseURL', self._cw.build_url('oai'))
+        yield E('protocolVersion', '2.0')
+        try:
+            admin_email = self._cw.vreg.config['admin-email']
+        except KeyError:
+            pass
+        else:
+            yield E('adminEmail', admin_email)
+        oldest = self._cw.execute(
+            'Any D WHERE X creation_date D, X identity MX'
+            ' WITH MX BEING (Any MIN(X))')[0][0]
+        yield E('earliestDatestamp', datetime_isoformat(oldest))
+        yield E('deletedRecord', oai.deleted_handling)
+        yield E('granularity', 'YYYY-MM-DDThh:mm:ssZ')
+
+
+@view_config(route_name='oai', verb='ListMetadataFormats')
+class OAIListMetadatFormatsByIdentifierView(OAIView):
+    """View handling verb="ListMetadataFormats" requests."""
+
+    def verb_content(self):
+        identifier = self.request.params.get('identifier')
+        if identifier:
+            rset = self.oai_request.rset_from_identifier(self._cw)
+            if not rset:
+                raise IdDoesNotExist(self.oai_request.identifier)
+            for record in oai_records(rset):
+                for fmt in record.metadata_formats():
+                    yield fmt
+        else:
+            oai = self._cw.vreg['components'].select('oai', self._cw)
+            for prefix, fmt in oai.metadata_formats():
+                yield xml_metadataformat(prefix, fmt)
+
+
+@view_config(route_name='oai', verb='ListSets')
+class OAIListSetsView(OAIView):
+    """View handling verb="ListSets" requests."""
+
+    @staticmethod
+    def build_set(spec, name=u''):
+        """Return a "set" element"""
+        return E('set', E.setSpec(spec), E.setName(name))
+
+    def verb_content(self):
+        oai = self._cw.vreg['components'].select('oai', self._cw)
+        for spec, description in oai.setspecs():
+            yield self.build_set(spec, name=self._cw._(description))
+
+
+@view_config(route_name='oai', verb='ListIdentifiers')
+class OAIListIdentifiersWithSetView(OAIView):
+    """View handling verb="ListIdentifiers" requests with "set" selection."""
+
+    def errors(self):
+        if 'metadataPrefix' not in self.request.params:
+            return {'badArgument': ('ListIdentifiers verb requires a "metadataPrefix" '
+                                    'restriction')}
+
+    def verb_content(self):
+        rset, token = self.oai_request.rset(self._cw)
+        for record in oai_records(rset):
+            yield record.header(self.oai_request.metadata_prefix)
+        new_token = self.oai_request.new_token(token)
+        if new_token is not None:
+            yield new_token
+
+
+@view_config(route_name='oai', verb='ListRecords')
+class OAIListRecordsView(OAIView):
+    """View handling verb="ListRecords"."""
+
+    def errors(self):
+        params = set(self.request.params) & set({'metadataPrefix', 'resumptionToken'})
+        if not params:
+            return {'badArgument': ('ListRecords verb requires a "metadataPrefix" '
+                                    'restriction')}
+
+    def verb_content(self):
+        rset, token = self.oai_request.rset(self._cw)
+        for record in oai_records(rset):
+            yield record.to_xml(self.oai_request.metadata_prefix, request=self.request)
+        new_token = self.oai_request.new_token(token)
+        if new_token is not None:
+            yield new_token
+
+
+@view_config(route_name='oai', verb='GetRecord')
+class OAIGetRecordView(OAIView):
+    """View handling verb="GetRecord" with proper arguments."""
+
+    def errors(self):
+        params = set(self.request.params) & set({'identifier', 'metadataPrefix'})
+        if not params:
+            return {'badArgument': ('GetRecord verb requires "identifier" and '
+                                    '"metadataPrefix" arguments')}
+        elif params == set({'identifier'}):
+            return {'badArgument': 'GetRecord verb requires "metadataPrefix" restriction'}
+        elif params == set({'metadataPrefix'}):
+            return {'badArgument': 'GetRecord verb requires "identifier" restriction'}
+
+    def verb_content(self):
+        rset = self.oai_request.rset_from_identifier(self._cw)
+        for record in oai_records(rset):
+            if record is not None:
+                yield record.to_xml(self.oai_request.metadata_prefix, request=self.request)
+                break
+        else:
+            raise IdDoesNotExist(self.oai_request.identifier)
+
+
+class VerbPredicate(object):
+
+    def __init__(self, val, config):
+        self.val = val
+
+    def text(self):
+        return 'verb = %s' % (self.val,)
+
+    phash = text
+
+    def __call__(self, context, request):
+        return request.params.get('verb') == self.val
+
+
+def includeme(config):
+    config.add_route('oai', '/poai')
+    config.add_view_predicate('verb', VerbPredicate)
+    config.scan(__name__)
diff --git a/test/data/pyramid.ini b/test/data/pyramid.ini
new file mode 100644
--- /dev/null
+++ b/test/data/pyramid.ini
@@ -0,0 +1,7 @@
+[main]
+# to get traceback properly in tests
+cubicweb.bwcompat = no
+# avoid noise in test output
+cubicweb.session.secret = secret
+cubicweb.auth.authtkt.session.secret = secret
+cubicweb.auth.authtkt.persistent.secret = secret
diff --git a/test/test_oaipmh.py b/test/test_oaipmh.py
--- a/test/test_oaipmh.py
+++ b/test/test_oaipmh.py
@@ -30,10 +30,11 @@ from six import text_type
 from logilab.common import tempattr
 
 from cubicweb.devtools.testlib import CubicWebTC
+from cubicweb.pyramid.test import PyramidCWTest
 
 from cubes.oaipmh import utcnow, MetadataFormat, ResumptionToken
 from cubes.oaipmh.entities import OAIComponent, NoRecordsMatch
-from cubes.oaipmh.views import OAIError, OAIRequest
+from cubes.oaipmh.pviews import OAIError, OAIRequest
 
 
 def agent(cnx, name=None, kind=u'person', **kwargs):
@@ -261,7 +262,7 @@ def xmlpp(string):
     print(etree.tostring(etree.fromstring(string), pretty_print=True))
 
 
-class OAIPMHViewsTC(CubicWebTC, OAITestMixin):
+class OAIPMHViewsTC(PyramidCWTest, OAITestMixin):
 
     _validate_xml = True
     _debug_xml = True
@@ -278,8 +279,7 @@ class OAIPMHViewsTC(CubicWebTC, OAITestM
         xmlschema.assertValid(root)
 
     def oai_request(self, req, **formparams):
-        req.form.update(formparams)
-        out = self.app_handle_request(req, 'oai')
+        out = self.webapp.get('/poai', formparams).body
         if self._validate_xml:
             self.assertXmlValid(out, self.datapath('OAI-PMH.xsd'), debug=self._debug_xml)
         return out
@@ -423,10 +423,10 @@ class OAIPMHViewsTC(CubicWebTC, OAITestM
 
         def check_request(expected_identifiers, token=None):
             with self.admin_access.web_request() as req:
+                kwargs = {'resumptionToken': token} if token is not None else {}
                 result = self.oai_request(
                     req, verb='ListIdentifiers', set='agent',
-                    metadataPrefix='oai_dc',
-                    resumptionToken=token)
+                    metadataPrefix='oai_dc', **kwargs)
                 # Ensure there are as many <identifier> tag than expected items.
                 self.assertEqual(
                     result.count('<identifier>'), len(expected_identifiers))
@@ -491,6 +491,7 @@ class OAIPMHViewsTC(CubicWebTC, OAITestM
             self.assertIn('<identifier>{0}</identifier>'.format(alice), result)
             self.assertEqual(result.count('<identifier>'), 2)
             result = self.oai_request(req, verb='ListIdentifiers',
+                                      metadataPrefix='oai_dc',
                                       set='agent:kind:person')
             self.assertIn('<identifier>{0}</identifier>'.format(alice),
                           result)
diff --git a/tox.ini b/tox.ini
--- a/tox.ini
+++ b/tox.ini
@@ -5,6 +5,7 @@ envlist = py27,flake8
 sitepackages = true
 deps =
     pytest
+    webtest
 commands =
   {envpython} -m pytest {posargs:{toxinidir}/test}
 
diff --git a/views.py b/views.py
deleted file mode 100644
--- a/views.py
+++ /dev/null
@@ -1,585 +0,0 @@
-# copyright 2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
-# contact http://www.logilab.fr -- mailto:contact@logilab.fr
-#
-# This program is free software: you can redistribute it and/or modify it under
-# the terms of the GNU Lesser General Public License as published by the Free
-# Software Foundation, either version 2.1 of the License, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
-# details.
-#
-# You should have received a copy of the GNU Lesser General Public License along
-# with this program. If not, see <http://www.gnu.org/licenses/>.
-"""cubicweb-oaipmh views for OAI-PMH export
-
-Set hierarchy specification
----------------------------
-
-Sets_ are optional construct for selective harvesting. The scheme used to
-define the syntax of sets is usually specific to the community using the
-OAI-PMH exchange protocol. This OAI-PMH implementation exposes the following
-set hierarchy.
-
-- The first level of hierarchy refers to the entity type to perform the
-  selective request on, e.g. for a `ListIdentifiers` verb:
-
-      <baseurl>/oai?verb=ListIdentifiers&set=agent
-
-  would return the identifiers of entities of type Agent found in the repository.
-
-- The second level of hierarchy refers to a filtering criterion on selected
-  entity type, usually an attribute with respect to the application schema,
-  and is tight to a value of this criterion (attribute) to filter entities on.
-  For instance:
-
-      <baseurl>/oai?verb=ListIdentifiers&set=agent:kind:person
-
-  would return the identifiers of entities of type Agent of kind 'person'.
-
-.. _Set:
-.. _Sets: http://www.openarchives.org/OAI/openarchivesprotocol.html#Set
-"""
-
-from datetime import datetime, timedelta
-
-from isodate import datetime_isoformat
-import dateutil.parser
-from lxml import etree
-from lxml.builder import E, ElementMaker
-import pytz
-
-from logilab.common.registry import objectify_predicate
-
-from cubicweb.predicates import ExpectedValuePredicate, match_form_params
-from cubicweb.view import View
-from cubicweb.web.views import urlrewrite
-
-from cubes.oaipmh import utcnow, OAIError, ResumptionToken
-
-
-def utcparse(timestr):
-    """Parse a date/time string as an UTC datetime."""
-    date = dateutil.parser.parse(timestr)
-    if date.tzinfo is None:
-        if 'T' in timestr:
-            raise ValueError('cannot parse a date with time but no timezone')
-        else:
-            # No time, assume UTC.
-            return date.replace(tzinfo=pytz.utc)
-    else:
-        # Convert to UTC.
-        return date.astimezone(pytz.utc)
-
-
-class match_verb(ExpectedValuePredicate):
-    """Predicate checking `verb` request form parameter presence and value.
-
-    Return 2 in case of match, for precedence over
-    ``match_form_params('verb')``.
-    """
-
-    def __call__(self, cls, req, rset=None, **kwargs):
-        verb = req.form.get('verb')
-        if verb is None:
-            return 0
-        return 2 * int(verb in self.expected)
-
-
-def filter_none(mapping):
-    """Return a dict from `mapping` with None values filtered out."""
-    out = {}
-    for key, val in mapping.iteritems():
-        if val is not None:
-            out[key] = val
-    return out
-
-
-def oai_records(rset):
-    """Yield OAIRecord items from a result set."""
-    for entity in rset.entities():
-        record = entity.cw_adapt_to('IOAIPMHRecord')
-        if not record:
-            continue
-        yield OAIRecord(record)
-
-
-def xml_metadataformat(prefix, mformat):
-    """Return the XML representation of a MetadataFormat object."""
-    return E.metadataFormat(
-        E.metadataPrefix(prefix),
-        E.schema(mformat.schema),
-        E.metadataNamespace(mformat.namespace),
-    )
-
-
-class IdDoesNotExist(OAIError):
-    """The value of the identifier argument is unknown or illegal in this
-    repository.
-    """
-
-    def __init__(self, identifier):
-        msg = 'no entity with OAI identifier {0} in repository'.format(
-            identifier)
-        errors = {'idDoesNotExist': msg}
-        super(IdDoesNotExist, self).__init__(errors)
-
-
-class OAIRequest(object):
-    """Represent an OAI-PMH request."""
-
-    @classmethod
-    def from_request(cls, baseurl, request):
-        form = request.form
-        return cls(
-            baseurl,
-            setspec=form.get('set'),
-            verb=form.get('verb'),
-            identifier=form.get('identifier'),
-            from_date=form.get('from'),
-            until_date=form.get('until'),
-            resumption_token=form.get('resumptionToken'),
-            metadata_prefix=form.get('metadataPrefix'),
-        )
-
-    def __init__(self, baseurl, verb=None, setspec=None, identifier=None,
-                 from_date=None, until_date=None, resumption_token=None,
-                 metadata_prefix=None):
-        self.baseurl = baseurl
-        self.verb = verb
-        self.setspec = setspec
-        self.identifier = identifier
-        self.errors = {}
-        self.resumption_token = ResumptionToken.parse(resumption_token)
-        # Parse "from" and "until" dates, which are, by specification
-        # expressed in UTC.
-        from_date = self.resumption_token.from_date or from_date
-        if from_date is not None:
-            from_date = utcparse(from_date)
-        until_date = self.resumption_token.until_date or until_date
-        if until_date is not None:
-            until_date = utcparse(until_date)
-        if (from_date is not None and until_date is not None and
-                from_date > until_date):
-            msg = 'the from argument must be less than or equal to the until argument'
-            self.errors['badArgument'] = msg
-        self.from_date = from_date
-        self.until_date = until_date
-        self.metadata_prefix = self.resumption_token.metadata_prefix or metadata_prefix
-
-    def __repr__(self):
-        return etree.tostring(self.to_xml())
-
-    def rset_from_identifier(self, cnx):
-        """Return a ResultSet corresponding to request identifier."""
-        oai = cnx.vreg['components'].select('oai', cnx)
-        return oai.match_identifier(self.identifier)
-
-    def rset(self, cnx):
-        """Return a result set with a resumptionToken information from OAI-PMH
-        request by using any provided set specifier, from/until dates and
-        previous token.
-        """
-        dates = {'from_date': self.from_date,
-                 'until_date': self.until_date}
-        oai = cnx.vreg['components'].select('oai', cnx)
-        rset, next_eid = oai.match(
-            self.setspec, metadata_prefix=self.metadata_prefix,
-            from_eid=self.resumption_token.eid, **dates)
-        if not rset:
-            if self.resumption_token:
-                raise OAIError(
-                    {'badResumptionToken': ('The value of the resumptionToken '
-                                            'argument is invalid or expired.')})
-            raise OAIError(
-                {'noRecordsMatch': ('The combination of the values of the '
-                                    'from, until, and set arguments results '
-                                    'in an empty list.')})
-        return rset, next_eid
-
-    def new_token(self, eid):
-        """Return a resumptionToken XML element or None built from request
-        parameters and the `eid` of the next entity to return upon
-        continuation of the request.
-
-        * return a resumptionToken with a value if there are more result to be
-        fetched;
-        * return an empty resumptionToken if this response completes and the
-        request contains a resumptionToken;
-        * None otherwise.
-        """
-        if eid is not None:
-            if self.resumption_token:
-                # Reuse previous token, just updating "eid" field.
-                token = self.resumption_token
-                token.eid = eid
-            else:
-                token = ResumptionToken(eid, self.setspec, self.from_date,
-                                        self.until_date, self.metadata_prefix)
-            expire = datetime_isoformat(
-                datetime.now(pytz.utc) + timedelta(hours=1))
-            return E.resumptionToken(token.encode(), expirationDate=expire)
-        elif self.resumption_token:
-            return E.resumptionToken()
-
-    def to_xml(self, errors=None):
-        if not errors:
-            # In cases where the request that generated this response resulted
-            # in a badVerb or badArgument error condition, the repository must
-            # return the base URL of the protocol request only. Attributes
-            # must not be provided in these cases.
-            attributes = {
-                'verb': self.verb,
-                'identifier': self.identifier,
-                'set': self.setspec,
-            }
-            if self.from_date:
-                attributes['from'] = datetime_isoformat(self.from_date)
-            if self.until_date:
-                attributes['until'] = datetime_isoformat(self.until_date)
-            if self.metadata_prefix:
-                attributes['metadataPrefix'] = self.metadata_prefix
-            if self.resumption_token:
-                attributes['resumptionToken'] = self.resumption_token.encode()
-        else:
-            attributes = {}
-        attributes = filter_none(attributes)
-        return E.request(self.baseurl, **attributes)
-
-
-class OAIResponse(object):
-    """Represent an OAI-PMH response."""
-
-    def __init__(self, oai_request):
-        self.oai_request = oai_request
-
-    @staticmethod
-    def _build_errors(errors):
-        """Return a list of <error> tag from `errors` dict."""
-        return [E.error(msg, code=code)
-                for code, msg in errors.iteritems()]
-
-    def body(self, content=None, errors=None):
-        """Return a list of body items of the OAI-PMH response."""
-        def check_content(content):
-            assert content is not None, (
-                'unexpected empty content while no error got reported')
-
-        oai_request = self.oai_request
-        if errors:
-            return self._build_errors(errors)
-        check_content(content)
-        try:
-            return [E(oai_request.verb, *content)]
-        except OAIError as exc:
-            return self._build_errors(exc.errors)
-        except TypeError:
-            # Usually something wrong with `content` generator, try to unpack
-            # it to get a meaningful error.
-            content = list(content)
-            check_content(content)
-            return [E(oai_request.verb, *content)]
-
-    def to_xml(self, content=None, errors=()):
-        date = E.responseDate(datetime_isoformat(utcnow()))
-        request = self.oai_request.to_xml(errors)
-        body_elems = self.body(content, errors=errors)
-        nsmap = {None: 'http://www.openarchives.org/OAI/2.0/',
-                 'xsi': 'http://www.w3.org/2001/XMLSchema-instance'}
-        maker = ElementMaker(nsmap=nsmap)
-        attributes = {
-            '{%s}schemaLocation' % nsmap['xsi']: ' '.join([
-                'http://www.openarchives.org/OAI/2.0/',
-                'http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd'
-            ])
-        }
-        return maker('OAI-PMH', date, request, *body_elems, **attributes)
-
-
-class OAIRecord(object):
-    """Represent an OAI record built from an entity adapted as IOAIPMHRecord.
-    """
-
-    def __init__(self, record):
-        self.record = record
-
-    def header(self, prefix):
-        """The <header> part of an OAI-PMH record.
-
-        See http://www.openarchives.org/OAI/openarchivesprotocol.html#header
-        for a description of elements of a "header".
-        """
-        if prefix not in self.record.metadata_formats:
-            raise OAIError(
-                {'noRecordsMatch': 'unsupported metadata prefix "{0}"'.format(prefix)})
-        # TODO: add setSpec tag for each record.
-        tags = []
-        for tag in ('identifier', 'datestamp'):
-            value = getattr(self.record, tag)
-            if value:
-                tags.append(E(tag, value))
-        attrs = {}
-        if self.record.deleted:
-            attrs['status'] = 'deleted'
-        return E.header(*tags, **attrs)
-
-    def metadata(self, prefix):
-        """The <metadata> part of an OAI-PMH record."""
-        if self.record.deleted:
-            return None
-        metadata = self.record.metadata(prefix)
-        if metadata:
-            return E.metadata(etree.XML(metadata))
-        return None
-
-    def metadata_formats(self):
-        for prefix, (fmt, _) in self.record.metadata_formats.iteritems():
-            yield xml_metadataformat(prefix, fmt)
-
-    def to_xml(self, prefix):
-        """Return the <record> XML element."""
-        elems = [self.header(prefix)]
-        metadata = self.metadata(prefix)
-        if metadata is not None:  # deleted record
-            elems.append(metadata)
-        return E.record(*elems)
-
-
-class OAIPMHRewriter(urlrewrite.SimpleReqRewriter):
-    rules = [('/oai', dict(vid='oai'))]
-
-
-class OAIView(View):
-    """Base class for any OAI view, subclasses should either implement
-    `errors` or `verb_content` methods.
-    """
-    __regid__ = 'oai'
-    __abstract__ = True
-    templatable = False
-    content_type = 'text/xml'
-    binary = True
-
-    @staticmethod
-    def verb_content():
-        return
-
-    @staticmethod
-    def errors():
-        """Return the errors of the OAI-PMH request."""
-        return {}
-
-    def __init__(self, *args, **kwargs):
-        super(OAIView, self).__init__(*args, **kwargs)
-        self.oai_request = OAIRequest.from_request(
-            self._cw.build_url('oai'), self._cw)
-
-    def call(self):
-        encoding = self._cw.encoding
-        assert encoding == 'UTF-8', 'unexpected encoding {0}'.format(encoding)
-        self.w('<?xml version="1.0" encoding="%s"?>\n' % encoding)
-        oai_response = OAIResponse(self.oai_request)
-        # combine errors coming from view selection with those of request
-        # processing.
-        errors = self.errors()
-        errors.update(self.oai_request.errors)
-        response_elem = oai_response.to_xml(self.verb_content(), errors=errors)
-        self.w(etree.tostring(response_elem, encoding='utf-8'))
-
-
-class OAIBaseView(OAIView):
-    """Base view for OAI-PMH request with no "verb" specified.
-
-    `verb` request parameter is necessary in our implementation.
-    """
-
-    def errors(self):
-        return {'badVerb': 'no verb specified'}
-
-
-class OAIWithVerbView(OAIView):
-    """Base view for OAI-PMH request with a "verb" specified.
-
-    This view generated an error as the implementation relies on explicit view
-    to handle supported verbs.
-    """
-    __select__ = match_form_params('verb')
-
-    def errors(self):
-        """Return the errors of the OAI-PMH request."""
-        return {'badVerb': 'illegal verb "{0}"'.format(self.oai_request.verb)}
-
-
-class OAIIdentifyView(OAIView):
-    """View handling verb="Identify" requests."""
-    __select__ = match_verb('Identify')
-
-    def verb_content(self):
-        oai = self._cw.vreg['components'].select('oai', self._cw)
-        yield E('repositoryName', self._cw.property_value('ui.site-title'))
-        # XXX Should match the URL rewrite rule.
-        yield E('baseURL', self._cw.build_url('oai'))
-        yield E('protocolVersion', '2.0')
-        try:
-            admin_email = self._cw.vreg.config['admin-email']
-        except KeyError:
-            pass
-        else:
-            yield E('adminEmail', admin_email)
-        oldest = self._cw.execute(
-            'Any D WHERE X creation_date D, X identity MX'
-            ' WITH MX BEING (Any MIN(X))')[0][0]
-        yield E('earliestDatestamp', datetime_isoformat(oldest))
-        yield E('deletedRecord', oai.deleted_handling)
-        yield E('granularity', 'YYYY-MM-DDThh:mm:ssZ')
-
-
-@objectify_predicate
-def no_params_in_form(cls, req, **kwargs):
-    """Return 1 if req.form only has "verb" (and "vid") parameters."""
-    extra = set(req.form) - set(['verb', 'vid'])
-    return 0 if extra else 1
-
-
-class OAIIdentifyBadArgumentsView(OAIView):
-    """View handling verb="Identify" requests but with bad arguments"""
-    __select__ = match_verb('Identify') & ~no_params_in_form()
-
-    def errors(self):
-        return {
-            'badArgument': 'Identify accepts no argument'}
-
-
-class OAIListMetadatFormatsView(OAIView):
-    """View handling verb="ListMetadataFormats" requests for the whole
-    repository.
-    """
-    __select__ = match_verb('ListMetadataFormats')
-
-    def verb_content(self):
-        oai = self._cw.vreg['components'].select('oai', self._cw)
-        for prefix, fmt in oai.metadata_formats():
-            yield xml_metadataformat(prefix, fmt)
-
-
-class OAIListMetadatFormatsByIdentifierView(OAIView):
-    """View handling verb="ListMetadataFormats" requests."""
-    __select__ = (match_verb('ListMetadataFormats')
-                  & match_form_params('identifier'))
-
-    def verb_content(self):
-        rset = self.oai_request.rset_from_identifier(self._cw)
-        if not rset:
-            raise IdDoesNotExist(self.oai_request.identifier)
-        for record in oai_records(rset):
-            for fmt in record.metadata_formats():
-                yield fmt
-
-
-class OAIListSetsView(OAIView):
-    """View handling verb="ListSets" requests."""
-    __select__ = match_verb('ListSets')
-
-    @staticmethod
-    def build_set(spec, name=u''):
-        """Return a "set" element"""
-        return E('set', E.setSpec(spec), E.setName(name))
-
-    def verb_content(self):
-        oai = self._cw.vreg['components'].select('oai', self._cw)
-        for spec, description in oai.setspecs():
-            yield self.build_set(spec, name=self._cw._(description))
-
-
-class OAIListIdentifiersView(OAIView):
-    """View handling verb="ListIdentifiers" requests.
-
-    This view returns an error as it handles cases where no "set" selection is
-    specified.
-    """
-    __select__ = match_verb('ListIdentifiers')
-
-    def errors(self):
-        return {
-            'badArgument': 'ListIdentifiers verb requires a "metadataPrefix" restriction'}
-
-
-class OAIListIdentifiersWithSetView(OAIView):
-    """View handling verb="ListIdentifiers" requests with "set" selection."""
-    __select__ = match_verb('ListIdentifiers') & match_form_params('metadataPrefix')
-
-    def verb_content(self):
-        rset, token = self.oai_request.rset(self._cw)
-        for record in oai_records(rset):
-            yield record.header(self.oai_request.metadata_prefix)
-        new_token = self.oai_request.new_token(token)
-        if new_token is not None:
-            yield new_token
-
-
-class OAIListRecordsErrorView(OAIView):
-    """View handling verb="ListRecords" requests when arguments are missing."""
-    __select__ = match_verb('ListRecords')
-
-    def errors(self):
-        return {
-            'badArgument': 'ListRecords verb requires a "metadataPrefix" restriction'}
-
-
-class OAIListRecordsView(OAIView):
-    """View handling verb="ListRecords"."""
-    __select__ = (match_verb('ListRecords')
-                  & (match_form_params('metadataPrefix')
-                     | match_form_params('resumptionToken')))
-
-    def verb_content(self):
-        rset, token = self.oai_request.rset(self._cw)
-        for record in oai_records(rset):
-            yield record.to_xml(self.oai_request.metadata_prefix)
-        new_token = self.oai_request.new_token(token)
-        if new_token is not None:
-            yield new_token
-
-
-class OAIGetRecordErrorView(OAIView):
-    """View handling verb="GetRecord" requests when arguments are missing."""
-    __select__ = match_verb('GetRecord')
-
-    def errors(self):
-        return {'badArgument': ('GetRecord verb requires "identifier" and '
-                                '"metadataPrefix" arguments')}
-
-
-class OAIGetRecordMissingIdentifierView(OAIView):
-    """View handling verb="GetRecord" requests when "identifier" is missing.
-    """
-    __select__ = match_verb('GetRecord') & match_form_params('metadataPrefix')
-
-    def errors(self):
-        return {'badArgument': 'GetRecord verb requires "identifier" restriction'}
-
-
-class OAIGetRecordMissingMetadataPrefixView(OAIView):
-    """View handling verb="GetRecord" requests when metadataPrefix argument is
-    missing.
-    """
-    __select__ = match_verb('GetRecord') & match_form_params('identifier')
-
-    def errors(self):
-        return {'badArgument': 'GetRecord verb requires "metadataPrefix" restriction'}
-
-
-class OAIGetRecordView(OAIView):
-    """View handling verb="GetRecord" with proper arguments."""
-    __select__ = (match_verb('GetRecord')
-                  & match_form_params('identifier', 'metadataPrefix'))
-
-    def verb_content(self):
-        rset = self.oai_request.rset_from_identifier(self._cw)
-        for record in oai_records(rset):
-            if record is not None:
-                yield record.to_xml(self.oai_request.metadata_prefix)
-                break
-        else:
-            raise IdDoesNotExist(self.oai_request.identifier)



More information about the saem-devel mailing list