rero · sebdeleze · Feb 12, 2021 · Feb 1, 2021
diff --git a/sonar/modules/documents/serializers/__init__.py b/sonar/modules/documents/serializers/__init__.py
@@ -26,7 +26,14 @@
     search_responsify
 
 from sonar.modules.documents.serializers.dc import SonarDublinCoreSerializer
+from sonar.modules.documents.serializers.google_scholar import \
+    SonarGoogleScholarSerializer
+from sonar.modules.documents.serializers.schemaorg import \
+    SonarSchemaOrgSerializer
 from sonar.modules.documents.serializers.schemas.dc import DublinCoreV1
+from sonar.modules.documents.serializers.schemas.google_scholar import \
+    GoogleScholarV1
+from sonar.modules.documents.serializers.schemas.schemaorg import SchemaOrgV1
 from sonar.modules.organisations.api import OrganisationRecord
 from sonar.modules.serializers import JSONSerializer as _JSONSerializer
 from sonar.modules.users.api import current_user_record
@@ -76,6 +83,11 @@ def post_process_serialize_search(self, results, pid_fetcher):
 json_v1 = JSONSerializer(DocumentSchemaV1)
 #: Dublin Core serializer
 dc_v1 = SonarDublinCoreSerializer(DublinCoreV1, replace_refs=True)
+#: schema.org serializer (marshmallow schema ``SchemaOrgV1``)
+schemaorg_v1 = SonarSchemaOrgSerializer(SchemaOrgV1, replace_refs=True)
+#: Google Scholar serializer (renders a record as ``citation_*`` meta tags)
+google_scholar_v1 = SonarGoogleScholarSerializer(GoogleScholarV1,
+                                                 replace_refs=True)
 
 # Records-REST serializers
 # ========================

diff --git a/sonar/modules/documents/serializers/google_scholar.py b/sonar/modules/documents/serializers/google_scholar.py
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2019 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Google scholar serializer."""
+
+from html import escape
+
+from invenio_records_rest.serializers.base import PreprocessorMixin, \
+    SerializerMixinInterface
+from invenio_records_rest.serializers.marshmallow import MarshmallowMixin
+
+
+class SonarGoogleScholarSerializer(SerializerMixinInterface, MarshmallowMixin,
+                                   PreprocessorMixin):
+    """Serializer rendering a record as Google Scholar ``<meta>`` tags."""
+
+    def dump(self, obj, context=None):
+        """Serialize object with schema.
+
+        Mandatory to override this method, as invenio-records-rest does not
+        use the right way to dump objects (compatible with marshmallow 3.9).
+
+        :param obj: Object to serialize.
+        :param context: Context handed over to the schema instance.
+        :returns: Result of the schema ``dump`` call.
+        """
+        return self.schema_class(context=context).dump(obj)
+
+    def transform_record(self, pid, record, links_factory=None, **kwargs):
+        """Transform record in metas for Google scholar.
+
+        Every key of the dumped data becomes a
+        ``<meta name="citation_{key}" content="{value}">`` tag. A list value
+        produces one tag per item.
+
+        :param pid: Persistent identifier, forwarded to the parent class.
+        :param record: Record to serialize, forwarded to the parent class.
+        :param links_factory: Links factory, forwarded to the parent class.
+        :returns: The meta tags, one per line, as a single string.
+        """
+        data = super(SonarGoogleScholarSerializer,
+                     self).transform_record(pid, record, links_factory,
+                                            **kwargs)
+
+        meta_template = '<meta name="citation_{key}" content="{value}">'
+        metas = []
+        for key, value in data.items():
+            # Handle scalars and lists through the same code path.
+            values = value if isinstance(value, list) else [value]
+            for item in values:
+                # A missing value would otherwise be rendered as the
+                # literal string "None".
+                if item is None:
+                    continue
+
+                # Values come from record metadata: escape them so that a
+                # quote or a bracket cannot break out of the attribute.
+                metas.append(
+                    meta_template.format(key=key,
+                                         value=escape(str(item),
+                                                      quote=True)))
+
+        return '\n'.join(metas)
diff --git a/sonar/modules/documents/serializers/schemaorg.py b/sonar/modules/documents/serializers/schemaorg.py
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2019 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""schema.org serializer."""
+
+from invenio_records_rest.serializers.base import PreprocessorMixin, \
+    SerializerMixinInterface
+from invenio_records_rest.serializers.marshmallow import MarshmallowMixin
+
+
+class SonarSchemaOrgSerializer(SerializerMixinInterface, MarshmallowMixin,
+                               PreprocessorMixin):
+    """Record serializer for schema.org, based on a marshmallow schema."""
+
+    def dump(self, obj, context=None):
+        """Dump the object through a new instance of the schema class.
+
+        This override is required because the way invenio-records-rest
+        dumps objects is not the right one for marshmallow 3.9.
+
+        :param obj: Object to serialize.
+        :param context: Context handed over to the schema instance.
+        :returns: Result of the schema ``dump`` call.
+        """
+        schema = self.schema_class(context=context)
+        return schema.dump(obj)
diff --git a/sonar/modules/documents/serializers/schemas/__init__.py b/sonar/modules/documents/serializers/schemas/__init__.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2019 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Schemas for serialization."""
diff --git a/sonar/modules/documents/serializers/schemas/base_schema.py b/sonar/modules/documents/serializers/schemas/base_schema.py
@@ -0,0 +1,157 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2019 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Base schema for marshmallow serialization."""
+
+import re
+
+from flask import request
+from marshmallow import Schema, pre_dump
+
+from sonar.modules.documents.api import DocumentRecord
+from sonar.modules.documents.utils import has_external_urls_for_files, \
+    populate_files_properties
+
+
+class BaseSchema(Schema):
+    """Base schema for marshmallow serialization.
+
+    Holds the file pre-processing hook and the getters that read common
+    values (title, dates, keywords, URL, pages) from a record dict.
+    """
+
+    @pre_dump
+    def pre_dump(self, item, **kwargs):
+        """Do some transformations in record before dumping it.
+
+        - Store the main file to use it in methods.
+        - Check if files must point to an external URL.
+        - Populate restrictions, thumbnail and URL in files.
+
+        The ``metadata`` dict of the item is modified in place.
+
+        :param item: Item object to process
+        :returns: Modified item
+        """
+        metadata = item['metadata']
+
+        # Nothing to prepare for records without files.
+        if not metadata.get('_files'):
+            return item
+
+        # Store the main file
+        main_file = self.get_main_file(item)
+        if main_file:
+            metadata['mainFile'] = main_file
+
+        # Check if organisation record forces to point file to an external url
+        metadata['external_url'] = has_external_urls_for_files(metadata)
+
+        # Add restriction, link and thumbnail to files
+        populate_files_properties(metadata)
+
+        return item
+
+    def get_main_file(self, obj):
+        """Return the main file.
+
+        The main file is the entry of type ``file`` with the lowest
+        ``order`` (100 when the property is absent). The first one wins
+        when several files share the lowest order.
+
+        :param obj: Record dict.
+        :returns: Main file or None.
+        """
+        files = [
+            file for file in obj['metadata'].get('_files', [])
+            if file.get('type') == 'file'
+        ]
+        # ``min`` keeps the first minimal item, like a stable sort would.
+        return min(files,
+                   key=lambda file: file.get('order', 100),
+                   default=None)
+
+    def get_id(self, obj):
+        """Get id.
+
+        :param obj: Record dict.
+        :returns: Permanent link built from the request host URL and the
+            PID of the record.
+        """
+        return DocumentRecord.get_permanent_link(request.host_url,
+                                                 obj['metadata']['pid'])
+
+    def get_title(self, obj):
+        """Get title.
+
+        :param obj: Record dict.
+        :returns: Value of the first main title of the first title, or
+            None if the record has no title.
+        """
+        titles = obj['metadata'].get('title')
+        if not titles:
+            return None
+
+        return titles[0]['mainTitle'][0]['value']
+
+    def get_start_date(self, obj):
+        """Get start date.
+
+        :param obj: Record dict.
+        :returns: Start date of the first ``bf:Publication`` provision
+            activity having one, or None.
+        """
+        for activity in obj['metadata'].get('provisionActivity', []):
+            # ``.get`` avoids a KeyError on an activity without a type.
+            if activity.get('type') == 'bf:Publication' and activity.get(
+                    'startDate'):
+                return activity['startDate']
+
+        return None
+
+    def get_keywords(self, obj):
+        """Get keywords.
+
+        :param obj: Record dict.
+        :returns: Flat list with the label values of all subjects.
+        """
+        items = []
+
+        for subject in obj['metadata'].get('subjects', []):
+            items.extend(subject['label']['value'])
+
+        return items
+
+    def get_url(self, obj):
+        """Get url.
+
+        The download link of the main file, prefixed with the request
+        host, is preferred over the external link.
+
+        :param obj: Record dict.
+        :returns: URL of the main file or None.
+        """
+        links = obj['metadata'].get('mainFile', {}).get('links')
+        if not links:
+            return None
+
+        if links.get('download'):
+            return '{host}{path}'.format(host=request.host_url.rstrip('/'),
+                                         path=links['download'])
+
+        return links.get('external') or None
+
+    def get_pages(self, obj):
+        """Get pages.
+
+        :param obj: Record dict.
+        :returns: Pages stored in partOf
+        """
+        for part_of in obj['metadata'].get('partOf', []):
+            if part_of.get('numberingPages'):
+                return part_of['numberingPages']
+
+        return None
+
+    def get_first_page(self, obj):
+        """Get the first page.
+
+        :param obj: Record dict.
+        :returns: The leading number of the pages, or None.
+        """
+        pages = self.get_pages(obj)
+        if not pages:
+            return None
+
+        matches = re.match(r'^([0-9]+)', pages)
+        return matches.group(1) if matches else None
+
+    def get_last_page(self, obj):
+        """Get the last page.
+
+        :param obj: Record dict.
+        :returns: The number following the hyphen in a ``first-last``
+            pages value, or None.
+        """
+        pages = self.get_pages(obj)
+        if not pages:
+            return None
+
+        matches = re.match(r'^[0-9]+\-([0-9]+)', pages)
+        return matches.group(1) if matches else None
diff --git a/sonar/modules/documents/serializers/schemas/dc.py b/sonar/modules/documents/serializers/schemas/dc.py
@@ -20,15 +20,15 @@
 import re
 
 from flask import request
-from marshmallow import Schema, fields, pre_dump
+from marshmallow import fields
 
 from sonar.modules.documents.api import DocumentRecord
-from sonar.modules.documents.utils import has_external_urls_for_files, \
-    populate_files_properties
 from sonar.modules.documents.views import part_of_format
 
+from .base_schema import BaseSchema
 
-class DublinCoreV1(Schema):
+
+class DublinCoreV1(BaseSchema):
     """Schema for records v1 in JSON."""
 
     contributors = fields.Method('get_contributors')
@@ -46,34 +46,6 @@ class DublinCoreV1(Schema):
     titles = fields.Method('get_titles')
     types = fields.Method('get_types')
 
-    @pre_dump
-    def pre_dump(self, item, **kwargs):
-        """Do some transformations in record before dumping it.
-
-        - Store the main file to use it in methods.
-        - Check if files must point to an external URL.
-        - Populate restrictions, thumbnail and URL in files.
-
-        :param item: Item object to process
-        :returns: Modified item
-        """
-        if not item['metadata'].get('_files'):
-            return item
-
-        # Store the main file
-        main_file = self.get_main_file(item)
-        if main_file:
-            item['metadata']['mainFile'] = main_file
-
-        # Check if organisation record forces to point file to an external url
-        item['metadata']['external_url'] = has_external_urls_for_files(
-            item['metadata'])
-
-        # Add restriction, link and thumbnail to files
-        populate_files_properties(item['metadata'])
-
-        return item
-
     def get_contributors(self, obj):
         """Get contributors."""
         items = []
@@ -307,16 +279,3 @@ def format_contributor(self, contributor):
             data += ' ({info})'.format(info=' : '.join(info))
 
         return data
-
-    def get_main_file(self, obj):
-        """Return the main file.
-
-        :param obj: Record dict.
-        :returns: Main file or None.
-        """
-        files = [
-            file for file in obj['metadata'].get('_files', [])
-            if file.get('type') == 'file'
-        ]
-        files = sorted(files, key=lambda file: file.get('order', 100))
-        return files[0] if files else None