From e870ec408df805f4c95b224fe9ea829449ff3fe5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Se=CC=81bastien=20De=CC=81le=CC=80ze?=
 <sebastien.deleze@rero.ch>
Date: Mon, 1 Feb 2021 13:23:52 +0100
Subject: [PATCH] documents: add metadata in detail view.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Adds JSON-LD data for schema.org.
* Adds citation metadata for Google Scholar.
* Adds title and meta description.
* Creates a base serializer schema and moves some functions into it.
* Creates a serializer for schema.org metadata.
* Creates a serializer for Google scholar metadata.
* Closes #98.
* Closes #99.

Co-Authored-by: Sébastien Délèze <sebastien.deleze@rero.ch>
---
 .../modules/documents/serializers/__init__.py |  12 +
 .../documents/serializers/google_scholar.py   |  53 ++
 .../documents/serializers/schemaorg.py        |  35 ++
 .../documents/serializers/schemas/__init__.py |  18 +
 .../serializers/schemas/base_schema.py        | 157 ++++++
 .../documents/serializers/schemas/dc.py       |  49 +-
 .../serializers/schemas/google_scholar.py     |  95 ++++
 .../serializers/schemas/schemaorg.py          | 175 +++++++
 .../documents/templates/documents/record.html |  16 +-
 sonar/modules/documents/views.py              |  18 +-
 .../serializers/test_google_scholar_schema.py | 405 +++++++++++++++
 .../serializers/test_schemaorg_schema.py      | 463 ++++++++++++++++++
 12 files changed, 1449 insertions(+), 47 deletions(-)
 create mode 100644 sonar/modules/documents/serializers/google_scholar.py
 create mode 100644 sonar/modules/documents/serializers/schemaorg.py
 create mode 100644 sonar/modules/documents/serializers/schemas/__init__.py
 create mode 100644 sonar/modules/documents/serializers/schemas/base_schema.py
 create mode 100644 sonar/modules/documents/serializers/schemas/google_scholar.py
 create mode 100644 sonar/modules/documents/serializers/schemas/schemaorg.py
 create mode 100644 tests/unit/documents/serializers/test_google_scholar_schema.py
 create mode 100644 tests/unit/documents/serializers/test_schemaorg_schema.py

diff --git a/sonar/modules/documents/serializers/__init__.py b/sonar/modules/documents/serializers/__init__.py
index d0e18c0f..295ae8d1 100644
--- a/sonar/modules/documents/serializers/__init__.py
+++ b/sonar/modules/documents/serializers/__init__.py
@@ -26,7 +26,14 @@
     search_responsify
 
 from sonar.modules.documents.serializers.dc import SonarDublinCoreSerializer
+from sonar.modules.documents.serializers.google_scholar import \
+    SonarGoogleScholarSerializer
+from sonar.modules.documents.serializers.schemaorg import \
+    SonarSchemaOrgSerializer
 from sonar.modules.documents.serializers.schemas.dc import DublinCoreV1
+from sonar.modules.documents.serializers.schemas.google_scholar import \
+    GoogleScholarV1
+from sonar.modules.documents.serializers.schemas.schemaorg import SchemaOrgV1
 from sonar.modules.organisations.api import OrganisationRecord
 from sonar.modules.serializers import JSONSerializer as _JSONSerializer
 from sonar.modules.users.api import current_user_record
@@ -76,6 +83,11 @@ def post_process_serialize_search(self, results, pid_fetcher):
 json_v1 = JSONSerializer(DocumentSchemaV1)
 #: Dublin Core serializer
 dc_v1 = SonarDublinCoreSerializer(DublinCoreV1, replace_refs=True)
+#: schema.org serializer
+schemaorg_v1 = SonarSchemaOrgSerializer(SchemaOrgV1, replace_refs=True)
+#: google scholar serializer
+google_scholar_v1 = SonarGoogleScholarSerializer(GoogleScholarV1,
+                                                 replace_refs=True)
 
 # Records-REST serializers
 # ========================
diff --git a/sonar/modules/documents/serializers/google_scholar.py b/sonar/modules/documents/serializers/google_scholar.py
new file mode 100644
index 00000000..567928e0
--- /dev/null
+++ b/sonar/modules/documents/serializers/google_scholar.py
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2019 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Google scholar serializer."""
+
+from invenio_records_rest.serializers.base import PreprocessorMixin, \
+    SerializerMixinInterface
+from invenio_records_rest.serializers.marshmallow import MarshmallowMixin
+
+
+class SonarGoogleScholarSerializer(SerializerMixinInterface, MarshmallowMixin,
+                                   PreprocessorMixin):
+    """Serializer rendering a record as Google Scholar ``<meta>`` tags."""
+
+    def dump(self, obj, context=None):
+        """Serialize ``obj`` with ``schema_class`` and return the dumped data.
+
+        Overridden because the invenio-records-rest implementation does not
+        dump objects in a way that is compatible with marshmallow 3.9.
+        """
+        return self.schema_class(context=context).dump(obj)
+
+    def transform_record(self, pid, record, links_factory=None, **kwargs):
+        """Return one HTML-escaped ``citation_*`` meta tag per dumped value."""
+        from html import escape  # Local import: stdlib, only needed here.
+
+        data = super(SonarGoogleScholarSerializer,
+                     self).transform_record(pid, record, links_factory,
+                                            **kwargs)
+
+        # Values originate from record metadata and the template prints the
+        # result with ``| safe``: escape them so quotes or markup in a title
+        # cannot break out of the ``content`` attribute.
+        template = '<meta name="citation_{key}" content="{value}">'
+        metas = []
+        for key, value in data.items():
+            for item in value if isinstance(value, list) else [value]:
+                metas.append(template.format(key=key, value=escape(str(item))))
+        return '\n'.join(metas)
diff --git a/sonar/modules/documents/serializers/schemaorg.py b/sonar/modules/documents/serializers/schemaorg.py
new file mode 100644
index 00000000..9e30a6ba
--- /dev/null
+++ b/sonar/modules/documents/serializers/schemaorg.py
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2019 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""schema.org serializer."""
+
+from invenio_records_rest.serializers.base import PreprocessorMixin, \
+    SerializerMixinInterface
+from invenio_records_rest.serializers.marshmallow import MarshmallowMixin
+
+
+class SonarSchemaOrgSerializer(SerializerMixinInterface, MarshmallowMixin,
+                               PreprocessorMixin):
+    """Marshmallow based schema.org serializer for records."""
+
+    def dump(self, obj, context=None):
+        """Serialize ``obj`` with ``schema_class`` and return the dumped data.
+
+        Overridden because the invenio-records-rest implementation does not
+        dump objects in a way that is compatible with marshmallow 3.9.
+        """
+        return self.schema_class(context=context).dump(obj)
diff --git a/sonar/modules/documents/serializers/schemas/__init__.py b/sonar/modules/documents/serializers/schemas/__init__.py
new file mode 100644
index 00000000..0ec9e641
--- /dev/null
+++ b/sonar/modules/documents/serializers/schemas/__init__.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2019 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Schemas for serialization."""
diff --git a/sonar/modules/documents/serializers/schemas/base_schema.py b/sonar/modules/documents/serializers/schemas/base_schema.py
new file mode 100644
index 00000000..6a18bed2
--- /dev/null
+++ b/sonar/modules/documents/serializers/schemas/base_schema.py
@@ -0,0 +1,157 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2019 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Base schema for marshmallow serialization."""
+
+import re
+
+from flask import request
+from marshmallow import Schema, pre_dump
+
+from sonar.modules.documents.api import DocumentRecord
+from sonar.modules.documents.utils import has_external_urls_for_files, \
+    populate_files_properties
+
+
+class BaseSchema(Schema):
+    """Base schema for marshmallow serialization."""
+
+    @pre_dump
+    def pre_dump(self, item, **kwargs):
+        """Do some transformations in record before dumping it.
+
+        - Store the main file in ``mainFile`` to use it in methods.
+        - Store in ``external_url`` if files must point to an external URL.
+        - Populate restrictions, thumbnail and URL in files.
+
+        :param item: Item object to process, untouched if it has no files.
+        :returns: Modified item
+        """
+        if not item['metadata'].get('_files'):
+            return item
+
+        # Store the main file
+        main_file = self.get_main_file(item)
+        if main_file:
+            item['metadata']['mainFile'] = main_file
+
+        # Check if organisation record forces to point file to an external url
+        item['metadata']['external_url'] = has_external_urls_for_files(
+            item['metadata'])
+
+        # Add restriction, link and thumbnail to files
+        populate_files_properties(item['metadata'])
+
+        return item
+
+    def get_main_file(self, obj):
+        """Return the ``file`` typed entry of ``_files`` with the lowest order.
+
+        :param obj: Record dict.
+        :returns: Main file or None.
+        """
+        files = [
+            file for file in obj['metadata'].get('_files', [])
+            if file.get('type') == 'file'
+        ]
+        files = sorted(files, key=lambda file: file.get('order', 100))
+        return files[0] if files else None
+
+    def get_id(self, obj):
+        """Get the permanent link of the record, used as identifier."""
+        return DocumentRecord.get_permanent_link(request.host_url,
+                                                 obj['metadata']['pid'])
+
+    def get_title(self, obj):
+        """Get the first main title value of the first title, if any."""
+        for title in obj['metadata'].get('title', []):
+            return title['mainTitle'][0]['value']
+
+        return None
+
+    def get_start_date(self, obj):
+        """Get the start date of the first publication having one, if any."""
+        for provision_activity in obj['metadata'].get('provisionActivity', []):
+            if provision_activity[
+                    'type'] == 'bf:Publication' and provision_activity.get(
+                        'startDate'):
+                return provision_activity['startDate']
+
+        return None
+
+    def get_keywords(self, obj):
+        """Get the subject label values, flattened in a single list."""
+        items = []
+
+        for subjects in obj['metadata'].get('subjects', []):
+            items = items + subjects['label']['value']
+
+        return items
+
+    def get_url(self, obj):
+        """Get the main file URL: download link if any, else external link."""
+        if obj['metadata'].get('mainFile', {}).get('links'):
+            if obj['metadata']['mainFile']['links'].get('download'):
+                return '{host}{image}'.format(
+                    host=request.host_url.rstrip('/'),
+                    image=obj['metadata']['mainFile']['links']['download'])
+
+            if obj['metadata']['mainFile']['links'].get('external'):
+                return obj['metadata']['mainFile']['links']['external']
+
+        return None
+
+    def get_pages(self, obj):
+        """Get pages.
+
+        :param obj: Record dict.
+        :returns: First non-empty ``numberingPages`` of partOf, or None.
+        """
+        for part_of in obj['metadata'].get('partOf', []):
+            if part_of.get('numberingPages'):
+                return part_of['numberingPages']
+
+        return None
+
+    def get_first_page(self, obj):
+        """Get the first page.
+
+        :param obj: Record dict.
+        :returns: Leading digits of the first ``numberingPages``, or None.
+        """
+        for part_of in obj['metadata'].get('partOf', []):
+            if part_of.get('numberingPages'):
+                matches = re.match(r'^([0-9]+)', part_of['numberingPages'])
+
+                return matches.group(1) if matches else None
+
+        return None
+
+    def get_last_page(self, obj):
+        """Get the last page.
+
+        :param obj: Record dict.
+        :returns: The last page of a ``<first>-<last>`` range, or None.
+        """
+        for part_of in obj['metadata'].get('partOf', []):
+            if part_of.get('numberingPages'):
+                matches = re.match(r'^[0-9]+\-([0-9]+)',
+                                   part_of['numberingPages'])
+
+                return matches.group(1) if matches else None
+
+        return None
diff --git a/sonar/modules/documents/serializers/schemas/dc.py b/sonar/modules/documents/serializers/schemas/dc.py
index 23ae7aff..7c8ca178 100644
--- a/sonar/modules/documents/serializers/schemas/dc.py
+++ b/sonar/modules/documents/serializers/schemas/dc.py
@@ -20,15 +20,15 @@
 import re
 
 from flask import request
-from marshmallow import Schema, fields, pre_dump
+from marshmallow import fields
 
 from sonar.modules.documents.api import DocumentRecord
-from sonar.modules.documents.utils import has_external_urls_for_files, \
-    populate_files_properties
 from sonar.modules.documents.views import part_of_format
 
+from .base_schema import BaseSchema
 
-class DublinCoreV1(Schema):
+
+class DublinCoreV1(BaseSchema):
     """Schema for records v1 in JSON."""
 
     contributors = fields.Method('get_contributors')
@@ -46,34 +46,6 @@ class DublinCoreV1(Schema):
     titles = fields.Method('get_titles')
     types = fields.Method('get_types')
 
-    @pre_dump
-    def pre_dump(self, item, **kwargs):
-        """Do some transformations in record before dumping it.
-
-        - Store the main file to use it in methods.
-        - Check if files must point to an external URL.
-        - Populate restrictions, thumbnail and URL in files.
-
-        :param item: Item object to process
-        :returns: Modified item
-        """
-        if not item['metadata'].get('_files'):
-            return item
-
-        # Store the main file
-        main_file = self.get_main_file(item)
-        if main_file:
-            item['metadata']['mainFile'] = main_file
-
-        # Check if organisation record forces to point file to an external url
-        item['metadata']['external_url'] = has_external_urls_for_files(
-            item['metadata'])
-
-        # Add restriction, link and thumbnail to files
-        populate_files_properties(item['metadata'])
-
-        return item
-
     def get_contributors(self, obj):
         """Get contributors."""
         items = []
@@ -307,16 +279,3 @@ def format_contributor(self, contributor):
             data += ' ({info})'.format(info=' : '.join(info))
 
         return data
-
-    def get_main_file(self, obj):
-        """Return the main file.
-
-        :param obj: Record dict.
-        :returns: Main file or None.
-        """
-        files = [
-            file for file in obj['metadata'].get('_files', [])
-            if file.get('type') == 'file'
-        ]
-        files = sorted(files, key=lambda file: file.get('order', 100))
-        return files[0] if files else None
diff --git a/sonar/modules/documents/serializers/schemas/google_scholar.py b/sonar/modules/documents/serializers/schemas/google_scholar.py
new file mode 100644
index 00000000..63a88865
--- /dev/null
+++ b/sonar/modules/documents/serializers/schemas/google_scholar.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2019 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Google scholar marshmallow schema."""
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+from marshmallow import fields, post_dump
+
+from sonar.modules.documents.views import get_language_from_bibliographic_code
+
+from .base_schema import BaseSchema
+
+
+class GoogleScholarV1(BaseSchema):
+    """Marshmallow schema for Google scholar."""
+
+    title = fields.Method('get_title')
+    language = fields.Method('get_language')
+    publication_date = fields.Method('get_start_date')
+    keywords = fields.Method('get_keywords')
+    pdf_url = fields.Method('get_url')
+    online_date = fields.Method('get_start_date')
+    author = fields.Method('get_author')
+    doi = fields.Method('get_doi')
+    abstract_html_url = fields.Method('get_id')
+    pages = fields.Method('get_pages')
+    firstpage = fields.Method('get_first_page')
+    lastpage = fields.Method('get_last_page')
+    volume = fields.Method('get_volume')
+    journal_title = fields.Method('get_host_document_title')
+
+    def get_language(self, obj):
+        """Get language."""
+        for language in obj['metadata'].get('language', []):
+            return get_language_from_bibliographic_code(language['value'])
+
+        return None
+
+    def get_keywords(self, obj):
+        """Get keywords joined with ' ; ' as a single string."""
+        return ' ; '.join(super(GoogleScholarV1, self).get_keywords(obj))
+
+    def get_author(self, obj):
+        """Get the names of contributors whose first role is ``cre``."""
+        items = []
+        for contributor in obj['metadata'].get('contribution', []):
+            if contributor['role'][0] == 'cre' and contributor['agent'].get(
+                    'preferred_name'):
+                items.append(contributor['agent']['preferred_name'])
+
+        return items
+
+    def get_doi(self, obj):
+        """Get the value of the first ``bf:Doi`` identifier."""
+        for identifier in obj['metadata'].get('identifiedBy', []):
+            if identifier['type'] == 'bf:Doi':
+                return identifier['value']
+
+        return None
+
+    def get_volume(self, obj):
+        """Get the first volume number found in partOf."""
+        for part_of in obj['metadata'].get('partOf', []):
+            if part_of.get('numberingVolume'):
+                return part_of['numberingVolume']
+
+        return None
+
+    def get_host_document_title(self, obj):
+        """Get the title of the first host document found in partOf."""
+        for part_of in obj['metadata'].get('partOf', []):
+            if part_of.get('document', {}).get('title'):
+                return part_of['document']['title']
+
+        return None
+
+    @post_dump
+    def remove_empty_values(self, data, **kwargs):
+        """Remove empty (falsy) values from the dumped data."""
+        return {key: value for key, value in data.items() if value}
diff --git a/sonar/modules/documents/serializers/schemas/schemaorg.py b/sonar/modules/documents/serializers/schemas/schemaorg.py
new file mode 100644
index 00000000..c167ad60
--- /dev/null
+++ b/sonar/modules/documents/serializers/schemas/schemaorg.py
@@ -0,0 +1,175 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2019 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""schema.org marshmallow schema."""
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+from flask import request
+from marshmallow import fields, post_dump
+
+from .base_schema import BaseSchema
+
+TYPE_MAPPING = {
+    'coar:c_2f33': 'Book',
+    'coar:c_3248': 'Chapter',
+    'coar:c_c94f': 'CreativeWork',
+    'coar:c_5794': 'ScholarlyArticle',
+    'coar:c_18cp': 'ScholarlyArticle',
+    'coar:c_6670': 'Poster',
+    'coar:c_18co': 'Poster',
+    'coar:c_f744': 'Book',
+    'coar:c_ddb1': 'Dataset',
+    'coar:c_3e5a': 'Article',
+    'coar:c_beb9': 'ScholarlyArticle',
+    'coar:c_6501': 'ScholarlyArticle',
+    'coar:c_998f': 'NewsArticle',
+    'coar:c_dcae04bc': 'ScholarlyArticle',
+    'coar:c_8544': 'Course',
+    'non_textual_object': 'MediaObject',
+    'coar:c_8a7e': 'VideoObject',
+    'coar:c_ecc8': 'ImageObject',
+    'coar:c_12cc': 'Map',
+    'coar:c_18cc': 'AudioObject',
+    'coar:c_18cw': 'MusicComposition',
+    'coar:c_5ce6': 'SoftwareApplication',
+    'coar:c_15cd': 'CreativeWork',
+    'coar:c_2659': 'Periodical',
+    'coar:c_0640': 'Periodical',
+    'coar:c_2cd9': 'Periodical',
+    'coar:c_2fe3': 'Newspaper',
+    'coar:c_816b': 'ScholarlyArticle',
+    'coar:c_93fc': 'Report',
+    'coar:c_18ww': 'Report',
+    'coar:c_18wz': 'Report',
+    'coar:c_18wq': 'Report',
+    'coar:c_186u': 'Report',
+    'coar:c_18op': 'Report',
+    'coar:c_ba1f': 'Report',
+    'coar:c_18hj': 'Report',
+    'coar:c_18ws': 'Report',
+    'coar:c_18gh': 'Report',
+    'coar:c_46ec': 'Thesis',
+    'coar:c_7a1f': 'Thesis',
+    'coar:c_db06': 'Thesis',
+    'coar:c_bdcc': 'Thesis',
+    'habilitation_thesis': 'Thesis',
+    'advanced_studies_thesis': 'Thesis',
+    'other': 'Thesis',
+    'coar:c_8042': 'CreativeWork',
+    'coar:c_1843': 'CreativeWork'
+}
+
+
+class SchemaOrgV1(BaseSchema):
+    """Marshmallow schema for schema.org, typed through TYPE_MAPPING."""
+
+    type_ = fields.Method('get_type', data_key='@type')
+    context_ = fields.Constant('http://schema.org', data_key='@context')
+    id_ = fields.Method('get_id', data_key='@id')
+    name = fields.Method('get_title')
+    abstract = fields.Method('get_abstract')
+    description = fields.Method('get_abstract')
+    inLanguage = fields.Method('get_in_language')
+    creator = fields.Method('get_creator')
+    headline = fields.Method('get_title')
+    datePublished = fields.Method('get_start_date')
+    url = fields.Method('get_file_urls')
+    keywords = fields.Method('get_keywords')
+    identifier = fields.Method('get_id')
+    license = fields.Method('get_license')
+    image = fields.Method('get_image')
+    pagination = fields.Method('get_pages')
+    pageStart = fields.Method('get_first_page')
+    pageEnd = fields.Method('get_last_page')
+
+    def get_type(self, obj):
+        """Get the schema.org type of documentType, CreativeWork by default."""
+        if obj['metadata'].get('documentType') and TYPE_MAPPING.get(
+                obj['metadata']['documentType']):
+            return TYPE_MAPPING[obj['metadata']['documentType']]
+
+        return 'CreativeWork'
+
+    def get_abstract(self, obj):
+        """Get the value of the first abstract."""
+        for abstract in obj['metadata'].get('abstracts', []):
+            return abstract['value']
+
+        return None
+
+    def get_in_language(self, obj):
+        """Get the value of the first language."""
+        for language in obj['metadata'].get('language', []):
+            return language['value']
+
+        return None
+
+    def get_creator(self, obj):
+        """Get contributors whose first role is ``cre`` as Person objects."""
+        items = []
+        for contributor in obj['metadata'].get('contribution', []):
+            if contributor['role'][0] == 'cre' and contributor['agent'].get(
+                    'preferred_name'):
+                items.append({
+                    '@type': 'Person',
+                    'name': contributor['agent']['preferred_name']
+                })
+
+        return items
+
+    def get_license(self, obj):
+        """Get the license, followed by its label when there is one."""
+        if obj['metadata'].get('usageAndAccessPolicy'):
+            result = [obj['metadata']['usageAndAccessPolicy']['license']]
+
+            if obj['metadata']['usageAndAccessPolicy'].get('label'):
+                result.append(obj['metadata']['usageAndAccessPolicy']['label'])
+
+            return ', '.join(result)
+
+        return None
+
+    def get_image(self, obj):
+        """Get the absolute URL of the main file thumbnail."""
+        if obj['metadata'].get('mainFile', {}).get('thumbnail'):
+            return '{host}{image}'.format(
+                host=request.host_url.rstrip('/'),
+                image=obj['metadata']['mainFile']['thumbnail'])
+
+        return None
+
+    def get_file_urls(self, obj):
+        """Get the download and external URLs of the ``file`` typed files."""
+        files = []
+
+        for file in obj['metadata'].get('_files', []):
+            if file.get('type') == 'file' and file.get('links'):
+                if file['links'].get('download'):
+                    files.append('{host}{image}'.format(
+                        host=request.host_url.rstrip('/'),
+                        image=file['links']['download']))
+
+                if file['links'].get('external'):
+                    files.append(file['links']['external'])
+
+        return files
+
+    @post_dump
+    def remove_empty_values(self, data, **kwargs):
+        """Remove empty (falsy) values from the dumped data."""
+        return {key: value for key, value in data.items() if value}
diff --git a/sonar/modules/documents/templates/documents/record.html b/sonar/modules/documents/templates/documents/record.html
index dc9a3125..2ec85221 100644
--- a/sonar/modules/documents/templates/documents/record.html
+++ b/sonar/modules/documents/templates/documents/record.html
@@ -19,8 +19,22 @@
 
 {% from 'sonar/macros/macro.html' import thumbnail %}
 
-{%- block body %}
 {% set title = record.title[0] | title_format(current_i18n.language) %}
+{% set description = record.abstracts[0].value if record.abstracts else None %}
+
+{% block head %}
+{{ super() }}
+<!-- Schema.org in JSON_LD -->
+<script type="application/ld+json">
+  {{ schema_org_data | safe }}
+</script>
+
+<!-- Google Scholar -->
+{{ google_scholar_data | safe }}
+{% endblock head %}
+
+{%- block body %}
+
 {% set files = record.get_files_list() %}
 
 <section class="mt-3">
diff --git a/sonar/modules/documents/views.py b/sonar/modules/documents/views.py
index cac46b65..6549e6d9 100644
--- a/sonar/modules/documents/views.py
+++ b/sonar/modules/documents/views.py
@@ -19,6 +19,8 @@
 
 from __future__ import absolute_import, print_function
 
+import json
+
 from flask import Blueprint, abort, current_app, g, render_template, request
 from flask_babelex import gettext as _
 from invenio_i18n.ext import current_i18n
@@ -96,12 +98,26 @@ def detail(pid_value, view='global'):
 
         populate_files_properties(record)
 
+    # Import is here to avoid a circular reference error.
+    from sonar.modules.documents.serializers import google_scholar_v1, \
+        schemaorg_v1
+
+    # Get schema org data, "<" is escaped as it is inlined in a <script> tag
+    schema_org_data = json.dumps(schemaorg_v1.transform_record(
+        record['pid'], record)).replace('<', '\\u003c')
+
+    # Get scholar data
+    google_scholar_data = google_scholar_v1.transform_record(
+        record['pid'], record)
+
     # Resolve $ref properties
     record = record.replace_refs()
 
     return render_template('documents/record.html',
                            pid=pid_value,
-                           record=record)
+                           record=record,
+                           schema_org_data=schema_org_data,
+                           google_scholar_data=google_scholar_data)
 
 
 @blueprint.route('/projects/<pid_value>')
diff --git a/tests/unit/documents/serializers/test_google_scholar_schema.py b/tests/unit/documents/serializers/test_google_scholar_schema.py
new file mode 100644
index 00000000..601299f1
--- /dev/null
+++ b/tests/unit/documents/serializers/test_google_scholar_schema.py
@@ -0,0 +1,405 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2019 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Test Google Scholar marshmallow schema."""
+
+from io import BytesIO
+
+import pytest
+
+from sonar.modules.documents.api import DocumentRecord
+from sonar.modules.documents.serializers import google_scholar_v1
+
+
+@pytest.fixture()
+def minimal_document(db, bucket_location, organisation):
+    record = DocumentRecord.create(
+        {
+            'pid':
+            '1000',
+            'title': [{
+                'type':
+                'bf:Title',
+                'mainTitle': [{
+                    'language': 'eng',
+                    'value': 'Title of the document'
+                }]
+            }],
+            'organisation': [{
+                '$ref': 'https://sonar.ch/api/organisations/org'
+            }]
+        },
+        dbcommit=True,
+        with_bucket=True)
+    record.commit()
+    db.session.commit()
+    return record
+
+
+@pytest.fixture()
+def contributors():
+    return [{
+        'agent': {
+            'preferred_name': 'Creator 1'
+        },
+        'role': ['cre'],
+    }, {
+        'agent': {
+            'preferred_name': 'Creator 2',
+            'number': '123',
+            'date': '2019',
+            'place': 'Martigny'
+        },
+        'role': ['cre'],
+    }, {
+        'agent': {
+            'preferred_name': 'Contributor 1'
+        },
+        'role': ['ctb'],
+    }, {
+        'agent': {
+            'preferred_name': 'Contributor 2',
+            'number': '999',
+            'date': '2010',
+            'place': 'Sion'
+        },
+        'role': ['ctb'],
+    }, {
+        'agent': {
+            'preferred_name': 'Degree supervisor'
+        },
+        'role': ['dgs'],
+    }, {
+        'agent': {
+            'preferred_name': 'Printer'
+        },
+        'role': ['prt'],
+    }, {
+        'agent': {
+            'preferred_name': 'Editor'
+        },
+        'role': ['edt'],
+    }]
+
+
+def test_title(minimal_document):
+    """Test citation_title serialization."""
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert '<meta name="citation_title" content="Title of the document">' in \
+        result
+
+    # No title: the meta tag must be omitted
+    minimal_document.pop('title', None)
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_title' not in result
+
+
+def test_language(minimal_document):
+    """Test citation_language serialization."""
+    # No language
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_language' not in result
+
+    # Take the first language; 'eng' is expected to be rendered as 'en'
+    minimal_document['language'] = [{'value': 'eng'}, {'value': 'fre'}]
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert '<meta name="citation_language" content="en">' in result
+
+
+def test_publication_date(minimal_document):
+    """Test publication date."""
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_publication_date' not in result
+
+    minimal_document.update({
+        'provisionActivity': [{
+            'type': 'bf:Agent',
+            'startDate': '2019'
+        }, {
+            'type': 'bf:Publication',
+        }, {
+            'type': 'bf:Publication',
+            'startDate': '2019'
+        }, {
+            'type': 'bf:Publication',
+            'startDate': '2020-01-01'
+        }]
+    })
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert '<meta name="citation_publication_date" content="2019">' in result
+
+
+def test_keywords(minimal_document):
+    """Test citation_keywords: subjects of all languages joined by ' ; '."""
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_keywords' not in result
+
+    minimal_document['subjects'] = [{
+        'label': {
+            'language': 'eng',
+            'value': ['Subject 1', 'Subject 2']
+        }
+    }, {
+        'label': {
+            'language': 'fre',
+            'value': ['Sujet 1', 'Sujet 2']
+        }
+    }]
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert '<meta name="citation_keywords" content="Subject 1 ; Subject 2 ; '\
+        'Sujet 1 ; Sujet 2">' in result
+
+
+def test_pdf_url(minimal_document):
+    """Test PDF URL serialization."""
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_pdf_url' not in result
+
+    minimal_document.files['test.pdf'] = BytesIO(b'File content')
+    minimal_document.files['test.pdf']['type'] = 'file'
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert '<meta name="citation_pdf_url" content="http://localhost/documents'\
+        '/1000/files/test.pdf">' in result
+
+    minimal_document.files['test.pdf']['force_external_url'] = True
+    minimal_document.files['test.pdf'][
+        'external_url'] = 'https://some.domain/file.pdf'
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert '<meta name="citation_pdf_url" content="https://some.domain/' \
+        'file.pdf">' in result
+
+
+def test_authors(minimal_document, contributors):
+    """Test authors serialization."""
+    minimal_document.update({'contribution': contributors})
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+
+    for author in ['Creator 1', 'Creator 2']:
+        assert '<meta name="citation_author" content="{author}">'.format(
+            author=author) in result
+
+
+def test_doi(minimal_document):
+    """Test DOI serialization."""
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_doi' not in result
+
+    minimal_document['identifiedBy'] = [{'type': 'bf:Doi', 'value': '111111'}]
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert '<meta name="citation_doi" content="111111">' in result
+
+
+def test_abstract_html_url(minimal_document):
+    """Test citation_abstract_html_url serialization."""
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert '<meta name="citation_abstract_html_url" content="http://' \
+        'localhost/global/documents/1000">' in result
+
+
+def test_pages(app, minimal_document):
+    """Test citation_pages serialization."""
+    # No partOf
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_pages' not in result
+
+    # partOf without numberingPages
+    minimal_document['partOf'] = [{'document': {'title': 'Host document'}}]
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_pages' not in result
+
+    # numberingPages is output unchanged
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingPages': '123-125'
+    }]
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert '<meta name="citation_pages" content="123-125">' in result
+
+
+def test_first_page(app, minimal_document):
+    """Test first page."""
+    # No partOf
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_firstpage' not in result
+
+    # No pages
+    minimal_document['partOf'] = [{'document': {'title': 'Host document'}}]
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_firstpage' not in result
+
+    # Only one page
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingPages': '123'
+    }]
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert '<meta name="citation_firstpage" content="123">' in result
+
+    # Set of pages
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingPages': '123-130'
+    }]
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert '<meta name="citation_firstpage" content="123">' in result
+
+    # Exotic formatting
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingPages': '123, 134-135'
+    }]
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert '<meta name="citation_firstpage" content="123">' in result
+
+    # Page start not found
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingPages': 'pages'
+    }]
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_firstpage' not in result
+
+
+def test_last_page(app, minimal_document):
+    """Test last page."""
+    # No partOf
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_lastpage' not in result
+
+    # No pages
+    minimal_document['partOf'] = [{'document': {'title': 'Host document'}}]
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_lastpage' not in result
+
+    # Only one page
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingPages': '123'
+    }]
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_lastpage' not in result
+
+    # Set of pages
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingPages': '123-130'
+    }]
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert '<meta name="citation_lastpage" content="130">' in result
+
+    # Exotic formatting
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingPages': '123, 134-135'
+    }]
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_lastpage' not in result
+
+    # Page end not found
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingPages': 'pages'
+    }]
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_lastpage' not in result
+
+
+def test_volume(app, minimal_document):
+    """Test citation_volume serialization."""
+    # No partOf
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_volume' not in result
+
+    # partOf without numberingVolume
+    minimal_document['partOf'] = [{'document': {'title': 'Host document'}}]
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_volume' not in result
+
+    # numberingVolume is set
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingVolume': '1'
+    }]
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert '<meta name="citation_volume" content="1">' in result
+
+
+def test_journal_title(app, minimal_document):
+    """Test journal title."""
+    # No partOf
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert 'citation_journal_title' not in result
+
+    minimal_document['partOf'] = [{'document': {'title': 'Host document'}}]
+    result = google_scholar_v1.transform_record(minimal_document['pid'],
+                                                minimal_document)
+    assert '<meta name="citation_journal_title" content="Host document">' in \
+        result
diff --git a/tests/unit/documents/serializers/test_schemaorg_schema.py b/tests/unit/documents/serializers/test_schemaorg_schema.py
new file mode 100644
index 00000000..b642dcdb
--- /dev/null
+++ b/tests/unit/documents/serializers/test_schemaorg_schema.py
@@ -0,0 +1,463 @@
+# -*- coding: utf-8 -*-
+#
+# Swiss Open Access Repository
+# Copyright (C) 2019 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+"""Test schema.org marshmallow schema."""
+
+from io import BytesIO
+
+import pytest
+
+from sonar.modules.documents.api import DocumentRecord
+from sonar.modules.documents.serializers import schemaorg_v1
+
+
+@pytest.fixture()
+def minimal_document(db, bucket_location, organisation):
+    record = DocumentRecord.create(
+        {
+            'pid':
+            '1000',
+            'title': [{
+                'type':
+                'bf:Title',
+                'mainTitle': [{
+                    'language': 'eng',
+                    'value': 'Title of the document'
+                }]
+            }],
+            'organisation': [{
+                '$ref': 'https://sonar.ch/api/organisations/org'
+            }]
+        },
+        dbcommit=True,
+        with_bucket=True)
+    record.commit()
+    db.session.commit()
+    return record
+
+
+@pytest.fixture()
+def contributors():
+    return [{
+        'agent': {
+            'preferred_name': 'Creator 1'
+        },
+        'role': ['cre'],
+    }, {
+        'agent': {
+            'preferred_name': 'Creator 2',
+            'number': '123',
+            'date': '2019',
+            'place': 'Martigny'
+        },
+        'role': ['cre'],
+    }, {
+        'agent': {
+            'preferred_name': 'Contributor 1'
+        },
+        'role': ['ctb'],
+    }, {
+        'agent': {
+            'preferred_name': 'Contributor 2',
+            'number': '999',
+            'date': '2010',
+            'place': 'Sion'
+        },
+        'role': ['ctb'],
+    }, {
+        'agent': {
+            'preferred_name': 'Degree supervisor'
+        },
+        'role': ['dgs'],
+    }, {
+        'agent': {
+            'preferred_name': 'Printer'
+        },
+        'role': ['prt'],
+    }, {
+        'agent': {
+            'preferred_name': 'Editor'
+        },
+        'role': ['edt'],
+    }]
+
+
+def test_type(minimal_document):
+    """Test @type serialization."""
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['@type'] == 'CreativeWork'
+
+    minimal_document['documentType'] = 'coar:c_2f33'
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['@type'] == 'Book'
+
+
+def test_context(minimal_document):
+    """Test @context serialization."""
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['@context'] == 'http://schema.org'
+
+
+def test_abstract(minimal_document):
+    """Test abstract serialization."""
+    # No abstract
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert 'abstract' not in result
+
+    # Take the first
+    minimal_document['abstracts'] = [{
+        'value': 'Description 1'
+    }, {
+        'value': 'Description 2'
+    }]
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['abstract'] == 'Description 1'
+
+
+def test_description(minimal_document):
+    """Test description serialization (taken from the first abstract)."""
+    # No abstract, so no description
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert 'description' not in result
+
+    # Take the first abstract
+    minimal_document['abstracts'] = [{
+        'value': 'Description 1'
+    }, {
+        'value': 'Description 2'
+    }]
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['description'] == 'Description 1'
+
+
+def test_in_language(minimal_document):
+    """Test inLanguage serialization."""
+    # No language
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert 'inLanguage' not in result
+
+    # Take the first language
+    minimal_document['language'] = [{'value': 'eng'}, {'value': 'fre'}]
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['inLanguage'] == 'eng'
+
+
+def test_name(minimal_document):
+    """Test name."""
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['name'] == 'Title of the document'
+
+    # No title
+    minimal_document.pop('title', None)
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert 'name' not in result
+
+
+def test_headline(minimal_document):
+    """Test headline."""
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['headline'] == 'Title of the document'
+
+
+def test_creator(minimal_document, contributors):
+    """Test creator serialization."""
+    minimal_document.update({'contribution': contributors})
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['creator'] == [{
+        '@type': 'Person',
+        'name': 'Creator 1'
+    }, {
+        '@type': 'Person',
+        'name': 'Creator 2'
+    }]
+
+
+def test_date_published(minimal_document):
+    """Test date published serialization."""
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert 'datePublished' not in result
+
+    minimal_document.update({
+        'provisionActivity': [{
+            'type': 'bf:Agent',
+            'startDate': '2019'
+        }, {
+            'type': 'bf:Publication',
+        }, {
+            'type': 'bf:Publication',
+            'startDate': '2019'
+        }, {
+            'type': 'bf:Publication',
+            'startDate': '2020-01-01'
+        }]
+    })
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['datePublished'] == '2019'
+
+
+def test_url(minimal_document):
+    """Test URL serialization."""
+    minimal_document.files['test.pdf'] = BytesIO(b'File content')
+    minimal_document.files['test.pdf']['type'] = 'file'
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['url'] == ['http://localhost/documents/1000/files/test.pdf']
+
+    # External file
+    minimal_document.files['test.pdf']['force_external_url'] = True
+    minimal_document.files['test.pdf'][
+        'external_url'] = 'https://some.domain/file.pdf'
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['url'] == ['https://some.domain/file.pdf']
+
+    # Multiple files
+    minimal_document.files['test2.pdf'] = BytesIO(b'File content')
+    minimal_document.files['test2.pdf']['type'] = 'file'
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['url'] == [
+        'https://some.domain/file.pdf',
+        'http://localhost/documents/1000/files/test2.pdf'
+    ]
+
+
+def test_identifier(minimal_document):
+    """Test identifier serialization."""
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['identifier'] == 'http://localhost/global/documents/1000'
+
+
+def test_id(minimal_document):
+    """Test @id serialization."""
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['@id'] == 'http://localhost/global/documents/1000'
+
+
+def test_keywords(minimal_document):
+    """Test keywords: subjects of all languages flattened into one list."""
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert 'keywords' not in result
+
+    minimal_document['subjects'] = [{
+        'label': {
+            'language': 'eng',
+            'value': ['Subject 1', 'Subject 2']
+        }
+    }, {
+        'label': {
+            'language': 'fre',
+            'value': ['Sujet 1', 'Sujet 2']
+        }
+    }]
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['keywords'] == [
+        'Subject 1', 'Subject 2', 'Sujet 1', 'Sujet 2'
+    ]
+
+
+def test_license(app, minimal_document):
+    """Test license serialization."""
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert 'license' not in result
+
+    minimal_document['usageAndAccessPolicy'] = {'license': 'CC BY-NC-SA'}
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['license'] == 'CC BY-NC-SA'
+
+    minimal_document['usageAndAccessPolicy'] = {
+        'license': 'Other OA / license undefined',
+        'label': 'Custom license'
+    }
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['license'] == 'Other OA / license undefined, Custom license'
+
+
+def test_image(app, minimal_document):
+    """Test image serialization."""
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert 'image' not in result
+
+    minimal_document.files['test.pdf'] = BytesIO(b'File content')
+    minimal_document.files['test.pdf']['type'] = 'file'
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['image'] == 'http://localhoststatic/images/no-image.png'
+
+
+def test_pagination(app, minimal_document):
+    """Test pagination serialization."""
+    # No partOf
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert 'pagination' not in result
+
+    # partOf without numberingPages
+    minimal_document['partOf'] = [{'document': {'title': 'Host document'}}]
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert 'pagination' not in result
+
+    # numberingPages is output unchanged
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingPages': '123-125'
+    }]
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['pagination'] == '123-125'
+
+
+def test_page_start(app, minimal_document):
+    """Test page start."""
+    # No partOf
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert 'pageStart' not in result
+
+    # No pages
+    minimal_document['partOf'] = [{'document': {'title': 'Host document'}}]
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert 'pageStart' not in result
+
+    # Only one page
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingPages': '123'
+    }]
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['pageStart'] == '123'
+
+    # Set of pages
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingPages': '123-130'
+    }]
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['pageStart'] == '123'
+
+    # Exotic formatting
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingPages': '123, 134-135'
+    }]
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['pageStart'] == '123'
+
+    # Page start not found
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingPages': 'pages'
+    }]
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert 'pageStart' not in result
+
+
+def test_page_end(app, minimal_document):
+    """Test page end."""
+    # No partOf
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert 'pageEnd' not in result
+
+    # No pages
+    minimal_document['partOf'] = [{'document': {'title': 'Host document'}}]
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert 'pageEnd' not in result
+
+    # Only one page
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingPages': '123'
+    }]
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert 'pageEnd' not in result
+
+    # Set of pages
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingPages': '123-130'
+    }]
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert result['pageEnd'] == '130'
+
+    # Exotic formatting
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingPages': '123, 134-135'
+    }]
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert 'pageEnd' not in result
+
+    # Page end not found
+    minimal_document['partOf'] = [{
+        'document': {
+            'title': 'Host document'
+        },
+        'numberingPages': 'pages'
+    }]
+    result = schemaorg_v1.transform_record(minimal_document['pid'],
+                                           minimal_document)
+    assert 'pageEnd' not in result