Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

documents: add metadata in detail view #460

Merged
merged 1 commit into from
Feb 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions sonar/modules/documents/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,14 @@
search_responsify

from sonar.modules.documents.serializers.dc import SonarDublinCoreSerializer
from sonar.modules.documents.serializers.google_scholar import \
SonarGoogleScholarSerializer
from sonar.modules.documents.serializers.schemaorg import \
SonarSchemaOrgSerializer
from sonar.modules.documents.serializers.schemas.dc import DublinCoreV1
from sonar.modules.documents.serializers.schemas.google_scholar import \
GoogleScholarV1
from sonar.modules.documents.serializers.schemas.schemaorg import SchemaOrgV1
from sonar.modules.organisations.api import OrganisationRecord
from sonar.modules.serializers import JSONSerializer as _JSONSerializer
from sonar.modules.users.api import current_user_record
Expand Down Expand Up @@ -76,6 +83,11 @@ def post_process_serialize_search(self, results, pid_fetcher):
json_v1 = JSONSerializer(DocumentSchemaV1)
#: Dublin Core serializer, built on the ``DublinCoreV1`` schema.
dc_v1 = SonarDublinCoreSerializer(DublinCoreV1, replace_refs=True)
#: schema.org serializer, built on the ``SchemaOrgV1`` schema.
schemaorg_v1 = SonarSchemaOrgSerializer(SchemaOrgV1, replace_refs=True)
#: Google Scholar serializer, built on the ``GoogleScholarV1`` schema.
google_scholar_v1 = SonarGoogleScholarSerializer(GoogleScholarV1,
replace_refs=True)

# Records-REST serializers
# ========================
Expand Down
53 changes: 53 additions & 0 deletions sonar/modules/documents/serializers/google_scholar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
#
# Swiss Open Access Repository
# Copyright (C) 2019 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Google scholar serializer."""

from invenio_records_rest.serializers.base import PreprocessorMixin, \
SerializerMixinInterface
from invenio_records_rest.serializers.marshmallow import MarshmallowMixin


class SonarGoogleScholarSerializer(SerializerMixinInterface, MarshmallowMixin,
                                   PreprocessorMixin):
    """Google scholar serializer.

    Dumps a record through the configured marshmallow schema and renders the
    resulting key/value pairs as ``<meta name="citation_*">`` HTML tags.
    """

    def dump(self, obj, context=None):
        """Serialize object with schema.

        Mandatory to override this method, as invenio-records-rest does not
        use the right way to dump objects (compatible with marshmallow 3.9).

        :param obj: Object to serialize.
        :param context: Optional context handed to the schema constructor.
        :returns: The data dumped by the schema.
        """
        return self.schema_class(context=context).dump(obj)

    def transform_record(self, pid, record, links_factory=None, **kwargs):
        """Transform record in metas for Google scholar.

        Every entry of the transformed record becomes one
        ``<meta name="citation_{key}" content="{value}">`` tag; a list value
        produces one tag per element.

        :param pid: Persistent identifier, forwarded to the parent class.
        :param record: Record to transform, forwarded to the parent class.
        :param links_factory: Links factory, forwarded to the parent class.
        :returns: The meta tags joined by newlines, as a single string.
        """
        # Imported here so this class stays self-contained and does not rely
        # on a module-level import being added elsewhere.
        from html import escape

        data = super().transform_record(pid, record, links_factory, **kwargs)

        meta_template = '<meta name="citation_{key}" content="{value}">'
        metas = []
        for key, value in data.items():
            # Treat a scalar as a one-element list so both cases share the
            # same rendering path.
            values = value if isinstance(value, list) else [value]
            for item in values:
                # Values come from record metadata (titles, names, ...) and
                # end up inside an HTML attribute: escape quotes and markup
                # characters so they cannot break out of ``content="..."``.
                metas.append(
                    meta_template.format(key=key,
                                         value=escape(str(item), quote=True)))

        return '\n'.join(metas)
35 changes: 35 additions & 0 deletions sonar/modules/documents/serializers/schemaorg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
#
# Swiss Open Access Repository
# Copyright (C) 2019 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""schema.org serializer."""

from invenio_records_rest.serializers.base import PreprocessorMixin, \
SerializerMixinInterface
from invenio_records_rest.serializers.marshmallow import MarshmallowMixin


class SonarSchemaOrgSerializer(SerializerMixinInterface, MarshmallowMixin,
                               PreprocessorMixin):
    """Marshmallow based schema.org serializer for records."""

    def dump(self, obj, context=None):
        """Dump ``obj`` through a fresh instance of the configured schema.

        This override is required because invenio-records-rest does not dump
        objects in a way that is compatible with marshmallow 3.9.

        :param obj: Object to serialize.
        :param context: Optional context handed to the schema constructor.
        :returns: The data dumped by the schema.
        """
        schema = self.schema_class(context=context)
        return schema.dump(obj)
18 changes: 18 additions & 0 deletions sonar/modules/documents/serializers/schemas/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
#
# Swiss Open Access Repository
# Copyright (C) 2019 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Schemas for serialization."""
157 changes: 157 additions & 0 deletions sonar/modules/documents/serializers/schemas/base_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# -*- coding: utf-8 -*-
#
# Swiss Open Access Repository
# Copyright (C) 2019 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Base schema for marshmallow serialization."""

import re

from flask import request
from marshmallow import Schema, pre_dump

from sonar.modules.documents.api import DocumentRecord
from sonar.modules.documents.utils import has_external_urls_for_files, \
populate_files_properties


class BaseSchema(Schema):
    """Common marshmallow schema shared by the document serializers.

    Provides the pre-dump file preparation and a set of ``get_*`` helpers
    that read values out of ``obj['metadata']``.
    """

    @pre_dump
    def pre_dump(self, item, **kwargs):
        """Prepare file related data on the record before it is dumped.

        Nothing is done when the record has no ``_files``. Otherwise:

        - the main file is stored under ``mainFile`` (when there is one);
        - ``external_url`` records whether files must point to an external
          URL;
        - restrictions, thumbnail and URL are populated in files.

        :param item: Item object to process
        :returns: The same item, modified in place.
        """
        metadata = item['metadata']
        if not metadata.get('_files'):
            return item

        # Keep the main file at hand for the getter methods.
        main_file = self.get_main_file(item)
        if main_file:
            metadata['mainFile'] = main_file

        # The organisation record may force files to point to an external url.
        metadata['external_url'] = has_external_urls_for_files(metadata)

        # Add restriction, link and thumbnail to files.
        populate_files_properties(metadata)

        return item

    def get_main_file(self, obj):
        """Return the main file of the record.

        The main file is the entry of ``_files`` whose ``type`` is ``file``
        and that has the lowest ``order`` (100 when no order is set).

        :param obj: Record dict.
        :returns: Main file or None.
        """
        candidates = (
            entry for entry in obj['metadata'].get('_files', [])
            if entry.get('type') == 'file'
        )
        ordered = sorted(candidates, key=lambda entry: entry.get('order', 100))
        if not ordered:
            return None

        return ordered[0]

    def get_id(self, obj):
        """Get the permanent link of the record, built on the request host.

        :param obj: Record dict.
        :returns: Value returned by ``DocumentRecord.get_permanent_link``.
        """
        host_url = request.host_url
        pid = obj['metadata']['pid']
        return DocumentRecord.get_permanent_link(host_url, pid)

    def get_title(self, obj):
        """Get the value of the first main title of the first title entry.

        :param obj: Record dict.
        :returns: Title value, or None when the record has no title.
        """
        titles = obj['metadata'].get('title', [])
        if not titles:
            return None

        return titles[0]['mainTitle'][0]['value']

    def get_start_date(self, obj):
        """Get the start date of the first dated publication activity.

        :param obj: Record dict.
        :returns: Start date of the first ``bf:Publication`` provision
            activity having one, or None.
        """
        for activity in obj['metadata'].get('provisionActivity', []):
            if activity['type'] != 'bf:Publication':
                continue

            start_date = activity.get('startDate')
            if start_date:
                return start_date

        return None

    def get_keywords(self, obj):
        """Get keywords, gathered from the label values of all subjects.

        :param obj: Record dict.
        :returns: List of keywords, empty when there are no subjects.
        """
        keywords = []

        for subject in obj['metadata'].get('subjects', []):
            labels = subject['label']['value']
            keywords = keywords + labels

        return keywords

    def get_url(self, obj):
        """Get the URL of the main file.

        The download link (prefixed with the request host) is preferred over
        the external link.

        :param obj: Record dict.
        :returns: URL of the main file, or None when no link is available.
        """
        links = obj['metadata'].get('mainFile', {}).get('links')
        if not links:
            return None

        download = links.get('download')
        if download:
            return '{host}{image}'.format(host=request.host_url.rstrip('/'),
                                          image=download)

        external = links.get('external')
        if external:
            return external

        return None

    def get_pages(self, obj):
        """Get pages.

        :param obj: Record dict.
        :returns: Pages of the first ``partOf`` entry that has some, or None.
        """
        return next(
            (part['numberingPages']
             for part in obj['metadata'].get('partOf', [])
             if part.get('numberingPages')),
            None)

    def get_first_page(self, obj):
        """Get the first page.

        Only the first ``partOf`` entry holding pages is inspected.

        :param obj: Record dict.
        :returns: Leading number of the pages string, or None.
        """
        for part in obj['metadata'].get('partOf', []):
            pages = part.get('numberingPages')
            if not pages:
                continue

            found = re.match(r'^([0-9]+)', pages)
            if found is None:
                return None

            return found.group(1)

        return None

    def get_last_page(self, obj):
        """Get the last page.

        Only the first ``partOf`` entry holding pages is inspected.

        :param obj: Record dict.
        :returns: Number following the dash in a ``first-last`` pages string,
            or None.
        """
        for part in obj['metadata'].get('partOf', []):
            pages = part.get('numberingPages')
            if not pages:
                continue

            found = re.match(r'^[0-9]+\-([0-9]+)', pages)
            if found is None:
                return None

            return found.group(1)

        return None
49 changes: 4 additions & 45 deletions sonar/modules/documents/serializers/schemas/dc.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@
import re

from flask import request
from marshmallow import Schema, fields, pre_dump
from marshmallow import fields

from sonar.modules.documents.api import DocumentRecord
from sonar.modules.documents.utils import has_external_urls_for_files, \
populate_files_properties
from sonar.modules.documents.views import part_of_format

from .base_schema import BaseSchema

class DublinCoreV1(Schema):

class DublinCoreV1(BaseSchema):
"""Schema for records v1 in JSON."""

contributors = fields.Method('get_contributors')
Expand All @@ -46,34 +46,6 @@ class DublinCoreV1(Schema):
titles = fields.Method('get_titles')
types = fields.Method('get_types')

@pre_dump
def pre_dump(self, item, **kwargs):
"""Do some transformations in record before dumping it.

- Store the main file to use it in methods.
- Check if files must point to an external URL.
- Populate restrictions, thumbnail and URL in files.

:param item: Item object to process
:returns: Modified item
"""
if not item['metadata'].get('_files'):
return item

# Store the main file
main_file = self.get_main_file(item)
if main_file:
item['metadata']['mainFile'] = main_file

# Check if organisation record forces to point file to an external url
item['metadata']['external_url'] = has_external_urls_for_files(
item['metadata'])

# Add restriction, link and thumbnail to files
populate_files_properties(item['metadata'])

return item

def get_contributors(self, obj):
"""Get contributors."""
items = []
Expand Down Expand Up @@ -307,16 +279,3 @@ def format_contributor(self, contributor):
data += ' ({info})'.format(info=' : '.join(info))

return data

def get_main_file(self, obj):
"""Return the main file.

:param obj: Record dict.
:returns: Main file or None.
"""
files = [
file for file in obj['metadata'].get('_files', [])
if file.get('type') == 'file'
]
files = sorted(files, key=lambda file: file.get('order', 100))
return files[0] if files else None
Loading