Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

oai: fix error on document type processing #793

Merged
merged 1 commit into from
Mar 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sonar/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -851,7 +851,7 @@ def _(x):
'oai_dc': {
'namespace': 'http://www.openarchives.org/OAI/2.0/oai_dc/',
'schema': 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
'serializer': 'sonar.modules.documents.serializers.oaipmh_oai_dc',
'serializer': 'sonar.modules.documents.serializers.dc.sonar_dublin_core',
}
}
#: Number of records to return per page in OAI-PMH results.
Expand Down
13 changes: 1 addition & 12 deletions sonar/modules/documents/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,20 @@

from datetime import datetime

from flask import current_app, request
from flask import request
from invenio_records_rest.serializers.response import record_responsify, \
search_responsify

from sonar.modules.collections.api import Record as CollectionRecord
from sonar.modules.documents.serializers.dc import SonarDublinCoreSerializer
from sonar.modules.documents.serializers.google_scholar import \
SonarGoogleScholarSerializer
from sonar.modules.documents.serializers.schemaorg import \
SonarSchemaOrgSerializer
from sonar.modules.documents.serializers.schemas.dc import DublinCoreV1
from sonar.modules.documents.serializers.schemas.google_scholar import \
GoogleScholarV1
from sonar.modules.documents.serializers.schemas.schemaorg import SchemaOrgV1
from sonar.modules.organisations.api import OrganisationRecord
from sonar.modules.serializers import JSONSerializer as _JSONSerializer
from sonar.modules.subdivisions.api import Record as SubdivisionRecord
from sonar.modules.users.api import current_user_record
from sonar.modules.utils import get_language_value

from ..marshmallow import DocumentSchemaV1
Expand Down Expand Up @@ -84,8 +80,6 @@ def post_process_serialize_search(self, results, pid_fetcher):
# ===========
#: JSON serializer definition.
json_v1 = JSONSerializer(DocumentSchemaV1)
#: Dublin Core serializer
dc_v1 = SonarDublinCoreSerializer(DublinCoreV1, replace_refs=True)
#: schema.org serializer
schemaorg_v1 = SonarSchemaOrgSerializer(SchemaOrgV1, replace_refs=True)
#: google scholar serializer
Expand All @@ -104,8 +98,3 @@ def post_process_serialize_search(self, results, pid_fetcher):
'json_v1_response',
'json_v1_search',
)

# OAI-PMH record serializers.
# ===========================
#: OAI-PMH OAI Dublin Core record serializer.
oaipmh_oai_dc = dc_v1.serialize_oaipmh
34 changes: 26 additions & 8 deletions sonar/modules/documents/serializers/dc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,34 @@

"""Dublin Core serializer."""

from invenio_records_rest.serializers.dc import DublinCoreSerializer
from dcxml import simpledc
from flask_resources.serializers import SerializerMixin

from sonar.modules.documents.serializers.schemas.dc import DublinCoreSchema

class SonarDublinCoreSerializer(DublinCoreSerializer):
    """Marshmallow based DublinCore serializer for records."""

    def dump(self, obj, context=None):
        """Serialize object with schema.

        Mandatory to override this method, as invenio-records-rest does not
        use the right way to dump objects (compatible with marshmallow 3.9).

        :param obj: Object to dump with the schema.
        :param context: Context passed to the schema constructor.
        :returns: Result of the schema ``dump`` call.
        """
        return self.schema_class(context=context).dump(obj)


class SonarDublinCoreXMLSerializer(SerializerMixin):
    """DublinCore serializer for records."""

    def __init__(self, **options):
        """Constructor.

        :param options: Accepted as keyword arguments; not used in this
            constructor.
        """
        self.schema_class = DublinCoreSchema

    def transform_record(self, obj):
        """Transform record.

        :param obj: Record data; it is wrapped under a ``metadata`` key
            before being dumped with the schema.
        :returns: Result of dumping the wrapped data with the schema.
        """
        # TODO: Remove this hack after migrating to invenio resources
        return self.schema_class().dump(dict(metadata=obj))

    def serialize_object_xml(self, obj):
        """Serialize a single record and persistent identifier to etree.

        :param obj: Record instance; its ``_source`` entry is the part
            that gets serialized.
        :returns: Value returned by ``simpledc.dump_etree``.
        """
        # Local is named ``data`` so it does not shadow the ``json`` module.
        data = self.transform_record(obj["_source"])
        return simpledc.dump_etree(data)


def sonar_dublin_core(pid, record):
    """Build the DublinCore XML used for OAI-PMH output.

    :param pid: Persistent identifier; part of the signature but not used
        in this function.
    :param record: Record handed to the serializer.
    :returns: Result of ``SonarDublinCoreXMLSerializer.serialize_object_xml``.
    """
    serializer = SonarDublinCoreXMLSerializer()
    return serializer.serialize_object_xml(record)
13 changes: 7 additions & 6 deletions sonar/modules/documents/serializers/schemas/dc.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from .base_schema import BaseSchema


class DublinCoreV1(BaseSchema):
class DublinCoreSchema(BaseSchema):
"""Schema for records v1 in JSON."""

contributors = fields.Method('get_contributors')
Expand Down Expand Up @@ -249,11 +249,12 @@ def get_titles(self, obj):

def get_types(self, obj):
    """Get types.

    Reads ``obj['metadata']['documentType']`` and splits it on ``:``.

    :param obj: Mapping with a ``metadata`` mapping inside.
    :returns: A one-item list, or an empty list:

        * no or empty document type: ``[]``
        * no colon in the value: the raw value
        * exactly one colon: a COAR resource type URL built from the part
          after the colon
        * anything else: ``[]``
    """
    document_type = obj['metadata'].get('documentType')
    if not document_type:
        return []

    parts = document_type.split(':')
    # A bare value has no prefix to strip, so it is returned unchanged
    # instead of being indexed at position 1.
    if len(parts) == 1:
        return [parts[0]]
    if len(parts) == 2:
        return [f'http://purl.org/coar/resource_type/{parts[1]}']

    return []

Expand Down
Loading