Skip to content

Commit

Permalink
oai: fix error on document type processing
Browse files Browse the repository at this point in the history
* Closes #789.

Co-Authored-by: Bertrand Zuchuat <[email protected]>
Co-Authored-by: Johnny Mariéthoz <[email protected]>
  • Loading branch information
Garfield-fr and jma committed Mar 9, 2022
1 parent 47b402e commit 8f3e748
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 70 deletions.
2 changes: 1 addition & 1 deletion sonar/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -851,7 +851,7 @@ def _(x):
'oai_dc': {
'namespace': 'http://www.openarchives.org/OAI/2.0/oai_dc/',
'schema': 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
'serializer': 'sonar.modules.documents.serializers.oaipmh_oai_dc',
'serializer': 'sonar.modules.documents.serializers.dc.sonar_dublin_core',
}
}
#: Number of records to return per page in OAI-PMH results.
Expand Down
13 changes: 1 addition & 12 deletions sonar/modules/documents/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,20 @@

from datetime import datetime

from flask import current_app, request
from flask import request
from invenio_records_rest.serializers.response import record_responsify, \
search_responsify

from sonar.modules.collections.api import Record as CollectionRecord
from sonar.modules.documents.serializers.dc import SonarDublinCoreSerializer
from sonar.modules.documents.serializers.google_scholar import \
SonarGoogleScholarSerializer
from sonar.modules.documents.serializers.schemaorg import \
SonarSchemaOrgSerializer
from sonar.modules.documents.serializers.schemas.dc import DublinCoreV1
from sonar.modules.documents.serializers.schemas.google_scholar import \
GoogleScholarV1
from sonar.modules.documents.serializers.schemas.schemaorg import SchemaOrgV1
from sonar.modules.organisations.api import OrganisationRecord
from sonar.modules.serializers import JSONSerializer as _JSONSerializer
from sonar.modules.subdivisions.api import Record as SubdivisionRecord
from sonar.modules.users.api import current_user_record
from sonar.modules.utils import get_language_value

from ..marshmallow import DocumentSchemaV1
Expand Down Expand Up @@ -84,8 +80,6 @@ def post_process_serialize_search(self, results, pid_fetcher):
# ===========
#: JSON serializer definition.
json_v1 = JSONSerializer(DocumentSchemaV1)
#: Dublin Core serializer
dc_v1 = SonarDublinCoreSerializer(DublinCoreV1, replace_refs=True)
#: schema.org serializer
schemaorg_v1 = SonarSchemaOrgSerializer(SchemaOrgV1, replace_refs=True)
#: google scholar serializer
Expand All @@ -104,8 +98,3 @@ def post_process_serialize_search(self, results, pid_fetcher):
'json_v1_response',
'json_v1_search',
)

# OAI-PMH record serializers.
# ===========================
#: OAI-PMH OAI Dublin Core record serializer.
oaipmh_oai_dc = dc_v1.serialize_oaipmh
34 changes: 26 additions & 8 deletions sonar/modules/documents/serializers/dc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,34 @@

"""Dublin Core serializer."""

from invenio_records_rest.serializers.dc import DublinCoreSerializer
from dcxml import simpledc
from flask_resources.serializers import SerializerMixin

from sonar.modules.documents.serializers.schemas.dc import DublinCoreSchema

class SonarDublinCoreSerializer(DublinCoreSerializer):
    """Marshmallow based DublinCore serializer for records."""

    def dump(self, obj, context=None):
        """Serialize object with schema.

        Mandatory to override this method, as invenio-records-rest does not
        use the right way to dump objects (compatible with marshmallow 3.9).
        """
        return self.schema_class(context=context).dump(obj)


class SonarDublinCoreXMLSerializer(SerializerMixin):
    """DublinCore serializer for records."""

    def __init__(self, **options):
        """Constructor.

        :param options: Accepted for call compatibility; not used here.
        """
        self.schema_class = DublinCoreSchema

    def transform_record(self, obj):
        """Transform record.

        :param obj: Record data to dump.
        :returns: Result of dumping ``obj`` through ``schema_class``.
        """
        # The schema reads its values from a ``metadata`` key, so the record
        # is wrapped before being dumped.
        # TODO: Remove this hack after migrating to invenio resources.
        return self.schema_class().dump(dict(metadata=obj))

    def serialize_object_xml(self, obj):
        """Serialize a single record and persistent identifier to etree.

        :param obj: Mapping holding the record under the ``_source`` key
            (presumably a search hit -- confirm against the OAI-PMH caller).
        :returns: Value returned by ``simpledc.dump_etree`` for the
            transformed record.
        """
        # Named ``dc_data`` rather than ``json`` to avoid shadowing the
        # standard library module name.
        dc_data = self.transform_record(obj["_source"])
        return simpledc.dump_etree(dc_data)


def sonar_dublin_core(pid, record):
    """Get DublinCore XML for OAI-PMH.

    :param pid: Persistent identifier; part of the expected signature but
        not used by this function.
    :param record: Object handed unchanged to
        ``SonarDublinCoreXMLSerializer.serialize_object_xml``.
    :returns: Whatever ``serialize_object_xml`` returns for ``record``.
    """
    serializer = SonarDublinCoreXMLSerializer()
    return serializer.serialize_object_xml(record)
13 changes: 7 additions & 6 deletions sonar/modules/documents/serializers/schemas/dc.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from .base_schema import BaseSchema


class DublinCoreV1(BaseSchema):
class DublinCoreSchema(BaseSchema):
"""Schema for records v1 in JSON."""

contributors = fields.Method('get_contributors')
Expand Down Expand Up @@ -249,11 +249,12 @@ def get_titles(self, obj):

def get_types(self, obj):
    """Get types.

    Reads ``obj['metadata']['documentType']`` and maps it as follows:

    * missing or empty value: ``[]``;
    * value without a ``:`` separator: the value itself, unchanged;
    * ``prefix:code``: the COAR resource type URI built from ``code``;
    * any other number of ``:``-separated parts: ``[]``.

    :param obj: Mapping with a ``metadata`` dictionary.
    :returns: List containing at most one type string.
    """
    document_type = obj['metadata'].get('documentType', '')
    if not document_type:
        return []

    parts = document_type.split(':')
    if len(parts) == 1:
        # The previous implementation indexed ``split(':')[1]``
        # unconditionally and raised IndexError for values like this one.
        return [parts[0]]
    if len(parts) == 2:
        return [f'http://purl.org/coar/resource_type/{parts[1]}']

    return []

Expand Down
Loading

0 comments on commit 8f3e748

Please sign in to comment.