diff --git a/sonar/modules/documents/dojson/rerodoc/model.py b/sonar/modules/documents/dojson/rerodoc/model.py index 9004356aa..2b4729b3e 100644 --- a/sonar/modules/documents/dojson/rerodoc/model.py +++ b/sonar/modules/documents/dojson/rerodoc/model.py @@ -638,12 +638,39 @@ def marc21_to_other_edition(self, key, value): if not electronic_locator or not public_note: return None - return { - 'document': { - 'electronicLocator': electronic_locator - }, - 'publicNote': public_note - } + # if the value matches a DOI, apply `identifiedBy[type:bf:Doi]` + matches = re.search(r'(?P10\.\d{4,9}/[-._;()/:a-zA-Z0-9]+)', value.get('o')) + if matches and matches.group('doi'): + identified_by = self.get('identifiedBy', []) + return identified_by.append({ + 'type': 'bf:Doi', + 'value': matches.group('doi') + }) + return None + else: + return { + 'document': { + 'electronicLocator': electronic_locator + }, + 'publicNote': public_note + } + + +# @overdo.over('identifiedBy', '^775..') +# @utils.for_each_value +# @utils.ignore_value +# def marc21_to_identified_by_from_775(self, key, value): +# """Get DOI from field 775__o.""" +# # if it matches a DOI +# matches = re.search(r'(?P10\.\d{4,9}/[-._;()/:a-zA-Z0-9]+)', value.get('o')) +# if matches and matches.group('doi'): +# identified_by = self.get('identifiedBy', []) +# return identified_by.append({ +# 'type': 'bf:Doi', +# 'value': matches.group('doi') +# }) +# else: +# return None @overdo.over('collections', '^982..') diff --git a/tests/ui/documents/dojson/rerodoc/test_rerodoc_model.py b/tests/ui/documents/dojson/rerodoc/test_rerodoc_model.py index 8874dc332..2c82a5cef 100644 --- a/tests/ui/documents/dojson/rerodoc/test_rerodoc_model.py +++ b/tests/ui/documents/dojson/rerodoc/test_rerodoc_model.py @@ -1614,6 +1614,118 @@ def test_marc21_to_other_edition(app): data = overdo.do(marc21json) assert not data.get('otherEdition') + # Incorrect DOIs - they do get collected in `otherEdition` + # instead of `identifiedBy` + marc21json = """ + + + http://dx.doi.org/10.1130%2F0091-7613(2002)030%3C0655:CWCIAP%3E2.0.CO%3B2 + version publiée + + + http://dx.doi.org/0.1021/jp0558775 + version publiée + + + http://dx.doi.org/1017/S0031182010000296 + version publiée + + + http://dx.doi.org/1039/B926873A + version publiée + + + http://dx.doi.org/0.1016/j.str.2012.09.019 + version publiée + + + https://doi.org/10.1111%2Fj.1467-9280.2009.02364.x + version publiée + + + """ + marc21json = create_record(marc21xml) + data = overdo.do(marc21json) + assert data.get('otherEdition') == [{ + 'document': { + 'electronicLocator': 'http://dx.doi.org/10.1130%2F0091-7613(2002)030%3C0655:CWCIAP%3E2.0.CO%3B2' + }, + 'publicNote': 'version publiée' + }, { + 'document': { + 'electronicLocator': 'http://dx.doi.org/0.1021/jp0558775' + }, + 'publicNote': 'version publiée' + }, { + 'document': { + 'electronicLocator': 'http://dx.doi.org/1017/S0031182010000296' + }, + 'publicNote': 'version publiée' + }, { + 'document': { + 'electronicLocator': 'http://dx.doi.org/1039/B926873A' + }, + 'publicNote': 'version publiée' + }, { + 'document': { + 'electronicLocator': 'http://dx.doi.org/0.1016/j.str.2012.09.019' + }, + 'publicNote': 'version publiée' + }, { + 'document': { + 'electronicLocator': 'https://doi.org/10.1111%2Fj.1467-9280.2009.02364.x' + }, + 'publicNote': 'version publiée' + }] + assert not data.get('identifiedBy') + + # Well-formed DOIs - they get collected in `identifiedBy` + # instead of`otherEdition` + marc21json = """ + + + http://dx.doi.org/10.1002/1521-3773(20020104)41:1%3C99::AID-ANIE99%3E3.0.CO%3B2-E + + + 10.1016/j.apergo.2008.03.002 + + + : https://doi.pangaea.de/10.1594/PANGAEA.914883 + + + https://www.brepolsonline.net/doi/abs/10.1484/J.BPM.5.110808 + + + https://doi.org710.35662/unine-thesis-2747 + + + """ + marc21json = create_record(marc21xml) + data = overdo.do(marc21json) + assert not data.get('otherEdition') + assert data.get('identifiedBy') == [{ + { + 'type': 'bf:Doi', + 'value': '10.1002/1521-3773(20020104)41:1%3C99::AID-ANIE99%3E3.0.CO%3B2-E' + }, + { + 'type': 'bf:Doi', + 'value': '10.1016/j.apergo.2008.03.002' + }, + { + 'type': 'bf:Doi', + 'value': '10.1594/PANGAEA.914883' + }, + { + 'type': 'bf:Doi', + 'value': '10.1484/J.BPM.5.110808' + }, + { + 'type': 'bf:Doi', + 'value': '10.35662/unine-thesis-2747' + } + }] + def test_marc21_to_specific_collection(app, bucket_location, without_oaiset_signals): diff --git a/tests/unit/documents/loaders/test_rerodoc_loader.py b/tests/unit/documents/loaders/test_rerodoc_loader.py index 7b6ea427d..04e258f08 100644 --- a/tests/unit/documents/loaders/test_rerodoc_loader.py +++ b/tests/unit/documents/loaders/test_rerodoc_loader.py @@ -105,6 +105,10 @@ def test_rerodoc_loader(app, organisation): Publisher's version https://doi.org/10.1093/mnras/stu2500 + + Alternative version + http://www-geol.unine.ch/GEOMAGNETISME/tract.html + Consortium of Swiss Academic Libraries Zurich @@ -134,6 +138,11 @@ def test_rerodoc_loader(app, organisation): 'Swissbib', 'value': '(NATIONALLICENCE)oxford-10.1093/mnras/stu2500' + }, { + 'type': + 'bf:Doi', + 'value': + '10.1093/mnras/stu2500' }], 'collections': [{ '$ref': 'https://sonar.ch/api/collections/1' @@ -167,9 +176,9 @@ def test_rerodoc_loader(app, organisation): }], 'otherEdition': [{ 'document': { - 'electronicLocator': 'https://doi.org/10.1093/mnras/stu2500' + 'electronicLocator': 'http://www-geol.unine.ch/GEOMAGNETISME/tract.html' }, - 'publicNote': "Publisher's version" + 'publicNote': "Alternative version" }], 'language': [{ 'type': 'bf:Language',