Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Identify DOI in RERO DOC field 775 $o #696

Merged
merged 1 commit into from
Nov 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 17 additions & 6 deletions sonar/modules/documents/dojson/rerodoc/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,12 +638,23 @@ def marc21_to_other_edition(self, key, value):
if not electronic_locator or not public_note:
return None

return {
'document': {
'electronicLocator': electronic_locator
},
'publicNote': public_note
}
# if the value matches a DOI, apply `identifiedBy[type:bf:Doi]`
matches = re.search(r'(?P<doi>10\.\d{4,9}/[-._;()/:a-zA-Z0-9]+)', value.get('o'))
if matches and matches.group('doi'):
identified_by = self.get('identifiedBy', [])
identified_by.append({
'type': 'bf:Doi',
'value': matches.group('doi')
})
self['identifiedBy'] = identified_by
return None
else:
return {
'document': {
'electronicLocator': electronic_locator
},
'publicNote': public_note
}


@overdo.over('collections', '^982..')
Expand Down
115 changes: 115 additions & 0 deletions tests/ui/documents/dojson/rerodoc/test_rerodoc_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1614,6 +1614,121 @@ def test_marc21_to_other_edition(app):
data = overdo.do(marc21json)
assert not data.get('otherEdition')

# Incorrect DOIs - they do get collected in `otherEdition`
# instead of `identifiedBy`
marc21xml = """
<record>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">http://dx.doi.org/10.1130%2F0091-7613(2002)030%3C0655:CWCIAP%3E2.0.CO%3B2</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">http://dx.doi.org/0.1021/jp0558775</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">http://dx.doi.org/1017/S0031182010000296</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">http://dx.doi.org/1039/B926873A</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">http://dx.doi.org/0.1016/j.str.2012.09.019</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">https://doi.org/10.1111%2Fj.1467-9280.2009.02364.x</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
</record>
"""
marc21json = create_record(marc21xml)
data = overdo.do(marc21json)
assert data.get('otherEdition') == [{
'document': {
'electronicLocator': 'http://dx.doi.org/10.1130%2F0091-7613(2002)030%3C0655:CWCIAP%3E2.0.CO%3B2'
},
'publicNote': 'version publiée'
}, {
'document': {
'electronicLocator': 'http://dx.doi.org/0.1021/jp0558775'
},
'publicNote': 'version publiée'
}, {
'document': {
'electronicLocator': 'http://dx.doi.org/1017/S0031182010000296'
},
'publicNote': 'version publiée'
}, {
'document': {
'electronicLocator': 'http://dx.doi.org/1039/B926873A'
},
'publicNote': 'version publiée'
}, {
'document': {
'electronicLocator': 'http://dx.doi.org/0.1016/j.str.2012.09.019'
},
'publicNote': 'version publiée'
}, {
'document': {
'electronicLocator': 'https://doi.org/10.1111%2Fj.1467-9280.2009.02364.x'
},
'publicNote': 'version publiée'
}]
assert not data.get('identifiedBy')

# Well-formed DOIs - they get collected in `identifiedBy`
# instead of `otherEdition`
marc21xml = """
<record>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">http://dx.doi.org/10.1002/1521-3773(20020104)41:1</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">10.1016/j.apergo.2008.03.002</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">: https://doi.pangaea.de/10.1594/PANGAEA.914883</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">https://www.brepolsonline.net/doi/abs/10.1484/J.BPM.5.110808</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">https://doi.org710.35662/unine-thesis-2747</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
</record>
"""
marc21json = create_record(marc21xml)
data = overdo.do(marc21json)
assert not data.get('otherEdition')
assert data.get('identifiedBy') == [{
'type': 'bf:Doi',
'value': '10.1002/1521-3773(20020104)41:1'
},
{
'type': 'bf:Doi',
'value': '10.1016/j.apergo.2008.03.002'
},
{
'type': 'bf:Doi',
'value': '10.1594/PANGAEA.914883'
},
{
'type': 'bf:Doi',
'value': '10.1484/J.BPM.5.110808'
},
{
'type': 'bf:Doi',
'value': '10.35662/unine-thesis-2747'
}]


def test_marc21_to_specific_collection(app, bucket_location,
without_oaiset_signals):
Expand Down
13 changes: 11 additions & 2 deletions tests/unit/documents/loaders/test_rerodoc_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ def test_rerodoc_loader(app, organisation):
<marc:subfield code="g">Publisher's version</marc:subfield>
<marc:subfield code="o">https://doi.org/10.1093/mnras/stu2500</marc:subfield>
</marc:datafield>
<marc:datafield tag="775" ind1=" " ind2=" ">
<marc:subfield code="g">Alternative version</marc:subfield>
<marc:subfield code="o">http://www-geol.unine.ch/GEOMAGNETISME/tract.html</marc:subfield>
</marc:datafield>
<marc:datafield tag="919" ind1=" " ind2=" ">
<marc:subfield code="a">Consortium of Swiss Academic Libraries</marc:subfield>
<marc:subfield code="b">Zurich</marc:subfield>
Expand Down Expand Up @@ -134,6 +138,11 @@ def test_rerodoc_loader(app, organisation):
'Swissbib',
'value':
'(NATIONALLICENCE)oxford-10.1093/mnras/stu2500'
}, {
'type':
'bf:Doi',
'value':
'10.1093/mnras/stu2500'
}],
'collections': [{
'$ref': 'https://sonar.ch/api/collections/1'
Expand Down Expand Up @@ -167,9 +176,9 @@ def test_rerodoc_loader(app, organisation):
}],
'otherEdition': [{
'document': {
'electronicLocator': 'https://doi.org/10.1093/mnras/stu2500'
'electronicLocator': 'http://www-geol.unine.ch/GEOMAGNETISME/tract.html'
},
'publicNote': "Publisher's version"
'publicNote': "Alternative version"
}],
'language': [{
'type': 'bf:Language',
Expand Down