Skip to content

Commit

Permalink
Identify DOI in RERO DOC field 775 $o
Browse files Browse the repository at this point in the history
* When MARC field 775 $o contains a DOI in RERO DOC, its value is
  imported into the `identifiedBy` field instead of `otherEdition`.
* Closes #659.
Co-Authored-by: Miguel Moreira <[email protected]>
  • Loading branch information
mmo committed Nov 5, 2021
1 parent dc2d35c commit d2789b6
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 8 deletions.
23 changes: 17 additions & 6 deletions sonar/modules/documents/dojson/rerodoc/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,12 +638,23 @@ def marc21_to_other_edition(self, key, value):
if not electronic_locator or not public_note:
return None

return {
'document': {
'electronicLocator': electronic_locator
},
'publicNote': public_note
}
# if the value matches a DOI, apply `identifiedBy[type:bf:Doi]`
matches = re.search(r'(?P<doi>10\.\d{4,9}/[-._;()/:a-zA-Z0-9]+)', value.get('o'))
if matches and matches.group('doi'):
identified_by = self.get('identifiedBy', [])
identified_by.append({
'type': 'bf:Doi',
'value': matches.group('doi')
})
self['identifiedBy'] = identified_by
return None
else:
return {
'document': {
'electronicLocator': electronic_locator
},
'publicNote': public_note
}


@overdo.over('collections', '^982..')
Expand Down
115 changes: 115 additions & 0 deletions tests/ui/documents/dojson/rerodoc/test_rerodoc_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1614,6 +1614,121 @@ def test_marc21_to_other_edition(app):
data = overdo.do(marc21json)
assert not data.get('otherEdition')

# Incorrect DOIs - they do get collected in `otherEdition`
# instead of `identifiedBy`
marc21xml = """
<record>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">http://dx.doi.org/10.1130%2F0091-7613(2002)030%3C0655:CWCIAP%3E2.0.CO%3B2</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">http://dx.doi.org/0.1021/jp0558775</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">http://dx.doi.org/1017/S0031182010000296</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">http://dx.doi.org/1039/B926873A</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">http://dx.doi.org/0.1016/j.str.2012.09.019</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">https://doi.org/10.1111%2Fj.1467-9280.2009.02364.x</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
</record>
"""
marc21json = create_record(marc21xml)
data = overdo.do(marc21json)
assert data.get('otherEdition') == [{
'document': {
'electronicLocator': 'http://dx.doi.org/10.1130%2F0091-7613(2002)030%3C0655:CWCIAP%3E2.0.CO%3B2'
},
'publicNote': 'version publiée'
}, {
'document': {
'electronicLocator': 'http://dx.doi.org/0.1021/jp0558775'
},
'publicNote': 'version publiée'
}, {
'document': {
'electronicLocator': 'http://dx.doi.org/1017/S0031182010000296'
},
'publicNote': 'version publiée'
}, {
'document': {
'electronicLocator': 'http://dx.doi.org/1039/B926873A'
},
'publicNote': 'version publiée'
}, {
'document': {
'electronicLocator': 'http://dx.doi.org/0.1016/j.str.2012.09.019'
},
'publicNote': 'version publiée'
}, {
'document': {
'electronicLocator': 'https://doi.org/10.1111%2Fj.1467-9280.2009.02364.x'
},
'publicNote': 'version publiée'
}]
assert not data.get('identifiedBy')

# Well-formed DOIs - they get collected in `identifiedBy`
# instead of `otherEdition`
marc21xml = """
<record>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">http://dx.doi.org/10.1002/1521-3773(20020104)41:1</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">10.1016/j.apergo.2008.03.002</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">: https://doi.pangaea.de/10.1594/PANGAEA.914883</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">https://www.brepolsonline.net/doi/abs/10.1484/J.BPM.5.110808</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">https://doi.org710.35662/unine-thesis-2747</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
</record>
"""
marc21json = create_record(marc21xml)
data = overdo.do(marc21json)
assert not data.get('otherEdition')
assert data.get('identifiedBy') == [{
'type': 'bf:Doi',
'value': '10.1002/1521-3773(20020104)41:1'
},
{
'type': 'bf:Doi',
'value': '10.1016/j.apergo.2008.03.002'
},
{
'type': 'bf:Doi',
'value': '10.1594/PANGAEA.914883'
},
{
'type': 'bf:Doi',
'value': '10.1484/J.BPM.5.110808'
},
{
'type': 'bf:Doi',
'value': '10.35662/unine-thesis-2747'
}]


def test_marc21_to_specific_collection(app, bucket_location,
without_oaiset_signals):
Expand Down
13 changes: 11 additions & 2 deletions tests/unit/documents/loaders/test_rerodoc_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ def test_rerodoc_loader(app, organisation):
<marc:subfield code="g">Publisher's version</marc:subfield>
<marc:subfield code="o">https://doi.org/10.1093/mnras/stu2500</marc:subfield>
</marc:datafield>
<marc:datafield tag="775" ind1=" " ind2=" ">
<marc:subfield code="g">Alternative version</marc:subfield>
<marc:subfield code="o">http://www-geol.unine.ch/GEOMAGNETISME/tract.html</marc:subfield>
</marc:datafield>
<marc:datafield tag="919" ind1=" " ind2=" ">
<marc:subfield code="a">Consortium of Swiss Academic Libraries</marc:subfield>
<marc:subfield code="b">Zurich</marc:subfield>
Expand Down Expand Up @@ -134,6 +138,11 @@ def test_rerodoc_loader(app, organisation):
'Swissbib',
'value':
'(NATIONALLICENCE)oxford-10.1093/mnras/stu2500'
}, {
'type':
'bf:Doi',
'value':
'10.1093/mnras/stu2500'
}],
'collections': [{
'$ref': 'https://sonar.ch/api/collections/1'
Expand Down Expand Up @@ -167,9 +176,9 @@ def test_rerodoc_loader(app, organisation):
}],
'otherEdition': [{
'document': {
'electronicLocator': 'https://doi.org/10.1093/mnras/stu2500'
'electronicLocator': 'http://www-geol.unine.ch/GEOMAGNETISME/tract.html'
},
'publicNote': "Publisher's version"
'publicNote': "Alternative version"
}],
'language': [{
'type': 'bf:Language',
Expand Down

0 comments on commit d2789b6

Please sign in to comment.