Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Identify DOI in RERO DOC field 775 $o
Browse files Browse the repository at this point in the history
* When MARC field 775 $o contains a DOI in RERO DOC, its value is
  imported into the `identifiedBy` field instead of `otherEdition`.
* Closes #659.
Co-Authored-by: Miguel Moreira <[email protected]>
mmo committed Nov 5, 2021
1 parent 599373a commit d62832b
Showing 3 changed files with 156 additions and 8 deletions.
39 changes: 33 additions & 6 deletions sonar/modules/documents/dojson/rerodoc/model.py
Original file line number Diff line number Diff line change
@@ -638,12 +638,39 @@ def marc21_to_other_edition(self, key, value):
if not electronic_locator or not public_note:
return None

return {
'document': {
'electronicLocator': electronic_locator
},
'publicNote': public_note
}
# if the value matches a DOI, apply `identifiedBy[type:bf:Doi]`
matches = re.search(r'(?P<doi>10\.\d{4,9}/[-._;()/:a-zA-Z0-9]+)', value.get('o'))
if matches and matches.group('doi'):
identified_by = self.get('identifiedBy', [])
return identified_by.append({
'type': 'bf:Doi',
'value': matches.group('doi')
})
return None
else:
return {
'document': {
'electronicLocator': electronic_locator
},
'publicNote': public_note
}


# @overdo.over('identifiedBy', '^775..')
# @utils.for_each_value
# @utils.ignore_value
# def marc21_to_identified_by_from_775(self, key, value):
# """Get DOI from field 775__o."""
# # if it matches a DOI
# matches = re.search(r'(?P<doi>10\.\d{4,9}/[-._;()/:a-zA-Z0-9]+)', value.get('o'))
# if matches and matches.group('doi'):
# identified_by = self.get('identifiedBy', [])
# return identified_by.append({
# 'type': 'bf:Doi',
# 'value': matches.group('doi')
# })
# else:
# return None


@overdo.over('collections', '^982..')
112 changes: 112 additions & 0 deletions tests/ui/documents/dojson/rerodoc/test_rerodoc_model.py
Original file line number Diff line number Diff line change
@@ -1614,6 +1614,118 @@ def test_marc21_to_other_edition(app):
data = overdo.do(marc21json)
assert not data.get('otherEdition')

# Incorrect DOIs - they do get collected in `otherEdition`
# instead of `identifiedBy`
marc21json = """
<record>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">http://dx.doi.org/10.1130%2F0091-7613(2002)030%3C0655:CWCIAP%3E2.0.CO%3B2</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">http://dx.doi.org/0.1021/jp0558775</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">http://dx.doi.org/1017/S0031182010000296</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">http://dx.doi.org/1039/B926873A</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">http://dx.doi.org/0.1016/j.str.2012.09.019</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">https://doi.org/10.1111%2Fj.1467-9280.2009.02364.x</subfield>
<subfield code="g">version publiée</subfield>
</datafield>
</record>
"""
marc21json = create_record(marc21xml)
data = overdo.do(marc21json)
assert data.get('otherEdition') == [{
'document': {
'electronicLocator': 'http://dx.doi.org/10.1130%2F0091-7613(2002)030%3C0655:CWCIAP%3E2.0.CO%3B2'
},
'publicNote': 'version publiée'
}, {
'document': {
'electronicLocator': 'http://dx.doi.org/0.1021/jp0558775'
},
'publicNote': 'version publiée'
}, {
'document': {
'electronicLocator': 'http://dx.doi.org/1017/S0031182010000296'
},
'publicNote': 'version publiée'
}, {
'document': {
'electronicLocator': 'http://dx.doi.org/1039/B926873A'
},
'publicNote': 'version publiée'
}, {
'document': {
'electronicLocator': 'http://dx.doi.org/0.1016/j.str.2012.09.019'
},
'publicNote': 'version publiée'
}, {
'document': {
'electronicLocator': 'https://doi.org/10.1111%2Fj.1467-9280.2009.02364.x'
},
'publicNote': 'version publiée'
}]
assert not data.get('identifiedBy')

# Well-formed DOIs - they get collected in `identifiedBy`
# instead of `otherEdition`
marc21json = """
<record>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">http://dx.doi.org/10.1002/1521-3773(20020104)41:1%3C99::AID-ANIE99%3E3.0.CO%3B2-E</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">10.1016/j.apergo.2008.03.002</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">: https://doi.pangaea.de/10.1594/PANGAEA.914883</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">https://www.brepolsonline.net/doi/abs/10.1484/J.BPM.5.110808</subfield>
</datafield>
<datafield tag="775" ind1=" " ind2=" ">
<subfield code="o">https://doi.org710.35662/unine-thesis-2747</subfield>
</datafield>
</record>
"""
marc21json = create_record(marc21xml)
data = overdo.do(marc21json)
assert not data.get('otherEdition')
assert data.get('identifiedBy') == [{
{
'type': 'bf:Doi',
'value': '10.1002/1521-3773(20020104)41:1%3C99::AID-ANIE99%3E3.0.CO%3B2-E'
},
{
'type': 'bf:Doi',
'value': '10.1016/j.apergo.2008.03.002'
},
{
'type': 'bf:Doi',
'value': '10.1594/PANGAEA.914883'
},
{
'type': 'bf:Doi',
'value': '10.1484/J.BPM.5.110808'
},
{
'type': 'bf:Doi',
'value': '10.35662/unine-thesis-2747'
}
}]


def test_marc21_to_specific_collection(app, bucket_location,
without_oaiset_signals):
13 changes: 11 additions & 2 deletions tests/unit/documents/loaders/test_rerodoc_loader.py
Original file line number Diff line number Diff line change
@@ -105,6 +105,10 @@ def test_rerodoc_loader(app, organisation):
<marc:subfield code="g">Publisher's version</marc:subfield>
<marc:subfield code="o">https://doi.org/10.1093/mnras/stu2500</marc:subfield>
</marc:datafield>
<marc:datafield tag="775" ind1=" " ind2=" ">
<marc:subfield code="g">Alternative version</marc:subfield>
<marc:subfield code="o">http://www-geol.unine.ch/GEOMAGNETISME/tract.html</marc:subfield>
</marc:datafield>
<marc:datafield tag="919" ind1=" " ind2=" ">
<marc:subfield code="a">Consortium of Swiss Academic Libraries</marc:subfield>
<marc:subfield code="b">Zurich</marc:subfield>
@@ -134,6 +138,11 @@ def test_rerodoc_loader(app, organisation):
'Swissbib',
'value':
'(NATIONALLICENCE)oxford-10.1093/mnras/stu2500'
}, {
'type':
'bf:Doi',
'value':
'10.1093/mnras/stu2500'
}],
'collections': [{
'$ref': 'https://sonar.ch/api/collections/1'
@@ -167,9 +176,9 @@ def test_rerodoc_loader(app, organisation):
}],
'otherEdition': [{
'document': {
'electronicLocator': 'https://doi.org/10.1093/mnras/stu2500'
'electronicLocator': 'http://www-geol.unine.ch/GEOMAGNETISME/tract.html'
},
'publicNote': "Publisher's version"
'publicNote': "Alternative version"
}],
'language': [{
'type': 'bf:Language',

0 comments on commit d62832b

Please sign in to comment.