Skip to content

Commit

Permalink
documents: Import documents from RERODOC
Browse files Browse the repository at this point in the history
* Harvests and imports data from RERODOC, based on defined sets.
* Activates Celery beat for task processing.
* Adds manual translations file.
* Removes fake institutions and documents fixtures.
* Logs errors in a file for tracking import failures.
* Re-enables marshmallow serializers, closes #79.
* Changes JSON schema properties for titles and abstracts.
* Updates detail view.
* Closes #76.

Co-Authored-by: Sébastien Délèze <[email protected]>
  • Loading branch information
Sébastien Délèze committed Feb 21, 2020
1 parent c0239ea commit f1a419c
Show file tree
Hide file tree
Showing 55 changed files with 3,838 additions and 2,132 deletions.
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@ docker-compose.yml
docker-compose-dev.yml

Procfile*

celerybeat-schedule
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,6 @@ target/

# Generated JSON schema
sonar/modules/documents/jsonschemas/documents/document-v1.0.0.json

# Celery
celerybeat-schedule
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ pycountry = "*"
werkzeug = "==0.16.0"
# TODO: check why we need this.
flask-login = "<0.5"
invenio-oaiharvester = {editable = true,ref = "v1.0.0a4",git = "https://github.com/inveniosoftware/invenio-oaiharvester.git"}

[dev-packages]
Flask-Debugtoolbar = ">=0.10.1"
Expand Down
14 changes: 13 additions & 1 deletion Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions babel.ini
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
# Extraction from Python source files
[python: **.py]
encoding = utf-8
[python: **/manual_translations.txt]
encoding = utf-8

# Extraction from Jinja2 templates
[jinja2: **/templates/**.html]
Expand Down
162 changes: 162 additions & 0 deletions data/complete_document_sample.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
{
"identifiedBy": [
{
"value": "urn:nbn:ch:rero-006-108713",
"type": "bf:Urn"
},
{
"value": "oai:doc.rero.ch:20050302172954-WU",
"type": "bf:Identifier"
}
],
"language": [
{
"value": "eng",
"type": "bf:Language"
}
],
"authors": [
{
"type": "person",
"name": "Mancini, Loriano",
"date": "1975-03-23",
"qualifier": "Librarian"
},
{
"type": "person",
"name": "Ronchetti, Elvezio"
},
{
"type": "person",
"name": "Trojani, Fabio"
}
],
"title": [
{
"type": "bf:Title",
"mainTitle": [
{
"language": "eng",
"value": "Title of the document"
}
],
"subtitle": [
{
"language": "eng",
"value": "Subtitle"
}
]
}
],
"extent": "103 p",
"abstracts": [
{
"language": "eng",
"value": "Abstract of the document"
}
],
"subjects": [
{
"language": "eng",
"value": [
"Time series models",
"GARCH models"
]
}
],
"provisionActivity": [
{
"type": "bf:Manufacture",
"statement": [
{
"label": [
{
"value": "Bienne"
}
],
"type": "bf:Place"
},
{
"label": [
{
"value": "Impr. Weber"
}
],
"type": "bf:Agent"
},
{
"label": [
{
"value": "[2006]"
}
],
"type": "Date"
},
{
"label": [
{
"value": "Lausanne"
}
],
"type": "bf:Place"
},
{
"label": [
{
"value": "Rippone"
}
],
"type": "bf:Place"
},
{
"label": [
{
"value": "Impr. Coustaud"
}
],
"type": "bf:Agent"
}
]
}
],
"editionStatement": [
{
"editionDesignation": [
{
"value": "Di 3 ban"
},
{
"value": "第3版",
"language": "chi-hani"
}
],
"responsibility": [
{
"value": "Zeng Lingliang zhu bian"
},
{
"value": "曾令良主编",
"language": "chi-hani"
}
]
}
],
"is_part_of": "Is part of",
"copyrightDate": [
"© 1971"
],
"series": [
{
"name": "Collection One",
"number": "5"
},
{
"name": "Collection Two",
"number": "123"
}
],
"notes": ["Note 1", "Note 2"],
"institution": {
"$ref": "https://sonar.ch/api/institutions/org"
}
}
10 changes: 0 additions & 10 deletions data/institutions.json

This file was deleted.

130 changes: 130 additions & 0 deletions data/rerodoc_oai_sources.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
[
{
"key": "rerodoc-unisi",
"name": "rerodoc",
"url": "http://doc.rero.ch/oai2d",
"metadataprefix": "marcxml",
"comment": "",
"setspecs": "unisi"
},
{
"key": "rerodoc-unifr",
"name": "rerodoc",
"url": "http://doc.rero.ch/oai2d",
"metadataprefix": "marcxml",
"comment": "",
"setspecs": "unifr"
},
{
"key": "rerodoc-nl-epfl",
"name": "rerodoc",
"url": "http://doc.rero.ch/oai2d",
"metadataprefix": "marcxml",
"comment": "",
"setspecs": "nl-epfl"
},
{
"key": "rerodoc-nl-ethz",
"name": "rerodoc",
"url": "http://doc.rero.ch/oai2d",
"metadataprefix": "marcxml",
"comment": "",
"setspecs": "nl-ethz"
},
{
"key": "rerodoc-nl-fachhochschulen",
"name": "rerodoc",
"url": "http://doc.rero.ch/oai2d",
"metadataprefix": "marcxml",
"comment": "",
"setspecs": "nl-fachhochschulen"
},
{
"key": "rerodoc-nl-lib4ri",
"name": "rerodoc",
"url": "http://doc.rero.ch/oai2d",
"metadataprefix": "marcxml",
"comment": "",
"setspecs": "nl-lib4ri"
},
{
"key": "rerodoc-nl-unibas",
"name": "rerodoc",
"url": "http://doc.rero.ch/oai2d",
"metadataprefix": "marcxml",
"comment": "",
"setspecs": "nl-unibas"
},
{
"key": "rerodoc-nl-unibe",
"name": "rerodoc",
"url": "http://doc.rero.ch/oai2d",
"metadataprefix": "marcxml",
"comment": "",
"setspecs": "nl-unibe"
},
{
"key": "rerodoc-nl-unifr",
"name": "rerodoc",
"url": "http://doc.rero.ch/oai2d",
"metadataprefix": "marcxml",
"comment": "",
"setspecs": "nl-unifr"
},
{
"key": "rerodoc-nl-unige",
"name": "rerodoc",
"url": "http://doc.rero.ch/oai2d",
"metadataprefix": "marcxml",
"comment": "",
"setspecs": "nl-unige"
},
{
"key": "rerodoc-nl-unil",
"name": "rerodoc",
"url": "http://doc.rero.ch/oai2d",
"metadataprefix": "marcxml",
"comment": "",
"setspecs": "nl-unil"
},
{
"key": "rerodoc-nl-unilu",
"name": "rerodoc",
"url": "http://doc.rero.ch/oai2d",
"metadataprefix": "marcxml",
"comment": "",
"setspecs": "nl-unilu"
},
{
"key": "rerodoc-nl-unine",
"name": "rerodoc",
"url": "http://doc.rero.ch/oai2d",
"metadataprefix": "marcxml",
"comment": "",
"setspecs": "nl-unine"
},
{
"key": "rerodoc-nl-unisg",
"name": "rerodoc",
"url": "http://doc.rero.ch/oai2d",
"metadataprefix": "marcxml",
"comment": "",
"setspecs": "nl-unisg"
},
{
"key": "rerodoc-nl-usi",
"name": "rerodoc",
"url": "http://doc.rero.ch/oai2d",
"metadataprefix": "marcxml",
"comment": "",
"setspecs": "nl-usi"
},
{
"key": "rerodoc-nl-uzh",
"name": "rerodoc",
"url": "http://doc.rero.ch/oai2d",
"metadataprefix": "marcxml",
"comment": "",
"setspecs": "nl-uzh"
}
]
Loading

0 comments on commit f1a419c

Please sign in to comment.