-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpublish.py
156 lines (121 loc) · 4.78 KB
/
publish.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# encoding=utf8
import sys
import os
import re
import logging
import logging.handlers
import requests
import datetime
from dateutil.parser import parse
from tzlocal import get_localzone
import pytz # timezone in Python 3
import argparse
from roald import Roald
from functools import reduce
# Root logger: accept everything from DEBUG up and let each handler filter.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# One shared line format for all handlers.
formatter = logging.Formatter('[%(asctime)s %(levelname)s:%(module)s:] %(message)s')

# INFO and above go to stdout; WARNING and above are also written to
# 'warnings.log' in the current working directory.
console_handler = logging.StreamHandler(stream=sys.stdout)
warn_handler = logging.FileHandler('warnings.log')

for _handler, _level in (
    (console_handler, logging.INFO),
    (warn_handler, logging.WARNING),
):
    _handler.setLevel(_level)
    _handler.setFormatter(formatter)
    logger.addHandler(_handler)
def fetch(url, filename):
    """
    Download a file from an URL and store it as `filename`.

    The HTTP status is checked *before* the local file is opened for
    writing, so a failed request leaves any existing copy of `filename`
    (and its modification time) untouched.

    :param url: address to download from
    :param filename: path of the local file to create or overwrite
    :returns: True if the server answered with a success status and the
        body was written to `filename`, False otherwise
    """
    response = requests.get(url, stream=True)
    try:
        if not response.ok:
            logger.error('Download failed')
            return False

        # Only touch the local file once we know there is a body to store.
        with open(filename, 'wb') as handle:
            for block in response.iter_content(1024):
                if not block:
                    break
                handle.write(block)
        return True
    finally:
        # With stream=True the connection stays checked out until the
        # response is closed (or fully consumed), so release it explicitly.
        response.close()
def check_modification_dates(record):
    """
    Check whether the remote file has changed and download it if so.

    A HEAD request is sent to ``record['remote_url']``. If the response
    carries an ETag, it is compared with the value cached next to the local
    file (``<local_file>.etag``); otherwise the Last-Modified header is
    compared with the modification time of ``record['local_file']``. If
    neither header is present the file is downloaded unconditionally.

    :param record: dict with the keys 'remote_url' and 'local_file'
    :returns: the same dict, updated in place. 'modified' is True only if
        new data was actually downloaded. Depending on the headers seen,
        'remote_etag'/'local_etag' or 'remote_datemod'/'local_datemod' are
        set as well.
    """
    local_file = record['local_file']
    etag_file = local_file + '.etag'

    head = requests.head(record['remote_url'])
    if head.status_code != 200:
        logger.warning('Got status code: %s', head.status_code)
        record['modified'] = False
        return record

    remote_etag = None
    if 'etag' in head.headers:
        remote_etag = record['remote_etag'] = head.headers['etag']
        # A cached etag is only meaningful if the data file it describes
        # is still there.
        if os.path.isfile(local_file) and os.path.isfile(etag_file):
            with open(etag_file, 'rb') as f:
                record['local_etag'] = f.read().decode('utf-8').strip()
        else:
            record['local_etag'] = '0'

        logger.info(' Remote file etag: %s', record['remote_etag'])
        logger.info(' Local file etag: %s', record['local_etag'])

        if record['remote_etag'] == record['local_etag']:
            logger.info(' -> Local data are up-to-date.')
            record['modified'] = False
            return record

    elif 'last-modified' in head.headers:
        remote_datemod = parse(head.headers['last-modified'])
        if remote_datemod.tzinfo is None:
            # HTTP dates are always expressed in GMT
            remote_datemod = remote_datemod.replace(tzinfo=pytz.utc)
        record['remote_datemod'] = remote_datemod

        if os.path.isfile(local_file):
            # Build an aware UTC datetime directly from the mtime; this does
            # not depend on the pytz-only localize()/normalize() methods.
            record['local_datemod'] = datetime.datetime.fromtimestamp(
                os.path.getmtime(local_file), tz=pytz.utc)
        else:
            # use some date in the past
            record['local_datemod'] = datetime.datetime(
                year=2014, month=1, day=1, tzinfo=pytz.utc)

        logger.info(' Remote file modified: %s', record['remote_datemod'].isoformat())
        logger.info(' Local file modified: %s', record['local_datemod'].isoformat())

        # Allow 5 minutes of tolerance for clocks being slightly off: the
        # remote file only counts as changed if it is at least 5 minutes
        # newer than the local copy.
        if record['remote_datemod'] < record['local_datemod'] + datetime.timedelta(minutes=5):
            logger.info(' -> Local data are up-to-date.')
            record['modified'] = False
            return record

    # Reaching this point means the remote file changed, or the server gave
    # us nothing to compare against.
    logger.info(' -> Fetching updated data...')
    record['modified'] = bool(fetch(record['remote_url'], local_file))

    # Remember the new etag only after a successful download; otherwise a
    # failed download would be reported as up-to-date on the next run.
    if record['modified'] and remote_etag is not None:
        with open(etag_file, 'wb') as f:
            f.write(remote_etag.encode('utf-8'))

    return record
def run():
    """
    Command-line entry point: parse the arguments, then rebuild the
    distribution files with make().

    The -f/--force flag is accepted but currently has no effect, because
    the remote freshness check that consulted it is commented out below.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-f', '--force',
        action='store_true',
        help='Force update',
    )
    # Still parsed so that unknown options are rejected and --help works.
    cli.parse_args()

    # Disabled freshness check, kept for reference:
    #
    # files = [
    #     {
    #         'remote_url': 'http://www.bibsys.no/files/out/usvdsok/HUMEregister.xml',
    #         'local_file': 'src/usvd.xml'
    #     },{
    #         'remote_url': 'http://lambda.biblionaut.net/export.rdf',
    #         'local_file': 'src/lambda.rdf'
    #     }
    # ]
    # for f in files:
    #     logger.info('Checking {}...'.format(f['local_file']))
    #     f['record'] = check_modification_dates(f)
    # if not reduce(lambda x, y: x or y['record']['modified'], files, False):
    #     if args.force:
    #         logger.info('No changes.')
    #     else:
    #         logger.info('No changes, exiting.')
    #         sys.exit(1)  # tells prepare.sh that there's no need to continue
    #         # return

    make()
def make():
    """
    Convert the source vocabulary into the distribution files.

    Loads 'src/usvd.xml' (format 'bibsys', language 'nb'), saves it as
    'usvd.json', and exports 'dist/usvd.marc21.xml' (MARC21) and
    'dist/usvd.ttl' (RDF/SKOS).
    """
    vocabulary = Roald()
    vocabulary.load('src/usvd.xml', format='bibsys', language='nb')
    vocabulary.set_uri_format('http://data.ub.uio.no/usvd/c{id}')
    vocabulary.save('usvd.json')

    # MARC21 export; the extra options are passed straight to the exporter.
    vocabulary.export(
        'dist/usvd.marc21.xml',
        format='marc21',
        vocabulary_code='usvd',
        created_by='NoOU',
    )

    # RDF/SKOS export, together with the listed Turtle files.
    skos_includes = ['usvd.scheme.ttl', 'ubo-onto.ttl']
    vocabulary.export('dist/usvd.ttl', format='rdfskos', include=skos_includes)
# Only run the build when executed as a script, not when imported.
if __name__ == '__main__':
    run()