-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlinks.py
88 lines (81 loc) · 2.21 KB
/
links.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from glob import glob
import codecs
# Known link sources, in ranking order: source_index() returns a link's
# position in this list, and links() sorts each article's links by it.
sources = [
    "Tolkien Gateway",
    "The Thain's Book",
]
def source_index(source):
    """Return the rank of a link's source within the ``sources`` list.

    ``source`` is any object exposing a ``source`` attribute. The result is
    the position of that attribute's value in the module-level ``sources``
    list; values not found there rank as ``len(sources)``, i.e. after every
    listed source.
    """
    try:
        rank = sources.index(source.source)
    except ValueError:
        # Not a listed source: place it behind all listed ones.
        rank = len(sources)
    return rank
class Link(object):
    """A single link record for an article.

    Attributes:
        canonical: canonical article name; ``""`` when omitted or falsy.
        mappable: stored exactly as passed (``None`` by default).
        mapped: stored exactly as passed (``None`` by default).
        names: list of names; a fresh empty list when ``None`` is passed.
        description: description text; ``""`` when omitted or falsy.
        href: link target; ``""`` when omitted or falsy.
        source: source label, stored exactly as passed (``None`` by default).
    """

    def __init__(self, canonical=None, mappable=None, mapped=None,
                 names=None, description=None, href=None, source=None):
        # Text fields collapse any falsy value to an empty string.
        self.canonical = canonical or ""
        self.mappable = mappable
        self.mapped = mapped
        # Build the default list per instance so instances never share it.
        self.names = [] if names is None else names
        self.description = description or ""
        self.href = href or ""
        self.source = source

    def __repr__(self):
        """Return ``Link(attr=value, ...)`` covering every instance attribute."""
        fields = ["%s=%r" % pair for pair in vars(self).items()]
        return 'Link(%s)' % ", ".join(fields)

    @property
    def name(self):
        """All names joined into one display string with ``" / "``."""
        return u" / ".join(self.names)
def LinkReader(file_name):
    """Iterate over the data rows of a UTF-8, tab-separated file as dicts.

    The first non-blank line supplies the column headers; every following
    non-blank line becomes one ``dict`` mapping header to cell text.
    Surrounding double quotes are stripped from each cell (headers
    included). Leading byte-order marks are ignored, and both ``\\n`` and
    ``\\r\\n`` line endings are accepted.

    Args:
        file_name: path of the file to read.

    Returns:
        An iterator of ``dict`` rows. A file with no non-blank lines yields
        nothing. Rows shorter or longer than the header are truncated to the
        shorter of the two by ``zip``.

    Raises:
        IOError/OSError: if the file cannot be opened.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    # Read everything up front so the handle is closed before any row is
    # consumed by the caller.
    with codecs.open(file_name, "r", "utf-8") as handle:
        content = handle.read()
    # codecs.open does not remove the BOM for plain "utf-8"; drop it here.
    # bytes.decode works on both Python 2 and Python 3.
    content = content.lstrip(codecs.BOM_UTF8.decode("utf-8"))
    rows = (
        # codecs.open reads in binary mode, so CRLF files keep their "\r";
        # trim it so the last cell (and last header) is not polluted.
        [cell.strip('"') for cell in line.rstrip(u"\r").split(u"\t")]
        for line in content.split(u"\n")
        if line.strip()
    )
    # next() with a default avoids leaking StopIteration on an empty file.
    headers = next(rows, None)
    if headers is None:
        return iter(())
    return (dict(zip(headers, row)) for row in rows)
def links():
    """Load ``links.tsv`` and group its links by canonical name.

    Each row read by ``LinkReader`` becomes a ``Link`` built from the
    ``Canonical``, ``Names`` (split on ``" / "``), ``Description``, ``Link``
    and ``Source`` columns. The ``mappable`` and ``mapped`` attributes are
    not populated here and keep ``Link``'s defaults.

    Returns:
        A dict mapping each canonical name to the list of its ``Link``
        objects, ordered by ``source_index``.
    """
    grouped = {}
    for entry in LinkReader('links.tsv'):
        link = Link(
            canonical=entry['Canonical'],
            names=entry['Names'].split(u" / "),
            description=entry['Description'],
            href=entry['Link'],
            source=entry['Source'],
        )
        grouped.setdefault(link.canonical, []).append(link)
    # list.sort is stable, so links from the same source keep file order.
    for group in grouped.values():
        group.sort(key=source_index)
    return grouped
def main():
    """Run a module-level function named on the command line.

    ``sys.argv[1]`` is the command; dashes are turned into underscores and
    the result is looked up among this module's globals. Remaining
    arguments are passed to it positionally, as strings. A return value
    other than ``None`` is pretty-printed.

    Raises:
        SystemExit: with an explanatory message when no command is given or
            when the command does not name a callable in this module.
    """
    import sys
    from pprint import pprint

    if len(sys.argv) < 2:
        sys.exit("usage: %s COMMAND [ARG ...]" % sys.argv[0])
    command = sys.argv[1].replace('-', '_')
    handler = globals().get(command)
    # Reject unknown names and non-callable globals (e.g. plain constants)
    # with a readable message instead of a KeyError/TypeError traceback.
    if not callable(handler):
        sys.exit("unknown command: %s" % sys.argv[1])
    result = handler(*sys.argv[2:])
    if result is not None:
        pprint(result)
# Dispatch to the command-line entry point only when run as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()