-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathetl.py
55 lines (45 loc) · 1.31 KB
/
etl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import argparse
import os
import subprocess
import sys
"""
python etl.py \
--day 20160926 \
--langs en,ja,de,es,ru,fr,it,zh,pt,pl,tr,ar,nl,id,sv,ko,cs,fa,fi,vi
"""
_WIKIDATA_UTILS = '/home/ellery/wmf/util/wikidata_utils.py'
_MULTILINGUAL_DB = '/home/ellery/wmf/util/get_multilingual_prod_db.py'


def build_command_groups(day=None, langs=None):
    """Return the external commands selected by the CLI options.

    The result is a list of groups; each group is a list of argv lists that
    must run in order.  Commands are argv lists rather than shell strings so
    the option values are passed through verbatim as single arguments.

    Args:
        day: value of ``--day``; when truthy, the dump download followed by
            the spark extraction job is scheduled as one group.
        langs: value of ``--langs``; when truthy, the multilingual prod db
            job is scheduled as its own group.

    Returns:
        A list of groups (possibly empty), in execution order.
    """
    groups = []
    if day:
        download = [
            'python', _WIKIDATA_UTILS,
            '--day', day,
            '--download_dump',
        ]
        extract = [
            'spark-submit',
            '--driver-memory', '5g',
            '--master', 'yarn',
            '--deploy-mode', 'client',
            '--num-executors', '8',
            '--executor-memory', '10g',
            '--executor-cores', '4',
            '--queue', 'priority',
            _WIKIDATA_UTILS,
            '--day', day,
            '--extract_wills',
            '--create_table',
            '--db', 'prod',
        ]
        groups.append([download, extract])
    if langs:
        groups.append([[
            'python', _MULTILINGUAL_DB,
            '--db', 'prod',
            '--langs', langs,
            '--tables', 'page,redirect,page_props',
        ]])
    return groups


def _run(command):
    """Run one argv list without a shell and return its exit status."""
    try:
        return subprocess.call(command)
    except OSError as err:
        # Executable missing or not runnable; report it like a failed command
        # (127 is the status a shell uses for "command not found").
        sys.stderr.write('could not start %s: %s\n' % (command[0], err))
        return 127


def main(argv=None):
    """Parse the command line and run the selected ETL steps.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        0 when every command that ran succeeded, 1 otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--day', required=False)
    parser.add_argument('--langs', required=False,
                        help='comma seperated list of languages')
    args = vars(parser.parse_args(argv))
    print(args)

    failed = False
    for group in build_command_groups(args['day'], args['langs']):
        for command in group:
            status = _run(command)
            if status != 0:
                sys.stderr.write('command failed with status %d: %s\n'
                                 % (status, ' '.join(command)))
                failed = True
                # Later commands in the same group depend on this one, so
                # skip them; other groups are independent and still run.
                break
    return 1 if failed else 0


if __name__ == '__main__':
    sys.exit(main())