-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathrun.py
120 lines (95 loc) · 3.14 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import argparse
import os, sys
from subprocess import Popen
"""
python run.py \
--get_requests \
--get_sessions \
--get_vectors \
--start 2016-02-01 \
--stop 2016-02-07 \
--release 2016_02_01_2016_02_07 \
--langs wikidata,en \
--dims 50,100,300
python run.py \
--get_requests \
--start 2016-02-01 \
--stop 2016-02-01 \
--release test2
python run.py \
--get_sessions \
--release test2 \
--langs wikidata,en
python run.py \
--get_vectors \
--release test2 \
--langs wikidata,en \
--dims 10,20
"""
# Shell command templates. The %(name)s placeholders are filled from the
# parsed command-line options plus the per-iteration 'lang' / 'dim' values.
_REQUESTS_CMD = (
    'python /home/ellery/a2v/src/get_requests.py'
    ' --start %(start)s'
    ' --stop %(stop)s'
    ' --release %(release)s'
    ' --priority'
)
_HDFS_MKDIR_CMD = 'hadoop fs -mkdir /user/ellery/a2v/data/%(release)s'
_SESSIONS_CMD = (
    'spark-submit'
    ' --driver-memory 5g'
    ' --master yarn'
    ' --deploy-mode client'
    ' --num-executors 10'
    ' --executor-memory 10g'
    ' --executor-cores 4'
    ' --queue priority'
    ' /home/ellery/a2v/src/get_sessions.py'
    ' --release %(release)s'
    ' --lang %(lang)s'
)
_LOCAL_MKDIR_CMD = 'mkdir /home/ellery/a2v/data/%(release)s'
_VECTORS_CMD = (
    'python /home/ellery/a2v/src/get_vectors.py'
    ' --release %(release)s'
    ' --lang %(lang)s'
    ' --dims %(dim)s'
)

# (stage flag, options that stage needs, message shown when one is missing)
_STAGE_REQUIREMENTS = (
    ('get_requests', ('start', 'stop'), 'need start and stop to get_requests'),
    ('get_sessions', ('langs',), 'need langs to get sessions'),
    ('get_vectors', ('langs', 'dims'), 'need langs and dims to get models'),
)


def _split_csv(value):
    """Split a comma-separated option value, dropping blank entries.

    ``None`` (option not given) and strings made only of commas/whitespace
    both yield an empty list.
    """
    return [item.strip() for item in (value or '').split(',') if item.strip()]


def validate_args(args):
    """Return the error messages for every requested stage missing an option.

    ``args`` is the option dict produced by ``vars(parser.parse_args())``.
    argparse stores ``None`` for options that were not supplied, so key
    membership (``'start' in args``) is always true; the values themselves
    must be inspected. An empty list means the arguments are usable.
    """
    errors = []
    for stage, required, message in _STAGE_REQUIREMENTS:
        if not args.get(stage):
            continue
        # _split_csv treats None, '' and ',' alike as "nothing supplied".
        if not all(_split_csv(args.get(name)) for name in required):
            errors.append(message)
    return errors


def build_commands(args):
    """Return, in execution order, the shell commands for the requested stages.

    Stages are emitted in the fixed order requests, sessions, vectors. The
    sessions stage yields one command per language and the vectors stage one
    per (language, dimension) pair, each preceded by the command creating the
    release directory. ``args`` is not modified.

    Raises:
        ValueError: if a requested stage lacks one of its required options.
    """
    errors = validate_args(args)
    if errors:
        raise ValueError('; '.join(errors))

    commands = []
    if args.get('get_requests'):
        commands.append(_REQUESTS_CMD % args)

    if args.get('get_sessions'):
        commands.append(_HDFS_MKDIR_CMD % args)
        for lang in _split_csv(args['langs']):
            commands.append(_SESSIONS_CMD % dict(args, lang=lang))

    if args.get('get_vectors'):
        commands.append(_LOCAL_MKDIR_CMD % args)
        for lang in _split_csv(args['langs']):
            for dim in _split_csv(args['dims']):
                commands.append(_VECTORS_CMD % dict(args, lang=lang, dim=dim))

    return commands


def main(argv=None):
    """Parse the command line and run the requested pipeline stages.

    ``argv`` defaults to ``sys.argv[1:]``. Every requested stage is validated
    before anything is launched, so a missing option is reported immediately
    instead of after an earlier stage has already run. On a validation
    failure the messages are printed and the process exits with status 1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--get_requests', default=False, action='store_true')
    parser.add_argument('--get_sessions', default=False, action='store_true')
    parser.add_argument('--get_vectors', default=False, action='store_true')
    parser.add_argument('--start', required=False)
    parser.add_argument('--stop', required=False)
    parser.add_argument('--release', required=True)
    parser.add_argument('--langs', required=False)
    parser.add_argument('--dims', required=False)
    args = vars(parser.parse_args(argv))

    errors = validate_args(args)
    if errors:
        for message in errors:
            print(message)
        sys.exit(1)

    for command in build_commands(args):
        # Exit statuses are deliberately ignored: the mkdir commands fail
        # when the release directory already exists, and that must not stop
        # the stages that follow.
        os.system(command)


if __name__ == '__main__':
    main()