-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild_corpus.py
71 lines (53 loc) · 1.78 KB
/
build_corpus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
"""
Usage: python build_corpus.py corpus_path thread_count
corpus_path: full or relative path to existing folders where corpus files will be written [required]
thread_count: number of concurrent threads to use to query mlb challenge server
Example: python build_corpus.py /path/to/corpus 20
"""
import logging
import time
import sys
import os
from threading import Thread
from MLB.server import Server
# Thread count used when the command line does not supply one.
DEFAULT_THREAD_COUNT = 10

# Configure the root logger: INFO and above, with a timestamp, logger name
# and level on every record.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
def build_corpus(thread_num, iterations=40000):
    """Repeatedly query the challenge server and record each result on disk.

    Each iteration fetches a challenge, picks one of the offered targets at
    random, submits it, and appends ``"<ans>, <bin_b64>"`` as one line to an
    output file. The file is created inside the module-level ``corpus_dir``
    (assigned by the ``__main__`` section of this script) and is named
    ``<thread_num>_<time.time()>.txt``.

    A failure in any single iteration is logged and skipped; the loop then
    continues with the next iteration.

    Args:
        thread_num: Identifier of the calling worker; used in the output
            file name and in the start-up log message.
        iterations: Number of challenge/solve round trips to attempt.
            Defaults to 40000, the count that was previously hard-coded.
    """
    import random

    log = logging.getLogger(__name__)
    of_path = os.path.join(
        corpus_dir, str(thread_num) + '_' + str(time.time()) + '.txt')

    # The context manager guarantees the file is closed even when something
    # outside the per-iteration try (e.g. constructing Server) raises.
    with open(of_path, 'w') as of:
        s = Server(log=log)
        log.info("Starting thread number: " + str(thread_num))

        for i in range(iterations):
            try:
                # query the /challenge endpoint
                s.get()

                # choose a random target and /solve
                target = random.choice(s.targets)
                s.post(target)

                of.write(s.ans + ', ' + s.bin_b64 + '\n')
                # Push every record out of the Python-level buffer right away.
                of.flush()

                # Progress line once every 100 iterations.
                if i % 100 == 0:
                    log.info("#{}: {}, {} ".format(i, s.ans, s.bin_b64))
            except Exception as e:
                # Best effort: report the failed round trip and keep going.
                log.warning("Error getting corpus data: " + str(e))
if __name__ == "__main__":
    # Parse the command line: corpus_path is required, thread_count is
    # optional. On a missing path or a non-integer thread count, print the
    # module usage text and exit with a failure status.
    try:
        corpus_dir = sys.argv[1]
        thread_count = (
            int(sys.argv[2]) if len(sys.argv) >= 3 else DEFAULT_THREAD_COUNT
        )
    except (IndexError, ValueError):
        # IndexError: corpus_path missing; ValueError: thread_count is not
        # an integer. Catching only these lets SystemExit and
        # KeyboardInterrupt propagate normally.
        print(__doc__)
        sys.exit(-1)

    # One worker per requested thread; each receives its index as thread_num.
    threads = [
        Thread(target=build_corpus, args=(i,)) for i in range(thread_count)
    ]

    for thread in threads:
        thread.start()

    # Block until every worker has finished.
    for thread in threads:
        thread.join()