-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparallel2.py
63 lines (54 loc) · 1.73 KB
/
parallel2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# This version provided by [/u/civilization_phaze_3][1]
# [1]: https://www.reddit.com/user/civilization_phaze_3
import datetime
import sys
from multiprocessing import Pool, cpu_count
def scan_in_csv(r):
    """Split every line of *r* on commas.

    Args:
        r: An iterable of strings (the script passes ``sys.stdin``).

    Returns:
        A ``(rows, error)`` pair. On success ``rows`` is a list with one
        list of cell strings per input line and ``error`` is ``None``. If
        anything raises while iterating or splitting, ``rows`` is ``None``
        and ``error`` is the exception that was caught.
    """
    parsed = []
    try:
        # Iteration stays inside the try so a failure while reading from
        # *r* is reported the same way as a failure while splitting.
        for raw_line in r:
            parsed.append(raw_line.split(','))
    except Exception as exc:
        return None, exc
    return parsed, None
def validate_rows(args):
    """Report rows with the wrong width and cells that are not integers.

    The inputs arrive packed in one tuple so the function can be handed
    directly to ``Pool.imap``, which passes a single argument per task.

    Args:
        args: A ``(rows, col_size)`` pair. ``rows`` is a sequence of rows,
            each a sequence of cell strings; ``col_size`` is the number of
            cells every row is expected to have.

    Returns:
        None. All findings are printed to standard output.
    """
    rows, col_size = args
    print('validating %s rows' % len(rows))
    # NOTE: row_id is the position inside the *rows* sequence received here.
    # multi_validate_rows passes strided slices of the full table, so in that
    # case it is not the row's position in the original input.
    for row_id, row in enumerate(rows):
        if len(row) != col_size:
            msg = "Row {} has {} cells, but expected {}\n"
            print(msg.format(row_id, len(row), col_size))
            # A row of the wrong width is reported once; its cells are not
            # checked individually.
            continue
        for col_id, cell in enumerate(row):
            try:
                int(cell)
            except ValueError as e:
                print("Err at ({}, {}): {}".format(col_id, row_id, e))
def multi_validate_rows(rows, col_size, n_cores=4):
    """Validate *rows* in parallel across a pool of worker processes.

    The rows are dealt out round-robin into ``n_cores`` strided slices and
    each slice is checked by ``validate_rows`` in its own worker.

    Args:
        rows: Sequence of rows, each a sequence of cell strings.
        col_size: Number of cells every row is expected to have.
        n_cores: Number of worker processes (and slices) to use. Defaults
            to 4, the value that was previously hard-coded.

    Raises:
        Exception: Any exception raised by ``validate_rows`` inside a
            worker is re-raised here instead of being dropped.
    """
    print('N_CORES', n_cores)
    chunks = [(rows[i::n_cores], col_size) for i in range(n_cores)]
    pool = Pool(n_cores)
    try:
        # An exception raised in a worker is only re-raised in the parent
        # when that task's result is fetched, so the iterator must be
        # drained even though validate_rows returns nothing useful.
        for _ in pool.imap(validate_rows, chunks):
            pass
    finally:
        # Always shut the pool down so worker processes are not left behind
        # when a task fails.
        pool.close()
        pool.join()
def timeit(f):
    """Call *f* once and return how long the call took.

    Args:
        f: A callable taking no arguments. Its return value is discarded.

    Returns:
        The elapsed wall-clock time in seconds as a float, measured as the
        difference between two ``datetime.datetime.now()`` readings.
    """
    began = datetime.datetime.now()
    f()
    elapsed = datetime.datetime.now() - began
    return elapsed.total_seconds()
if __name__ == '__main__':
    # Read comma-separated rows from standard input, validate them in
    # parallel, and print how long the validation took.
    rows, err = scan_in_csv(sys.stdin)
    if err is not None:
        # Show the real failure; an unreadable input is not the same thing
        # as an empty one.
        print("Could not read rows: {}".format(err))
        sys.exit(-1)
    if len(rows) < 1:
        print('No rows in file')
        sys.exit(-1)

    # The first row defines the expected width of every other row.
    col_size = len(rows[0])

    print("Beginning validation...")
    # Single-process equivalent, for comparison:
    #     timeit(lambda: validate_rows((rows, col_size)))
    elapsed = timeit(lambda: multi_validate_rows(rows, col_size))
    print("Validated {} rows of {} cells in {}".format(
        len(rows),
        col_size,
        elapsed,
    ))