forked from mitre/data-owner-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathblock.py
60 lines (49 loc) · 1.68 KB
/
block.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env python3
import argparse
import glob
import os
from pathlib import Path
import subprocess
import sys
from zipfile import ZipFile
def parse_arguments():
    """Parse the command-line options for the blocking step.

    Two optional flags are accepted, ``--schemafile`` and ``--clkpath``,
    each with a default value.

    Returns:
        argparse.Namespace: parsed options exposing ``schemafile`` and
        ``clkpath``.

    Exits through ``ArgumentParser.error`` when the schema file path does
    not exist.
    """
    # NOTE(review): the description mentions garbling, while this script
    # invokes `anonlink block` -- confirm whether the wording is intended.
    cli = argparse.ArgumentParser(
        description="Tool for garbling PII for PPRL purposes in the CODI project"
    )

    # (flag, default, help text) for every supported option.
    option_specs = (
        (
            "--schemafile",
            "example-schema/blocking-schema/lambda.json",
            "Path to blocking schema. Default: example-schema/blocking-schema/lambda.json",
        ),
        (
            "--clkpath",
            "output",
            "Specify a folder containing clks. Default is 'output' folder",
        ),
    )
    for flag, default_value, help_text in option_specs:
        cli.add_argument(flag, default=default_value, help=help_text)

    options = cli.parse_args()

    # Fail early, with a usage message, if the schema cannot be found.
    schema_path = Path(options.schemafile)
    if not schema_path.exists():
        cli.error("Unable to find schema file: " + options.schemafile)
    return options
def block_individuals(args):
    """Run ``anonlink block`` on every ``*.json`` file found in ``args.clkpath``.

    The ``temp-data`` and ``output`` directories are created in the current
    working directory if they do not exist. Each input file is blocked into
    ``temp-data/<input file name>``.

    Args:
        args: Parsed options providing ``schemafile`` (path to the blocking
            schema) and ``clkpath`` (folder containing the CLK JSON files).

    Returns:
        list[pathlib.Path]: the blocked output files, in processing order.
        Empty when no ``*.json`` file is found.

    Raises:
        subprocess.CalledProcessError: if ``anonlink block`` exits with a
            non-zero status for any file.
    """
    os.makedirs("temp-data", exist_ok=True)
    os.makedirs("output", exist_ok=True)
    schema_file = Path(args.schemafile)

    # glob returns matches in arbitrary order; sort so that the processing
    # order (and anything built from the returned list) is reproducible.
    clk_files = sorted(glob.glob(os.path.join(args.clkpath, "*.json")))

    blocked_files = []
    for clk in clk_files:
        clk_path = Path(clk)
        # Use the path's final component rather than splitting on '/', so
        # the output name is also correct where the separator is '\\'.
        temp_file = Path("temp-data", clk_path.name)
        subprocess.run(
            ["anonlink", "block", str(clk_path), str(schema_file), str(temp_file)],
            check=True,
        )
        blocked_files.append(temp_file)
    return blocked_files
def zip_blocked_files(blocked_files):
    """Write the given files into ``output/garbled_blocked.zip``.

    Any existing archive at that path is overwritten (mode ``"w"``).

    Args:
        blocked_files: Iterable of file paths to add. Each file is stored
            under the same relative path it is passed in with.
    """
    # Make sure the destination folder exists instead of relying on an
    # earlier step having created it.
    os.makedirs("output", exist_ok=True)
    with ZipFile("output/garbled_blocked.zip", "w") as garbled_zip:
        for blocked_file in blocked_files:
            garbled_zip.write(blocked_file)
def main():
    """Script entry point: parse options, block the CLK files, zip the results."""
    cli_args = parse_arguments()
    zip_blocked_files(block_individuals(cli_args))
# Run the blocking workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()