-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_all.py
93 lines (80 loc) · 3.11 KB
/
get_all.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from rucio.client import Client as RucioClient
from metacat.webapi import MetaCatClient
from argparse import ArgumentParser as ap
def get_one_wf(mc, w):
    """Query workflow ``w``'s confirmed root-tuple-virtual files and their parents.

    Two queries are sent through ``mc.query``, both with provenance and
    without metadata:

    * the files whose ``dune.workflow['workflow_id']`` is ``w``, with data
      tier ``root-tuple-virtual`` and output status ``confirmed``, ordered;
    * ``parents(...)`` of that very same selection, also ordered.

    Args:
        mc: Client object exposing ``query(query_string, **options)``.
        w: Workflow id interpolated into the query text.

    Returns:
        A ``zip`` that pairs the results of the first query with the results
        of the second one, position by position.
    """
    selection = (
        f"files where dune.workflow['workflow_id'] in ({w}) and "
        'core.data_tier=root-tuple-virtual and '
        'dune.output_status=confirmed ordered'
    )
    query_options = dict(with_provenance=True, with_metadata=False)

    children = mc.query(selection, **query_options)
    # The parents query wraps exactly the same selection text.
    ancestors = mc.query(f"parents({selection}) ordered", **query_options)

    # NOTE(review): pairing by position presumes the n-th result of the
    # parents query belongs to the n-th file of the first query (one parent
    # per file, matching order) -- confirm against MetaCat's ordering rules.
    return zip(children, ancestors)
def get_one_ana_only(mc, w):
    """Query the confirmed root-tuple-virtual files of workflow ``w``.

    Args:
        mc: Client object exposing ``query(query_string, **options)``.
        w: Workflow id interpolated into the query text.

    Returns:
        Whatever ``mc.query`` returns for the selection, requested with
        provenance and without metadata.
    """
    selection = (
        f"files where dune.workflow['workflow_id'] in ({w}) and "
        'core.data_tier=root-tuple-virtual and '
        'dune.output_status=confirmed'
    )
    query_options = dict(with_provenance=True, with_metadata=False)
    return mc.query(selection, **query_options)
def add_parent(pname, f, all_parents):
    """Record file ``f`` under the key ``pname`` in ``all_parents``.

    Args:
        pname: Key to file the entry under.
        f: Mapping providing ``'name'`` and ``'namespace'``.
        all_parents: Dict mapping keys to lists of ``'namespace:name'``
            strings; it is modified in place.

    Returns:
        None.
    """
    # Fetch (or create) the bucket first, before touching ``f``.
    entries = all_parents.setdefault(pname, [])
    file_name, file_namespace = f['name'], f['namespace']
    entries.append(f'{file_namespace}:{file_name}')
def _common_parents(per_workflow):
    """Return the keys that hold exactly one file in every workflow map.

    Args:
        per_workflow: List of dicts (one per requested workflow) mapping a
            key to the list of ``'namespace:name'`` strings filed under it.

    Returns:
        List of keys, in the order they appear in the first map. Empty when
        no workflow was requested.
    """
    if not per_workflow:
        return []
    first, *others = per_workflow
    return [
        pname
        for pname, dids in first.items()
        if len(dids) == 1
        and all(len(other.get(pname, ())) == 1 for other in others)
    ]


def main():
    """Write one ``<workflow>_to_merge.txt`` list per requested workflow.

    For every workflow given with ``--ana_only`` or ``-w`` the matching files
    are queried and grouped by an ancestor file id. Only ancestors that have
    exactly one file in *every* requested workflow are kept; the number kept
    is printed. Each workflow's output file then lists that workflow's file
    for each kept ancestor, one ``namespace:name`` per line, in the same
    ancestor order for every output file.
    """
    parser = ap()
    parser.add_argument('-w', help='workflows', type=int, nargs='+')
    parser.add_argument('--ana_only', type=int, nargs='+')
    args = parser.parse_args()

    mc = MetaCatClient()

    # One grouping dict per workflow (instead of one shared list indexed by
    # position), so a file can never be attributed to the wrong workflow when
    # some workflow has zero or several files for the same ancestor.
    workflow_ids = []
    per_workflow = []

    # --ana_only workflows are handled first; their files are keyed by the
    # fid of the file's own first parent.
    for w in args.ana_only or []:
        grouped = {}
        for f in get_one_ana_only(mc, w):
            add_parent(f['parents'][0]['fid'], f, grouped)
        workflow_ids.append(w)
        per_workflow.append(grouped)

    # -w workflows: each file comes paired with a second record ``p`` and is
    # keyed by the fid of ``p``'s first parent.
    # NOTE(review): presumably ``p`` is the file's parent, making the key the
    # grandparent -- confirm against get_one_wf.
    for w in args.w or []:
        grouped = {}
        for f, p in get_one_wf(mc, w):
            add_parent(p['parents'][0]['fid'], f, grouped)
        workflow_ids.append(w)
        per_workflow.append(grouped)

    good_parents = _common_parents(per_workflow)
    print(len(good_parents))

    for w, grouped in zip(workflow_ids, per_workflow):
        print('Writing', w)
        with open(f'{w}_to_merge.txt', 'w') as out:
            out.writelines(f'{grouped[pname][0]}\n' for pname in good_parents)


if __name__ == '__main__':
    main()