-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsplitFile.py
81 lines (69 loc) · 2.4 KB
/
splitFile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
###Tony: split a single tree into n parts
import ROOT, rootlogon, helpers
import argparse, copy, glob, os, sys, time
#for parallel processing!
import multiprocessing as mp
import config as CONF
#import tree configuration
# Put ROOT in batch mode as soon as this module is loaded.
ROOT.gROOT.SetBatch(True)
#this is probably the worst parallel effort
#but whatever
#define functions
def options():
    """Parse the command-line options of the splitter.

    Returns:
        argparse.Namespace with two attributes:
          * inputdir -- directory name that split() appends to
            CONF.inputpath (default "TEST").
          * nfiles -- number of output files to produce, as an integer
            (default CONF.splits).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--inputdir", default="TEST")
    # type=int: a value typed on the command line would otherwise arrive as a
    # string, while split() needs an integer count.
    parser.add_argument("--nfiles", type=int, default=CONF.splits)
    return parser.parse_args()
def split(targetpath="data_test"):
    """Split one sample's ``TinyTree`` into ``nfiles`` output files.

    Reads ``hist-MiniNTuple.root`` from the directory
    ``CONF.inputpath + <--inputdir> + "/" + targetpath`` and writes
    ``hist-MiniNTuple_<i>.root`` (``i`` = 0 .. nfiles-1) into that same
    directory.  Entry ``n`` of the input tree is filled into output tree
    ``n % nfiles``.  Every output file also receives a copy of each
    bookkeeping histogram, scaled by ``1/nfiles``.

    Args:
        targetpath: name of the sample sub-directory to process.

    Raises:
        ValueError: if the requested number of output files is not a
            positive integer.

    Side effects:
        Sets the module-level globals ``inputpath`` and ``outputpath`` and
        calls ``helpers.checkpath`` on the output directory.
    """
    start_time = time.time()
    ops = options()
    # Coerce to int: a --nfiles value given on the command line may be a
    # string, which would break range() and the scaling arithmetic below.
    nfiles = int(ops.nfiles)
    if nfiles < 1:
        raise ValueError("nfiles must be a positive integer, got %r" % (ops.nfiles,))
    inputdir = ops.inputdir

    global inputpath
    inputpath = CONF.inputpath + inputdir + "/" + targetpath
    global outputpath
    outputpath = CONF.inputpath + inputdir + "/" + targetpath
    helpers.checkpath(outputpath)
    # Single-argument print() emits the same text under Python 2 and 3.
    print("split! target:  %s" % (targetpath,))

    f = ROOT.TFile(inputpath + "/" + "hist-MiniNTuple.root", "read")
    #load the target tree
    t = f.Get("TinyTree")

    #load the histograms
    hist_list = ["CutFlowWeight", "CutFlowNoWeight", "h_leadHCand_pT_pre_trig", "h_leadHCand_pT_aft_trig"]
    error_factor = ROOT.TMath.Sqrt(nfiles)  # loop-invariant, computed once
    temp_hist_list = []
    for hist_name in hist_list:
        hist = f.Get(hist_name).Clone()
        hist.Scale(1.0 / nfiles)
        # Author's sqrt(N) error correction, applied after the 1/nfiles
        # scaling.  Bins run 0 .. nbins+1 so that the overflow bin is
        # corrected as well as the underflow bin.
        for x_bin in range(0, hist.GetXaxis().GetNbins() + 2):
            hist.SetBinError(x_bin, hist.GetBinError(x_bin) * error_factor)
        temp_hist_list.append(hist)

    outfile = []
    outtree = []
    for i in range(nfiles):
        # Open the output file first, then create the empty clone --
        # presumably so the clone is attached to the file just opened
        # (TODO confirm against ROOT's current-directory rules).
        outfile.append(ROOT.TFile(inputpath + "/" + "hist-MiniNTuple_%s.root" % (str(i)), "recreate"))
        outtree.append(t.CloneTree(0))

    #open and copy: distribute the entries round-robin over the outputs
    nentries = t.GetEntries()
    for n in range(nentries):
        t.GetEntry(n)
        outtree[n % nfiles].Fill()

    for i in range(nfiles):
        outfile[i].cd()
        outtree[i].Write()
        for hist in temp_hist_list:
            hist.Write()
        outfile[i].Close()

    f.Close()
    del(t)
    del(outtree)
    del(temp_hist_list)
    print("--- %s seconds ---" % (time.time() - start_time))
    print("Finish!")
def main():
    """Run split() once for every enabled sample directory, in order."""
    # "signal_QCD" is currently switched off.
    enabled_targets = ("ttbar_comb_test", "data_test")
    for sample in enabled_targets:
        split(targetpath=sample)
#def clearbranches():
# Run the splitter only when this file is executed as a script.
if __name__ == "__main__":
    main()