-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdfoilPicker2compd.py
147 lines (123 loc) · 5.64 KB
/
dfoilPicker2compd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env python3
import argparse
import errno
import os
import sys
from itertools import chain, islice
from tables import *
class Dfoil(IsDescription):
    # PyTables row description used for every Comp-D test table: a single
    # fixed-width string column that holds the text of one test.
    # NOTE(review): the width is fixed at 300; presumably longer values do not
    # fit in the column -- confirm how PyTables handles oversize strings.
    compDtest = StringCol(itemsize=300)
def main():
    """Convert DFOIL_picker.R output into Comp-D test tables and command files.

    The outgroup file is read as one individual per non-blank line. The tests
    file is consumed in chunks of ``--lines`` lines; for chunk number ``k``
    (starting at 0) two files are written in the current directory:

    * ``k.compDtest.hdf5`` -- under the group ``/compDtests``, one single-row
      table per non-blank input line, named ``compDtest_<i>`` where ``<i>`` is
      the line's index inside the chunk. The row holds the outgroup
      individuals joined by spaces on the first line, followed by the input
      line's columns in reverse order, one per line.
    * ``compDcommands.k.txt`` -- one ``compD`` command per non-blank line.

    Line indices restart at 0 in every chunk, so table names and the
    ``compdtest.<i>.out.txt`` output names repeat from one chunk to the next.

    Both output files are rewritten from scratch on every run. Earlier
    versions appended to the command file, so re-running the script left
    duplicate commands next to a freshly rewritten HDF5 file.
    """
    args = Get_Arguments()

    with open(args.outgroup) as fin:
        outgroup = [line.strip() for line in fin if line.strip()]
    print(outgroup)
    # The outgroup header is identical for every test, so build it once.
    outgroup_line = " ".join(outgroup)

    commandfilename = "compDcommands"
    compDbase = "compDtest"

    # Remove a file with the bare, un-split name if one exists. The per-chunk
    # command files are truncated when they are opened in "w" mode below.
    silentremove(commandfilename)

    with open(args.tests, "r") as f:
        for chunkCount, piece in enumerate(read_in_chunks(f, args.lines)):
            compd_h5 = "{}.compDtest.hdf5".format(chunkCount)
            splitcommandfilename = "{}.{}.txt".format(commandfilename, chunkCount)

            # Open the command file once per chunk instead of once per line.
            # A chunk made up only of blank lines now yields an empty command
            # file rather than no file at all.
            with open_file(compd_h5, mode="w", title="Comp-D Tests") as h5file, \
                    open(splitcommandfilename, "w") as o:
                group = h5file.create_group("/", "compDtests", "Comp-D Input")

                for count, line in enumerate(piece):
                    cols = line.split()
                    if not cols:
                        # Blank line: skipped, but its index is still used up.
                        continue
                    cols.reverse()
                    compDtest = "{}_{}".format(compDbase, count)

                    # Each test goes into its own single-row table.
                    table = h5file.create_table(group, compDtest, Dfoil, "Test_" + str(count))
                    test = table.row
                    # NOTE(review): Dfoil declares this column as
                    # StringCol(300); presumably a longer payload (many
                    # outgroup individuals) is cut off on assignment --
                    # confirm against PyTables and widen the column if needed.
                    test["compDtest"] = "{}\n{}\n".format(outgroup_line, "\n".join(cols))
                    test.append()
                    table.flush()

                    o.write(
                        "compD -i {} -t {} -b {} -l {} -PfH -o compdtest.{}.out.txt\n".format(
                            args.phylip, compDtest, args.bootstraps, args.sites, count
                        )
                    )
def read_in_chunks(file_object, chunk_size):
    """Lazily yield consecutive lists of items taken from ``file_object``.

    Every yielded list holds up to ``chunk_size`` items (lines, for an open
    text file); only the last list can be shorter. Nothing is yielded for an
    exhausted input. ``file_object`` is sliced again on every pass, so it has
    to keep its position between calls, as an open file or any iterator does.
    """
    def next_batch():
        return list(islice(file_object, chunk_size))

    # Two-argument iter() keeps calling next_batch() until it returns the
    # sentinel, an empty list, which happens once the input is exhausted.
    yield from iter(next_batch, [])
def silentremove(filename):
    """Delete ``filename``; a file that is already absent is not an error.

    Any other failure reported by ``os.remove`` (for example a permission
    problem, or the path being a directory) is propagated to the caller.
    """
    try:
        os.remove(filename)
    except FileNotFoundError:
        # "No such file or directory": there is nothing to remove.
        pass
def split_bigfile(iterable, n):
    """Yield lazy chunks of up to ``n`` items each from ``iterable``.

    Each chunk is an iterator, not a list, and all chunks draw from the same
    underlying iterator. A chunk therefore has to be consumed completely
    before the next one is requested. The final chunk may hold fewer than
    ``n`` items.
    """
    source = iter(iterable)
    for head in source:
        # The first item was just pulled by the loop; up to n - 1 more follow.
        yield chain((head,), islice(source, n - 1))
def Get_Arguments():
    """
    Parse command-line arguments. Imported with argparse.

    Prints the help menu to stderr and exits with status 1 when the script is
    called without any options. Exits through ``parser.error`` (status 2) when
    ``--lines`` is smaller than 1.

    Returns: object of command-line arguments with the attributes ``tests``,
    ``outgroup``, ``phylip``, ``bootstraps``, ``sites`` and ``lines``.
    """
    default_lines = 250000

    parser = argparse.ArgumentParser(description="Prepares DFOIL_Picker.R output for input into Comp-D", add_help=False)

    required_args = parser.add_argument_group("Required Arguments")
    optional_args = parser.add_argument_group("Optional Arguments")

    ## Required Arguments
    required_args.add_argument("-t", "--tests",
                               type=str,
                               required=True,
                               help="String; Output from DFOIL_picker.R "
                                    "(space delimited)")
    required_args.add_argument("-o", "--outgroup",
                               type=str,
                               required=True,
                               help="String; Specify file containing outgroup individuals (space delimited)")
    required_args.add_argument("-p", "--phylip",
                               type=str,
                               required=True,
                               help="String; Specify PHYLIP filename")
    required_args.add_argument("-b", "--bootstraps",
                               type=int,
                               required=True,
                               help="Integer; Specify number of bootstrap replicates you're going to use")
    required_args.add_argument("-s", "--sites",
                               type=int,
                               required=True,
                               help="Integer; Specify number of sites in alignment")

    ## Optional Arguments
    # nargs="?" allows a bare "-l" with no value. Without const that would
    # store None, so const mirrors the default and a bare flag behaves as if
    # the option had been left out.
    optional_args.add_argument("-l", "--lines",
                               type=int,
                               required=False,
                               default=default_lines,
                               const=default_lines,
                               nargs="?",
                               help="Integer; Specify number of lines per split file; DEFAULT=250000")
    ## Call help menu
    optional_args.add_argument("-h", "--help",
                               action="help",
                               help="Displays this help menu")

    if len(sys.argv) == 1:
        print("\nExiting because no command-line options were called.\n")
        parser.print_help(sys.stderr)
        sys.exit(1)

    args = parser.parse_args()

    # The value is used as a chunk size: zero would silently produce no
    # output and a negative size is not a valid slice length.
    if args.lines < 1:
        parser.error("-l/--lines must be a positive integer")

    return args
# Run the conversion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()