Skip to content

Commit

Permalink
Fixed a bug that rounded ANI values down to the nearest integer
Browse files Browse the repository at this point in the history
  • Loading branch information
moritzbuck committed Jun 14, 2021
1 parent 57cd1b6 commit 044484c
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 6 deletions.
2 changes: 1 addition & 1 deletion mOTUlizer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
if os.path.exists(".git"):
label = subprocess.check_output(["git", "describe", "--tags"]).strip().decode()
else:
label = "0.2.2"
label = "0.2.3"

os.chdir(cwd)

Expand Down
5 changes: 3 additions & 2 deletions mOTUlizer/bin/mOTUlize.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"""

fasta_exts = [".fna", ".fa", ".fasta", ".fna", ".ffn"]

def motulize(args):
#parse and check your amino-acid files
Expand Down Expand Up @@ -72,8 +73,8 @@ def motulize(args):
for l in handle:
if "query" not in l:
ll = l.split("\t")
g1 = ".".join(os.path.basename(ll[0]).split(".")[:-1]) if ll[0].endswith(".fna") else ll[0]
g2 = ".".join(os.path.basename(ll[1]).split(".")[:-1]) if ll[1].endswith(".fna") else ll[1]
g1 = ".".join(os.path.basename(ll[0]).split(".")[:-1]) if any([ll[0].endswith(ext) for ext in fasta_exts]) else ll[0]
g2 = ".".join(os.path.basename(ll[1]).split(".")[:-1]) if any([ll[1].endswith(ext) for ext in fasta_exts]) else ll[1]
dist = float(ll[2])
dist_dict[(g1,g2)] = dist
else :
Expand Down
4 changes: 3 additions & 1 deletion mOTUlizer/classes/MetaBin.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def get_anis(cls, bins, outfile = None, method = "fastANI", block_size = 500, th
out_tfile = tempfile.NamedTemporaryFile().name
call("fastANI --ql {b1} --rl {b2} -o {out} -t {threads} 2> /dev/null".format(b1 = b1_tfile, b2 = b2_tfile, out = out_tfile, threads = threads), shell = True)
with open(out_tfile) as handle:
new_dat = ["\t".join([".".join(ll.split("/")[-1].split(".")[:-1]) if "." in ll else ll for ll in l.split()]) +"\n" for l in handle.readlines()]
new_dat = ["\t".join([ll for ll in l.split()]) +"\n" for l in handle.readlines()]
with open(fastani_file, "a") as handle:
handle.writelines(new_dat)

Expand All @@ -82,6 +82,8 @@ def get_anis(cls, bins, outfile = None, method = "fastANI", block_size = 500, th
with open(fastani_file) as handle:
handle.readline()
out_dists = {(l.split()[0], l.strip().split()[1]) : float(l.split()[2]) for l in handle}
# tfile = lambda k : ".".join(k.split(".")[:-1]) if (k.endswith(".fna") or k.endswith(".fa") or k.endswith(".fasta") or k.endswith(".fna") or k.endswith(".ffn")) else k
# out_dists = {(tfile(k[0]),tfile(k[1])) : v for k,v in out_dists.items()}
if outfile is None:
os.remove(fastani_file)
else :
Expand Down
8 changes: 6 additions & 2 deletions mOTUlizer/scripts/prochloros.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
stratfresh = pandas.read_csv("/home/moritz/data/data_submit/metadata/master_table.csv", index_col=0)
#stratfresh_motus = pandas.read_csv("/home/moritz/data/data_submit/metadata/Supplementary_Table_S7_-_Data_about_mOTUs.csv", index_col=0)
#stratfresh_motus["est_size"] = list(100*stratfresh.loc[stratfresh_motus.representative_MAGs].length/stratfresh.loc[stratfresh_motus.representative_MAGs].completeness/1000000)
with open("stratfreshmotus.json") as handle :
stratfresh_motus = json.load(handle)

with open("stratfreshmotus.json") as handle :
bin2stratfreshmotu = { g['name'] : k for k,v in tqdm(json.load(handle).items()) for g in v['genomes'] }

Expand Down Expand Up @@ -185,7 +188,7 @@ def process_species_cores():
scaffold_count = stratfresh.loc[tliss].nb_contigs if type =="stratfreshdb" else r95.loc[tliss].scaffold_count
new_comps_roary = {gid.replace("RS_","").replace("GB_","") : len(v['roary_cogs'][gid.replace("RS_","").replace("GB_","")].intersection(v['motupan_roary']))/len(v['motupan_roary']) for gid in v['gids']}
new_comps_ppan = {gid.replace("RS_","").replace("GB_","") : len(v['ppan_cogs'][gid.replace("RS_","").replace("GB_","")].intersection(v['motupan']))/len(v['motupan']) for gid in v['gids']}
goods = set(goods)
# goods = set(goods)
core_stats[k] = {
'taxo' : k if type == "gtdb" else "tbd", #stratfresh_motus.loc[k].consensus_tax,
'motupan_w_ppan' : len(v['motupan']),
Expand All @@ -201,7 +204,8 @@ def process_species_cores():
'mean_est_roary_cogs' : mean([ len(v['roary_cogs'][k])/c for k,c in new_comps_ppan.items() if c > 0.4] ),
'mean_scaff_count' : mean(scaffold_count),
'type' : type,
'est_size' : r95.loc[gtdb2rep[k]].est_size if type == "gtdb" else -1 #stratfresh_motus.loc[k].est_size
'est_size' : r95.loc[gtdb2rep[k]].est_size if type == "gtdb" else stratfresh_motus.loc[k].est_size
'genoms_ids' : ";".joinb(v['gids '])
}


Expand Down

0 comments on commit 044484c

Please sign in to comment.