From 044484c608e20fd8f4f7b6c195dd1466803be0c5 Mon Sep 17 00:00:00 2001 From: moritzbuck Date: Mon, 14 Jun 2021 11:00:14 +0200 Subject: [PATCH] fixed bug rounding down ANI values to the next integer --- mOTUlizer/__init__.py | 2 +- mOTUlizer/bin/mOTUlize.py | 5 +++-- mOTUlizer/classes/MetaBin.py | 4 +++- mOTUlizer/scripts/prochloros.py | 8 ++++++-- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/mOTUlizer/__init__.py b/mOTUlizer/__init__.py index a53db70..6b2783f 100644 --- a/mOTUlizer/__init__.py +++ b/mOTUlizer/__init__.py @@ -10,7 +10,7 @@ if os.path.exists(".git"): label = subprocess.check_output(["git", "describe", "--tags"]).strip().decode() else: - label = "0.2.2" + label = "0.2.3" os.chdir(cwd) diff --git a/mOTUlizer/bin/mOTUlize.py b/mOTUlizer/bin/mOTUlize.py index 5f7dade..785f6bf 100755 --- a/mOTUlizer/bin/mOTUlize.py +++ b/mOTUlizer/bin/mOTUlize.py @@ -28,6 +28,7 @@ """ +fasta_exts = [".fna", ".fa", ".fasta", ".fna", ".ffn"] def motulize(args): #parse and check your amino-acid files @@ -72,8 +73,8 @@ def motulize(args): for l in handle: if "query" not in l: ll = l.split("\t") - g1 = ".".join(os.path.basename(ll[0]).split(".")[:-1]) if ll[0].endswith(".fna") else ll[0] - g2 = ".".join(os.path.basename(ll[1]).split(".")[:-1]) if ll[1].endswith(".fna") else ll[1] + g1 = ".".join(os.path.basename(ll[0]).split(".")[:-1]) if any([ll[0].endswith(ext) for ext in fasta_exts]) else ll[0] + g2 = ".".join(os.path.basename(ll[1]).split(".")[:-1]) if any([ll[1].endswith(ext) for ext in fasta_exts]) else ll[1] dist = float(ll[2]) dist_dict[(g1,g2)] = dist else : diff --git a/mOTUlizer/classes/MetaBin.py b/mOTUlizer/classes/MetaBin.py index 03acbd8..f86d6db 100644 --- a/mOTUlizer/classes/MetaBin.py +++ b/mOTUlizer/classes/MetaBin.py @@ -71,7 +71,7 @@ def get_anis(cls, bins, outfile = None, method = "fastANI", block_size = 500, th out_tfile = tempfile.NamedTemporaryFile().name call("fastANI --ql {b1} --rl {b2} -o {out} -t {threads} 2> /dev/null".format(b1 = b1_tfile, b2 = b2_tfile, out = out_tfile, threads = threads), shell = True) with open(out_tfile) as handle: - new_dat = ["\t".join([".".join(ll.split("/")[-1].split(".")[:-1]) if "." in ll else ll for ll in l.split()]) +"\n" for l in handle.readlines()] + new_dat = ["\t".join([ll for ll in l.split()]) +"\n" for l in handle.readlines()] with open(fastani_file, "a") as handle: handle.writelines(new_dat) @@ -82,6 +82,8 @@ def get_anis(cls, bins, outfile = None, method = "fastANI", block_size = 500, th with open(fastani_file) as handle: handle.readline() out_dists = {(l.split()[0], l.strip().split()[1]) : float(l.split()[2]) for l in handle} +# tfile = lambda k : ".".join(k.split(".")[:-1]) if (k.endswith(".fna") or k.endswith(".fa") or k.endswith(".fasta") or k.endswith(".fna") or k.endswith(".ffn")) else k +# out_dists = {(tfile(k[0]),tfile(k[1])) : v for k,v in out_dists.items()} if outfile is None: os.remove(fastani_file) else : diff --git a/mOTUlizer/scripts/prochloros.py b/mOTUlizer/scripts/prochloros.py index f4bde54..1649c0c 100644 --- a/mOTUlizer/scripts/prochloros.py +++ b/mOTUlizer/scripts/prochloros.py @@ -28,6 +28,9 @@ stratfresh = pandas.read_csv("/home/moritz/data/data_submit/metadata/master_table.csv", index_col=0) #stratfresh_motus = pandas.read_csv("/home/moritz/data/data_submit/metadata/Supplementary_Table_S7_-_Data_about_mOTUs.csv", index_col=0) #stratfresh_motus["est_size"] = list(100*stratfresh.loc[stratfresh_motus.representative_MAGs].length/stratfresh.loc[stratfresh_motus.representative_MAGs].completeness/1000000) +with open("stratfreshmotus.json") as handle : + stratfresh_motus = json.load(handle) + with open("stratfreshmotus.json") as handle : bin2stratfreshmotu = { g['name'] : k for k,v in tqdm(json.load(handle).items()) for g in v['genomes'] } @@ -185,7 +188,7 @@ def process_species_cores(): scaffold_count = stratfresh.loc[tliss].nb_contigs if type =="stratfreshdb" else r95.loc[tliss].scaffold_count new_comps_roary = {gid.replace("RS_","").replace("GB_","") : len(v['roary_cogs'][gid.replace("RS_","").replace("GB_","")].intersection(v['motupan_roary']))/len(v['motupan_roary']) for gid in v['gids']} new_comps_ppan = {gid.replace("RS_","").replace("GB_","") : len(v['ppan_cogs'][gid.replace("RS_","").replace("GB_","")].intersection(v['motupan']))/len(v['motupan']) for gid in v['gids']} - goods = set(goods) +# goods = set(goods) core_stats[k] = { 'taxo' : k if type == "gtdb" else "tbd", #stratfresh_motus.loc[k].consensus_tax, 'motupan_w_ppan' : len(v['motupan']), @@ -201,7 +204,8 @@ def process_species_cores(): 'mean_est_roary_cogs' : mean([ len(v['roary_cogs'][k])/c for k,c in new_comps_ppan.items() if c > 0.4] ), 'mean_scaff_count' : mean(scaffold_count), 'type' : type, - 'est_size' : r95.loc[gtdb2rep[k]].est_size if type == "gtdb" else -1 #stratfresh_motus.loc[k].est_size + 'est_size' : r95.loc[gtdb2rep[k]].est_size if type == "gtdb" else stratfresh_motus.loc[k].est_size + 'genoms_ids' : ";".joinb(v['gids ']) }