Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
to get to v0.0.2 due to big SUB recruitment bug
  • Loading branch information
moritzbuck committed Jul 17, 2020
2 parents df24c1d + 0d75973 commit 47a42ca
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 6 deletions.
27 changes: 26 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,27 @@
# 0039_mOTUlizer
# mOTUlizer
Utility to analyse a group of closely related MAGs/Genomes/bins/SUBs of more or less dubious origin

## INSTALL

```
pip install mOTUlizer
```


## USAGE

### EASY

To make OTUs and get some stats, you need fastANI in your `PATH` and the output of CheckM:

```
mOTUlize.py -k checkm_output.txt --output a_messy_json-file_with_the_output.json
```

There are many more small options; to list them, run `mOTUlize.py -h`.

There is also `mOTUpan.py`, which can compute core genomes and pangenomes. It still needs more debugging, so try it at your own risk:

```
mOTUpan.py -h
```
4 changes: 2 additions & 2 deletions mOTUlizer/bin/mOTUlize.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def main(args):



assert all([g in checkm_info for g in genomes]), "you do not have completness/contamination info for all you bins"
assert all([g in checkm_info for g in genomes]), "you do not have completness/contamination info for all you bins, values missing for :" + ", ".join([g for g in genomes if g not in checkm_info][0:10]) + "... (only 10 first shown )"

if fnas == {}:
fnas = {g : None for g in genomes}
Expand Down Expand Up @@ -137,7 +137,7 @@ def main(args):
parser.add_argument('--MAG-completeness', '--MC', '-M', nargs = '?', type=float, default = 40, help = "completeness cutoff for seed MAGs, default : 40")
parser.add_argument('--MAG-contamination', '--Mc', '-m', nargs = '?', type=float, default = 5, help = "contamination cutoff for seed MAGs, default : 5")
parser.add_argument('--SUB-completeness', '--SC', '-S', nargs = '?', type=float, default = 0, help = "completeness cutoff for recruited SUBs, default : 0")
parser.add_argument('--SUB-contamination', '--Sc', '-s', nargs = '?', type=float, default = 0, help = "contamination cutoff for recruited SUBs, default : 0")
# Help text must state the actual default (100); it previously still advertised the old value (0).
parser.add_argument('--SUB-contamination', '--Sc', '-s', nargs = '?', type=float, default = 100, help = "contamination cutoff for recruited SUBs, default : 100")
parser.add_argument('--similarity-cutoff', '-i', nargs = '?', type=float, default = 95, help = "distance cutoff for making the graph, default : 95")
parser.add_argument('--cpus', '-c', nargs = '?', type=int, default = 1, help = "number of threads, default : 1")
parser.add_argument('--keep-simi-file', '-K', nargs = '?', default = None, help = "keep generated similarity file if '--similarities' is not procided")
Expand Down
11 changes: 8 additions & 3 deletions mOTUlizer/classes/mOTU.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ def __from_bins(self, bins, name,dist_dict = None ):
self.core = None
self.fastani_dict = dist_dict
self.aa2cog = None
self.likelies = None


@classmethod
Expand All @@ -258,7 +259,7 @@ def cluster_MetaBins(cls , all_bins, dist_dict, ani_cutoff = 95, prefix = "mOTU_
tt = [(k, v.checkm_complet, v.checkm_contamin) for k, v in all_bins.items() if v.checkm_complet > 0]

good_mag = lambda b : all_bins[b].checkm_complet > mag_complete and all_bins[b].checkm_contamin < mag_contamin
decent_sub = lambda b : all_bins[b].checkm_complet > sub_complete and all_bins[b].checkm_contamin < mag_contamin and not good_mag(b)
decent_sub = lambda b : all_bins[b].checkm_complet > sub_complete and all_bins[b].checkm_contamin < sub_contamin and not good_mag(b)
good_pairs = [k for k,v in dist_dict.items() if v > ani_cutoff and dist_dict.get((k[1],k[0]), 0) > ani_cutoff and good_mag(k[0]) and good_mag(k[1])]
species_graph = igraph.Graph()
vertexDeict = { v : i for i,v in enumerate(set([x for k in good_pairs for x in k]))}
Expand All @@ -277,12 +278,16 @@ def cluster_MetaBins(cls , all_bins, dist_dict, ani_cutoff = 95, prefix = "mOTU_

left_pairs = {k : v for k, v in dist_dict.items() if v > ani_cutoff and k[0] != k[1] and ((decent_sub(k[0]) and good_mag(k[1])) or (decent_sub(k[1]) and good_mag(k[0])))}
print("looking for good_left pairs")
subs = {l[0] : (None,0) for l in left_pairs}
# print(left_pairs)

subs = {l : (None,0) for ll in left_pairs.keys() for l in ll if not good_mag(l)}
# print(subs)
print("looking for best mOTU match")
for p,ani in left_pairs.items():
if subs[p[0]][1] < ani:
if p[0] in subs and subs[p[0]][1] < ani:
subs[p[0]] = (p[1], ani)
if p[1] in subs and subs[p[1]][1] < ani:
subs[p[1]] = (p[0], ani)

genome_clusters = [set(gg) for gg in genome_clusters]

Expand Down

0 comments on commit 47a42ca

Please sign in to comment.