Skip to content

Commit

Permalink
#GI2020 release
Browse files: browse the repository at this point in the history
  • Loading branch information
moritzbuck committed Sep 14, 2020
1 parent 11e142b commit 6cc3295
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 12 deletions.
2 changes: 1 addition & 1 deletion mOTUlizer/classes/MetaBin.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def __repr__(self) :

def __init__(self, name, cogs,fnas, faas, complet = None, contamin = 0, max_complete = 99.9):
self.name = name
self.cogs = cogs
self.cogs = cogs if type(cogs) != str else set([cogs])
self.faas = faas
self.fnas = fnas
self.checkm_complet = complet
Expand Down
26 changes: 19 additions & 7 deletions mOTUlizer/classes/MockData.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ def __init__(self, name, core_len, nb_genomes, completeness, max_it = 20, access

core = {"CoreTrait_{}".format(i) for i in range(core_len)}


if accessory is None:
sub_dist = [int(nb_genomes/i) for i in range(2,1000) if int(nb_genomes/i) > 0] + [1]*100
sub_dist = list(range(nb_genomes-1, 1,-1))
Expand All @@ -29,28 +28,41 @@ def __init__(self, name, core_len, nb_genomes, completeness, max_it = 20, access
mock_genomes["Genome_{}".format(k)] = list(core)

for i,v in enumerate(sub_dist):
genomes = sample(list(mock_genomes.keys()), v)
genomes = sample(list(mock_genomes.keys()), v if v < len(mock_genomes) else len(mock_genomes) )
for g in genomes:
mock_genomes[g] += ["AccessoryTrait_{}".format(i)]

self.incompletes = {g : {vv for vv in v if random() < (completeness(g)/100)} for g, v in mock_genomes.items()}

to_rm = []
for k, v in self.incompletes.items():
if len(v) == 0:
choice(core)
if len(core) > 0:
self.incompletes[k] = choice(list(core))

self.mean_completeness = mean([len({vv for vv in v if vv.startswith("CoreTrait_")})/core_len for c,v in self.incompletes.items()])
self.completenesses = {c : 100*len({vv for vv in v if vv.startswith("CoreTrait_")})/core_len for c,v in self.incompletes.items()}
# self.accessory = accessory
if core_len == 0:
self.mean_completeness = "NA"
self.completenesses = {c : 0 for c,v in self.incompletes.items()}
else :
self.mean_completeness = mean([len({vv for vv in v if vv.startswith("CoreTrait_")})/core_len for c,v in self.incompletes.items()])
self.completenesses = {c : 100*len({vv for vv in v if vv.startswith("CoreTrait_")})/core_len for c,v in self.incompletes.items()}
# self.accessory = accessory
self.mean_size = mean([len(m) for m in mock_genomes.values()])
self.real_core_len = core_len

zerifneg = lambda g: 0.001 if g < 0 else g
super().__init__(name = name, faas = {}, cog_dict = self.incompletes, checkm_dict = { k : zerifneg(normal(v, 10)) for k,v in self.completenesses.items()}, max_it = max_it)
self.recall = len(core.intersection(self.core))/core_len
if core_len == 0:
self.recall = "NA"
self.fpr = "NA"
else :
self.recall = len(core.intersection(self.core))/core_len
self.fpr = sum([not c.startswith("CoreTrait_") for c in self.core])/len([not c.startswith("CoreTrait_") for c in self.core])

self.lowest_false = {k : v for k,v in self.cogCounts.items() if k in self.core and k not in core}
self.lowest_false = 1 if(len(self.lowest_false) ==0) else min(self.lowest_false.items(), key = lambda x : x[1])[1]/len(self)


def mock_cog_stats(self):
all_genes = set.union(*self.incompletes.values())
outp = {t : {} for t in all_genes}
Expand Down
5 changes: 2 additions & 3 deletions mOTUlizer/classes/mOTU.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def __for_mOTUpan(self, name, faas, cog_dict, checkm_dict, max_it = 20):

self.members = [MetaBin(bin_name, cogs = self.cog_dict[bin_name], faas = self.faas.get(bin_name), fnas = None, complet = checkm_dict.get(bin_name)) for bin_name in self.cog_dict.keys()]
self.mock = None
self.cogCounts = {c : 0 for c in set.union(*[mag.cogs for mag in self.members])}
self.cogCounts = {c : 0 for c in set.union(set([cog for mag in self.members for cog in mag.cogs]))}
for mag in self.members:
for cog in mag.cogs:
self.cogCounts[cog] += 1
Expand All @@ -73,8 +73,7 @@ def roc_values(self):
accessory[k] += 1

self.mock = MockmOTU(self.name + "_mock", len(self.core), len(self), lambda g : completnesses[g], accessory = accessory)
fpr = sum([not c.startswith("CoreTrait_") for c in self.mock.core])/len([not c.startswith("CoreTrait_") for c in self.mock.core])
return { 'recall' : self.mock.recall, 'lowest_false' : self.mock.lowest_false, 'fpr' : fpr }
return { 'recall' : self.mock.recall, 'lowest_false' : self.mock.lowest_false, 'fpr' : self.mock.fpr }


def avg_cog_content(self):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="mOTUlizer", # Replace with your own username
version="0.1.0",
version="0.1.1",
author="Moritz Buck",
author_email="[email protected]",
description="making OTUs from genomes, and stats on them. and maybe even core-genomes",
Expand Down

0 comments on commit 6cc3295

Please sign in to comment.