Skip to content

Commit

Permalink
Merge pull request #162 from martinghunt/mic_plots
Browse files Browse the repository at this point in the history
Mic plots
  • Loading branch information
martinghunt authored Mar 4, 2017
2 parents ce04c52 + c2f3935 commit 809fbbc
Show file tree
Hide file tree
Showing 37 changed files with 1,375 additions and 121 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ Once the dependencies are installed, install ARIBA using pip:

ARIBA also depends on several Python packages, all of which are available
via pip, so the above command will get those automatically if they
are not installed. The packages are dendropy >= 4.2.0,
are not installed. The packages are dendropy >= 4.2.0, matplotlib (no
minimum version required, but only tested on 2.0.0),
pyfastaq >= 3.12.0, pysam >= 0.9.1, and pymummer >= 0.10.1.

Alternatively, you can download the latest release from this github repository,
Expand Down
1 change: 1 addition & 0 deletions ariba/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
'mapping',
'megares_data_finder',
'megares_zip_parser',
'mic_plotter',
'mlst_profile',
'mlst_reporter',
'pubmlst_getter',
Expand Down
33 changes: 33 additions & 0 deletions ariba/assembly_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,39 @@ def nucmer_hits_to_ref_coords(cls, nucmer_hits, contig=None):
return coords


@classmethod
def nucmer_hits_to_ref_and_qry_coords(cls, nucmer_hits, contig=None):
'''Same as nucmer_hits_to_ref_coords, except removes containing hits first,
and returns ref and qry coords lists'''
if contig is None:
ctg_coords = {key: [] for key in nucmer_hits.keys()}
else:
ctg_coords = {contig: []}

ref_coords = {}

for key in ctg_coords:
hits = copy.copy(nucmer_hits[key])
hits.sort(key=lambda x: len(x.ref_coords()))

if len(hits) > 1:
i = 0
while i < len(hits) - 1:
c1 = hits[i].ref_coords()
c2 = hits[i+1].ref_coords()
if c2.contains(c1):
hits.pop(i)
else:
i += 1

ref_coords[key] = [hit.ref_coords() for hit in hits]
ctg_coords[key] = [hit.qry_coords() for hit in hits]
pyfastaq.intervals.merge_overlapping_in_list(ref_coords[key])
pyfastaq.intervals.merge_overlapping_in_list(ctg_coords[key])

return ctg_coords, ref_coords


@staticmethod
def ref_cov_per_contig(nucmer_hits):
'''Input is hits made by self._parse_nucmer_coords_file.
Expand Down
23 changes: 19 additions & 4 deletions ariba/assembly_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ def _get_remaining_known_ref_variants(known_ref_variants, used_ref_variants, nuc
return variants


def get_variants(self, ref_sequence_name, nucmer_coords):
def get_variants(self, ref_sequence_name, allowed_ctg_coords, allowed_ref_coords, nucmer_matches=None):
'''Nucmr coords = dict. Key=contig name. Value = list of intervals of ref coords that match the contig.
Made by assembly_compare.AssemblyCompare.nucmer_hits_to_ref_coords
Returns dictionary. Key=contig name. Value = list of variants. Each variant
Expand All @@ -287,12 +287,27 @@ def get_variants(self, ref_sequence_name, nucmer_coords):

known_non_wild_variants_in_ref = self.refdata.all_non_wild_type_variants(ref_sequence_name)

for contig in nucmer_coords:
for contig in allowed_ctg_coords:
if contig not in allowed_ref_coords:
continue

used_known_variants = set()
variants[contig] = []

if contig in mummer_variants:
for mummer_variant_list in mummer_variants[contig]:
ref_start = min([x.ref_start for x in mummer_variant_list])
ref_end = max([x.ref_end for x in mummer_variant_list])
ctg_start = min([x.qry_start for x in mummer_variant_list])
ctg_end = min([x.qry_end for x in mummer_variant_list])
ref_interval = intervals.Interval(ref_start, ref_end)
ctg_interval = intervals.Interval(ctg_start, ctg_end)
ref_ok = True in {x.intersects(ref_interval) for x in allowed_ref_coords[contig]}
qry_ok = True in {x.intersects(ctg_interval) for x in allowed_ctg_coords[contig]}

if not (ref_ok and qry_ok):
continue

if seq_type == 'p':
new_variant, used_variants = self._get_one_variant_for_one_contig_coding(ref_sequence, refdata_var_dict, mummer_variant_list)
else:
Expand All @@ -306,10 +321,10 @@ def get_variants(self, ref_sequence_name, nucmer_coords):
# for this contig, need to know all the ref sequence and coords it maps to.
# Then report just the unused known variants, as the contig also has these variants
if seq_type == 'p':
new_variants = self._get_remaining_known_ref_variants(known_non_wild_variants_in_ref['p'], used_known_variants, nucmer_coords[contig])
new_variants = self._get_remaining_known_ref_variants(known_non_wild_variants_in_ref['p'], used_known_variants, allowed_ref_coords[contig])

else:
new_variants = self._get_remaining_known_ref_variants(known_non_wild_variants_in_ref['n'], used_known_variants, nucmer_coords[contig])
new_variants = self._get_remaining_known_ref_variants(known_non_wild_variants_in_ref['n'], used_known_variants, allowed_ref_coords[contig])

if is_variant_only:
new_variants = [x for x in new_variants if len(x[5]) > 0]
Expand Down
4 changes: 2 additions & 2 deletions ariba/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,9 +382,9 @@ def _run(self):
self.assembly_compare.run()
self.status_flag = self.assembly_compare.update_flag(self.status_flag)

nucmer_hits_to_ref = assembly_compare.AssemblyCompare.nucmer_hits_to_ref_coords(self.assembly_compare.nucmer_hits)
allowed_ctg_pos, allowed_ref_pos = assembly_compare.AssemblyCompare.nucmer_hits_to_ref_and_qry_coords(self.assembly_compare.nucmer_hits)
assembly_variants_obj = assembly_variants.AssemblyVariants(self.refdata, self.assembly_compare.nucmer_snps_file)
self.assembly_variants = assembly_variants_obj.get_variants(self.ref_sequence.id, nucmer_hits_to_ref)
self.assembly_variants = assembly_variants_obj.get_variants(self.ref_sequence.id, allowed_ctg_pos, allowed_ref_pos)

for var_list in self.assembly_variants.values():
for var in var_list:
Expand Down
3 changes: 2 additions & 1 deletion ariba/ext/vcfcall_ariba.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ void vectorSumAndMax(const std::vector<uint32_t>& v, uint32_t& maxValue, uint32_

bool depthOk(const uint32_t& totalDepth, const uint32_t& testNum, const uint32_t& minSecondDepth, const float& maxAlleleFreq)
{
return (testNum >= minSecondDepth && 1.0 * testNum / totalDepth <= maxAlleleFreq);
double depthFreq = 1.0 * testNum / totalDepth;
return (testNum >= minSecondDepth && depthFreq >= (1 - maxAlleleFreq) && depthFreq <= maxAlleleFreq);
}


Expand Down
Loading

0 comments on commit 809fbbc

Please sign in to comment.