Skip to content

Commit

Permalink
Merge pull request #133 from martinghunt/bcftools_ploidy
Browse files Browse the repository at this point in the history
Bcftools ploidy
  • Loading branch information
martinghunt authored Aug 31, 2016
2 parents 8b5216c + 18ed4c8 commit 07d17b5
Show file tree
Hide file tree
Showing 39 changed files with 11,985 additions and 461 deletions.
19 changes: 13 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,22 @@ Installation
------------

ARIBA has the following dependencies, which need to be installed:
* [Python3][python] version >= 3.4
* [Python3][python] version >= 3.3.2
* [Bowtie2][bowtie2] version >= 2.1.0
* [CD-HIT][cdhit] version >= 4.6
* [MASH][mash] version >= 1.0.2
* [MUMmer][mummer] version >= 3.23
* [Samtools and BCFtools][samtools] version >= 1.3


Once the dependencies are installed, install ARIBA using pip:

pip3 install ariba

ARIBA also depends on several Python packages, all of which are available
via pip, so the above command will install them automatically if they
are not already installed. The packages are dendropy >= 4.1.0,
pyfastaq >= 3.12.0, pysam >= 0.9.1, and pymummer >= 0.8.1.

Alternatively, you can download the latest release from this GitHub repository,
or clone the repository. Then run the tests:

Expand All @@ -53,18 +57,22 @@ to the following dependencies.

| Dependency | Default executable | Environment variable name |
|----------------|------------------------|---------------------------|
| BCFtools | `bcftools` | `$ARIBA_BCFTOOLS` |
| Bowtie2 | `bowtie2` | `$ARIBA_BOWTIE2` |
| CD-HIT (est) | `cd-hit-est` | `$ARIBA_CDHIT` |
| CD-HIT (est-2d)| `cd-hit-est-2d` | `$ARIBA_CDHIT2D` |
| MASH | `mash` | `$ARIBA_MASH` |
| Samtools | `samtools` | `$ARIBA_SAMTOOLS` |


For example, you could specify an exact version of a Samtools executable
that you downloaded and compiled in your home directory (assuming BASH):

export ARIBA_SAMTOOLS=$HOME/samtools-1.3/samtools
export ARIBA_BOWTIE2=$HOME/bowtie2-2.1.0/bowtie2

Note that ARIBA also runs `bowtie2-build`, for which it uses the
`bowtie2` executable with `-build` appended. So in this case
it would try to use

$HOME/bowtie2-2.1.0/bowtie2-build


### Temporary files
Expand Down Expand Up @@ -123,7 +131,6 @@ Build status: [![Build Status](https://travis-ci.org/sanger-pathogens/ariba.svg?
[ARIBA wiki]: https://github.com/sanger-pathogens/ariba/wiki
[mash]: https://mash.readthedocs.io/en/latest/
[mummer]: http://mummer.sourceforge.net/
[samtools]: http://www.htslib.org/
[python]: https://www.python.org/


1 change: 0 additions & 1 deletion ariba/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
'assembly_compare',
'assembly_variants',
'bam_parse',
'best_seq_chooser',
'card_record',
'cdhit',
'cluster',
Expand Down
3 changes: 1 addition & 2 deletions ariba/assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ def run(self):
self.log_fh = None
return

faidx.write_fa_subset({self.ref_seq_name}, self.ref_fastas, self.ref_fasta, samtools_exe=self.extern_progs.exe('samtools'), verbose=True, verbose_filehandle=self.log_fh)
faidx.write_fa_subset({self.ref_seq_name}, self.ref_fastas, self.ref_fasta)
print('Closest reference sequence according to mash: ', self.ref_seq_name, file=self.log_fh)

contigs_both_strands = self._fix_contig_orientation(self.gapfilled_length_filtered, self.ref_fasta, self.final_assembly_fa, min_id=self.nucmer_min_id, min_length=self.nucmer_min_len, breaklen=self.nucmer_breaklen)
Expand All @@ -398,7 +398,6 @@ def run(self):
self.final_assembly_bam[:-4],
threads=1,
sort=True,
samtools=self.extern_progs.exe('samtools'),
bowtie2=self.extern_progs.exe('bowtie2'),
verbose=True,
verbose_filehandle=self.log_fh
Expand Down
98 changes: 0 additions & 98 deletions ariba/best_seq_chooser.py

This file was deleted.

26 changes: 10 additions & 16 deletions ariba/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,10 @@ def __init__(self,
sspace_k=20,
sspace_sd=0.4,
threads=1,
bcf_min_dp=10,
bcf_min_dv=5,
bcf_min_dv_over_dp=0.3,
bcf_min_qual=20,
assembled_threshold=0.95,
min_var_read_depth=5,
min_second_var_read_depth=2,
max_allele_freq=0.90,
unique_threshold=0.03,
max_gene_nt_extend=30,
spades_other_options=None,
Expand Down Expand Up @@ -84,10 +83,9 @@ def __init__(self,
self.nucmer_min_len = nucmer_min_len
self.nucmer_breaklen = nucmer_breaklen

self.bcf_min_dp = bcf_min_dp
self.bcf_min_dv = bcf_min_dv
self.bcf_min_dv_over_dp = bcf_min_dv_over_dp
self.bcf_min_qual = bcf_min_qual
self.min_var_read_depth = min_var_read_depth
self.min_second_var_read_depth = min_second_var_read_depth
self.max_allele_freq = max_allele_freq

self.threads = threads
self.assembled_threshold = assembled_threshold
Expand Down Expand Up @@ -351,7 +349,6 @@ def _run(self):
self.final_assembly_bam[:-4],
threads=1,
sort=True,
samtools=self.extern_progs.exe('samtools'),
bowtie2=self.extern_progs.exe('bowtie2'),
bowtie2_preset='very-sensitive-local',
verbose=True,
Expand Down Expand Up @@ -405,16 +402,13 @@ def _run(self):
self.final_assembly_bam,
self.samtools_vars_prefix,
log_fh=self.log_fh,
samtools_exe=self.extern_progs.exe('samtools'),
bcftools_exe=self.extern_progs.exe('bcftools'),
bcf_min_dp=self.bcf_min_dp,
bcf_min_dv=self.bcf_min_dv,
bcf_min_dv_over_dp=self.bcf_min_dv_over_dp,
bcf_min_qual=self.bcf_min_qual,
min_var_read_depth=self.min_var_read_depth,
min_second_var_read_depth=self.min_second_var_read_depth,
max_allele_freq=self.max_allele_freq
)
self.samtools_vars.run()

self.total_contig_depths = self.samtools_vars.total_depth_per_contig(self.samtools_vars.read_depths_file)
self.total_contig_depths = self.samtools_vars.total_depth_per_contig(self.samtools_vars.contig_depths_file)

self.variants_from_samtools = self.samtools_vars.variants_in_coords(self.assembly_compare.assembly_match_coords(), self.samtools_vars.vcf_file)
if len(self.variants_from_samtools):
Expand Down
4 changes: 0 additions & 4 deletions ariba/clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,10 +397,6 @@ def _init_and_run_clusters(self):
sspace_k=self.min_scaff_depth,
sspace_sd=self.insert_sspace_sd,
threads=1, # clusters now run in parallel, so this should always be 1!
bcf_min_dp=10, # let the user change this in a future version?
bcf_min_dv=5, # let the user change this in a future version?
bcf_min_dv_over_dp=0.3, # let the user change this in a future version?
bcf_min_qual=20, # let the user change this in a future version?
assembled_threshold=self.assembled_threshold,
unique_threshold=self.unique_threshold,
max_gene_nt_extend=self.max_gene_nt_extend,
Expand Down
Loading

0 comments on commit 07d17b5

Please sign in to comment.