Skip to content

Commit

Permalink
Merge pull request #21 from martinghunt/use_bowtie2
Browse files Browse the repository at this point in the history
Use bowtie2
  • Loading branch information
martinghunt committed Apr 7, 2015
2 parents 259f872 + 722ab7c commit 2e0675c
Show file tree
Hide file tree
Showing 20 changed files with 188 additions and 46 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ Installation
------------

ARIBA has the following dependencies, which need to be installed:
* [bowtie2] [Bowtie2] version >= 2.1.0
* [cd-hit] [cdhit] version >= 4.6
* [samtools and bcftools] [samtools] version >= 1.2
* [MUMmer] [mummer] version >= 3.23
* [SMALT] [smalt] version >= 0.7.4
* Either [SPAdes] [spades] version >= 3.5.0 or [Velvet] [velvet] version >= 1.2.07
(SPAdes is recommended)

Expand Down Expand Up @@ -44,12 +44,12 @@ Usage
Please read the [ARIBA wiki page] [ARIBA wiki] for usage instructions.


[bowtie2]: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml
[cdhit]: http://weizhongli-lab.org/cd-hit/
[ARIBA wiki]: https://github.com/sanger-pathogens/ariba/wiki
[gapfiller]: http://www.baseclear.com/genomics/bioinformatics/basetools/gapfiller
[mummer]: http://mummer.sourceforge.net/
[samtools]: http://www.htslib.org/
[smalt]: https://www.sanger.ac.uk/resources/software/smalt/
[spades]: http://bioinf.spbau.ru/spades
[sspace]: http://www.baseclear.com/genomics/bioinformatics/basetools/SSPACE
[velvet]: http://www.ebi.ac.uk/~zerbino/velvet/
Expand Down
14 changes: 8 additions & 6 deletions ariba/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def __init__(self,
bcftools_exe='bcftools',
gapfiller_exe='GapFiller.pl',
samtools_exe='samtools',
bowtie2_exe='bowtie2',
smalt_exe='smalt',
spades_exe='spades.py',
sspace_exe='SSPACE_Basic_v2.0.pl',
Expand Down Expand Up @@ -93,6 +94,7 @@ def __init__(self,

self.samtools_exe = samtools_exe
self.smalt_exe = smalt_exe
self.bowtie2_exe = bowtie2_exe

if self.assembler == 'velvet':
self.velveth = velvet_exe + 'h'
Expand Down Expand Up @@ -146,14 +148,14 @@ def _get_total_alignment_score(self, gene_name):
tmp_fa = os.path.join(self.root_dir, 'tmp.get_total_alignment_score.ref.fa')
assert not os.path.exists(tmp_fa)
faidx.write_fa_subset([gene_name], self.genes_fa, tmp_fa, samtools_exe=self.samtools_exe, verbose=self.verbose)
mapping.run_smalt(
mapping.run_bowtie2(
self.reads1,
self.reads2,
tmp_fa,
tmp_bam[:-4],
threads=self.threads,
samtools=self.samtools_exe,
smalt=self.smalt_exe,
bowtie2=self.bowtie2_exe,
verbose=self.verbose,
)

Expand Down Expand Up @@ -218,15 +220,15 @@ def _set_assembly_kmer(self, k):

def _assemble_with_velvet(self):
# map reads to reference gene to make BAM input to velvet columbus
mapping.run_smalt(
mapping.run_bowtie2(
self.reads1,
self.reads2,
self.gene_fa,
self.gene_bam[:-4],
threads=self.threads,
sort=True,
samtools=self.samtools_exe,
smalt=self.smalt_exe,
bowtie2=self.bowtie2_exe,
verbose=self.verbose,
)

Expand Down Expand Up @@ -996,15 +998,15 @@ def run(self):
self._load_final_contigs()

# map reads to assembly
mapping.run_smalt(
mapping.run_bowtie2(
self.reads1,
self.reads2,
self.final_assembly_fa,
self.final_assembly_bam[:-4],
threads=self.threads,
sort=True,
samtools=self.samtools_exe,
smalt=self.smalt_exe,
bowtie2=self.bowtie2_exe,
verbose=self.verbose,
)
self._parse_assembly_bam()
Expand Down
9 changes: 4 additions & 5 deletions ariba/clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(self,
gapfiller_exe='GapFiller.pl',
samtools_exe='samtools',
smalt_exe='smalt',
bowtie2_exe='bowtie2',
spades_exe='spades.py',
sspace_exe='SSPACE_Basic_v2.0.pl',
velvet_exe='velvet', # prefix of velvet{g,h}
Expand Down Expand Up @@ -68,6 +69,7 @@ def __init__(self,
self.smalt_min_id = smalt_min_id
self.max_insert = max_insert
self.smalt_exe = smalt_exe
self.bowtie2_exe = bowtie2_exe

self.insert_hist_bin = 10
self.insert_hist = histogram.Histogram(self.insert_hist_bin)
Expand Down Expand Up @@ -136,17 +138,14 @@ def _write_clusters_info_file(self):


def _map_reads_to_clustered_genes(self):
mapping.run_smalt(
mapping.run_bowtie2(
self.reads_1,
self.reads_2,
self.db_fasta_clustered,
self.bam_prefix,
index_k=self.smalt_k,
index_s=self.smalt_s,
threads=self.threads,
samtools=self.samtools_exe,
smalt=self.smalt_exe,
minid=self.smalt_min_id,
bowtie2=self.bowtie2_exe,
verbose=self.verbose,
)

Expand Down
5 changes: 4 additions & 1 deletion ariba/external_progs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def is_in_path(prog):

prog_to_default = {
'bcftools': 'bcftools',
'bowtie2': 'bowtie2',
'cdhit': 'cd-hit-est',
'gapfiller': 'GapFiller.pl',
'nucmer' : 'nucmer',
Expand All @@ -36,6 +37,7 @@ def is_in_path(prog):

prog_to_version_cmd = {
'bcftools': ('', re.compile('^Version: ([0-9\.]+)')),
'bowtie2': ('--version', re.compile('.*bowtie2-align version (.*)$')),
'cdhit': ('', re.compile('CD-HIT version ([0-9\.]+) \(')),
'gapfiller': ('', re.compile('^Usage: .*pl \[GapFiller_(.*)\]')),
'nucmer': ('--version', re.compile('^NUCmer \(NUCleotide MUMmer\) version ([0-9\.]+)')),
Expand All @@ -50,6 +52,7 @@ def is_in_path(prog):

min_versions = {
'bcftools': '1.2',
'bowtie2': '2.1.0',
'cd-hit': '4.6',
'nucmer': '3.1',
'samtools': '1.2',
Expand Down Expand Up @@ -113,9 +116,9 @@ def check_versions(opts, verbose=False, not_required=None):

to_check = [
'bcftools',
'bowtie2',
'cdhit',
'nucmer',
'smalt',
'samtools',
'sspace',
'gapfiller',
Expand Down
55 changes: 55 additions & 0 deletions ariba/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,61 @@
class Error (Exception): pass


def run_bowtie2(
reads_fwd,
reads_rev,
ref_fa,
out_prefix,
threads=1,
max_insert=1000,
sort=False,
samtools='samtools',
bowtie2='bowtie2',
verbose=False
):

map_index = out_prefix + '.map_index'
clean_files = [map_index + '.' + x + '.bt2' for x in ['1', '2', '3', '4', 'rev.1', 'rev.2']]
index_cmd = ' '.join([
bowtie2 + '-build',
'-q',
ref_fa,
map_index
])

final_bam = out_prefix + '.bam'
if sort:
intermediate_bam = out_prefix + '.unsorted.bam'
else:
intermediate_bam = final_bam

map_cmd = ' '.join([
bowtie2,
'--threads', str(threads),
'--very-sensitive-local',
'-X', str(max_insert),
'-x', map_index,
'-1', reads_fwd,
'-2', reads_rev,
'|', samtools, 'view',
'-bS -T', ref_fa,
'- >', intermediate_bam
])

common.syscall(index_cmd, verbose=verbose)
common.syscall(map_cmd, verbose=verbose)

if sort:
threads = min(4, threads)
thread_mem = int(500 / threads)
sort_cmd = samtools + ' sort -@' + str(threads) + ' -m ' + str(thread_mem) + 'M ' + intermediate_bam + ' ' + out_prefix
index_cmd = samtools + ' index ' + final_bam
common.syscall(sort_cmd, verbose=verbose)
common.syscall(index_cmd, verbose=verbose)
for fname in clean_files:
os.unlink(fname)


def run_smalt(
reads_fwd,
reads_rev,
Expand Down
14 changes: 3 additions & 11 deletions ariba/tasks/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,6 @@ def run():
cdhit_group.add_argument('--cdhit_seq_identity_threshold', type=float, help='Sequence identity threshold (cd-hit option -c) [%(default)s]', default=0.9, metavar='FLOAT')
cdhit_group.add_argument('--cdhit_length_diff_cutoff', type=float, help='length difference cutoff (cd-hit option -s) [%(default)s]', default=0.9, metavar='FLOAT')

smalt_group = parser.add_argument_group('smalt options')
smalt_group.add_argument('--smalt_k', type=int, help='kmer to use when indexing with smalt (smalt index -k) [%(default)s]', default=13, metavar='INT')
smalt_group.add_argument('--smalt_s', type=int, help='Step length to use when indexing with smalt (see smalt index -s) [%(default)s]', default=2, metavar='INT')
smalt_group.add_argument('--smalt_min_id', type=float, help='Minimum identity to report a match (smalt map -y) [%(default)s]', default=0.9, metavar='FLOAT')

nucmer_group = parser.add_argument_group('nucmer options')
nucmer_group.add_argument('--nucmer_min_id', type=int, help='Minimum alignment identity (delta-filter -i) [%(default)s]', default=90, metavar='INT')
nucmer_group.add_argument('--nucmer_min_len', type=int, help='Minimum alignment length (delta-filter -i) [%(default)s]', default=50, metavar='INT')
Expand All @@ -35,19 +30,19 @@ def run():

other_group = parser.add_argument_group('Other options')
other_group.add_argument('--genetic_code', type=int, help='Number of genetic code to use. Currently supported 1,4,11 [%(default)s]', choices=[1,4,11], default=11, metavar='INT')
other_group.add_argument('--threads', type=int, help='Number of threads for smalt and spades [%(default)s]', default=1, metavar='INT')
other_group.add_argument('--threads', type=int, help='Number of threads for bowtie2 and spades [%(default)s]', default=1, metavar='INT')
other_group.add_argument('--assembled_threshold', type=float, help='If proportion of gene assembled (regardless of into how many contigs) is at least this value then the flag gene_assembled is set [%(default)s]', default=0.95, metavar='FLOAT (between 0 and 1)')
other_group.add_argument('--unique_threshold', type=float, help='If proportion of bases in gene assembled more than once is <= this value, then the flag unique_contig is set [%(default)s]', default=0.03, metavar='FLOAT (between 0 and 1)')
other_group.add_argument('--clean', type=int, choices=[0,1,2], help='Specify how much cleaning to do. 0=none, 1=some, 2=only keep the report [%(default)s]', default=1, metavar='INT')
other_group.add_argument('--verbose', action='store_true', help='Be verbose')

executables_group = parser.add_argument_group('executables locations')
executables_group.add_argument('--bcftools', help='bcftools executable [bcftools]', metavar='PATH')
executables_group.add_argument('--bowtie2', help='bowtie2 executable [bowtie2]', metavar='PATH')
executables_group.add_argument('--cdhit', help=argparse.SUPPRESS)
executables_group.add_argument('--gapfiller', help='GapFiller executable [GapFiller.pl]', metavar='PATH')
executables_group.add_argument('--nucmer', help=argparse.SUPPRESS, default='nucmer')
executables_group.add_argument('--samtools', help='samtools executable [samtools]', metavar='PATH')
executables_group.add_argument('--smalt', help='SMALT executable [smalt]', metavar='PATH')
executables_group.add_argument('--spades', help='SPAdes executable [spades.py]', metavar='PATH')
executables_group.add_argument('--sspace', help='SSPACE executable [SSPACE_Basic_v2.0.pl]', metavar='PATH')
executables_group.add_argument('--velvet', help='prefix of velvet{g,h} executables [velvet]', metavar='PATH')
Expand All @@ -69,9 +64,6 @@ def run():
assembler=options.assembler,
threads=options.threads,
verbose=options.verbose,
smalt_k=options.smalt_k,
smalt_s=options.smalt_s,
smalt_min_id=options.smalt_min_id,
min_scaff_depth=options.min_scaff_depth,
nucmer_min_id=options.nucmer_min_id,
nucmer_min_len=options.nucmer_min_len,
Expand All @@ -82,7 +74,7 @@ def run():
bcftools_exe=options.bcftools,
gapfiller_exe=options.gapfiller,
samtools_exe=options.samtools,
smalt_exe=options.smalt,
bowtie2_exe=options.bowtie2,
spades_exe=options.spades,
sspace_exe=options.sspace,
velvet_exe=options.velvet,
Expand Down
2 changes: 1 addition & 1 deletion ariba/tests/cluster_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def test_get_total_alignment_score(self):
clean_cluster_dir(cluster_dir)
c = cluster.Cluster(cluster_dir, 'name')
got_score = c._get_total_alignment_score('1')
expected_score = 1500
expected_score = 3000
self.assertEqual(got_score, expected_score)
clean_cluster_dir(cluster_dir)

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file added ariba/tests/data/mapping_test_bowtie2_sorted.bam
Binary file not shown.
Binary file added ariba/tests/data/mapping_test_bowtie2_unsorted.bam
Binary file not shown.
28 changes: 28 additions & 0 deletions ariba/tests/data/mapping_test_smalt_reads_1.fq
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
@1/1
AGCCCTCCACAGGATGGTGGTATAC
+
IIIIIIIIIIIIIIIIIIIIIIIII
@2/1
TAATGTTCTTAGGGCTTACCATAGA
+
IIIIIIIIIIIIIIIIIIIIIIIII
@3/1
TCGGGTCTGTACAAGGACGGATGGT
+
IIIIIIIIIIIIIIIIIIIIIIIII
@4/1
CCGCCGGGAAGTCCTTCTGTCGTGC
+
IIIIIIIIIIIIIIIIIIIIIIIII
@5/1
CCTCCACAGGATGGTGGTATACCTG
+
IIIIIIIIIIIIIIIIIIIIIIIII
@6/1
CAGTTGCATGACGTCATGCAGTCAT
+
IIIIIIIIIIIIIIIIIIIIIIIII
@7/1
ACGCCGGGAAGTCCTTCTGTCGTGT
+
IIIIIIIIIIIIIIIIIIIIIIIII
28 changes: 28 additions & 0 deletions ariba/tests/data/mapping_test_smalt_reads_2.fq
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
@1/2
ACCTTTCCCACAAGATCTGTATCCT
+
IIIIIIIIIIIIIIIIIIIIIIIII
@2/2
CGAGTCTGCGCTTAGCTAAGGTGGA
+
IIIIIIIIIIIIIIIIIIIIIIIII
@3/2
CGTACTGACTGACTGACGTACTGCA
+
IIIIIIIIIIIIIIIIIIIIIIIII
@4/2
TTTTAGTGTACCTCTATGGTAAGCC
+
IIIIIIIIIIIIIIIIIIIIIIIII
@5/2
TCTGCGCTTAGCTAAGGTGGATGAA
+
IIIIIIIIIIIIIIIIIIIIIIIII
@6/2
AATGAGTATGATGAGTAATGGTATG
+
IIIIIIIIIIIIIIIIIIIIIIIII
@7/2
ATTTAGTGTACCTCTATGGTAAGCC
+
IIIIIIIIIIIIIIIIIIIIIIIII
5 changes: 5 additions & 0 deletions ariba/tests/data/mapping_test_smalt_ref.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
>ref
TCCACAGGATGGTGGTATACCTGAGGCCAAAGGATACAGATCTTGTGGGAAAGGTCCGCC
GGGAAGTCCTTCTGTCGTGCTTTTTATCGGGTCTGTACAAGGACGGATGGTTTCCGGCAT
ACCATAATGTTCTTAGGGCTTACCATAGAGGTACACTAAAAAGTGTTTCATCCACCTTAG
CTAAGCGCAG
1 change: 1 addition & 0 deletions ariba/tests/data/mapping_test_smalt_ref.fa.fai
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ref 190 5 60 61
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit 2e0675c

Please sign in to comment.