From f088ae9c1cb46dc4e251268702a5a87cd8d22f80 Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Tue, 17 Feb 2015 08:58:19 +0000 Subject: [PATCH 01/10] Do not require velvet --- ariba/tests/cluster_test.py | 16 ++++++++-------- setup.py | 6 +----- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/ariba/tests/cluster_test.py b/ariba/tests/cluster_test.py index e9db8ea3..af46113e 100644 --- a/ariba/tests/cluster_test.py +++ b/ariba/tests/cluster_test.py @@ -71,14 +71,14 @@ def test_set_assembly_kmer(self): clean_cluster_dir(cluster_dir) - def test_assemble_with_velvet(self): - '''test _assemble_with_velvet''' - cluster_dir = os.path.join(data_dir, 'cluster_test_assemble_with_velvet') - clean_cluster_dir(cluster_dir, exclude=set(['gene.reads_mapped.unsorted.bam'])) - c = cluster.Cluster(cluster_dir) - c._assemble_with_velvet() - self.assertEqual(c.status_flag.to_number(), 0) - clean_cluster_dir(cluster_dir, exclude=set(['gene.reads_mapped.unsorted.bam'])) + #def test_assemble_with_velvet(self): + # '''test _assemble_with_velvet''' + # cluster_dir = os.path.join(data_dir, 'cluster_test_assemble_with_velvet') + # clean_cluster_dir(cluster_dir, exclude=set(['gene.reads_mapped.unsorted.bam'])) + # c = cluster.Cluster(cluster_dir) + # c._assemble_with_velvet() + # self.assertEqual(c.status_flag.to_number(), 0) + # clean_cluster_dir(cluster_dir, exclude=set(['gene.reads_mapped.unsorted.bam'])) def test_assemble_with_spades(self): diff --git a/setup.py b/setup.py index 4c50e353..595ea63d 100644 --- a/setup.py +++ b/setup.py @@ -11,6 +11,7 @@ 'SSPACE_Basic_v2.0.pl', 'samtools', 'smalt', + 'spades.py', ] print('Checking that dependencies are found in path...') @@ -27,11 +28,6 @@ sys.exit(1) -if not( (shutil.which('velveth') and shutil.which('velvetg')) or shutil.which('spades.py') ): - print('Must have velvet (velveth and velvetg) or spades.py installed. Cannot continue', file=sys.stderr) - sys.exit(1) - - print('... all dependencies found') From d3f030be6c32a3c1f38eb5ce3c0069350948ffc2 Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Tue, 17 Feb 2015 13:02:32 +0000 Subject: [PATCH 02/10] Use spades by default ; check dependencies --- ariba/__init__.py | 1 + ariba/cluster.py | 6 +-- ariba/clusters.py | 3 ++ ariba/external_progs.py | 87 +++++++++++++++++++++++++++++++++++++++++ ariba/tasks/run.py | 30 ++++++++------ 5 files changed, 112 insertions(+), 15 deletions(-) create mode 100644 ariba/external_progs.py diff --git a/ariba/__init__.py b/ariba/__init__.py index bc52112f..2c86f770 100644 --- a/ariba/__init__.py +++ b/ariba/__init__.py @@ -3,6 +3,7 @@ 'cluster', 'clusters', 'common', + 'external_progs', 'flag', 'histogram', 'link', diff --git a/ariba/cluster.py b/ariba/cluster.py index 37eff498..7f432940 100644 --- a/ariba/cluster.py +++ b/ariba/cluster.py @@ -20,7 +20,6 @@ def __init__(self, nucmer_min_id=90, nucmer_min_len=50, nucmer_breaklen=50, - spades='spades.py', spades_only_assembler=False, sspace_k=20, reads_insert=500, @@ -36,6 +35,7 @@ def __init__(self, bcftools_exe='bcftools', gapfiller_exe='GapFiller.pl', samtools_exe='samtools', + spades_exe='spades.py', sspace_exe='SSPACE_Basic_v2.0.pl', velvet_exe='velvet', # prefix of velvet{g,h} ): @@ -71,8 +71,8 @@ def __init__(self, self._set_assembly_kmer(assembly_kmer) self.assembler = assembler assert self.assembler in ['velvet', 'spades'] - self.spades = spades self.spades_only_assembler = spades_only_assembler + self.spades_exe = spades_exe self.bcftools_exe = bcftools_exe @@ -185,7 +185,7 @@ def _assemble_with_velvet(self): def _assemble_with_spades(self, unittest=False): cmd = ' '.join([ - self.spades, + self.spades_exe, '-1', self.reads1, '-2', self.reads2, '-o', self.assembler_dir, diff --git a/ariba/clusters.py b/ariba/clusters.py index 31c4116b..63d2795b 100644 --- a/ariba/clusters.py +++ b/ariba/clusters.py @@ -33,6 +33,7 @@ def __init__(self, bcftools_exe='bcftools', gapfiller_exe='GapFiller.pl', samtools_exe='samtools', + spades_exe='spades.py', sspace_exe='SSPACE_Basic_v2.0.pl', velvet_exe='velvet', # prefix of velvet{g,h} ): @@ -83,6 +84,7 @@ def __init__(self, self.gapfiller_exe = os.path.realpath(self.gapfiller_exe) # otherwise gapfiller dies loading packages self.samtools_exe = samtools_exe + self.spades_exe = spades_exe self.sspace_exe = shutil.which(sspace_exe) if self.sspace_exe is None: @@ -248,6 +250,7 @@ def _init_and_run_clusters(self): bcftools_exe=self.bcftools_exe, gapfiller_exe=self.gapfiller_exe, samtools_exe=self.samtools_exe, + spades_exe=self.spades_exe, sspace_exe=self.sspace_exe, velvet_exe=self.velvet, ) diff --git a/ariba/external_progs.py b/ariba/external_progs.py new file mode 100644 index 00000000..f345efd3 --- /dev/null +++ b/ariba/external_progs.py @@ -0,0 +1,87 @@ +import shutil +import subprocess +import os +from distutils.version import LooseVersion +import re +import sys +import pyfastaq +from ariba import common + +class Error (Exception): pass + +def is_in_path(prog): + return shutil.which(prog) is not None + + +prog_to_version_cmd = { + 'bcftools': ('bcftools', re.compile('^Version: ([0-9\.]+)')), + 'nucmer': ('nucmer --version', re.compile('^NUCmer \(NUCleotide MUMmer\) version ([0-9\.]+)')), + 'smalt': ('smalt version', re.compile('^Version: ([0-9\.]+)')), + 'spades': ('spades.py', re.compile('^SPAdes genome assembler v.([0-9\.]+)')), + 'samtools': ('samtools', re.compile('^Version: ([0-9\.]+)')), + 'sspace': (None, re.compile('^Usage: .*pl \[SSPACE_(.*)\]')), + 'gapfiller': (None, re.compile('^Usage: .*pl \[GapFiller_(.*)\]')) +} + + +def get_version(prog, path=None): + assert prog in prog_to_version_cmd + if path is None: + path = prog + if not is_in_path(path): + raise Error('Error getting version of ' + path + ' - not found in path.') + + if prog in ['sspace', 'gapfiller']: + cmd = 'perl ' + os.path.realpath(shutil.which(path)) + regex = prog_to_version_cmd[prog][1] + else: + cmd, regex = prog_to_version_cmd[prog] + + cmd_output = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() + cmd_output = common.decode(cmd_output[0]).split('\n')[:-1] + common.decode(cmd_output[1]).split('\n')[:-1] + for line in cmd_output: + hits = regex.search(line) + if hits: + return hits.group(1) + return 'UNKNOWN ...\n I tried running this to get the version: "' + cmd + '"\n and the output didn\'t match this regular expression: "' + regex.pattern + '"' + + + +def check_versions(opts, verbose=False): + if verbose: + print('\nChecking depencies and their versions ...\n') + print('tool', 'path', 'version', sep='\t') + + l = [ + ('bcftools', opts.bcftools), + ('nucmer', None), + ('smalt', opts.smalt), + ('spades', opts.spades), + ('samtools', opts.samtools), + ('sspace', opts.sspace), + ('gapfiller', opts.gapfiller), + ] + + min_versions = { + 'bcftools': '1.2', + 'samtools': '1.2', + 'nucmer': '3.1', + 'spades': '3.5.0', + 'smalt': '0.7.6', + } + + errors = [] + + for t in l: + version = get_version(t[0], path=t[1]) + if verbose: + print(t[0], t[1], version, sep='\t') + + if t[0] in min_versions and LooseVersion(version) < LooseVersion(min_versions[t[0]]): + errors.append(' '.join(['Found version', version, 'of', t[0], 'which is too low! Please update to at least', min_versions[t[0]]])) + + if len(errors): + for e in errors: + print(e, file=sys.stderr) + raise Error('Cannot continue. Some dependencies need updating') + diff --git a/ariba/tasks/run.py b/ariba/tasks/run.py index fbe17dc3..f1692e91 100644 --- a/ariba/tasks/run.py +++ b/ariba/tasks/run.py @@ -22,26 +22,31 @@ def run(): nucmer_group.add_argument('--nucmer_min_len', type=int, help='Minimum alignment length (delta-filter -i) [%(default)s]', default=50, metavar='INT') nucmer_group.add_argument('--nucmer_breaklen', type=int, help='Value to use for -breaklen when running nucmer [%(default)s]', default=50, metavar='INT') - executables_group = parser.add_argument_group('executables locations') - executables_group.add_argument('--bcftools', help='Path to bcftools executable [%(default)s]', default='bcftools', metavar='PATH') - executables_group.add_argument('--gapfiller', help='Path to GapFiller.pl [%(default)s]', default='GapFiller.pl', metavar='PATH') - executables_group.add_argument('--samtools', help='Path to samtools executable [%(default)s]', default='samtools', metavar='PATH') - executables_group.add_argument('--sspace', help='Path to SSPACE_Basic_v2.0.pl [%(default)s]', default='SSPACE_Basic_v2.0.pl', metavar='PATH') - executables_group.add_argument('--velvet', help='Path to prefix of velvet{g,h} [%(default)s]', default='velvet', metavar='PATH') + assembly_group = parser.add_argument_group('Assembly options') + assembly_group.add_argument('--min_scaff_depth', type=int, help='Minimum number of read pairs needed as evidence for scaffold link between two contigs. This is also the value used for sspace -k when scaffolding [%(default)s]', default=10, metavar='INT') + allowed_assemblers = ['velvet', 'spades'] + assembly_group.add_argument('--assembler', help='Assembler to use. Available options: ' + ','.join(allowed_assemblers) + ' [%(default)s]', choices=allowed_assemblers, default='spades', metavar='Assembler') + assembly_group.add_argument('--assembler_k', help='kmer size to use with assembler. Default is 2/3 of the read length', metavar='INT') + assembly_group.add_argument('--spades_only_assembler', action='store_true', help='Use --only-assembler when running SPAdes') other_group = parser.add_argument_group('Other options') other_group.add_argument('--genetic_code', type=int, help='Number of genetic code to use. Currently supported 1 (default) or 4', choices=[1,4], default=1, metavar='INT') - other_group.add_argument('--min_scaff_depth', type=int, help='Minimum number of read pairs needed as evidence for scaffold link between two contigs. This is also the value used for sspace -k when scaffolding [%(default)s]', default=10, metavar='INT') - allowed_assemblers = ['velvet', 'spades'] - other_group.add_argument('--assembler', help='Assembler to use. Although SPAdes can be used, it is experimental and velvet is currently recommended. Available options: ' + ','.join(allowed_assemblers) + ' [%(default)s]', choices=allowed_assemblers, default='velvet', metavar='Assembler') - other_group.add_argument('--assembler_k', help='kmer size to use with assembler. Default is 2/3 of the read length', metavar='INT') - other_group.add_argument('--spades_only_assembler', action='store_true', help='Use --only-assembler when running SPAdes') other_group.add_argument('--threads', type=int, help='Number of threads for smalt and spades [%(default)s]', default=1, metavar='INT') other_group.add_argument('--assembled_threshold', type=float, help='If proportion of gene assembled (regardless of into how many contigs) is at least this value then the flag gene_assembled is set [%(default)s]', default=0.95, metavar='FLOAT (between 0 and 1)') other_group.add_argument('--unique_threshold', type=float, help='If proportion of bases in gene assembled more than once is <= this value, then the flag unique_contig is set [%(default)s]', default=0.03, metavar='FLOAT (between 0 and 1)') other_group.add_argument('--verbose', action='store_true', help='Be verbose') - options = parser.parse_args() + executables_group = parser.add_argument_group('executables locations') + executables_group.add_argument('--bcftools', help='Path to bcftools executable [%(default)s]', default='bcftools', metavar='PATH') + executables_group.add_argument('--gapfiller', help='Path to GapFiller executable [%(default)s]', default='GapFiller.pl', metavar='PATH') + executables_group.add_argument('--samtools', help='Path to samtools executable [%(default)s]', default='samtools', metavar='PATH') + executables_group.add_argument('--smalt', help='Path to SMALT executable [%(default)s]', default='smalt', metavar='PATH') + executables_group.add_argument('--spades', help='Path to SPAdes executable [%(default)s]', default='spades.py', metavar='PATH') + executables_group.add_argument('--sspace', help='Path to SSPACE executable [%(default)s]', default='SSPACE_Basic_v2.0.pl', metavar='PATH') + executables_group.add_argument('--velvet', help='Path to prefix of velvet{g,h} [%(default)s]', default='velvet', metavar='PATH') + + options = parser.parse_args() + ariba.external_progs.check_versions(options, verbose=options.verbose) pyfastaq.sequences.codon2aa = pyfastaq.genetic_codes.codes[options.genetic_code] c = ariba.clusters.Clusters( @@ -66,6 +71,7 @@ def run(): bcftools_exe=options.bcftools, gapfiller_exe=options.gapfiller, samtools_exe=options.samtools, + spades_exe=options.spades, sspace_exe=options.sspace, velvet_exe=options.velvet, ) From 95e8d9a5a4ebfe7327552538235695c0fe46fd6b Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Tue, 17 Feb 2015 13:14:51 +0000 Subject: [PATCH 03/10] Add --spades_other option, remove --spades_only_assembler option --- ariba/cluster.py | 8 ++++---- ariba/clusters.py | 6 +++--- ariba/tasks/run.py | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ariba/cluster.py b/ariba/cluster.py index 7f432940..38ea162e 100644 --- a/ariba/cluster.py +++ b/ariba/cluster.py @@ -20,7 +20,6 @@ def __init__(self, nucmer_min_id=90, nucmer_min_len=50, nucmer_breaklen=50, - spades_only_assembler=False, sspace_k=20, reads_insert=500, sspace_sd=0.4, @@ -38,6 +37,7 @@ def __init__(self, spades_exe='spades.py', sspace_exe='SSPACE_Basic_v2.0.pl', velvet_exe='velvet', # prefix of velvet{g,h} + spades_other=None, ): self.root_dir = os.path.abspath(root_dir) @@ -71,8 +71,8 @@ def __init__(self, self._set_assembly_kmer(assembly_kmer) self.assembler = assembler assert self.assembler in ['velvet', 'spades'] - self.spades_only_assembler = spades_only_assembler self.spades_exe = spades_exe + self.spades_other = spades_other self.bcftools_exe = bcftools_exe @@ -192,8 +192,8 @@ def _assemble_with_spades(self, unittest=False): '-k', str(self.assembly_kmer), '--threads', str(self.threads), ]) - if self.spades_only_assembler: - cmd += ' --only-assembler' + if self.spades_other is not None: + cmd += ' ' + self.spades_other cwd = os.getcwd() os.chdir(self.assembly_dir) diff --git a/ariba/clusters.py b/ariba/clusters.py index 63d2795b..a9bc993d 100644 --- a/ariba/clusters.py +++ b/ariba/clusters.py @@ -22,7 +22,7 @@ def __init__(self, smalt_k=13, smalt_s=2, smalt_min_id=0.9, - spades_only_assembler=False, + spades_other=None, max_insert=1000, min_scaff_depth=10, nucmer_min_id=90, @@ -45,7 +45,7 @@ def __init__(self, self.assembler = assembler assert self.assembler in ['velvet', 'spades'] self.assembly_kmer = assembly_kmer - self.spades_only_assembler = spades_only_assembler + self.spades_other = spades_other self.bam_prefix = os.path.join(self.outdir, 'map_all_reads') self.bam = self.bam_prefix + '.bam' @@ -239,7 +239,6 @@ def _init_and_run_clusters(self): nucmer_min_id=self.nucmer_min_id, nucmer_min_len=self.nucmer_min_len, nucmer_breaklen=self.nucmer_breaklen, - spades_only_assembler=self.spades_only_assembler, sspace_k=self.min_scaff_depth, reads_insert=self.insert_size, sspace_sd=self.insert_sspace_sd, @@ -253,6 +252,7 @@ def _init_and_run_clusters(self): spades_exe=self.spades_exe, sspace_exe=self.sspace_exe, velvet_exe=self.velvet, + spades_other=self.spades_other ) self.clusters[gene].run() diff --git a/ariba/tasks/run.py b/ariba/tasks/run.py index f1692e91..e1a233fc 100644 --- a/ariba/tasks/run.py +++ b/ariba/tasks/run.py @@ -23,11 +23,11 @@ def run(): nucmer_group.add_argument('--nucmer_breaklen', type=int, help='Value to use for -breaklen when running nucmer [%(default)s]', default=50, metavar='INT') assembly_group = parser.add_argument_group('Assembly options') - assembly_group.add_argument('--min_scaff_depth', type=int, help='Minimum number of read pairs needed as evidence for scaffold link between two contigs. This is also the value used for sspace -k when scaffolding [%(default)s]', default=10, metavar='INT') allowed_assemblers = ['velvet', 'spades'] assembly_group.add_argument('--assembler', help='Assembler to use. Available options: ' + ','.join(allowed_assemblers) + ' [%(default)s]', choices=allowed_assemblers, default='spades', metavar='Assembler') + assembly_group.add_argument('--min_scaff_depth', type=int, help='Minimum number of read pairs needed as evidence for scaffold link between two contigs. This is also the value used for sspace -k when scaffolding [%(default)s]', default=10, metavar='INT') assembly_group.add_argument('--assembler_k', help='kmer size to use with assembler. Default is 2/3 of the read length', metavar='INT') - assembly_group.add_argument('--spades_only_assembler', action='store_true', help='Use --only-assembler when running SPAdes') + assembly_group.add_argument('--spades_other', help='Put options string to be used with spades in quotes. This will NOT be sanity checked. Do not use -k or -t: for these options you should use the ariba run options --assembler_k and --threads') other_group = parser.add_argument_group('Other options') other_group.add_argument('--genetic_code', type=int, help='Number of genetic code to use. Currently supported 1 (default) or 4', choices=[1,4], default=1, metavar='INT') @@ -65,7 +65,7 @@ def run(): nucmer_min_id=options.nucmer_min_id, nucmer_min_len=options.nucmer_min_len, nucmer_breaklen=options.nucmer_breaklen, - spades_only_assembler=options.spades_only_assembler, + spades_other=options.spades_other, assembled_threshold=options.assembled_threshold, unique_threshold=options.unique_threshold, bcftools_exe=options.bcftools, From 0cee26ef13f946cb2069e1f57c3b0ce99addc958 Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Tue, 17 Feb 2015 13:47:30 +0000 Subject: [PATCH 04/10] Improve verbose output; use --trusted-contigs --- ariba/cluster.py | 23 +++++++++++++---------- ariba/clusters.py | 22 ++++++++++++++++++++-- ariba/external_progs.py | 4 +++- 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/ariba/cluster.py b/ariba/cluster.py index 38ea162e..6e762cab 100644 --- a/ariba/cluster.py +++ b/ariba/cluster.py @@ -140,6 +140,17 @@ def _set_assembly_kmer(self, k): def _assemble_with_velvet(self): + # map reads to reference gene to make BAM input to velvet columbus + mapping.run_smalt( + self.reads1, + self.reads2, + self.gene_fa, + self.gene_bam[:-4], + threads=self.threads, + sort=True, + verbose=self.verbose, + ) + cmd = ' '.join([ self.velveth, self.assembler_dir, @@ -191,6 +202,7 @@ def _assemble_with_spades(self, unittest=False): '-o', self.assembler_dir, '-k', str(self.assembly_kmer), '--threads', str(self.threads), + '--trusted-contigs', self.gene_fa, ]) if self.spades_other is not None: cmd += ' ' + self.spades_other @@ -659,16 +671,6 @@ def _make_report_lines(self): def run(self): - # map reads to reference gene - mapping.run_smalt( - self.reads1, - self.reads2, - self.gene_fa, - self.gene_bam[:-4], - threads=self.threads, - sort=True, - ) - if self.assembler == 'velvet': self._assemble_with_velvet() elif self.assembler == 'spades': @@ -696,6 +698,7 @@ def run(self): self.final_assembly_bam[:-4], threads=self.threads, sort=True, + verbose=self.verbose, ) self._parse_assembly_bam() diff --git a/ariba/clusters.py b/ariba/clusters.py index a9bc993d..cf041ebc 100644 --- a/ariba/clusters.py +++ b/ariba/clusters.py @@ -108,7 +108,8 @@ def _map_reads(self): index_k=self.smalt_k, index_s=self.smalt_s, threads=self.threads, - minid=self.smalt_min_id + minid=self.smalt_min_id, + verbose=self.verbose, ) @@ -203,9 +204,11 @@ def _set_insert_size_data(self): (x, self.insert_size, pc95, self.insert_sspace_sd) = self.insert_hist.stats() self.insert_proper_pair_max = 1.1 * pc95 if self.verbose: + print('\nInsert size information from reads mapped to reference genes:') print('Insert size:', self.insert_size, sep='\t') print('Insert sspace sd:', self.insert_sspace_sd, sep='\t') print('Max insert:', self.insert_proper_pair_max, sep='\t') + print() def _write_gene_fa(self, gene_name, outfile): @@ -225,9 +228,12 @@ def _init_and_run_clusters(self): if len(self.cluster_to_dir) == 0: raise Error('Did not get any reads mapped to genes. Cannot continue') + counter = 0 + for gene in sorted(self.cluster_to_dir): + counter += 1 if self.verbose: - print('Running', gene) + print('\nAssembling cluster', counter, 'of', str(len(self.cluster_to_dir)) + ':', gene) new_dir = self.cluster_to_dir[gene] self._write_gene_fa(gene, os.path.join(new_dir, 'gene.fa')) self.clusters[gene] = cluster.Cluster( @@ -296,8 +302,20 @@ def _write_reports(self): def run(self): + if self.verbose: + print('{:_^79}'.format(' Mapping reads to reference genes ')) self._map_reads() + if self.verbose: + print('Finished mapping\n') + print('{:_^79}'.format(' Generating clusters ')) self._bam_to_clusters_reads() self._set_insert_size_data() + if self.verbose: + print('{:_^79}'.format(' Assembling each cluster ')) self._init_and_run_clusters() + if self.verbose: + print('Finished assembling clusters\n') + print('{:_^79}'.format(' Writing report files ')) self._write_reports() + if self.verbose: + print('Finished writing report files. All done!') diff --git a/ariba/external_progs.py b/ariba/external_progs.py index f345efd3..12cd9b0c 100644 --- a/ariba/external_progs.py +++ b/ariba/external_progs.py @@ -49,7 +49,7 @@ def get_version(prog, path=None): def check_versions(opts, verbose=False): if verbose: - print('\nChecking depencies and their versions ...\n') + print('{:_^79}'.format(' Checking dependencies and their versions ')) print('tool', 'path', 'version', sep='\t') l = [ @@ -85,3 +85,5 @@ def check_versions(opts, verbose=False): print(e, file=sys.stderr) raise Error('Cannot continue. Some dependencies need updating') + if verbose: + print('\n... dependencies look OK.\n') From b9bb4d4beb48161e70ed1264dd6accd26fa3cf7d Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Tue, 17 Feb 2015 14:03:29 +0000 Subject: [PATCH 05/10] Be verbose when required --- ariba/mapping.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ariba/mapping.py b/ariba/mapping.py index 3c2622eb..b4718830 100644 --- a/ariba/mapping.py +++ b/ariba/mapping.py @@ -15,7 +15,8 @@ def run_smalt( max_insert=1000, minid=0.9, sort=False, - extra_smalt_map_ops='-x' + extra_smalt_map_ops='-x', + verbose=False ): if extra_smalt_map_ops is None: extra_smalt_map_ops = '' @@ -59,15 +60,15 @@ def run_smalt( intermediate_bam = final_bam map_cmd += ' -bS -T ' + ref_fa + ' - > ' + intermediate_bam - common.syscall(index_cmd) - common.syscall(map_cmd) + common.syscall(index_cmd, verbose=verbose) + common.syscall(map_cmd, verbose=verbose) if sort: threads = min(4, threads) thread_mem = int(500 / threads) sort_cmd = 'samtools sort -@' + str(threads) + ' -m ' + str(thread_mem) + 'M ' + intermediate_bam + ' ' + out_prefix index_cmd = 'samtools index ' + final_bam - common.syscall(sort_cmd) - common.syscall(index_cmd) + common.syscall(sort_cmd, verbose=verbose) + common.syscall(index_cmd, verbose=verbose) for fname in clean_files: os.unlink(fname) From 848494f51a32584ff33f66c78b6739212ac095fe Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Tue, 17 Feb 2015 14:17:06 +0000 Subject: [PATCH 06/10] Only check path/version for assembler being used --- ariba/external_progs.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/ariba/external_progs.py b/ariba/external_progs.py index 12cd9b0c..3ca89cc9 100644 --- a/ariba/external_progs.py +++ b/ariba/external_progs.py @@ -20,7 +20,9 @@ def is_in_path(prog): 'spades': ('spades.py', re.compile('^SPAdes genome assembler v.([0-9\.]+)')), 'samtools': ('samtools', re.compile('^Version: ([0-9\.]+)')), 'sspace': (None, re.compile('^Usage: .*pl \[SSPACE_(.*)\]')), - 'gapfiller': (None, re.compile('^Usage: .*pl \[GapFiller_(.*)\]')) + 'gapfiller': (None, re.compile('^Usage: .*pl \[GapFiller_(.*)\]')), + 'velvetg': ('velvetg', re.compile('Version ([0-9\.]+)')), + 'velveth': ('velveth', re.compile('Version ([0-9\.]+)')), } @@ -56,18 +58,27 @@ def check_versions(opts, verbose=False): ('bcftools', opts.bcftools), ('nucmer', None), ('smalt', opts.smalt), - ('spades', opts.spades), ('samtools', opts.samtools), ('sspace', opts.sspace), ('gapfiller', opts.gapfiller), ] - + + if opts.assembler == 'spades': + l.append(('spades', opts.spades)) + elif opts.assembler == 'velvet': + l.append(('velvetg', opts.velvet + 'g')) + l.append(('velveth', opts.velvet + 'h')) + else: + raise Error('Assembler ' + opts.assembler + ' not recognised. Cannot continue') + min_versions = { 'bcftools': '1.2', 'samtools': '1.2', 'nucmer': '3.1', 'spades': '3.5.0', 'smalt': '0.7.6', + 'velvetg': '1.2.07', + 'velveth': '1.2.07', } errors = [] From a1dee8eac47a76b4d5f058c4e14114456dcd2efd Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Tue, 17 Feb 2015 14:20:01 +0000 Subject: [PATCH 07/10] Tweak usage --- scripts/ariba | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ariba b/scripts/ariba index 1316a7b3..2dfaa0d5 100755 --- a/scripts/ariba +++ b/scripts/ariba @@ -19,7 +19,7 @@ ordered_tasks = [ def print_usage_and_exit(): - print('Usage: ariba [options]', file=sys.stderr) + print('Usage: ariba [options] ', file=sys.stderr) print('\nTo get minimal usage for a command use:\nariba command', file=sys.stderr) print('\nTo get full help for a command use one of:\nariba command -h\nariba command --help\n', file=sys.stderr) print('\nAvailable commands:\n', file=sys.stderr) From 5e0c2c2564e47d19f3fcb74a7fd5dc647943e423 Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Tue, 17 Feb 2015 14:21:09 +0000 Subject: [PATCH 08/10] Do not check for spades --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 595ea63d..75f24f65 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,6 @@ 'SSPACE_Basic_v2.0.pl', 'samtools', 'smalt', - 'spades.py', ] print('Checking that dependencies are found in path...') From 1c436d57abfacac42f8dc4558cb05ce9d1c84131 Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Tue, 17 Feb 2015 14:27:02 +0000 Subject: [PATCH 09/10] Require smalt version 0.7.4 or higher --- ariba/external_progs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ariba/external_progs.py b/ariba/external_progs.py index 3ca89cc9..5a8d024c 100644 --- a/ariba/external_progs.py +++ b/ariba/external_progs.py @@ -76,7 +76,7 @@ def check_versions(opts, verbose=False): 'samtools': '1.2', 'nucmer': '3.1', 'spades': '3.5.0', - 'smalt': '0.7.6', + 'smalt': '0.7.4', 'velvetg': '1.2.07', 'velveth': '1.2.07', } From 3d0a3ba44b184edad3c9128e9ae6ec819715a708 Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Tue, 17 Feb 2015 14:27:39 +0000 Subject: [PATCH 10/10] Mummer, SPAdes dependency; tweak usage --- README.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 55990e56..cee5f85b 100644 --- a/README.md +++ b/README.md @@ -11,8 +11,10 @@ Dependencies: * [samtools and bcftools] [samtools] version >= 1.2 * [SSPACE-basic scaffolder] [sspace] * [GapFiller] [gapfiller] - * [SMALT] [smalt] - * [Velvet] [velvet] + * [MUMmer] [mummer] vesrion >= 3.23 + * [SMALT] [smalt] version >= 0.7.4 + * Either [SPAdes] [spades] version >= 3.5.0 or [Velvet] [velvet] version >= 1.2.07 + (SPAdes is recommended) Install with pip: @@ -35,7 +37,7 @@ Usage The installation will put a single script called `ariba` in your path. The usage is: - ariba [options] + ariba [options] Run just `ariba` to get a list of tasks. Use `-h` or `--help` with a task to get the help, for example @@ -48,8 +50,10 @@ To run the pipeline to identify the genes present in a pair of FASTQ files: - [smalt]: https://www.sanger.ac.uk/resources/software/smalt/ - [sspace]: http://www.baseclear.com/genomics/bioinformatics/basetools/SSPACE [gapfiller]: http://www.baseclear.com/genomics/bioinformatics/basetools/gapfiller + [mummer]: http://mummer.sourceforge.net/ [samtools]: http://www.htslib.org/ + [smalt]: https://www.sanger.ac.uk/resources/software/smalt/ + [spades]: http://bioinf.spbau.ru/spades + [sspace]: http://www.baseclear.com/genomics/bioinformatics/basetools/SSPACE [velvet]: http://www.ebi.ac.uk/~zerbino/velvet/