Merge pull request #105 from martinghunt/use_subcommands
Use subcommands
martinghunt authored Jul 20, 2016
2 parents 3a99849 + d8ad208 commit 50c5949
Showing 15 changed files with 224 additions and 191 deletions.
6 changes: 3 additions & 3 deletions ariba/clusters.py
@@ -100,7 +100,7 @@ def __init__(self,
self.bam = self.bam_prefix + '.bam'
self.report_file_all_tsv = os.path.join(self.outdir, 'report.all.tsv')
self.report_file_all_xls = os.path.join(self.outdir, 'report.all.xls')
- self.report_file_filtered_prefix = os.path.join(self.outdir, 'report')
+ self.report_file_filtered = os.path.join(self.outdir, 'report.tsv')
self.catted_assembled_seqs_fasta = os.path.join(self.outdir, 'assembled_seqs.fa.gz')
self.catted_genes_matching_refs_fasta = os.path.join(self.outdir, 'assembled_genes.fa.gz')
self.threads = threads
@@ -558,9 +558,9 @@ def _run(self):
self._write_reports(self.clusters, self.report_file_all_tsv)

if self.verbose:
- print('Making', self.report_file_filtered_prefix + '.tsv')
+ print('Making', self.report_file_filtered)
rf = report_filter.ReportFilter(infile=self.report_file_all_tsv)
- rf.run(self.report_file_filtered_prefix)
+ rf.run(self.report_file_filtered)

if self.verbose:
print()
4 changes: 2 additions & 2 deletions ariba/report_filter.py
@@ -210,7 +210,7 @@ def _write_report_xls(self, outfile):
workbook.save(outfile)


- def run(self, outprefix):
+ def run(self, outfile):
self._filter_dicts()
- self._write_report_tsv(outprefix + '.tsv')
+ self._write_report_tsv(outfile)
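
With this change, ReportFilter.run() takes the full output filename instead of a prefix, so the caller decides the extension rather than having '.tsv' appended inside run(). A minimal usage sketch under that assumption, reusing the report filenames from clusters.py above (illustrative only, not part of this diff):

from ariba import report_filter

# Old behaviour: rf.run('report') wrote 'report.tsv'.
# New behaviour: the output filename is passed through unchanged.
rf = report_filter.ReportFilter(infile='report.all.tsv')
rf.run('report.tsv')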

14 changes: 14 additions & 0 deletions ariba/tasks/__init__.py
@@ -0,0 +1,14 @@
__all__ = [
'aln2meta',
'flag',
'getref',
'prepareref',
'refquery',
'reportfilter',
'run',
'summary',
'test',
'version',
]

from ariba.tasks import *
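
The new ariba.tasks package gives every subcommand a uniform run(options) entry point, with the per-task argparse code removed in the files below. The central dispatcher itself is not part of this diff; the following is only a sketch, assuming an argparse sub-parser setup in the top-level ariba script (the 'flag' arguments are taken from this PR, the rest is illustrative):

import argparse
from ariba import tasks

# Hypothetical subcommand dispatcher: each sub-parser hands its parsed
# options to the matching ariba.tasks.<name>.run(options) function.
parser = argparse.ArgumentParser(prog='ariba')
subparsers = parser.add_subparsers(title='Available commands')

subparser_flag = subparsers.add_parser('flag', help='Translate the meaning of a flag output by ARIBA')
subparser_flag.add_argument('flag_in', type=int, help='Flag to be translated (an integer)', metavar='flag')
subparser_flag.set_defaults(func=tasks.flag.run)

subparser_version = subparsers.add_parser('version', help='Report version and exit')
subparser_version.set_defaults(func=tasks.version.run)

options = parser.parse_args()
options.func(options)  # dispatch to the chosen task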
16 changes: 1 addition & 15 deletions ariba/tasks/aln2meta.py
@@ -2,21 +2,7 @@
from ariba import aln_to_metadata


def run():
coding_choices = ['coding', 'noncoding']
parser = argparse.ArgumentParser(
description = 'Converts multi-alignment fasta and SNP info to metadata',
usage = 'ariba aln2meta [options] <aln_fasta> <variants_tsv> <(non)coding> <cluster_rep> <outprefix>'
)

parser.add_argument('--genetic_code', type=int, help='Number of genetic code to use. Currently supported 1,4,11 [%(default)s]', choices=[1,4,11], default=11, metavar='INT')
parser.add_argument('aln_fasta', help='Multi-fasta file of alignments')
parser.add_argument('variants_tsv', help='TSV file of variants information')
parser.add_argument('coding_or_non', help='Sequences are coding or noncoding. Must be one of: ' + ' '.join(coding_choices), choices=coding_choices, metavar='(non)coding')
parser.add_argument('cluster_rep', help='Name of sequence to be used as cluster representative. Must exactly match a sequence in aln_fasta file')
parser.add_argument('outprefix', help='Prefix of output filenames')
options = parser.parse_args()

def run(options):
aln_to_meta = aln_to_metadata.AlnToMetadata(
options.aln_fasta,
options.variants_tsv,
8 changes: 1 addition & 7 deletions ariba/tasks/flag.py
@@ -1,13 +1,7 @@
import argparse
import ariba

def run():
parser = argparse.ArgumentParser(
description = 'Translate the meaning of a flag output by ARIBA',
usage = 'ariba flag <flag>')
parser.add_argument('flag_in', type=int, help='Flag to be translated (an integer)', metavar='flag')
options = parser.parse_args()

def run(options):
f = ariba.flag.Flag(options.flag_in)
print('Meaning of flag', f)
print(f.to_long_string())
13 changes: 1 addition & 12 deletions ariba/tasks/getref.py
@@ -2,18 +2,7 @@
from ariba import ref_genes_getter


def run():
allowed_dbs = ['argannot', 'card', 'resfinder','vfdb']
parser = argparse.ArgumentParser(
description = 'Downloads reference data',
usage = 'ariba getref [options] <' + '|'.join(allowed_dbs) + '> <outprefix>'
)

parser.add_argument('--genetic_code', type=int, help='Number of genetic code to use. Currently supported 1,4,11 [%(default)s]', choices=[1,4,11], default=11, metavar='INT')
parser.add_argument('db', help='Database to download. Must be one of: ' + ' '.join(allowed_dbs), choices=allowed_dbs)
parser.add_argument('outprefix', help='Prefix of output filenames')
options = parser.parse_args()

def run(options):
getter = ref_genes_getter.RefGenesGetter(options.db, genetic_code=options.genetic_code)
getter.run(options.outprefix)

26 changes: 1 addition & 25 deletions ariba/tasks/prepareref.py
@@ -2,31 +2,7 @@
import argparse
from ariba import ref_preparer, external_progs, versions

def run():
parser = argparse.ArgumentParser(
description = 'ARIBA: Antibiotic Resistance Identification By Assembly',
usage = 'ariba prepareref [options] <outdir>',
epilog = 'REQUIRED: -f and -m must each be used at least once')
input_group = parser.add_argument_group('input files options')
input_group.add_argument('-f', '--fasta', action='append', dest='fasta_files', required=True, help='REQUIRED. Name of fasta file. Can be used more than once if your sequences are spread over more than on file', metavar='FILENAME')
input_group.add_argument('-m', '--metadata', action='append', dest='tsv_files', required=True, help='REQUIRED. Name of tsv file of metadata about the input sequences. Can be used more than once if your metadata is spread over more than one file', metavar='FILENAME')

cdhit_group = parser.add_argument_group('cd-hit options')
cdhit_group.add_argument('--no_cdhit', action='store_true', help='Do not run cd-hit. Each input sequence is put into its own "cluster". Incompatible with --cdhit_clusters.')
cdhit_group.add_argument('--cdhit_clusters', help='File specifying how the sequences should be clustered. Will be used instead of running cdhit. Format is one cluster per line. Sequence names separated by whitespace. First name in line is the cluster representative. Incompatible with --no_cdhit', metavar='FILENAME')
cdhit_group.add_argument('--cdhit_min_id', type=float, help='Sequence identity threshold (cd-hit option -c) [%(default)s]', default=0.9, metavar='FLOAT')
cdhit_group.add_argument('--cdhit_min_length', type=float, help='length difference cutoff (cd-hit option -s) [%(default)s]', default=0.9, metavar='FLOAT')

other_group = parser.add_argument_group('other options')
other_group.add_argument('--min_gene_length', type=int, help='Minimum allowed length in nucleotides of reference genes [%(default)s]', metavar='INT', default=6)
other_group.add_argument('--max_gene_length', type=int, help='Maximum allowed length in nucleotides of reference genes [%(default)s]', metavar='INT', default=10000)
other_group.add_argument('--genetic_code', type=int, help='Number of genetic code to use. Currently supported 1,4,11 [%(default)s]', choices=[1,4,11], default=11, metavar='INT')
other_group.add_argument('--threads', type=int, help='Number of threads (currently only applies to cdhit) [%(default)s]', default=1, metavar='INT')
other_group.add_argument('--verbose', action='store_true', help='Be verbose')

parser.add_argument('outdir', help='Output directory (must not already exist)')
options = parser.parse_args()

def run(options):
if options.no_cdhit and options.cdhit_clusters is not None:
sys.exit('Cannot use both --no_cdhit and --cdhit_clusters. Neither or exactly one of those options must be used')

11 changes: 1 addition & 10 deletions ariba/tasks/refquery.py
@@ -2,15 +2,6 @@
import argparse
from ariba import refdata_query

def run():
parser = argparse.ArgumentParser(
description = 'Find cluster or sequence information from prepareref directory',
usage = 'ariba refquery <prepareref directory> <cluster|seq> <cluster name|sequence name>',
)
parser.add_argument('prepareref_dir', help='Name of directory output by prepareref')
parser.add_argument('query_type', choices=['cluster', 'seq'], help='Use "cluster" to get the sequences in a cluster, or "seq" to get information about a sequence')
parser.add_argument('search_name', help='Name of cluster or sequence to search for')
options = parser.parse_args()

def run(options):
rquery = refdata_query.RefdataQuery(options.prepareref_dir)
rquery.query(options.query_type, options.search_name)
17 changes: 2 additions & 15 deletions ariba/tasks/reportfilter.py
@@ -2,20 +2,7 @@
import sys
import ariba

def run():
parser = argparse.ArgumentParser(
description = 'Filters an ARIBA report tsv file',
usage = 'ariba reportfilter [options] <infile> <outprefix>'
)
parser.add_argument('--exclude_flags', help='Comma-separated list of flags to exclude. [%(default)s]', default='assembly_fail,ref_seq_choose_fail')
parser.add_argument('--min_pc_id', type=float, help='Minimum percent identity of nucmer match between contig and reference [%(default)s]', default=90.0, metavar='FLOAT')
parser.add_argument('--min_ref_base_asm', type=int, help='Minimum number of reference bases matching assembly [%(default)s]', default=1, metavar='INT')
parser.add_argument('--keep_syn', action='store_true', help='Keep synonymous variants (by default they are removed')
parser.add_argument('--discard_without_known_var', action='store_true', help='Applies to variant only genes. Filter out where there is a known variant, but the assembly has the wild type. By default these rows are kept.')
parser.add_argument('infile', help='Name of input tsv file')
parser.add_argument('outprefix', help='Prefix of output files. outprefix.tsv and outprefix.xls will be made')
options = parser.parse_args()

def run(options):
flags_to_exclude = options.exclude_flags.split(',')
allowed_flags = set(ariba.flag.flags_in_order)
bad_flags = [x for x in flags_to_exclude if x not in allowed_flags]
@@ -31,5 +18,5 @@ def run():
ignore_not_has_known_variant=options.discard_without_known_var,
remove_synonymous_snps=not options.keep_syn,
)
- rf.run(options.outprefix)
+ rf.run(options.outfile)
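
Since the positional argument changes from outprefix to outfile, the options namespace reaching this task must now carry an outfile attribute. A standalone sketch of driving the task directly, with option names and defaults mirroring the parser removed above (the filenames are assumptions):

import argparse
from ariba import tasks

# Build the namespace reportfilter.run() now expects; 'outfile' replaces
# the old 'outprefix', everything else keeps the removed parser's defaults.
options = argparse.Namespace(
    exclude_flags='assembly_fail,ref_seq_choose_fail',
    min_pc_id=90.0,
    min_ref_base_asm=1,
    keep_syn=False,
    discard_without_known_var=False,
    infile='report.all.tsv',
    outfile='report.tsv',
)
tasks.reportfilter.run(options)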

32 changes: 1 addition & 31 deletions ariba/tasks/run.py
@@ -4,37 +4,7 @@
import ariba


def run():
parser = argparse.ArgumentParser(
description = 'ARIBA: Antibiotic Resistance Identification By Assembly',
usage = 'ariba run [options] <prepareref_dir> <reads1.fq> <reads2.fq> <outdir>')
parser.add_argument('prepareref_dir', help='Name of output directory when "ariba prepareref" was run')
parser.add_argument('reads_1', help='Name of fwd reads fastq file')
parser.add_argument('reads_2', help='Name of rev reads fastq file')
parser.add_argument('outdir', help='Output directory (must not already exist)')

nucmer_group = parser.add_argument_group('nucmer options')
nucmer_group.add_argument('--nucmer_min_id', type=int, help='Minimum alignment identity (delta-filter -i) [%(default)s]', default=90, metavar='INT')
nucmer_group.add_argument('--nucmer_min_len', type=int, help='Minimum alignment length (delta-filter -i) [%(default)s]', default=20, metavar='INT')
nucmer_group.add_argument('--nucmer_breaklen', type=int, help='Value to use for -breaklen when running nucmer [%(default)s]', default=200, metavar='INT')

assembly_group = parser.add_argument_group('Assembly options')
assembly_group.add_argument('--assembly_cov', type=int, help='Target read coverage when sampling reads for assembly [%(default)s]', default=50, metavar='INT')
assembly_group.add_argument('--assembler_k', type=int, help='kmer size to use with assembler. You can use 0 to set kmer to 2/3 of the read length. Warning - lower kmers are usually better. [%(default)s]', metavar='INT', default=21)
assembly_group.add_argument('--spades_other', help='Put options string to be used with spades in quotes. This will NOT be sanity checked. Do not use -k (see --assembler_k), or -t (use ariba option --threads) [%(default)s]', default="-m 4 --careful", metavar="OPTIONS")
assembly_group.add_argument('--min_scaff_depth', type=int, help='Minimum number of read pairs needed as evidence for scaffold link between two contigs. This is also the value used for sspace -k when scaffolding [%(default)s]', default=10, metavar='INT')

other_group = parser.add_argument_group('Other options')
other_group.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
other_group.add_argument('--assembled_threshold', type=float, help='If proportion of gene assembled (regardless of into how many contigs) is at least this value then the flag gene_assembled is set [%(default)s]', default=0.95, metavar='FLOAT (between 0 and 1)')
other_group.add_argument('--gene_nt_extend', type=int, help='Max number of nucleotides to extend ends of gene matches to look for start/stop codons [%(default)s]', default=30, metavar='INT')
other_group.add_argument('--unique_threshold', type=float, help='If proportion of bases in gene assembled more than once is <= this value, then the flag unique_contig is set [%(default)s]', default=0.03, metavar='FLOAT (between 0 and 1)')
other_group.add_argument('--noclean', action='store_true', help='Do not clean up intermediate files')
other_group.add_argument('--tmp_dir', help='Existing directory in which to create a temporary directory used for local assemblies')
other_group.add_argument('--verbose', action='store_true', help='Be verbose')

options = parser.parse_args()

def run(options):
reads_not_found = []

for filename in [options.reads_1, options.reads_2]:
19 changes: 1 addition & 18 deletions ariba/tasks/summary.py
@@ -80,24 +80,7 @@ def use_preset(options):
return options


def run():
presets = ['minimal', 'cluster_small', 'cluster_all', 'cluster_var_groups', 'cluster_known_vars', 'all', 'all_no_filter']

parser = argparse.ArgumentParser(
description = 'Make a summary of ARIBA report files, and Phandango files',
usage = 'ariba summary [options] <outprefix> [report1.tsv report2.tsv ...]',
epilog = 'Files must be listed after the output file and/or the option --fofn must be used. If both used, all files in the filename specified by --fofn AND the files listed after the output file will be used as input.')
parser.add_argument('-f', '--fofn', help='File of filenames of ariba reports in tsv format (not xls) to be summarised. Must be used if no input files listed after the outfile.', metavar='FILENAME')
parser.add_argument('--preset', choices=presets, help='Shorthand for setting --cluster_cols,--col_filter,--row_filter,--known_vars,--novel_vars. Using this overrides those options', metavar='|'.join(presets))
parser.add_argument('--cluster_cols', help='Comma separated list of cluster columns to include. Choose from: assembled, match, ref_seq, pct_id, known_var, novel_var [%(default)s]', default='match', metavar='col1,col2,...')
parser.add_argument('--col_filter', choices=['y', 'n'], default='y', help='Choose whether columns where all values are "no" or "NA" are removed [%(default)s]', metavar='y|n')
parser.add_argument('--row_filter', choices=['y', 'n'], default='y', help='Choose whether rows where all values are "no" or "NA" are removed [%(default)s]', metavar='y|n')
parser.add_argument('--var_cols', help='Comma separated list of variant columns to include. Choose from: groups, grouped, ungrouped, novel [none by default]', metavar='col1,col2,...', default='')
parser.add_argument('--min_id', type=float, help='Minimum percent identity cutoff to count as assembled [%(default)s]', default=90, metavar='FLOAT')
parser.add_argument('--verbose', action='store_true', help='Be verbose')
parser.add_argument('outprefix', help='Prefix of output files')
parser.add_argument('infiles', nargs='*', help='Files to be summarised')
options = parser.parse_args()
def run(options):
if len(options.infiles) == 0:
options.infiles = None

8 changes: 1 addition & 7 deletions ariba/tasks/test.py
@@ -14,13 +14,7 @@ def boxymcboxface(message):
print('-' * 79)


def run():
parser = argparse.ArgumentParser(
description = 'Run ARIBA on a small test dataset',
usage = 'ariba test [options] <outdir>')
parser.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
parser.add_argument('outdir', help='Name of output directory')
options = parser.parse_args()
def run(options):
ariba_exe = os.path.abspath(sys.argv[0])

print('Running ARIBA on test data...')
2 changes: 1 addition & 1 deletion ariba/tasks/version.py
@@ -1,6 +1,6 @@
import sys
from ariba import versions

- def run():
+ def run(options):
extern_progs, report_lines = versions.get_all_versions(raise_error=False)
print(*report_lines, sep='\n')
8 changes: 4 additions & 4 deletions ariba/tests/report_filter_test.py
@@ -329,9 +329,9 @@ def test_run(self):
'''Test run'''
infile = os.path.join(data_dir, 'report_filter_test_run.in.tsv')
expected_file = os.path.join(data_dir, 'report_filter_test_run.expected.tsv')
- tmpprefix = 'tmp.test.report_filter.run.out'
+ tmpfile = 'tmp.test.report_filter.run.out.tsv'
rf = report_filter.ReportFilter(infile=infile)
- rf.run(tmpprefix)
- self.assertTrue(filecmp.cmp(expected_file, tmpprefix + '.tsv', shallow=False))
- os.unlink(tmpprefix + '.tsv')
+ rf.run(tmpfile)
+ self.assertTrue(filecmp.cmp(expected_file, tmpfile, shallow=False))
+ os.unlink(tmpfile)
