Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add srst2 argannot #118

Merged
merged 3 commits into from
Aug 5, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 39 additions & 2 deletions ariba/ref_genes_getter.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@ class Error (Exception): pass
from ariba import common, card_record, vfdb_parser


argannot_ref = '"ARG-ANNOT, a new bioinformatic tool to discover antibiotic resistance genes in bacterial genomes",\nGupta et al 2014, PMID: 24145532\n'


class RefGenesGetter:
def __init__(self, ref_db, genetic_code=11):
allowed_ref_dbs = {'card', 'argannot', 'plasmidfinder', 'resfinder','vfdb'}
allowed_ref_dbs = {'card', 'argannot', 'plasmidfinder', 'resfinder','srst2_argannot', 'vfdb'}
if ref_db not in allowed_ref_dbs:
raise Error('Error in RefGenesGetter. ref_db must be one of: ' + str(allowed_ref_dbs) + ', but I got "' + ref_db)
self.ref_db=ref_db
Expand Down Expand Up @@ -234,7 +237,7 @@ def _get_from_argannot(self, outprefix):
print('You can use them with ARIBA like this:')
print('ariba prepareref -f', final_fasta, '-m', final_tsv, 'output_directory\n')
print('If you use this downloaded data, please cite:')
print('"ARG-ANNOT, a new bioinformatic tool to discover antibiotic resistance genes in bacterial genomes",\nGupta et al 2014, PMID: 24145532\n')
print(argannot_ref)


def _get_from_plasmidfinder(self, outprefix):
Expand Down Expand Up @@ -289,6 +292,40 @@ def _get_from_plasmidfinder(self, outprefix):
print('"PlasmidFinder and pMLST: in silico detection and typing of plasmids", Carattoli et al 2014, PMID: 24777092\n')


def _get_from_srst2_argannot(self, outprefix):
    """Download the SRST2 copy of ARG-ANNOT and convert it for ARIBA.

    Fetches ARGannot.r1.fasta from the pinned SRST2 release with wget,
    renames every sequence to ``<cluster_name>.<original first token>``
    and writes two files:

    * ``<outprefix>.fa``  - the renamed sequences
    * ``<outprefix>.tsv`` - one tab-separated metadata line per sequence,
      whose last column records the original sequence name

    The raw download is kept as ``<outprefix>.original.fa``.

    Args:
        outprefix: prefix used for all three output file names.

    Raises:
        ValueError: if the first whitespace-delimited token of a sequence
            name is not four fields joined by ``__``.
    """
    srst2_version = '0.2.0'
    srst2_url = 'https://github.com/katholt/srst2/raw/v' + srst2_version + '/data/ARGannot.r1.fasta'
    srst2_fa = outprefix + '.original.fa'
    # NOTE(review): outprefix is interpolated unquoted; a prefix containing
    # whitespace would presumably break the command - confirm how
    # common.syscall runs it before adding quoting.
    command = 'wget -O ' + srst2_fa + ' ' + srst2_url
    common.syscall(command, verbose=True)

    final_fasta = outprefix + '.fa'
    final_tsv = outprefix + '.tsv'

    f_out_fa = None
    f_out_meta = None

    try:
        f_out_fa = pyfastaq.utils.open_file_write(final_fasta)
        f_out_meta = pyfastaq.utils.open_file_write(final_tsv)
        seq_reader = pyfastaq.sequences.file_reader(srst2_fa)

        for seq in seq_reader:
            original_id = seq.id
            # Only the first token is needed. Limiting the split means a
            # header with no description, or with a description that itself
            # contains whitespace, is accepted instead of failing to unpack.
            id_fields = original_id.split(None, 1)
            name = id_fields[0] if id_fields else ''
            name_fields = name.split('__')

            # Expected layout: cluster_id__cluster_name__allele_name__allele_id
            if len(name_fields) != 4:
                raise ValueError('Unexpected sequence name "' + original_id + '" in file ' + srst2_fa + '. Expected the first field to look like: cluster_id__cluster_name__allele_name__allele_id')

            cluster_name = name_fields[1]
            seq.id = cluster_name + '.' + name
            print(seq, file=f_out_fa)
            print(seq.id, 1, 0, '.', '.', 'Original name: ' + original_id, sep='\t', file=f_out_meta)
    finally:
        # Always release the output handles, even if the download is
        # malformed and the loop raises part-way through.
        for handle in (f_out_fa, f_out_meta):
            if handle is not None:
                pyfastaq.utils.close(handle)

    print('Finished downloading and converting data. Final files are:', final_fasta, final_tsv, sep='\n\t', end='\n\n')
    print('You can use them with ARIBA like this:')
    print('ariba prepareref -f', final_fasta, '-m', final_tsv, 'output_directory\n')
    print('If you use this downloaded data, please cite:')
    print('"SRST2: Rapid genomic surveillance for public health and hospital microbiology labs",\nInouye et al 2014, Genome Medicine, PMID: 25422674\n')
    print(argannot_ref)
    print('and in your methods say that the ARG-ANNOT sequences were used from version', srst2_version, 'of SRST2.')


def _get_from_vfdb(self, outprefix):
outprefix = os.path.abspath(outprefix)
tmpdir = outprefix + '.tmp.download'
Expand Down
1 change: 0 additions & 1 deletion ariba/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,6 @@ def run(self):

# sanity check same number of columns in headers and matrix
lengths = {len(x) for x in matrix}
print(lengths, len(phandango_header), len(csv_header))
assert len(lengths) == 1
assert len(matrix[0]) == len(phandango_header) == len(csv_header)

Expand Down
8 changes: 5 additions & 3 deletions scripts/ariba
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ subparser_flag.set_defaults(func=ariba.tasks.flag.run)


#---------------------------- getref ------------------------------------
allowed_dbs = ['argannot', 'card', 'plasmidfinder', 'resfinder','vfdb']
allowed_dbs = ['argannot', 'card', 'plasmidfinder', 'resfinder', 'srst2_argannot', 'vfdb']
subparser_getref = subparsers.add_parser(
'getref',
help='Download reference data',
Expand Down Expand Up @@ -138,7 +138,8 @@ assembly_group.add_argument('--assembly_cov', type=int, help='Target read covera
assembly_group.add_argument('--min_scaff_depth', type=int, help='Minimum number of read pairs needed as evidence for scaffold link between two contigs [%(default)s]', default=10, metavar='INT')

other_group = subparser_run.add_argument_group('Other options')
other_group.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
#other_group.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
other_group.add_argument('--threads', type=int, help=argparse.SUPPRESS, default=1, metavar='INT')
other_group.add_argument('--assembled_threshold', type=float, help='If proportion of gene assembled (regardless of into how many contigs) is at least this value then the flag gene_assembled is set [%(default)s]', default=0.95, metavar='FLOAT (between 0 and 1)')
other_group.add_argument('--gene_nt_extend', type=int, help='Max number of nucleotides to extend ends of gene matches to look for start/stop codons [%(default)s]', default=30, metavar='INT')
other_group.add_argument('--unique_threshold', type=float, help='If proportion of bases in gene assembled more than once is <= this value, then the flag unique_contig is set [%(default)s]', default=0.03, metavar='FLOAT (between 0 and 1)')
Expand Down Expand Up @@ -179,7 +180,8 @@ subparser_test = subparsers.add_parser(
description='Run ARIBA on a small made up built-in test dataset'
)

subparser_test.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
#subparser_test.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
subparser_test.add_argument('--threads', type=int, help=argparse.SUPPRESS, default=1, metavar='INT')
subparser_test.add_argument('outdir', help='Name of output directory')
subparser_test.set_defaults(func=ariba.tasks.test.run)

Expand Down