Skip to content

Commit

Permalink
Merge pull request #118 from martinghunt/add_srst2_argannot
Browse files Browse the repository at this point in the history
Add srst2 argannot
  • Loading branch information
martinghunt authored Aug 5, 2016
2 parents 3dbc431 + 0b27284 commit 32d7956
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 6 deletions.
41 changes: 39 additions & 2 deletions ariba/ref_genes_getter.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@ class Error (Exception): pass
from ariba import common, card_record, vfdb_parser


# Citation text for the ARG-ANNOT publication; printed to the user after
# reference data derived from ARG-ANNOT has been downloaded.
argannot_ref = '"ARG-ANNOT, a new bioinformatic tool to discover antibiotic resistance genes in bacterial genomes",\nGupta et al 2014, PMID: 24145532\n'


class RefGenesGetter:
def __init__(self, ref_db, genetic_code=11):
allowed_ref_dbs = {'card', 'argannot', 'plasmidfinder', 'resfinder','vfdb'}
allowed_ref_dbs = {'card', 'argannot', 'plasmidfinder', 'resfinder','srst2_argannot', 'vfdb'}
if ref_db not in allowed_ref_dbs:
raise Error('Error in RefGenesGetter. ref_db must be one of: ' + str(allowed_ref_dbs) + ', but I got "' + ref_db)
self.ref_db=ref_db
Expand Down Expand Up @@ -234,7 +237,7 @@ def _get_from_argannot(self, outprefix):
print('You can use them with ARIBA like this:')
print('ariba prepareref -f', final_fasta, '-m', final_tsv, 'output_directory\n')
print('If you use this downloaded data, please cite:')
print('"ARG-ANNOT, a new bioinformatic tool to discover antibiotic resistance genes in bacterial genomes",\nGupta et al 2014, PMID: 24145532\n')
print(argannot_ref)


def _get_from_plasmidfinder(self, outprefix):
Expand Down Expand Up @@ -289,6 +292,40 @@ def _get_from_plasmidfinder(self, outprefix):
print('"PlasmidFinder and pMLST: in silico detection and typing of plasmids", Carattoli et al 2014, PMID: 24777092\n')


def _get_from_srst2_argannot(self, outprefix):
    """Download the SRST2 copy of ARG-ANNOT and convert it for ARIBA.

    Fetches ``ARGannot.r1.fasta`` from a pinned SRST2 release with wget,
    then writes two files next to the raw download:

    * ``<outprefix>.fa``  - the sequences, renamed to
      ``<cluster_name>.<original first header token>``.
    * ``<outprefix>.tsv`` - one tab-separated metadata line per sequence,
      recording the new name and the complete original header.

    The raw download is kept as ``<outprefix>.original.fa``.

    Args:
        outprefix: prefix (may include a directory) for all files written.

    Raises:
        ValueError: if a sequence header is empty, or its first
            whitespace-delimited token does not consist of exactly four
            ``__``-separated fields
            (cluster id, cluster name, allele name, allele id).
    """
    srst2_version = '0.2.0'
    srst2_url = 'https://github.com/katholt/srst2/raw/v' + srst2_version + '/data/ARGannot.r1.fasta'
    srst2_fa = outprefix + '.original.fa'
    # NOTE(review): outprefix is placed into the command string unquoted, so
    # a prefix containing whitespace or shell metacharacters will presumably
    # break the download - confirm how common.syscall runs the command.
    command = 'wget -O ' + srst2_fa + ' ' + srst2_url
    common.syscall(command, verbose=True)

    final_fasta = outprefix + '.fa'
    final_tsv = outprefix + '.tsv'

    # Nested try/finally so both output handles are released even when the
    # downloaded file cannot be read or contains a malformed header.
    f_out_fa = pyfastaq.utils.open_file_write(final_fasta)
    try:
        f_out_meta = pyfastaq.utils.open_file_write(final_tsv)
        try:
            for seq in pyfastaq.sequences.file_reader(srst2_fa):
                original_id = seq.id
                # Only the first token carries the name; any description
                # after it (absent, or containing spaces) is ignored here
                # but preserved in the metadata via original_id.
                name, *_ = original_id.split()
                # Unpacking into four targets doubles as a format check:
                # anything other than four fields raises ValueError.
                _, cluster_name, _, _ = name.split('__')
                seq.id = cluster_name + '.' + name
                print(seq, file=f_out_fa)
                print(seq.id, 1, 0, '.', '.', 'Original name: ' + original_id, sep='\t', file=f_out_meta)
        finally:
            pyfastaq.utils.close(f_out_meta)
    finally:
        pyfastaq.utils.close(f_out_fa)

    print('Finished downloading and converting data. Final files are:', final_fasta, final_tsv, sep='\n\t', end='\n\n')
    print('You can use them with ARIBA like this:')
    print('ariba prepareref -f', final_fasta, '-m', final_tsv, 'output_directory\n')
    print('If you use this downloaded data, please cite:')
    print('"SRST2: Rapid genomic surveillance for public health and hospital microbiology labs",\nInouye et al 2014, Genome Medicine, PMID: 25422674\n')
    print(argannot_ref)
    print('and in your methods say that the ARG-ANNOT sequences were used from version', srst2_version, 'of SRST2.')


def _get_from_vfdb(self, outprefix):
outprefix = os.path.abspath(outprefix)
tmpdir = outprefix + '.tmp.download'
Expand Down
1 change: 0 additions & 1 deletion ariba/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,6 @@ def run(self):

# sanity check same number of columns in headers and matrix
lengths = {len(x) for x in matrix}
print(lengths, len(phandango_header), len(csv_header))
assert len(lengths) == 1
assert len(matrix[0]) == len(phandango_header) == len(csv_header)

Expand Down
8 changes: 5 additions & 3 deletions scripts/ariba
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ subparser_flag.set_defaults(func=ariba.tasks.flag.run)


#---------------------------- getref ------------------------------------
allowed_dbs = ['argannot', 'card', 'plasmidfinder', 'resfinder','vfdb']
allowed_dbs = ['argannot', 'card', 'plasmidfinder', 'resfinder', 'srst2_argannot', 'vfdb']
subparser_getref = subparsers.add_parser(
'getref',
help='Download reference data',
Expand Down Expand Up @@ -138,7 +138,8 @@ assembly_group.add_argument('--assembly_cov', type=int, help='Target read covera
assembly_group.add_argument('--min_scaff_depth', type=int, help='Minimum number of read pairs needed as evidence for scaffold link between two contigs [%(default)s]', default=10, metavar='INT')

other_group = subparser_run.add_argument_group('Other options')
other_group.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
#other_group.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
other_group.add_argument('--threads', type=int, help=argparse.SUPPRESS, default=1, metavar='INT')
other_group.add_argument('--assembled_threshold', type=float, help='If proportion of gene assembled (regardless of into how many contigs) is at least this value then the flag gene_assembled is set [%(default)s]', default=0.95, metavar='FLOAT (between 0 and 1)')
other_group.add_argument('--gene_nt_extend', type=int, help='Max number of nucleotides to extend ends of gene matches to look for start/stop codons [%(default)s]', default=30, metavar='INT')
other_group.add_argument('--unique_threshold', type=float, help='If proportion of bases in gene assembled more than once is <= this value, then the flag unique_contig is set [%(default)s]', default=0.03, metavar='FLOAT (between 0 and 1)')
Expand Down Expand Up @@ -179,7 +180,8 @@ subparser_test = subparsers.add_parser(
description='Run ARIBA on a small made up built-in test dataset'
)

subparser_test.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
#subparser_test.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
subparser_test.add_argument('--threads', type=int, help=argparse.SUPPRESS, default=1, metavar='INT')
subparser_test.add_argument('outdir', help='Name of output directory')
subparser_test.set_defaults(func=ariba.tasks.test.run)

Expand Down

0 comments on commit 32d7956

Please sign in to comment.