sanger-pathogens · martinghunt · Aug 5, 2016 · Aug 5, 2016 · Aug 5, 2016 · Aug 5, 2016
diff --git a/ariba/ref_genes_getter.py b/ariba/ref_genes_getter.py
@@ -12,9 +12,12 @@ class Error (Exception): pass
 from ariba import common, card_record, vfdb_parser
 
 
+argannot_ref = '"ARG-ANNOT, a new bioinformatic tool to discover antibiotic resistance genes in bacterial genomes",\nGupta et al 2014, PMID: 24145532\n'
+
+
 class RefGenesGetter:
     def __init__(self, ref_db, genetic_code=11):
-        allowed_ref_dbs = {'card', 'argannot', 'plasmidfinder', 'resfinder','vfdb'}
+        allowed_ref_dbs = {'card', 'argannot', 'plasmidfinder', 'resfinder','srst2_argannot', 'vfdb'}
         if ref_db not in allowed_ref_dbs:
             raise Error('Error in RefGenesGetter. ref_db must be one of: ' + str(allowed_ref_dbs) + ', but I got "' + ref_db)
         self.ref_db=ref_db
@@ -234,7 +237,7 @@ def _get_from_argannot(self, outprefix):
         print('You can use them with ARIBA like this:')
         print('ariba prepareref -f', final_fasta, '-m', final_tsv, 'output_directory\n')
         print('If you use this downloaded data, please cite:')
-        print('"ARG-ANNOT, a new bioinformatic tool to discover antibiotic resistance genes in bacterial genomes",\nGupta et al 2014, PMID: 24145532\n')
+        print(argannot_ref)
 
 
     def _get_from_plasmidfinder(self, outprefix):
@@ -289,6 +292,56 @@ def _get_from_plasmidfinder(self, outprefix):
         print('"PlasmidFinder and pMLST: in silico detection and typing of plasmids", Carattoli et al 2014, PMID: 24777092\n')
 
 
+    def _get_from_srst2_argannot(self, outprefix):
+        """Download the SRST2 build of ARG-ANNOT and convert it for ARIBA.
+
+        Runs wget (through common.syscall) to save ARGannot.r1.fasta from the
+        pinned SRST2 release as <outprefix>.original.fa, then writes
+        <outprefix>.fa, in which each sequence is renamed to
+        <cluster_name>.<original name>, and <outprefix>.tsv, with one
+        metadata line per sequence that records the original header.
+
+        Raises ValueError if a sequence name does not consist of exactly four
+        '__'-separated fields.
+        """
+        srst2_version = '0.2.0'
+        srst2_url = 'https://github.com/katholt/srst2/raw/v' + srst2_version + '/data/ARGannot.r1.fasta'
+        srst2_fa = outprefix + '.original.fa'
+        command = 'wget -O ' + srst2_fa + ' ' + srst2_url
+        common.syscall(command, verbose=True)
+
+        final_fasta = outprefix + '.fa'
+        final_tsv = outprefix + '.tsv'
+
+        f_out_fa = pyfastaq.utils.open_file_write(final_fasta)
+        f_out_meta = pyfastaq.utils.open_file_write(final_tsv)
+
+        # Close both outputs even if a malformed record aborts the conversion.
+        try:
+            for seq in pyfastaq.sequences.file_reader(srst2_fa):
+                original_id = seq.id
+                # The name is the first whitespace-delimited token; the rest of
+                # the header is free text that may be absent or contain spaces.
+                header_tokens = seq.id.split(maxsplit=1)
+                name = header_tokens[0] if header_tokens else ''
+                # Unpacking doubles as validation of the four-field name layout.
+                _, cluster_name, _, _ = name.split('__')
+                seq.id = cluster_name + '.' + name
+                print(seq, file=f_out_fa)
+                print(seq.id, 1, 0, '.', '.', 'Original name: ' + original_id, sep='\t', file=f_out_meta)
+        finally:
+            pyfastaq.utils.close(f_out_fa)
+            pyfastaq.utils.close(f_out_meta)
+
+        print('Finished downloading and converting data. Final files are:', final_fasta, final_tsv, sep='\n\t', end='\n\n')
+        print('You can use them with ARIBA like this:')
+        print('ariba prepareref -f', final_fasta, '-m', final_tsv, 'output_directory\n')
+        print('If you use this downloaded data, please cite:')
+        print('"SRST2: Rapid genomic surveillance for public health and hospital microbiology labs",\nInouye et al 2014, Genome Medicine, PMID: 25422674\n')
+        print(argannot_ref)
+        print('and in your methods say that the ARG-ANNOT sequences were used from version', srst2_version, 'of SRST2.')
+
+
     def _get_from_vfdb(self, outprefix):
         outprefix = os.path.abspath(outprefix)
         tmpdir = outprefix + '.tmp.download'

diff --git a/ariba/summary.py b/ariba/summary.py
@@ -380,7 +380,6 @@ def run(self):
 
         # sanity check same number of columns in headers and matrix
         lengths = {len(x) for x in matrix}
-        print(lengths, len(phandango_header), len(csv_header))
         assert len(lengths) == 1
         assert len(matrix[0]) == len(phandango_header) == len(csv_header)
 

diff --git a/scripts/ariba b/scripts/ariba
@@ -42,7 +42,7 @@ subparser_flag.set_defaults(func=ariba.tasks.flag.run)
 
 
 #---------------------------- getref ------------------------------------
-allowed_dbs = ['argannot', 'card', 'plasmidfinder', 'resfinder','vfdb']
+allowed_dbs = ['argannot', 'card', 'plasmidfinder', 'resfinder', 'srst2_argannot', 'vfdb']
 subparser_getref = subparsers.add_parser(
     'getref',
     help='Download reference data',
@@ -138,7 +138,8 @@ assembly_group.add_argument('--assembly_cov', type=int, help='Target read covera
 assembly_group.add_argument('--min_scaff_depth', type=int, help='Minimum number of read pairs needed as evidence for scaffold link between two contigs [%(default)s]', default=10, metavar='INT')
 
 other_group = subparser_run.add_argument_group('Other options')
-other_group.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
+#other_group.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
+other_group.add_argument('--threads', type=int, help=argparse.SUPPRESS, default=1, metavar='INT')
 other_group.add_argument('--assembled_threshold', type=float, help='If proportion of gene assembled (regardless of into how many contigs) is at least this value then the flag gene_assembled is set [%(default)s]', default=0.95, metavar='FLOAT (between 0 and 1)')
 other_group.add_argument('--gene_nt_extend', type=int, help='Max number of nucleotides to extend ends of gene matches to look for start/stop codons [%(default)s]', default=30, metavar='INT')
 other_group.add_argument('--unique_threshold', type=float, help='If proportion of bases in gene assembled more than once is <= this value, then the flag unique_contig is set [%(default)s]', default=0.03, metavar='FLOAT (between 0 and 1)')
@@ -179,7 +180,8 @@ subparser_test = subparsers.add_parser(
     description='Run ARIBA on a small made up built-in test dataset'
 )
 
-subparser_test.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
+#subparser_test.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
+subparser_test.add_argument('--threads', type=int, help=argparse.SUPPRESS, default=1, metavar='INT')
 subparser_test.add_argument('outdir', help='Name of output directory')
 subparser_test.set_defaults(func=ariba.tasks.test.run)