Skip to content

Commit

Permalink
Merge pull request #121 from martinghunt/card_add_version_option
Browse files Browse the repository at this point in the history
Add option to choose download version
  • Loading branch information
martinghunt authored Aug 12, 2016
2 parents 6533818 + b8d4d68 commit 2b3268b
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 5 deletions.
42 changes: 38 additions & 4 deletions ariba/ref_genes_getter.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,15 @@ class Error (Exception): pass


class RefGenesGetter:
def __init__(self, ref_db, genetic_code=11):
def __init__(self, ref_db, genetic_code=11, version=None):
allowed_ref_dbs = {'card', 'argannot', 'plasmidfinder', 'resfinder','srst2_argannot', 'vfdb'}
if ref_db not in allowed_ref_dbs:
raise Error('Error in RefGenesGetter. ref_db must be one of: ' + str(allowed_ref_dbs) + ', but I got "' + ref_db)
self.ref_db=ref_db
self.genetic_code = genetic_code
self.max_download_attempts = 3
self.sleep_time = 2
self.version = version
pyfastaq.sequences.genetic_code = self.genetic_code


Expand All @@ -41,6 +42,30 @@ def _download_file(self, url, outfile):
print(' done', flush=True)


def _get_card_versions(self, tmp_file):
    """Scrape the CARD download page and return the available versions.

    The download page is saved to ``tmp_file`` and searched line by line
    for links to "broad..." tarballs whose names carry a three-part
    version number. Once the page has been downloaded, ``tmp_file`` is
    always removed again, including when no version is found.

    Args:
        tmp_file: Path used to hold the downloaded HTML page.

    Returns:
        Dict mapping a version tuple of ints, e.g. ``(1, 0, 9)``, to the
        absolute URL of that version's tarball.

    Raises:
        Error: If no version link is found in the downloaded page.
    """
    print('Getting available CARD versions')
    self._download_file('https://card.mcmaster.ca/download', tmp_file)
    link_regex = re.compile(r'''href="(/download/.*?broad.*?v([0-9]+\.[0-9]+\.[0-9]+)\.tar\.gz)"''')
    versions = {}

    try:
        with open(tmp_file) as f:
            for line in f:
                for path, version_string in link_regex.findall(line):
                    # Integer tuples sort numerically, so 1.0.10 > 1.0.9.
                    key = tuple(int(x) for x in version_string.split('.'))
                    versions[key] = 'https://card.mcmaster.ca' + path
    finally:
        # Clean up on every path; previously the file was left behind
        # whenever the "no versions" error below was raised.
        if os.path.exists(tmp_file):
            os.unlink(tmp_file)

    if not versions:
        raise Error('Error getting CARD versions. Cannot continue')

    print('Found versions:')

    for key, url in sorted(versions.items()):
        print('.'.join(str(x) for x in key), url, sep='\t')

    return versions


def _get_from_card(self, outprefix):
outprefix = os.path.abspath(outprefix)
Expand All @@ -53,8 +78,17 @@ def _get_from_card(self, outprefix):
except:
raise Error('Error mkdir/chdir ' + tmpdir)

card_version = '1.0.9'
card_tarball_url = 'https://card.mcmaster.ca/download/0/broadstreet-v' + card_version + '.tar.gz'
versions = self._get_card_versions('download.html')
if self.version is not None:
key = tuple([int(x) for x in self.version.split('.')])
if key not in versions:
raise Error('Error! Did not find requested version ' + self.version)
else:
key = sorted(list(versions.keys()))[-1]
self.version = '.'.join([str(x) for x in key])

print('Getting version', self.version)
card_tarball_url = versions[key]
card_tarball = 'card.tar.gz'
print('Working in temporary directory', tmpdir)
print('Downloading data from card:', card_tarball_url, flush=True)
Expand Down Expand Up @@ -149,7 +183,7 @@ def _get_from_card(self, outprefix):
print('ariba prepareref -f', final_fasta, '-m', final_tsv, 'output_directory\n')
print('If you use this downloaded data, please cite:')
print('"The Comprehensive Antibiotic Resistance Database", McArthur et al 2013, PMID: 23650175')
print('and in your methods say that version', card_version, 'of the database was used')
print('and in your methods say that version', self.version, 'of the database was used')


def _get_from_resfinder(self, outprefix):
Expand Down
6 changes: 5 additions & 1 deletion ariba/tasks/getref.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@


def run(options):
getter = ref_genes_getter.RefGenesGetter(options.db, genetic_code=options.genetic_code)
getter = ref_genes_getter.RefGenesGetter(
options.db,
genetic_code=options.genetic_code,
version=options.version
)
getter.run(options.outprefix)

1 change: 1 addition & 0 deletions scripts/ariba
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ subparser_getref = subparsers.add_parser(
description='Download reference data from one of a few supported public resources',
)
subparser_getref.add_argument('--genetic_code', type=int, help='Number of genetic code to use. Currently supported 1,4,11 [%(default)s]', choices=[1,4,11], default=11, metavar='INT')
subparser_getref.add_argument('--version', help='Version of reference data to download. If not used, gets the latest version. Only applies to card')
subparser_getref.add_argument('db', help='Database to download. Must be one of: ' + ' '.join(allowed_dbs), choices=allowed_dbs, metavar="DB name")
subparser_getref.add_argument('outprefix', help='Prefix of output filenames')
subparser_getref.set_defaults(func=ariba.tasks.getref.run)
Expand Down

0 comments on commit 2b3268b

Please sign in to comment.