diff --git a/ariba/ref_genes_getter.py b/ariba/ref_genes_getter.py index 236937de..14052b35 100644 --- a/ariba/ref_genes_getter.py +++ b/ariba/ref_genes_getter.py @@ -7,6 +7,7 @@ class Error (Exception): pass import pyfastaq import time import json +import subprocess import sys from ariba import common, card_record, vfdb_parser, megares_data_finder, megares_zip_parser @@ -187,6 +188,19 @@ def _get_from_card(self, outprefix): print('and in your methods say that version', self.version, 'of the database was used') + @classmethod + def _get_genetic_epi_database_from_bitbucket(cls, db_name, outdir, git_commit=None): + assert db_name in {'plasmidfinder', 'resfinder', 'virulence_finder'} + cmd = 'git clone ' + 'https://bitbucket.org/genomicepidemiology/' + db_name + '_db.git ' + outdir + common.syscall(cmd) + + if git_commit is not None: + common.syscall('cd ' + outdir + ' && git checkout ' + git_commit) + + print('Using this git commit for ' + db_name + ' database:') + subprocess.check_call('cd ' + outdir + ' && git log -n 1', shell=True) + + def _get_from_resfinder(self, outprefix): outprefix = os.path.abspath(outprefix) final_fasta = outprefix + '.fa' @@ -194,17 +208,22 @@ def _get_from_resfinder(self, outprefix): tmpdir = outprefix + '.tmp.download' current_dir = os.getcwd() - try: - os.mkdir(tmpdir) + if self.version == 'old': + try: + os.mkdir(tmpdir) + os.chdir(tmpdir) + except: + raise Error('Error mkdir/chdir ' + tmpdir) + + zipfile = 'resfinder.zip' + cmd = 'curl -X POST --data "folder=resfinder&filename=resfinder.zip" -o ' + zipfile + ' https://cge.cbs.dtu.dk/cge/download_data.php' + print('Downloading data with:', cmd, sep='\n') + common.syscall(cmd) + common.syscall('unzip ' + zipfile) + else: + RefGenesGetter._get_genetic_epi_database_from_bitbucket('resfinder', tmpdir, git_commit=self.version) os.chdir(tmpdir) - except: - raise Error('Error mkdir/chdir ' + tmpdir) - zipfile = 'resfinder.zip' - cmd = 'curl -X POST --data 
"folder=resfinder&filename=resfinder.zip" -o ' + zipfile + ' https://cge.cbs.dtu.dk/cge/download_data.php' - print('Downloading data with:', cmd, sep='\n') - common.syscall(cmd) - common.syscall('unzip ' + zipfile) print('Combining downloaded fasta files...') fout_fa = pyfastaq.utils.open_file_write(final_fasta) @@ -223,7 +242,7 @@ def _get_from_resfinder(self, outprefix): except: description = '.' - # names are not unique across the files + # names are not unique across the files if seq.id in used_names: used_names[seq.id] += 1 seq.id += '_' + str(used_names[seq.id]) @@ -311,17 +330,21 @@ def _get_from_plasmidfinder(self, outprefix): tmpdir = outprefix + '.tmp.download' current_dir = os.getcwd() - try: - os.mkdir(tmpdir) + if self.version == 'old': + try: + os.mkdir(tmpdir) + os.chdir(tmpdir) + except: + raise Error('Error mkdir/chdir ' + tmpdir) + + zipfile = 'plasmidfinder.zip' + cmd = 'curl -X POST --data "folder=plasmidfinder&filename=plasmidfinder.zip" -o ' + zipfile + ' https://cge.cbs.dtu.dk/cge/download_data.php' + print('Downloading data with:', cmd, sep='\n') + common.syscall(cmd) + common.syscall('unzip ' + zipfile) + else: + RefGenesGetter._get_genetic_epi_database_from_bitbucket('plasmidfinder', tmpdir, git_commit=self.version) os.chdir(tmpdir) - except: - raise Error('Error mkdir/chdir ' + tmpdir) - - zipfile = 'plasmidfinder.zip' - cmd = 'curl -X POST --data "folder=plasmidfinder&filename=plasmidfinder.zip" -o ' + zipfile + ' https://cge.cbs.dtu.dk/cge/download_data.php' - print('Downloading data with:', cmd, sep='\n') - common.syscall(cmd) - common.syscall('unzip ' + zipfile) print('Combining downloaded fasta files...') fout_fa = pyfastaq.utils.open_file_write(final_fasta) @@ -460,17 +483,21 @@ def _get_from_virulencefinder(self, outprefix): tmpdir = outprefix + '.tmp.download' current_dir = os.getcwd() - try: - os.mkdir(tmpdir) + if self.version == 'old': + try: + os.mkdir(tmpdir) + os.chdir(tmpdir) + except: + raise Error('Error mkdir/chdir ' + 
tmpdir) + + zipfile = 'plasmidfinder.zip' + cmd = 'curl -X POST --data "folder=virulencefinder&filename=virulencefinder.zip" -o ' + zipfile + ' https://cge.cbs.dtu.dk/cge/download_data.php' + print('Downloading data with:', cmd, sep='\n') + common.syscall(cmd) + common.syscall('unzip ' + zipfile) + else: + RefGenesGetter._get_genetic_epi_database_from_bitbucket('virulence_finder', tmpdir, git_commit=self.version) os.chdir(tmpdir) - except: - raise Error('Error mkdir/chdir ' + tmpdir) - - zipfile = 'plasmidfinder.zip' - cmd = 'curl -X POST --data "folder=virulencefinder&filename=virulencefinder.zip" -o ' + zipfile + ' https://cge.cbs.dtu.dk/cge/download_data.php' - print('Downloading data with:', cmd, sep='\n') - common.syscall(cmd) - common.syscall('unzip ' + zipfile) print('Combining downloaded fasta files...') fout_fa = pyfastaq.utils.open_file_write(final_fasta) diff --git a/scripts/ariba b/scripts/ariba index 8cf4a51c..6a6f9ad1 100755 --- a/scripts/ariba +++ b/scripts/ariba @@ -62,7 +62,7 @@ subparser_getref = subparsers.add_parser( description='Download reference data from one of a few supported public resources', ) subparser_getref.add_argument('--debug', action='store_true', help='Do not delete temporary downloaded files') -subparser_getref.add_argument('--version', help='Version of reference data to download. If not used, gets the latest version. Only applies to card and megares') +subparser_getref.add_argument('--version', help='Version of reference data to download. If not used, gets the latest version. Only applies to card, megares, plasmidfinder, resfinder, virulencefinder. For plasmid/res/virulencefinder: default is to get latest from bitbucket - supply git commit hash to get a specific version from bitbucket, or use "old" to get from old website.') subparser_getref.add_argument('db', help='Database to download. 
Must be one of: ' + ' '.join(allowed_dbs), choices=allowed_dbs, metavar="DB name") subparser_getref.add_argument('outprefix', help='Prefix of output filenames') subparser_getref.set_defaults(func=ariba.tasks.getref.run)