Skip to content

Commit

Permalink
Merge pull request #222 from martinghunt/getref_xfinder_bitbucket
Browse files Browse the repository at this point in the history
Getref xfinder bitbucket
  • Loading branch information
martinghunt authored May 13, 2018
2 parents a7cd6a7 + 5f85c47 commit d58db1e
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 31 deletions.
87 changes: 57 additions & 30 deletions ariba/ref_genes_getter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class Error (Exception): pass
import pyfastaq
import time
import json
import subprocess
import sys
from ariba import common, card_record, vfdb_parser, megares_data_finder, megares_zip_parser

Expand Down Expand Up @@ -187,24 +188,42 @@ def _get_from_card(self, outprefix):
print('and in your methods say that version', self.version, 'of the database was used')


@classmethod
def _get_genetic_epi_database_from_bitbucket(cls, db_name, outdir, git_commit=None):
    """Clone a genomicepidemiology database repository from bitbucket.

    Runs ``git clone`` on
    ``https://bitbucket.org/genomicepidemiology/<db_name>_db.git`` with
    ``outdir`` as the destination, optionally checks out ``git_commit``,
    and finally prints the checked-out commit using ``git log -n 1``.

    Args:
        db_name: Database to fetch. Must be one of 'plasmidfinder',
            'resfinder' or 'virulence_finder'.
        outdir: Directory that ``git clone`` writes the repository to.
        git_commit: Commit to check out after cloning. If None (the
            default), whatever the clone checked out is left in place.

    Raises:
        AssertionError: If ``db_name`` is not one of the supported names.
        subprocess.CalledProcessError: If the final ``git log`` fails.
        Failures of the clone/checkout are reported by ``common.syscall``
        (defined outside this block - its error behaviour is not visible
        here).
    """
    # Local import: only this method needs it, for quoting shell arguments.
    import shlex

    assert db_name in {'plasmidfinder', 'resfinder', 'virulence_finder'}

    # Every command below is a shell string (note the 'cd ... && ...' form),
    # so values that do not come from literals in this file must be quoted.
    # Otherwise a path containing spaces, or a commit string containing
    # shell metacharacters, would break or alter the command.
    quoted_outdir = shlex.quote(outdir)
    url = 'https://bitbucket.org/genomicepidemiology/' + db_name + '_db.git'
    common.syscall('git clone ' + url + ' ' + quoted_outdir)

    if git_commit is not None:
        common.syscall('cd ' + quoted_outdir + ' && git checkout ' + shlex.quote(git_commit))

    # Flush so this header is emitted before the child process writes the
    # git log output, even when stdout is block-buffered (e.g. redirected).
    print('Using this git commit for ' + db_name + ' database:', flush=True)
    subprocess.check_call('cd ' + quoted_outdir + ' && git log -n 1', shell=True)


def _get_from_resfinder(self, outprefix):
outprefix = os.path.abspath(outprefix)
final_fasta = outprefix + '.fa'
final_tsv = outprefix + '.tsv'
tmpdir = outprefix + '.tmp.download'
current_dir = os.getcwd()

try:
os.mkdir(tmpdir)
if self.version =='old':
try:
os.mkdir(tmpdir)
os.chdir(tmpdir)
except:
raise Error('Error mkdir/chdir ' + tmpdir)

zipfile = 'resfinder.zip'
cmd = 'curl -X POST --data "folder=resfinder&filename=resfinder.zip" -o ' + zipfile + ' https://cge.cbs.dtu.dk/cge/download_data.php'
print('Downloading data with:', cmd, sep='\n')
common.syscall(cmd)
common.syscall('unzip ' + zipfile)
else:
RefGenesGetter._get_genetic_epi_database_from_bitbucket('resfinder', tmpdir, git_commit=self.version)
os.chdir(tmpdir)
except:
raise Error('Error mkdir/chdir ' + tmpdir)

zipfile = 'resfinder.zip'
cmd = 'curl -X POST --data "folder=resfinder&filename=resfinder.zip" -o ' + zipfile + ' https://cge.cbs.dtu.dk/cge/download_data.php'
print('Downloading data with:', cmd, sep='\n')
common.syscall(cmd)
common.syscall('unzip ' + zipfile)

print('Combining downloaded fasta files...')
fout_fa = pyfastaq.utils.open_file_write(final_fasta)
Expand All @@ -223,7 +242,7 @@ def _get_from_resfinder(self, outprefix):
except:
description = '.'

# names are not unique across the files
# names are not unique across the files
if seq.id in used_names:
used_names[seq.id] += 1
seq.id += '_' + str(used_names[seq.id])
Expand Down Expand Up @@ -311,17 +330,21 @@ def _get_from_plasmidfinder(self, outprefix):
tmpdir = outprefix + '.tmp.download'
current_dir = os.getcwd()

try:
os.mkdir(tmpdir)
if self.version == 'old':
try:
os.mkdir(tmpdir)
os.chdir(tmpdir)
except:
raise Error('Error mkdir/chdir ' + tmpdir)

zipfile = 'plasmidfinder.zip'
cmd = 'curl -X POST --data "folder=plasmidfinder&filename=plasmidfinder.zip" -o ' + zipfile + ' https://cge.cbs.dtu.dk/cge/download_data.php'
print('Downloading data with:', cmd, sep='\n')
common.syscall(cmd)
common.syscall('unzip ' + zipfile)
else:
RefGenesGetter._get_genetic_epi_database_from_bitbucket('plasmidfinder', tmpdir, git_commit=self.version)
os.chdir(tmpdir)
except:
raise Error('Error mkdir/chdir ' + tmpdir)

zipfile = 'plasmidfinder.zip'
cmd = 'curl -X POST --data "folder=plasmidfinder&filename=plasmidfinder.zip" -o ' + zipfile + ' https://cge.cbs.dtu.dk/cge/download_data.php'
print('Downloading data with:', cmd, sep='\n')
common.syscall(cmd)
common.syscall('unzip ' + zipfile)

print('Combining downloaded fasta files...')
fout_fa = pyfastaq.utils.open_file_write(final_fasta)
Expand Down Expand Up @@ -460,17 +483,21 @@ def _get_from_virulencefinder(self, outprefix):
tmpdir = outprefix + '.tmp.download'
current_dir = os.getcwd()

try:
os.mkdir(tmpdir)
if self.version == 'old':
try:
os.mkdir(tmpdir)
os.chdir(tmpdir)
except:
raise Error('Error mkdir/chdir ' + tmpdir)

zipfile = 'plasmidfinder.zip'
cmd = 'curl -X POST --data "folder=virulencefinder&filename=virulencefinder.zip" -o ' + zipfile + ' https://cge.cbs.dtu.dk/cge/download_data.php'
print('Downloading data with:', cmd, sep='\n')
common.syscall(cmd)
common.syscall('unzip ' + zipfile)
else:
RefGenesGetter._get_genetic_epi_database_from_bitbucket('virulence_finder', tmpdir, git_commit=self.version)
os.chdir(tmpdir)
except:
raise Error('Error mkdir/chdir ' + tmpdir)

zipfile = 'plasmidfinder.zip'
cmd = 'curl -X POST --data "folder=virulencefinder&filename=virulencefinder.zip" -o ' + zipfile + ' https://cge.cbs.dtu.dk/cge/download_data.php'
print('Downloading data with:', cmd, sep='\n')
common.syscall(cmd)
common.syscall('unzip ' + zipfile)

print('Combining downloaded fasta files...')
fout_fa = pyfastaq.utils.open_file_write(final_fasta)
Expand Down
2 changes: 1 addition & 1 deletion scripts/ariba
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ subparser_getref = subparsers.add_parser(
description='Download reference data from one of a few supported public resources',
)
subparser_getref.add_argument('--debug', action='store_true', help='Do not delete temporary downloaded files')
subparser_getref.add_argument('--version', help='Version of reference data to download. If not used, gets the latest version. Only applies to card and megares')
subparser_getref.add_argument('--version', help='Version of reference data to download. If not used, gets the latest version. Only applies to card, megares, plasmidfinder, resfinder, virulencefinder. For plasmid/res/virulencefinder: default is to get latest from bitbucket - supply git commit hash to get a specific version from bitbucket, or use "old" to get from old website.')
subparser_getref.add_argument('db', help='Database to download. Must be one of: ' + ' '.join(allowed_dbs), choices=allowed_dbs, metavar="DB name")
subparser_getref.add_argument('outprefix', help='Prefix of output filenames')
subparser_getref.set_defaults(func=ariba.tasks.getref.run)
Expand Down

0 comments on commit d58db1e

Please sign in to comment.