Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vfdb get full and core #122

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions ariba/ref_genes_getter.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,21 @@ class Error (Exception): pass
from ariba import common, card_record, vfdb_parser


# Names of the reference databases that can be requested. RefGenesGetter
# checks its ref_db argument against this set, and scripts/ariba builds its
# own list of allowed databases from it.
allowed_ref_dbs = {
    'argannot', 'card', 'plasmidfinder', 'resfinder',
    'srst2_argannot',
    'vfdb_core', 'vfdb_full',
}

# Citation text for ARG-ANNOT (two newline-terminated lines).
argannot_ref = (
    '"ARG-ANNOT, a new bioinformatic tool to discover antibiotic resistance genes in bacterial genomes",\n'
    'Gupta et al 2014, PMID: 24145532\n'
)


class RefGenesGetter:
def __init__(self, ref_db, genetic_code=11, version=None):
allowed_ref_dbs = {'card', 'argannot', 'plasmidfinder', 'resfinder','srst2_argannot', 'vfdb'}
if ref_db not in allowed_ref_dbs:
raise Error('Error in RefGenesGetter. ref_db must be one of: ' + str(allowed_ref_dbs) + ', but I got "' + ref_db)
self.ref_db=ref_db
Expand All @@ -29,7 +38,7 @@ def __init__(self, ref_db, genetic_code=11, version=None):


def _download_file(self, url, outfile):
print('Downloading "', url, '" and saving as "', outfile, '" ...', end='', sep='')
print('Downloading "', url, '" and saving as "', outfile, '" ...', end='', sep='', flush=True)
for i in range(self.max_download_attempts):
time.sleep(self.sleep_time)
try:
Expand Down Expand Up @@ -360,7 +369,15 @@ def _get_from_srst2_argannot(self, outprefix):
print('and in your methods say that the ARG-ANNOT sequences were used from version', srst2_version, 'of SRST2.')


def _get_from_vfdb(self, outprefix):
def _get_from_vfdb_core(self, outprefix):
self._get_from_vfdb_common(outprefix, 'VFDB_setA_nt.fas.gz','core')


def _get_from_vfdb_full(self, outprefix):
self._get_from_vfdb_common(outprefix, 'VFDB_setB_nt.fas.gz','full')


def _get_from_vfdb_common(self, outprefix, filename, info_text):
outprefix = os.path.abspath(outprefix)
tmpdir = outprefix + '.tmp.download'

Expand All @@ -369,12 +386,13 @@ def _get_from_vfdb(self, outprefix):
except:
raise Error('Error mkdir ' + tmpdir)

zipfile = os.path.join(tmpdir, 'VFDB_setA_nt.fas.gz')
self._download_file('http://www.mgc.ac.cn/VFs/Down/VFDB_setA_nt.fas.gz', zipfile)
zipfile = os.path.join(tmpdir, filename)
self._download_file('http://www.mgc.ac.cn/VFs/Down/' + filename, zipfile)
print('Extracting files ... ', end='', flush=True)
vparser = vfdb_parser.VfdbParser(zipfile, outprefix)
vparser.run()
shutil.rmtree(tmpdir)
print('Extracted files.')
print('done')
final_fasta = outprefix + '.fa'
final_tsv = outprefix + '.tsv'

Expand All @@ -384,6 +402,7 @@ def _get_from_vfdb(self, outprefix):
print('If you use this downloaded data, please cite:')
print('"VFDB 2016: hierarchical and refined dataset for big data analysis-10 years on",\nChen LH et al 2016, Nucleic Acids Res. 44(Database issue):D694-D697. PMID: 26578559\n')


def run(self, outprefix):
    """Run the getter that matches ``self.ref_db``.

    Looks up the method named ``'_get_from_' + self.ref_db`` on this
    object and calls it with ``outprefix``.

    Args:
        outprefix: passed unchanged to the selected ``_get_from_*`` method.

    Raises:
        AttributeError: if this object has no ``_get_from_<ref_db>`` method.
    """
    # Plain attribute lookup instead of exec(): ref_db is only ever used as
    # part of a method name and is never evaluated as Python source.
    getter = getattr(self, '_get_from_' + self.ref_db)
    getter(outprefix)

2 changes: 1 addition & 1 deletion scripts/ariba
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ subparser_flag.set_defaults(func=ariba.tasks.flag.run)


#---------------------------- getref ------------------------------------
allowed_dbs = ['argannot', 'card', 'plasmidfinder', 'resfinder', 'srst2_argannot', 'vfdb']
allowed_dbs = sorted(list(ariba.ref_genes_getter.allowed_ref_dbs))
subparser_getref = subparsers.add_parser(
'getref',
help='Download reference data',
Expand Down