Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make sspace and gapfiller optional #17

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,17 @@ Installation
ARIBA has the following dependencies, which need to be installed:
* [cd-hit] [cdhit] version >= 4.6
* [samtools and bcftools] [samtools] version >= 1.2
* [SSPACE-basic scaffolder] [sspace]
* [GapFiller] [gapfiller]
* [MUMmer] [mummer] version >= 3.23
* [SMALT] [smalt] version >= 0.7.4
* Either [SPAdes] [spades] version >= 3.5.0 or [Velvet] [velvet] version >= 1.2.07
(SPAdes is recommended)

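ARIBA has the following optional dependencies. If they are installed,
they will be used. If SSPACE is not found, both scaffolding and gap
filling will be skipped. If SSPACE is found but GapFiller is not,
scaffolding will still be run but gap filling will be skipped.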
* [SSPACE-basic scaffolder] [sspace]
* [GapFiller] [gapfiller]

Once the dependencies are installed, install ARIBA using pip:

pip3 install pyfastaq
Expand Down
26 changes: 16 additions & 10 deletions ariba/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,19 +80,18 @@ def __init__(self,

self.bcftools_exe = bcftools_exe

self.gapfiller_exe = shutil.which(gapfiller_exe)
if self.gapfiller_exe is None:
raise Error('Error! ' + gapfiller_exe + ' not found in path')
self.gapfiller_exe = os.path.realpath(self.gapfiller_exe) # otherwise gapfiller dies loading packages
self.sspace_exe = shutil.which(sspace_exe)
if self.sspace_exe is None:
self.gapfiller_exe = None
else:
self.sspace_exe = os.path.realpath(self.sspace_exe) # otherwise sspace dies loading packages
self.gapfiller_exe = shutil.which(gapfiller_exe)
if self.gapfiller_exe is not None:
self.gapfiller_exe = os.path.realpath(self.gapfiller_exe) # otherwise gapfiller dies loading packages

self.samtools_exe = samtools_exe
self.smalt_exe = smalt_exe

self.sspace_exe = shutil.which(sspace_exe)
if self.sspace_exe is None:
raise Error('Error! ' + sspace_exe + ' not found in path')
self.sspace_exe = os.path.realpath(self.sspace_exe) # otherwise sspace dies loading packages

if self.assembler == 'velvet':
self.velveth = velvet_exe + 'h'
self.velvetg = velvet_exe + 'g'
Expand Down Expand Up @@ -305,6 +304,13 @@ def _scaffold_with_sspace(self):
raise Error('Error mkdir '+ self.scaffold_dir)

cwd = os.getcwd()

if self.sspace_exe is None:
os.chdir(self.assembly_dir)
os.symlink(os.path.basename(self.assembly_contigs), os.path.basename(self.scaffolder_scaffolds))
os.chdir(cwd)
return

os.chdir(self.scaffold_dir)
lib_file = 'lib'
with open(lib_file, 'w') as f:
Expand Down Expand Up @@ -339,7 +345,7 @@ def _gap_fill_with_gapfiller(self):

cwd = os.getcwd()

if not self._has_gaps_to_fill(self.scaffolder_scaffolds):
if self.gapfiller_exe is None or not self._has_gaps_to_fill(self.scaffolder_scaffolds):
self._rename_scaffolds(self.scaffolder_scaffolds, self.gapfilled_scaffolds)
return

Expand Down
20 changes: 11 additions & 9 deletions ariba/clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,19 +88,21 @@ def __init__(self,

self.bcftools_exe = bcftools_exe

self.gapfiller_exe = shutil.which(gapfiller_exe)
if self.gapfiller_exe is None:
raise Error('Error! ' + gapfiller_exe + ' not found in path')
self.gapfiller_exe = os.path.realpath(self.gapfiller_exe) # otherwise gapfiller dies loading packages
self.sspace_exe = shutil.which(sspace_exe)
if self.sspace_exe is None:
print('WARNING: SSPACE not found. Scaffolding and gap filling will be skipped!', file=sys.stderr)
self.gapfiller_exe = None
else:
self.sspace_exe = os.path.realpath(self.sspace_exe) # otherwise sspace dies loading packages
self.gapfiller_exe = shutil.which(gapfiller_exe)
if self.gapfiller_exe is None:
print('WARNING: GapFiller not found. No gap filling will be run after scaffolding!', file=sys.stderr)
else:
self.gapfiller_exe = os.path.realpath(self.gapfiller_exe) # otherwise gapfiller dies loading packages

self.samtools_exe = samtools_exe
self.spades_exe = spades_exe

self.sspace_exe = shutil.which(sspace_exe)
if self.sspace_exe is None:
raise Error('Error! ' + sspace_exe + ' not found in path')
self.sspace_exe = os.path.realpath(self.sspace_exe) # otherwise sspace dies loading packages

self.velvet = velvet_exe

self.cdhit_seq_identity_threshold = cdhit_seq_identity_threshold
Expand Down
29 changes: 23 additions & 6 deletions ariba/external_progs.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,16 @@ def set_path(prog, opts):
exec('opts.' + prog + ' = "' + prog_to_default[prog] + '"')


def get_version(prog, path=None):
def get_version(prog, path=None, raise_error=True):
assert prog in prog_to_version_cmd
if path is None:
path = prog

if not is_in_path(path):
raise Error('Error getting version of ' + path + ' - not found in path.')
if raise_error:
raise Error('Error getting version of ' + path + ' - not found in path.')
else:
return 'Not_in_path', 'Not_in_path'

path = shutil.which(path)

Expand All @@ -100,7 +103,10 @@ def get_version(prog, path=None):
return 'UNKNOWN ...\n I tried running this to get the version: "' + cmd + '"\n and the output didn\'t match this regular expression: "' + regex.pattern + '"', path


def check_versions(opts, verbose=False):
def check_versions(opts, verbose=False, not_required=None):
if not_required is None:
not_required = set()

if verbose:
print('{:_^79}'.format(' Checking dependencies and their versions '))
print('tool', 'version', 'path', sep='\t')
Expand All @@ -124,22 +130,33 @@ def check_versions(opts, verbose=False):
raise Error('Assembler ' + opts.assembler + ' not recognised. Cannot continue')

errors = []
failed_to_find = set()

for prog in to_check:
set_path(prog, opts)
version, path = get_version(prog, path=eval('opts.' + prog))
version, path = get_version(prog, path=eval('opts.' + prog), raise_error=prog not in not_required)
if verbose:
print(prog, version, path, sep='\t')
if path == 'Not_in_path':
print('\nWARNING:', prog, 'not found in path, so will be skipped during assembly\n', file=sys.stderr)

if prog in min_versions and LooseVersion(version) < LooseVersion(min_versions[prog]):
errors.append(' '.join(['Found version', version, 'of', prog, 'which is too low! Please update to at least', min_versions[prog] + '\n Found it here:', path]))
failed_to_find.add(prog)

if len(errors):
for e in errors:
print('\n*** Error! Bad dependency! ***', file=sys.stderr)
print(e, file=sys.stderr)
print()
raise Error('Cannot continue. Some dependencies need updating')
if len(failed_to_find.difference(not_required)) > 0:
raise Error('Cannot continue. Some dependencies need updating')
else:
assert failed_to_find.issubset(not_required)
if 'sspace' in failed_to_find:
print('WARNING: SSPACE not found. Will not run scaffolding or gap filling', file=sys.stderr)
elif 'gapfiller' in failed_to_find:
print('WARNING: GapFiller not found. Will not run gap filling after scaffolding', file=sys.stderr)

if verbose:
print('\n... dependencies look OK.\n')
print('\nDependencies look OK (but check in case there are warnings about SSPACE or GapFiller)\n')
2 changes: 1 addition & 1 deletion ariba/tasks/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def run():
options = parser.parse_args()
if options.assembler == 'velvet':
options.velvet = 'velvet'
ariba.external_progs.check_versions(options, verbose=options.verbose)
ariba.external_progs.check_versions(options, verbose=options.verbose, not_required=set(['sspace', 'gapfiller']))
pyfastaq.sequences.genetic_code = options.genetic_code

c = ariba.clusters.Clusters(
Expand Down