Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1.5.1 #328

Merged
merged 4 commits into from
Jun 24, 2021
Merged

1.5.1 #328

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/src/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
Change log
==========

1.5.1
-----

* (`#327 <https://github.com/Ecogenomics/GTDBTk/issues/327>`_) Disallow spaces in genome names/file paths due to downstream application issues.
* (`#326 <https://github.com/Ecogenomics/GTDBTk/issues/326>`_) Disallow genome names that are blank.

1.5.0
-----

Expand Down
2 changes: 1 addition & 1 deletion gtdbtk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@
__status__ = 'Production'
__title__ = 'GTDB-Tk'
__url__ = 'https://github.com/Ecogenomics/GTDBTk'
__version__ = '1.5.0'
__version__ = '1.5.1'
29 changes: 24 additions & 5 deletions gtdbtk/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def _check_package_compatibility(self):
f'intended for this release: {Config.MIN_REF_DATA_VERSION}',
['bright'], fg='yellow'))

def _verify_genome_id(self, genome_id):
def _verify_genome_id(self, genome_id: str) -> bool:
"""Ensure genome ID will be valid in Newick tree.

Parameters
Expand All @@ -91,16 +91,27 @@ def _verify_genome_id(self, genome_id):

Raises
------
GenomeNameInvalid
GTDBTkExit
If the genome identifier contains illegal characters.
"""

invalid_chars = set('()[],;=')
if genome_id is None or not isinstance(genome_id, str):
raise GTDBTkExit(f'The genome name is not a valid string: {genome_id}')
if len(genome_id) == 0:
raise GTDBTkExit('Genome name cannot be blank, check for input files '
'without a name, or empty columns in the batchfile.')
invalid_chars = frozenset('()[],;= ')
if any((c in invalid_chars) for c in genome_id):
self.logger.error(f'Invalid genome ID: {genome_id}')
self.logger.error(f'The following characters are invalid: '
f'{" ".join(invalid_chars)}')
raise GenomeNameInvalid(f'Invalid genome ID: {genome_id}')
raise GTDBTkExit(f'Invalid genome ID: {genome_id}')
return True

@staticmethod
def _verify_file_path(file_path: str) -> bool:
if ' ' in file_path:
raise GTDBTkExit(f'The genome path contains a space, this is '
f'unsupported by downstream applications: {file_path}')
return True

def _genomes_to_process(self, genome_dir, batchfile, extension):
Expand Down Expand Up @@ -136,6 +147,10 @@ def _genomes_to_process(self, genome_dir, batchfile, extension):
for genome_key in genomic_files:
self._verify_genome_id(genome_key)

# Check that no file path contains a space (unsupported by downstream applications)
for file_path in genomic_files.values():
self._verify_file_path(file_path)

# Check that the prefix is valid and the path exists
invalid_paths = list()
for genome_key, genome_path in genomic_files.items():
Expand Down Expand Up @@ -620,6 +635,10 @@ def parse_options(self, options):
check_dependencies(['prodigal', 'hmmalign'])
check_dependencies(['FastTree' + ('MP' if options.cpus > 1 else '')])

if options.skip_gtdb_refs and options.custom_taxonomy_file is None:
raise GTDBTkExit("When running de_novo_wf, The '--skip_gtdb_refs' flag requires"
"'--custom_taxonomy_file' to be included to the command line.")

options.write_single_copy_genes = False
self.identify(options)

Expand Down