Skip to content

Commit

Permalink
Add the convert_to_itol function for #373
Browse files Browse the repository at this point in the history
  • Loading branch information
pchaumeil committed Apr 7, 2022
1 parent da97fb4 commit 0d95182
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 9 deletions.
12 changes: 7 additions & 5 deletions gtdbtk/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,13 @@ def print_help():
decorate -> Decorate tree with GTDB taxonomy
Tools:
infer_ranks -> Establish taxonomic ranks of internal nodes using RED
ani_rep -> Calculates ANI to GTDB representative genomes
trim_msa -> Trim an untrimmed MSA file based on a mask
export_msa -> Export the untrimmed archaeal or bacterial MSA file
remove_labels -> Remove labels (bootstrap values, node labels) from an Newick tree
infer_ranks -> Establish taxonomic ranks of internal nodes using RED
ani_rep -> Calculates ANI to GTDB representative genomes
trim_msa -> Trim an untrimmed MSA file based on a mask
export_msa -> Export the untrimmed archaeal or bacterial MSA file
remove_labels -> Remove labels (bootstrap values, node labels) from an Newick tree
convert_to_itol -> Convert a GTDB-Tk Newick tree to an iTOL tree
Testing:
test -> Validate the classify_wf pipeline with 3 archaeal genomes
Expand Down
11 changes: 10 additions & 1 deletion gtdbtk/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,16 @@ def get_main_parser():

# Remove labels
with subparser(sub_parsers, 'remove_labels', 'Remove labels (bootstrap values, node labels) from an Newick tree to '
'to improve compatibility with tree viewers') as parser:
'to improve compatibility with tree viewers.') as parser:
with arg_group(parser, 'required named arguments') as grp:
__input_tree(grp, required=True)
__output_tree(grp, required=True)
with arg_group(parser, 'optional arguments') as grp:
__debug(grp)
__help(grp)

# Convert to iTOL
with subparser(sub_parsers, 'convert_to_itol', 'Reformat the GTDB-Tk tree to be iTOL compatible.') as parser:
with arg_group(parser, 'required named arguments') as grp:
__input_tree(grp, required=True)
__output_tree(grp, required=True)
Expand Down
2 changes: 1 addition & 1 deletion gtdbtk/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@
MASK_DIR: '50e414a9de18170e8cb97f990f89ff60a0fe29d5',
MARKER_DIR: '163f542c3f0a40f59df45d453aa235b39aa96e27',
RADII_DIR: '8fd13b1c5d7a7b073ba96fb628581613b293a374',
MSA_FOLDER: '4bd032c90d5e5f0cbc96338445721a317f7d90b4',
MSA_FOLDER: '24f250d7cf0eb0bc65dccd2f3c9247e553ea322f',
METADATA_DIR: '9772fbeac1311b31e10293fa610eb33aa1ec8e15',
TAX_FOLDER: '6fb0233b05633242369b40c026fd1ee53e266afa',
FASTANI_DIR: '973c456c02f55bb82908a6811c7076e207e9b206',
Expand Down
22 changes: 21 additions & 1 deletion gtdbtk/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ def run_test(self, options):
shutil.copytree(input_dir, genome_test_dir)

args = ['gtdbtk', 'classify_wf', '--genome_dir', genome_test_dir,
'--out_dir', output_dir, '--cpus', str(options.cpus)]
'--out_dir', output_dir, '--cpus', str(options.cpus), '-f']
self.logger.info('Command: {}'.format(' '.join(args)))

# Pipe the output and write to disk.
Expand Down Expand Up @@ -464,6 +464,10 @@ def classify(self, options):
fulltreeopt=options.full_tree,
recalculate_red=False)

self.logger.info('Note that Tk classification mode is insufficient for publication of new taxonomic '
'designations. New designations should be based on one or more de novo trees, an '
'example of which can be produced by Tk in de novo mode.')

self.logger.info('Done.')

def trim_msa(self, options):
Expand Down Expand Up @@ -624,6 +628,20 @@ def ani_rep(self, options):

self.logger.info('Done.')

def convert_to_itol(self, options):
    """Run the ``convert_to_itol`` tool on the tree given on the command line.

    Parameters
    ----------
    options : argparse.Namespace
        The CLI arguments input by the user. ``input_tree`` is the tree to
        convert and ``output_tree`` is where the converted tree is written.
    """
    tree_path = options.input_tree
    # Existence check is delegated to the shared helper before any work starts.
    check_file_exists(tree_path)

    converter = Misc()
    converter.convert_to_itol(tree_path, options.output_tree)
    self.logger.info('Done.')

def remove_labels(self, options):
"""Remove labels from tree.
Expand Down Expand Up @@ -826,6 +844,8 @@ def parse_options(self, options):
self.ani_rep(options)
elif options.subparser_name == 'remove_labels':
self.remove_labels(options)
elif options.subparser_name == 'convert_to_itol':
self.convert_to_itol(options)
elif options.subparser_name == 'trim_msa':
self.trim_msa(options)
elif options.subparser_name == 'export_msa':
Expand Down
32 changes: 31 additions & 1 deletion gtdbtk/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import gtdbtk.config.config as Config
from gtdbtk.biolib_lite.execute import check_dependencies
from gtdbtk.biolib_lite.logger import colour
from gtdbtk.biolib_lite.newick import parse_label
from gtdbtk.biolib_lite.seq_io import read_fasta
from gtdbtk.config.output import DIR_CLASSIFY_INTERMEDIATE, DIR_ALIGN_INTERMEDIATE, DIR_IDENTIFY_INTERMEDIATE
from gtdbtk.exceptions import GTDBTkException, GTDBTkExit
Expand Down Expand Up @@ -126,7 +127,36 @@ def remove_labels(self, input_file, output_file):
for node in intree.internal_nodes():
node.label = None

intree.write_to_path(output_file, schema='newick', suppress_rooting=True)
intree.write_to_path(output_file, schema='newick', suppress_rooting=True,unquoted_underscores=True)


def convert_to_itol(self, input_file, output_file):
    """Convert a GTDB-Tk Newick tree to an iTOL compatible Newick tree.

    Every labelled internal node is split with ``parse_label``. The taxon
    part is kept as the node label, with the '; ' / ';' rank separators
    rewritten to '|' and single quotes removed. The support value, when
    there is one, is moved into a ``[value]`` suffix on the branch length.

    Parameters
    ----------
    input_file : str
        The path to the input Newick tree.
    output_file : str
        The path to the output Newick tree.
    """

    self.logger.info("Convert GTDB-Tk tree to iTOL format")
    intree = dendropy.Tree.get_from_path(input_file,
                                         schema='newick',
                                         rooting='force-rooted',
                                         preserve_underscores=True)

    for node in intree.internal_nodes():
        if not node.label:
            continue

        bootstrap, label, _aux = parse_label(node.label)
        if label:
            # lstrip() drops leading whitespace; the previous lstrip('')
            # stripped an empty character set and therefore did nothing.
            label = label.replace('; ', ';').replace(';', '|').replace("'", "").lstrip()

        # Assigned unconditionally so that a support-only label does not stay
        # on the node while also being appended to the branch length below.
        node.label = label

        # Only annotate the branch when there is something to annotate:
        #  - a missing support value would otherwise be written as "[None]"
        #    (assumes parse_label reports a missing value as None - TODO confirm);
        #  - a plain truthiness test on the length would skip 0.0 branches.
        if bootstrap is not None and node.edge.length is not None:
            # The length is deliberately replaced by a string so the writer
            # emits "<length>[<support>]" after the colon.
            node.edge.length = f'{node.edge.length}[{bootstrap}]'

    intree.write_to_path(output_file, schema='newick', suppress_rooting=True, unquoted_underscores=True)


def remove_intermediate_files(self,output_dir,wf_name):
Expand Down

0 comments on commit 0d95182

Please sign in to comment.