From bafb22501240c54fe8b38aef12241e6a43c4526f Mon Sep 17 00:00:00 2001 From: martinghunt Date: Wed, 20 Jul 2016 08:28:37 +0000 Subject: [PATCH 1/6] Write distance matrix compatible with dendropy --- ariba/summary.py | 5 +++-- .../data/summary_test_write_distance_matrix.distances | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ariba/summary.py b/ariba/summary.py index ea385952..d8f5fec1 100644 --- a/ariba/summary.py +++ b/ariba/summary.py @@ -4,6 +4,7 @@ import sys import openpyxl import pyfastaq +import dendropy from ariba import flag, common, report, summary_cluster, summary_sample class Error (Exception): pass @@ -328,10 +329,10 @@ def _write_distance_matrix(cls, lines, outfile): scores[j][i] = scores[i][j] with open(outfile, 'w') as f: - sample_names = [x[0] for x in lines] + sample_names = [''] + [x[0] for x in lines] print(*sample_names, sep='\t', file=f) for i in range(len(scores)): - print(lines[i][0], *scores[i][1:], sep='\t', file=f) + print(lines[i][0], *scores[i], sep='\t', file=f) @classmethod diff --git a/ariba/tests/data/summary_test_write_distance_matrix.distances b/ariba/tests/data/summary_test_write_distance_matrix.distances index c3ffce3c..4c83bf91 100644 --- a/ariba/tests/data/summary_test_write_distance_matrix.distances +++ b/ariba/tests/data/summary_test_write_distance_matrix.distances @@ -1,4 +1,4 @@ -file1 file2 file3 -file1 3 2 -file2 0 1 -file3 1 0 + file1 file2 file3 +file1 0 3 2 +file2 3 0 1 +file3 2 1 0 From 74689acb68c97ab7a07561888cb2c8cb5cfbb0ae Mon Sep 17 00:00:00 2001 From: martinghunt Date: Wed, 20 Jul 2016 08:42:31 +0000 Subject: [PATCH 2/6] Use dendropy instead of R to make newick file --- ariba/summary.py | 17 +++++------------ ...mmary_test_newick_from_dist_matrix.distances | 8 ++++---- .../summary_test_newick_from_dist_matrix.tre | 1 - ariba/tests/summary_test.py | 5 +++-- 4 files changed, 12 insertions(+), 19 deletions(-) delete mode 100644 ariba/tests/data/summary_test_newick_from_dist_matrix.tre diff --git a/ariba/summary.py b/ariba/summary.py index d8f5fec1..18b54674 100644 --- a/ariba/summary.py +++ b/ariba/summary.py @@ -337,18 +337,11 @@ def _write_distance_matrix(cls, lines, outfile): @classmethod def _newick_from_dist_matrix(cls, distance_file, outfile): - r_script = outfile + '.tmp.R' - - with open(r_script, 'w') as f: - print('library(ape)', file=f) - print('a=read.table("', distance_file, '", header=TRUE, row.names=1, comment.char="")', sep='', file=f) - print('h=hclust(dist(a))', file=f) - print('write.tree(as.phylo(h), file="', outfile, '")', sep='', file=f) - - common.syscall('Rscript --no-save ' + r_script) - if os.path.exists(r_script + 'out'): - os.unlink(r_script + 'out') - os.unlink(r_script) + with open(distance_file) as f: + pdm = dendropy.PhylogeneticDistanceMatrix.from_csv(src=f, delimiter='\t') + upgma_tree = pdm.upgma_tree() + with open(outfile, 'w') as f: + print(upgma_tree.as_string("newick"), end='', file=f) def run(self): diff --git a/ariba/tests/data/summary_test_newick_from_dist_matrix.distances b/ariba/tests/data/summary_test_newick_from_dist_matrix.distances index c3ffce3c..4c83bf91 100644 --- a/ariba/tests/data/summary_test_newick_from_dist_matrix.distances +++ b/ariba/tests/data/summary_test_newick_from_dist_matrix.distances @@ -1,4 +1,4 @@ -file1 file2 file3 -file1 3 2 -file2 0 1 -file3 1 0 + file1 file2 file3 +file1 0 3 2 +file2 3 0 1 +file3 2 1 0 diff --git a/ariba/tests/data/summary_test_newick_from_dist_matrix.tre b/ariba/tests/data/summary_test_newick_from_dist_matrix.tre deleted file mode 100644 index a3f5ac3a..00000000 --- a/ariba/tests/data/summary_test_newick_from_dist_matrix.tre +++ /dev/null @@ -1 +0,0 @@ -(file1:1.58113883,(file2:0.7071067812,file3:0.7071067812):0.8740320489); diff --git a/ariba/tests/summary_test.py b/ariba/tests/summary_test.py index daa84995..fb354744 100644 --- a/ariba/tests/summary_test.py +++ b/ariba/tests/summary_test.py @@ -440,7 +440,8 @@ def test_newick_from_dist_matrix(self): tmp_tree = 'tmp.test.newick_from_dist_matrix.tre' dist_file = os.path.join(data_dir, 'summary_test_newick_from_dist_matrix.distances') summary.Summary._newick_from_dist_matrix(dist_file, tmp_tree) - expected = os.path.join(data_dir, 'summary_test_newick_from_dist_matrix.tre') - self.assertTrue(filecmp.cmp(expected, tmp_tree, shallow=False)) + # the exact ordering of the nodes is not predictable, so we'll trust dendropy + # and just check that an output file got written + self.assertTrue(os.path.exists(tmp_tree)) os.unlink(tmp_tree) From 79dfc2d2b4bb6b646036422f06f5483274ad074b Mon Sep 17 00:00:00 2001 From: martinghunt Date: Wed, 20 Jul 2016 08:44:26 +0000 Subject: [PATCH 3/6] Require dendropy --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 03cc72a6..03674d82 100644 --- a/setup.py +++ b/setup.py @@ -32,6 +32,7 @@ test_suite='nose.collector', tests_require=['nose >= 1.3'], install_requires=[ + 'dendropy >= 4.0.3', 'openpyxl >= 1.6.2', 'pyfastaq >= 3.12.0', 'pysam >= 0.8.1', From ad6dacb64fdfc853b354a4aa8c82b4c8e8d6c582 Mon Sep 17 00:00:00 2001 From: martinghunt Date: Wed, 20 Jul 2016 08:46:20 +0000 Subject: [PATCH 4/6] Remove R dependency --- .travis.yml | 3 --- ariba/external_progs.py | 3 --- install_dependencies.sh | 7 ------- 3 files changed, 13 deletions(-) diff --git a/.travis.yml b/.travis.yml index d2e1f1a6..fb4b1573 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,9 +7,6 @@ addons: - liblapack-dev - libgfortran3 - libncurses5-dev - - r-base - - r-base-dev - - r-base-core cache: directories: - "build" diff --git a/ariba/external_progs.py b/ariba/external_progs.py index 1e052fcc..96a77789 100644 --- a/ariba/external_progs.py +++ b/ariba/external_progs.py @@ -20,7 +20,6 @@ class Error (Exception): pass 'samtools': 'samtools', 'spades': 'spades.py', 'sspace': 'SSPACE_Basic_v2.0.pl', - 'r': 'Rscript', } @@ -38,7 +37,6 @@ class Error (Exception): pass 'samtools': ('', re.compile('^Version: ([0-9\.]+)')), 'spades': ('', re.compile('^SPAdes genome assembler v\.?([0-9\.]+)')), 'sspace': ('', re.compile('^Usage: .*pl \[SSPACE_(.*)\]')), - 'r': ('--version', re.compile('^R .*version ([0-9\.]+)')), } @@ -51,7 +49,6 @@ class Error (Exception): pass 'nucmer': '3.1', 'samtools': '1.2', 'spades': '3.5.0', - 'r': '2.14.0' } diff --git a/install_dependencies.sh b/install_dependencies.sh index 8c4347e4..9559a0b9 100755 --- a/install_dependencies.sh +++ b/install_dependencies.sh @@ -116,10 +116,3 @@ update_path ${mummer_dir} update_path ${samtools_dir} update_path ${spades_dir} - -# -------------- R packages --------------- -mkdir -p ~/R/libs -echo "R_LIBS=~/R/libs" > ~/.Renviron -wget https://cran.r-project.org/src/contrib/Archive/ape/ape_3.1.tar.gz -R CMD INSTALL ape_3.1.tar.gz - From bbe1776f192ab0ce11c379b9cd400b24fb149bdd Mon Sep 17 00:00:00 2001 From: martinghunt Date: Wed, 20 Jul 2016 09:15:22 +0000 Subject: [PATCH 5/6] Require mash at least 1.0.2 --- ariba/external_progs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ariba/external_progs.py b/ariba/external_progs.py index 96a77789..58ada356 100644 --- a/ariba/external_progs.py +++ b/ariba/external_progs.py @@ -45,7 +45,7 @@ class Error (Exception): pass 'bowtie2': '2.1.0', 'cdhit': '4.6', 'cdhit2d': '4.6', - 'mash': '1.1', + 'mash': '1.0.2', 'nucmer': '3.1', 'samtools': '1.2', 'spades': '3.5.0', From 2ed2ba1298c16c8bc30b78ea97537fe6376dc1bd Mon Sep 17 00:00:00 2001 From: martinghunt Date: Wed, 20 Jul 2016 09:16:48 +0000 Subject: [PATCH 6/6] dendropy at least 4.1.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 03674d82..2566e963 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ test_suite='nose.collector', tests_require=['nose >= 1.3'], install_requires=[ - 'dendropy >= 4.0.3', + 'dendropy >= 4.1.0', 'openpyxl >= 1.6.2', 'pyfastaq >= 3.12.0', 'pysam >= 0.8.1',