diff --git a/ariba/summary.py b/ariba/summary.py index ab6584c6..b98f35e0 100644 --- a/ariba/summary.py +++ b/ariba/summary.py @@ -122,6 +122,7 @@ def _gather_unfiltered_output_data(self): 'match': 'no', 'novel_var': 'NA', 'pct_id': 'NA', + 'ctg_cov': 'NA', 'ref_seq': 'NA' } else: @@ -164,11 +165,11 @@ def _to_matrix(cls, filenames, all_data, all_potential_columns, cluster_cols): matrix = [] making_header_lines = True phandango_header = ['name'] - phandango_suffixes = {'assembled': ':o1', 'match': ':o1', 'ref_seq': ':o2', 'pct_id': ':c1', 'known_var': ':o1', 'novel_var': ':o1'} + phandango_suffixes = {'assembled': ':o1', 'match': ':o1', 'ref_seq': ':o2', 'pct_id': ':c1', 'ctg_cov': ':c3', 'known_var': ':o1', 'novel_var': ':o1'} ref_seq_counter = 2 csv_header = ['name'] - summary_cols_in_order = ['assembled', 'match', 'ref_seq', 'pct_id', 'known_var', 'novel_var'] - summary_cols_set = set(['assembled', 'match', 'ref_seq', 'pct_id', 'known_var', 'novel_var']) + summary_cols_in_order = ['assembled', 'match', 'ref_seq', 'pct_id', 'ctg_cov', 'known_var', 'novel_var'] + summary_cols_set = set(['assembled', 'match', 'ref_seq', 'pct_id', 'ctg_cov', 'known_var', 'novel_var']) summary_cols_in_order = [x for x in summary_cols_in_order if cluster_cols[x]] for filename in sorted(filenames): diff --git a/ariba/tests/data/summary_test_whole_run.out.csv b/ariba/tests/data/summary_test_whole_run.out.csv index daa0ae38..2d4bd9e2 100644 --- a/ariba/tests/data/summary_test_whole_run.out.csv +++ b/ariba/tests/data/summary_test_whole_run.out.csv @@ -1,3 +1,3 @@ -name,23S.assembled,23S.match,23S.ref_seq,23S.pct_id,23S.known_var,23S.2597CT,23S.2597CT.%,23S.2597TC,23S.2597TC.%,coding1.assembled,coding1.match,coding1.ref_seq,coding1.pct_id,coding2.assembled,coding2.match,coding2.ref_seq,coding2.pct_id,coding3.assembled,coding3.match,coding3.ref_seq,coding3.pct_id,coding5.assembled,coding5.match,coding5.ref_seq,coding5.pct_id,coding5.known_var,coding5.A42S,coding6.assembled,coding6.match,coding6.ref_seq,coding6.pct_id,coding6.known_var,coding6.A52S,coding7.assembled,coding7.ref_seq,coding7.pct_id,coding8.assembled,coding8.match,coding8.ref_seq,coding8.pct_id,coding8.novel_var,coding8.A53S,mdfA.assembled,mdfA.ref_seq,mdfA.pct_id,mdfA.novel_var,noncoding1.assembled,noncoding1.match,noncoding1.ref_seq,noncoding1.pct_id,noncoding10.assembled,noncoding10.match,noncoding10.ref_seq,noncoding10.pct_id,noncoding10.novel_var,noncoding10.100T,noncoding10.100T.%,noncoding11.assembled,noncoding11.match,noncoding11.ref_seq,noncoding11.pct_id,noncoding11.novel_var,noncoding11.101AG,noncoding11.101AG.%,noncoding2.assembled,noncoding2.match,noncoding2.ref_seq,noncoding2.pct_id,noncoding3.assembled,noncoding3.match,noncoding3.ref_seq,noncoding3.pct_id,noncoding5.assembled,noncoding5.match,noncoding5.ref_seq,noncoding5.pct_id,noncoding5.known_var,noncoding5.42T,noncoding5.42T.%,noncoding6.assembled,noncoding6.match,noncoding6.ref_seq,noncoding6.pct_id,noncoding6.known_var,noncoding6.52CT,noncoding6.52CT.%,noncoding7.assembled,noncoding7.match,noncoding7.ref_seq,noncoding7.pct_id,noncoding7.known_var,noncoding7.53T,noncoding7.53T.%,noncoding8.assembled,noncoding8.match,noncoding8.ref_seq,noncoding8.pct_id,noncoding9.assembled,noncoding9.ref_seq,noncoding9.pct_id -summary_test_whole_run.in.1.tsv,yes,yes,23S.rDNA_WHO_F_01358c,99.86,yes,no,NA,yes,100.0,interrupted,no,coding1_ref1,99.1,yes,yes,coding2_ref1,98.2,no,no,NA,NA,yes,no,coding5_ref1,97.4,no,no,yes,yes,coding6_ref1,95.5,yes,yes,yes,coding7_ref1,95.4,yes,yes,coding8_ref1,95.3,yes,yes,interrupted,mdfA.3001328.JQ394987.0_1233.561,97.0,yes,yes,yes,noncoding1_ref1,99.1,yes,yes,noncoding10_ref1,95.1,yes,yes,99.0,yes,yes,noncoding11_ref1,95.05,het,het,30.0,yes,yes,noncoding2_ref1,98.2,no,no,NA,NA,yes,yes,noncoding5_ref1,97.4,yes,yes,100.0,yes,yes,noncoding6_ref1,95.5,yes,het,70.0,yes,yes,noncoding7_ref1,95.4,yes,yes,98.6,yes,yes,noncoding8_ref1,95.3,yes,noncoding9_ref1,95.2 -summary_test_whole_run.in.2.tsv,yes_nonunique,no,23S.rDNA_WHO_F_01358c,99.84,het,het,12.8,no,NA,yes,yes,coding1_ref2,99.2,no,no,NA,NA,yes,yes,coding3_ref1,97.6,yes,yes,coding5_ref1,97.4,yes,yes,no,no,NA,NA,NA,NA,no,NA,NA,no,no,NA,NA,NA,NA,no,NA,NA,NA,yes,yes,noncoding1_ref2,99.2,no,no,NA,NA,NA,NA,NA,no,no,NA,NA,NA,NA,NA,no,no,NA,NA,yes,yes,noncoding3_ref1,97.6,yes,no,noncoding5_ref1,99.42,no,no,NA,no,no,NA,NA,NA,NA,NA,no,no,NA,NA,NA,NA,NA,no,no,NA,NA,no,NA,NA +name,23S.assembled,23S.match,23S.ref_seq,23S.pct_id,23S.ctg_cov,23S.known_var,23S.2597CT,23S.2597CT.%,23S.2597TC,23S.2597TC.%,coding1.assembled,coding1.match,coding1.ref_seq,coding1.pct_id,coding1.ctg_cov,coding2.assembled,coding2.match,coding2.ref_seq,coding2.pct_id,coding2.ctg_cov,coding3.assembled,coding3.match,coding3.ref_seq,coding3.pct_id,coding3.ctg_cov,coding5.assembled,coding5.match,coding5.ref_seq,coding5.pct_id,coding5.ctg_cov,coding5.known_var,coding5.A42S,coding6.assembled,coding6.match,coding6.ref_seq,coding6.pct_id,coding6.ctg_cov,coding6.known_var,coding6.A52S,coding7.assembled,coding7.ref_seq,coding7.pct_id,coding7.ctg_cov,coding8.assembled,coding8.match,coding8.ref_seq,coding8.pct_id,coding8.ctg_cov,coding8.novel_var,coding8.A53S,mdfA.assembled,mdfA.ref_seq,mdfA.pct_id,mdfA.ctg_cov,mdfA.novel_var,noncoding1.assembled,noncoding1.match,noncoding1.ref_seq,noncoding1.pct_id,noncoding1.ctg_cov,noncoding10.assembled,noncoding10.match,noncoding10.ref_seq,noncoding10.pct_id,noncoding10.ctg_cov,noncoding10.novel_var,noncoding10.100T,noncoding10.100T.%,noncoding11.assembled,noncoding11.match,noncoding11.ref_seq,noncoding11.pct_id,noncoding11.ctg_cov,noncoding11.novel_var,noncoding11.101AG,noncoding11.101AG.%,noncoding2.assembled,noncoding2.match,noncoding2.ref_seq,noncoding2.pct_id,noncoding2.ctg_cov,noncoding3.assembled,noncoding3.match,noncoding3.ref_seq,noncoding3.pct_id,noncoding3.ctg_cov,noncoding5.assembled,noncoding5.match,noncoding5.ref_seq,noncoding5.pct_id,noncoding5.ctg_cov,noncoding5.known_var,noncoding5.42T,noncoding5.42T.%,noncoding6.assembled,noncoding6.match,noncoding6.ref_seq,noncoding6.pct_id,noncoding6.ctg_cov,noncoding6.known_var,noncoding6.52CT,noncoding6.52CT.%,noncoding7.assembled,noncoding7.match,noncoding7.ref_seq,noncoding7.pct_id,noncoding7.ctg_cov,noncoding7.known_var,noncoding7.53T,noncoding7.53T.%,noncoding8.assembled,noncoding8.match,noncoding8.ref_seq,noncoding8.pct_id,noncoding8.ctg_cov,noncoding9.assembled,noncoding9.ref_seq,noncoding9.pct_id,noncoding9.ctg_cov +summary_test_whole_run.in.1.tsv,yes,yes,23S.rDNA_WHO_F_01358c,99.86,744.8,yes,no,NA,yes,100.0,interrupted,no,coding1_ref1,99.1,10.1,yes,yes,coding2_ref1,98.2,42.42,no,no,NA,NA,NA,yes,no,coding5_ref1,97.4,14.1,no,no,yes,yes,coding6_ref1,95.5,24.32,yes,yes,yes,coding7_ref1,95.4,24.32,yes,yes,coding8_ref1,95.3,24.31,yes,yes,interrupted,mdfA.3001328.JQ394987.0_1233.561,97.0,16.2,yes,yes,yes,noncoding1_ref1,99.1,10.1,yes,yes,noncoding10_ref1,95.1,24.27,yes,yes,99.0,yes,yes,noncoding11_ref1,95.05,24.26,het,het,30.0,yes,yes,noncoding2_ref1,98.2,42.42,no,no,NA,NA,NA,yes,yes,noncoding5_ref1,97.4,14.1,yes,yes,100.0,yes,yes,noncoding6_ref1,95.5,24.32,yes,het,70.0,yes,yes,noncoding7_ref1,95.4,24.31,yes,yes,98.6,yes,yes,noncoding8_ref1,95.3,24.29,yes,noncoding9_ref1,95.2,24.28 +summary_test_whole_run.in.2.tsv,yes_nonunique,no,23S.rDNA_WHO_F_01358c,99.84,344.0,het,het,12.8,no,NA,yes,yes,coding1_ref2,99.2,10.1,no,no,NA,NA,NA,yes,yes,coding3_ref1,97.6,37.6,yes,yes,coding5_ref1,97.4,14.1,yes,yes,no,no,NA,NA,NA,NA,NA,no,NA,NA,NA,no,no,NA,NA,NA,NA,NA,no,NA,NA,NA,NA,yes,yes,noncoding1_ref2,99.2,10.1,no,no,NA,NA,NA,NA,NA,NA,no,no,NA,NA,NA,NA,NA,NA,no,no,NA,NA,NA,yes,yes,noncoding3_ref1,97.6,37.6,yes,no,noncoding5_ref1,99.42,14.1,no,no,NA,no,no,NA,NA,NA,NA,NA,NA,no,no,NA,NA,NA,NA,NA,NA,no,no,NA,NA,NA,no,NA,NA,NA diff --git a/ariba/tests/summary_test.py b/ariba/tests/summary_test.py index 3d702bc0..4e2ef18a 100644 --- a/ariba/tests/summary_test.py +++ b/ariba/tests/summary_test.py @@ -301,11 +301,11 @@ def test_to_matrix_all_cols(self): s._gather_unfiltered_output_data() got_phandango_header, got_csv_header, got_matrix = summary.Summary._to_matrix(s.filenames, s.all_data, s.all_potential_columns, s.cluster_columns) - expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.id1:o1', 'noncoding1.id1.%:c2', 'noncoding1.id3:o1', 'noncoding1.id3.%:c2', 'noncoding1.14GT:o1', 'noncoding1.14GT.%:c2', 'noncoding1.14T:o1', 'noncoding1.14T.%:c2', 'noncoding1.6G:o1', 'noncoding1.6G.%:c2', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'noncoding2.id2:o1', 'noncoding2.id2.%:c2', 'noncoding2.42T:o1', 'noncoding2.42T.%:c2', 'noncoding2.52GT:o1', 'noncoding2.52GT.%:c2', 'presence_absence1.assembled:o1', 'presence_absence1.match:o1', 'presence_absence1.ref_seq:o4', 'presence_absence1.pct_id:c1', 'presence_absence1.known_var:o1', 'presence_absence1.novel_var:o1', 'presence_absence1.A10V:o1'] - expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.id1', 'noncoding1.id1.%', 'noncoding1.id3', 'noncoding1.id3.%', 'noncoding1.14GT', 'noncoding1.14GT.%', 'noncoding1.14T', 'noncoding1.14T.%', 'noncoding1.6G', 'noncoding1.6G.%', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.id2', 'noncoding2.id2.%', 'noncoding2.42T', 'noncoding2.42T.%', 'noncoding2.52GT', 'noncoding2.52GT.%', 'presence_absence1.assembled', 'presence_absence1.match', 'presence_absence1.ref_seq', 'presence_absence1.pct_id', 'presence_absence1.known_var', 'presence_absence1.novel_var', 'presence_absence1.A10V'] + expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.ctg_cov:c3', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.id1:o1', 'noncoding1.id1.%:c2', 'noncoding1.id3:o1', 'noncoding1.id3.%:c2', 'noncoding1.14GT:o1', 'noncoding1.14GT.%:c2', 'noncoding1.14T:o1', 'noncoding1.14T.%:c2', 'noncoding1.6G:o1', 'noncoding1.6G.%:c2', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.ctg_cov:c3', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'noncoding2.id2:o1', 'noncoding2.id2.%:c2', 'noncoding2.42T:o1', 'noncoding2.42T.%:c2', 'noncoding2.52GT:o1', 'noncoding2.52GT.%:c2', 'presence_absence1.assembled:o1', 'presence_absence1.match:o1', 'presence_absence1.ref_seq:o4', 'presence_absence1.pct_id:c1', 'presence_absence1.ctg_cov:c3', 'presence_absence1.known_var:o1', 'presence_absence1.novel_var:o1', 'presence_absence1.A10V:o1'] + expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.ctg_cov', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.id1', 'noncoding1.id1.%', 'noncoding1.id3', 'noncoding1.id3.%', 'noncoding1.14GT', 'noncoding1.14GT.%', 'noncoding1.14T', 'noncoding1.14T.%', 'noncoding1.6G', 'noncoding1.6G.%', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.ctg_cov', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.id2', 'noncoding2.id2.%', 'noncoding2.42T', 'noncoding2.42T.%', 'noncoding2.52GT', 'noncoding2.52GT.%', 'presence_absence1.assembled', 'presence_absence1.match', 'presence_absence1.ref_seq', 'presence_absence1.pct_id', 'presence_absence1.ctg_cov', 'presence_absence1.known_var', 'presence_absence1.novel_var', 'presence_absence1.A10V'] expected_matrix = [ - ['sample1', 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'yes', 100.0, 'no', 'NA', 'no', 'NA', 'yes', 100.0, 'no', 'NA', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'yes_multi_het', 'NA', 'yes', 100.0, 'het', 40.0, 'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes', 'yes'], - [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'het', 80.0, 'yes', 100.0, 'het', 80.0, 'no', 'NA', 'yes', 100.0, 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'het', 40.0, 'no', 'NA', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes', 'yes'] + ['sample1', 'yes', 'yes', 'noncoding_ref1', '98.33', '10.0', 'yes', 'no', 'yes', 100.0, 'no', 'NA', 'no', 'NA', 'yes', 100.0, 'no', 'NA', 'yes', 'yes', 'noncoding_ref2', '98.33', '10.0', 'yes', 'no', 'yes_multi_het', 'NA', 'yes', 100.0, 'het', 40.0, 'yes', 'yes', 'presence_absence_ref1', '98.96', '20.1', 'no', 'yes', 'yes'], + [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', '50.1', 'yes', 'no', 'het', 80.0, 'yes', 100.0, 'het', 80.0, 'no', 'NA', 'yes', 100.0, 'yes', 'yes', 'noncoding_ref2', '98.33', '10.0', 'yes', 'no', 'het', 40.0, 'no', 'NA', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', '51.1', 'no', 'yes', 'yes'] ] self.assertEqual(expected_phandango_header, got_phandango_header) @@ -325,11 +325,11 @@ def test_to_matrix_with_groups(self): s._gather_unfiltered_output_data() got_phandango_header, got_csv_header, got_matrix = summary.Summary._to_matrix(s.filenames, s.all_data, s.all_potential_columns, s.cluster_columns) - expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.id1:o1', 'noncoding1.id1.%:c2', 'noncoding1.id3:o1', 'noncoding1.id3.%:c2', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'noncoding2.id2:o1', 'noncoding2.id2.%:c2', 'presence_absence1.assembled:o1', 'presence_absence1.match:o1', 'presence_absence1.ref_seq:o4', 'presence_absence1.pct_id:c1', 'presence_absence1.known_var:o1', 'presence_absence1.novel_var:o1'] - expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.id1', 'noncoding1.id1.%', 'noncoding1.id3', 'noncoding1.id3.%', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.id2', 'noncoding2.id2.%', 'presence_absence1.assembled', 'presence_absence1.match', 'presence_absence1.ref_seq', 'presence_absence1.pct_id', 'presence_absence1.known_var', 'presence_absence1.novel_var'] + expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.ctg_cov:c3', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.id1:o1', 'noncoding1.id1.%:c2', 'noncoding1.id3:o1', 'noncoding1.id3.%:c2', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.ctg_cov:c3', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'noncoding2.id2:o1', 'noncoding2.id2.%:c2', 'presence_absence1.assembled:o1', 'presence_absence1.match:o1', 'presence_absence1.ref_seq:o4', 'presence_absence1.pct_id:c1', 'presence_absence1.ctg_cov:c3', 'presence_absence1.known_var:o1', 'presence_absence1.novel_var:o1'] + expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.ctg_cov', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.id1', 'noncoding1.id1.%', 'noncoding1.id3', 'noncoding1.id3.%', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.ctg_cov', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.id2', 'noncoding2.id2.%', 'presence_absence1.assembled', 'presence_absence1.match', 'presence_absence1.ref_seq', 'presence_absence1.pct_id', 'presence_absence1.ctg_cov', 'presence_absence1.known_var', 'presence_absence1.novel_var'] expected_matrix = [ - [infiles[0], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'yes', 100.0, 'no', 'NA', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'yes_multi_het', 'NA', 'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes'], - [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'het', 80.0, 'yes', 100.0, 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes'] + [infiles[0], 'yes', 'yes', 'noncoding_ref1', '98.33', '10.0', 'yes', 'no', 'yes', 100.0, 'no', 'NA', 'yes', 'yes', 'noncoding_ref2', '98.33', '10.0', 'yes', 'no', 'yes_multi_het', 'NA', 'yes', 'yes', 'presence_absence_ref1', '98.96', '20.1', 'no', 'yes'], + [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', '50.1', 'yes', 'no', 'het', 80.0, 'yes', 100.0, 'yes', 'yes', 'noncoding_ref2', '98.33', '10.0', 'yes', 'no', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', '51.1', 'no', 'yes'] ] self.assertEqual(expected_phandango_header, got_phandango_header) @@ -349,11 +349,11 @@ def test_to_matrix_with_vars(self): s._gather_unfiltered_output_data() got_phandango_header, got_csv_header, got_matrix = summary.Summary._to_matrix(s.filenames, s.all_data, s.all_potential_columns, s.cluster_columns) - expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.14GT:o1', 'noncoding1.14GT.%:c2', 'noncoding1.14T:o1', 'noncoding1.14T.%:c2', 'noncoding1.6G:o1', 'noncoding1.6G.%:c2', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'noncoding2.42T:o1', 'noncoding2.42T.%:c2', 'noncoding2.52GT:o1', 'noncoding2.52GT.%:c2', 'presence_absence1.assembled:o1', 'presence_absence1.match:o1', 'presence_absence1.ref_seq:o4', 'presence_absence1.pct_id:c1', 'presence_absence1.known_var:o1', 'presence_absence1.novel_var:o1', 'presence_absence1.A10V:o1'] - expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.14GT', 'noncoding1.14GT.%', 'noncoding1.14T', 'noncoding1.14T.%', 'noncoding1.6G', 'noncoding1.6G.%', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.42T', 'noncoding2.42T.%', 'noncoding2.52GT', 'noncoding2.52GT.%', 'presence_absence1.assembled', 'presence_absence1.match', 'presence_absence1.ref_seq', 'presence_absence1.pct_id', 'presence_absence1.known_var', 'presence_absence1.novel_var', 'presence_absence1.A10V'] + expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.ctg_cov:c3', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.14GT:o1', 'noncoding1.14GT.%:c2', 'noncoding1.14T:o1', 'noncoding1.14T.%:c2', 'noncoding1.6G:o1', 'noncoding1.6G.%:c2', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.ctg_cov:c3', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'noncoding2.42T:o1', 'noncoding2.42T.%:c2', 'noncoding2.52GT:o1', 'noncoding2.52GT.%:c2', 'presence_absence1.assembled:o1', 'presence_absence1.match:o1', 'presence_absence1.ref_seq:o4', 'presence_absence1.pct_id:c1', 'presence_absence1.ctg_cov:c3', 'presence_absence1.known_var:o1', 'presence_absence1.novel_var:o1', 'presence_absence1.A10V:o1'] + expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.ctg_cov', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.14GT', 'noncoding1.14GT.%', 'noncoding1.14T', 'noncoding1.14T.%', 'noncoding1.6G', 'noncoding1.6G.%', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.ctg_cov', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.42T', 'noncoding2.42T.%', 'noncoding2.52GT', 'noncoding2.52GT.%', 'presence_absence1.assembled', 'presence_absence1.match', 'presence_absence1.ref_seq', 'presence_absence1.pct_id', 'presence_absence1.ctg_cov', 'presence_absence1.known_var', 'presence_absence1.novel_var', 'presence_absence1.A10V'] expected_matrix = [ - [infiles[0], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'no', 'NA', 'yes', 100.0, 'no', 'NA', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'yes', 100.0, 'het', 40.0, 'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes', 'yes'], - [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'het', 80.0, 'no', 'NA', 'yes', 100.0, 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'no', 'NA', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes', 'yes'] + [infiles[0], 'yes', 'yes', 'noncoding_ref1', '98.33', '10.0', 'yes', 'no', 'no', 'NA', 'yes', 100.0, 'no', 'NA', 'yes', 'yes', 'noncoding_ref2', '98.33', '10.0', 'yes', 'no', 'yes', 100.0, 'het', 40.0, 'yes', 'yes', 'presence_absence_ref1', '98.96', '20.1', 'no', 'yes', 'yes'], + [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', '50.1', 'yes', 'no', 'het', 80.0, 'no', 'NA', 'yes', 100.0, 'yes', 'yes', 'noncoding_ref2', '98.33', '10.0', 'yes', 'no', 'no', 'NA', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', '51.1', 'no', 'yes', 'yes'] ] self.assertEqual(expected_phandango_header, got_phandango_header) @@ -373,11 +373,11 @@ def test_to_matrix_cluster_only(self): s._gather_unfiltered_output_data() got_phandango_header, got_csv_header, got_matrix = summary.Summary._to_matrix(s.filenames, s.all_data, s.all_potential_columns, s.cluster_columns) - expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'presence_absence1.assembled:o1', 'presence_absence1.match:o1', 'presence_absence1.ref_seq:o4', 'presence_absence1.pct_id:c1', 'presence_absence1.known_var:o1', 'presence_absence1.novel_var:o1'] - expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var', 'presence_absence1.assembled', 'presence_absence1.match', 'presence_absence1.ref_seq', 'presence_absence1.pct_id', 'presence_absence1.known_var', 'presence_absence1.novel_var'] + expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.ctg_cov:c3', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.ctg_cov:c3', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'presence_absence1.assembled:o1', 'presence_absence1.match:o1', 'presence_absence1.ref_seq:o4', 'presence_absence1.pct_id:c1', 'presence_absence1.ctg_cov:c3', 'presence_absence1.known_var:o1', 'presence_absence1.novel_var:o1'] + expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.ctg_cov', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.ctg_cov', 'noncoding2.known_var', 'noncoding2.novel_var', 'presence_absence1.assembled', 'presence_absence1.match', 'presence_absence1.ref_seq', 'presence_absence1.pct_id', 'presence_absence1.ctg_cov', 'presence_absence1.known_var', 'presence_absence1.novel_var'] expected_matrix = [ - [infiles[0], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes'], - [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes'] + [infiles[0], 'yes', 'yes', 'noncoding_ref1', '98.33', '10.0', 'yes', 'no', 'yes', 'yes', 'noncoding_ref2', '98.33', '10.0', 'yes', 'no', 'yes', 'yes', 'presence_absence_ref1', '98.96', '20.1', 'no', 'yes'], + [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', '50.1', 'yes', 'no', 'yes', 'yes', 'noncoding_ref2', '98.33', '10.0', 'yes', 'no', 'yes', 'yes', 'presence_absence1', '98.96', '51.1', 'no', 'yes'] ] self.assertEqual(expected_phandango_header, got_phandango_header) diff --git a/scripts/ariba b/scripts/ariba index d59e2649..9a76385e 100755 --- a/scripts/ariba +++ b/scripts/ariba @@ -238,7 +238,7 @@ subparser_summary = subparsers.add_parser( subparser_summary.add_argument('-f', '--fofn', help='File of filenames of ariba reports to be summarised. Must be used if no input files listed after the outfile. The first column should be the filename. An optional second column can be used to specify a sample name for that file, which will be used instead of the filename in output files. Columns separated by whitespace.', metavar='FILENAME') subparser_summary.add_argument('--preset', choices=summary_presets, help='Shorthand for setting --cluster_cols,--col_filter,--row_filter,--v_groups,--variants. Using this overrides those options', metavar='|'.join(summary_presets)) -subparser_summary.add_argument('--cluster_cols', help='Comma separated list of cluster columns to include. Choose from: assembled, match, ref_seq, pct_id, known_var, novel_var [%(default)s]', default='match', metavar='col1,col2,...') +subparser_summary.add_argument('--cluster_cols', help='Comma separated list of cluster columns to include. Choose from: assembled, match, ref_seq, pct_id, ctg_cov, known_var, novel_var [%(default)s]', default='match', metavar='col1,col2,...') subparser_summary.add_argument('--col_filter', choices=['y', 'n'], default='y', help='Choose whether columns where all values are "no" or "NA" are removed [%(default)s]', metavar='y|n') subparser_summary.add_argument('--no_tree', action='store_true', help='Do not make phandango tree') subparser_summary.add_argument('--row_filter', choices=['y', 'n'], default='y', help='Choose whether rows where all values are "no" or "NA" are removed [%(default)s]', metavar='y|n')