From 205b1d4104ab7a54d3ffaada2dd43b36cc925672 Mon Sep 17 00:00:00 2001 From: martinghunt Date: Mon, 16 May 2016 09:48:25 +0000 Subject: [PATCH 1/2] Report all variants, including for variants only genes --- ariba/assembly_variants.py | 4 +--- ariba/tests/assembly_variants_test.py | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/ariba/assembly_variants.py b/ariba/assembly_variants.py index 52f5c9c5..ab8658fa 100644 --- a/ariba/assembly_variants.py +++ b/ariba/assembly_variants.py @@ -301,9 +301,7 @@ def get_variants(self, ref_sequence_name, nucmer_coords): else: new_variant, used_variants = self._get_one_variant_for_one_contig_coding(ref_sequence, refdata_var_dict, mummer_variant_list) - # include new variant, except if the ref type is variants only and - # the new variant matches to a known variant - if new_variant is not None and (ref_sequence_type != 'variants_only' or len(new_variant[5]) > 0 or new_variant[3] in ['MULTIPLE', 'INDELS']): + if new_variant is not None: variants[contig].append(new_variant) used_known_variants.update(used_variants) diff --git a/ariba/tests/assembly_variants_test.py b/ariba/tests/assembly_variants_test.py index 9f5501f1..c8dea8bb 100644 --- a/ariba/tests/assembly_variants_test.py +++ b/ariba/tests/assembly_variants_test.py @@ -372,6 +372,7 @@ def test_get_variants_variants_only(self): expected = { 'contig1': [ + (4, 'p', 'A5D', 'NONSYN', [v2, v3], set(), set()), (None, 'p', None, None, None, {meta1}, set()), (None, 'p', None, None, None, {meta3}, set()), ], From ee53b1a8534e7add06c5866b879062f58c277d01 Mon Sep 17 00:00:00 2001 From: martinghunt Date: Mon, 16 May 2016 10:24:05 +0000 Subject: [PATCH 2/2] Swap default to keep known variants --- ariba/report_filter.py | 2 +- ariba/tasks/reportfilter.py | 4 ++-- ariba/tests/report_filter_test.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ariba/report_filter.py b/ariba/report_filter.py index 223d986b..166fcc42 100644 --- a/ariba/report_filter.py +++ b/ariba/report_filter.py @@ -9,7 +9,7 @@ def __init__(self, infile=None, min_pc_ident=90, min_ref_base_assembled=1, - ignore_not_has_known_variant=True, + ignore_not_has_known_variant=False, remove_synonymous_snps=True, exclude_flags=None, ): diff --git a/ariba/tasks/reportfilter.py b/ariba/tasks/reportfilter.py index d5d39090..4e85fa08 100644 --- a/ariba/tasks/reportfilter.py +++ b/ariba/tasks/reportfilter.py @@ -11,7 +11,7 @@ def run(): parser.add_argument('--min_pc_id', type=float, help='Minimum percent identity of nucmer match between contig and reference [%(default)s]', default=90.0, metavar='FLOAT') parser.add_argument('--min_ref_base_asm', type=int, help='Minimum number of reference bases matching assembly [%(default)s]', default=1, metavar='INT') parser.add_argument('--keep_syn', action='store_true', help='Keep synonymous variants (by default they are removed') - parser.add_argument('--keep_without_known_var', action='store_true', help='Use this option to not filter out where there is a known variant, but the assembly has the wild type. By default these rows are removed.') + parser.add_argument('--discard_without_known_var', action='store_true', help='Applies to variant only genes. Filter out where there is a known variant, but the assembly has the wild type. By default these rows are kept.') parser.add_argument('infile', help='Name of input tsv file') parser.add_argument('outprefix', help='Prefix of output files. outprefix.tsv and outprefix.xls will be made') options = parser.parse_args() @@ -28,7 +28,7 @@ def run(): infile=options.infile, min_pc_ident=options.min_pc_id, min_ref_base_assembled=options.min_ref_base_asm, - ignore_not_has_known_variant=not options.keep_without_known_var, + ignore_not_has_known_variant=options.discard_without_known_var, remove_synonymous_snps=not options.keep_syn, ) rf.run(options.outprefix) diff --git a/ariba/tests/report_filter_test.py b/ariba/tests/report_filter_test.py index af68b32c..5c8919b4 100644 --- a/ariba/tests/report_filter_test.py +++ b/ariba/tests/report_filter_test.py @@ -223,7 +223,7 @@ def test_filter_list_of_dicts_all_fail(self): def test_filter_list_of_dicts_with_essential(self): '''Test _filter_list_of_dicts with an essential line but all others fail''' - rf = report_filter.ReportFilter() + rf = report_filter.ReportFilter(ignore_not_has_known_variant=True) line1 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text' line2 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text' dict1 = report_filter.ReportFilter._report_line_to_dict(line1) @@ -237,7 +237,7 @@ def test_filter_list_of_dicts_with_essential(self): def test_filter_list_of_dicts_with_pass(self): '''Test _filter_list_of_dicts with a line that passes''' - rf = report_filter.ReportFilter() + rf = report_filter.ReportFilter(ignore_not_has_known_variant=True) line1 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text' line2 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC46T\t1\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C46T\tfree text' line3 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text' @@ -265,7 +265,7 @@ def test_remove_all_after_first_frameshift(self): def test_filter_dicts(self): '''Test _filter_dicts''' - rf = report_filter.ReportFilter(min_ref_base_assembled=10) + rf = report_filter.ReportFilter(min_ref_base_assembled=10, ignore_not_has_known_variant=True) ref_2_dict = {x: '.' for x in report.columns} ref_2_dict['pc_ident'] = 91.0 ref_2_dict['ref_base_assembled'] = 10