From 205b1d4104ab7a54d3ffaada2dd43b36cc925672 Mon Sep 17 00:00:00 2001
From: martinghunt <martin.g.hunt@gmail.com>
Date: Mon, 16 May 2016 09:48:25 +0000
Subject: [PATCH 1/2] Report all variants, including those for variants-only genes

---
 ariba/assembly_variants.py            | 4 +---
 ariba/tests/assembly_variants_test.py | 1 +
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/ariba/assembly_variants.py b/ariba/assembly_variants.py
index 52f5c9c5..ab8658fa 100644
--- a/ariba/assembly_variants.py
+++ b/ariba/assembly_variants.py
@@ -301,9 +301,7 @@ def get_variants(self, ref_sequence_name, nucmer_coords):
                     else:
                         new_variant, used_variants = self._get_one_variant_for_one_contig_coding(ref_sequence, refdata_var_dict, mummer_variant_list)
 
-                    # include new variant, except if the ref type is variants only and
-                    # the new variant matches to a known variant
-                    if new_variant is not None and (ref_sequence_type != 'variants_only' or len(new_variant[5]) > 0 or new_variant[3] in ['MULTIPLE', 'INDELS']):
+                    if new_variant is not None:
                             variants[contig].append(new_variant)
                     used_known_variants.update(used_variants)
 
diff --git a/ariba/tests/assembly_variants_test.py b/ariba/tests/assembly_variants_test.py
index 9f5501f1..c8dea8bb 100644
--- a/ariba/tests/assembly_variants_test.py
+++ b/ariba/tests/assembly_variants_test.py
@@ -372,6 +372,7 @@ def test_get_variants_variants_only(self):
 
         expected = {
             'contig1': [
+                (4, 'p', 'A5D', 'NONSYN', [v2, v3], set(), set()),
                 (None, 'p', None, None, None, {meta1}, set()),
                 (None, 'p', None, None, None, {meta3}, set()),
             ],

From ee53b1a8534e7add06c5866b879062f58c277d01 Mon Sep 17 00:00:00 2001
From: martinghunt <martin.g.hunt@gmail.com>
Date: Mon, 16 May 2016 10:24:05 +0000
Subject: [PATCH 2/2] Swap default to keep rows lacking the known variant

---
 ariba/report_filter.py            | 2 +-
 ariba/tasks/reportfilter.py       | 4 ++--
 ariba/tests/report_filter_test.py | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/ariba/report_filter.py b/ariba/report_filter.py
index 223d986b..166fcc42 100644
--- a/ariba/report_filter.py
+++ b/ariba/report_filter.py
@@ -9,7 +9,7 @@ def __init__(self,
             infile=None,
             min_pc_ident=90,
             min_ref_base_assembled=1,
-            ignore_not_has_known_variant=True,
+            ignore_not_has_known_variant=False,
             remove_synonymous_snps=True,
             exclude_flags=None,
         ):
diff --git a/ariba/tasks/reportfilter.py b/ariba/tasks/reportfilter.py
index d5d39090..4e85fa08 100644
--- a/ariba/tasks/reportfilter.py
+++ b/ariba/tasks/reportfilter.py
@@ -11,7 +11,7 @@ def run():
     parser.add_argument('--min_pc_id', type=float, help='Minimum percent identity of nucmer match between contig and reference [%(default)s]', default=90.0, metavar='FLOAT')
     parser.add_argument('--min_ref_base_asm', type=int, help='Minimum number of reference bases matching assembly [%(default)s]', default=1, metavar='INT')
     parser.add_argument('--keep_syn', action='store_true', help='Keep synonymous variants (by default they are removed')
-    parser.add_argument('--keep_without_known_var', action='store_true', help='Use this option to not filter out where there is a known variant, but the assembly has the wild type. By default these rows are removed.')
+    parser.add_argument('--discard_without_known_var', action='store_true', help='Applies to variant only genes. Filter out where there is a known variant, but the assembly has the wild type. By default these rows are kept.')
     parser.add_argument('infile', help='Name of input tsv file')
     parser.add_argument('outprefix', help='Prefix of output files. outprefix.tsv and outprefix.xls will be made')
     options = parser.parse_args()
@@ -28,7 +28,7 @@ def run():
         infile=options.infile,
         min_pc_ident=options.min_pc_id,
         min_ref_base_assembled=options.min_ref_base_asm,
-        ignore_not_has_known_variant=not options.keep_without_known_var,
+        ignore_not_has_known_variant=options.discard_without_known_var,
         remove_synonymous_snps=not options.keep_syn,
     )
     rf.run(options.outprefix)
diff --git a/ariba/tests/report_filter_test.py b/ariba/tests/report_filter_test.py
index af68b32c..5c8919b4 100644
--- a/ariba/tests/report_filter_test.py
+++ b/ariba/tests/report_filter_test.py
@@ -223,7 +223,7 @@ def test_filter_list_of_dicts_all_fail(self):
 
     def test_filter_list_of_dicts_with_essential(self):
         '''Test _filter_list_of_dicts with an essential line but all others fail'''
-        rf = report_filter.ReportFilter()
+        rf = report_filter.ReportFilter(ignore_not_has_known_variant=True)
         line1 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
         line2 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
         dict1 = report_filter.ReportFilter._report_line_to_dict(line1)
@@ -237,7 +237,7 @@ def test_filter_list_of_dicts_with_essential(self):
 
     def test_filter_list_of_dicts_with_pass(self):
         '''Test _filter_list_of_dicts with a line that passes'''
-        rf = report_filter.ReportFilter()
+        rf = report_filter.ReportFilter(ignore_not_has_known_variant=True)
         line1 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
         line2 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC46T\t1\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C46T\tfree text'
         line3 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
@@ -265,7 +265,7 @@ def test_remove_all_after_first_frameshift(self):
 
     def test_filter_dicts(self):
         '''Test _filter_dicts'''
-        rf = report_filter.ReportFilter(min_ref_base_assembled=10)
+        rf = report_filter.ReportFilter(min_ref_base_assembled=10, ignore_not_has_known_variant=True)
         ref_2_dict = {x: '.' for x in report.columns}
         ref_2_dict['pc_ident'] = 91.0
         ref_2_dict['ref_base_assembled'] = 10