Merge pull request #84 from martinghunt/always_report_presabs_vars

Always report presabs vars
sanger-pathogens · May 16, 2016 · e560604 · e560604
2 parents f576002 + ee53b1a
commit e560604
Show file tree

Hide file tree

Showing 5 changed files with 8 additions and 9 deletions.
diff --git a/ariba/assembly_variants.py b/ariba/assembly_variants.py
@@ -301,9 +301,7 @@ def get_variants(self, ref_sequence_name, nucmer_coords):
                     else:
                         new_variant, used_variants = self._get_one_variant_for_one_contig_coding(ref_sequence, refdata_var_dict, mummer_variant_list)
 
-                    # include new variant, except if the ref type is variants only and
-                    # the new variant matches to a known variant
-                    if new_variant is not None and (ref_sequence_type != 'variants_only' or len(new_variant[5]) > 0 or new_variant[3] in ['MULTIPLE', 'INDELS']):
+                    if new_variant is not None:
                             variants[contig].append(new_variant)
                     used_known_variants.update(used_variants)
 

diff --git a/ariba/report_filter.py b/ariba/report_filter.py
@@ -9,7 +9,7 @@ def __init__(self,
             infile=None,
             min_pc_ident=90,
             min_ref_base_assembled=1,
-            ignore_not_has_known_variant=True,
+            ignore_not_has_known_variant=False,
             remove_synonymous_snps=True,
             exclude_flags=None,
         ):

diff --git a/ariba/tasks/reportfilter.py b/ariba/tasks/reportfilter.py
@@ -11,7 +11,7 @@ def run():
     parser.add_argument('--min_pc_id', type=float, help='Minimum percent identity of nucmer match between contig and reference [%(default)s]', default=90.0, metavar='FLOAT')
     parser.add_argument('--min_ref_base_asm', type=int, help='Minimum number of reference bases matching assembly [%(default)s]', default=1, metavar='INT')
     parser.add_argument('--keep_syn', action='store_true', help='Keep synonymous variants (by default they are removed')
-    parser.add_argument('--keep_without_known_var', action='store_true', help='Use this option to not filter out where there is a known variant, but the assembly has the wild type. By default these rows are removed.')
+    parser.add_argument('--discard_without_known_var', action='store_true', help='Applies to variant only genes. Filter out where there is a known variant, but the assembly has the wild type. By default these rows are kept.')
     parser.add_argument('infile', help='Name of input tsv file')
     parser.add_argument('outprefix', help='Prefix of output files. outprefix.tsv and outprefix.xls will be made')
     options = parser.parse_args()
@@ -28,7 +28,7 @@ def run():
         infile=options.infile,
         min_pc_ident=options.min_pc_id,
         min_ref_base_assembled=options.min_ref_base_asm,
-        ignore_not_has_known_variant=not options.keep_without_known_var,
+        ignore_not_has_known_variant=options.discard_without_known_var,
         remove_synonymous_snps=not options.keep_syn,
     )
     rf.run(options.outprefix)

diff --git a/ariba/tests/assembly_variants_test.py b/ariba/tests/assembly_variants_test.py
@@ -372,6 +372,7 @@ def test_get_variants_variants_only(self):
 
         expected = {
             'contig1': [
+                (4, 'p', 'A5D', 'NONSYN', [v2, v3], set(), set()),
                 (None, 'p', None, None, None, {meta1}, set()),
                 (None, 'p', None, None, None, {meta3}, set()),
             ],

diff --git a/ariba/tests/report_filter_test.py b/ariba/tests/report_filter_test.py
@@ -223,7 +223,7 @@ def test_filter_list_of_dicts_all_fail(self):
 
     def test_filter_list_of_dicts_with_essential(self):
         '''Test _filter_list_of_dicts with an essential line but all others fail'''
-        rf = report_filter.ReportFilter()
+        rf = report_filter.ReportFilter(ignore_not_has_known_variant=True)
         line1 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
         line2 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
         dict1 = report_filter.ReportFilter._report_line_to_dict(line1)
@@ -237,7 +237,7 @@ def test_filter_list_of_dicts_with_essential(self):
 
     def test_filter_list_of_dicts_with_pass(self):
         '''Test _filter_list_of_dicts with a line that passes'''
-        rf = report_filter.ReportFilter()
+        rf = report_filter.ReportFilter(ignore_not_has_known_variant=True)
         line1 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
         line2 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC46T\t1\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C46T\tfree text'
         line3 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
@@ -265,7 +265,7 @@ def test_remove_all_after_first_frameshift(self):
 
     def test_filter_dicts(self):
         '''Test _filter_dicts'''
-        rf = report_filter.ReportFilter(min_ref_base_assembled=10)
+        rf = report_filter.ReportFilter(min_ref_base_assembled=10, ignore_not_has_known_variant=True)
         ref_2_dict = {x: '.' for x in report.columns}
         ref_2_dict['pc_ident'] = 91.0
         ref_2_dict['ref_base_assembled'] = 10