Skip to content

Commit

Permalink
Merge pull request #84 from martinghunt/always_report_presabs_vars
Browse files: browse the repository at this point in the history
Always report presabs vars
  • Loading branch information
martinghunt committed May 16, 2016
2 parents f576002 + ee53b1a commit e560604
Show file tree
Hide file tree
Showing 5 changed files with 8 additions and 9 deletions.
4 changes: 1 addition & 3 deletions ariba/assembly_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,9 +301,7 @@ def get_variants(self, ref_sequence_name, nucmer_coords):
else:
new_variant, used_variants = self._get_one_variant_for_one_contig_coding(ref_sequence, refdata_var_dict, mummer_variant_list)

# include new variant, except if the ref type is variants only and
# the new variant matches to a known variant
if new_variant is not None and (ref_sequence_type != 'variants_only' or len(new_variant[5]) > 0 or new_variant[3] in ['MULTIPLE', 'INDELS']):
if new_variant is not None:
variants[contig].append(new_variant)
used_known_variants.update(used_variants)

Expand Down
2 changes: 1 addition & 1 deletion ariba/report_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def __init__(self,
infile=None,
min_pc_ident=90,
min_ref_base_assembled=1,
ignore_not_has_known_variant=True,
ignore_not_has_known_variant=False,
remove_synonymous_snps=True,
exclude_flags=None,
):
Expand Down
4 changes: 2 additions & 2 deletions ariba/tasks/reportfilter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def run():
parser.add_argument('--min_pc_id', type=float, help='Minimum percent identity of nucmer match between contig and reference [%(default)s]', default=90.0, metavar='FLOAT')
parser.add_argument('--min_ref_base_asm', type=int, help='Minimum number of reference bases matching assembly [%(default)s]', default=1, metavar='INT')
parser.add_argument('--keep_syn', action='store_true', help='Keep synonymous variants (by default they are removed')
parser.add_argument('--keep_without_known_var', action='store_true', help='Use this option to not filter out where there is a known variant, but the assembly has the wild type. By default these rows are removed.')
parser.add_argument('--discard_without_known_var', action='store_true', help='Applies to variant only genes. Filter out where there is a known variant, but the assembly has the wild type. By default these rows are kept.')
parser.add_argument('infile', help='Name of input tsv file')
parser.add_argument('outprefix', help='Prefix of output files. outprefix.tsv and outprefix.xls will be made')
options = parser.parse_args()
Expand All @@ -28,7 +28,7 @@ def run():
infile=options.infile,
min_pc_ident=options.min_pc_id,
min_ref_base_assembled=options.min_ref_base_asm,
ignore_not_has_known_variant=not options.keep_without_known_var,
ignore_not_has_known_variant=options.discard_without_known_var,
remove_synonymous_snps=not options.keep_syn,
)
rf.run(options.outprefix)
Expand Down
1 change: 1 addition & 0 deletions ariba/tests/assembly_variants_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,7 @@ def test_get_variants_variants_only(self):

expected = {
'contig1': [
(4, 'p', 'A5D', 'NONSYN', [v2, v3], set(), set()),
(None, 'p', None, None, None, {meta1}, set()),
(None, 'p', None, None, None, {meta3}, set()),
],
Expand Down
6 changes: 3 additions & 3 deletions ariba/tests/report_filter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def test_filter_list_of_dicts_all_fail(self):

def test_filter_list_of_dicts_with_essential(self):
'''Test _filter_list_of_dicts with an essential line but all others fail'''
rf = report_filter.ReportFilter()
rf = report_filter.ReportFilter(ignore_not_has_known_variant=True)
line1 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
line2 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
dict1 = report_filter.ReportFilter._report_line_to_dict(line1)
Expand All @@ -237,7 +237,7 @@ def test_filter_list_of_dicts_with_essential(self):

def test_filter_list_of_dicts_with_pass(self):
'''Test _filter_list_of_dicts with a line that passes'''
rf = report_filter.ReportFilter()
rf = report_filter.ReportFilter(ignore_not_has_known_variant=True)
line1 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
line2 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC46T\t1\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C46T\tfree text'
line3 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
Expand Down Expand Up @@ -265,7 +265,7 @@ def test_remove_all_after_first_frameshift(self):

def test_filter_dicts(self):
'''Test _filter_dicts'''
rf = report_filter.ReportFilter(min_ref_base_assembled=10)
rf = report_filter.ReportFilter(min_ref_base_assembled=10, ignore_not_has_known_variant=True)
ref_2_dict = {x: '.' for x in report.columns}
ref_2_dict['pc_ident'] = 91.0
ref_2_dict['ref_base_assembled'] = 10
Expand Down

0 comments on commit e560604

Please sign in to comment.