Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Always report presabs vars #84

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
4 changes: 1 addition & 3 deletions ariba/assembly_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,9 +301,7 @@ def get_variants(self, ref_sequence_name, nucmer_coords):
else:
new_variant, used_variants = self._get_one_variant_for_one_contig_coding(ref_sequence, refdata_var_dict, mummer_variant_list)

# include new variant, except if the ref type is variants only and
# the new variant matches to a known variant
if new_variant is not None and (ref_sequence_type != 'variants_only' or len(new_variant[5]) > 0 or new_variant[3] in ['MULTIPLE', 'INDELS']):
if new_variant is not None:
variants[contig].append(new_variant)
used_known_variants.update(used_variants)

Expand Down
2 changes: 1 addition & 1 deletion ariba/report_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def __init__(self,
infile=None,
min_pc_ident=90,
min_ref_base_assembled=1,
ignore_not_has_known_variant=True,
ignore_not_has_known_variant=False,
remove_synonymous_snps=True,
exclude_flags=None,
):
Expand Down
4 changes: 2 additions & 2 deletions ariba/tasks/reportfilter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def run():
parser.add_argument('--min_pc_id', type=float, help='Minimum percent identity of nucmer match between contig and reference [%(default)s]', default=90.0, metavar='FLOAT')
parser.add_argument('--min_ref_base_asm', type=int, help='Minimum number of reference bases matching assembly [%(default)s]', default=1, metavar='INT')
parser.add_argument('--keep_syn', action='store_true', help='Keep synonymous variants (by default they are removed')
parser.add_argument('--keep_without_known_var', action='store_true', help='Use this option to not filter out where there is a known variant, but the assembly has the wild type. By default these rows are removed.')
parser.add_argument('--discard_without_known_var', action='store_true', help='Applies to variant only genes. Filter out where there is a known variant, but the assembly has the wild type. By default these rows are kept.')
parser.add_argument('infile', help='Name of input tsv file')
parser.add_argument('outprefix', help='Prefix of output files. outprefix.tsv and outprefix.xls will be made')
options = parser.parse_args()
Expand All @@ -28,7 +28,7 @@ def run():
infile=options.infile,
min_pc_ident=options.min_pc_id,
min_ref_base_assembled=options.min_ref_base_asm,
ignore_not_has_known_variant=not options.keep_without_known_var,
ignore_not_has_known_variant=options.discard_without_known_var,
remove_synonymous_snps=not options.keep_syn,
)
rf.run(options.outprefix)
Expand Down
1 change: 1 addition & 0 deletions ariba/tests/assembly_variants_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,7 @@ def test_get_variants_variants_only(self):

expected = {
'contig1': [
(4, 'p', 'A5D', 'NONSYN', [v2, v3], set(), set()),
(None, 'p', None, None, None, {meta1}, set()),
(None, 'p', None, None, None, {meta3}, set()),
],
Expand Down
6 changes: 3 additions & 3 deletions ariba/tests/report_filter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def test_filter_list_of_dicts_all_fail(self):

def test_filter_list_of_dicts_with_essential(self):
'''Test _filter_list_of_dicts with an essential line but all others fail'''
rf = report_filter.ReportFilter()
rf = report_filter.ReportFilter(ignore_not_has_known_variant=True)
line1 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
line2 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
dict1 = report_filter.ReportFilter._report_line_to_dict(line1)
Expand All @@ -237,7 +237,7 @@ def test_filter_list_of_dicts_with_essential(self):

def test_filter_list_of_dicts_with_pass(self):
'''Test _filter_list_of_dicts with a line that passes'''
rf = report_filter.ReportFilter()
rf = report_filter.ReportFilter(ignore_not_has_known_variant=True)
line1 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
line2 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC46T\t1\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C46T\tfree text'
line3 = 'cluster1\tnon_coding\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
Expand Down Expand Up @@ -265,7 +265,7 @@ def test_remove_all_after_first_frameshift(self):

def test_filter_dicts(self):
'''Test _filter_dicts'''
rf = report_filter.ReportFilter(min_ref_base_assembled=10)
rf = report_filter.ReportFilter(min_ref_base_assembled=10, ignore_not_has_known_variant=True)
ref_2_dict = {x: '.' for x in report.columns}
ref_2_dict['pc_ident'] = 91.0
ref_2_dict['ref_base_assembled'] = 10
Expand Down