From c45eddb4c5b84ed9a7357ef17ba8f34fcb664667 Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Tue, 7 Apr 2015 09:19:45 +0100 Subject: [PATCH 1/2] Convert sequences to uppercase --- ariba/refcheck.py | 2 ++ ariba/tests/data/refcheck_test_check_ok.fa | 2 +- ariba/tests/data/refcheck_test_fix_in.fa | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ariba/refcheck.py b/ariba/refcheck.py index cbe1dc66..7bcbea76 100644 --- a/ariba/refcheck.py +++ b/ariba/refcheck.py @@ -18,6 +18,7 @@ def check(self, error_code_on_exit=None): file_reader = pyfastaq.sequences.file_reader(self.infile) for seq in file_reader: + seq.seq = seq.seq.upper() if not seq.looks_like_gene(): return False, 'Not a gene', seq elif len(seq) < self.min_length: @@ -41,6 +42,7 @@ def fix(self, outprefix): log_out_fh = pyfastaq.utils.open_file_write(log_out) for seq in file_reader: + seq.seq = seq.seq.upper() if len(seq) < self.min_length: print(seq.id, 'Too short. Skipping', sep='\t', file=log_out_fh) print(seq, file=bad_seqs_out_fh) diff --git a/ariba/tests/data/refcheck_test_check_ok.fa b/ariba/tests/data/refcheck_test_check_ok.fa index 6a210e67..50dd3f61 100644 --- a/ariba/tests/data/refcheck_test_check_ok.fa +++ b/ariba/tests/data/refcheck_test_check_ok.fa @@ -1,2 +1,2 @@ >gene1 -TTGTGGTGA +ttgtggtga diff --git a/ariba/tests/data/refcheck_test_fix_in.fa b/ariba/tests/data/refcheck_test_fix_in.fa index 8bdd6c0f..fbdb3a2a 100644 --- a/ariba/tests/data/refcheck_test_fix_in.fa +++ b/ariba/tests/data/refcheck_test_fix_in.fa @@ -1,7 +1,7 @@ >gene1 TTGTCGTAA >gene2 -TTGTCGTCGTCGTAA +ttgtcgtcgtcgtaa >gene3 TTGTCGTCGTCGTCGTAA >gene3 From a4b7614ef68354d123712088a1552105dd576c59 Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Tue, 7 Apr 2015 09:30:34 +0100 Subject: [PATCH 2/2] Require uppercase when checking --- ariba/refcheck.py | 1 - ariba/tests/data/refcheck_test_check_ok.fa | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ariba/refcheck.py b/ariba/refcheck.py index 7bcbea76..566bf09d 100644 --- a/ariba/refcheck.py +++ b/ariba/refcheck.py @@ -18,7 +18,6 @@ def check(self, error_code_on_exit=None): file_reader = pyfastaq.sequences.file_reader(self.infile) for seq in file_reader: - seq.seq = seq.seq.upper() if not seq.looks_like_gene(): return False, 'Not a gene', seq elif len(seq) < self.min_length: diff --git a/ariba/tests/data/refcheck_test_check_ok.fa b/ariba/tests/data/refcheck_test_check_ok.fa index 50dd3f61..6a210e67 100644 --- a/ariba/tests/data/refcheck_test_check_ok.fa +++ b/ariba/tests/data/refcheck_test_check_ok.fa @@ -1,2 +1,2 @@ >gene1 -ttgtggtga +TTGTGGTGA