From 0ed3983099dddb5a753c651074e86057c181636e Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Wed, 10 Jun 2015 15:15:40 +0100 Subject: [PATCH 1/4] New flag hit_both_strands --- ariba/flag.py | 3 ++- ariba/tests/flag_test.py | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/ariba/flag.py b/ariba/flag.py index 06b0da1b..97f8e1a2 100644 --- a/ariba/flag.py +++ b/ariba/flag.py @@ -10,6 +10,7 @@ class Error (Exception): pass 'scaffold_graph_bad', 'assembly_fail', 'variants_suggest_collapsed_repeat', + 'hit_both_strands', ] @@ -26,7 +27,7 @@ def set_flag(self, n): for f in self.flags: if flag_bits[f] & n != 0: self.flags[f] = True - + def add(self, f): self.flags[f] = True diff --git a/ariba/tests/flag_test.py b/ariba/tests/flag_test.py index e70c4374..b187db1c 100644 --- a/ariba/tests/flag_test.py +++ b/ariba/tests/flag_test.py @@ -8,14 +8,14 @@ class TestFlag(unittest.TestCase): def test_init_and_to_number(self): '''Test __init__ and to_number''' - for i in range(128): + for i in range(512): f = flag.Flag(i) self.assertEqual(f.to_number(), i) def test_set_flag(self): '''Test set_flag''' - for i in range(128): + for i in range(512): f = flag.Flag() f.set_flag(i) self.assertEqual(f.to_number(), i) @@ -24,7 +24,7 @@ def test_set_flag(self): def test_add(self): '''Test add''' f = flag.Flag() - expected = [1, 3, 7, 15, 31, 63, 127, 255] + expected = [1, 3, 7, 15, 31, 63, 127, 255, 511] for i in range(len(flag.flags_in_order)): f.add(flag.flags_in_order[i]) self.assertEqual(f.to_number(), expected[i]) @@ -32,7 +32,7 @@ def test_add(self): def test_str(self): '''Test __str__''' - for i in range(256): + for i in range(512): f = flag.Flag(i) self.assertEqual(str(f), str(i)) @@ -49,6 +49,7 @@ def test_to_long_str(self): '[ ] scaffold_graph_bad', '[ ] assembly_fail', '[ ] variants_suggest_collapsed_repeat', + '[ ] hit_both_strands', ]) self.assertEqual(expected, f.to_long_string()) From 99aec36e901308c799135480f1e2b9ec242c25ac Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Wed, 10 Jun 2015 16:07:17 +0100 Subject: [PATCH 2/4] Use new flag hit_both_strands --- ariba/cluster.py | 3 ++- ariba/summary.py | 8 ++++---- ariba/tests/summary_test.py | 7 ++++--- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/ariba/cluster.py b/ariba/cluster.py index 644144cf..caf6dc1b 100644 --- a/ariba/cluster.py +++ b/ariba/cluster.py @@ -434,8 +434,9 @@ def _fix_contig_orientation(self): os.unlink(tmp_coords) in_both = to_revcomp.intersection(not_revcomp) for name in in_both: - print('WARNING: hits to both strands of gene for scaffold. Interpretation of any variants cannot be trusted', name, file=sys.stderr) + print('WARNING: hits to both strands of gene for scaffold. Interpretation of any variants cannot be trusted for this scaffold:', name, file=sys.stderr) to_revcomp.remove(name) + self.status_flag.add('hit_both_strands') f = pyfastaq.utils.open_file_write(self.final_assembly_fa) seq_reader = pyfastaq.sequences.file_reader(self.gapfilled_scaffolds) diff --git a/ariba/summary.py b/ariba/summary.py index e3235fd8..0262524c 100644 --- a/ariba/summary.py +++ b/ariba/summary.py @@ -70,7 +70,7 @@ def _load_fofn(self, fofn): filenames = [x.rstrip() for x in f.readlines()] pyfastaq.utils.close(f) return filenames - + def _check_files_exist(self): for fname in self.filenames: @@ -119,7 +119,7 @@ def _to_summary_number(self, l): if f.has('assembly_fail') or not f.has('gene_assembled') or self._pc_id_of_longest(l) <= self.min_id: return 0 - if not f.has('complete_orf'): + if f.has('hit_both_strands') or (not f.has('complete_orf')): return 1 if f.has('unique_contig') and f.has('gene_assembled_into_one_contig'): @@ -185,11 +185,11 @@ def _write_tsv(self): for row in self.rows_out: print('\t'.join([str(x) for x in row]), file=f) pyfastaq.utils.close(f) - + def _write_xls(self): workbook = openpyxl.Workbook() - worksheet = workbook.worksheets[0] + worksheet = workbook.worksheets[0] worksheet.title = 'ARIBA_summary' for row in self.rows_out: worksheet.append(row) diff --git a/ariba/tests/summary_test.py b/ariba/tests/summary_test.py index 4ce793a2..32740d6a 100644 --- a/ariba/tests/summary_test.py +++ b/ariba/tests/summary_test.py @@ -62,7 +62,7 @@ def test_load_file(self): ] dicts = [s._line2dict('\t'.join(x)) for x in lines] expected = {'gene1': [dicts[0]], 'gene2': dicts[1:3], 'gene3': [dicts[3]]} - got = s._load_file(infile) + got = s._load_file(infile) self.assertEqual(expected, got) @@ -73,6 +73,7 @@ def test_to_summary_number(self): (0, 0), (64, 0), (7, 1), + (259, 1), (15, 2), (27, 3), ] @@ -97,7 +98,7 @@ def test_gather_output_rows(self): ['filename', 'gene1', 'gene2', 'gene3'], [infiles[0], 3, 2, 0], [infiles[1], 3, 0, 3], - ] + ] self.assertEqual(expected, s.rows_out) @@ -110,7 +111,7 @@ def test_filter_output_rows(self): ['file2', 1, 0, 3], ['file3', 2, 0, 4], ] - + expected = [ ['filename', 'gene1', 'gene3'], ['file2', 1, 3], From 31696ea29a38c64c19a69f6f94c1513f59971e96 Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Wed, 10 Jun 2015 16:11:12 +0100 Subject: [PATCH 3/4] nose required by tests, not install --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 27543e55..3a01b683 100644 --- a/setup.py +++ b/setup.py @@ -15,8 +15,8 @@ url='https://github.com/sanger-pathogens/ariba', scripts=glob.glob('scripts/*'), test_suite='nose.collector', + tests_require=['nose >= 1.3'], install_requires=[ - 'nose >= 1.3', 'openpyxl', 'pyfastaq >= 3.0.1', 'pysam >= 0.8.1', From 5faa9ff5fbddfda8b8b3aa673a7e3b045d210109 Mon Sep 17 00:00:00 2001 From: Martin Hunt Date: Wed, 10 Jun 2015 16:11:38 +0100 Subject: [PATCH 4/4] Version bump --- ariba/common.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ariba/common.py b/ariba/common.py index 1dc9f338..16d0e4cf 100644 --- a/ariba/common.py +++ b/ariba/common.py @@ -1,7 +1,7 @@ import sys import subprocess -version = '0.3.2' +version = '0.4.0' def syscall(cmd, allow_fail=False, verbose=False): if verbose: diff --git a/setup.py b/setup.py index 3a01b683..740e5441 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name='ariba', - version='0.3.2', + version='0.4.0', description='ARIBA: Antibiotic Resistance Identification By Assembly', packages = find_packages(), author='Martin Hunt',