Skip to content

Commit

Permalink
Merge pull request #19 from martinghunt/report_read_counts
Browse files Browse the repository at this point in the history
Report read counts
  • Loading branch information
martinghunt committed Mar 25, 2015
2 parents a0bddc4 + 075b3b5 commit 8fe5d29
Show file tree
Hide file tree
Showing 15 changed files with 78 additions and 18 deletions.
12 changes: 12 additions & 0 deletions ariba/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,15 @@ def __init__(self,
self.percent_identities = {}


def _get_read_counts(self):
count1 = pyfastaq.tasks.count_sequences(self.reads1)
count2 = pyfastaq.tasks.count_sequences(self.reads2)
if count1 == count2:
return count1 + count2
else:
raise Error('Different number of fwd/rev reads in cluster ' + self.name + '! Cannot continue')


def _get_total_alignment_score(self, gene_name):
tmp_bam = os.path.join(self.root_dir, 'tmp.get_total_alignment_score.bam')
assert not os.path.exists(tmp_bam)
Expand Down Expand Up @@ -744,12 +753,14 @@ def _get_vcf_variant_counts(self):
def _make_report_lines(self):
self.report_lines = []
cov_per_contig = self._nucmer_hits_to_gene_cov_per_contig()
total_reads = self._get_read_counts()

if len(self.variants) == 0:
for contig in self.percent_identities:
self.report_lines.append([
self.gene.id,
self.status_flag.to_number(),
total_reads,
self.name,
len(self.gene),
cov_per_contig[contig],
Expand All @@ -767,6 +778,7 @@ def _make_report_lines(self):
self.report_lines.append([
self.gene.id,
self.status_flag.to_number(),
total_reads,
self.name,
len(self.gene),
cov_per_contig[contig],
Expand Down
1 change: 1 addition & 0 deletions ariba/clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ def _write_reports(self):
columns = [
'#gene',
'flag',
'reads',
'cluster',
'gene_len',
'assembled',
Expand Down
2 changes: 2 additions & 0 deletions ariba/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ class Error (Exception): pass
columns = [
'gene',
'flag',
'reads',
'cluster',
'gene_len',
'assembled',
Expand All @@ -26,6 +27,7 @@ class Error (Exception): pass
]

int_columns = [
'reads',
'gene_len',
'assembled',
'gene_start',
Expand Down
20 changes: 20 additions & 0 deletions ariba/tests/cluster_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,25 @@ def test_init_fail_files_missing(self):
clean_cluster_dir(d)


def test_get_read_counts(self):
'''test _get_read_counts pass'''
cluster_dir = os.path.join(data_dir, 'cluster_test_get_read_counts')
clean_cluster_dir(cluster_dir)
c = cluster.Cluster(cluster_dir, 'name')
self.assertEqual(2, c._get_read_counts())
clean_cluster_dir(cluster_dir)


def test_get_read_counts_fail(self):
'''test _get_read_counts fail'''
cluster_dir = os.path.join(data_dir, 'cluster_test_get_read_counts_fail')
clean_cluster_dir(cluster_dir)
c = cluster.Cluster(cluster_dir, 'name')
with self.assertRaises(cluster.Error):
c._get_read_counts()
clean_cluster_dir(cluster_dir)


def test_get_total_alignment_score(self):
'''test _get_total_alignment_score'''
cluster_dir = os.path.join(data_dir, 'cluster_test_get_total_alignment_score')
Expand Down Expand Up @@ -629,6 +648,7 @@ def test_make_report_lines(self):
expected = [[
'gene',
42,
2,
'cluster_name',
39,
10,
Expand Down
2 changes: 2 additions & 0 deletions ariba/tests/data/cluster_test_get_read_counts/genes.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>name_of_gene
AAACCCGGGTTT
4 changes: 4 additions & 0 deletions ariba/tests/data/cluster_test_get_read_counts/reads_1.fq
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@read1/1
ACGTACGT
+
IIIIIIII
4 changes: 4 additions & 0 deletions ariba/tests/data/cluster_test_get_read_counts/reads_2.fq
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@read1/2
ACGTACGT
+
IIIIIIII
2 changes: 2 additions & 0 deletions ariba/tests/data/cluster_test_get_read_counts_fail/genes.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>name_of_gene
AAACCCGGGTTT
8 changes: 8 additions & 0 deletions ariba/tests/data/cluster_test_get_read_counts_fail/reads_1.fq
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
@read1/1
ACGTACGT
+
IIIIIIII
@read2/1
TCATCATA
+
:D:D:D:D
4 changes: 4 additions & 0 deletions ariba/tests/data/cluster_test_get_read_counts_fail/reads_2.fq
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@read1/2
ACGTACGT
+
IIIIIIII
2 changes: 1 addition & 1 deletion ariba/tests/data/clusters_test_write_report.tsv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#gene flag cluster gene_len assembled pc_ident var_type var_effect new_aa gene_start gene_end gene_nt scaffold scaff_len scaff_start scaff_end scaff_nt
#gene flag reads cluster gene_len assembled pc_ident var_type var_effect new_aa gene_start gene_end gene_nt scaffold scaff_len scaff_start scaff_end scaff_nt
gene1 line1
gene2 line2
8 changes: 4 additions & 4 deletions ariba/tests/data/summary_test_gather_output_rows.in.1.tsv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#gene flag cluster gene_len assembled pc_ident var_type var_effect new_aa gene_start gene_end gene_nt scaffold scaff_len scaff_start scaff_end scaff_nt
gene1 27 1 822 822 100.0 . . . . . . gene1.scaffold.1 1490 . . .
gene2 15 2 780 780 100.0 . . . . . . gene2.scaffold.2 1124 . . .
gene2 15 2 780 770 99.0 . . . . . . gene2.scaffold.3 1097 . . .
#gene flag reads cluster gene_len assembled pc_ident var_type var_effect new_aa gene_start gene_end gene_nt scaffold scaff_len scaff_start scaff_end scaff_nt
gene1 27 42 1 822 822 100.0 . . . . . . gene1.scaffold.1 1490 . . .
gene2 15 44 2 780 780 100.0 . . . . . . gene2.scaffold.2 1124 . . .
gene2 15 46 2 780 770 99.0 . . . . . . gene2.scaffold.3 1097 . . .
6 changes: 3 additions & 3 deletions ariba/tests/data/summary_test_gather_output_rows.in.2.tsv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#gene flag cluster gene_len assembled pc_ident var_type var_effect new_aa gene_start gene_end gene_nt scaffold scaff_len scaff_start scaff_end scaff_nt
gene1 27 1 822 822 100.0 . . . . . . gene1.scaffold.1 1490 . . .
gene3 27 3 750 750 98.93 . . . . . . gene3.scaffold.1 1047 . . .
#gene flag reads cluster gene_len assembled pc_ident var_type var_effect new_aa gene_start gene_end gene_nt scaffold scaff_len scaff_start scaff_end scaff_nt
gene1 27 142 1 822 822 100.0 . . . . . . gene1.scaffold.1 1490 . . .
gene3 27 144 3 750 750 98.93 . . . . . . gene3.scaffold.1 1047 . . .
10 changes: 5 additions & 5 deletions ariba/tests/data/summary_test_load_file.in.tsv
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#gene flag cluster gene_len assembled pc_ident var_type var_effect new_aa gene_start gene_end gene_nt scaffold scaff_len scaff_start scaff_end scaff_nt
gene1 27 1 822 822 100.0 . . . . . . gene1.scaffold.1 1490 . . .
gene2 15 2 780 780 100.0 . . . . . . gene2.scaffold.2 1124 . . .
gene2 15 2 780 770 99.0 . . . . . . gene2.scaffold.3 1097 . . .
gene3 187 3 750 750 98.93 SNP SYN . 318 318 C gene3.scaffold.1 1047 319 319 G
#gene flag reads cluster gene_len assembled pc_ident var_type var_effect new_aa gene_start gene_end gene_nt scaffold scaff_len scaff_start scaff_end scaff_nt
gene1 27 42 1 822 822 100.0 . . . . . . gene1.scaffold.1 1490 . . .
gene2 15 44 2 780 780 100.0 . . . . . . gene2.scaffold.2 1124 . . .
gene2 15 46 2 780 770 99.0 . . . . . . gene2.scaffold.3 1097 . . .
gene3 187 48 3 750 750 98.93 SNP SYN . 318 318 C gene3.scaffold.1 1047 319 319 G
11 changes: 6 additions & 5 deletions ariba/tests/summary_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,12 @@ def test_init(self):

def test_line2dict(self):
'''Test _line2dict'''
line = '\t'.join(['gene1', '187', '3', '750', '750', '98.93', 'SNP', 'SYN', '.', '66', '66', 'A', 'gene1.scaffold.1', '1047', '67', '67', 'C'])
line = '\t'.join(['gene1', '187', '42', '3', '750', '750', '98.93', 'SNP', 'SYN', '.', '66', '66', 'A', 'gene1.scaffold.1', '1047', '67', '67', 'C'])
s = summary.Summary('out', filenames=['spam', 'eggs'])
expected = {
'gene': 'gene1',
'flag': flag.Flag(187),
'reads': 42,
'cluster': '3',
'gene_len': 750,
'assembled': 750,
Expand All @@ -51,10 +52,10 @@ def test_load_file(self):
infile = os.path.join(data_dir, 'summary_test_load_file.in.tsv')

lines = [
['gene1', '27', '1', '822', '822', '100.0', '.', '.', '.', '.', '.', '.', 'gene1.scaffold.1', '1490', '.', '.', '.'],
['gene2', '15', '2', '780', '780', '100.0', '.', '.', '.', '.', '.', '.', 'gene2.scaffold.2', '1124', '.', '.', '.'],
['gene2', '15', '2', '780', '770', '99.0', '.', '.', '.', '.', '.', '.', 'gene2.scaffold.3', '1097', '.', '.', '.'],
['gene3', '187', '3', '750', '750', '98.93', 'SNP', 'SYN', '.', '318', '318', 'C', 'gene3.scaffold.1', '1047', '319', '319', 'G']
['gene1', '27', '42', '1', '822', '822', '100.0', '.', '.', '.', '.', '.', '.', 'gene1.scaffold.1', '1490', '.', '.', '.'],
['gene2', '15', '44', '2', '780', '780', '100.0', '.', '.', '.', '.', '.', '.', 'gene2.scaffold.2', '1124', '.', '.', '.'],
['gene2', '15', '46', '2', '780', '770', '99.0', '.', '.', '.', '.', '.', '.', 'gene2.scaffold.3', '1097', '.', '.', '.'],
['gene3', '187', '48', '3', '750', '750', '98.93', 'SNP', 'SYN', '.', '318', '318', 'C', 'gene3.scaffold.1', '1047', '319', '319', 'G']
]
dicts = [s._line2dict('\t'.join(x)) for x in lines]
expected = {'gene1': [dicts[0]], 'gene2': dicts[1:3], 'gene3': [dicts[3]]}
Expand Down

0 comments on commit 8fe5d29

Please sign in to comment.