Merge pull request #123 from martinghunt/summary_improvements

When a cluster's 'assembled' value is 'no', every other summary field should be reported as 'no' or 'NA'
sanger-pathogens · Aug 16, 2016 · b6d5ae7
2 parents 9431797 + 5633070
commit b6d5ae7
Show file tree

Hide file tree

Showing 3 changed files with 63 additions and 26 deletions.
diff --git a/ariba/summary.py b/ariba/summary.py
@@ -99,31 +99,43 @@ def _gather_unfiltered_output_data(self):
                 if cluster.name not in self.all_potential_columns:
                     self.all_potential_columns[cluster.name] = {'summary' : set(), 'groups': set(), 'vars': set()}
 
-                this_cluster_dict = {'summary': copy.copy(cluster.summary), 'groups': {}, 'vars': {}}
-                seen_groups = {}
-
-                for variant in cluster.variants:
-                    if self.show_vars:
-                        this_cluster_dict['vars'][variant.var_string] = 'yes' if variant.het_percent is None else 'het'
-                        if variant.het_percent is not None:
-                            this_cluster_dict['vars'][variant.var_string + '.%'] = variant.het_percent
-
-                    if self.show_var_groups and variant.var_group != '.':
-                        if variant.var_group not in seen_groups:
-                            seen_groups[variant.var_group] = {'yes': 0, 'het': 0}
-
-                        if variant.het_percent is None:
-                            seen_groups[variant.var_group]['yes'] += 1
-                            this_cluster_dict['groups'][variant.var_group] = 'yes'
-                        else:
-                            seen_groups[variant.var_group]['het'] += 1
-                            this_cluster_dict['groups'][variant.var_group] = 'het'
-                            this_cluster_dict['groups'][variant.var_group + '.%'] = variant.het_percent
-
-                for group, d in seen_groups.items():
-                    if d['het'] > 0 and d['het'] + d['yes'] > 1:
-                        this_cluster_dict['groups'][group] = 'yes_multi_het'
-                        this_cluster_dict['groups'][group + '.%'] = 'NA'
+                this_cluster_dict = {'groups': {}, 'vars': {}}
+
+                if cluster.summary['assembled'] == 'no':
+                    this_cluster_dict['summary'] = {
+                            'assembled': 'no',
+                            'known_var': 'NA',
+                            'match': 'no',
+                            'novel_var': 'NA',
+                            'pct_id': 'NA',
+                            'ref_seq': 'NA'
+                    }
+                else:
+                    this_cluster_dict['summary'] = copy.copy(cluster.summary)
+                    seen_groups = {}
+
+                    for variant in cluster.variants:
+                        if self.show_vars:
+                            this_cluster_dict['vars'][variant.var_string] = 'yes' if variant.het_percent is None else 'het'
+                            if variant.het_percent is not None:
+                                this_cluster_dict['vars'][variant.var_string + '.%'] = variant.het_percent
+
+                        if self.show_var_groups and variant.var_group != '.':
+                            if variant.var_group not in seen_groups:
+                                seen_groups[variant.var_group] = {'yes': 0, 'het': 0}
+
+                            if variant.het_percent is None:
+                                seen_groups[variant.var_group]['yes'] += 1
+                                this_cluster_dict['groups'][variant.var_group] = 'yes'
+                            else:
+                                seen_groups[variant.var_group]['het'] += 1
+                                this_cluster_dict['groups'][variant.var_group] = 'het'
+                                this_cluster_dict['groups'][variant.var_group + '.%'] = variant.het_percent
+
+                    for group, d in seen_groups.items():
+                        if d['het'] > 0 and d['het'] + d['yes'] > 1:
+                            this_cluster_dict['groups'][group] = 'yes_multi_het'
+                            this_cluster_dict['groups'][group + '.%'] = 'NA'
 
                 for x in this_cluster_dict:
                     self.all_potential_columns[cluster.name][x].update(set(this_cluster_dict[x].keys()))

diff --git a/ariba/tests/data/summary_gather_unfiltered_output_data.in.1.tsv b/ariba/tests/data/summary_gather_unfiltered_output_data.in.1.tsv
@@ -3,3 +3,4 @@ noncoding_ref1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	10.0
 noncoding_ref2	0	0	19	78	noncoding2	120	120	98.33	noncoding2.scaffold.1	279	10.0	1	SNP	n	A42T	1	A42T	SNP	42	42	A	84	84	T	17	.	17	noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report	generic description of noncoding1
 noncoding_ref2	0	0	19	78	noncoding2	120	120	98.33	noncoding2.scaffold.1	279	10.0	1	SNP	n	A52T	1	A52T	SNP	42	42	A	84	84	T	17	G	20,30	noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report	generic description of noncoding1
 presence_absence_ref1	1	0	27	88	presence_absence1	96	96	98.96	presence_absence1.scaffold.1	267	20.1	0	SNP	p	A10V	.	A10V	NONSYN	28	28	C	113	113	T	29	.	29	presence_absence_ref1:1:0:A10V:.:Ref has wild, reads have variant so report	Generic description of presence_absence1
+presence_absence_ref2	1	0	528	232	presence_absence2	1005	554	99.1	presence_absence2.scaffold.1	1032	22.3	0	.	p	.	0	V175L	NONSYN	522	522	G	265	265	C	36	.	36	.	Description foo bar
diff --git a/ariba/tests/summary_test.py b/ariba/tests/summary_test.py
@@ -108,6 +108,18 @@ def test_gather_unfiltered_output_data(self):
                     },
                     'groups': {},
                     'vars': {},
+                },
+                'presence_absence2': {
+                    'summary': {
+                            'assembled': 'no',
+                            'known_var': 'NA',
+                            'match': 'no',
+                            'novel_var': 'NA',
+                            'pct_id': 'NA',
+                            'ref_seq': 'NA'
+                    },
+                    'groups': {},
+                    'vars': {}
                 }
             },
             infiles[1]: {
@@ -145,7 +157,7 @@ def test_gather_unfiltered_output_data(self):
                     },
                     'groups': {},
                     'vars': {}
-                }
+                },
             }
         }
 
@@ -185,6 +197,18 @@ def test_gather_unfiltered_output_data(self):
                 },
                 'groups': set(),
                 'vars': set()
+            },
+            'presence_absence2': {
+                'summary': {
+                    'assembled',
+                    'known_var',
+                    'match',
+                    'novel_var',
+                    'pct_id',
+                    'ref_seq'
+                },
+                'groups': set(),
+                'vars': set()
             }
         }