Skip to content

Commit

Permalink
Merge pull request #123 from martinghunt/summary_improvements
Browse files (browse the repository at this point in the history)
When a cluster's `assembled` value is `no`, every other summary field should be reported as `no` or `NA`
  • Loading branch information
martinghunt authored Aug 16, 2016
2 parents 9431797 + 5633070 commit b6d5ae7
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 26 deletions.
62 changes: 37 additions & 25 deletions ariba/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,31 +99,43 @@ def _gather_unfiltered_output_data(self):
if cluster.name not in self.all_potential_columns:
self.all_potential_columns[cluster.name] = {'summary' : set(), 'groups': set(), 'vars': set()}

this_cluster_dict = {'summary': copy.copy(cluster.summary), 'groups': {}, 'vars': {}}
seen_groups = {}

for variant in cluster.variants:
if self.show_vars:
this_cluster_dict['vars'][variant.var_string] = 'yes' if variant.het_percent is None else 'het'
if variant.het_percent is not None:
this_cluster_dict['vars'][variant.var_string + '.%'] = variant.het_percent

if self.show_var_groups and variant.var_group != '.':
if variant.var_group not in seen_groups:
seen_groups[variant.var_group] = {'yes': 0, 'het': 0}

if variant.het_percent is None:
seen_groups[variant.var_group]['yes'] += 1
this_cluster_dict['groups'][variant.var_group] = 'yes'
else:
seen_groups[variant.var_group]['het'] += 1
this_cluster_dict['groups'][variant.var_group] = 'het'
this_cluster_dict['groups'][variant.var_group + '.%'] = variant.het_percent

for group, d in seen_groups.items():
if d['het'] > 0 and d['het'] + d['yes'] > 1:
this_cluster_dict['groups'][group] = 'yes_multi_het'
this_cluster_dict['groups'][group + '.%'] = 'NA'
this_cluster_dict = {'groups': {}, 'vars': {}}

if cluster.summary['assembled'] == 'no':
this_cluster_dict['summary'] = {
'assembled': 'no',
'known_var': 'NA',
'match': 'no',
'novel_var': 'NA',
'pct_id': 'NA',
'ref_seq': 'NA'
}
else:
this_cluster_dict['summary'] = copy.copy(cluster.summary)
seen_groups = {}

for variant in cluster.variants:
if self.show_vars:
this_cluster_dict['vars'][variant.var_string] = 'yes' if variant.het_percent is None else 'het'
if variant.het_percent is not None:
this_cluster_dict['vars'][variant.var_string + '.%'] = variant.het_percent

if self.show_var_groups and variant.var_group != '.':
if variant.var_group not in seen_groups:
seen_groups[variant.var_group] = {'yes': 0, 'het': 0}

if variant.het_percent is None:
seen_groups[variant.var_group]['yes'] += 1
this_cluster_dict['groups'][variant.var_group] = 'yes'
else:
seen_groups[variant.var_group]['het'] += 1
this_cluster_dict['groups'][variant.var_group] = 'het'
this_cluster_dict['groups'][variant.var_group + '.%'] = variant.het_percent

for group, d in seen_groups.items():
if d['het'] > 0 and d['het'] + d['yes'] > 1:
this_cluster_dict['groups'][group] = 'yes_multi_het'
this_cluster_dict['groups'][group + '.%'] = 'NA'

for x in this_cluster_dict:
self.all_potential_columns[cluster.name][x].update(set(this_cluster_dict[x].keys()))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ noncoding_ref1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 10.0
noncoding_ref2 0 0 19 78 noncoding2 120 120 98.33 noncoding2.scaffold.1 279 10.0 1 SNP n A42T 1 A42T SNP 42 42 A 84 84 T 17 . 17 noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report generic description of noncoding1
noncoding_ref2 0 0 19 78 noncoding2 120 120 98.33 noncoding2.scaffold.1 279 10.0 1 SNP n A52T 1 A52T SNP 42 42 A 84 84 T 17 G 20,30 noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report generic description of noncoding1
presence_absence_ref1 1 0 27 88 presence_absence1 96 96 98.96 presence_absence1.scaffold.1 267 20.1 0 SNP p A10V . A10V NONSYN 28 28 C 113 113 T 29 . 29 presence_absence_ref1:1:0:A10V:.:Ref has wild, reads have variant so report Generic description of presence_absence1
presence_absence_ref2 1 0 528 232 presence_absence2 1005 554 99.1 presence_absence2.scaffold.1 1032 22.3 0 . p . 0 V175L NONSYN 522 522 G 265 265 C 36 . 36 . Description foo bar
26 changes: 25 additions & 1 deletion ariba/tests/summary_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,18 @@ def test_gather_unfiltered_output_data(self):
},
'groups': {},
'vars': {},
},
'presence_absence2': {
'summary': {
'assembled': 'no',
'known_var': 'NA',
'match': 'no',
'novel_var': 'NA',
'pct_id': 'NA',
'ref_seq': 'NA'
},
'groups': {},
'vars': {}
}
},
infiles[1]: {
Expand Down Expand Up @@ -145,7 +157,7 @@ def test_gather_unfiltered_output_data(self):
},
'groups': {},
'vars': {}
}
},
}
}

Expand Down Expand Up @@ -185,6 +197,18 @@ def test_gather_unfiltered_output_data(self):
},
'groups': set(),
'vars': set()
},
'presence_absence2': {
'summary': {
'assembled',
'known_var',
'match',
'novel_var',
'pct_id',
'ref_seq'
},
'groups': set(),
'vars': set()
}
}

Expand Down

0 comments on commit b6d5ae7

Please sign in to comment.