Skip to content

Commit

Permalink
Fix TPM output
Browse files: browse the repository at this point in the history
  • Loading branch information
andrewprzh committed Apr 11, 2024
1 parent d1bb2cd commit fbde9bd
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 25 deletions.
4 changes: 2 additions & 2 deletions src/dataset_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,7 +626,7 @@ def merge_assignments(self, sample, aggregator, chr_ids):
for p in aggregator.global_counter.counters:
merge_files(
[rreplace(p.output_counts_file_name, sample.prefix, sample.prefix + "_" + chr_id) for chr_id in chr_ids],
p.output_file,
p.get_output_file_handler(),
stats_file_names=[rreplace(p.output_stats_file_name, sample.prefix, sample.prefix + "_" + chr_id) for
chr_id in chr_ids]
if p.output_stats_file_name else None,
Expand All @@ -641,7 +641,7 @@ def merge_transcript_models(self, label, aggregator, chr_ids, gff_printer):
gff_printer.out_r2t, copy_header=False)
for p in aggregator.transcript_model_global_counter.counters:
merge_files([rreplace(p.output_counts_file_name, label, label + "_" + chr_id) for chr_id in chr_ids],
p.output_file,
p.get_output_file_handler(),
stats_file_names=[rreplace(p.output_stats_file_name, label, label + "_" + chr_id) for chr_id in
chr_ids]
if p.output_stats_file_name else None,
Expand Down
48 changes: 25 additions & 23 deletions src/long_read_counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,14 @@ class AbstractCounter:
def __init__(self, output_prefix, ignore_read_groups=False, output_zeroes=True):
self.ignore_read_groups = ignore_read_groups
self.output_counts_file_name = output_prefix + "_counts.tsv"
self.output_file = open(self.output_counts_file_name, "w")
self.output_file = self.output_counts_file_name
open(self.output_file, "w").close()
self.output_tpm_file_name = output_prefix + "_tpm.tsv"
self.output_zeroes = output_zeroes
self.output_stats_file_name = None

def __del__(self):
self.output_file.close()
def get_output_file_handler(self):
return open(self.output_file, "a")

def add_read_info(self, read_assignment):
raise NotImplementedError()
Expand Down Expand Up @@ -250,24 +251,25 @@ def dump(self):
continue
self.feature_counter[group_id][feature_id] = 0.0

self.output_file.write(self.format_header(all_groups))
for feature_id in all_features:
if self.ignore_read_groups:
count = self.feature_counter[all_groups[0]][feature_id]
if not self.output_zeroes and count == 0:
continue
total_counts[all_groups[0]] += count
self.output_file.write("%s\t%.2f\n" % (feature_id, count))
else:
row_count = 0
for group_id in all_groups:
count = self.feature_counter[group_id][feature_id]
total_counts[group_id] += count
row_count += count
if not self.output_zeroes and row_count == 0:
continue
count_values = [self.feature_counter[group_id][feature_id] for group_id in all_groups]
self.output_file.write("%s\t%s\n" % (feature_id, "\t".join(["%.2f" % c for c in count_values])))
with self.get_output_file_handler() as output_file:
output_file.write(self.format_header(all_groups))
for feature_id in all_features:
if self.ignore_read_groups:
count = self.feature_counter[all_groups[0]][feature_id]
if not self.output_zeroes and count == 0:
continue
total_counts[all_groups[0]] += count
output_file.write("%s\t%.2f\n" % (feature_id, count))
else:
row_count = 0
for group_id in all_groups:
count = self.feature_counter[group_id][feature_id]
total_counts[group_id] += count
row_count += count
if not self.output_zeroes and row_count == 0:
continue
count_values = [self.feature_counter[group_id][feature_id] for group_id in all_groups]
output_file.write("%s\t%s\n" % (feature_id, "\t".join(["%.2f" % c for c in count_values])))

if self.ignore_read_groups:
with open(self.output_stats_file_name, "w") as f:
Expand All @@ -280,8 +282,8 @@ def convert_counts_to_tpm(self):
with open(self.output_counts_file_name) as f:
for line in f:
if line.startswith('_'): break
fs = line.split()
if line.startswith('#'): continue
fs = line.rstrip().split('\t')
if self.ignore_read_groups:
total_counts[AbstractReadGrouper.default_group_id] += float(fs[1])
else:
Expand All @@ -300,7 +302,7 @@ def convert_counts_to_tpm(self):
if line.startswith('#'):
outf.write(line.replace("count", "TPM"))
continue
fs = line.split()
fs = line.rstrip().split('\t')
if self.ignore_read_groups:
feature_id, count = fs[0], float(fs[1])
tpm = scale_factors[AbstractReadGrouper.default_group_id] * count
Expand Down

0 comments on commit fbde9bd

Please sign in to comment.