Commit cb6d4d9

Checkpoint work.
galv committed Dec 6, 2023
1 parent 74e09f1 commit cb6d4d9
Showing 4 changed files with 23 additions and 20 deletions.
examples/asr/export/transducer/infer_transducer_trt.py (1 addition, 1 deletion)
@@ -159,7 +159,7 @@ def main():

    # Evaluate Pytorch Model (CPU/GPU)
    torch.cuda.cudart().cudaProfilerStart()
-   with torch.inference_mode():  # , torch.autocast("cuda"):
+   with torch.inference_mode(), torch.autocast("cuda"):
        actual_transcripts = nemo_model.transcribe(audio_filepath, batch_size=args.batch_size)[0]
    print("GALVEZ:")
    for at in actual_transcripts:
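Note on the change above: stacking torch.autocast("cuda") with torch.inference_mode() runs autocast-eligible ops (matmuls, convolutions) in reduced precision while skipping autograd bookkeeping entirely. A minimal sketch of the pattern outside NeMo (model and batch are placeholder names, not from this repository):

    import torch

    model = torch.nn.Linear(80, 128).cuda().eval()  # placeholder model
    batch = torch.randn(8, 80, device="cuda")       # placeholder input

    # inference_mode() disables autograd record-keeping; autocast("cuda")
    # dispatches eligible ops to float16/bfloat16 kernels.
    with torch.inference_mode(), torch.autocast("cuda"):
        out = model(batch)

    print(out.dtype)  # typically torch.float16 under autocast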
nemo/collections/asr/models/rnnt_models.py (5 additions, 0 deletions)
@@ -293,13 +293,16 @@ def transcribe(
        if augmentor:
            config['augmentor'] = augmentor

+       print("GALVEZ:augmentor=", augmentor)
+
        temporary_datalayer = self._setup_transcribe_dataloader(config)
        for test_batch in tqdm(temporary_datalayer, desc="Transcribing", disable=(not verbose)):
            torch.cuda.nvtx.range_push("encoder")
            encoded, encoded_len = self.forward(
                input_signal=test_batch[0].to(device),
                input_signal_length=test_batch[1].to(device)
            )
+           # print("GALVEZ:encoded=", encoded)
            torch.cuda.nvtx.range_pop()
            torch.cuda.nvtx.range_push("decoding")
            best_hyp, all_hyp = self.decoding.rnnt_decoder_predictions_tensor(
@@ -316,6 +319,8 @@ def transcribe(
                else:
                    all_hypotheses += best_hyp

+               print("GALVEZ:best_hyp=", best_hyp)
+
                del encoded
                del test_batch
        finally:
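Note on the NVTX calls wrapped around the encoder forward and the decoding step: range_push/range_pop create named spans on an Nsight Systems timeline, and the cudaProfilerStart/Stop pair delimits the capture window when the script runs under nsys profile --capture-range=cudaProfilerApi. A minimal sketch of the same pattern, with work() standing in for the profiled region:

    import torch

    def work():
        a = torch.randn(1024, 1024, device="cuda")
        b = torch.randn(1024, 1024, device="cuda")
        return a @ b

    torch.cuda.cudart().cudaProfilerStart()  # open the nsys capture window
    torch.cuda.nvtx.range_push("encoder")    # named span on the timeline
    out = work()
    torch.cuda.nvtx.range_pop()
    torch.cuda.cudart().cudaProfilerStop()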
nemo/collections/asr/parts/submodules/fast_rnnt_greedy_decoding.py (15 additions, 17 deletions)
@@ -314,6 +314,8 @@ def __call__(

        # This seems wrong. Do I need to negate this?
        k.masked_scatter_(self.blank_mask, self.last_label)
+       # This doesn't seem right. Why is my last label blank? It should be SOS, right?
+       # I should not copy k if last_label is SOS, right?
        self.last_label.copy_(k)

        # It seems that I am unconditionally copying. That is wrong... I should do a masked copy
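The uncertainty in the comments above is about masked-copy semantics: after a batch element emits blank, its label should be held at the carried value rather than overwritten. One subtlety worth knowing here is that masked_scatter_ consumes its source tensor contiguously, one element per True mask position, rather than elementwise, so torch.where (or boolean indexing on both sides) expresses the aligned per-position copy. A minimal sketch with toy tensors (all names illustrative):

    import torch

    blank_mask = torch.tensor([True, False, True])  # elements that just emitted blank
    last_label = torch.tensor([7, 3, 9])            # labels carried from the previous step
    k = torch.tensor([0, 5, 0])                     # argmax labels from the current step

    # masked_scatter_ takes source values in order: the two True positions
    # receive last_label[0] and last_label[1], not the values at indices 0 and 2.
    a = k.clone()
    a.masked_scatter_(blank_mask, last_label)
    print(a)  # tensor([7, 5, 3])

    # Elementwise "hold the carried label where blank" is torch.where:
    b = torch.where(blank_mask, last_label, k)
    print(b)  # tensor([7, 5, 9])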
@@ -345,40 +347,36 @@
        torch.cuda.cudart().cudaProfilerStart()
        cu_call(cudart.cudaGraphLaunch(self.graph_exec, torch.cuda.current_stream().cuda_stream))
        cu_call(cudart.cudaStreamSynchronize(torch.cuda.current_stream().cuda_stream))
        torch.cuda.cudart().cudaProfilerStop()
        end = time.time()
        print("total time:", end - start)

        torch.set_printoptions(threshold=100_000)
        print("GALVEZ:", self.symbols_per_time_step_cpu)
        print("GALVEZ:scores=", self.scores_cpu)
        print("GALVEZ:labels=", self.labels_cpu)
        print("GALVEZ:symbols_per_time_step=", self.symbols_per_time_step_cpu)


        torch.cuda.nvtx.range_push("Copy data out")
        # js = torch.zeros(batch_size, dtype=torch.int64, device="cpu")
        j = 0
        for t in range(max_time):
            max_non_blank_symbols = self.symbols_per_time_step_cpu[t]
            print("GALVEZ:", t, max_non_blank_symbols)
            for _ in range(max_non_blank_symbols):
                for i in range(batch_size):
                    if self.labels_cpu[j, i] == caller._blank_index:
                        # Ooops! This is not correct!!!!! It's continue... It's fine...
                        continue
                    hypotheses[i].y_sequence.append(self.labels_cpu[j, i])
                    hypotheses[i].timestep.append(t)
                    hypotheses[i].score += self.scores_cpu[j, i]
                j += 1
        # for i in range(batch_size):
        #     j =
        #     hypotheses[i].y_sequence.append(self.labels_cpu[, i])
        torch.cuda.nvtx.range_pop()
        torch.cuda.cudart().cudaProfilerStop()

        print("NEW:", hypotheses)

        # import ipdb; ipdb.set_trace()

        # out_len_cpu = out_len.to("cpu")
        # for i, t in product(range(batch_size), range(out_len_cpu[i])):
        #     # Need best_label at each seq_idx_t
        #     # Need time_idx_t as well, can derive via dividing by max_symbols_per_step? No, that is not true.
        #     # Need score, which comes from v
        #     j = 0
        #     while j < self.labels_cpu.shape[2] and self.labels_cpu[i, t, j] != caller._blank_index:
        #         hypotheses[i].y_sequence.append(self.labels_cpu[i, t, j])
        #         hypotheses[i].timestep.append(t)
        #         hypotheses[i].score += self.scores_cpu[i, t, j]
        #         j += 1
        import ipdb; ipdb.set_trace()

        return hypotheses
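Background on the cudaGraphLaunch call at the top of this hunk: the inner decoding loop has evidently been captured into self.graph_exec elsewhere, so each decode replays the recorded kernel sequence with a single launch instead of re-issuing every per-step kernel from Python. A minimal sketch of the capture-and-replay pattern using PyTorch's higher-level wrapper rather than the raw cuda-python bindings used here (shapes and ops are placeholders):

    import torch

    static_in = torch.randn(8, 128, device="cuda")
    weight = torch.randn(128, 128, device="cuda")
    static_out = torch.empty(8, 128, device="cuda")

    # Warm up on a side stream, as the PyTorch docs recommend before capture.
    s = torch.cuda.Stream()
    s.wait_stream(torch.cuda.current_stream())
    with torch.cuda.stream(s):
        static_out.copy_(static_in @ weight)
    torch.cuda.current_stream().wait_stream(s)

    # Capture the op sequence once...
    g = torch.cuda.CUDAGraph()
    with torch.cuda.graph(g):
        static_out.copy_(static_in @ weight)

    # ...then replay it: refill the static input and relaunch with one call.
    static_in.copy_(torch.randn(8, 128, device="cuda"))
    g.replay()
    torch.cuda.synchronize()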
nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py (2 additions, 2 deletions)
@@ -603,8 +603,8 @@ def forward(

        with self.decoder.as_frozen(), self.joint.as_frozen():
            inseq = encoder_output  # [B, T, D]
-           # inseq = inseq[:, :5, :]
-           # logitlen.fill_(5)
+           # inseq = inseq[:, :1, :]
+           # logitlen.fill_(1)
            if isinstance(self._greedy_decode, RNNTGreedyDecodeFast):
                hypotheses = self._greedy_decode(
                    self, inseq, logitlen, device=inseq.device, partial_hypotheses=partial_hypotheses
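The commented-out truncation lines above (first :5, now :1 in this commit) are a debugging aid: shrinking the encoder output to a handful of frames makes a divergence between the fast decoder and the baseline reproducible on the smallest possible input. A sketch of that idea as a helper (truncate_batch is hypothetical, not part of NeMo):

    import torch

    def truncate_batch(encoder_output: torch.Tensor, lengths: torch.Tensor, t: int):
        """Keep the first t frames ([B, t, D]) so decoder mismatches reproduce on tiny inputs."""
        return encoder_output[:, :t, :].contiguous(), torch.clamp(lengths, max=t)

Unlike logitlen.fill_(1), clamping preserves lengths that are already shorter than the cutoff.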
