
Commit 1276c1a
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Jan 4, 2024
1 parent f132c52 commit 1276c1a
Showing 16 changed files with 299 additions and 264 deletions.
2 changes: 1 addition & 1 deletion examples/asr/export/transducer/infer_transducer_onnx.py
@@ -111,7 +111,7 @@ def assert_args(args):

 def export_model_if_required(args, nemo_model):
     if args.export:
-        nemo_model.export("temp_rnnt.onnx", onnx_opset_version=18) #, verbose=True)
+        nemo_model.export("temp_rnnt.onnx", onnx_opset_version=18)  # , verbose=True)
         args.onnx_encoder = "encoder-temp_rnnt.onnx"
         args.onnx_decoder = "decoder_joint-temp_rnnt.onnx"
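For context on the hunk above: export_model_if_required() drives NeMo's ONNX export before inference. A minimal, hedged sketch of that flow (the checkpoint name is an assumption, borrowed from the run_full.py changes later in this commit):

    # Sketch only: export a pretrained NeMo transducer model to ONNX, as the
    # hunk above does. The checkpoint name is assumed from run_full.py below.
    from nemo.collections.asr.models import ASRModel

    nemo_model = ASRModel.from_pretrained("stt_en_conformer_transducer_large", map_location="cuda")
    # RNNT export writes "encoder-" and "decoder_joint-" prefixed files, which
    # is why the script repoints args.onnx_encoder and args.onnx_decoder above.
    nemo_model.export("temp_rnnt.onnx", onnx_opset_version=18)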
17 changes: 10 additions & 7 deletions examples/asr/export/transducer/infer_transducer_trt.py
@@ -17,17 +17,16 @@
 import os
 import tempfile
 from argparse import ArgumentParser
-import tensorrt as trt # you must import trt before nemo import

+import tensorrt as trt  # you must import trt before nemo import
 import torch
 from tqdm import tqdm
+from trt_greedy_batched_rnnt import TRTGreedyBatchedRNNTInfer

 from nemo.collections.asr.metrics.wer import word_error_rate
 from nemo.collections.asr.models import ASRModel
-from trt_greedy_batched_rnnt import TRTGreedyBatchedRNNTInfer
 from nemo.utils import logging


 """
 Script to compare the outputs of a NeMo Pytorch based RNNT Model and its ONNX exported representation.
@@ -155,7 +154,7 @@ def main():

     audio_filepath = resolve_audio_filepaths(args)

-    audio_filepath = audio_filepath[:16 * 4]
+    audio_filepath = audio_filepath[: 16 * 4]

     # Evaluate Pytorch Model (CPU/GPU)
     torch.cuda.cudart().cudaProfilerStart()
@@ -165,10 +164,12 @@
     for at in actual_transcripts:
         print(at)
     torch.cuda.cudart().cudaProfilerStop()
-    import sys; sys.exit()
+    import sys
+
+    sys.exit()
     assert False

-    #torch.cuda.empty_cache() # to empty all cuda memory
+    # torch.cuda.empty_cache() # to empty all cuda memory

     # Evaluate TRT model
     with tempfile.TemporaryDirectory() as tmpdir:
@@ -189,7 +190,9 @@ def main():
             if i == 2:
                 torch.cuda.cudart().cudaProfilerStart()
             if i == 5:
-                import sys; sys.exit()
+                import sys
+
+                sys.exit()
             torch.cuda.cudart().cudaProfilerStop()
             torch.cuda.nvtx.range_push("iteration")
             # if i < 36:
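The hunks above are debugging scaffolding around CUDA profiler ranges. A hedged sketch of the pattern, assuming a CUDA-capable machine and an external capture tool such as Nsight Systems run with --capture-range=cudaProfilerApi:

    # Sketch of the profiling pattern in main() above: cudaProfilerStart/Stop
    # bound the region the profiler records; NVTX ranges label each iteration.
    import torch

    torch.cuda.cudart().cudaProfilerStart()
    for i in range(10):  # placeholder iteration count
        torch.cuda.nvtx.range_push("iteration")
        # ... run one transcription batch here ...
        torch.cuda.nvtx.range_pop()
    torch.cuda.cudart().cudaProfilerStop()

Note that the "you must import trt before nemo import" constraint survives the isort reordering only because tensorrt happens to sort ahead of the nemo group; the requirement comes from the script's own comment, not from isort.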
17 changes: 4 additions & 13 deletions examples/asr/export/transducer/reproduce_length_error/run.py
@@ -20,28 +20,19 @@
 This script runs an identity model with ONNX-Runtime and TensorRT,
 then compares outputs.
 """
+import numpy as np
 from polygraphy.backend.onnxrt import OnnxrtRunner, SessionFromOnnx
 from polygraphy.backend.trt import EngineFromNetwork, NetworkFromOnnxPath, TrtRunner
 from polygraphy.backend.trt.config import CreateConfig
 from polygraphy.backend.trt.profile import Profile
 from polygraphy.comparator import Comparator, CompareFunc

-import numpy as np
-


 def main():
     build_onnxrt_session = SessionFromOnnx("just_length_computation2.onnx")

-    config = CreateConfig(fp16=True, profiles=[
-        (Profile()
-            .add("length", min=(1, ), opt=(16, ), max=(32, ))
-        )
-    ]
-    )
-    build_engine = EngineFromNetwork(
-        NetworkFromOnnxPath("just_length_computation2.onnx"),
-        config
-    )
+    config = CreateConfig(fp16=True, profiles=[(Profile().add("length", min=(1,), opt=(16,), max=(32,)))])
+    build_engine = EngineFromNetwork(NetworkFromOnnxPath("just_length_computation2.onnx"), config)

     runners = [
         TrtRunner(build_engine),

@@ -53,6 +44,6 @@ def main():
     assert bool(Comparator.compare_accuracy(run_results, compare_func=CompareFunc.simple(atol=1e-8)))
     run_results.save("inference_results.json")

+
 if __name__ == "__main__":
     main()
-

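run.py persists its comparison via run_results.save("inference_results.json"). A hedged sketch of reading the results back, assuming polygraphy's RunResults serialization round-trips via RunResults.load:

    # Sketch only: reload the comparison that run.py saved above.
    from polygraphy.comparator import RunResults

    run_results = RunResults.load("inference_results.json")
    for runner_name, iterations in run_results.items():
        # Each runner maps to a list of per-iteration output dicts.
        print(runner_name, sorted(iterations[0].keys()))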
46 changes: 26 additions & 20 deletions examples/asr/export/transducer/reproduce_length_error/run_full.py
@@ -1,13 +1,12 @@
+import os
+
+import numpy as np
 from polygraphy.backend.onnxrt import OnnxrtRunner, SessionFromOnnx
-from polygraphy.backend.trt import EngineFromBytes, EngineFromNetwork, NetworkFromOnnxPath, save_engine, TrtRunner
+from polygraphy.backend.trt import EngineFromBytes, EngineFromNetwork, NetworkFromOnnxPath, TrtRunner, save_engine
 from polygraphy.backend.trt.config import CreateConfig
 from polygraphy.backend.trt.profile import Profile
 from polygraphy.comparator import Comparator, CompareFunc

-import numpy as np
-
-import os
-
 from nemo.collections.asr.models import ASRModel

@@ -19,20 +18,20 @@ def main():
     build_onnxrt_session = SessionFromOnnx("encoder-temp_rnnt.onnx")

     if not os.path.exists("encoder-temp_rnnt.engine"):
-        timing_cache="trt.cache"
-
-        config = CreateConfig(fp16=True, profiles=[
-            (Profile()
-                .add("audio_signal", min=(1, 80, 25), opt=(16, 80, 1024), max=(32, 80, 4096))
-                .add("length", min=(1, ), opt=(16, ), max=(32, ))
-            )
-        ]
+        timing_cache = "trt.cache"
+
+        config = CreateConfig(
+            fp16=True,
+            profiles=[
+                (
+                    Profile()
+                    .add("audio_signal", min=(1, 80, 25), opt=(16, 80, 1024), max=(32, 80, 4096))
+                    .add("length", min=(1,), opt=(16,), max=(32,))
+                )
+            ],
         )
-        build_engine = EngineFromNetwork(
-            NetworkFromOnnxPath("encoder-temp_rnnt.onnx"),
-            config,
-            timing_cache)
+        build_engine = EngineFromNetwork(NetworkFromOnnxPath("encoder-temp_rnnt.onnx"), config, timing_cache)

         engine = build_engine()
         save_engine(engine, "encoder-temp_rnnt.engine")
@@ -44,12 +43,19 @@ def main():
         OnnxrtRunner(build_onnxrt_session),
     ]

-    run_results = Comparator.run(runners, data_loader=[{"audio_signal": np.zeros((1, 80, 2135), dtype=np.float32), "length": np.array([2135], dtype=np.int64)}])
+    run_results = Comparator.run(
+        runners,
+        data_loader=[
+            {"audio_signal": np.zeros((1, 80, 2135), dtype=np.float32), "length": np.array([2135], dtype=np.int64)}
+        ],
+    )

     trt_runner_name, onnxrt_runner_name = list(run_results.keys())

-    assert run_results[trt_runner_name][0]["encoded_lengths"] == run_results[onnxrt_runner_name][0]["encoded_lengths"], f'{run_results[trt_runner_name][0]["encoded_lengths"]} vs. {run_results[onnxrt_runner_name][0]["encoded_lengths"]}'
+    assert (
+        run_results[trt_runner_name][0]["encoded_lengths"] == run_results[onnxrt_runner_name][0]["encoded_lengths"]
+    ), f'{run_results[trt_runner_name][0]["encoded_lengths"]} vs. {run_results[onnxrt_runner_name][0]["encoded_lengths"]}'


 if __name__ == "__main__":
     main()

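run_full.py imports EngineFromBytes alongside save_engine, so the branch elided from the hunk above presumably deserializes encoder-temp_rnnt.engine instead of rebuilding it. A hedged sketch of that load path (BytesFromPath is the usual polygraphy companion and is an assumption here, since its import is not shown in the diff):

    # Sketch of the engine-reload branch implied by the imports above.
    from polygraphy.backend.common import BytesFromPath
    from polygraphy.backend.trt import EngineFromBytes, TrtRunner

    build_engine = EngineFromBytes(BytesFromPath("encoder-temp_rnnt.engine"))
    with TrtRunner(build_engine) as runner:
        pass  # runner.infer(feed_dict=...) would run the deserialized engine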
@@ -1,14 +1,13 @@
+import os
+import subprocess
+
+import numpy as np
 from polygraphy.backend.onnxrt import OnnxrtRunner, SessionFromOnnx
-from polygraphy.backend.trt import EngineFromBytes, EngineFromNetwork, NetworkFromOnnxPath, save_engine, TrtRunner
+from polygraphy.backend.trt import EngineFromBytes, EngineFromNetwork, NetworkFromOnnxPath, TrtRunner, save_engine
 from polygraphy.backend.trt.config import CreateConfig
 from polygraphy.backend.trt.profile import Profile
 from polygraphy.comparator import Comparator, CompareFunc

-import numpy as np
-
-import os
-import subprocess
-
 from nemo.collections.asr.models import ASRModel

@@ -17,29 +16,21 @@ def main():
     nemo_model = ASRModel.from_pretrained("stt_en_conformer_transducer_large", map_location='cuda')
     nemo_model.export("temp_rnnt.onnx", onnx_opset_version=18)

-    subprocess.check_call("polygraphy surgeon extract encoder-temp_rnnt.onnx --inputs length:auto:auto --outputs encoded_lengths:auto -o just_length_computation.onnx", shell=True)
+    subprocess.check_call(
+        "polygraphy surgeon extract encoder-temp_rnnt.onnx --inputs length:auto:auto --outputs encoded_lengths:auto -o just_length_computation.onnx",
+        shell=True,
+    )

     build_onnxrt_session = SessionFromOnnx("just_length_computation.onnx")
     build_onnxrt_cuda_session = SessionFromOnnx("just_length_computation.onnx", ["cuda"])

-    config = CreateConfig(fp16=True, profiles=[
-        (Profile()
-            .add("length", min=(1, ), opt=(16, ), max=(32, ))
-        )
-    ]
-    )
+    config = CreateConfig(fp16=True, profiles=[(Profile().add("length", min=(1,), opt=(16,), max=(32,)))])

-    build_engine = EngineFromNetwork(
-        NetworkFromOnnxPath("just_length_computation.onnx"),
-        config)
+    build_engine = EngineFromNetwork(NetworkFromOnnxPath("just_length_computation.onnx"), config)

     engine = build_engine()

-    runners = [
-        TrtRunner(engine),
-        OnnxrtRunner(build_onnxrt_session),
-        OnnxrtRunner(build_onnxrt_cuda_session)
-    ]
+    runners = [TrtRunner(engine), OnnxrtRunner(build_onnxrt_session), OnnxrtRunner(build_onnxrt_cuda_session)]

     run_results = Comparator.run(runners, data_loader=[{"length": np.array([2135], dtype=np.int64)}])
@@ -49,7 +40,10 @@ def main():
     print(run_results[onnxrt_runner_name][0]["encoded_lengths"])
     print(run_results[onnxrt_cuda_runner_name][0]["encoded_lengths"])

-    assert run_results[trt_runner_name][0]["encoded_lengths"] == run_results[onnxrt_runner_name][0]["encoded_lengths"], f'{run_results[trt_runner_name][0]["encoded_lengths"]} vs. {run_results[onnxrt_runner_name][0]["encoded_lengths"]}'
+    assert (
+        run_results[trt_runner_name][0]["encoded_lengths"] == run_results[onnxrt_runner_name][0]["encoded_lengths"]
+    ), f'{run_results[trt_runner_name][0]["encoded_lengths"]} vs. {run_results[onnxrt_runner_name][0]["encoded_lengths"]}'


 if __name__ == "__main__":
     main()
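Because the surgeon command above isolates just the length computation, the extracted subgraph is easy to probe by hand. A hedged sketch using onnxruntime directly; the input and output names come from the --inputs/--outputs flags in the diff:

    # Sketch only: run the extracted length-computation subgraph on CPU.
    import numpy as np
    import onnxruntime as ort

    sess = ort.InferenceSession("just_length_computation.onnx", providers=["CPUExecutionProvider"])
    (encoded_lengths,) = sess.run(["encoded_lengths"], {"length": np.array([2135], dtype=np.int64)})
    print(encoded_lengths)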
6 changes: 5 additions & 1 deletion examples/asr/export/transducer/shrink_graph.py
@@ -1,8 +1,11 @@
 import onnx
 import onnx_graphsurgeon as gs

+
 def main():
-    onnx_model = onnx.load("/home/dgalvez/scratch/code/asr/nemo_conformer_benchmark/NeMo/examples/asr/export/transducer/encoder-temp_rnnt.onnx")
+    onnx_model = onnx.load(
+        "/home/dgalvez/scratch/code/asr/nemo_conformer_benchmark/NeMo/examples/asr/export/transducer/encoder-temp_rnnt.onnx"
+    )
     graph = gs.import_onnx(onnx_model)

     for node in graph.nodes:
@@ -12,6 +15,7 @@ def main():
     graph.cleanup()
     graph.toposort()

+
 if __name__ == "__main__":
     main()

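The loop body elided from the hunk above presumably edits nodes before the cleanup()/toposort() calls shown. A hedged sketch of the full import-edit-export round trip, with the per-node edit left as a placeholder and the output filename assumed:

    # Sketch of the round trip shrink_graph.py appears to perform; the real
    # per-node edit is not visible in this diff.
    import onnx
    import onnx_graphsurgeon as gs

    graph = gs.import_onnx(onnx.load("encoder-temp_rnnt.onnx"))
    for node in graph.nodes:
        pass  # placeholder: the actual edit is elided from the hunk
    graph.cleanup()   # drop nodes/tensors no longer reachable from outputs
    graph.toposort()  # restore a valid topological ordering
    onnx.save(gs.export_onnx(graph), "encoder-temp_rnnt.shrunk.onnx")  # assumed name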