diff --git a/inference/benchmarks/bertLarge/README.md b/inference/benchmarks/bertLarge/README.md
index e7fc57df9..96c5cce94 100644
--- a/inference/benchmarks/bertLarge/README.md
+++ b/inference/benchmarks/bertLarge/README.md
@@ -58,6 +58,25 @@ bert_reference_results_text_md5.txt
 
 - XTCL 2.1
 
+#### 2.3 天数智芯 MR-100
+
+- ##### Hardware environment
+  - Machine and accelerator model: MR-100
+
+- ##### Software environment
+  - OS version: Ubuntu 20.04
+  - OS kernel version: 5.15.0-89-generic
+  - Accelerator driver version: 3.2.0
+  - Docker version: 24.0.4
+  - Dependency versions:
+    - torch-1.13.1+corex.3.2.1
+    - onnxsim
+
+- Inference toolkit
+
+  - IXRT: ixrt-0.8.0+corex.3.2.1
+
+
 ### 4. Results (BERT-Large)
 
 * Metric list
@@ -83,3 +102,5 @@ bert_reference_results_text_md5.txt
 | tensorrt | fp16 | 32 | 1283.9 | 257.3 | 260.4 | 408.3 | 418.1 | 45.3% | 0.600/0.638 | 17.4/40.0 |
 | tensorrt | fp32 | 32 | 1868.8 | 150.4 | 152.2 | 190.4 | 194.1 | 42.0% | 0.638/0.638 | 16.9/40.0 |
 | kunlunxin_xtcl| W32A16 | 32 |/ | / | / | / | / | / | 0.638/0.638| /|
+| iluvatar_ixrt| fp16 | 32 |/ | / | / | / | / | / | 0.599/0.638| /|
+
diff --git a/inference/benchmarks/bertLarge/pytorch/iluvatar_requirements.txt b/inference/benchmarks/bertLarge/pytorch/iluvatar_requirements.txt
new file mode 100644
index 000000000..ea980c9cd
--- /dev/null
+++ b/inference/benchmarks/bertLarge/pytorch/iluvatar_requirements.txt
@@ -0,0 +1,2 @@
+transformers
+onnxsim
\ No newline at end of file
diff --git a/inference/configs/bertLarge/vendor_config/iluvatar_configurations.yaml b/inference/configs/bertLarge/vendor_config/iluvatar_configurations.yaml
new file mode 100644
index 000000000..3f1c5fec7
--- /dev/null
+++ b/inference/configs/bertLarge/vendor_config/iluvatar_configurations.yaml
@@ -0,0 +1,5 @@
+ixrt_tmp_path: iluvatar_tmp/bertLarge.trt
+compiler: ixrt
+# no_validation: true
+has_dynamic_axis: false
+torchtrt_full_compile: true
\ No newline at end of file
diff --git a/inference/docker_images/iluvatar/pytorch/packages/README.md b/inference/docker_images/iluvatar/pytorch/packages/README.md
index 224dbe2af..9314892bc 100644
--- a/inference/docker_images/iluvatar/pytorch/packages/README.md
+++ b/inference/docker_images/iluvatar/pytorch/packages/README.md
@@ -2,7 +2,7 @@
 
 > Contact email: contact-us@iluvatar.com
 
-ixrt-0.7.0+corex.latest.version-cp310-cp310-linux_x86_64.whl
+ixrt-0.8.0+corex.latest.version-cp310-cp310-linux_x86_64.whl
 
 torchvision-0.14.1+corex.3.2.1.20231006.892-cp310-cp310-linux_x86_64.whl
 
diff --git a/inference/inference_engine/iluvatar/ixrt.py b/inference/inference_engine/iluvatar/ixrt.py
index 44fc85c4b..abb5cd17e 100644
--- a/inference/inference_engine/iluvatar/ixrt.py
+++ b/inference/inference_engine/iluvatar/ixrt.py
@@ -9,7 +9,6 @@
 import time
 import subprocess
 
-
 class InferModel:
 
     class HostDeviceMem(object):
@@ -66,27 +65,32 @@ def __init__(self, config, onnx_path, model):
 
     def build_engine(self, config, onnx_path):
         if config.exist_compiler_path is None:
-            trt_path = config.log_dir + "/" + config.ixrt_tmp_path
+            ixrt_path = config.log_dir + "/" + config.ixrt_tmp_path
 
-            dir_trt_path = os.path.dirname(trt_path)
+            dir_trt_path = os.path.dirname(ixrt_path)
             os.makedirs(dir_trt_path, exist_ok=True)
 
             time.sleep(10)
 
-            trtexec_cmd = "ixrtexec --onnx=" + onnx_path + " --save_engine=" + trt_path
+            onnxsim_cmd = f"onnxsim {onnx_path} {onnx_path}"
+
+            onnxsim_cmd = subprocess.Popen(onnxsim_cmd, shell=True)
+            onnxsim_cmd.wait()
+
+            ixrtexec_cmd = "ixrtexec --onnx=" + onnx_path + " --save_engine=" + ixrt_path
             if config.fp16:
-                trtexec_cmd += " --precision fp16"
+                ixrtexec_cmd += " --precision fp16"
             if config.has_dynamic_axis:
-                trtexec_cmd += " --minShapes=" + config.minShapes
-                trtexec_cmd += " --optShapes=" + config.optShapes
-                trtexec_cmd += " --maxShapes=" + config.maxShapes
+                ixrtexec_cmd += " --minShapes=" + config.minShapes
+                ixrtexec_cmd += " --optShapes=" + config.optShapes
+                ixrtexec_cmd += " --maxShapes=" + config.maxShapes
 
-            p = subprocess.Popen(trtexec_cmd, shell=True)
+            p = subprocess.Popen(ixrtexec_cmd, shell=True)
             p.wait()
         else:
-            trt_path = config.exist_compiler_path
+            ixrt_path = config.exist_compiler_path
 
-        with open(trt_path, "rb") as f:
+        with open(ixrt_path, "rb") as f:
             return self.runtime.deserialize_cuda_engine(f.read())
 
     def allocate_buffers(self, engine):
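For readers who want to exercise the compilation step outside the FlagPerf harness, the sketch below replays the flow that the revised `build_engine` performs: simplify the ONNX graph in place with `onnxsim`, then compile it into an IXRT engine with `ixrtexec`. It is a minimal sketch, not part of the patch; the file paths and the fp16/static-shape choice are illustrative assumptions, and only the command-line flags already used in the patch (`--onnx`, `--save_engine`, `--precision fp16`, and the `--minShapes`/`--optShapes`/`--maxShapes` trio for dynamic axes) are relied on.

```python
# Standalone sketch of the build flow implemented by InferModel.build_engine above.
# Paths are hypothetical placeholders; onnxsim and ixrtexec must be on PATH.
import os
import subprocess

onnx_path = "bertLarge.onnx"                 # hypothetical exported model
engine_path = "iluvatar_tmp/bertLarge.trt"   # mirrors ixrt_tmp_path in the vendor yaml

os.makedirs(os.path.dirname(engine_path), exist_ok=True)

# 1) Simplify the ONNX graph in place before compilation, as the patch does.
subprocess.run(["onnxsim", onnx_path, onnx_path], check=True)

# 2) Compile the simplified model into an IXRT engine (fp16, static shapes here;
#    dynamic-shape runs would append --minShapes/--optShapes/--maxShapes).
build_cmd = f"ixrtexec --onnx={onnx_path} --save_engine={engine_path} --precision fp16"
subprocess.run(build_cmd, shell=True, check=True)

# The harness then loads engine_path via runtime.deserialize_cuda_engine(),
# as shown at the end of build_engine.
```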