[iluvatar] bertlarge inference case #353

Merged: 2 commits, Dec 11, 2023
21 changes: 21 additions & 0 deletions inference/benchmarks/bertLarge/README.md
@@ -58,6 +58,25 @@ bert_reference_results_text_md5.txt

- XTCL 2.1

#### 2.3 Iluvatar CoreX MR-100

- ##### Hardware environment
    - Machine and accelerator card model: MR-100

- ##### Software environment
    - OS version: Ubuntu 20.04
    - OS kernel version: 5.15.0-89-generic
    - Accelerator card driver version: 3.2.0
    - Docker version: 24.0.4
    - Dependency versions:
        - torch-1.13.1+corex.3.2.1
        - onnxsim

- Inference toolkit

    - IXRT: ixrt-0.8.0+corex.3.2.1


### 4. Run results (BERT-Large)

* Metric list
@@ -83,3 +102,5 @@ bert_reference_results_text_md5.txt
| tensorrt | fp16 | 32 | 1283.9 | 257.3 | 260.4 | 408.3 | 418.1 | 45.3% | 0.600/0.638 | 17.4/40.0 |
| tensorrt | fp32 | 32 | 1868.8 | 150.4 | 152.2 | 190.4 | 194.1 | 42.0% | 0.638/0.638 | 16.9/40.0 |
| kunlunxin_xtcl | W32A16 | 32 | / | / | / | / | / | / | 0.638/0.638 | / |
| iluvatar_ixrt | fp16 | 32 | / | / | / | / | / | / | 0.599/0.638 | / |

@@ -0,0 +1,2 @@
transformers
onnxsim
@@ -0,0 +1,5 @@
ixrt_tmp_path: iluvatar_tmp/bertLarge.trt
compiler: ixrt
# no_validation: true
has_dynamic_axis: false
torchtrt_full_compile: true
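These keys drive the engine build in `inference_engine/iluvatar/ixrt.py` below: `ixrt_tmp_path` sets where the serialized engine is written under the run's log directory, `compiler: ixrt` selects this backend, and `has_dynamic_axis: false` skips the `--minShapes/--optShapes/--maxShapes` flags. A minimal sketch of that mapping, assuming PyYAML is available and the `onnxsim`/`ixrtexec` binaries are on PATH (the `build_engine_from_yaml` helper and its `fp16` argument are illustrative assumptions, not the repository's actual config machinery):

```python
import os
import subprocess

import yaml  # assumption: PyYAML is installed


def build_engine_from_yaml(yaml_path, onnx_path, log_dir, fp16=True):
    """Illustrative sketch: drive ixrtexec from the yaml keys above."""
    with open(yaml_path) as f:
        cfg = yaml.safe_load(f)

    # ixrt_tmp_path is resolved relative to the run's log directory.
    engine_path = os.path.join(log_dir, cfg["ixrt_tmp_path"])
    os.makedirs(os.path.dirname(engine_path), exist_ok=True)

    # The PR simplifies the ONNX graph in place with onnxsim before compiling.
    subprocess.run(["onnxsim", onnx_path, onnx_path], check=True)

    # Flags mirror those built in ixrt.py; has_dynamic_axis is false for this
    # case, so no --minShapes/--optShapes/--maxShapes flags are passed.
    cmd = ["ixrtexec", f"--onnx={onnx_path}", f"--save_engine={engine_path}"]
    if fp16:
        cmd += ["--precision", "fp16"]
    subprocess.run(cmd, check=True)
    return engine_path
```

Unlike the PR's `subprocess.Popen(..., shell=True)`, the sketch passes argument lists to `subprocess.run`, which avoids shell quoting issues for paths containing spaces; either form launches the same tools.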
@@ -2,7 +2,7 @@

>Contact email: [email protected]

- ixrt-0.7.0+corex.latest.version-cp310-cp310-linux_x86_64.whl
+ ixrt-0.8.0+corex.latest.version-cp310-cp310-linux_x86_64.whl

torchvision-0.14.1+corex.3.2.1.20231006.892-cp310-cp310-linux_x86_64.whl

26 changes: 15 additions & 11 deletions inference/inference_engine/iluvatar/ixrt.py
Collaborator:

Does this change to the file remain compatible with the earlier resnet50 and yolo cases?

Contributor (author):

Yes, it is compatible.

@@ -9,7 +9,6 @@
import time
import subprocess

-
class InferModel:

    class HostDeviceMem(object):
@@ -66,27 +65,32 @@ def __init__(self, config, onnx_path, model):

    def build_engine(self, config, onnx_path):
        if config.exist_compiler_path is None:
-           trt_path = config.log_dir + "/" + config.ixrt_tmp_path
+           ixrt_path = config.log_dir + "/" + config.ixrt_tmp_path

-           dir_trt_path = os.path.dirname(trt_path)
+           dir_trt_path = os.path.dirname(ixrt_path)
            os.makedirs(dir_trt_path, exist_ok=True)

            time.sleep(10)

-           trtexec_cmd = "ixrtexec --onnx=" + onnx_path + " --save_engine=" + trt_path
+           onnxsim_cmd = f"onnxsim {onnx_path} {onnx_path}"
+
+           onnxsim_cmd = subprocess.Popen(onnxsim_cmd, shell=True)
+           onnxsim_cmd.wait()
+
+           ixrtexec_cmd = "ixrtexec --onnx=" + onnx_path + " --save_engine=" + ixrt_path
            if config.fp16:
-               trtexec_cmd += " --precision fp16"
+               ixrtexec_cmd += " --precision fp16"
            if config.has_dynamic_axis:
-               trtexec_cmd += " --minShapes=" + config.minShapes
-               trtexec_cmd += " --optShapes=" + config.optShapes
-               trtexec_cmd += " --maxShapes=" + config.maxShapes
+               ixrtexec_cmd += " --minShapes=" + config.minShapes
+               ixrtexec_cmd += " --optShapes=" + config.optShapes
+               ixrtexec_cmd += " --maxShapes=" + config.maxShapes

-           p = subprocess.Popen(trtexec_cmd, shell=True)
+           p = subprocess.Popen(ixrtexec_cmd, shell=True)
            p.wait()
        else:
-           trt_path = config.exist_compiler_path
+           ixrt_path = config.exist_compiler_path

-       with open(trt_path, "rb") as f:
+       with open(ixrt_path, "rb") as f:
            return self.runtime.deserialize_cuda_engine(f.read())

    def allocate_buffers(self, engine):
        ...
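`allocate_buffers` is truncated in the diff above. Because the engine is loaded via `deserialize_cuda_engine`, the IXRT Python API evidently mirrors TensorRT's, so the method presumably follows the classic TensorRT buffer-setup pattern. A sketch under that assumption; the `ixrt` import with `volume`/`nptype` helpers and the pycuda calls are assumptions for illustration, not code from this PR:

```python
import pycuda.autoinit  # noqa: F401  (creates a CUDA context on import)
import pycuda.driver as cuda

import ixrt as trt  # assumption: ixrt exposes TensorRT-style helpers


class HostDeviceMem:
    """Pairs a pinned host buffer with its device allocation, as in the PR."""

    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem


def allocate_buffers(engine):
    """Classic TensorRT-style buffer setup, assumed to apply to IXRT engines."""
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:  # iterate over the engine's I/O binding names
        size = trt.volume(engine.get_binding_shape(binding))
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        host_mem = cuda.pagelocked_empty(size, dtype)  # pinned host buffer
        device_mem = cuda.mem_alloc(host_mem.nbytes)   # matching GPU buffer
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream
```

At inference time the host buffers are typically filled with input data, copied to the device with `cuda.memcpy_htod_async`, and the outputs copied back over the same stream after execution.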