diff --git a/inference/benchmarks/sam_h/README.md b/inference/benchmarks/sam_h/README.md
index 02d6cf352..8c2926812 100644
--- a/inference/benchmarks/sam_h/README.md
+++ b/inference/benchmarks/sam_h/README.md
@@ -36,6 +36,24 @@
 
 - TensorRT 8.6.1
 
+#### 2.2 Kunlunxin R200
+
+- ##### Hardware environment
+  - Machine and accelerator model: R200
+
+- ##### Software environment
+  - OS version: Ubuntu 20.04
+  - OS kernel version: 5.15.0-56-generic
+  - Accelerator driver version: 4.0
+  - Docker version: 20.10.21
+  - Dependency versions:
+    - pytorch: 1.13.0+cpu
+    - onnx: 1.14.0
+
+- Inference toolkit
+
+  - XTCL 2.0.0.67
+
 ### 3. Results
 
 * Metric list
@@ -59,4 +77,5 @@
 | ----------- | --------- | ---- | ---- | -------- | ----------- | ---------- | ------------- | ------------ | ----------- | ----------- |
 | tensorrt | fp16 | 4 | 1895.1 | 9.3 | 10.7 | 7.9 | 11.8 | 11.8% | 0.89/1.0 | 23.7/40.0 |
 | tensorrt | fp32 | 2 | 1895.1 | 6.8 | 7.5 | 5.5 | 7.0 | 13.9% | 1.0/1.0 | 18.1/40.0 |
+| kunlunxin_xtcl | fp16 | 4 | 8380.5 | 0.6 | 0.6 | 0.6 | 0.6 | 13.9% | 0.89/1.0 | 8.5/32.0 |
diff --git a/inference/benchmarks/sam_h/pytorch/forward.py b/inference/benchmarks/sam_h/pytorch/forward.py
index df61177fa..9ff355c68 100644
--- a/inference/benchmarks/sam_h/pytorch/forward.py
+++ b/inference/benchmarks/sam_h/pytorch/forward.py
@@ -84,7 +84,6 @@ def engine_forward(model, dataloader, evaluator, config):
     for step, (x, y, osize, dsize) in enumerate(dataloader):
         if config.fp16:
             x = x.to(torch.float16)
-            y = y.to(torch.float16)
 
         torch_sync(config)
         core_time_start = time.time()
@@ -101,7 +100,7 @@ def engine_forward(model, dataloader, evaluator, config):
             torch_sync(config)
             core_time += time.time() - core_time_start
 
-            pred = pred[0]
+            pred = pred[1]
             pred = pred.reshape(config.batch_size, 1, 3, 256, 256).float()
             pred = pred.cpu()
diff --git a/inference/configs/sam_h/vendor_config/kunlunxin_configurations.yaml b/inference/configs/sam_h/vendor_config/kunlunxin_configurations.yaml
new file mode 100644
index 000000000..3561bebfc
--- /dev/null
+++ b/inference/configs/sam_h/vendor_config/kunlunxin_configurations.yaml
@@ -0,0 +1,8 @@
+compiler: xtcl
+no_validation: true
+build_config:
+  FuseWithoutPattern:
+    - FuseConv2dTransposeBiasAdd
+disabled_pass:
+  - xgraph_layout_opt
+exist_onnx_path: onnxs/sam_h_bs4_pytorch_fp16True.onnx
diff --git a/inference/inference_engine/kunlunxin/xtcl.py b/inference/inference_engine/kunlunxin/xtcl.py
index 396cc3ae9..7f3c18f35 100755
--- a/inference/inference_engine/kunlunxin/xtcl.py
+++ b/inference/inference_engine/kunlunxin/xtcl.py
@@ -11,54 +11,48 @@ import numpy as np
 import time
 
+
 class InferModel:
-
-    def __init__(self, config , onnx_path, model):
+
+    def __init__(self, config, onnx_path, model):
         self.input_names = []
         self.engine = self.build_engine(config, onnx_path)
 
     def build_engine(self, config, onnx_path):
         onnx_model = onnx.load(onnx_path)
         shape_dict = {}
-        for input in onnx_model.graph.input:
-            input_shape = input.type.tensor_type.shape.dim
-            input_shape = [a.dim_value for a in input_shape]
+        for inp in onnx_model.graph.input:
+            input_name, input_shape, _, _ = relay.frontend.onnx.get_info(inp)
             input_shape[0] = config.batch_size
-            input_name = input.name  #'inputs:0'
             self.input_names.append(input_name)
             shape_dict[input_name] = input_shape
-
+
         mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
         target_host = f'llvm -acc=xpu{os.environ.get("XPUSIM_DEVICE_MODEL", "KUNLUN1")[-1]}'
         ctx = tvm.device("xpu", 0)
 
-        build_config = {}
+        build_config = config.build_config if config.build_config else {}
+        disabled_pass = config.disabled_pass if config.disabled_pass else []
         if config.fp16 == True:
            os.environ["XTCL_USE_NEW_ALTER_PASS"] = '1'
-            input_fp16 = { name:"float16" for name in self.input_names}
            build_config["XPUOutDtypeConfig"] = xpu_config.XPUOutDtypeConfig(
-                default_precision="float16",
-                config_last_node=True,
-                config_map={
-                },
-                config_var_dtype_map=input_fp16,
-            ).value()
-        else: ## fp32
+                default_precision="float16",
+                config_last_node=True,
+                config_map={},
+            ).value()
+        else: ## fp32
             os.environ['XTCL_USE_FP16'] = '0'
             os.environ['XTCL_QUANTIZE_WEIGHT'] = '0'
 
-        with tvm.transform.PassContext(opt_level=3, config=build_config):
-            vm_exec = relay.backend.vm.compile(mod,
-                                               target=target_host,
-                                               target_host=target_host,
-                                               params=params)
+        with tvm.transform.PassContext(opt_level=3, config=build_config, disabled_pass=disabled_pass):
+            vm_exec = relay.backend.vm.compile(mod, target=target_host, target_host=target_host, params=params)
         from tvm.runtime.vm import VirtualMachine
         vm = VirtualMachine(vm_exec, ctx)
         return vm
 
     def __call__(self, model_inputs: list):
         for index, input_name in enumerate(self.input_names):
-            self.engine.set_one_input("main",input_name, tvm.nd.array(model_inputs[index]))
+            self.engine.set_one_input("main", input_name, tvm.nd.array(model_inputs[index]))
         self.engine.run()
         output_list = [self.engine.get_output(i) for i in range(self.engine.get_num_outputs())]
         foo_time_start = time.time()
@@ -66,5 +60,3 @@ def __call__(self, model_inputs: list):
         output_list = [torch.from_numpy(output.asnumpy()) for output in output_list]
         foo_time = time.time() - foo_time_start
         return output_list, foo_time
-
-