Skip to content

Commit

Permalink
kunlunxin sam_h
Browse files Browse the repository at this point in the history
  • Loading branch information
liquanfeng committed Sep 11, 2023
1 parent 9cb8c63 commit 1230129
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 26 deletions.
19 changes: 19 additions & 0 deletions inference/benchmarks/sam_h/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,24 @@

- TensorRT 8.6.1

#### 2.2 昆仑芯R200

- ##### 硬件环境
- 机器、加速卡型号: R200

- ##### 软件环境
- OS版本:Ubuntu 20.04
- OS kernel版本: 5.15.0-56-generic
- 加速卡驱动版本:4.0
- Docker 版本:20.10.21
- 依赖软件版本:
- pytorch: 1.13.0+cpu
- onnx: 1.14.0

- 推理工具包

- XTCL 2.0.0.67

### 3. 运行情况

* 指标列表
Expand All @@ -59,4 +77,5 @@
| ----------- | --------- | ---- | ---- | -------- | ----------- | ---------- | ------------- | ------------ | ----------- | ----------- |
| tensorrt | fp16 | 4 |1895.1 | 9.3 | 10.7 | 7.9 | 11.8 | 11.8% | 0.89/1.0 | 23.7/40.0 |
| tensorrt | fp32 | 2 | 1895.1 | 6.8 | 7.5 | 5.5 | 7.0 | 13.9% | 1.0/1.0 | 18.1/40.0 |
| kunlunxin_xtcl | fp16 | 4 | 8380.5 | 0.6 | 0.6 | 0.6 | 0.6 | 13.9% | 0.89/1.0 | 8.5/32.0 |

3 changes: 1 addition & 2 deletions inference/benchmarks/sam_h/pytorch/forward.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ def engine_forward(model, dataloader, evaluator, config):
for step, (x, y, osize, dsize) in enumerate(dataloader):
if config.fp16:
x = x.to(torch.float16)
y = y.to(torch.float16)
torch_sync(config)
core_time_start = time.time()

Expand All @@ -101,7 +100,7 @@ def engine_forward(model, dataloader, evaluator, config):
torch_sync(config)
core_time += time.time() - core_time_start

pred = pred[0]
pred = pred[1]
pred = pred.reshape(config.batch_size, 1, 3, 256, 256).float()
pred = pred.cpu()

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Inference-engine configuration for the sam_h benchmark on Kunlunxin XPU.
# Consumed by inference_engine/kunlunxin/xtcl.py (InferModel.build_engine).
compiler: xtcl
# NOTE(review): presumably skips the accuracy-validation pass of the harness —
# confirm against the benchmark runner.
no_validation: true
# Passed as the `config` dict of tvm.transform.PassContext in build_engine.
build_config:
FuseWithoutPattern:
- FuseConv2dTransposeBiasAdd
# Passed as `disabled_pass` of tvm.transform.PassContext in build_engine.
disabled_pass:
- xgraph_layout_opt
# Pre-exported ONNX graph (batch size 4, fp16 export) used instead of
# re-exporting the model at run time.
exist_onnx_path: onnxs/sam_h_bs4_pytorch_fp16True.onnx
40 changes: 16 additions & 24 deletions inference/inference_engine/kunlunxin/xtcl.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,60 +11,52 @@
import numpy as np
import time


class InferModel:
    """Compile an ONNX model with XTCL (TVM-based) and run it on a Kunlunxin XPU.

    The model is compiled once in ``__init__`` via :meth:`build_engine`;
    calling the instance then feeds one batch through the compiled
    TVM virtual machine and returns the outputs as torch tensors.
    """

    def __init__(self, config, onnx_path, model):
        # ``model`` is intentionally unused: inference runs from the exported
        # ONNX graph at ``onnx_path``, not from the framework model object.
        self.input_names = []
        self.engine = self.build_engine(config, onnx_path)

    def build_engine(self, config, onnx_path):
        """Compile ``onnx_path`` into a TVM VirtualMachine targeting the XPU.

        Reads from ``config``: ``batch_size``, ``fp16``, ``build_config``,
        ``disabled_pass``. Returns the ready-to-run VirtualMachine.
        """
        onnx_model = onnx.load(onnx_path)

        # Collect graph inputs, forcing the leading (batch) dimension to the
        # configured batch size so the compiled engine matches the dataloader.
        shape_dict = {}
        for inp in onnx_model.graph.input:
            input_name, input_shape, _, _ = relay.frontend.onnx.get_info(inp)
            input_shape[0] = config.batch_size
            self.input_names.append(input_name)
            shape_dict[input_name] = input_shape

        mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)

        # Pick the XPU generation from the env var's trailing digit
        # (e.g. "KUNLUN2" -> xpu2); defaults to KUNLUN1 -> xpu1.
        target_host = f'llvm -acc=xpu{os.environ.get("XPUSIM_DEVICE_MODEL", "KUNLUN1")[-1]}'
        ctx = tvm.device("xpu", 0)

        # Benchmark-specific compiler knobs come straight from the YAML config.
        build_config = config.build_config if config.build_config else {}
        disabled_pass = config.disabled_pass if config.disabled_pass else []
        if config.fp16:
            os.environ["XTCL_USE_NEW_ALTER_PASS"] = '1'
            # Ask XTCL to emit float16 for every node (including the last one).
            build_config["XPUOutDtypeConfig"] = xpu_config.XPUOutDtypeConfig(
                default_precision="float16",
                config_last_node=True,
                config_map={},
            ).value()
        else:  ## fp32: explicitly disable fp16 lowering and weight quantization
            os.environ['XTCL_USE_FP16'] = '0'
            os.environ['XTCL_QUANTIZE_WEIGHT'] = '0'

        with tvm.transform.PassContext(opt_level=3,
                                       config=build_config,
                                       disabled_pass=disabled_pass):
            vm_exec = relay.backend.vm.compile(mod,
                                               target=target_host,
                                               target_host=target_host,
                                               params=params)
        from tvm.runtime.vm import VirtualMachine
        vm = VirtualMachine(vm_exec, ctx)
        return vm

    def __call__(self, model_inputs: list):
        """Run one batch.

        ``model_inputs`` are bound positionally to the ONNX graph inputs in
        the order discovered by :meth:`build_engine`. Returns
        ``(outputs, foo_time)`` where ``outputs`` is a list of host-side
        torch tensors and ``foo_time`` is the device-to-host copy time in
        seconds (timed separately so the harness can exclude it from
        pure compute time).
        """
        for index, input_name in enumerate(self.input_names):
            self.engine.set_one_input("main", input_name, tvm.nd.array(model_inputs[index]))
        self.engine.run()
        output_list = [self.engine.get_output(i) for i in range(self.engine.get_num_outputs())]
        foo_time_start = time.time()
        # d2h
        output_list = [torch.from_numpy(output.asnumpy()) for output in output_list]
        foo_time = time.time() - foo_time_start
        return output_list, foo_time


0 comments on commit 1230129

Please sign in to comment.