diff --git a/README.md b/README.md
index 1f85d5a..795fa86 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ Class Index: 277, Score: 8.452778816223145
 Class Index: 281, Score: 8.320704460144043
 Class Index: 287, Score: 7.924479961395264
 
-# 默认将自动检测计算设备，但也可以强制要求跑在AX650 M.2算力卡上，设备号是1
+# 默认将自动检测计算设备，但也可以强制要求跑在AX650 M.2算力卡上，假设设备号是1（设备号必须大于等于0，具体可通过 axcl-smi 查看）
 root@ax650:~/samples# python3 classification.py -b axcl -d 1
 [INFO] SOC Name: AX650N
 [INFO] Runtime version: 1.0.0
diff --git a/axengine/_axcl_capi.py b/axengine/_axcl_capi.py
index 103b1a9..f8bac80 100644
--- a/axengine/_axcl_capi.py
+++ b/axengine/_axcl_capi.py
@@ -20,6 +20,7 @@
     """
     #define AXCL_MAX_DEVICE_COUNT 256
     typedef int32_t axclError;
+    typedef void *axclrtContext;
 """
 )
 
@@ -149,6 +150,18 @@
     """
     axclError axclrtGetDeviceList(axclrtDeviceList *deviceList);
     axclError axclrtSetDevice(int32_t deviceId);
+    axclError axclrtResetDevice(int32_t deviceId);
+"""
+)
+
+# axcl_rt_context.h
+O.cdef(
+    """
+    axclError axclrtCreateContext(axclrtContext *context, int32_t deviceId);
+    axclError axclrtDestroyContext(axclrtContext context);
+    axclError axclrtSetCurrentContext(axclrtContext context);
+    axclError axclrtGetCurrentContext(axclrtContext *context);
+    axclError axclrtGetDefaultContext(axclrtContext *context, int32_t deviceId);
 """
 )
 
diff --git a/axengine/ax_session.py b/axengine/ax_session.py
index 919b02c..12e65d6 100644
--- a/axengine/ax_session.py
+++ b/axengine/ax_session.py
@@ -116,7 +116,11 @@ def __init__(
         print(f"[INFO] Compiler version: {self._get_model_tool_version()}")
 
         # get shape group count
-        self._shape_count = self._get_shape_count()
+        try:
+            self._shape_count = self._get_shape_count()
+        except AttributeError as e:
+            print(f"[WARNING] {e}")
+            self._shape_count = 1
 
         # get model shape
         self._info = self._get_info()
diff --git a/axengine/axcl_session.py b/axengine/axcl_session.py
index c3668a6..ade4d96 100644
--- a/axengine/axcl_session.py
+++ b/axengine/axcl_session.py
@@ -26,6 +26,8 @@ def __init__(
 
         super(BaseInferenceSession).__init__()
 
+        self.device_id = device_id
+
         # load shared library
         self._rt_lib = _capi.R
         self._rt_ffi = _capi.O
@@ -34,12 +36,18 @@ def __init__(
         print(f"[INFO] SOC Name: {self.soc_name}")
 
         # init axcl
+        self.axcl_device_id = -1  # axcl_device_id != device_id, device_id is just the index of the list of axcl_device_ids
         ret = self._init(device_id)
         if 0 != ret:
             raise RuntimeError("Failed to initialize axclrt.")
         print(f"[INFO] Runtime version: {self._get_version()}")
 
-        # handle, context, info, io
+        self._thread_context = self._rt_ffi.new("axclrtContext *")
+        ret = self._rt_lib.axclrtGetCurrentContext(self._thread_context)
+        if ret != 0:
+            raise RuntimeError("axclrtGetCurrentContext failed")
+
+        # model handle, context, info, io
         self._handle = self._rt_ffi.new("uint64_t *")
         self._context = self._rt_ffi.new("uint64_t *")
         self.io_info = self._rt_ffi.new("axclrtEngineIOInfo *")
@@ -249,14 +257,15 @@ def _free_io(self, io_data):
     def _init(self, device_id=0, vnpu=VNPUType.DISABLED):  # vnpu type, the default is disabled
         ret = self._rt_lib.axclInit([])
         if ret != 0:
-            raise RuntimeError("Failed to initialize runtime.")
+            raise RuntimeError(f"Failed to initialize runtime. {ret}.")
 
         lst = self._rt_ffi.new("axclrtDeviceList *")
         ret = self._rt_lib.axclrtGetDeviceList(lst)
         if ret != 0 or lst.num == 0:
             raise RuntimeError(f"Get AXCL device failed 0x{ret:08x}, find total {lst.num} device.")
 
-        ret = self._rt_lib.axclrtSetDevice(lst.devices[device_id])
+        self.axcl_device_id = lst.devices[device_id]
+        ret = self._rt_lib.axclrtSetDevice(self.axcl_device_id)
         if ret != 0 or lst.num == 0:
             raise RuntimeError(f"Set AXCL device failed 0x{ret:08x}.")
 
@@ -269,6 +278,7 @@ def _init(self, device_id=0, vnpu=VNPUType.DISABLED):  # vnpu type, the default
     def _final(self):
         if self._handle[0] is not None:
             self._unload()
+        self._rt_lib.axclrtResetDevice(self.axcl_device_id)
         self._rt_lib.axclFinalize()
         return
 
@@ -331,6 +341,10 @@ def run(self, output_names, input_feed, run_options=None):
         self._validate_input(list(input_feed.keys()))
         self._validate_output(output_names)
 
+        ret = self._rt_lib.axclrtSetCurrentContext(self._thread_context[0])
+        if ret != 0:
+            raise RuntimeError("axclrtSetCurrentContext failed")
+
         if None is output_names:
             output_names = [o.name for o in self.get_outputs()]
 
@@ -384,8 +398,8 @@ def run(self, output_names, input_feed, run_options=None):
                    for i, output_tensor in enumerate(self.mgroup_output_tensors[grp_id])
                    if self.get_outputs()[i].name in output_names]
 
-        print(f'[INFO] cost time in host to device: {cost_host_to_device * 1009:.3f}ms, '
-              f'inference: {cost_inference * 1009:.3f}ms, '
-              f'device to host: {cost_device_to_host * 1009:.3f}ms')
+        print(f'[INFO] cost time in host to device: {cost_host_to_device * 1000:.3f}ms, '
+              f'inference: {cost_inference * 1000:.3f}ms, '
+              f'device to host: {cost_device_to_host * 1000:.3f}ms')
 
         return outputs
diff --git a/axengine/session.py b/axengine/session.py
index b0eb0c7..9cc4040 100644
--- a/axengine/session.py
+++ b/axengine/session.py
@@ -16,7 +16,7 @@ def InferenceSession(path_or_bytes: str | bytes | os.PathLike, device_id: int =
             print("axcl_rt not found, please install axcl_host driver")
 
     if is_axcl:
-        print(f"Using axclrt backend, device_no: {device_id}")
+        print(f"Using axclrt backend, device_id: {device_id}")
         return AXCLInferenceSession(path_or_bytes, device_id)
     else:
         print("Using ax backend with onboard npu")
diff --git a/examples/classification.py b/examples/classification.py
index 10c046f..dbffe3f 100644
--- a/examples/classification.py
+++ b/examples/classification.py
@@ -10,13 +10,13 @@
 from PIL import Image
 
 
-def load_model(model_path, backend='auto', device_no=-1):
+def load_model(model_path, backend='auto', device_id=-1):
     if backend == 'auto':
-        session = axe.InferenceSession(model_path, device_no)
+        session = axe.InferenceSession(model_path, device_id)
     elif backend == 'ax':
         session = axe.AXInferenceSession(model_path)
     elif backend == 'axcl':
-        session = axe.AXCLInferenceSession(model_path, device_no)
+        session = axe.AXCLInferenceSession(model_path, device_id)
     return session
 
 
@@ -62,16 +62,21 @@ def get_top_k_predictions(output, k=5):
     return top_k_indices, top_k_scores
 
 
-def main(model_path, image_path, target_size, crop_size, k, backend='auto', device_no=-1):
+def main(model_path, image_path, target_size, crop_size, k, backend='auto', device_id=-1):
     # Load the model
-    session = load_model(model_path, backend, device_no)
+    session = load_model(model_path, backend, device_id)
 
     # Preprocess the image
     input_tensor = preprocess_image(image_path, target_size, crop_size)
 
     # Get input name and run inference
     input_name = session.get_inputs()[0].name
-    output = session.run(None, {input_name: input_tensor})
+    import time
+    for i in range(10):
+        t1 = time.time()
+        output = session.run(None, {input_name: input_tensor})
+        t2 = time.time()
+        print(t2 - t1)
 
     # Get top k predictions
     top_k_indices, top_k_scores = get_top_k_predictions(output, k)
@@ -86,14 +91,14 @@ def main(model_path, image_path, target_size, crop_size, k, backend='auto', devi
     import argparse
     ap = argparse.ArgumentParser()
     ap.add_argument('-b', '--backend', type=str, help='auto/ax/axcl', default='auto')
-    ap.add_argument('-d', '--device_no', type=int, help='axcl device no, -1: onboard npu, >0: axcl devices', default=0)
+    ap.add_argument('-d', '--device_id', type=int, help='axcl device no, -1: onboard npu, >0: axcl devices', default=0)
     args = ap.parse_args()
     assert args.backend in ['auto', 'ax', 'axcl'], "backend must be auto/ax/axcl"
-    assert args.device_no >= -1, "device_no must be greater than -1"
+    assert args.device_id >= -1, "device_id must be greater than -1"
 
     MODEL_PATH = "../mobilenetv2.axmodel"
     IMAGE_PATH = "../cat.jpg"
     TARGET_SIZE = (256, 256)  # Resize to 256x256
     CROP_SIZE = (224, 224)  # Crop to 224x224
     K = 5  # Top K predictions
-    main(MODEL_PATH, IMAGE_PATH, TARGET_SIZE, CROP_SIZE, K, args.backend, args.device_no)
+    main(MODEL_PATH, IMAGE_PATH, TARGET_SIZE, CROP_SIZE, K, args.backend, args.device_id)
diff --git a/examples/yolov5_example.py b/examples/yolov5_example.py
index b0e44d3..f9e864e 100644
--- a/examples/yolov5_example.py
+++ b/examples/yolov5_example.py
@@ -453,14 +453,14 @@ def post_processing(outputs, origin_shape, input_shape):
     return pred
 
 
-def detect_yolov5(model_path, image_path, save_path, backend='auto', device_no=-1):
+def detect_yolov5(model_path, image_path, save_path, backend='auto', device_id=-1):
 
     if backend == 'auto':
-        session = axe.InferenceSession(model_path, device_no)
+        session = axe.InferenceSession(model_path, device_id)
     elif backend == 'ax':
         session = axe.AXInferenceSession(model_path)
     elif backend == 'axcl':
-        session = axe.AXCLInferenceSession(model_path, device_no)
+        session = axe.AXCLInferenceSession(model_path, device_id)
     image_data = cv2.imread(image_path)
     inputs, origin_shape = pre_processing(image_data, (640, 640))
     inputs = np.ascontiguousarray(inputs)
@@ -475,13 +475,13 @@ def parse_args() -> argparse.ArgumentParser:
     parser.add_argument("--model", type=str, required=True, help="axmodel path")
     parser.add_argument("--image_path", type=str, required=True, help="image path")
     parser.add_argument('-b', '--backend', type=str, help='auto/ax/axcl', default='auto')
-    parser.add_argument('-d', '--device_no', type=int, help='axcl device no, -1: onboard npu, >0: axcl devices', default=0)
+    parser.add_argument('-d', '--device_id', type=int, help='axcl device no, -1: onboard npu, >0: axcl devices', default=0)
     parser.add_argument(
         "--save_path", type=str, default="save.jpg", help="save image path"
     )
     args = parser.parse_args()
     assert args.backend in ['auto', 'ax', 'axcl'], "backend must be ax or axcl"
-    assert args.device_no >= -1, "device_no must be greater than -1"
+    assert args.device_id >= -1, "device_id must be greater than -1"
     return args
 
 
@@ -490,7 +490,7 @@ def parse_args() -> argparse.ArgumentParser:
     print(f"model             : {args.model}")
     print(f"image path        : {args.image_path}")
     print(f"backend           : {args.backend}")
-    print(f"device_no         : {args.device_no}")
+    print(f"device_id         : {args.device_id}")
     print(f"save draw image to: {args.save_path}")
-    detect_yolov5(args.model, args.image_path, args.save_path, args.backend, args.device_no)
+    detect_yolov5(args.model, args.image_path, args.save_path, args.backend, args.device_id)
 # python3 yolov5_example.py --model /opt/data/npu/models/yolov5s.axmodel --image_path /opt/data/npu/images/dog.jpg --save_path ./detect_dog.jpg