diff --git a/inference/benchmarks/vit_l_16/README.md b/inference/benchmarks/vit_l_16/README.md
index 5998c0cf9..cd77ab738 100644
--- a/inference/benchmarks/vit_l_16/README.md
+++ b/inference/benchmarks/vit_l_16/README.md
@@ -83,4 +83,5 @@ find ./val -name "*JPEG" | wc -l
 | ----------- | --------- | ---- | ---- | -------- | ----------- | ---------- | ------------- | ------------ | ----------- | ----------- |
 | tensorrt | fp16    | 64   |1009.7 | 777.8 | 796.7 | 825.8 | 1329.2 | 26.2% | 79.0/79.3 | 35.0/40.0 |
 | tensorrt | fp32   | 32 | 1275.9 | 482.4  | 491.1 | 555.5    | 590.5 | 23.3% | 79.3/79.3 | 35.0/40.0 |
+| kunlunxin_xtcl | W32A16   | 32 | 2118.307 | / | / | 130.006    | 144.914 | 27.9% | 79.3/79.3 | / |
 
diff --git a/inference/configs/host.yaml b/inference/configs/host.yaml
index f5ec9d0ac..25c7f796b 100644
--- a/inference/configs/host.yaml
+++ b/inference/configs/host.yaml
@@ -13,4 +13,4 @@ PIP_SOURCE: "https://mirror.baidu.com/pypi/simple"
 CLEAR_CACHES: True
 ACCE_VISIBLE_DEVICE_ENV_NAME: "CUDA_VISIBLE_DEVICES"
 CASES: 
-    "resnet50:pytorch_1.13": "/raid/dataset/ImageNet/imagenet/val"
\ No newline at end of file
+    "resnet50:pytorch_1.13": "/raid/dataset/ImageNet/imagenet/val"
diff --git a/inference/configs/vit_l_16/vendor_config/kunlunxin_configurations.yaml b/inference/configs/vit_l_16/vendor_config/kunlunxin_configurations.yaml
new file mode 100644
index 000000000..bf71dd82c
--- /dev/null
+++ b/inference/configs/vit_l_16/vendor_config/kunlunxin_configurations.yaml
@@ -0,0 +1,5 @@
+compiler: xtcl
+# Skip validation (this also skips create_model and the ONNX export). Requires exist_onnx_path != null.
+no_validation: true
+# Set a real path to reuse an existing ONNX file, or set any non-null value to skip the ONNX export (as for torch-tensorrt).
+exist_onnx_path: /home/FlagPerf/inference/onnxs/vit_l_16_bs32_pytorch_fp16False.onnx
diff --git a/inference/docker_images/kunlunxin/kunlunxin_analysis.py b/inference/docker_images/kunlunxin/kunlunxin_analysis.py
index 388f89cee..be1a60b1b 100644
--- a/inference/docker_images/kunlunxin/kunlunxin_analysis.py
+++ b/inference/docker_images/kunlunxin/kunlunxin_analysis.py
@@ -1,23 +1,21 @@
-def analysis_log(logpath):
-    logfile = open(logpath)
-
-    max_usage = 0.0 ## usage_mem
-    max_mem = 0.0 
-    for line in logfile.readlines():
-        '''
-        xpu_smi temp power mem w_mem use_rate
-        '''
-        if "xpu_smi" in line:
-            line = line[:-1]
-            usage = line.split(" ")[4]
-            usage = float(usage)
-            max_usage = max(max_usage, usage)
-            max_mem = line.split(" ")[5]
-            max_mem = float(max_mem)
-
-    return round(max_usage / 1024.0,
-                 2), round(max_mem / 1024.0, 2), eval("32e12"), eval("128e12")
-
-
-if __name__ == "__main__":
-    max1, max2, max2,max4 = analysis_log("/home/zhoujiamin01/workspace/zjm_flag/FlagPerf/inference/result/run20230809192313/resnet50:pytorch_1.13/127.0.0.1_noderank0/kunlunxin_monitor.log")
+def analysis_log(logpath):
+    """Summarise the memory columns of a kunlunxin monitor log.
+
+    Only lines containing "xpu_smi" are parsed. They are split on single
+    spaces; per the monitor's column order (temp power mem w_mem use_rate)
+    index 4 is read as `mem` and index 5 as `w_mem`.
+
+    Returns (peak mem / 1024, last w_mem / 1024, 32e12, 128e12); the first
+    two values are rounded to 2 decimals.
+    """
+    max_usage = 0.0  # largest `mem` value seen so far
+    max_mem = 0.0  # `w_mem` of the most recent sample
+    with open(logpath) as logfile:  # closed even if parsing raises
+        for line in logfile:
+            if "xpu_smi" in line:
+                fields = line.rstrip("\n").split(" ")
+                max_usage = max(max_usage, float(fields[4]))
+                max_mem = float(fields[5])
+    return round(max_usage / 1024.0, 2), round(max_mem / 1024.0, 2), 32e12, 128e12
+
+
diff --git a/inference/docker_images/kunlunxin/kunlunxin_monitor.py b/inference/docker_images/kunlunxin/kunlunxin_monitor.py
index ba5a877a1..7d31179ae 100644
--- a/inference/docker_images/kunlunxin/kunlunxin_monitor.py
+++ b/inference/docker_images/kunlunxin/kunlunxin_monitor.py
@@ -1,256 +1,257 @@
-# ！/usr/bin/env python3
-# encoding: utf-8
-'''
-Usage:  python3 sys-monitor.py -o operation -l [log_path]
-            -o, --operation     start|stop|restart|status
-            -l, --log           log path , ./logs/ default
-'''
-
-import os
-import sys
-import time
-import signal
-import atexit
-import argparse
-import datetime
-from multiprocessing import Process
-import subprocess
-import schedule
-
-
-class Daemon:
-    '''
-    daemon subprocess class.
-    usage: subclass this daemon and override the run() method.
-    sys-monitor.pid: in the /tmp/, auto del when unexpected exit.
-    verbose: debug mode, disabled default.
-    '''
-
-    def __init__(self,
-                 pid_file,
-                 log_file,
-                 err_file,
-                 gpu_log,
-                 log_path,
-                 rate=5,
-                 stdin=os.devnull,
-                 stdout=os.devnull,
-                 stderr=os.devnull,
-                 home_dir='.',
-                 umask=0o22,
-                 verbose=0):
-        self.stdin = stdin
-        self.stdout = stdout
-        self.stderr = stderr
-        self.home_dir = home_dir
-        self.verbose = verbose
-        self.pidfile = pid_file
-        self.logfile = log_file
-        self.errfile = err_file
-        self.gpufile = gpu_log
-        self.logpath = log_path
-        self.rate = rate
-        self.umask = umask
-        self.verbose = verbose
-        self.daemon_alive = True
-
-    def get_pid(self):
-        try:
-            with open(self.pidfile, 'r') as pf:
-                pid = int(pf.read().strip())
-        except IOError:
-            pid = None
-        except SystemExit:
-            pid = None
-        return pid
-
-    def del_pid(self):
-        if os.path.exists(self.pidfile):
-            os.remove(self.pidfile)
-
-    def run(self):
-        '''
-        NOTE: override the method in subclass
-        '''
-
-        def gpu_mon(file):
-            TIMESTAMP = datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
-            cmd = "xpu_smi |grep '/dev/xpu0'|awk '{print $29,$27,$22,$24,$14}'"  ## temp power mem w_mem use_rate
-            process = subprocess.Popen(cmd,
-                                       shell=True,
-                                       stdout=subprocess.PIPE,
-                                       stderr=subprocess.STDOUT,
-                                       encoding='utf-8')
-            try:
-                out = process.communicate(timeout=10)
-            except subprocess.TimeoutExpired:
-                process.kill()
-                out = process.communicate()
-
-            if process.returncode != 0:
-                result = "error"
-            result = TIMESTAMP + "\n xpu_smi " + out[0] + "\n"
-            with open(file, 'a') as f:
-                f.write(result)
-
-        def timer_gpu_mon():
-            gpu_process = Process(target=gpu_mon, args=(self.gpufile, ))
-            gpu_process.start()
-
-        schedule.every(self.rate).seconds.do(timer_gpu_mon)
-        while True:
-            schedule.run_pending()
-            time.sleep(5)
-
-    def daemonize(self):
-        if self.verbose >= 1:
-            print('daemon process starting ...')
-        try:
-            pid = os.fork()
-            if pid > 0:
-                sys.exit(0)
-        except OSError as e:
-            sys.stderr.write('fork #1 failed: %d (%s)\n' %
-                             (e.errno, e.strerror))
-            sys.exit(1)
-        os.chdir(self.home_dir)
-        os.setsid()
-        os.umask(self.umask)
-        try:
-            pid = os.fork()
-            if pid > 0:
-                sys.exit(0)
-        except OSError as e:
-            sys.stderr.write('fork #2 failed: %d (%s)\n' %
-                             (e.errno, e.strerror))
-            sys.exit(1)
-        sys.stdout.flush()
-        sys.stderr.flush()
-        si = open(self.stdin, 'r')
-        so = open(self.stdout, 'a+')
-        if self.stderr:
-            se = open(self.stderr, 'a+')
-        else:
-            se = so
-        os.dup2(si.fileno(), sys.stdin.fileno())
-        os.dup2(so.fileno(), sys.stdout.fileno())
-        os.dup2(se.fileno(), sys.stderr.fileno())
-        atexit.register(self.del_pid)
-        pid = str(os.getpid())
-        with open(self.pidfile, 'w+') as f:
-            f.write('%s\n' % pid)
-
-    def start(self):
-        if not os.path.exists(self.logpath):
-            os.makedirs(self.logpath)
-        elif os.path.exists(self.gpufile):
-            os.remove(self.gpufile)
-        if self.verbose >= 1:
-            print('ready to start ......')
-        # check for a pid file to see if the daemon already runs
-        pid = self.get_pid()
-        if pid:
-            msg = 'pid file %s already exists, is it already running?\n'
-            sys.stderr.write(msg % self.pidfile)
-            sys.exit(1)
-        # start the daemon
-        self.daemonize()
-        self.run()
-
-    def stop(self):
-        if self.verbose >= 1:
-            print('stopping ...')
-        pid = self.get_pid()
-        if not pid:
-            msg = 'pid file [%s] does not exist. Not running?\n' % self.pidfile
-            sys.stderr.write(msg)
-            if os.path.exists(self.pidfile):
-                os.remove(self.pidfile)
-            return
-        # try to kill the daemon process
-        try:
-            i = 0
-            while 1:
-                os.kill(pid, signal.SIGTERM)
-                time.sleep(1)
-                i = i + 1
-                if i % 10 == 0:
-                    os.kill(pid, signal.SIGHUP)
-        except OSError as err:
-            err = str(err)
-            if err.find('No such process') > 0:
-                if os.path.exists(self.pidfile):
-                    os.remove(self.pidfile)
-            else:
-                print(str(err))
-                sys.exit(1)
-            if self.verbose >= 1:
-                print('Stopped!')
-
-    def restart(self):
-        self.stop()
-        self.start()
-
-    def status(self):
-        pid = self.get_pid()
-        if pid:
-            if os.path.exists('/proc/%d' % pid):
-                return pid
-        return False
-
-
-def parse_args():
-    ''' Check script input parameter. '''
-    parse = argparse.ArgumentParser(description='Sys monitor script')
-    parse.add_argument('-o',
-                       type=str,
-                       metavar='[operation]',
-                       required=True,
-                       help='start|stop|restart|status')
-    parse.add_argument('-l',
-                       type=str,
-                       metavar='[log_path]',
-                       required=False,
-                       default='./logs/',
-                       help='log path')
-    args = parse.parse_args()
-    return args
-
-
-def main():
-    sample_rate1 = 5
-    args = parse_args()
-    operation = args.o
-    log_path = args.l
-    pid_fn = str('/tmp/xpu_monitor.pid')
-    log_fn = str(log_path + '/kunlunxin_monitor.log')
-    err_fn = str(log_path + '/kunlunxin_monitor.err')
-    # result for gpu
-    gpu_fn = str(log_path + '/kunlunxin_monitor.log')
-
-    subdaemon = Daemon(pid_fn,
-                       log_fn,
-                       err_fn,
-                       gpu_fn,
-                       log_path,
-                       verbose=1,
-                       rate=sample_rate1)
-    if operation == 'start':
-        subdaemon.start()
-    elif operation == 'stop':
-        subdaemon.stop()
-    elif operation == 'restart':
-        subdaemon.restart()
-    elif operation == 'status':
-        pid = subdaemon.status()
-        if pid:
-            print('process [%s] is running ......' % pid)
-        else:
-            print('daemon process [%s] stopped' % pid)
-    else:
-        print("invalid argument!")
-        sys.exit(1)
-
-
-if __name__ == '__main__':
-    main()
+#!/usr/bin/env python3
+# encoding: utf-8
+'''
+Usage:  python3 kunlunxin_monitor.py -o operation [-l log_path]
+            -o     start|stop|restart|status (required)
+            -l     log path, defaults to ./logs/
+'''
+
+import os
+import sys
+import time
+import signal
+import atexit
+import argparse
+import datetime
+from multiprocessing import Process
+import subprocess
+import schedule
+
+
+class Daemon:
+    '''
+    Double-fork daemon that appends xpu_smi samples to a log on a schedule.
+    The sampler lives in run(); subclass and override run() to change it.
+    The pid file is registered for removal at interpreter exit.
+    verbose: values >= 1 print progress messages; 0 (default) is silent.
+    '''
+
+    def __init__(self,
+                 pid_file,
+                 log_file,
+                 err_file,
+                 gpu_log,
+                 log_path,
+                 rate=5,
+                 stdin=os.devnull,
+                 stdout=os.devnull,
+                 stderr=os.devnull,
+                 home_dir='.',
+                 umask=0o22,
+                 verbose=0):
+        '''Store paths and settings only; nothing is opened or forked here.'''
+        self.stdin = stdin
+        self.stdout = stdout
+        self.stderr = stderr
+        self.home_dir = home_dir
+        self.verbose = verbose
+        self.pidfile = pid_file
+        self.logfile = log_file
+        self.errfile = err_file
+        self.gpufile = gpu_log
+        self.logpath = log_path
+        self.rate = rate
+        self.umask = umask
+        self.daemon_alive = True
+
+    def get_pid(self):
+        '''Return the pid stored in the pid file, or None if unavailable.'''
+        try:
+            with open(self.pidfile, 'r') as pf:
+                return int(pf.read().strip())
+        except (IOError, ValueError, SystemExit):
+            # a missing, unreadable or empty/corrupt pid file all mean
+            # "no usable pid"; stop() then removes any stale file
+            return None
+
+    def del_pid(self):
+        if os.path.exists(self.pidfile):
+            os.remove(self.pidfile)
+
+    def run(self):
+        '''
+        Append an xpu_smi sample to self.gpufile on a schedule, forever.
+        NOTE: subclasses may override this method
+        '''
+        def gpu_mon(file):
+            TIMESTAMP = datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
+            cmd = "xpu_smi |grep '/dev/xpu0'|awk '{print $29,$27,$22,$24,$14}'"  ## temp power mem w_mem use_rate
+            process = subprocess.Popen(cmd,
+                                       shell=True,
+                                       stdout=subprocess.PIPE,
+                                       stderr=subprocess.STDOUT,
+                                       encoding='utf-8')
+            try:
+                out = process.communicate(timeout=10)
+            except subprocess.TimeoutExpired:
+                process.kill()
+                out = process.communicate()
+            if process.returncode != 0:  # failed/killed: mark it, no sample
+                result = TIMESTAMP + "\n error\n"
+            else:
+                result = TIMESTAMP + "\n xpu_smi " + out[0] + "\n"
+            with open(file, 'a') as f:
+                f.write(result)
+
+        def timer_gpu_mon():  # one short-lived process per sample
+            gpu_process = Process(target=gpu_mon, args=(self.gpufile, ))
+            gpu_process.start()
+
+        schedule.every(self.rate).seconds.do(timer_gpu_mon)
+        while True:
+            schedule.run_pending()
+            time.sleep(5)
+
+    def daemonize(self):
+        '''Detach via a double fork, redirect stdio, then write the pid file.'''
+        if self.verbose >= 1:
+            print('daemon process starting ...')
+        try:
+            pid = os.fork()
+            if pid > 0:
+                sys.exit(0)  # parent exits; the child carries on
+        except OSError as e:
+            sys.stderr.write('fork #1 failed: %d (%s)\n' %
+                             (e.errno, e.strerror))
+            sys.exit(1)
+        # set cwd, a new session and the umask before the second fork
+        os.chdir(self.home_dir)
+        os.setsid()
+        os.umask(self.umask)
+        try:
+            pid = os.fork()
+            if pid > 0:
+                sys.exit(0)
+        except OSError as e:
+            sys.stderr.write('fork #2 failed: %d (%s)\n' %
+                             (e.errno, e.strerror))
+            sys.exit(1)
+        sys.stdout.flush()
+        sys.stderr.flush()
+        si = open(self.stdin, 'r')
+        so = open(self.stdout, 'a+')
+        # a falsy stderr setting shares the stdout target
+        se = open(self.stderr, 'a+') if self.stderr else so
+        os.dup2(si.fileno(), sys.stdin.fileno())
+        os.dup2(so.fileno(), sys.stdout.fileno())
+        os.dup2(se.fileno(), sys.stderr.fileno())
+        atexit.register(self.del_pid)
+        pid = str(os.getpid())
+        with open(self.pidfile, 'w+') as f:
+            f.write('%s\n' % pid)
+
+    def start(self):
+        '''Reset the sample log, then daemonize and run; exit(1) if a pid is recorded.'''
+        if not os.path.exists(self.logpath):
+            os.makedirs(self.logpath)
+        elif os.path.exists(self.gpufile):
+            os.remove(self.gpufile)
+        if self.verbose >= 1:
+            print('ready to start ......')
+        # check for a pid file to see if the daemon already runs
+        pid = self.get_pid()
+        if pid:
+            msg = 'pid file %s already exists, is it already running?\n'
+            sys.stderr.write(msg % self.pidfile)
+            sys.exit(1)
+        self.daemonize()
+        self.run()
+
+    def stop(self):
+        '''Signal the recorded pid until it is gone, then remove the pid file.'''
+        if self.verbose >= 1:
+            print('stopping ...')
+        pid = self.get_pid()
+        if not pid:
+            msg = 'pid file [%s] does not exist. Not running?\n' % self.pidfile
+            sys.stderr.write(msg)
+            if os.path.exists(self.pidfile):
+                os.remove(self.pidfile)
+            return
+        # SIGTERM once a second (SIGHUP as well on every 10th try) until
+        # os.kill() reports that the process no longer exists
+        try:
+            i = 0
+            while 1:
+                os.kill(pid, signal.SIGTERM)
+                time.sleep(1)
+                i = i + 1
+                if i % 10 == 0:
+                    os.kill(pid, signal.SIGHUP)
+        except ProcessLookupError:  # ESRCH: the daemon has exited
+            if os.path.exists(self.pidfile):
+                os.remove(self.pidfile)
+        except OSError as err:
+            print(str(err))
+            sys.exit(1)
+        if self.verbose >= 1:
+            print('Stopped!')
+
+    def restart(self):
+        self.stop()
+        self.start()
+
+    def status(self):
+        '''Return the recorded pid if /proc/<pid> exists, else False.'''
+        pid = self.get_pid()
+        if pid and os.path.exists('/proc/%d' % pid):
+            return pid
+        return False
+
+
+def parse_args():
+    '''Read -o (operation, required) and -l (log path) from the command line.'''
+    option_specs = (
+        ('-o', dict(metavar='[operation]',
+                    required=True,
+                    help='start|stop|restart|status')),
+        ('-l', dict(metavar='[log_path]',
+                    required=False,
+                    default='./logs/',
+                    help='log path')),
+    )
+    cli = argparse.ArgumentParser(description='Sys monitor script')
+    for flag, spec in option_specs:
+        # every option is a plain string
+        cli.add_argument(flag, type=str, **spec)
+    return cli.parse_args()
+
+
+def main():
+    '''Build the monitor daemon from the CLI options and run the operation.'''
+    cli = parse_args()
+    log_dir = cli.l
+    # the sample file and the nominal log file are the same path
+    sample_log = log_dir + '/kunlunxin_monitor.log'
+    monitor = Daemon('/tmp/xpu_monitor.pid',
+                     sample_log,
+                     log_dir + '/kunlunxin_monitor.err',
+                     sample_log,
+                     log_dir,
+                     verbose=1,
+                     rate=5)
+
+    def report_status():
+        # status() yields the live pid, or False when nothing is running
+        live_pid = monitor.status()
+        if not live_pid:
+            print('daemon process [%s] stopped' % live_pid)
+        else:
+            print('process [%s] is running ......' % live_pid)
+
+    handlers = {
+        'start': monitor.start,
+        'stop': monitor.stop,
+        'restart': monitor.restart,
+        'status': report_status,
+    }
+    action = handlers.get(cli.o)
+    if action is None:
+        print("invalid argument!")
+        sys.exit(1)
+    action()
+
+
+if __name__ == '__main__':
+    main()
+
diff --git a/inference/docker_images/kunlunxin/pytorch_1.13/Dockerfile b/inference/docker_images/kunlunxin/pytorch_1.13/Dockerfile
index 7227b9743..fa778e7e8 100644
--- a/inference/docker_images/kunlunxin/pytorch_1.13/Dockerfile
+++ b/inference/docker_images/kunlunxin/pytorch_1.13/Dockerfile
@@ -72,6 +72,7 @@ ENV TVM_DIR=/root/XTCL-ubuntu_x86_64
 
 
 
+
 ENV PATH /root/xre-ubuntu_2004_x86_64/bin:$PATH
 ENV PATH /root/miniconda/envs/python38/bin:$PATH
 
diff --git a/inference/inference_engine/kunlunxin/xtcl.py b/inference/inference_engine/kunlunxin/xtcl.py
index 396cc3ae9..db8540eff 100755
--- a/inference/inference_engine/kunlunxin/xtcl.py
+++ b/inference/inference_engine/kunlunxin/xtcl.py
@@ -27,7 +27,7 @@ def build_engine(self, config, onnx_path):
             input_name = input.name #'inputs:0'
             self.input_names.append(input_name)
             shape_dict[input_name] = input_shape
-        
+
         mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
 
         target_host = f'llvm -acc=xpu{os.environ.get("XPUSIM_DEVICE_MODEL", "KUNLUN1")[-1]}'
@@ -68,3 +68,4 @@ def __call__(self, model_inputs: list):
         return output_list, foo_time
 
 
+
diff --git a/inference/run.py b/inference/run.py
index a11fa4824..36cf49222 100644
--- a/inference/run.py
+++ b/inference/run.py
@@ -272,7 +272,7 @@ def start_monitors_in_cluster(dp_path, case_log_dir, nnodes):
 
     ven_mon_path = os.path.join(dp_path, "docker_images", config.VENDOR,
                                 config.VENDOR + "_monitor.py")
-    start_mon_cmd = "cd " + dp_path + " && " + sys.executable \
+    start_mon_cmd = "cd " + dp_path + " && sudo " + sys.executable \
                     + " " + ven_mon_path + " -o restart -l "
     logger.debug("Run cmd in the cluster to start vendor's monitors: " +
                  start_mon_cmd)
@@ -299,7 +299,7 @@ def stop_monitors_in_cluster(dp_path, nnodes):
 
     ven_mon_path = os.path.join(dp_path, "docker_images", config.VENDOR,
                                 config.VENDOR + "_monitor.py")
-    stop_mon_cmd = "cd " + dp_path + " && " + sys.executable \
+    stop_mon_cmd = "cd " + dp_path + " && sudo " + sys.executable \
                    + " " + ven_mon_path + " -o stop"
     logger.debug("Run cmd in the cluster to stop vendor's monitors: " +
                  stop_mon_cmd)