From ebd18c686c2615407f0214bf1309b139afb48262 Mon Sep 17 00:00:00 2001 From: wanglusheng Date: Sun, 9 Feb 2025 13:53:53 +0800 Subject: [PATCH 1/2] fix cffi life cycle issue --- axengine/_axe.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/axengine/_axe.py b/axengine/_axe.py index 33b077e..3adb634 100644 --- a/axengine/_axe.py +++ b/axengine/_axe.py @@ -216,11 +216,14 @@ def __init__( _inputs= engine_cffi.new( "AX_ENGINE_IO_BUFFER_T[{}]".format(self._io[0].nInputSize) ) - self._io[0].pInputs = _inputs _outputs = engine_cffi.new( "AX_ENGINE_IO_BUFFER_T[{}]".format(self._io[0].nOutputSize) ) + self._io_buffers = (_inputs, _outputs) + self._io[0].pInputs = _inputs self._io[0].pOutputs = _outputs + + self._io_inputs_pool = [] for i in range(len(self.get_inputs())): max_buf = 0 for j in range(self._shape_count): @@ -228,6 +231,7 @@ def __init__( self._io[0].pInputs[i].nSize = max_buf phy = engine_cffi.new("AX_U64*") vir = engine_cffi.new("AX_VOID**") + self._io_inputs_pool.append((phy, vir)) ret = sys_lib.AX_SYS_MemAllocCached( phy, vir, self._io[0].pInputs[i].nSize, self._align, self._cmm_token ) @@ -235,6 +239,8 @@ def __init__( raise RuntimeError("Failed to allocate memory for input.") self._io[0].pInputs[i].phyAddr = phy[0] self._io[0].pInputs[i].pVirAddr = vir[0] + + self._io_outputs_pool = [] for i in range(len(self.get_outputs())): max_buf = 0 for j in range(self._shape_count): @@ -242,6 +248,7 @@ def __init__( self._io[0].pOutputs[i].nSize = max_buf phy = engine_cffi.new("AX_U64*") vir = engine_cffi.new("AX_VOID**") + self._io_outputs_pool.append((phy, vir)) ret = sys_lib.AX_SYS_MemAllocCached( phy, vir, self._io[0].pOutputs[i].nSize, self._align, self._cmm_token ) From 040bbe1b090e712a66c313cf6f8816c4df5777f9 Mon Sep 17 00:00:00 2001 From: wanglusheng Date: Sun, 9 Feb 2025 14:41:13 +0800 Subject: [PATCH 2/2] fix cffi life cycle --- axengine/_axclrt.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/axengine/_axclrt.py b/axengine/_axclrt.py index 980fa14..559e716 100644 --- a/axengine/_axclrt.py +++ b/axengine/_axclrt.py @@ -96,6 +96,8 @@ def __init__( super().__init__() self._device_index = 0 + self._io = None + self._model_id = None if provider_options is not None and "device_id" in provider_options[0]: self._device_index = provider_options[0].get("device_id", 0) @@ -214,12 +216,12 @@ def _unload(self): dev_size = axclrt_cffi.new("uint64_t *") dev_prt = axclrt_cffi.new("void **") for i in range(axclrt_lib.axclrtEngineGetNumInputs(self._info[0])): - axclrt_lib.axclrtEngineGetInputBufferByIndex(self._io, i, dev_prt, dev_size) + axclrt_lib.axclrtEngineGetInputBufferByIndex(self._io[0], i, dev_prt, dev_size) axclrt_lib.axclrtFree(dev_prt[0]) for i in range(axclrt_lib.axclrtEngineGetNumOutputs(self._info[0])): - axclrt_lib.axclrtEngineGetOutputBufferByIndex(self._io, i, dev_prt, dev_size) + axclrt_lib.axclrtEngineGetOutputBufferByIndex(self._io[0], i, dev_prt, dev_size) axclrt_lib.axclrtFree(dev_prt[0]) - axclrt_lib.axclrtEngineDestroyIO(self._io) + axclrt_lib.axclrtEngineDestroyIO(self._io[0]) self._io = None if self._model_id[0] is not None and self._model_id[0] != 0: axclrt_lib.axclrtEngineUnload(self._model_id[0]) @@ -322,7 +324,7 @@ def _prepare_io(self): ret = axclrt_lib.axclrtEngineSetOutputBufferByIndex(_io[0], i, dev_ptr[0], max_size) if 0 != ret: raise RuntimeError(f"axclrtEngineSetOutputBufferByIndex failed 0x{ret:08x} for output {i}.") - return _io[0] + return _io def run( self, @@ -353,7 +355,7 @@ def run( if not (npy.flags.c_contiguous or npy.flags.f_contiguous): npy = np.ascontiguousarray(npy) npy_ptr = axclrt_cffi.cast("void *", npy.ctypes.data) - ret = axclrt_lib.axclrtEngineGetInputBufferByIndex(self._io, i, dev_prt, dev_size) + ret = axclrt_lib.axclrtEngineGetInputBufferByIndex(self._io[0], i, dev_prt, dev_size) if 0 != ret: raise RuntimeError(f"axclrtEngineGetInputBufferByIndex failed for input {i}.") ret = axclrt_lib.axclrtMemcpy(dev_prt[0], npy_ptr, npy.nbytes, axclrt_lib.AXCL_MEMCPY_HOST_TO_DEVICE) @@ -361,13 +363,13 @@ def run( raise RuntimeError(f"axclrtMemcpy failed for input {i}.") # execute model - ret = axclrt_lib.axclrtEngineExecute(self._model_id[0], self._context_id[0], 0, self._io) + ret = axclrt_lib.axclrtEngineExecute(self._model_id[0], self._context_id[0], 0, self._io[0]) # get output outputs = [] if 0 == ret: for i in range(len(self.get_outputs())): - ret = axclrt_lib.axclrtEngineGetOutputBufferByIndex(self._io, i, dev_prt, dev_size) + ret = axclrt_lib.axclrtEngineGetOutputBufferByIndex(self._io[0], i, dev_prt, dev_size) if 0 != ret: raise RuntimeError(f"axclrtEngineGetOutputBufferByIndex failed for output {i}.") npy = np.zeros(self.get_outputs()[i].shape, dtype=self.get_outputs()[i].dtype)