From 3bfef7e596f0036402050a43c6a4ce0d8b589cd6 Mon Sep 17 00:00:00 2001 From: wanglusheng Date: Thu, 5 Dec 2024 15:30:35 +0800 Subject: [PATCH] init version --- axengine/__init__.py | 8 + axengine/_capi.py | 323 +++++++++++++++++++++++++++++++++++ axengine/_chip.py | 31 ++++ axengine/_node.py | 13 ++ axengine/_types.py | 63 +++++++ axengine/session.py | 341 +++++++++++++++++++++++++++++++++++++ examples/classification.py | 86 ++++++++++ setup.py | 22 +++ 8 files changed, 887 insertions(+) create mode 100644 axengine/__init__.py create mode 100644 axengine/_capi.py create mode 100644 axengine/_chip.py create mode 100644 axengine/_node.py create mode 100644 axengine/_types.py create mode 100644 axengine/session.py create mode 100644 examples/classification.py create mode 100644 setup.py diff --git a/axengine/__init__.py b/axengine/__init__.py new file mode 100644 index 0000000..198ad7e --- /dev/null +++ b/axengine/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved. +# +# This source file is the property of Axera Semiconductor Co., Ltd. and +# may not be copied or distributed in any isomorphic form without the prior +# written consent of Axera Semiconductor Co., Ltd. +# + +from .session import InferenceSession diff --git a/axengine/_capi.py b/axengine/_capi.py new file mode 100644 index 0000000..dd40bc1 --- /dev/null +++ b/axengine/_capi.py @@ -0,0 +1,323 @@ +# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved. +# +# This source file is the property of Axera Semiconductor Co., Ltd. and +# may not be copied or distributed in any isomorphic form without the prior +# written consent of Axera Semiconductor Co., Ltd. 
+# + +import ctypes.util +import platform + +from cffi import FFI + +__all__: ["S", "M", "E", "N"] + +M = FFI() + +# ax_base_type.h +M.cdef( + """ + typedef int AX_S32; + typedef unsigned int AX_U32; + typedef unsigned long long int AX_U64; + typedef signed char AX_S8; + typedef void AX_VOID; +""" +) + +# ax_sys_api.h +M.cdef( + """ + AX_S32 AX_SYS_Init(AX_VOID); + AX_S32 AX_SYS_Deinit(AX_VOID); + AX_S32 AX_SYS_MemAllocCached(AX_U64 *phyaddr, AX_VOID **pviraddr, AX_U32 size, AX_U32 align, const AX_S8 *token); + AX_S32 AX_SYS_MemFree(AX_U64 phyaddr, AX_VOID *pviraddr); + AX_S32 AX_SYS_MflushCache(AX_U64 phyaddr, AX_VOID *pviraddr, AX_U32 size); + AX_S32 AX_SYS_MinvalidateCache(AX_U64 phyaddr, AX_VOID *pviraddr, AX_U32 size); +""" +) + +sys_name = "ax_sys" +sys_path = ctypes.util.find_library(sys_name) +assert ( + sys_path is not None +), f"Failed to find library {sys_name}. Please ensure it is installed and in the library path." + +S = M.dlopen(sys_path) +assert S is not None, f"Failed to load library {sys_path}. Please ensure it is installed and in the library path." 
+ +N = FFI() + +# ax_base_type.h +N.cdef( + """ + typedef unsigned long long int AX_U64; + typedef unsigned int AX_U32; + typedef unsigned char AX_U8; + typedef int AX_S32; + typedef signed char AX_S8; + typedef char AX_CHAR; + typedef void AX_VOID; + + typedef enum { + AX_FALSE = 0, + AX_TRUE = 1, + } AX_BOOL; +""" +) + +# ax_engine_type.h, base type +N.cdef( + """ + typedef AX_U32 AX_ENGINE_NPU_SET_T; +""" +) + +# ax_engine_type.h, enum +N.cdef( + """ + typedef enum _AX_ENGINE_TENSOR_LAYOUT_E + { + AX_ENGINE_TENSOR_LAYOUT_UNKNOWN = 0, + AX_ENGINE_TENSOR_LAYOUT_NHWC = 1, + AX_ENGINE_TENSOR_LAYOUT_NCHW = 2, + } AX_ENGINE_TENSOR_LAYOUT_T; + + typedef enum + { + AX_ENGINE_MT_PHYSICAL = 0, + AX_ENGINE_MT_VIRTUAL = 1, + AX_ENGINE_MT_OCM = 2, + } AX_ENGINE_MEMORY_TYPE_T; + + typedef enum + { + AX_ENGINE_DT_UNKNOWN = 0, + AX_ENGINE_DT_UINT8 = 1, + AX_ENGINE_DT_UINT16 = 2, + AX_ENGINE_DT_FLOAT32 = 3, + AX_ENGINE_DT_SINT16 = 4, + AX_ENGINE_DT_SINT8 = 5, + AX_ENGINE_DT_SINT32 = 6, + AX_ENGINE_DT_UINT32 = 7, + AX_ENGINE_DT_FLOAT64 = 8, + AX_ENGINE_DT_BFLOAT16 = 9, + AX_ENGINE_DT_UINT10_PACKED = 100, + AX_ENGINE_DT_UINT12_PACKED = 101, + AX_ENGINE_DT_UINT14_PACKED = 102, + AX_ENGINE_DT_UINT16_PACKED = 103, + } AX_ENGINE_DATA_TYPE_T; + + typedef enum + { + AX_ENGINE_CS_FEATUREMAP = 0, + AX_ENGINE_CS_RAW8 = 12, + AX_ENGINE_CS_RAW10 = 1, + AX_ENGINE_CS_RAW12 = 2, + AX_ENGINE_CS_RAW14 = 11, + AX_ENGINE_CS_RAW16 = 3, + AX_ENGINE_CS_NV12 = 4, + AX_ENGINE_CS_NV21 = 5, + AX_ENGINE_CS_RGB = 6, + AX_ENGINE_CS_BGR = 7, + AX_ENGINE_CS_RGBA = 8, + AX_ENGINE_CS_GRAY = 9, + AX_ENGINE_CS_YUV444 = 10, + } AX_ENGINE_COLOR_SPACE_T; +""" +) + +# ax_engine_type.h, architecturally agnostic struct +N.cdef( + """ + typedef enum { + AX_ENGINE_VIRTUAL_NPU_DISABLE = 0, + } AX_ENGINE_NPU_MODE_T; + + typedef enum { + AX_ENGINE_MODEL_TYPE0 = 0, + } AX_ENGINE_MODEL_TYPE_T; + + typedef struct { + AX_ENGINE_NPU_MODE_T eHardMode; + AX_U32 reserve[8]; + } AX_ENGINE_NPU_ATTR_T; + + typedef struct 
_AX_ENGINE_IO_META_EX_T + { + AX_ENGINE_COLOR_SPACE_T eColorSpace; + AX_U64 u64Reserved[18]; + } AX_ENGINE_IO_META_EX_T; + + typedef struct { + AX_ENGINE_NPU_SET_T nNpuSet; + AX_S8* pName; + AX_U32 reserve[8]; + } AX_ENGINE_HANDLE_EXTRA_T; + + typedef struct _AX_ENGINE_CMM_INFO_T + { + AX_U32 nCMMSize; + } AX_ENGINE_CMM_INFO_T; + + typedef struct _AX_ENGINE_IO_SETTING_T + { + AX_U32 nWbtIndex; + AX_U64 u64Reserved[7]; + }AX_ENGINE_IO_SETTING_T; +""" +) + +# check architecture, 32bit or 64bit +arch = platform.architecture()[0] + +# ax_engine_type.h, struct +if arch == "64bit": + N.cdef( + """ + typedef struct _AX_ENGINE_IO_META_T + { + AX_CHAR* pName; + AX_S32* pShape; + AX_U8 nShapeSize; + AX_ENGINE_TENSOR_LAYOUT_T eLayout; + AX_ENGINE_MEMORY_TYPE_T eMemoryType; + AX_ENGINE_DATA_TYPE_T eDataType; + AX_ENGINE_IO_META_EX_T* pExtraMeta; + AX_U32 nSize; + AX_U32 nQuantizationValue; + AX_S32* pStride; + AX_U64 u64Reserved[9]; + } AX_ENGINE_IO_META_T; + + typedef struct _AX_ENGINE_IO_INFO_T + { + AX_ENGINE_IO_META_T* pInputs; + AX_U32 nInputSize; + AX_ENGINE_IO_META_T* pOutputs; + AX_U32 nOutputSize; + AX_U32 nMaxBatchSize; + AX_BOOL bDynamicBatchSize; + AX_U64 u64Reserved[11]; + } AX_ENGINE_IO_INFO_T; + + typedef struct _AX_ENGINE_IO_BUFFER_T + { + AX_U64 phyAddr; + AX_VOID* pVirAddr; + AX_U32 nSize; + AX_S32* pStride; + AX_U8 nStrideSize; + AX_U64 u64Reserved[11]; + } AX_ENGINE_IO_BUFFER_T; + + typedef struct _AX_ENGINE_IO_T + { + AX_ENGINE_IO_BUFFER_T* pInputs; + AX_U32 nInputSize; + AX_ENGINE_IO_BUFFER_T* pOutputs; + AX_U32 nOutputSize; + AX_U32 nBatchSize; + AX_ENGINE_IO_SETTING_T* pIoSetting; + AX_U64 u64Reserved[10]; + } AX_ENGINE_IO_T; + """ + ) +else: + N.cdef( + """ + typedef struct _AX_ENGINE_IO_META_T + { + AX_CHAR* pName; + AX_S32* pShape; + AX_U8 nShapeSize; + AX_ENGINE_TENSOR_LAYOUT_T eLayout; + AX_ENGINE_MEMORY_TYPE_T eMemoryType; + AX_ENGINE_DATA_TYPE_T eDataType; + AX_ENGINE_IO_META_EX_T* pExtraMeta; + AX_U32 nSize; + AX_U32 nQuantizationValue; + 
AX_S32* pStride; + AX_U64 u64Reserved[11]; + } AX_ENGINE_IO_META_T; + + typedef struct _AX_ENGINE_IO_INFO_T + { + AX_ENGINE_IO_META_T* pInputs; + AX_U32 nInputSize; + AX_ENGINE_IO_META_T* pOutputs; + AX_U32 nOutputSize; + AX_U32 nMaxBatchSize; + AX_BOOL bDynamicBatchSize; + AX_U64 u64Reserved[13]; + } AX_ENGINE_IO_INFO_T; + + typedef struct _AX_ENGINE_IO_BUFFER_T + { + AX_U64 phyAddr; + AX_VOID* pVirAddr; + AX_U32 nSize; + AX_S32* pStride; + AX_U8 nStrideSize; + AX_U64 u64Reserved[13]; + } AX_ENGINE_IO_BUFFER_T; + + typedef struct _AX_ENGINE_IO_T + { + AX_ENGINE_IO_BUFFER_T* pInputs; + AX_U32 nInputSize; + AX_ENGINE_IO_BUFFER_T* pOutputs; + AX_U32 nOutputSize; + AX_U32 nBatchSize; + AX_ENGINE_IO_SETTING_T* pIoSetting; + AX_U64 u64Reserved[12]; + } AX_ENGINE_IO_T; + """ + ) + +# ax_engine_api.h +N.cdef( + """ + const AX_CHAR* AX_ENGINE_GetVersion(AX_VOID); + + AX_VOID AX_ENGINE_NPUReset(AX_VOID); + AX_S32 AX_ENGINE_Init(AX_ENGINE_NPU_ATTR_T* pNpuAttr); + AX_S32 AX_ENGINE_GetVNPUAttr(AX_ENGINE_NPU_ATTR_T* pNpuAttr); + AX_S32 AX_ENGINE_Deinit(AX_VOID); + + AX_S32 AX_ENGINE_GetModelType(const AX_VOID* pData, AX_U32 nDataSize, AX_ENGINE_MODEL_TYPE_T* pModelType); + + AX_S32 AX_ENGINE_CreateHandleV2(uint64_t** pHandle, const AX_VOID* pData, AX_U32 nDataSize, AX_ENGINE_HANDLE_EXTRA_T* pExtraParam); + AX_S32 AX_ENGINE_DestroyHandle(uint64_t* nHandle); + + AX_S32 AX_ENGINE_GetIOInfo(uint64_t* nHandle, AX_ENGINE_IO_INFO_T** pIO); + AX_S32 AX_ENGINE_GetGroupIOInfoCount(uint64_t* nHandle, AX_U32* pCount); + AX_S32 AX_ENGINE_GetGroupIOInfo(uint64_t* nHandle, AX_U32 nIndex, AX_ENGINE_IO_INFO_T** pIO); + + AX_S32 AX_ENGINE_GetHandleModelType(uint64_t* nHandle, AX_ENGINE_MODEL_TYPE_T* pModelType); + + AX_S32 AX_ENGINE_CreateContextV2(uint64_t* nHandle, uint64_t** pContext); + + AX_S32 AX_ENGINE_RunSyncV2(uint64_t* handle, uint64_t* context, AX_ENGINE_IO_T* pIO); + AX_S32 AX_ENGINE_RunGroupIOSync(uint64_t* handle, uint64_t* context, AX_U32 nIndex, AX_ENGINE_IO_T* pIO); + + AX_S32 
AX_ENGINE_SetAffinity(uint64_t* nHandle, AX_ENGINE_NPU_SET_T nNpuSet);
+    AX_S32 AX_ENGINE_GetAffinity(uint64_t* nHandle, AX_ENGINE_NPU_SET_T* pNpuSet);
+
+    AX_S32 AX_ENGINE_GetCMMUsage(uint64_t* nHandle, AX_ENGINE_CMM_INFO_T* pCMMInfo);
+
+    const AX_CHAR* AX_ENGINE_GetModelToolsVersion(uint64_t* nHandle);
+
+    // internal use api, remember no question
+    AX_S32 AX_ENGINE_GetTotalOps();
+"""
+)
+
+engine_name = "ax_engine"
+engine_path = ctypes.util.find_library(engine_name)
+assert (
+    engine_path is not None
+), f"Failed to find library {engine_name}. Please ensure it is installed and in the library path."
+
+E = N.dlopen(engine_path)
+assert E is not None, f"Failed to load library {engine_path}. Please ensure it is installed and in the library path."
diff --git a/axengine/_chip.py b/axengine/_chip.py
new file mode 100644
index 0000000..e770ed4
--- /dev/null
+++ b/axengine/_chip.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved.
+#
+# This source file is the property of Axera Semiconductor Co., Ltd. and
+# may not be copied or distributed in any isomorphic form without the prior
+# written consent of Axera Semiconductor Co., Ltd.
+#
+
+from . import _types
+from ._capi import E as _lib
+
+__all__ = ["T"]
+
+
+def function_exists(lib, func_name):
+    try:
+        getattr(lib, func_name)
+        return True
+    except AttributeError:
+        return False
+
+
+def check_chip_type(clib):
+    if not function_exists(clib, "AX_ENGINE_SetAffinity"):
+        return _types.ChipType.M57H
+    elif not function_exists(clib, "AX_ENGINE_GetTotalOps"):
+        return _types.ChipType.MC50
+    else:
+        return _types.ChipType.MC20E
+
+
+T = check_chip_type(_lib)
diff --git a/axengine/_node.py b/axengine/_node.py
new file mode 100644
index 0000000..cf0459e
--- /dev/null
+++ b/axengine/_node.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved.
+#
+# This source file is the property of Axera Semiconductor Co., Ltd. 
and +# may not be copied or distributed in any isomorphic form without the prior +# written consent of Axera Semiconductor Co., Ltd. +# + + +class NodeArg(object): + def __init__(self, name, dtype, shape): + self.name = name + self.dtype = dtype + self.shape = shape diff --git a/axengine/_types.py b/axengine/_types.py new file mode 100644 index 0000000..d054a92 --- /dev/null +++ b/axengine/_types.py @@ -0,0 +1,63 @@ +# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved. +# +# This source file is the property of Axera Semiconductor Co., Ltd. and +# may not be copied or distributed in any isomorphic form without the prior +# written consent of Axera Semiconductor Co., Ltd. +# + +from enum import Enum +import ml_dtypes as mldt +import numpy as np + + +class VNPUType(Enum): + DISABLED = 0 + ENABLED = 1 + BIG_LITTLE = 2 + LITTLE_BIG = 3 + + +class ModelType(Enum): + HALF = 0 # for MC20E, which means chip is AX630C(x), or AX620Q(x) + FULL = 1 # for MC20E + SINGLE = 0 # for MC50, which means chip is AX650A or AX650N, and M57H + DUAL = 1 # for MC50 + TRIPLE = 2 # for MC50 + + +class ChipType(Enum): + MC20E = 0 + MC50 = 1 + M57H = 2 + + +def get_data_type(engine_type): + if engine_type == ChipType.MC20E: + return ModelType.HALF + elif engine_type == ChipType.MC50: + return ModelType.SINGLE + elif engine_type == ChipType.M57H: + return ModelType.SINGLE + else: + raise ValueError("Invalid engine type: %s" % engine_type) + + +def _transform_dtype(ffi, lib, dtype): + if dtype == ffi.cast("AX_ENGINE_DATA_TYPE_T", lib.AX_ENGINE_DT_UINT8): + return np.dtype(np.uint8) + elif dtype == ffi.cast("AX_ENGINE_DATA_TYPE_T", lib.AX_ENGINE_DT_SINT8): + return np.dtype(np.int8) + elif dtype == ffi.cast("AX_ENGINE_DATA_TYPE_T", lib.AX_ENGINE_DT_UINT16): + return np.dtype(np.uint16) + elif dtype == ffi.cast("AX_ENGINE_DATA_TYPE_T", lib.AX_ENGINE_DT_SINT16): + return np.dtype(np.int16) + elif dtype == ffi.cast("AX_ENGINE_DATA_TYPE_T", lib.AX_ENGINE_DT_UINT32): + return 
np.dtype(np.uint32) + elif dtype == ffi.cast("AX_ENGINE_DATA_TYPE_T", lib.AX_ENGINE_DT_SINT32): + return np.dtype(np.int32) + elif dtype == ffi.cast("AX_ENGINE_DATA_TYPE_T", lib.AX_ENGINE_DT_FLOAT32): + return np.dtype(np.float32) + elif dtype == ffi.cast("AX_ENGINE_DATA_TYPE_T", lib.AX_ENGINE_DT_BFLOAT16): + return np.dtype(mldt.bfloat16) + else: + raise ValueError(f"Unsupported data type '{dtype}'.") diff --git a/axengine/session.py b/axengine/session.py new file mode 100644 index 0000000..0e16142 --- /dev/null +++ b/axengine/session.py @@ -0,0 +1,341 @@ +# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved. +# +# This source file is the property of Axera Semiconductor Co., Ltd. and +# may not be copied or distributed in any isomorphic form without the prior +# written consent of Axera Semiconductor Co., Ltd. +# + +from ._types import VNPUType, ModelType, ChipType +from ._types import _transform_dtype +from ._node import NodeArg + +from . import _chip +from . import _capi + +import os +import numpy as np + +__all__: ["InferenceSession"] + + +class InferenceSession: + def __init__( + self, + path_or_bytes: str | bytes | os.PathLike, + ) -> None: + # load shared library + self._sys_lib = _capi.S + self._sys_ffi = _capi.M + self._engine_lib = _capi.E + self._engine_ffi = _capi.N + + # chip type + self._chip_type = _chip.T + print(f"[INFO] Chip type: {self._chip_type}") + + # handle, context, info, io + self._handle = self._engine_ffi.new("uint64_t **") + self._context = self._engine_ffi.new("uint64_t **") + self._io = self._engine_ffi.new("AX_ENGINE_IO_T *") + + # init ax sys & engine + ret = self._init() + if 0 != ret: + raise RuntimeError("Failed to initialize engine.") + print(f"[INFO] Engine version: {self._get_version()}") + + # get vnpu type + self._vnpu_type = self._get_vnpu_type() + print(f"[INFO] VNPU type: {self._vnpu_type}") + + # model buffer, almost copied from onnx runtime + if isinstance(path_or_bytes, (str, os.PathLike)): + 
self._model_name = os.path.splitext(os.path.basename(path_or_bytes))[0] + with open(path_or_bytes, "rb") as f: + data = f.read() + self._model_buffer = self._engine_ffi.new("char[]", data) + self._model_buffer_size = len(data) + elif isinstance(path_or_bytes, bytes): + self._model_buffer = self._engine_ffi.new("char[]", path_or_bytes) + self._model_buffer_size = len(path_or_bytes) + else: + raise TypeError(f"Unable to load model from type '{type(path_or_bytes)}'") + + # get model type + self._model_type = self._get_model_type() + if self._chip_type is ChipType.MC20E: + if self._model_type is ModelType.FULL: + print(f"[INFO] Model type: {self._model_type.value} (full core)") + if self._model_type is ModelType.HALF: + print(f"[INFO] Model type: {self._model_type.value} (half core)") + if self._chip_type is ChipType.MC50: + if self._model_type is ModelType.SINGLE: + print(f"[INFO] Model type: {self._model_type.value} (single core)") + if self._model_type is ModelType.DUAL: + print(f"[INFO] Model type: {self._model_type.value} (dual core)") + if self._model_type is ModelType.TRIPLE: + print(f"[INFO] Model type: {self._model_type.value} (triple core)") + if self._chip_type is ChipType.M57H: + print(f"[INFO] Model type: {self._model_type.value} (single core)") + + # check model type + if self._chip_type is ChipType.MC50: + # all types (single or dual or triple) of model are allowed in vnpu mode disabled + # only single core model is allowed in vnpu mode enabled + # only triple core model is NOT allowed in vnpu mode big-little or little-big + if self._vnpu_type is VNPUType.ENABLED: + if self._model_type is not ModelType.SINGLE: + raise ValueError( + f"Model type '{self._model_type}' is not allowed when vnpu is inited as {self._vnpu_type}." 
+ ) + if self._vnpu_type is VNPUType.BIG_LITTLE or self._vnpu_type is VNPUType.LITTLE_BIG: + if self._model_type is ModelType.TRIPLE: + raise ValueError( + f"Model type '{self._model_type}' is not allowed when vnpu is inited as {self._vnpu_type}." + ) + if self._chip_type is ChipType.MC20E: + # all types of full or half core model are allowed in vnpu mode disabled + # only half core model is allowed in vnpu mode enabled + if self._vnpu_type is VNPUType.ENABLED: + if self._model_type is ModelType.FULL: + raise ValueError( + f"Model type '{self._model_type}' is not allowed when vnpu is inited as {self._vnpu_type}." + ) + # if self._chip_type is ChipType.M57H: + # there only one type of model will be compiled, so no need to check + + # load model + ret = self._load() + if 0 != ret: + raise RuntimeError("Failed to load model.") + print(f"[INFO] Compiler version: {self._get_model_tool_version()}") + + # get shape group count + self._shape_count = self._get_shape_count() + + # get model shape + self._info = self._get_info() + self._inputs = self._get_inputs() + self._outputs = self._get_outputs() + + # fill model io + self._align = 128 + self._cmm_token = self._engine_ffi.new("AX_S8[]", b"PyEngine") + self._io[0].nInputSize = len(self.get_inputs()) + self._io[0].nOutputSize = len(self.get_outputs()) + self._io[0].pInputs = self._engine_ffi.new("AX_ENGINE_IO_BUFFER_T[{}]".format(self._io[0].nInputSize)) + self._io[0].pOutputs = self._engine_ffi.new("AX_ENGINE_IO_BUFFER_T[{}]".format(self._io[0].nOutputSize)) + for i in range(len(self.get_inputs())): + max_buf = 0 + for j in range(self._shape_count): + max_buf = max(max_buf, self._info[j][0].pInputs[i].nSize) + self._io[0].pInputs[i].nSize = max_buf + phy = self._engine_ffi.new("AX_U64*") + vir = self._engine_ffi.new("AX_VOID**") + ret = self._sys_lib.AX_SYS_MemAllocCached( + phy, vir, self._io[0].pInputs[i].nSize, self._align, self._cmm_token + ) + if 0 != ret: + raise RuntimeError("Failed to allocate memory for input.") 
+ self._io[0].pInputs[i].phyAddr = phy[0] + self._io[0].pInputs[i].pVirAddr = vir[0] + for i in range(len(self.get_outputs())): + max_buf = 0 + for j in range(self._shape_count): + max_buf = max(max_buf, self._info[j][0].pOutputs[i].nSize) + self._io[0].pOutputs[i].nSize = max_buf + phy = self._engine_ffi.new("AX_U64*") + vir = self._engine_ffi.new("AX_VOID**") + ret = self._sys_lib.AX_SYS_MemAllocCached( + phy, vir, self._io[0].pOutputs[i].nSize, self._align, self._cmm_token + ) + if 0 != ret: + raise RuntimeError("Failed to allocate memory for output.") + self._io[0].pOutputs[i].phyAddr = phy[0] + self._io[0].pOutputs[i].pVirAddr = vir[0] + + def __del__(self): + self._final() + + def _init(self, vnpu=VNPUType.DISABLED): # vnpu type, the default is disabled + ret = self._sys_lib.AX_SYS_Init() + if 0 != ret: + raise RuntimeError("Failed to initialize system.") + + # get vnpu type first, check if npu was initialized + vnpu_type = self._engine_ffi.new("AX_ENGINE_NPU_ATTR_T *") + ret = self._engine_lib.AX_ENGINE_GetVNPUAttr(vnpu_type) + if 0 != ret: + # this means the NPU was not initialized + vnpu_type.eHardMode = self._engine_ffi.cast("AX_ENGINE_NPU_MODE_T", vnpu.value) + + return self._engine_lib.AX_ENGINE_Init(vnpu_type) + + def _final(self): + if self._handle[0] is not None: + self._unload() + self._engine_lib.AX_ENGINE_Deinit() + return self._sys_lib.AX_SYS_Deinit() + + def _get_version(self): + engine_version = self._engine_lib.AX_ENGINE_GetVersion() + return self._engine_ffi.string(engine_version).decode("utf-8") + + def _get_vnpu_type(self) -> VNPUType: + vnpu_type = self._engine_ffi.new("AX_ENGINE_NPU_ATTR_T *") + ret = self._engine_lib.AX_ENGINE_GetVNPUAttr(vnpu_type) + if 0 != ret: + raise RuntimeError("Failed to get VNPU attribute.") + return VNPUType(vnpu_type.eHardMode) + + def _get_model_type(self) -> ModelType: + model_type = self._engine_ffi.new("AX_ENGINE_MODEL_TYPE_T *") + ret = self._engine_lib.AX_ENGINE_GetModelType(self._model_buffer, 
self._model_buffer_size, model_type) + if 0 != ret: + raise RuntimeError("Failed to get model type.") + return ModelType(model_type[0]) + + def _get_model_tool_version(self): + model_tool_version = self._engine_lib.AX_ENGINE_GetModelToolsVersion(self._handle[0]) + return self._engine_ffi.string(model_tool_version).decode("utf-8") + + def _load(self): + extra = self._engine_ffi.new("AX_ENGINE_HANDLE_EXTRA_T *") + extra_name = self._engine_ffi.new("char[]", self._model_name.encode("utf-8")) + extra.pName = extra_name + + # for onnx runtime do not support one model multiple context running in multi-thread as far as I know, so + # the engine handle and context will create only once + ret = self._engine_lib.AX_ENGINE_CreateHandleV2( + self._handle, self._model_buffer, self._model_buffer_size, extra + ) + if 0 == ret: + ret = self._engine_lib.AX_ENGINE_CreateContextV2(self._handle[0], self._context) + return ret + + def _get_info(self): + total_info = [] + if 1 == self._shape_count: + info = self._engine_ffi.new("AX_ENGINE_IO_INFO_T **") + ret = self._engine_lib.AX_ENGINE_GetIOInfo(self._handle[0], info) + if 0 != ret: + raise RuntimeError("Failed to get model shape.") + total_info.append(info) + else: + for i in range(self._shape_count): + info = self._engine_ffi.new("AX_ENGINE_IO_INFO_T **") + ret = self._engine_lib.AX_ENGINE_GetGroupIOInfo(self._handle[0], i, info) + if 0 != ret: + raise RuntimeError(f"Failed to get model the {i}th shape.") + total_info.append(info) + return total_info + + def _get_shape_count(self): + count = self._engine_ffi.new("AX_U32 *") + ret = self._engine_lib.AX_ENGINE_GetGroupIOInfoCount(self._handle[0], count) + if 0 != ret: + raise RuntimeError("Failed to get model shape group.") + return count[0] + + def _unload(self): + return self._engine_lib.AX_ENGINE_DestroyHandle(self._handle[0]) + + def _get_inputs(self): + inputs = [] + for group in range(self._shape_count): + one_group_input = [] + for index in 
range(self._info[group][0].nInputSize): + current_input = self._info[group][0].pInputs[index] + name = self._engine_ffi.string(current_input.pName).decode("utf-8") + shape = [] + for i in range(current_input.nShapeSize): + shape.append(current_input.pShape[i]) + dtype = _transform_dtype(self._engine_ffi, self._engine_lib, current_input.eDataType) + meta = NodeArg(name, dtype, shape) + one_group_input.append(meta) + inputs.append(one_group_input) + return inputs + + def _get_outputs(self): + outputs = [] + for group in range(self._shape_count): + one_group_output = [] + for index in range(self._info[group][0].nOutputSize): + current_output = self._info[group][0].pOutputs[index] + name = self._engine_ffi.string(current_output.pName).decode("utf-8") + shape = [] + for i in range(current_output.nShapeSize): + shape.append(current_output.pShape[i]) + dtype = _transform_dtype(self._engine_ffi, self._engine_lib, current_output.eDataType) + meta = NodeArg(name, dtype, shape) + one_group_output.append(meta) + outputs.append(one_group_output) + return outputs + + def get_inputs(self, shape_group=0) -> list[NodeArg]: + if shape_group > self._shape_count: + raise ValueError(f"Shape group '{shape_group}' is out of range, total {self._shape_count}.") + selected_info = self._inputs[shape_group] + return selected_info + + def get_outputs(self, shape_group=0) -> list[NodeArg]: + if shape_group > self._shape_count: + raise ValueError(f"Shape group '{shape_group}' is out of range, total {self._shape_count}.") + selected_info = self._outputs[shape_group] + return selected_info + + # copy from onnxruntime + def _validate_input(self, feed_input_names): + missing_input_names = [] + for i in self.get_inputs(): + if i.name not in feed_input_names: + missing_input_names.append(i.name) + if missing_input_names: + raise ValueError( + f"Required inputs ({missing_input_names}) are missing from input feed ({feed_input_names})." 
+ ) + + def _validate_output(self, output_names): + if output_names is not None: + for name in output_names: + if name not in [o.name for o in self.get_outputs()]: + raise ValueError(f"Output name '{name}' is not registered.") + + def run(self, output_names, input_feed, run_options=None): + self._validate_input(list(input_feed.keys())) + self._validate_output(output_names) + + if None is output_names: + output_names = [o.name for o in self.get_outputs()] + + # fill model io + for key, npy in input_feed.items(): + for i, one in enumerate(self.get_inputs()): + if one.name == key: + npy_ptr = self._engine_ffi.cast("void *", npy.ctypes.data) + self._engine_ffi.memmove(self._io[0].pInputs[i].pVirAddr, npy_ptr, npy.nbytes) + self._sys_lib.AX_SYS_MflushCache( + self._io[0].pInputs[i].phyAddr, self._io[0].pInputs[i].pVirAddr, self._io[0].pInputs[i].nSize + ) + break + + # execute model + ret = self._engine_lib.AX_ENGINE_RunSyncV2(self._handle[0], self._context[0], self._io) + + # flush output + outputs = [] + if 0 == ret: + for i in range(len(self.get_outputs())): + self._sys_lib.AX_SYS_MinvalidateCache( + self._io[0].pOutputs[i].phyAddr, self._io[0].pOutputs[i].pVirAddr, self._io[0].pOutputs[i].nSize + ) + npy = np.frombuffer( + self._engine_ffi.buffer(self._io[0].pOutputs[i].pVirAddr, self._io[0].pOutputs[i].nSize), + dtype=self.get_outputs()[i].dtype, + ).reshape(self.get_outputs()[i].shape) + name = self.get_outputs()[i].name + if name in output_names: + outputs.append(npy) + return outputs + else: + raise RuntimeError("Failed to run model.") diff --git a/examples/classification.py b/examples/classification.py new file mode 100644 index 0000000..a87d516 --- /dev/null +++ b/examples/classification.py @@ -0,0 +1,86 @@ +# Copyright (c) 2019-2024 Axera Semiconductor Co., Ltd. All Rights Reserved. +# +# This source file is the property of Axera Semiconductor Co., Ltd. 
and +# may not be copied or distributed in any isomorphic form without the prior +# written consent of Axera Semiconductor Co., Ltd. +# + +import axengine as axe +import numpy as np +from PIL import Image + + +def load_model(model_path): + session = axe.InferenceSession(model_path) + return session + + +def preprocess_image(image_path, target_size=(256, 256), crop_size=(224, 224)): + # Load the image + img = Image.open(image_path).convert("RGB") + + # Get original dimensions + original_width, original_height = img.size + + # Determine the shorter side and calculate the center crop + if original_width < original_height: + crop_area = original_width + else: + crop_area = original_height + + crop_x = (original_width - crop_area) // 2 + crop_y = (original_height - crop_area) // 2 + + # Crop the center square + img = img.crop((crop_x, crop_y, crop_x + crop_area, crop_y + crop_area)) + + # Resize the image to 256x256 + img = img.resize(target_size) + + # Crop the center 224x224 + crop_x = (target_size[0] - crop_size[0]) // 2 + crop_y = (target_size[1] - crop_size[1]) // 2 + img = img.crop((crop_x, crop_y, crop_x + crop_size[0], crop_y + crop_size[1])) + + # Convert to numpy array and change dtype to int + img_array = np.array(img).astype("uint8") + # Transpose to (1, C, H, W) + # img_array = np.transpose(img_array, (2, 0, 1)) + # img_array = np.expand_dims(img_array, axis=0) # Add batch dimension + return img_array + + +def get_top_k_predictions(output, k=5): + # Get top k predictions + top_k_indices = np.argsort(output[0])[-k:][::-1] + top_k_scores = output[0][top_k_indices] + return top_k_indices, top_k_scores + + +def main(model_path, image_path, target_size, crop_size, k): + # Load the model + session = load_model(model_path) + + # Preprocess the image + input_tensor = preprocess_image(image_path, target_size, crop_size) + + # Get input name and run inference + input_name = session.get_inputs()[0].name + output = session.run(None, {input_name: input_tensor}) + + # 
Get top k predictions
+    top_k_indices, top_k_scores = get_top_k_predictions(output, k)
+
+    # Print the results
+    print(f"Top {k} Predictions:")
+    for i in range(k):
+        print(f"Class Index: {top_k_indices[i]}, Score: {top_k_scores[i]}")
+
+
+if __name__ == "__main__":
+    MODEL_PATH = "/opt/data/npu/models/mobilenetv2.axmodel"
+    IMAGE_PATH = "/opt/data/npu/images/cat.jpg"
+    TARGET_SIZE = (256, 256)  # Resize to 256x256
+    CROP_SIZE = (224, 224)  # Crop to 224x224
+    K = 5  # Top K predictions
+    main(MODEL_PATH, IMAGE_PATH, TARGET_SIZE, CROP_SIZE, K)
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..609e459
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,22 @@
+from setuptools import setup
+
+setup(
+    name="axengine",
+    version="0.1",
+    classifiers=[
+        "Development Status :: 3 - Alpha",
+        "License :: OSI Approved :: BSD License",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
+        "Programming Language :: Python :: 3.13",
+        "Programming Language :: Python :: Implementation :: PyPy",
+    ],
+    packages=["axengine"],
+    ext_modules=[],
+    install_requires=["cffi>=1.0.0", "numpy>=1.22", "ml-dtypes>=0.1.0"],
+    setup_requires=["cffi>=1.0.0", "numpy>=1.22", "ml-dtypes>=0.1.0"],
+)