[Enhancement] Support latest ppl.nn & ppl.cv #564

Merged · 11 commits · merged Jul 21, 2022 · showing changes from 10 commits
2 changes: 1 addition & 1 deletion .circleci/docker/Dockerfile
@@ -5,7 +5,7 @@ ARG PYTHON_VERSION=3.8
 ARG TORCH_VERSION=1.10.0
 ARG TORCHVISION_VERSION=0.11.0
 ARG MMCV_VERSION=1.5.0
-ARG PPLCV_VERSION=0.6.2
+ARG PPLCV_VERSION=0.7.0
 ENV FORCE_CUDA="1"

 ENV DEBIAN_FRONTEND=noninteractive
4 changes: 3 additions & 1 deletion CMakeLists.txt
@@ -110,7 +110,9 @@ if (MMDEPLOY_BUILD_SDK)
   mmdeploy_add_deps(ort BACKENDS ${MMDEPLOY_TARGET_BACKENDS} DEPS ONNXRUNTIME)
   mmdeploy_add_deps(ncnn BACKENDS ${MMDEPLOY_TARGET_BACKENDS} DEPS ncnn)
   mmdeploy_add_deps(openvino BACKENDS ${MMDEPLOY_TARGET_BACKENDS} DEPS InferenceEngine)
-  mmdeploy_add_deps(pplnn BACKENDS ${MMDEPLOY_TARGET_BACKENDS} DEPS pplnn)
+  if (NOT MMDEPLOY_SHARED_LIBS)
+    mmdeploy_add_deps(pplnn BACKENDS ${MMDEPLOY_TARGET_BACKENDS} DEPS pplnn)
+  endif ()

   include(CMakePackageConfigHelpers)
   # generate the config file that is includes the exports
4 changes: 2 additions & 2 deletions csrc/mmdeploy/net/ppl/CMakeLists.txt
@@ -7,10 +7,10 @@ find_package(pplnn REQUIRED)
 mmdeploy_add_module(${PROJECT_NAME} ppl_net.cpp)
 target_include_directories(${PROJECT_NAME} PUBLIC
     $<BUILD_INTERFACE:${PPLNN_INCLUDE_DIRS}>)
-if ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES)
+if (PPLNN_USE_X86 AND ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES))
   target_compile_definitions(${PROJECT_NAME} PRIVATE -DPPL_NN_HAS_X86=1)
 endif ()
-if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES)
+if (PPLNN_USE_CUDA AND ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES))
   target_compile_definitions(${PROJECT_NAME} PRIVATE -DPPL_NN_HAS_CUDA=1)
   target_include_directories(${PROJECT_NAME} PUBLIC ${CUDA_TOOLKIT_ROOT_DIR}/include)
   target_link_directories(${PROJECT_NAME} PUBLIC ${CUDA_TOOLKIT_ROOT_DIR}/lib64)
66 changes: 48 additions & 18 deletions csrc/mmdeploy/net/ppl/ppl_net.cpp
@@ -6,14 +6,17 @@
 #include "mmdeploy/core/model.h"
 #include "mmdeploy/core/utils/formatter.h"
 #include "ppl/nn/common/logger.h"
-#include "ppl/nn/models/onnx/onnx_runtime_builder_factory.h"
+#include "ppl/nn/models/onnx/runtime_builder_factory.h"
 #if PPL_NN_HAS_X86
 #include "ppl/nn/engines/x86/engine_factory.h"
-#include "ppl/nn/engines/x86/x86_options.h"
+#include "ppl/nn/engines/x86/engine_options.h"
+#include "ppl/nn/engines/x86/ops.h"
 #endif
 #if PPL_NN_HAS_CUDA
-#include "ppl/nn/engines/cuda/cuda_options.h"
 #include "ppl/nn/engines/cuda/engine_factory.h"
+#include "ppl/nn/engines/cuda/engine_options.h"
+#include "ppl/nn/engines/cuda/ops.h"
+#define PPL_CUDA_IMPORT_FROM_BUFFER 0
 #endif

 namespace mmdeploy {
@@ -35,7 +38,7 @@ Result<std::unique_ptr<T>> ppl_try(T* v) {
 }

 Tensor PPLNet::CreateInternalTensor(ppl::nn::Tensor* src, Device device) {
-  auto desc = src->GetShape();
+  const auto& desc = *src->GetShape();
   auto name = src->GetName();
   std::vector<int64_t> shape{desc.GetDims(), desc.GetDims() + desc.GetDimCount()};
   if (std::any_of(begin(shape), end(shape), [](auto x) { return x <= 0; })) {
@@ -56,15 +59,37 @@ Result<void> PPLNet::Init(const Value& args) {

 #if PPL_NN_HAS_CUDA
   if (device_.is_device()) {
-    engines_.emplace_back(ppl::nn::CudaEngineFactory::Create({}));
-    // Use default algorithms until PPL can set algorithms from a memory buffer
-    // since the optimization process is really slow
-    engines_.back()->Configure(ppl::nn::CUDA_CONF_USE_DEFAULT_ALGORITHMS, true);
+    ppl::nn::cuda::RegisterBuiltinOpImpls();
+    ppl::nn::cuda::EngineOptions options{};
+    options.device_id = device_.device_id();
+    options.mm_policy = ppl::nn::cuda::MM_BEST_FIT;
+    engines_.emplace_back(ppl::nn::cuda::EngineFactory::Create(options));
+
+    bool import_algo = false;
+
+#if PPL_CUDA_IMPORT_FROM_BUFFER
+    auto algo = model.ReadFile(config.weights);
+    if (algo) {
+      auto ret =
+          engines_.back()->Configure(ppl::nn::cuda::ENGINE_CONF_IMPORT_ALGORITHMS_FROM_BUFFER,
+                                     algo.value().c_str(), algo.value().size());
+      if (ret == ppl::common::RC_SUCCESS) {
+        import_algo = true;
+      } else {
+        MMDEPLOY_ERROR("failed to import algorithms ({}), default algorithms will be used", ret);
+      }
+    }
+#endif
+
+    if (!import_algo) {
+      engines_.back()->Configure(ppl::nn::cuda::ENGINE_CONF_USE_DEFAULT_ALGORITHMS, true);
+    }
   }
 #endif
 #if PPL_NN_HAS_X86
   if (device_.is_host()) {
-    engines_.emplace_back(ppl::nn::X86EngineFactory::Create({}));
+    ppl::nn::x86::RegisterBuiltinOpImpls();
+    engines_.emplace_back(ppl::nn::x86::EngineFactory::Create({}));
   }
 #endif

@@ -73,8 +98,14 @@ Result<void> PPLNet::Init(const Value& args) {
     engines.push_back(engine.get());
   }

-  OUTCOME_TRY(auto builder, ppl_try(ppl::nn::OnnxRuntimeBuilderFactory::Create(
-                                onnx.data(), onnx.size(), engines.data(), engines.size())));
+  OUTCOME_TRY(auto builder, ppl_try(ppl::nn::onnx::RuntimeBuilderFactory::Create()));
+  OUTCOME_TRY(ppl_try(builder->LoadModel(onnx.data(), onnx.size(), nullptr)));
+
+  ppl::nn::onnx::RuntimeBuilder::Resources resources{};
+  resources.engines = engines.data();
+  resources.engine_num = engines.size();
+  OUTCOME_TRY(ppl_try(builder->SetResources(resources)));
+  OUTCOME_TRY(ppl_try(builder->Preprocess()));

   OUTCOME_TRY(auto runtime, ppl_try(builder->CreateRuntime()));

@@ -84,7 +115,7 @@ Result<void> PPLNet::Init(const Value& args) {
     inputs_external_.push_back(CreateInternalTensor(src, device_));

     /// debug only
-    auto& desc = inputs_internal_[i]->GetShape();
+    const auto& desc = *inputs_internal_[i]->GetShape();
     std::vector<long> shape_(desc.GetDims(), desc.GetDims() + desc.GetDimCount());
     MMDEPLOY_DEBUG("input {}: datatype = {}, dataformat = {}, shape = {}", i,
                    ppl::common::GetDataTypeStr(desc.GetDataType()),
@@ -96,7 +127,7 @@ Result<void> PPLNet::Init(const Value& args) {
     outputs_internal_.push_back(src);
     outputs_external_.push_back(CreateInternalTensor(src, device_));

-    auto desc = outputs_internal_[i]->GetShape();
+    const auto& desc = *outputs_internal_[i]->GetShape();
     std::vector<long> shape_(desc.GetDims(), desc.GetDims() + desc.GetDimCount());
     MMDEPLOY_DEBUG("output {}: datatype = {}, dataformat = {}, shape = {}", i,
                    ppl::common::GetDataTypeStr(desc.GetDataType()),
@@ -128,7 +159,7 @@ Result<void> PPLNet::Deinit() {
 }

 static TensorShape GetShape(const PPLTensor& tensor) {
-  auto& desc = tensor.GetShape();
+  const auto& desc = *tensor.GetShape();
   return {desc.GetDims(), desc.GetDims() + desc.GetDimCount()};
 }

@@ -170,18 +201,17 @@ Result<void> PPLNet::Forward() {
   OUTCOME_TRY(stream_.Wait());

   OUTCOME_TRY(ppl_try(runtime_->Run()));
-  OUTCOME_TRY(ppl_try(runtime_->Sync()));

   for (int i = 0; i < outputs_external_.size(); ++i) {
     auto& internal = *outputs_internal_[i];
-    auto format = internal.GetShape().GetDataFormat();
+    auto format = internal.GetShape()->GetDataFormat();
     if (format != ppl::common::DATAFORMAT_NDARRAY) {
       MMDEPLOY_ERROR("output {}'s format is {}, only NDARRAY is currently supported", i,
                      ppl::common::GetDataFormatStr(format));
       return Status(eNotSupported);
     }
     auto& external = outputs_external_[i];
-    auto dtype_int = internal.GetShape().GetDataType();
+    auto dtype_int = internal.GetShape()->GetDataType();
     OUTCOME_TRY(auto dtype_ext, GetPPLDataType(external.data_type()));
     auto shape_int = GetShape(internal);
     auto shape_ext = external.shape();
@@ -213,7 +243,7 @@ Result<void> PPLNet::Forward() {
 Result<void> PPLNet::ForwardAsync(Event* event) { return Status(eNotSupported); }

 Result<void> ReshapeLike(PPLTensor& dst, Tensor& src) {
-  auto& dst_desc = dst.GetShape();
+  auto& dst_desc = *dst.GetShape();
   auto& src_desc = src.desc();
   OUTCOME_TRY(auto data_type, GetPPLDataType(src_desc.data_type));
   dst_desc.SetDataType(data_type);
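
Editor's note: taken together, the ppl_net.cpp hunks migrate from the pre-0.8 one-shot OnnxRuntimeBuilderFactory::Create(...) to an explicit pipeline — create a builder, load the model, attach engines, preprocess, then create the runtime. As a reading aid, here is that flow condensed into a standalone sketch. This is an illustration rather than code from the PR: error handling is elided, the x86 engine is used for brevity, and the "ppl/nn/runtime/runtime.h" include path is an assumption.

#include <memory>
#include <string>
#include <utility>

#include "ppl/nn/engines/x86/engine_factory.h"
#include "ppl/nn/engines/x86/ops.h"
#include "ppl/nn/models/onnx/runtime_builder_factory.h"
#include "ppl/nn/runtime/runtime.h"  // assumed path for ppl::nn::Runtime

// The runtime references its engines, so the engine must outlive it.
struct LoadedModel {
  std::unique_ptr<ppl::nn::Engine> engine;
  std::unique_ptr<ppl::nn::Runtime> runtime;
};

LoadedModel LoadOnnx(const std::string& onnx_buf) {
  // Since ppl.nn 0.8, built-in op implementations must be registered explicitly.
  ppl::nn::x86::RegisterBuiltinOpImpls();
  std::unique_ptr<ppl::nn::Engine> engine(ppl::nn::x86::EngineFactory::Create({}));

  // The factory no longer takes the model; loading is a separate step.
  std::unique_ptr<ppl::nn::onnx::RuntimeBuilder> builder(
      ppl::nn::onnx::RuntimeBuilderFactory::Create());
  builder->LoadModel(onnx_buf.data(), onnx_buf.size(), nullptr);

  // Engines are attached through a Resources struct instead of factory arguments.
  ppl::nn::Engine* engines[] = {engine.get()};
  ppl::nn::onnx::RuntimeBuilder::Resources resources{};
  resources.engines = engines;
  resources.engine_num = 1;
  builder->SetResources(resources);

  // Graph optimization, formerly implicit in the one-shot factory call.
  builder->Preprocess();

  std::unique_ptr<ppl::nn::Runtime> runtime(builder->CreateRuntime());
  return {std::move(engine), std::move(runtime)};
}

This mirrors how PPLNet::Init keeps engines_ alive as members alongside runtime_.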
10 changes: 1 addition & 9 deletions csrc/mmdeploy/preprocess/cuda/pad_impl.cpp
@@ -14,18 +14,10 @@ namespace cuda {
 class PadImpl : public ::mmdeploy::PadImpl {
  public:
   explicit PadImpl(const Value& args) : ::mmdeploy::PadImpl(args) {
-#if PPLCV_VERSION_MAJOR >= 0 && PPLCV_VERSION_MINOR >= 6 && PPLCV_VERSION_PATCH >= 2
     map<string, ppl::cv::BorderType> border_map{{"constant", ppl::cv::BORDER_CONSTANT},
                                                 {"edge", ppl::cv::BORDER_REPLICATE},
                                                 {"reflect", ppl::cv::BORDER_REFLECT_101},
-                                                { "symmetric",
-                                                  ppl::cv::BORDER_REFLECT }};
-#else
-    map<string, ppl::cv::BorderType> border_map{{"constant", ppl::cv::BORDER_TYPE_CONSTANT},
-                                                {"edge", ppl::cv::BORDER_TYPE_REPLICATE},
-                                                {"reflect", ppl::cv::BORDER_TYPE_REFLECT_101},
-                                                {"symmetric", ppl::cv::BORDER_TYPE_REFLECT}};
-#endif
+                                                {"symmetric", ppl::cv::BORDER_REFLECT}};
     if (border_map.find(arg_.padding_mode) == border_map.end()) {
       MMDEPLOY_ERROR("unsupported padding_mode '{}'", arg_.padding_mode);
       throw_exception(eNotSupported);
11 changes: 0 additions & 11 deletions csrc/mmdeploy/preprocess/cuda/resize_impl.cpp
@@ -45,7 +45,6 @@ class ResizeImpl final : public ::mmdeploy::ResizeImpl {
  private:
   template <class T, int C, class... Args>
   ppl::common::RetCode DispatchImpl(Args&&... args) {
-#if PPLCV_VERSION_MAJOR >= 0 && PPLCV_VERSION_MINOR >= 6 && PPLCV_VERSION_PATCH >= 2
     if (arg_.interpolation == "bilinear") {
       return ppl::cv::cuda::Resize<T, C>(std::forward<Args>(args)...,
                                          ppl::cv::INTERPOLATION_LINEAR);
@@ -54,16 +53,6 @@ class ResizeImpl final : public ::mmdeploy::ResizeImpl {
     if (arg_.interpolation == "nearest") {
       return ppl::cv::cuda::Resize<T, C>(std::forward<Args>(args)...,
                                          ppl::cv::INTERPOLATION_NEAREST_POINT);
     }
-#else
-    if (arg_.interpolation == "bilinear") {
-      return ppl::cv::cuda::Resize<T, C>(std::forward<Args>(args)...,
-                                         ppl::cv::INTERPOLATION_TYPE_LINEAR);
-    }
-    if (arg_.interpolation == "nearest") {
-      return ppl::cv::cuda::Resize<T, C>(std::forward<Args>(args)...,
-                                         ppl::cv::INTERPOLATION_TYPE_NEAREST_POINT);
-    }
-#endif
     return ppl::common::RC_UNSUPPORTED;
   }
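
Editor's note: the two preprocessing hunks above can drop their version guards because ppl.cv 0.7 becomes the single supported version, and it renamed the BORDER_TYPE_*/INTERPOLATION_TYPE_* enumerators to BORDER_*/INTERPOLATION_*. If a downstream fork still had to straddle both ppl.cv versions, a small alias layer would do. This is a hypothetical sketch, not part of the PR (which simply drops the old names); the "ppl/cv/types.h" include path is an assumption.

// Hypothetical compatibility shim: maps ppl.cv < 0.7 enumerator names onto the
// 0.7+ spellings so call sites can use a single set of constants.
#include "ppl/cv/types.h"  // assumed header declaring BorderType/InterpolationType

namespace pplcv_compat {
#if PPLCV_VERSION_MAJOR == 0 && PPLCV_VERSION_MINOR < 7
constexpr auto kBorderConstant = ppl::cv::BORDER_TYPE_CONSTANT;
constexpr auto kBorderReplicate = ppl::cv::BORDER_TYPE_REPLICATE;
constexpr auto kBorderReflect101 = ppl::cv::BORDER_TYPE_REFLECT_101;
constexpr auto kInterpLinear = ppl::cv::INTERPOLATION_TYPE_LINEAR;
constexpr auto kInterpNearest = ppl::cv::INTERPOLATION_TYPE_NEAREST_POINT;
#else
constexpr auto kBorderConstant = ppl::cv::BORDER_CONSTANT;
constexpr auto kBorderReplicate = ppl::cv::BORDER_REPLICATE;
constexpr auto kBorderReflect101 = ppl::cv::BORDER_REFLECT_101;
constexpr auto kInterpLinear = ppl::cv::INTERPOLATION_LINEAR;
constexpr auto kInterpNearest = ppl::cv::INTERPOLATION_NEAREST_POINT;
#endif
}  // namespace pplcv_compat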
2 changes: 1 addition & 1 deletion docker/GPU/Dockerfile
@@ -6,7 +6,7 @@ ARG TORCH_VERSION=1.8.0
 ARG TORCHVISION_VERSION=0.9.0
 ARG ONNXRUNTIME_VERSION=1.8.1
 ARG MMCV_VERSION=1.4.0
-ARG PPLCV_VERSION=0.6.2
+ARG PPLCV_VERSION=0.7.0
 ENV FORCE_CUDA="1"

 ENV DEBIAN_FRONTEND=noninteractive
5 changes: 2 additions & 3 deletions docs/en/01-how-to-build/linux-x86_64.md
@@ -110,13 +110,12 @@ sudo apt-get install libopencv-dev
 <tr>
   <td>pplcv </td>
   <td>A high-performance image processing library of openPPL.<br>
-  <b>It is optional which only be needed if <code>cuda</code> platform is required.
-  Now, MMDeploy supports v0.6.2 and has to use <code>git clone</code> to download it.</b><br>
+  <b>It is optional which only be needed if <code>cuda</code> platform is required.</b><br>
 <pre><code>
 git clone https://github.com/openppl-public/ppl.cv.git
 cd ppl.cv
 export PPLCV_DIR=$(pwd)
-git checkout tags/v0.6.2 -b v0.6.2
+git checkout tags/v0.7.0 -b v0.7.0
 ./build.sh cuda
 </code></pre>
   </td>
7 changes: 3 additions & 4 deletions docs/en/01-how-to-build/windows.md
@@ -97,16 +97,15 @@ You can skip this chapter if you are only interested in the model converter.
 <tr>
   <td>pplcv </td>
   <td>A high-performance image processing library of openPPL.<br>
-  <b>It is optional which only be needed if <code>cuda</code> platform is required.
-  Now, MMDeploy supports v0.6.2 and has to use <code>git clone</code> to download it.</b><br>
+  <b>It is optional which only be needed if <code>cuda</code> platform is required.</b><br>
 <pre><code>
 git clone https://github.com/openppl-public/ppl.cv.git
 cd ppl.cv
-git checkout tags/v0.6.2 -b v0.6.2
+git checkout tags/v0.7.0 -b v0.7.0
 $env:PPLCV_DIR = "$pwd"
 mkdir pplcv-build
 cd pplcv-build
-cmake .. -G "Visual Studio 16 2019" -T v142 -A x64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install -DHPCC_USE_CUDA=ON -DHPCC_MSVC_MD=ON
+cmake .. -G "Visual Studio 16 2019" -T v142 -A x64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install -DHPCC_USE_CUDA=ON -DPPLCV_USE_MSVC_STATIC_RUNTIME=OFF
 cmake --build . --config Release -- /m
 cmake --install . --config Release
 cd ../..
2 changes: 1 addition & 1 deletion docs/en/05-supported-backends/pplnn.md
@@ -1,6 +1,6 @@
 # PPLNN Support

-This tutorial is based on Linux systems like Ubuntu-18.04.
+MMDeploy supports ppl.nn v0.8.1 and later. This tutorial is based on Linux systems like Ubuntu-18.04.

 ## Installation
4 changes: 2 additions & 2 deletions docs/zh_cn/01-how-to-build/linux-x86_64.md
@@ -110,12 +110,12 @@ sudo apt-get install libopencv-dev
 </tr>
 <tr>
   <td>pplcv </td>
-  <td>pplcv is a high-performance image processing library developed by openPPL. <b>This dependency is optional and only needs to be installed for the cuda platform. Moreover, v0.6.2 is currently required, and the source must be downloaded with git clone and built from it.</b><br>
+  <td>pplcv is a high-performance image processing library developed by openPPL. <b>This dependency is optional and only needs to be installed for the cuda platform.</b><br>
 <pre><code>
 git clone https://github.com/openppl-public/ppl.cv.git
 cd ppl.cv
 export PPLCV_DIR=$(pwd)
-git checkout tags/v0.6.2 -b v0.6.2
+git checkout tags/v0.7.0 -b v0.7.0
 ./build.sh cuda
 </code></pre>
   </td>
6 changes: 3 additions & 3 deletions docs/zh_cn/01-how-to-build/windows.md
@@ -94,15 +94,15 @@ pip install mmcv-full==1.4.0 -f https://download.openmmlab.com/mmcv/dist/$env:cu
 </tr>
 <tr>
   <td>pplcv </td>
-  <td>pplcv is a high-performance image processing library developed by openPPL. <b>This dependency is optional and only needs to be installed for the cuda platform. Moreover, v0.6.2 is currently required, and the source must be downloaded with git clone and built from it.</b><br>
+  <td>pplcv is a high-performance image processing library developed by openPPL. <b>This dependency is optional and only needs to be installed for the cuda platform.</b><br>
 <pre><code>
 git clone https://github.com/openppl-public/ppl.cv.git
 cd ppl.cv
-git checkout tags/v0.6.2 -b v0.6.2
+git checkout tags/v0.7.0 -b v0.7.0
 $env:PPLCV_DIR = "$pwd"
 mkdir pplcv-build
 cd pplcv-build
-cmake .. -G "Visual Studio 16 2019" -T v142 -A x64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install -DHPCC_USE_CUDA=ON -DHPCC_MSVC_MD=ON
+cmake .. -G "Visual Studio 16 2019" -T v142 -A x64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install -DPPLCV_USE_CUDA=ON -DPPLCV_USE_MSVC_STATIC_RUNTIME=OFF
 cmake --build . --config Release -- /m
 cmake --install . --config Release
 cd ../..
9 changes: 2 additions & 7 deletions mmdeploy/backend/pplnn/onnx2pplnn.py
@@ -1,10 +1,8 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from typing import Optional, Sequence

-from pyppl import nn as pplnn
-
 from mmdeploy.utils.device import parse_cuda_device_id
-from .utils import register_engines
+from .utils import create_runtime, register_engines


 def from_onnx(onnx_model: str,
@@ -52,10 +50,7 @@ def from_onnx(onnx_model: str,
         quick_select=False,
         export_algo_file=algo_file,
         input_shapes=input_shapes)
-    runtime_builder = pplnn.OnnxRuntimeBuilderFactory.CreateFromFile(
-        onnx_model, engines)
-    assert runtime_builder is not None, 'Failed to create '\
-        'OnnxRuntimeBuilder.'
+    _ = create_runtime(onnx_model, engines)  # side effect: export algorithms
     import shutil
     if onnx_output_path != onnx_model:
         shutil.copy2(onnx_model, onnx_output_path)
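
Editor's note: the create_runtime call above is kept purely for its side effect — with quick_select=False and export_algo_file set, ppl.nn's CUDA engine runs its algorithm search and writes the selections to algo_file. On the SDK side, the PPL_CUDA_IMPORT_FROM_BUFFER branch in ppl_net.cpp (compiled out in this commit) is the intended consumer of that file. Below is a hedged C++ sketch of such a consumer, not code from the PR: the ReadWholeFile helper is hypothetical, and the ppl::common retcode constants are assumed to come in transitively with the engine headers.

#include <fstream>
#include <sstream>
#include <string>

#include "ppl/nn/engines/cuda/engine_factory.h"
#include "ppl/nn/engines/cuda/engine_options.h"
#include "ppl/nn/engines/cuda/ops.h"

// Hypothetical helper: slurp a previously exported algorithm file into memory.
static std::string ReadWholeFile(const std::string& path) {
  std::ifstream ifs(path, std::ios::binary);
  std::ostringstream oss;
  oss << ifs.rdbuf();
  return oss.str();
}

// Create a CUDA engine and try to reuse exported algorithm selections,
// mirroring the PPL_CUDA_IMPORT_FROM_BUFFER branch in ppl_net.cpp.
// The caller owns the returned engine.
ppl::nn::Engine* CreateCudaEngine(int device_id, const std::string& algo_path) {
  ppl::nn::cuda::RegisterBuiltinOpImpls();
  ppl::nn::cuda::EngineOptions options{};
  options.device_id = device_id;
  options.mm_policy = ppl::nn::cuda::MM_BEST_FIT;
  auto* engine = ppl::nn::cuda::EngineFactory::Create(options);

  bool imported = false;
  auto algo = ReadWholeFile(algo_path);
  if (!algo.empty()) {
    auto rc = engine->Configure(ppl::nn::cuda::ENGINE_CONF_IMPORT_ALGORITHMS_FROM_BUFFER,
                                algo.c_str(), algo.size());
    imported = (rc == ppl::common::RC_SUCCESS);
  }
  if (!imported) {
    // Fall back to defaults rather than re-running the slow algorithm search.
    engine->Configure(ppl::nn::cuda::ENGINE_CONF_USE_DEFAULT_ALGORITHMS, true);
  }
  return engine;
}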