[Enhancement] Support latest ppl.nn & ppl.cv #564

Merged · 11 commits · merged Jul 21, 2022 · showing changes from 10 commits
2 changes: 1 addition & 1 deletion .circleci/docker/Dockerfile
@@ -5,7 +5,7 @@ ARG PYTHON_VERSION=3.8
 ARG TORCH_VERSION=1.10.0
 ARG TORCHVISION_VERSION=0.11.0
 ARG MMCV_VERSION=1.5.0
-ARG PPLCV_VERSION=0.6.2
+ARG PPLCV_VERSION=0.7.0
 ENV FORCE_CUDA="1"

 ENV DEBIAN_FRONTEND=noninteractive
4 changes: 3 additions & 1 deletion CMakeLists.txt
@@ -110,7 +110,9 @@ if (MMDEPLOY_BUILD_SDK)
   mmdeploy_add_deps(ort BACKENDS ${MMDEPLOY_TARGET_BACKENDS} DEPS ONNXRUNTIME)
   mmdeploy_add_deps(ncnn BACKENDS ${MMDEPLOY_TARGET_BACKENDS} DEPS ncnn)
   mmdeploy_add_deps(openvino BACKENDS ${MMDEPLOY_TARGET_BACKENDS} DEPS InferenceEngine)
-  mmdeploy_add_deps(pplnn BACKENDS ${MMDEPLOY_TARGET_BACKENDS} DEPS pplnn)
+  if (NOT MMDEPLOY_SHARED_LIBS)
+    mmdeploy_add_deps(pplnn BACKENDS ${MMDEPLOY_TARGET_BACKENDS} DEPS pplnn)
+  endif ()

   include(CMakePackageConfigHelpers)
   # generate the config file that is includes the exports
4 changes: 2 additions & 2 deletions csrc/mmdeploy/net/ppl/CMakeLists.txt
@@ -7,10 +7,10 @@ find_package(pplnn REQUIRED)
 mmdeploy_add_module(${PROJECT_NAME} ppl_net.cpp)
 target_include_directories(${PROJECT_NAME} PUBLIC
     $<BUILD_INTERFACE:${PPLNN_INCLUDE_DIRS}>)
-if ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES)
+if (PPLNN_USE_X86 AND ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES))
   target_compile_definitions(${PROJECT_NAME} PRIVATE -DPPL_NN_HAS_X86=1)
 endif ()
-if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES)
+if (PPLNN_USE_CUDA AND ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES))
   target_compile_definitions(${PROJECT_NAME} PRIVATE -DPPL_NN_HAS_CUDA=1)
   target_include_directories(${PROJECT_NAME} PUBLIC ${CUDA_TOOLKIT_ROOT_DIR}/include)
   target_link_directories(${PROJECT_NAME} PUBLIC ${CUDA_TOOLKIT_ROOT_DIR}/lib64)
66 changes: 48 additions & 18 deletions csrc/mmdeploy/net/ppl/ppl_net.cpp
@@ -6,14 +6,17 @@
 #include "mmdeploy/core/model.h"
 #include "mmdeploy/core/utils/formatter.h"
 #include "ppl/nn/common/logger.h"
-#include "ppl/nn/models/onnx/onnx_runtime_builder_factory.h"
+#include "ppl/nn/models/onnx/runtime_builder_factory.h"
 #if PPL_NN_HAS_X86
 #include "ppl/nn/engines/x86/engine_factory.h"
-#include "ppl/nn/engines/x86/x86_options.h"
+#include "ppl/nn/engines/x86/engine_options.h"
+#include "ppl/nn/engines/x86/ops.h"
 #endif
 #if PPL_NN_HAS_CUDA
-#include "ppl/nn/engines/cuda/cuda_options.h"
 #include "ppl/nn/engines/cuda/engine_factory.h"
+#include "ppl/nn/engines/cuda/engine_options.h"
+#include "ppl/nn/engines/cuda/ops.h"
+#define PPL_CUDA_IMPORT_FROM_BUFFER 0
 #endif

 namespace mmdeploy {
@@ -35,7 +38,7 @@ Result<std::unique_ptr<T>> ppl_try(T* v) {
 }

 Tensor PPLNet::CreateInternalTensor(ppl::nn::Tensor* src, Device device) {
-  auto desc = src->GetShape();
+  const auto& desc = *src->GetShape();
   auto name = src->GetName();
   std::vector<int64_t> shape{desc.GetDims(), desc.GetDims() + desc.GetDimCount()};
   if (std::any_of(begin(shape), end(shape), [](auto x) { return x <= 0; })) {
@@ -56,15 +59,37 @@ Result<void> PPLNet::Init(const Value& args) {

 #if PPL_NN_HAS_CUDA
   if (device_.is_device()) {
-    engines_.emplace_back(ppl::nn::CudaEngineFactory::Create({}));
-    // Use default algorithms until PPL can set algorithms from a memory buffer
-    // since the optimization process is really slow
-    engines_.back()->Configure(ppl::nn::CUDA_CONF_USE_DEFAULT_ALGORITHMS, true);
+    ppl::nn::cuda::RegisterBuiltinOpImpls();
+    ppl::nn::cuda::EngineOptions options{};
+    options.device_id = device_.device_id();
+    options.mm_policy = ppl::nn::cuda::MM_BEST_FIT;
+    engines_.emplace_back(ppl::nn::cuda::EngineFactory::Create(options));
+
+    bool import_algo = false;
+
+#if PPL_CUDA_IMPORT_FROM_BUFFER
+    auto algo = model.ReadFile(config.weights);
+    if (algo) {
+      auto ret =
+          engines_.back()->Configure(ppl::nn::cuda::ENGINE_CONF_IMPORT_ALGORITHMS_FROM_BUFFER,
+                                     algo.value().c_str(), algo.value().size());
+      if (ret == ppl::common::RC_SUCCESS) {
+        import_algo = true;
+      } else {
+        MMDEPLOY_ERROR("failed to import algorithms ({}), default algorithms will be used", ret);
+      }
+    }
+#endif
+
+    if (!import_algo) {
+      engines_.back()->Configure(ppl::nn::cuda::ENGINE_CONF_USE_DEFAULT_ALGORITHMS, true);
+    }
   }
 #endif
 #if PPL_NN_HAS_X86
   if (device_.is_host()) {
-    engines_.emplace_back(ppl::nn::X86EngineFactory::Create({}));
+    ppl::nn::x86::RegisterBuiltinOpImpls();
+    engines_.emplace_back(ppl::nn::x86::EngineFactory::Create({}));
   }
 #endif

@@ -73,8 +98,14 @@ Result<void> PPLNet::Init(const Value& args) {
     engines.push_back(engine.get());
   }

-  OUTCOME_TRY(auto builder, ppl_try(ppl::nn::OnnxRuntimeBuilderFactory::Create(
-                                onnx.data(), onnx.size(), engines.data(), engines.size())));
+  OUTCOME_TRY(auto builder, ppl_try(ppl::nn::onnx::RuntimeBuilderFactory::Create()));
+  OUTCOME_TRY(ppl_try(builder->LoadModel(onnx.data(), onnx.size(), nullptr)));
+
+  ppl::nn::onnx::RuntimeBuilder::Resources resources{};
+  resources.engines = engines.data();
+  resources.engine_num = engines.size();
+  OUTCOME_TRY(ppl_try(builder->SetResources(resources)));
+  OUTCOME_TRY(ppl_try(builder->Preprocess()));

   OUTCOME_TRY(auto runtime, ppl_try(builder->CreateRuntime()));

@@ -84,7 +115,7 @@ Result<void> PPLNet::Init(const Value& args) {
     inputs_external_.push_back(CreateInternalTensor(src, device_));

     /// debug only
-    auto& desc = inputs_internal_[i]->GetShape();
+    const auto& desc = *inputs_internal_[i]->GetShape();
     std::vector<long> shape_(desc.GetDims(), desc.GetDims() + desc.GetDimCount());
     MMDEPLOY_DEBUG("input {}: datatype = {}, dataformat = {}, shape = {}", i,
                    ppl::common::GetDataTypeStr(desc.GetDataType()),
@@ -96,7 +127,7 @@ Result<void> PPLNet::Init(const Value& args) {
     outputs_internal_.push_back(src);
     outputs_external_.push_back(CreateInternalTensor(src, device_));

-    auto desc = outputs_internal_[i]->GetShape();
+    const auto& desc = *outputs_internal_[i]->GetShape();
     std::vector<long> shape_(desc.GetDims(), desc.GetDims() + desc.GetDimCount());
     MMDEPLOY_DEBUG("output {}: datatype = {}, dataformat = {}, shape = {}", i,
                    ppl::common::GetDataTypeStr(desc.GetDataType()),
@@ -128,7 +159,7 @@ Result<void> PPLNet::Deinit() {
 }

 static TensorShape GetShape(const PPLTensor& tensor) {
-  auto& desc = tensor.GetShape();
+  const auto& desc = *tensor.GetShape();
   return {desc.GetDims(), desc.GetDims() + desc.GetDimCount()};
 }

@@ -170,18 +201,17 @@ Result<void> PPLNet::Forward() {
   OUTCOME_TRY(stream_.Wait());

   OUTCOME_TRY(ppl_try(runtime_->Run()));
-  OUTCOME_TRY(ppl_try(runtime_->Sync()));

   for (int i = 0; i < outputs_external_.size(); ++i) {
     auto& internal = *outputs_internal_[i];
-    auto format = internal.GetShape().GetDataFormat();
+    auto format = internal.GetShape()->GetDataFormat();
     if (format != ppl::common::DATAFORMAT_NDARRAY) {
       MMDEPLOY_ERROR("output {}'s format is {}, only NDARRAY is currently supported", i,
                      ppl::common::GetDataFormatStr(format));
       return Status(eNotSupported);
     }
     auto& external = outputs_external_[i];
-    auto dtype_int = internal.GetShape().GetDataType();
+    auto dtype_int = internal.GetShape()->GetDataType();
     OUTCOME_TRY(auto dtype_ext, GetPPLDataType(external.data_type()));
     auto shape_int = GetShape(internal);
     auto shape_ext = external.shape();
@@ -213,7 +243,7 @@ Result<void> PPLNet::Forward() {
 Result<void> PPLNet::ForwardAsync(Event* event) { return Status(eNotSupported); }

 Result<void> ReshapeLike(PPLTensor& dst, Tensor& src) {
-  auto& dst_desc = dst.GetShape();
+  auto& dst_desc = *dst.GetShape();
   auto& src_desc = src.desc();
   OUTCOME_TRY(auto data_type, GetPPLDataType(src_desc.data_type));
   dst_desc.SetDataType(data_type);
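
Editor's note: taken together, the ppl_net.cpp hunks migrate from the pre-0.8 one-shot OnnxRuntimeBuilderFactory::Create(...) to an explicit pipeline — create a builder, load the model, attach engines, preprocess, then create the runtime. As a reading aid, here is that flow condensed into a standalone sketch. This is an illustration rather than code from the PR: error handling is elided, the x86 engine is used for brevity, and the "ppl/nn/runtime/runtime.h" include path is an assumption.

#include <memory>
#include <string>
#include <utility>

#include "ppl/nn/engines/x86/engine_factory.h"
#include "ppl/nn/engines/x86/ops.h"
#include "ppl/nn/models/onnx/runtime_builder_factory.h"
#include "ppl/nn/runtime/runtime.h"  // assumed path for ppl::nn::Runtime

// The runtime references its engines, so the engine must outlive it.
struct LoadedModel {
  std::unique_ptr<ppl::nn::Engine> engine;
  std::unique_ptr<ppl::nn::Runtime> runtime;
};

LoadedModel LoadOnnx(const std::string& onnx_buf) {
  // Since ppl.nn 0.8, built-in op implementations must be registered explicitly.
  ppl::nn::x86::RegisterBuiltinOpImpls();
  std::unique_ptr<ppl::nn::Engine> engine(ppl::nn::x86::EngineFactory::Create({}));

  // The factory no longer takes the model; loading is a separate step.
  std::unique_ptr<ppl::nn::onnx::RuntimeBuilder> builder(
      ppl::nn::onnx::RuntimeBuilderFactory::Create());
  builder->LoadModel(onnx_buf.data(), onnx_buf.size(), nullptr);

  // Engines are attached through a Resources struct instead of factory arguments.
  ppl::nn::Engine* engines[] = {engine.get()};
  ppl::nn::onnx::RuntimeBuilder::Resources resources{};
  resources.engines = engines;
  resources.engine_num = 1;
  builder->SetResources(resources);

  // Graph optimization, formerly implicit in the one-shot factory call.
  builder->Preprocess();

  std::unique_ptr<ppl::nn::Runtime> runtime(builder->CreateRuntime());
  return {std::move(engine), std::move(runtime)};
}

This mirrors how PPLNet::Init keeps engines_ alive as members alongside runtime_.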
10 changes: 1 addition & 9 deletions csrc/mmdeploy/preprocess/cuda/pad_impl.cpp
@@ -14,18 +14,10 @@ namespace cuda {
 class PadImpl : public ::mmdeploy::PadImpl {
  public:
   explicit PadImpl(const Value& args) : ::mmdeploy::PadImpl(args) {
-#if PPLCV_VERSION_MAJOR >= 0 && PPLCV_VERSION_MINOR >= 6 && PPLCV_VERSION_PATCH >= 2
     map<string, ppl::cv::BorderType> border_map{{"constant", ppl::cv::BORDER_CONSTANT},
                                                 {"edge", ppl::cv::BORDER_REPLICATE},
                                                 {"reflect", ppl::cv::BORDER_REFLECT_101},
-                                                { "symmetric",
-                                                  ppl::cv::BORDER_REFLECT }};
-#else
-    map<string, ppl::cv::BorderType> border_map{{"constant", ppl::cv::BORDER_TYPE_CONSTANT},
-                                                {"edge", ppl::cv::BORDER_TYPE_REPLICATE},
-                                                {"reflect", ppl::cv::BORDER_TYPE_REFLECT_101},
-                                                {"symmetric", ppl::cv::BORDER_TYPE_REFLECT}};
-#endif
+                                                {"symmetric", ppl::cv::BORDER_REFLECT}};
     if (border_map.find(arg_.padding_mode) == border_map.end()) {
       MMDEPLOY_ERROR("unsupported padding_mode '{}'", arg_.padding_mode);
       throw_exception(eNotSupported);
11 changes: 0 additions & 11 deletions csrc/mmdeploy/preprocess/cuda/resize_impl.cpp
@@ -45,7 +45,6 @@ class ResizeImpl final : public ::mmdeploy::ResizeImpl {
  private:
   template <class T, int C, class... Args>
   ppl::common::RetCode DispatchImpl(Args&&... args) {
-#if PPLCV_VERSION_MAJOR >= 0 && PPLCV_VERSION_MINOR >= 6 && PPLCV_VERSION_PATCH >= 2
     if (arg_.interpolation == "bilinear") {
       return ppl::cv::cuda::Resize<T, C>(std::forward<Args>(args)...,
                                          ppl::cv::INTERPOLATION_LINEAR);
@@ -54,16 +53,6 @@ class ResizeImpl final : public ::mmdeploy::ResizeImpl {
     if (arg_.interpolation == "nearest") {
       return ppl::cv::cuda::Resize<T, C>(std::forward<Args>(args)...,
                                          ppl::cv::INTERPOLATION_NEAREST_POINT);
     }
-#else
-    if (arg_.interpolation == "bilinear") {
-      return ppl::cv::cuda::Resize<T, C>(std::forward<Args>(args)...,
-                                         ppl::cv::INTERPOLATION_TYPE_LINEAR);
-    }
-    if (arg_.interpolation == "nearest") {
-      return ppl::cv::cuda::Resize<T, C>(std::forward<Args>(args)...,
-                                         ppl::cv::INTERPOLATION_TYPE_NEAREST_POINT);
-    }
-#endif
     return ppl::common::RC_UNSUPPORTED;
   }
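
Editor's note: the two preprocessing hunks above can drop their version guards because ppl.cv 0.7 becomes the single supported version, and it renamed the BORDER_TYPE_*/INTERPOLATION_TYPE_* enumerators to BORDER_*/INTERPOLATION_*. If a downstream fork still had to straddle both ppl.cv versions, a small alias layer would do. This is a hypothetical sketch, not part of the PR (which simply drops the old names); the "ppl/cv/types.h" include path is an assumption.

// Hypothetical compatibility shim: maps ppl.cv < 0.7 enumerator names onto the
// 0.7+ spellings so call sites can use a single set of constants.
#include "ppl/cv/types.h"  // assumed header declaring BorderType/InterpolationType

namespace pplcv_compat {
#if PPLCV_VERSION_MAJOR == 0 && PPLCV_VERSION_MINOR < 7
constexpr auto kBorderConstant = ppl::cv::BORDER_TYPE_CONSTANT;
constexpr auto kBorderReplicate = ppl::cv::BORDER_TYPE_REPLICATE;
constexpr auto kBorderReflect101 = ppl::cv::BORDER_TYPE_REFLECT_101;
constexpr auto kInterpLinear = ppl::cv::INTERPOLATION_TYPE_LINEAR;
constexpr auto kInterpNearest = ppl::cv::INTERPOLATION_TYPE_NEAREST_POINT;
#else
constexpr auto kBorderConstant = ppl::cv::BORDER_CONSTANT;
constexpr auto kBorderReplicate = ppl::cv::BORDER_REPLICATE;
constexpr auto kBorderReflect101 = ppl::cv::BORDER_REFLECT_101;
constexpr auto kInterpLinear = ppl::cv::INTERPOLATION_LINEAR;
constexpr auto kInterpNearest = ppl::cv::INTERPOLATION_NEAREST_POINT;
#endif
}  // namespace pplcv_compat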
2 changes: 1 addition & 1 deletion docker/GPU/Dockerfile
@@ -6,7 +6,7 @@ ARG TORCH_VERSION=1.8.0
 ARG TORCHVISION_VERSION=0.9.0
 ARG ONNXRUNTIME_VERSION=1.8.1
 ARG MMCV_VERSION=1.4.0
-ARG PPLCV_VERSION=0.6.2
+ARG PPLCV_VERSION=0.7.0
 ENV FORCE_CUDA="1"

 ENV DEBIAN_FRONTEND=noninteractive
5 changes: 2 additions & 3 deletions docs/en/01-how-to-build/linux-x86_64.md
@@ -110,13 +110,12 @@ sudo apt-get install libopencv-dev
 <tr>
   <td>pplcv </td>
   <td>A high-performance image processing library of openPPL.<br>
-  <b>It is optional which only be needed if <code>cuda</code> platform is required.
-  Now, MMDeploy supports v0.6.2 and has to use <code>git clone</code> to download it.</b><br>
+  <b>It is optional which only be needed if <code>cuda</code> platform is required.</b><br>
 <pre><code>
 git clone https://github.com/openppl-public/ppl.cv.git
 cd ppl.cv
 export PPLCV_DIR=$(pwd)
-git checkout tags/v0.6.2 -b v0.6.2
+git checkout tags/v0.7.0 -b v0.7.0
 ./build.sh cuda
 </code></pre>
   </td>
7 changes: 3 additions & 4 deletions docs/en/01-how-to-build/windows.md
@@ -97,16 +97,15 @@ You can skip this chapter if you are only interested in the model converter.
 <tr>
   <td>pplcv </td>
   <td>A high-performance image processing library of openPPL.<br>
-  <b>It is optional which only be needed if <code>cuda</code> platform is required.
-  Now, MMDeploy supports v0.6.2 and has to use <code>git clone</code> to download it.</b><br>
+  <b>It is optional which only be needed if <code>cuda</code> platform is required.</b><br>
 <pre><code>
 git clone https://github.com/openppl-public/ppl.cv.git
 cd ppl.cv
-git checkout tags/v0.6.2 -b v0.6.2
+git checkout tags/v0.7.0 -b v0.7.0
 $env:PPLCV_DIR = "$pwd"
 mkdir pplcv-build
 cd pplcv-build
-cmake .. -G "Visual Studio 16 2019" -T v142 -A x64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install -DHPCC_USE_CUDA=ON -DHPCC_MSVC_MD=ON
+cmake .. -G "Visual Studio 16 2019" -T v142 -A x64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install -DHPCC_USE_CUDA=ON -DPPLCV_USE_MSVC_STATIC_RUNTIME=OFF
 cmake --build . --config Release -- /m
 cmake --install . --config Release
 cd ../..
2 changes: 1 addition & 1 deletion docs/en/05-supported-backends/pplnn.md
@@ -1,6 +1,6 @@
 # PPLNN Support

-This tutorial is based on Linux systems like Ubuntu-18.04.
+MMDeploy supports ppl.nn v0.8.1 and later. This tutorial is based on Linux systems like Ubuntu-18.04.

 ## Installation
4 changes: 2 additions & 2 deletions docs/zh_cn/01-how-to-build/linux-x86_64.md
@@ -110,12 +110,12 @@ sudo apt-get install libopencv-dev
 </tr>
 <tr>
   <td>pplcv </td>
-  <td>pplcv is a high-performance image processing library developed by openPPL. <b>This dependency is optional and only needs to be installed for the cuda platform. Moreover, v0.6.2 is currently required, and the source must be downloaded with git clone and built from it.</b><br>
+  <td>pplcv is a high-performance image processing library developed by openPPL. <b>This dependency is optional and only needs to be installed for the cuda platform.</b><br>
 <pre><code>
 git clone https://github.com/openppl-public/ppl.cv.git
 cd ppl.cv
 export PPLCV_DIR=$(pwd)
-git checkout tags/v0.6.2 -b v0.6.2
+git checkout tags/v0.7.0 -b v0.7.0
 ./build.sh cuda
 </code></pre>
   </td>
6 changes: 3 additions & 3 deletions docs/zh_cn/01-how-to-build/windows.md
@@ -94,15 +94,15 @@ pip install mmcv-full==1.4.0 -f https://download.openmmlab.com/mmcv/dist/$env:cu
 </tr>
 <tr>
   <td>pplcv </td>
-  <td>pplcv is a high-performance image processing library developed by openPPL. <b>This dependency is optional and only needs to be installed for the cuda platform. Moreover, v0.6.2 is currently required, and the source must be downloaded with git clone and built from it.</b><br>
+  <td>pplcv is a high-performance image processing library developed by openPPL. <b>This dependency is optional and only needs to be installed for the cuda platform.</b><br>
 <pre><code>
 git clone https://github.com/openppl-public/ppl.cv.git
 cd ppl.cv
-git checkout tags/v0.6.2 -b v0.6.2
+git checkout tags/v0.7.0 -b v0.7.0
 $env:PPLCV_DIR = "$pwd"
 mkdir pplcv-build
 cd pplcv-build
-cmake .. -G "Visual Studio 16 2019" -T v142 -A x64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install -DHPCC_USE_CUDA=ON -DHPCC_MSVC_MD=ON
+cmake .. -G "Visual Studio 16 2019" -T v142 -A x64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install -DPPLCV_USE_CUDA=ON -DPPLCV_USE_MSVC_STATIC_RUNTIME=OFF
 cmake --build . --config Release -- /m
 cmake --install . --config Release
 cd ../..
9 changes: 2 additions & 7 deletions mmdeploy/backend/pplnn/onnx2pplnn.py
@@ -1,10 +1,8 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from typing import Optional, Sequence

-from pyppl import nn as pplnn
-
 from mmdeploy.utils.device import parse_cuda_device_id
-from .utils import register_engines
+from .utils import create_runtime, register_engines


 def from_onnx(onnx_model: str,
@@ -52,10 +50,7 @@ def from_onnx(onnx_model: str,
         quick_select=False,
         export_algo_file=algo_file,
         input_shapes=input_shapes)
-    runtime_builder = pplnn.OnnxRuntimeBuilderFactory.CreateFromFile(
-        onnx_model, engines)
-    assert runtime_builder is not None, 'Failed to create '\
-        'OnnxRuntimeBuilder.'
+    _ = create_runtime(onnx_model, engines)  # side effect: export algorithms
     import shutil
     if onnx_output_path != onnx_model:
         shutil.copy2(onnx_model, onnx_output_path)
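
Editor's note: the create_runtime call above is kept purely for its side effect — with quick_select=False and export_algo_file set, ppl.nn's CUDA engine runs its algorithm search and writes the selections to algo_file. On the SDK side, the PPL_CUDA_IMPORT_FROM_BUFFER branch in ppl_net.cpp (compiled out in this commit) is the intended consumer of that file. Below is a hedged C++ sketch of such a consumer, not code from the PR: the ReadWholeFile helper is hypothetical, and the ppl::common retcode constants are assumed to come in transitively with the engine headers.

#include <fstream>
#include <sstream>
#include <string>

#include "ppl/nn/engines/cuda/engine_factory.h"
#include "ppl/nn/engines/cuda/engine_options.h"
#include "ppl/nn/engines/cuda/ops.h"

// Hypothetical helper: slurp a previously exported algorithm file into memory.
static std::string ReadWholeFile(const std::string& path) {
  std::ifstream ifs(path, std::ios::binary);
  std::ostringstream oss;
  oss << ifs.rdbuf();
  return oss.str();
}

// Create a CUDA engine and try to reuse exported algorithm selections,
// mirroring the PPL_CUDA_IMPORT_FROM_BUFFER branch in ppl_net.cpp.
// The caller owns the returned engine.
ppl::nn::Engine* CreateCudaEngine(int device_id, const std::string& algo_path) {
  ppl::nn::cuda::RegisterBuiltinOpImpls();
  ppl::nn::cuda::EngineOptions options{};
  options.device_id = device_id;
  options.mm_policy = ppl::nn::cuda::MM_BEST_FIT;
  auto* engine = ppl::nn::cuda::EngineFactory::Create(options);

  bool imported = false;
  auto algo = ReadWholeFile(algo_path);
  if (!algo.empty()) {
    auto rc = engine->Configure(ppl::nn::cuda::ENGINE_CONF_IMPORT_ALGORITHMS_FROM_BUFFER,
                                algo.c_str(), algo.size());
    imported = (rc == ppl::common::RC_SUCCESS);
  }
  if (!imported) {
    // Fall back to defaults rather than re-running the slow algorithm search.
    engine->Configure(ppl::nn::cuda::ENGINE_CONF_USE_DEFAULT_ALGORITHMS, true);
  }
  return engine;
}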