Merge master branch to resolve conflicts #571

Merged
52 commits merged on Jun 8, 2022
Commits
7cfe6e6
fix pose demo and windows build (#307)
irexyc Apr 2, 2022
442e9cd
add postprocessing_masks gpu version (#276)
xizi Apr 6, 2022
85c46ee
fixed a bug causes text-recognizer to fail when (non-NULL) empty bbox…
lzhangzz Apr 7, 2022
6e7e219
[Fix] include missing <type_traits> for formatter.h (#313)
lzhangzz Apr 11, 2022
d7adf81
[Fix] MMEditing cannot save results when testing (#336)
SingleZombie Apr 14, 2022
89ce8e2
docs(build): fix typo (#352)
tpoisonooo Apr 14, 2022
6aacede
fix openvino export tmp model, add binary flag (#353)
irexyc Apr 15, 2022
ade8e02
init circleci (#348)
lvhan028 Apr 15, 2022
fdbd3d1
fix wrong input mat type (#362)
irexyc Apr 15, 2022
88062e9
fix(docs): remove redundant doc tree (#360)
tpoisonooo Apr 15, 2022
b9c5487
fix missing ncnn_DIR & InferenceEngine_DIR (#364)
irexyc Apr 15, 2022
a8c75de
Fix mmdet openvino dynamic 300x300 cfg base (#372)
PeterH0323 Apr 18, 2022
957fd58
Fix: add onnxruntime building option in gpu dockerfile (#366)
Adenialzz Apr 18, 2022
9dad97e
Tutorial 03: torch2onnx (#365)
SingleZombie Apr 24, 2022
85f1778
[Docs] fix ncnn docs (#378)
hanrui1sensetime Apr 24, 2022
f9144f7
typo-fix (#397)
grimoire Apr 25, 2022
ee26593
add CUDA_TOOKIT_ROOT_DIR as tensorrt detect dir (#357)
irexyc Apr 25, 2022
f6fcee5
Fix docs (#398)
triple-Mu Apr 25, 2022
53ad86d
ort_net ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL (#383)
zly19540609 Apr 26, 2022
d9976c4
fix wrong buffer which will case onnxruntime-gpu crash with segmentai…
irexyc Apr 26, 2022
9560348
fix benchmark (#411)
RunningLeon Apr 27, 2022
8e6d4de
Add `sm_53` in cuda.cmake for Jetson Nano which will cashe when proce…
PeterH0323 Apr 27, 2022
cecd1ec
[Fix] fix feature test for `std::source_location` (#416)
lzhangzz Apr 27, 2022
72c19e9
fix format string (#417)
lzhangzz Apr 27, 2022
a9a4144
[Fix] Fix seg name (#394)
AllentDan Apr 27, 2022
21230d5
【Docs】Add ipython notebook tutorial (#234)
VVsssssk Apr 27, 2022
8aba06d
fix mmpose api (#396)
irexyc Apr 27, 2022
16ee9c7
[Fix] support latest spdlog (#423)
lzhangzz May 3, 2022
86ab063
fix pillarencode (#331)
VVsssssk May 5, 2022
5231e65
fix ONNXRuntime cuda test bug (#438)
NagatoYuki0943 May 7, 2022
c2f2edc
Fix ci in master branch (#441)
grimoire May 7, 2022
f45c1f0
[Doc] Improve Jetson tutorial install doc (#381)
PeterH0323 May 7, 2022
94148cb
Version comments added, torch install steps added. (#449)
tehkillerbee May 9, 2022
3786856
[Docs] Fix API documentation (#443)
zhouzaida May 9, 2022
0cd44a6
[Fix] Fix display bugs for windows (#451)
hanrui1sensetime May 12, 2022
21c2a85
[Docs] Minor fixes and translation of installation tutorial for Jetso…
Dchaoqun May 16, 2022
2f2ec27
If a cuda launch error occurs, verify if cuda device requires top_k t…
tehkillerbee May 17, 2022
ba641c3
[Fix] set optional arg a default value (#483)
AllentDan May 17, 2022
69111a6
Update: Optimize document (#484)
lakshanthad May 17, 2022
e057b87
fix: bbox_nms not onnxizing if batch size > 1 (#501)
gyf304 May 20, 2022
a4de9f3
change seperator of function marker (#499)
RunningLeon May 20, 2022
57baf21
[docs] Fix typo in tutorial (#509)
zhiqwang May 23, 2022
de3f18f
Fix docstring format (#495)
triple-Mu May 23, 2022
d16720b
Tutorial 04: onnx custom op (#508)
SingleZombie May 24, 2022
4f49763
fix mmseg twice resize (#480)
RunningLeon May 25, 2022
0878b8f
Fix mask test with mismatched device (#511)
RunningLeon May 26, 2022
32482e7
compat mmpose v0.26 (#518)
RunningLeon May 27, 2022
571b240
[Docs] adding new backends when using MMDeploy as a third package (#482)
AllentDan May 27, 2022
6fa1787
Tutorial 05: ONNX Model Editing (#517)
SingleZombie May 28, 2022
182cc51
fix pspnet torchscript conversion (#538)
RunningLeon Jun 1, 2022
2a0fcb6
changing the onnxwrapper script for gpu issue (#532)
sanjaypavo Jun 7, 2022
594c835
merge master and fix the conflicts
lvhan028 Jun 7, 2022
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -39,7 +39,7 @@ repos:
     rev: v2.1.0
     hooks:
       - id: codespell
-        args: ["--skip=third_party/*,*.proto"]
+        args: ["--skip=third_party/*,*.ipynb,*.proto"]

   - repo: https://github.com/myint/docformatter
     rev: v1.4
18 changes: 12 additions & 6 deletions csrc/backend_ops/tensorrt/common_impl/nms/allClassNMS.cu
@@ -205,6 +205,18 @@ pluginStatus_t allClassNMS_gpu(cudaStream_t stream, const int num, const int num
       (T_BBOX *)bbox_data, (T_SCORE *)beforeNMS_scores, (int *)beforeNMS_index_array,
       (T_SCORE *)afterNMS_scores, (int *)afterNMS_index_array, flipXY);

+  cudaError_t code = cudaGetLastError();
+  if (code != cudaSuccess) {
+    // Verify if cuda dev0 requires top_k to be reduced;
+    // sm_53 (Jetson Nano) and sm_62 (Jetson TX2) requires reduced top_k < 1000
+    auto __cuda_arch__ = get_cuda_arch(0);
+    if ((__cuda_arch__ == 530 || __cuda_arch__ == 620) && top_k >= 1000) {
+      printf(
+          "Warning: pre_top_k need to be reduced for devices with arch 5.3, 6.2, got "
+          "pre_top_k=%d\n",
+          top_k);
+    }
+  }
   CSC(cudaGetLastError(), STATUS_FAILURE);
   return STATUS_SUCCESS;
 }
@@ -243,13 +255,7 @@ pluginStatus_t allClassNMS(cudaStream_t stream, const int num, const int num_cla
                            const bool isNormalized, const DataType DT_SCORE, const DataType DT_BBOX,
                            void *bbox_data, void *beforeNMS_scores, void *beforeNMS_index_array,
                            void *afterNMS_scores, void *afterNMS_index_array, bool flipXY) {
-  auto __cuda_arch__ = get_cuda_arch(0);  // assume there is only one arch 7.2 device
-  if (__cuda_arch__ == 720 && top_k >= 1000) {
-    printf("Warning: pre_top_k need to be reduced for devices with arch 7.2, got pre_top_k=%d\n",
-           top_k);
-  }
   nmsLaunchConfigSSD lc(DT_SCORE, DT_BBOX);
-
   for (unsigned i = 0; i < nmsFuncVec.size(); ++i) {
     if (lc == nmsFuncVec[i]) {
       DEBUG_PRINTF("all class nms kernel %d\n", i);
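The change above moves the arch check out of `allClassNMS` (where it only covered arch 7.2) into `allClassNMS_gpu`, and it now fires only after `cudaGetLastError()` reports a failed launch on arch 5.3 or 6.2 with `top_k >= 1000`. For context, a helper like `get_cuda_arch(device)` can be built on the CUDA runtime API; the sketch below is only an illustration under that assumption. The name `query_cuda_arch` and the `major * 100 + minor * 10` encoding are hypothetical, not taken from the MMDeploy sources, but they produce the same 530/620/720-style values the hunk compares against.

```cpp
// Illustrative sketch (not MMDeploy code): derive an integer arch id such as
// 530, 620, or 720 from a device's compute capability.
#include <cuda_runtime.h>

static int query_cuda_arch(int device) {
  cudaDeviceProp prop{};
  if (cudaGetDeviceProperties(&prop, device) != cudaSuccess) {
    return -1;  // unknown device; callers can skip the top_k heuristic
  }
  // Compute capability 5.3 (Jetson Nano) -> 530, 6.2 (Jetson TX2) -> 620,
  // 7.2 (Jetson AGX Xavier) -> 720.
  return prop.major * 100 + prop.minor * 10;
}
```

Because the check only runs after a launch error, the extra device query stays off the normal execution path on GPUs where large `pre_top_k` values are not a problem.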
484 changes: 484 additions & 0 deletions demo/tutorials/tutorials_1.ipynb

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion docker/GPU/Dockerfile
@@ -82,9 +82,10 @@ RUN cd /root/workspace/mmdeploy &&\
         -DCMAKE_CXX_COMPILER=g++ \
         -Dpplcv_DIR=/root/workspace/ppl.cv/cuda-build/install/lib/cmake/ppl \
         -DTENSORRT_DIR=${TENSORRT_DIR} \
+        -DONNXRUNTIME_DIR=${ONNXRUNTIME_DIR} \
         -DMMDEPLOY_BUILD_SDK_PYTHON_API=ON \
         -DMMDEPLOY_TARGET_DEVICES="cuda;cpu" \
-        -DMMDEPLOY_TARGET_BACKENDS="trt" \
+        -DMMDEPLOY_TARGET_BACKENDS="ort;trt" \
         -DMMDEPLOY_CODEBASES=all &&\
     make -j$(nproc) && make install &&\
     cd install/example && mkdir -p build && cd build &&\
4 changes: 2 additions & 2 deletions docs/en/01-how-to-build/android.md
@@ -76,9 +76,9 @@ export OPENCV_ANDROID_SDK_DIR=${PWD}/OpenCV-android-sdk
 <tr>
   <td>ncnn </td>
   <td>A high-performance neural network inference computing framework supporting for android.</br>
-  <b> Now, MMDeploy supports v20211208 and has to use <code>git clone</code> to download it.</b><br>
+  <b> Now, MMDeploy supports v20220216 and has to use <code>git clone</code> to download it.</b><br>
   <pre><code>
-git clone -b 20211208 https://github.com/Tencent/ncnn.git
+git clone -b 20220216 https://github.com/Tencent/ncnn.git
 cd ncnn
 git submodule update --init
 export NCNN_DIR=${PWD}