open-mmlab · ZwwWayne · Aug 10, 2021 · Jul 19, 2021 · Jul 19, 2021 · Jul 19, 2021
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,5 +1,5 @@
 include requirements/runtime.txt
 include mmcv/model_zoo/open_mmlab.json mmcv/model_zoo/deprecated.json mmcv/model_zoo/mmcls.json
-include mmcv/ops/csrc/*.cuh mmcv/ops/csrc/*.hpp
-include mmcv/ops/csrc/pytorch/*.cu mmcv/ops/csrc/pytorch/*.cpp
-include mmcv/ops/csrc/parrots/*.cu mmcv/ops/csrc/parrots/*.cpp
+include mmcv/ops/csrc/common/cuda/*.cuh mmcv/ops/csrc/common/cuda/*.hpp mmcv/ops/csrc/common/*.hpp
+include mmcv/ops/csrc/pytorch/*.cpp mmcv/ops/csrc/pytorch/cuda/*.cu
+include mmcv/ops/csrc/parrots/*.h mmcv/ops/csrc/parrots/*.cpp
diff --git a/docs/compatibility.md b/docs/compatibility.md
@@ -0,0 +1,5 @@
+## Compatibility of MMCV
+
+### MMCV v1.3.10
+
+In order to flexibly support more backends and hardwares like `NVIDIA GPUs` and `AMD GPUs`, the directory of `mmcv/ops/csrc` is refactored. Note that this refactoring will not affect the usage in API. For related information, please refer to [PR1206](https://github.com/open-mmlab/mmcv/pull/1206).
diff --git a/docs/index.rst b/docs/index.rst
@@ -10,6 +10,7 @@ You can switch between Chinese and English documents in the lower-left corner of
    deployment.rst
    understand_mmcv.rst
    api.rst
+   compatibility.md
    faq.md
    community.rst
 

diff --git a/docs_zh_CN/compatibility.md b/docs_zh_CN/compatibility.md
@@ -0,0 +1,5 @@
+## MMCV 兼容性说明
+
+### MMCV v1.3.10
+
+为了灵活地支持更多的后端和硬件，例如 `NVIDIA GPUs` 、`AMD GPUs`，我们重构了 `mmcv/ops/csrc` 目录。注意，这次重构不会影响 API 的使用。更多相关信息，请参考 [PR1206](https://github.com/open-mmlab/mmcv/pull/1206)。
diff --git a/docs_zh_CN/index.rst b/docs_zh_CN/index.rst
@@ -10,6 +10,7 @@
    deployment.rst
    understand_mmcv.rst
    api.rst
+   compatibility.md
    faq.md
    community.rst
 

diff --git a/mmcv/ops/csrc/README.md b/mmcv/ops/csrc/README.md
@@ -0,0 +1,169 @@
+# Csrc Structure
+
+This folder contains all non-python code for MMCV custom ops. Please follow the same architecture if you want to add new ops.
+
+## Directories Tree
+
+```folder
+.
+├── common
+│   ├── box_iou_rotated_utils.hpp
+│   ├── parrots_cpp_helper.hpp
+│   ├── parrots_cuda_helper.hpp
+│   ├── pytorch_cpp_helper.hpp
+│   ├── pytorch_cuda_helper.hpp
+│   └── cuda
+│       ├── common_cuda_helper.hpp
+│       ├── parrots_cudawarpfunction.cuh
+│       ├── ...
+│       └── ops_cuda_kernel.cuh
+├── onnxruntime
+│   ├── onnxruntime_register.h
+│   ├── onnxruntime_session_options_config_keys.h
+│   ├── ort_mmcv_utils.h
+│   ├── ...
+│   ├── onnx_ops.h
+│   └── cpu
+│       ├── onnxruntime_register.cpp
+│       ├── ...
+│       └── onnx_ops_impl.cpp
+├── parrots
+│   ├── ...
+│   ├── ops.cpp
+│   ├── ops_parrots.cpp
+│   └── ops_pytorch.h
+├── pytorch
+│   ├── info.cpp
+│   ├── pybind.cpp
+│   ├── ...
+│   ├── ops.cpp
+│   └── cuda
+│       ├── ...
+│       └── ops_cuda.cu
+└── tensorrt
+    ├── trt_cuda_helper.cuh
+    ├── trt_plugin_helper.hpp
+    ├── trt_plugin.hpp
+    ├── trt_serialize.hpp
+    ├── ...
+    ├── trt_ops.hpp
+    └── plugins
+        ├── trt_cuda_helper.cu
+        ├── trt_plugin.cpp
+        ├── ...
+        ├── trt_ops.cpp
+        └── trt_ops_kernel.cu
+```
+
+## Components
+
+- `common`: This directory contains all tools and shared codes.
+  - `cuda`: The cuda kernels which can be shared by all backends. **HIP** kernel is also here since they have similar syntax.
+- `onnxruntime`: **ONNX Runtime** support for custom ops.
+  - `cpu`: CPU implementation of supported ops.
+- `parrots`: **Parrots** is a deep learning frame for model training and inference. Parrots custom ops are placed in this directory.
+- `pytorch`: **PyTorch** custom ops are supported by binding C++ to Python with **pybind11**. The ops implementation and binding codes are placed in this directory.
+  - `cuda`: This directory contains cuda kernel launchers, which feed memory pointers of tensor to the cuda kernel in `common/cuda`. The launchers provide c++ interface of cuda implementation of corresponding custom ops.
+- `tensorrt`: **TensorRT** support for custom ops.
+  - `plugins`: This directory contains the implementation of the supported custom ops. Some ops might also use shared cuda kernel in `common/cuda`.
+
+## How to add new PyTorch ops?
+
+1. (Optional) Add shared kernel in `common` to support special hardware platform.
+
+    ```c++
+    // src/common/cuda/new_ops_cuda_kernel.cuh
+
+    template <typename T>
+    __global__ void new_ops_forward_cuda_kernel(const T* input, T* output, ...) {
+        // forward here
+    }
+
+    ```
+
+    Add cuda kernel launcher in `pytorch/cuda`.
+
+    ```c++
+    // src/pytorch/cuda
+    #include <new_ops_cuda_kernel.cuh>
+
+    void NewOpsForwardCUDAKernelLauncher(Tensor input, Tensor output, ...){
+        // initialize
+        at::cuda::CUDAGuard device_guard(input.device());
+        cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+        ...
+        AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+            input.scalar_type(), "new_ops_forward_cuda_kernel", ([&] {
+                new_ops_forward_cuda_kernel<scalar_t>
+                    <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK, 0, stream>>>(
+                        input.data_ptr<scalar_t>(), output.data_ptr<scalar_t>(),...);
+            }));
+        AT_CUDA_CHECK(cudaGetLastError());
+    }
+    ```
+
+2. Add ops implementation in `pytorch` directory. Select different implementations according to device type.
+
+    ```c++
+    // src/pytorch/new_ops.cpp
+    #ifdef MMCV_WITH_CUDA
+    Tensor new_ops_forward_cuda(Tensor input, Tensor output, ...){
+        // implement cuda forward here
+        // use `NewOpsForwardCUDAKernelLauncher` here
+    }
+    #else
+
+    Tensor new_ops_forward_cpu(Tensor input, Tensor output, ...){
+        // implement cpu forward here
+    }
+
+    ...
+
+    Tensor new_ops_forward(Tensor input, Tensor output, ...){
+        // select implementation by input device type
+        if (boxes.device().is_cuda()) {
+        #ifdef MMCV_WITH_CUDA
+            CHECK_CUDA_INPUT(input);
+            CHECK_CUDA_INPUT(output);
+            return new_ops_forward_cuda(input, output, ...);
+        #else
+            AT_ERROR("new ops is not compiled with GPU support");
+        #endif
+        } else {
+            CHECK_CPU_INPUT(input);
+            CHECK_CPU_INPUT(output);
+            return new_ops_forward_cpu(input, output, ...);
+        }
+    }
+    ```
+
+3. Binding the implementation in `pytorch/pybind.cpp`
+
+    ```c++
+    // src/pytorch/pybind.cpp
+
+    ...
+
+    Tensor new_ops_forward(Tensor input, Tensor output, ...);
+
+    ...
+
+    // bind with pybind11
+    m.def("new_ops_forward", &new_ops_forward, "new_ops_forward",
+            py::arg("input"), py::arg("output"), ...);
+
+    ...
+
+    ```
+
+4. Build MMCV again. Enjoy new ops in python
+
+    ```python
+    from ..utils import ext_loader
+    ext_module = ext_loader.load_ext('_ext', ['new_ops_forward'])
+
+    ...
+
+    ext_module.new_ops_forward(input, output, ...)
+
+    ```
diff --git a/mmcv/ops/csrc/box_iou_rotated_utils.hpp → ...ops/csrc/common/box_iou_rotated_utils.hpp b/mmcv/ops/csrc/box_iou_rotated_utils.hpp → ...ops/csrc/common/box_iou_rotated_utils.hpp
diff --git a/mmcv/ops/csrc/bbox_overlaps_cuda_kernel.cuh → ...common/cuda/bbox_overlaps_cuda_kernel.cuh b/mmcv/ops/csrc/bbox_overlaps_cuda_kernel.cuh → ...common/cuda/bbox_overlaps_cuda_kernel.cuh
diff --git a/mmcv/ops/csrc/border_align_cuda_kernel.cuh → .../common/cuda/border_align_cuda_kernel.cuh b/mmcv/ops/csrc/border_align_cuda_kernel.cuh → .../common/cuda/border_align_cuda_kernel.cuh
diff --git a/mmcv/ops/csrc/box_iou_rotated_cuda.cuh → ...csrc/common/cuda/box_iou_rotated_cuda.cuh b/mmcv/ops/csrc/box_iou_rotated_cuda.cuh → ...csrc/common/cuda/box_iou_rotated_cuda.cuh
diff --git a/mmcv/ops/csrc/carafe_cuda_kernel.cuh → ...s/csrc/common/cuda/carafe_cuda_kernel.cuh b/mmcv/ops/csrc/carafe_cuda_kernel.cuh → ...s/csrc/common/cuda/carafe_cuda_kernel.cuh
diff --git a/mmcv/ops/csrc/carafe_naive_cuda_kernel.cuh → .../common/cuda/carafe_naive_cuda_kernel.cuh b/mmcv/ops/csrc/carafe_naive_cuda_kernel.cuh → .../common/cuda/carafe_naive_cuda_kernel.cuh
diff --git a/mmcv/ops/csrc/cc_attention_cuda_kernel.cuh → .../common/cuda/cc_attention_cuda_kernel.cuh b/mmcv/ops/csrc/cc_attention_cuda_kernel.cuh → .../common/cuda/cc_attention_cuda_kernel.cuh
diff --git a/mmcv/ops/csrc/common_cuda_helper.hpp → ...s/csrc/common/cuda/common_cuda_helper.hpp b/mmcv/ops/csrc/common_cuda_helper.hpp → ...s/csrc/common/cuda/common_cuda_helper.hpp
diff --git a/mmcv/ops/csrc/deform_conv_cuda_kernel.cuh → ...c/common/cuda/deform_conv_cuda_kernel.cuh b/mmcv/ops/csrc/deform_conv_cuda_kernel.cuh → ...c/common/cuda/deform_conv_cuda_kernel.cuh
diff --git a/.../ops/csrc/deform_roi_pool_cuda_kernel.cuh → ...mmon/cuda/deform_roi_pool_cuda_kernel.cuh b/.../ops/csrc/deform_roi_pool_cuda_kernel.cuh → ...mmon/cuda/deform_roi_pool_cuda_kernel.cuh
diff --git a/mmcv/ops/csrc/masked_conv2d_cuda_kernel.cuh → ...common/cuda/masked_conv2d_cuda_kernel.cuh b/mmcv/ops/csrc/masked_conv2d_cuda_kernel.cuh → ...common/cuda/masked_conv2d_cuda_kernel.cuh
diff --git a/...src/modulated_deform_conv_cuda_kernel.cuh → ...uda/modulated_deform_conv_cuda_kernel.cuh b/...src/modulated_deform_conv_cuda_kernel.cuh → ...uda/modulated_deform_conv_cuda_kernel.cuh
diff --git a/mmcv/ops/csrc/ms_deform_attn_cuda_kernel.cuh → ...ommon/cuda/ms_deform_attn_cuda_kernel.cuh b/mmcv/ops/csrc/ms_deform_attn_cuda_kernel.cuh → ...ommon/cuda/ms_deform_attn_cuda_kernel.cuh
diff --git a/mmcv/ops/csrc/nms_cuda_kernel.cuh → .../ops/csrc/common/cuda/nms_cuda_kernel.cuh b/mmcv/ops/csrc/nms_cuda_kernel.cuh → .../ops/csrc/common/cuda/nms_cuda_kernel.cuh
diff --git a/mmcv/ops/csrc/nms_rotated_cuda.cuh → ...ops/csrc/common/cuda/nms_rotated_cuda.cuh b/mmcv/ops/csrc/nms_rotated_cuda.cuh → ...ops/csrc/common/cuda/nms_rotated_cuda.cuh
diff --git a/mmcv/ops/csrc/parrots_cudawarpfunction.cuh → .../common/cuda/parrots_cudawarpfunction.cuh b/mmcv/ops/csrc/parrots_cudawarpfunction.cuh → .../common/cuda/parrots_cudawarpfunction.cuh
diff --git a/mmcv/ops/csrc/psamask_cuda_kernel.cuh → .../csrc/common/cuda/psamask_cuda_kernel.cuh b/mmcv/ops/csrc/psamask_cuda_kernel.cuh → .../csrc/common/cuda/psamask_cuda_kernel.cuh
diff --git a/mmcv/ops/csrc/roi_align_cuda_kernel.cuh → ...src/common/cuda/roi_align_cuda_kernel.cuh b/mmcv/ops/csrc/roi_align_cuda_kernel.cuh → ...src/common/cuda/roi_align_cuda_kernel.cuh
diff --git a/...ps/csrc/roi_align_rotated_cuda_kernel.cuh → ...on/cuda/roi_align_rotated_cuda_kernel.cuh b/...ps/csrc/roi_align_rotated_cuda_kernel.cuh → ...on/cuda/roi_align_rotated_cuda_kernel.cuh
diff --git a/mmcv/ops/csrc/roi_pool_cuda_kernel.cuh → ...csrc/common/cuda/roi_pool_cuda_kernel.cuh b/mmcv/ops/csrc/roi_pool_cuda_kernel.cuh → ...csrc/common/cuda/roi_pool_cuda_kernel.cuh
diff --git a/...s/csrc/sigmoid_focal_loss_cuda_kernel.cuh → ...n/cuda/sigmoid_focal_loss_cuda_kernel.cuh b/...s/csrc/sigmoid_focal_loss_cuda_kernel.cuh → ...n/cuda/sigmoid_focal_loss_cuda_kernel.cuh
diff --git a/...s/csrc/softmax_focal_loss_cuda_kernel.cuh → ...n/cuda/softmax_focal_loss_cuda_kernel.cuh b/...s/csrc/softmax_focal_loss_cuda_kernel.cuh → ...n/cuda/softmax_focal_loss_cuda_kernel.cuh
diff --git a/mmcv/ops/csrc/sync_bn_cuda_kernel.cuh → .../csrc/common/cuda/sync_bn_cuda_kernel.cuh b/mmcv/ops/csrc/sync_bn_cuda_kernel.cuh → .../csrc/common/cuda/sync_bn_cuda_kernel.cuh
diff --git a/mmcv/ops/csrc/tin_shift_cuda_kernel.cuh → ...src/common/cuda/tin_shift_cuda_kernel.cuh b/mmcv/ops/csrc/tin_shift_cuda_kernel.cuh → ...src/common/cuda/tin_shift_cuda_kernel.cuh
diff --git a/mmcv/ops/csrc/parrots_cpp_helper.hpp → mmcv/ops/csrc/common/parrots_cpp_helper.hpp b/mmcv/ops/csrc/parrots_cpp_helper.hpp → mmcv/ops/csrc/common/parrots_cpp_helper.hpp
diff --git a/mmcv/ops/csrc/parrots_cuda_helper.hpp → mmcv/ops/csrc/common/parrots_cuda_helper.hpp b/mmcv/ops/csrc/parrots_cuda_helper.hpp → mmcv/ops/csrc/common/parrots_cuda_helper.hpp
diff --git a/mmcv/ops/csrc/pytorch_cpp_helper.hpp → mmcv/ops/csrc/common/pytorch_cpp_helper.hpp b/mmcv/ops/csrc/pytorch_cpp_helper.hpp → mmcv/ops/csrc/common/pytorch_cpp_helper.hpp
diff --git a/mmcv/ops/csrc/pytorch_cuda_helper.hpp → mmcv/ops/csrc/common/pytorch_cuda_helper.hpp b/mmcv/ops/csrc/pytorch_cuda_helper.hpp → mmcv/ops/csrc/common/pytorch_cuda_helper.hpp