From b2abf1e3b3b01d63d7f017bf39aaaf31598ad8d2 Mon Sep 17 00:00:00 2001 From: Xi Liu <75658786+xiliu8006@users.noreply.github.com> Date: Mon, 9 Aug 2021 15:55:02 +0800 Subject: [PATCH] [Feature] Add roipooling cuda ops (#843) * [Refactor] Main code modification for coordinate system refactor (#677) * [Enhance] Add script for data update (#774) * Fixed wrong config paths and fixed a bug in test * Fixed metafile * Coord sys refactor (main code) * Update test_waymo_dataset.py * Manually resolve conflict * Removed unused lines and fixed imports * remove coord2box and box2coord * update dir_limit_offset * Some minor improvements * Removed some \s in comments * Revert a change * Change Box3DMode to Coord3DMode where points are converted * Fix points_in_bbox function * Fix Imvoxelnet config * Revert adding a line * Fix rotation bug when batch size is 0 * Keep sign of dir_scores as before * Fix several comments * Add a comment * Fix docstring * Add data update scripts * Fix comments * fix import * add roipooling cuda ops * add roi extractor * add test_roi_extractor unittest * Modify setup.py to install roipooling ops * modify docstring * remove enlarge bbox in roipoint pooling * add_roipooling_ops * modify docstring Co-authored-by: Yezhen Cong <52420115+THU17cyz@users.noreply.github.com> Co-authored-by: THU17cyz --- .../roi_heads/roi_extractors/__init__.py | 6 +- .../single_roipoint_extractor.py | 63 +++++++ mmdet3d/ops/roipoint_pool3d/__init__.py | 3 + .../ops/roipoint_pool3d/roipoint_pool3d.py | 71 ++++++++ .../roipoint_pool3d/src/roipoint_pool3d.cpp | 66 +++++++ .../src/roipoint_pool3d_kernel.cu | 168 ++++++++++++++++++ setup.py | 5 + .../test_heads/test_roi_extractors.py | 27 ++- 8 files changed, 407 insertions(+), 2 deletions(-) create mode 100644 mmdet3d/models/roi_heads/roi_extractors/single_roipoint_extractor.py create mode 100644 mmdet3d/ops/roipoint_pool3d/__init__.py create mode 100644 mmdet3d/ops/roipoint_pool3d/roipoint_pool3d.py create mode 100644 mmdet3d/ops/roipoint_pool3d/src/roipoint_pool3d.cpp create mode 100644 mmdet3d/ops/roipoint_pool3d/src/roipoint_pool3d_kernel.cu diff --git a/mmdet3d/models/roi_heads/roi_extractors/__init__.py b/mmdet3d/models/roi_heads/roi_extractors/__init__.py index dc504d220d..24a4991047 100644 --- a/mmdet3d/models/roi_heads/roi_extractors/__init__.py +++ b/mmdet3d/models/roi_heads/roi_extractors/__init__.py @@ -1,4 +1,8 @@ from mmdet.models.roi_heads.roi_extractors import SingleRoIExtractor from .single_roiaware_extractor import Single3DRoIAwareExtractor +from .single_roipoint_extractor import Single3DRoIPointExtractor -__all__ = ['SingleRoIExtractor', 'Single3DRoIAwareExtractor'] +__all__ = [ + 'SingleRoIExtractor', 'Single3DRoIAwareExtractor', + 'Single3DRoIPointExtractor' +] diff --git a/mmdet3d/models/roi_heads/roi_extractors/single_roipoint_extractor.py b/mmdet3d/models/roi_heads/roi_extractors/single_roipoint_extractor.py new file mode 100644 index 0000000000..b6b9d8be2e --- /dev/null +++ b/mmdet3d/models/roi_heads/roi_extractors/single_roipoint_extractor.py @@ -0,0 +1,63 @@ +import torch +from torch import nn as nn + +from mmdet3d import ops +from mmdet3d.core.bbox.structures import rotation_3d_in_axis +from mmdet.models.builder import ROI_EXTRACTORS + + +@ROI_EXTRACTORS.register_module() +class Single3DRoIPointExtractor(nn.Module): + """Point-wise roi-aware Extractor. + + Extract Point-wise roi features. + + Args: + roi_layer (dict): The config of roi layer. + """ + + def __init__(self, roi_layer=None): + super(Single3DRoIPointExtractor, self).__init__() + self.roi_layer = self.build_roi_layers(roi_layer) + + def build_roi_layers(self, layer_cfg): + """Build roi layers using `layer_cfg`""" + cfg = layer_cfg.copy() + layer_type = cfg.pop('type') + assert hasattr(ops, layer_type) + layer_cls = getattr(ops, layer_type) + roi_layers = layer_cls(**cfg) + return roi_layers + + def forward(self, feats, coordinate, batch_inds, rois): + """Extract point-wise roi features. + + Args: + feats (torch.FloatTensor): Point-wise features with + shape (batch, npoints, channels) for pooling. + coordinate (torch.FloatTensor): Coordinate of each point. + batch_inds (torch.LongTensor): Indicate the batch of each point. + rois (torch.FloatTensor): Roi boxes with batch indices. + + Returns: + torch.FloatTensor: Pooled features + """ + rois = rois[..., 1:] + rois = rois.view(batch_inds, -1, rois.shape[-1]) + with torch.no_grad(): + pooled_roi_feat, pooled_empty_flag = self.roi_layer( + coordinate, feats, rois) + + # canonical transformation + roi_center = rois[:, :, 0:3] + pooled_roi_feat[:, :, :, 0:3] -= roi_center.unsqueeze(dim=2) + pooled_roi_feat = pooled_roi_feat.view(-1, + pooled_roi_feat.shape[-2], + pooled_roi_feat.shape[-1]) + pooled_roi_feat[:, :, 0:3] = rotation_3d_in_axis( + pooled_roi_feat[:, :, 0:3], + -(rois.view(-1, rois.shape[-1])[:, 6]), + axis=2) + pooled_roi_feat[pooled_empty_flag.view(-1) > 0] = 0 + + return pooled_roi_feat diff --git a/mmdet3d/ops/roipoint_pool3d/__init__.py b/mmdet3d/ops/roipoint_pool3d/__init__.py new file mode 100644 index 0000000000..bbd2f9a0bb --- /dev/null +++ b/mmdet3d/ops/roipoint_pool3d/__init__.py @@ -0,0 +1,3 @@ +from .roipoint_pool3d import RoIPointPool3d + +__all__ = ['RoIPointPool3d'] diff --git a/mmdet3d/ops/roipoint_pool3d/roipoint_pool3d.py b/mmdet3d/ops/roipoint_pool3d/roipoint_pool3d.py new file mode 100644 index 0000000000..9ff10c53a0 --- /dev/null +++ b/mmdet3d/ops/roipoint_pool3d/roipoint_pool3d.py @@ -0,0 +1,71 @@ +from torch import nn as nn +from torch.autograd import Function + +from . import roipoint_pool3d_ext + + +class RoIPointPool3d(nn.Module): + + def __init__(self, num_sampled_points=512): + super().__init__() + """ + Args: + num_sampled_points (int): Number of samples in each roi + """ + self.num_sampled_points = num_sampled_points + + def forward(self, points, point_features, boxes3d): + """ + Args: + points (torch.Tensor): Input points whose shape is BxNx3 + point_features: (B, N, C) + boxes3d: (B, M, 7), [x, y, z, dx, dy, dz, heading] + + Returns: + torch.Tensor: (B, M, 512, 3 + C) pooled_features + torch.Tensor: (B, M) pooled_empty_flag + """ + return RoIPointPool3dFunction.apply(points, point_features, boxes3d, + self.num_sampled_points) + + +class RoIPointPool3dFunction(Function): + + @staticmethod + def forward(ctx, points, point_features, boxes3d, num_sampled_points=512): + """ + Args: + points (torch.Tensor): Input points whose shape is (B, N, 3) + point_features (torch.Tensor): Input points features shape is \ + (B, N, C) + boxes3d (torch.Tensor): Input bounding boxes whose shape is \ + (B, M, 7) + num_sampled_points (int): the num of sampled points + + Returns: + torch.Tensor: (B, M, 512, 3 + C) pooled_features + torch.Tensor: (B, M) pooled_empty_flag + """ + assert points.shape.__len__() == 3 and points.shape[2] == 3 + batch_size, boxes_num, feature_len = points.shape[0], boxes3d.shape[ + 1], point_features.shape[2] + pooled_boxes3d = boxes3d.view(batch_size, -1, 7) + pooled_features = point_features.new_zeros( + (batch_size, boxes_num, num_sampled_points, 3 + feature_len)) + pooled_empty_flag = point_features.new_zeros( + (batch_size, boxes_num)).int() + + roipoint_pool3d_ext.forward(points.contiguous(), + pooled_boxes3d.contiguous(), + point_features.contiguous(), + pooled_features, pooled_empty_flag) + + return pooled_features, pooled_empty_flag + + @staticmethod + def backward(ctx, grad_out): + raise NotImplementedError + + +if __name__ == '__main__': + pass diff --git a/mmdet3d/ops/roipoint_pool3d/src/roipoint_pool3d.cpp b/mmdet3d/ops/roipoint_pool3d/src/roipoint_pool3d.cpp new file mode 100644 index 0000000000..9369b98482 --- /dev/null +++ b/mmdet3d/ops/roipoint_pool3d/src/roipoint_pool3d.cpp @@ -0,0 +1,66 @@ +/* +Modified for +https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/roipoint_pool3d/src/roipoint_pool3d_kernel.cu +Point cloud feature pooling +Written by Shaoshuai Shi +All Rights Reserved 2018. +*/ +#include +#include + +#define CHECK_CUDA(x) do { \ + if (!x.type().is_cuda()) { \ + fprintf(stderr, "%s must be CUDA tensor at %s:%d\n", #x, __FILE__, __LINE__); \ + exit(-1); \ + } \ +} while (0) +#define CHECK_CONTIGUOUS(x) do { \ + if (!x.is_contiguous()) { \ + fprintf(stderr, "%s must be contiguous tensor at %s:%d\n", #x, __FILE__, __LINE__); \ + exit(-1); \ + } \ +} while (0) +#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) + + +void roipool3dLauncher(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num, + const float *xyz, const float *boxes3d, const float *pts_feature, float *pooled_features, int *pooled_empty_flag); + + +int roipool3d_gpu(at::Tensor xyz, at::Tensor boxes3d, at::Tensor pts_feature, at::Tensor pooled_features, at::Tensor pooled_empty_flag){ + // params xyz: (B, N, 3) + // params boxes3d: (B, M, 7) + // params pts_feature: (B, N, C) + // params pooled_features: (B, M, 512, 3+C) + // params pooled_empty_flag: (B, M) + CHECK_INPUT(xyz); + CHECK_INPUT(boxes3d); + CHECK_INPUT(pts_feature); + CHECK_INPUT(pooled_features); + CHECK_INPUT(pooled_empty_flag); + + int batch_size = xyz.size(0); + int pts_num = xyz.size(1); + int boxes_num = boxes3d.size(1); + int feature_in_len = pts_feature.size(2); + int sampled_pts_num = pooled_features.size(2); + + + const float * xyz_data = xyz.data(); + const float * boxes3d_data = boxes3d.data(); + const float * pts_feature_data = pts_feature.data(); + float * pooled_features_data = pooled_features.data(); + int * pooled_empty_flag_data = pooled_empty_flag.data(); + + roipool3dLauncher(batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num, + xyz_data, boxes3d_data, pts_feature_data, pooled_features_data, pooled_empty_flag_data); + + + + return 1; +} + + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("forward", &roipool3d_gpu, "roipool3d forward (CUDA)"); +} diff --git a/mmdet3d/ops/roipoint_pool3d/src/roipoint_pool3d_kernel.cu b/mmdet3d/ops/roipoint_pool3d/src/roipoint_pool3d_kernel.cu new file mode 100644 index 0000000000..a63a4c7ec4 --- /dev/null +++ b/mmdet3d/ops/roipoint_pool3d/src/roipoint_pool3d_kernel.cu @@ -0,0 +1,168 @@ +/* +Modified from +https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roipoint_pool3d/src/roipoint_pool3d_kernel.cu +Point cloud feature pooling +Written by Shaoshuai Shi +All Rights Reserved 2018. +*/ + +#include +#include + +#define THREADS_PER_BLOCK 256 +#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) +// #define DEBUG + +__device__ inline void lidar_to_local_coords(float shift_x, float shift_y, + float rz, float &local_x, + float &local_y) { + float cosa = cos(-rz), sina = sin(-rz); + local_x = shift_x * cosa + shift_y * (-sina); + local_y = shift_x * sina + shift_y * cosa; +} + +__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, + float &local_x, float &local_y) { + // param pt: (x, y, z) + // param box3d: (cx, cy, cz, dx, dy, dz, rz) in LiDAR coordinate, cz in the + // bottom center + float x = pt[0], y = pt[1], z = pt[2]; + float cx = box3d[0], cy = box3d[1], cz = box3d[2]; + float dx = box3d[3], dy = box3d[4], dz = box3d[5], rz = box3d[6]; + cz += dz / 2.0; // shift to the center since cz in box3d is the bottom center + + if (fabsf(z - cz) > dz / 2.0) return 0; + lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y); + float in_flag = (local_x > -dx / 2.0) & (local_x < dx / 2.0) & + (local_y > -dy / 2.0) & (local_y < dy / 2.0); + return in_flag; +} + +__global__ void assign_pts_to_box3d(int batch_size, int pts_num, int boxes_num, const float *xyz, const float *boxes3d, int *pts_assign){ + // params xyz: (B, N, 3) + // params boxes3d: (B, M, 7) + // params pts_assign: (B, N, M): idx of the corresponding box3d, -1 means background points + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + int box_idx = blockIdx.y; + int bs_idx = blockIdx.z; + + if (pt_idx >= pts_num || box_idx >= boxes_num || bs_idx >= batch_size){ + return; + } + int assign_idx = bs_idx * pts_num * boxes_num + pt_idx * boxes_num + box_idx; + pts_assign[assign_idx] = 0; + + int box_offset = bs_idx * boxes_num * 7 + box_idx * 7; + int pt_offset = bs_idx * pts_num * 3 + pt_idx * 3; + + + float local_x = 0, local_y = 0; + int cur_in_flag = check_pt_in_box3d(xyz + pt_offset, boxes3d + box_offset, local_x, local_y); + pts_assign[assign_idx] = cur_in_flag; + // printf("bs=%d, pt=%d, in=%d\n", bs_idx, pt_idx, pts_assign[bs_idx * pts_num + pt_idx]); +} + + +__global__ void get_pooled_idx(int batch_size, int pts_num, int boxes_num, int sampled_pts_num, + const int *pts_assign, int *pts_idx, int *pooled_empty_flag){ + // params xyz: (B, N, 3) + // params pts_feature: (B, N, C) + // params pts_assign: (B, N) + // params pts_idx: (B, M, 512) + // params pooled_empty_flag: (B, M) + + int boxes_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (boxes_idx >= boxes_num){ + return; + } + + int bs_idx = blockIdx.y; + + int cnt = 0; + for (int k = 0; k < pts_num; k++){ + if (pts_assign[bs_idx * pts_num * boxes_num + k * boxes_num + boxes_idx]){ + if (cnt < sampled_pts_num){ + pts_idx[bs_idx * boxes_num * sampled_pts_num + boxes_idx * sampled_pts_num + cnt] = k; + cnt++; + } + else break; + } + } + + if (cnt == 0){ + pooled_empty_flag[bs_idx * boxes_num + boxes_idx] = 1; + } + else if (cnt < sampled_pts_num){ + // duplicate same points for sampling + for (int k = cnt; k < sampled_pts_num; k++){ + int duplicate_idx = k % cnt; + int base_offset = bs_idx * boxes_num * sampled_pts_num + boxes_idx * sampled_pts_num; + pts_idx[base_offset + k] = pts_idx[base_offset + duplicate_idx]; + } + } +} + + +__global__ void roipool3d_forward(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num, + const float *xyz, const int *pts_idx, const float *pts_feature, + float *pooled_features, int *pooled_empty_flag){ + // params xyz: (B, N, 3) + // params pts_idx: (B, M, 512) + // params pts_feature: (B, N, C) + // params pooled_features: (B, M, 512, 3+C) + // params pooled_empty_flag: (B, M) + + int sample_pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + int box_idx = blockIdx.y; + int bs_idx = blockIdx.z; + + if (sample_pt_idx >= sampled_pts_num || box_idx >= boxes_num || bs_idx >= batch_size){ + return; + } + + if (pooled_empty_flag[bs_idx * boxes_num + box_idx]){ + return; + } + + int temp_idx = bs_idx * boxes_num * sampled_pts_num + box_idx * sampled_pts_num + sample_pt_idx; + int src_pt_idx = pts_idx[temp_idx]; + int dst_feature_offset = temp_idx * (3 + feature_in_len); + + for (int j = 0; j < 3; j++) + pooled_features[dst_feature_offset + j] = xyz[bs_idx * pts_num * 3 + src_pt_idx * 3 + j]; + + int src_feature_offset = bs_idx * pts_num * feature_in_len + src_pt_idx * feature_in_len; + for (int j = 0; j < feature_in_len; j++) + pooled_features[dst_feature_offset + 3 + j] = pts_feature[src_feature_offset + j]; +} + + +void roipool3dLauncher(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num, + const float *xyz, const float *boxes3d, const float *pts_feature, float *pooled_features, int *pooled_empty_flag){ + + // printf("batch_size=%d, pts_num=%d, boxes_num=%d\n", batch_size, pts_num, boxes_num); + int *pts_assign = NULL; + cudaMalloc(&pts_assign, batch_size * pts_num * boxes_num * sizeof(int)); // (batch_size, N, M) + // cudaMemset(&pts_assign, -1, batch_size * pts_num * boxes_num * sizeof(int)); + + dim3 blocks(DIVUP(pts_num, THREADS_PER_BLOCK), boxes_num, batch_size); // blockIdx.x(col), blockIdx.y(row) + dim3 threads(THREADS_PER_BLOCK); + assign_pts_to_box3d<<>>(batch_size, pts_num, boxes_num, xyz, boxes3d, pts_assign); + + int *pts_idx = NULL; + cudaMalloc(&pts_idx, batch_size * boxes_num * sampled_pts_num * sizeof(int)); // (batch_size, M, sampled_pts_num) + + dim3 blocks2(DIVUP(boxes_num, THREADS_PER_BLOCK), batch_size); // blockIdx.x(col), blockIdx.y(row) + get_pooled_idx<<>>(batch_size, pts_num, boxes_num, sampled_pts_num, pts_assign, pts_idx, pooled_empty_flag); + + dim3 blocks_pool(DIVUP(sampled_pts_num, THREADS_PER_BLOCK), boxes_num, batch_size); + roipool3d_forward<<>>(batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num, + xyz, pts_idx, pts_feature, pooled_features, pooled_empty_flag); + + cudaFree(pts_assign); + cudaFree(pts_idx); + +#ifdef DEBUG + cudaDeviceSynchronize(); // for using printf in kernel function +#endif +} diff --git a/setup.py b/setup.py index e0d80c8a6c..e46739a631 100644 --- a/setup.py +++ b/setup.py @@ -270,6 +270,11 @@ def add_mim_extention(): 'src/roiaware_pool3d_kernel.cu', 'src/points_in_boxes_cuda.cu', ]), + make_cuda_ext( + name='roipoint_pool3d_ext', + module='mmdet3d.ops.roipoint_pool3d', + sources=['src/roipoint_pool3d.cpp'], + sources_cuda=['src/roipoint_pool3d_kernel.cu']), make_cuda_ext( name='ball_query_ext', module='mmdet3d.ops.ball_query', diff --git a/tests/test_models/test_heads/test_roi_extractors.py b/tests/test_models/test_heads/test_roi_extractors.py index 1316aa3594..8ff941443d 100644 --- a/tests/test_models/test_heads/test_roi_extractors.py +++ b/tests/test_models/test_heads/test_roi_extractors.py @@ -2,7 +2,8 @@ import pytest import torch -from mmdet3d.models.roi_heads.roi_extractors import Single3DRoIAwareExtractor +from mmdet3d.models.roi_heads.roi_extractors import (Single3DRoIAwareExtractor, + Single3DRoIPointExtractor) def test_single_roiaware_extractor(): @@ -29,3 +30,27 @@ def test_single_roiaware_extractor(): assert pooled_feats.shape == torch.Size([2, 4, 4, 4, 3]) assert torch.allclose(pooled_feats.sum(), torch.tensor(51.100).cuda(), 1e-3) + + +def test_single_roipoint_extractor(): + if not torch.cuda.is_available(): + pytest.skip('test requires GPU and torch+cuda') + + roi_layer_cfg = dict( + type='RoIPointPool3d', num_sampled_points=512, pool_extra_width=0) + + self = Single3DRoIPointExtractor(roi_layer=roi_layer_cfg) + + feats = torch.tensor( + [[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6], + [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3], + [4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20], [-16, -18, 9], + [-21.3, -52, -5], [0, 0, 0], [6, 7, 8], [-2, -3, -4]], + dtype=torch.float32).unsqueeze(0).cuda() + points = feats.clone() + batch_inds = feats.shape[0] + rois = torch.tensor([[0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3], + [0, -10.0, 23.0, 16.0, 10, 20, 20, 0.5]], + dtype=torch.float32).cuda() + pooled_feats = self(feats, points, batch_inds, rois) + assert pooled_feats.shape == torch.Size([2, 512, 6])