[Feature] Add inferencer for LiDAR-based detection #2208

Merged · 12 commits · Jan 31, 2023
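This PR adds `LidarDet3DInferencer`, which takes point-cloud inputs as a dict keyed by `'points'` and can resolve its config and checkpoint from a metafile alias. A minimal usage sketch, assuming the alias added below and a hypothetical local `.bin` file; keyword routing follows MMEngine's `BaseInferencer`:

```python
from mmdet3d.apis import LidarDet3DInferencer

# The metafile alias resolves both the config and the checkpoint.
inferencer = LidarDet3DInferencer(model='pointpillars_kitti-3class')

# LiDAR inputs are a dict keyed by 'points'; the value may be a .bin
# file path, a directory of .bin files, or an in-memory numpy array.
results = inferencer(
    dict(points='demo/data/kitti/000008.bin'),  # hypothetical path
    pred_score_thr=0.3,  # routed to visualize() via visualize_kwargs
    print_result=True)   # routed to postprocess() via postprocess_kwargs
```

`BaseInferencer.__call__` dispatches extra keyword arguments according to the `preprocess_kwargs`/`forward_kwargs`/`visualize_kwargs`/`postprocess_kwargs` sets declared on the class.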
4 changes: 3 additions & 1 deletion configs/pointpillars/metafile.yml
@@ -29,6 +29,7 @@ Models:
Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car_20220331_134606-d42d15ed.pth

- Name: pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class
Alias: pointpillars_kitti-3class
In Collection: PointPillars
Config: configs/pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py
Metadata:
@@ -164,8 +165,9 @@ Models:
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-car/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-car_20200901_204315-302fc3e7.pth

- Name: hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class
Alias: pointpillars_waymod5-3class
In Collection: PointPillars
-  Config: configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py
+  Config: configs/pointpillars/pointpillars_hv_secfpn_sbn-all_16xb2-2x_waymoD5-3d-3class.py
Metadata:
Training Data: Waymo
Training Memory (GB): 8.12
6 changes: 4 additions & 2 deletions mmdet3d/apis/__init__.py
@@ -3,10 +3,12 @@
inference_mono_3d_detector,
inference_multi_modality_detector, inference_segmentor,
init_model)
-from .inferencers import BaseDet3DInferencer, MonoDet3DInferencer
+from .inferencers import (BaseDet3DInferencer, LidarDet3DInferencer,
+                          MonoDet3DInferencer)

__all__ = [
'inference_detector', 'init_model', 'inference_mono_3d_detector',
'convert_SyncBN', 'inference_multi_modality_detector',
-    'inference_segmentor', 'BaseDet3DInferencer', 'MonoDet3DInferencer'
+    'inference_segmentor', 'BaseDet3DInferencer', 'MonoDet3DInferencer',
+    'LidarDet3DInferencer'
]
5 changes: 4 additions & 1 deletion mmdet3d/apis/inferencers/__init__.py
@@ -1,5 +1,8 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .base_det3d_inferencer import BaseDet3DInferencer
from .lidar_det3d_inferencer import LidarDet3DInferencer
from .mono_det3d_inferencer import MonoDet3DInferencer

-__all__ = ['BaseDet3DInferencer', 'MonoDet3DInferencer']
+__all__ = [
+    'BaseDet3DInferencer', 'MonoDet3DInferencer', 'LidarDet3DInferencer'
+]
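With both `__init__.py` updates in place, the new inferencer is importable from either package level; a quick sketch:

```python
# Equivalent imports after this change:
from mmdet3d.apis import LidarDet3DInferencer
# from mmdet3d.apis.inferencers import LidarDet3DInferencer  # same class
```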
49 changes: 48 additions & 1 deletion mmdet3d/apis/inferencers/base_det3d_inferencer.py
@@ -4,6 +4,8 @@
import mmengine
import numpy as np
import torch.nn as nn
from mmengine.fileio import (get_file_backend, isdir, join_path,
list_dir_or_file)
from mmengine.infer.infer import BaseInferencer, ModelType
from mmengine.runner import load_checkpoint
from mmengine.structures import InstanceData
@@ -110,6 +112,51 @@ def _init_model(
model.eval()
return model

def _inputs_to_list(
self,
inputs: Union[dict, list],
modality_key: Union[str, List[str]] = 'points') -> list:
"""Preprocess the inputs to a list.

Preprocess inputs to a list according to its type:

- list or tuple: return inputs
- dict: the value of the key 'points'/'img' is
- Directory path: return all files in the directory
- other cases: return a list containing the string. The string
could be a path to a file, a URL, or another type of string,
depending on the task.

Args:
inputs (Union[dict, list]): Inputs for the inferencer.
modality_key (Union[str, List[str]], optional): The key of the
modality. Defaults to 'points'.

Returns:
list: List of inputs for :meth:`preprocess`.
"""
if isinstance(modality_key, str):
modality_key = [modality_key]
assert set(modality_key).issubset({'points', 'img'})

for key in modality_key:
if isinstance(inputs, dict) and isinstance(inputs[key], str):
entry = inputs[key]
backend = get_file_backend(entry)
if hasattr(backend, 'isdir') and isdir(entry):
# Backends like HttpsBackend do not implement `isdir`, so
# only those backends that implement `isdir` could accept
# the inputs as a directory
filename_list = list_dir_or_file(entry, list_dir=False)
inputs = [{
f'{key}': join_path(entry, filename)
} for filename in filename_list]

if not isinstance(inputs, (list, tuple)):
inputs = [inputs]

return list(inputs)
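For illustration, given an inferencer instance, the generalized helper expands a directory value into one input dict per file and wraps everything else in a list (paths hypothetical):

```python
# Directory value: expanded to one input dict per file it contains.
inferencer._inputs_to_list(dict(points='data/kitti/velodyne'))
# -> [{'points': 'data/kitti/velodyne/000000.bin'},
#     {'points': 'data/kitti/velodyne/000001.bin'}, ...]

# Single file path (or URL): wrapped into a one-element list.
inferencer._inputs_to_list(dict(points='data/kitti/velodyne/000000.bin'))
# -> [{'points': 'data/kitti/velodyne/000000.bin'}]
```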

def _get_transform_idx(self, pipeline_cfg: ConfigType, name: str) -> int:
"""Returns the index of the transform in a pipeline.

@@ -240,7 +287,7 @@ def pred2dict(self, data_sample: InstanceData) -> Dict:
"""
pred_instances = data_sample.pred_instances_3d.numpy()
result = {
-            'bboxes_3d': pred_instances.bboxes_3d.tensor.numpy().tolist(),
+            'bboxes_3d': pred_instances.bboxes_3d.tensor.cpu().tolist(),
'labels_3d': pred_instances.labels_3d.tolist(),
'scores_3d': pred_instances.scores_3d.tolist()
}
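The `.cpu()` call makes the serialization safe for CUDA tensors: `Tensor.numpy()` raises on a GPU tensor, while `.cpu().tolist()` works regardless of device. The resulting dict for one sample looks roughly like this (values illustrative; 7-DoF LiDAR boxes are stored as `(x, y, z, dx, dy, dz, yaw)`):

```python
{
    'bboxes_3d': [[6.23, -3.51, -1.65, 3.90, 1.60, 1.56, -1.59]],
    'labels_3d': [2],
    'scores_3d': [0.91],
}
```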
183 changes: 183 additions & 0 deletions mmdet3d/apis/inferencers/lidar_det3d_inferencer.py
@@ -0,0 +1,183 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import Dict, List, Optional, Sequence, Union

import mmengine
import numpy as np
from mmengine.dataset import Compose
from mmengine.infer.infer import ModelType
from mmengine.structures import InstanceData

from mmdet3d.registry import INFERENCERS
from mmdet3d.utils import ConfigType, register_all_modules
from .base_det3d_inferencer import BaseDet3DInferencer

InstanceList = List[InstanceData]
InputType = Union[str, np.ndarray]
InputsType = Union[InputType, Sequence[InputType]]
PredType = Union[InstanceData, InstanceList]
ImgType = Union[np.ndarray, Sequence[np.ndarray]]
ResType = Union[Dict, List[Dict], InstanceData, List[InstanceData]]


@INFERENCERS.register_module(name='det3d-lidar')
@INFERENCERS.register_module()
class LidarDet3DInferencer(BaseDet3DInferencer):
"""The inferencer of LiDAR-based detection.

Args:
model (str, optional): Path to the config file or the model name
defined in the metafile. For example, it could be
"pointpillars_kitti-3class" or
"configs/pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py". # noqa: E501
If model is not specified, the user must provide the
`weights` saved by MMEngine, which contains the config string.
Defaults to None.
weights (str, optional): Path to the checkpoint. If it is not specified
and model is a model name in the metafile, the weights will be loaded
from the metafile. Defaults to None.
device (str, optional): Device to run inference. If None, the available
device will be automatically used. Defaults to None.
scope (str, optional): The scope of the registry. Defaults to 'mmdet3d'.
palette (str, optional): The palette of visualization. Defaults to 'none'.
"""

preprocess_kwargs: set = set()
forward_kwargs: set = set()
visualize_kwargs: set = {
'return_vis', 'show', 'wait_time', 'draw_pred', 'pred_score_thr',
'img_out_dir'
}
postprocess_kwargs: set = {
'print_result', 'pred_out_file', 'return_datasample'
}

def __init__(self,
model: Union[ModelType, str, None] = None,
weights: Optional[str] = None,
device: Optional[str] = None,
scope: Optional[str] = 'mmdet3d',
palette: str = 'none') -> None:
# A global counter tracking the number of frames processed, for
# naming of the output results
self.num_visualized_frames = 0
self.palette = palette
register_all_modules()
super().__init__(
model=model, weights=weights, device=device, scope=scope)

def _inputs_to_list(self, inputs: Union[dict, list]) -> list:
"""Preprocess the inputs to a list.

Preprocess inputs to a list according to its type:

- list or tuple: return inputs
- dict: the value with key 'points' is
- Directory path: return all files in the directory
- other cases: return a list containing the string. The string
could be a path to a file, a URL, or another type of string,
depending on the task.

Args:
inputs (Union[dict, list]): Inputs for the inferencer.

Returns:
list: List of inputs for :meth:`preprocess`.
"""
return super()._inputs_to_list(inputs, modality_key='points')

def _init_pipeline(self, cfg: ConfigType) -> Compose:
"""Initialize the test pipeline."""
pipeline_cfg = cfg.test_dataloader.dataset.pipeline

load_pts_idx = self._get_transform_idx(pipeline_cfg,
'LoadPointsFromFile')
if load_pts_idx == -1:
raise ValueError(
'LoadPointsFromFile is not found in the test pipeline')

load_cfg = pipeline_cfg[load_pts_idx]
self.coord_type, self.load_dim = load_cfg['coord_type'], load_cfg[
'load_dim']
self.use_dim = list(range(load_cfg['use_dim'])) if isinstance(
load_cfg['use_dim'], int) else load_cfg['use_dim']

pipeline_cfg[load_pts_idx]['type'] = 'LidarDet3DInferencerLoader'
return Compose(pipeline_cfg)
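Concretely, only the transform type is swapped so the same loader settings also accept in-memory inputs; a sketch with values from a common KITTI setup (shown for illustration):

```python
# Before: points are always read from disk by the test pipeline.
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4)
# After _init_pipeline: same settings, inferencer-friendly loader.
dict(type='LidarDet3DInferencerLoader', coord_type='LIDAR', load_dim=4,
     use_dim=4)
# An integer use_dim=4 is normalized to the index list [0, 1, 2, 3].
```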

def visualize(self,
inputs: InputsType,
preds: PredType,
return_vis: bool = False,
show: bool = False,
wait_time: int = 0,
draw_pred: bool = True,
pred_score_thr: float = 0.3,
img_out_dir: str = '') -> Union[List[np.ndarray], None]:
"""Visualize predictions.

Args:
inputs (InputsType): Inputs for the inferencer.
preds (PredType): Predictions of the model.
return_vis (bool): Whether to return the visualization result.
Defaults to False.
show (bool): Whether to display the image in a popup window.
Defaults to False.
wait_time (float): The interval of show (s). Defaults to 0.
draw_pred (bool): Whether to draw predicted bounding boxes.
Defaults to True.
pred_score_thr (float): Minimum score of bboxes to draw.
Defaults to 0.3.
img_out_dir (str): Output directory of visualization results.
If left as empty, no file will be saved. Defaults to ''.
Returns:
List[np.ndarray] or None: Returns visualization results only if
applicable.
"""
if self.visualizer is None or (not show and img_out_dir == ''
and not return_vis):
return None

results = []

for single_input, pred in zip(inputs, preds):
single_input = single_input['points']
if isinstance(single_input, str):
pts_bytes = mmengine.fileio.get(single_input)
points = np.frombuffer(pts_bytes, dtype=np.float32)
points = points.reshape(-1, self.load_dim)
points = points[:, self.use_dim]
pc_name = osp.basename(single_input).split('.bin')[0]
pc_name = f'{pc_name}.png'
elif isinstance(single_input, np.ndarray):
points = single_input.copy()
pc_num = str(self.num_visualized_frames).zfill(8)
pc_name = f'pc_{pc_num}.png'
else:
raise ValueError('Unsupported input type: '
f'{type(single_input)}')

o3d_save_path = osp.join(img_out_dir, pc_name) \
if img_out_dir != '' else None

data_input = dict(points=points)
self.visualizer.add_datasample(
pc_name,
data_input,
pred,
show=show,
wait_time=wait_time,
draw_gt=False,
draw_pred=draw_pred,
pred_score_thr=pred_score_thr,
o3d_save_path=o3d_save_path,
vis_task='lidar_det',
)
results.append(points)
self.num_visualized_frames += 1

return results
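The string branch above follows the usual KITTI `.bin` decoding; an equivalent standalone sketch (file path hypothetical, `load_dim`/`use_dim` as read from the config):

```python
import mmengine
import numpy as np

# Fetch raw bytes through mmengine's file backends (local, HTTP, ...).
pts_bytes = mmengine.fileio.get('data/kitti/velodyne/000008.bin')
# KITTI stores float32 (x, y, z, intensity) tuples, i.e. load_dim=4.
points = np.frombuffer(pts_bytes, dtype=np.float32).reshape(-1, 4)
points = points[:, [0, 1, 2, 3]]  # use_dim: keep all four dims
```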
28 changes: 7 additions & 21 deletions mmdet3d/apis/inferencers/mono_det3d_inferencer.py
@@ -6,11 +6,10 @@
import mmengine
import numpy as np
from mmengine.dataset import Compose
-from mmengine.fileio import (get_file_backend, isdir, join_path,
-                             list_dir_or_file)
from mmengine.infer.infer import ModelType
from mmengine.structures import InstanceData

from mmdet3d.registry import INFERENCERS
from mmdet3d.utils import ConfigType
from .base_det3d_inferencer import BaseDet3DInferencer

@@ -22,6 +21,8 @@
ResType = Union[Dict, List[Dict], InstanceData, List[InstanceData]]


@INFERENCERS.register_module(name='det3d-mono')
@INFERENCERS.register_module()
class MonoDet3DInferencer(BaseDet3DInferencer):
"""MMDet3D Monocular 3D object detection inferencer.

@@ -75,7 +76,7 @@ def _inputs_to_list(self, inputs: Union[dict, list]) -> list:
Preprocess inputs to a list according to its type:

- list or tuple: return inputs
-        - dict:
+        - dict: the value with key 'img' is
- Directory path: return all files in the directory
- other cases: return a list containing the string. The string
could be a path to file, a url or other types of string according
@@ -87,22 +88,7 @@ def _inputs_to_list(self, inputs: Union[dict, list]) -> list:
Returns:
list: List of input for the :meth:`preprocess`.
"""

-        if isinstance(inputs, dict) and isinstance(inputs['img'], str):
-            img = inputs['img']
-            backend = get_file_backend(img)
-            if hasattr(backend, 'isdir') and isdir(img):
-                # Backends like HttpsBackend do not implement `isdir`, so only
-                # those backends that implement `isdir` could accept the inputs
-                # as a directory
-                filename_list = list_dir_or_file(img, list_dir=False)
-                img = [join_path(img, filename) for filename in filename_list]
-                inputs['img'] = img
-
-        if not isinstance(inputs, (list, tuple)):
-            inputs = [inputs]
-
-        return list(inputs)
+        return super()._inputs_to_list(inputs, modality_key='img')

def _init_pipeline(self, cfg: ConfigType) -> Compose:
"""Initialize the test pipeline."""
@@ -113,7 +99,7 @@ def _init_pipeline(self, cfg: ConfigType) -> Compose:
if load_img_idx == -1:
raise ValueError(
'LoadImageFromFileMono3D is not found in the test pipeline')
-        pipeline_cfg[load_img_idx]['type'] = 'Mono3DInferencerLoader'
+        pipeline_cfg[load_img_idx]['type'] = 'MonoDet3DInferencerLoader'
return Compose(pipeline_cfg)

def visualize(self,
@@ -167,7 +153,7 @@ def visualize(self,
img_name = f'{img_num}.jpg'
else:
raise ValueError('Unsupported input type: '
-                                 f'{type(single_input)}')
+                                 f"{type(single_input['img'])}")

out_file = osp.join(img_out_dir, img_name) if img_out_dir != '' \
else None
5 changes: 3 additions & 2 deletions mmdet3d/datasets/transforms/__init__.py
@@ -4,7 +4,7 @@
from .loading import (LoadAnnotations3D, LoadImageFromFileMono3D,
LoadMultiViewImageFromFiles, LoadPointsFromDict,
LoadPointsFromFile, LoadPointsFromMultiSweeps,
-                      Mono3DInferencerLoader, NormalizePointsColor,
+                      MonoDet3DInferencerLoader, NormalizePointsColor,
PointSegClassMapping)
from .test_time_aug import MultiScaleFlipAug3D
# yapf: disable
@@ -29,5 +29,6 @@
'IndoorPatchPointSample', 'LoadImageFromFileMono3D', 'ObjectNameFilter',
'RandomDropPointsColor', 'RandomJitterPoints', 'AffineResize',
'RandomShiftScale', 'LoadPointsFromDict', 'Resize3D', 'RandomResize3D',
-    'MultiViewWrapper', 'PhotoMetricDistortion3D', 'Mono3DInferencerLoader'
+    'MultiViewWrapper', 'PhotoMetricDistortion3D', 'MonoDet3DInferencerLoader',
+    'LidarDet3DInferencerLoader'
]