Add caching for numba and cupy functions (#591)
abramov-oleg authored Dec 10, 2023
1 parent 2b50d3a commit 261b005
Showing 6 changed files with 10 additions and 11 deletions.
4 changes: 3 additions & 1 deletion docker/Dockerfile.deepstream
@@ -111,7 +111,9 @@ RUN apt update \
ARG PROJECT_PATH=/opt/savant
ENV PYTHONUNBUFFERED=1 \
PROJECT_PATH=$PROJECT_PATH \
PYTHONPATH=$PROJECT_PATH
PYTHONPATH=$PROJECT_PATH \
NUMBA_CACHE_DIR=/cache/numba \
CUPY_CACHE_DIR=/cache/cupy
WORKDIR $PROJECT_PATH

# add deepstream libs to path, so that ctypes can load them
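Note (editorial, not part of the commit): NUMBA_CACHE_DIR tells Numba where functions compiled with cache=True keep their on-disk cache, and CUPY_CACHE_DIR sets the directory CuPy uses for compiled kernel binaries. Setting both in the image and pointing them at /cache (the same tree the sample compose files already use for models and downloads) lets the JIT artifacts persist across container restarts whenever /cache is backed by a mounted volume, which is presumably why the per-service copies are dropped from the compose files below. A minimal sketch of the mechanism, assuming Numba and NumPy are installed and the chosen cache directory is writable:

import os

# Mirror the values baked into Dockerfile.deepstream; outside the container,
# point these at any writable directory before importing the libraries.
os.environ.setdefault('NUMBA_CACHE_DIR', '/cache/numba')
os.environ.setdefault('CUPY_CACHE_DIR', '/cache/cupy')

import numba as nb
import numpy as np


@nb.njit('f4(f4[:])', nogil=True, cache=True)
def total(values):
    # Compiled eagerly because the signature is explicit; with cache=True the
    # compiled code is written under NUMBA_CACHE_DIR and reused by later
    # processes instead of being recompiled.
    acc = np.float32(0.0)
    for v in values:
        acc += v
    return acc


print(total(np.ones(16, dtype=np.float32)))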
2 changes: 0 additions & 2 deletions samples/yolov8_seg/docker-compose.l4t.yml
@@ -29,8 +29,6 @@ services:
environment:
- MODEL_PATH=/cache/models/yolov8_seg
- DOWNLOAD_PATH=/cache/downloads/yolov8_seg
- CUPY_CACHE_DIR=/cache/cupy
- NUMBA_CACHE_DIR=/cache/numba
- ZMQ_SRC_ENDPOINT=sub+bind:ipc:///tmp/zmq-sockets/input-video.ipc
- ZMQ_SINK_ENDPOINT=pub+bind:ipc:///tmp/zmq-sockets/output-video.ipc
- METRICS_FRAME_PERIOD=1000
2 changes: 0 additions & 2 deletions samples/yolov8_seg/docker-compose.x86.yml
@@ -30,8 +30,6 @@ services:
- MODEL_OUTPUT_CONVERTER=gpu_converter
- MODEL_PATH=/cache/models/yolov8_seg
- DOWNLOAD_PATH=/cache/downloads/yolov8_seg
- CUPY_CACHE_DIR=/cache/cupy
- NUMBA_CACHE_DIR=/cache/numba
- ZMQ_SRC_ENDPOINT=sub+bind:ipc:///tmp/zmq-sockets/input-video.ipc
- ZMQ_SINK_ENDPOINT=pub+bind:ipc:///tmp/zmq-sockets/output-video.ipc
- METRICS_FRAME_PERIOD=1000
5 changes: 3 additions & 2 deletions samples/yolov8_seg/module/converter.py
@@ -108,7 +108,7 @@ def __call__(
return tensors, mask_list


@nb.njit('Tuple((u2[:], f4[:]))(f4[:, :])', nogil=True)
@nb.njit('Tuple((u2[:], f4[:]))(f4[:, :])', nogil=True, cache=True)
def _parse_scores(scores: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
class_ids = np.empty(scores.shape[0], dtype=np.uint16)
confidences = np.empty(scores.shape[0], dtype=scores.dtype)
@@ -118,7 +118,7 @@ def _parse_scores(scores: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
return class_ids, confidences


@nb.njit('f4[:, ::1](f4[:, :])', nogil=True)
@nb.njit('f4[:, ::1](f4[:, :])', nogil=True, cache=True)
def sigmoid(a: np.ndarray) -> np.ndarray:
ones = np.ones(a.shape, dtype=np.float32)
return np.divide(ones, (ones + np.exp(-a)))
@@ -127,6 +127,7 @@ def sigmoid(a: np.ndarray) -> np.ndarray:
@nb.njit(
'Tuple((f4[:, ::1], f4[:, :, ::1]))(f4[:, ::1], f4[:, :, ::1], u2, f4, f4, u2)',
nogil=True,
cache=True,
)
def _postproc(
output: np.ndarray,
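Note (editorial, not part of the commit): because these njit functions declare explicit signatures, Numba compiles them eagerly at import time, and cache=True persists that work to NUMBA_CACHE_DIR, so only the first run against an empty cache pays the compilation cost. A rough way to observe the effect, as a sketch that reuses the sigmoid function above (run it twice and compare the reported times):

import time

import numba as nb
import numpy as np

start = time.perf_counter()


# Same function as in converter.py: the explicit signature forces compilation
# right here, and cache=True lets a second run load the result from disk.
@nb.njit('f4[:, ::1](f4[:, :])', nogil=True, cache=True)
def sigmoid(a: np.ndarray) -> np.ndarray:
    ones = np.ones(a.shape, dtype=np.float32)
    return np.divide(ones, (ones + np.exp(-a)))


print(f'compile-or-load took {time.perf_counter() - start:.3f} s')
print(sigmoid(np.zeros((2, 3), dtype=np.float32)))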
4 changes: 2 additions & 2 deletions savant/selector/detector.py
@@ -6,7 +6,7 @@
from savant.utils.nms import nms_cpu


@nb.njit('f4[:, :](f4[:, :], u2, u2, u2, u2)', nogil=True)
@nb.njit('f4[:, :](f4[:, :], u2, u2, u2, u2)', nogil=True, cache=True)
def min_max_bbox_size_selector(
bbox_tensor: np.ndarray,
min_width: int = 0,
@@ -86,7 +86,7 @@ def __call__(self, bbox_tensor: np.ndarray) -> np.ndarray:
)


@nb.njit('f4[:, :](f4[:, :], f4, f4, u2, u2, u2, u2)', nogil=True)
@nb.njit('f4[:, :](f4[:, :], f4, f4, u2, u2, u2, u2)', nogil=True, cache=True)
def default_selector(
bbox_tensor: np.ndarray,
confidence_threshold: float = 0.0,
4 changes: 2 additions & 2 deletions savant/utils/nms.py
@@ -6,7 +6,7 @@
__all__ = ['nms_cpu', 'nms_gpu']


@nb.njit('u4[:](f4[:, :], f4[:], f4, u2)', nogil=True)
@nb.njit('u4[:](f4[:, :], f4[:], f4, u2)', nogil=True, cache=True)
def nms_cpu(
bboxes: np.ndarray, confidences: np.ndarray, threshold: float, top_k: int = 300
) -> np.ndarray:
@@ -172,7 +172,7 @@ def _call_nms_kernel(bboxes: cp.ndarray, threshold: float) -> cp.ndarray:
return _nms_gpu_post(mask.get(), n_bbox, threads_per_block, col_blocks)


@nb.njit('u4[:](u8[:], u2, u2, u2)', nogil=True)
@nb.njit('u4[:](u8[:], u2, u2, u2)', nogil=True, cache=True)
def _nms_gpu_post(
mask: np.ndarray, n_bbox: int, threads_per_block: int, col_blocks: int
) -> np.ndarray:
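Note (editorial, not part of the commit): the GPU NMS helper above operates on CuPy arrays, and CuPy compiles user-defined kernels with NVRTC on their first launch, storing the resulting binaries in its kernel cache; CUPY_CACHE_DIR overrides the cache location, so pointing it at /cache/cupy lets a recreated container reuse already-compiled kernels instead of invoking NVRTC again. An illustrative sketch with a hypothetical RawKernel (not the repository's NMS kernel), assuming CuPy and a CUDA device are available:

import cupy as cp

# CuPy compiles this source with NVRTC on the first launch and caches the
# binary under CUPY_CACHE_DIR (/cache/cupy in the image), so later processes
# that share the cache skip the compilation step.
_scale = cp.RawKernel(r'''
extern "C" __global__
void scale(const float* x, float* y, const float s, const int n) {
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < n) {
        y[i] = s * x[i];
    }
}
''', 'scale')

x = cp.arange(16, dtype=cp.float32)
y = cp.empty_like(x)
_scale((1,), (16,), (x, y, cp.float32(2.0), cp.int32(x.size)))
print(y)  # expected: 0, 2, 4, ... 30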
