From c14602d54096c513edf1c90d6a86cfb538891f0a Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" <cjnolet@gmail.com> Date: Mon, 24 Oct 2022 15:04:56 -0400 Subject: [PATCH 1/5] Adding optional handle to each public API function (along with example) --- .../pylibraft/distance/fused_l2_nn.pyx | 12 +- .../pylibraft/distance/pairwise_distance.pyx | 13 +- .../pylibraft/neighbors/CMakeLists.txt | 28 +++ .../pylibraft/neighbors/__init__.pxd | 14 ++ .../pylibraft/pylibraft/neighbors/__init__.py | 14 ++ .../pylibraft/pylibraft/neighbors/ivf_pq.pyx | 192 ++++++++++++++++++ .../random/rmat_rectangular_generator.pyx | 13 +- .../pylibraft/pylibraft/test/test_distance.py | 4 + .../pylibraft/test/test_fused_l2_argmin.py | 6 +- .../pylibraft/pylibraft/test/test_random.py | 9 +- ...terruptible.py => test_z_interruptible.py} | 0 11 files changed, 293 insertions(+), 12 deletions(-) create mode 100644 python/pylibraft/pylibraft/neighbors/CMakeLists.txt create mode 100644 python/pylibraft/pylibraft/neighbors/__init__.pxd create mode 100644 python/pylibraft/pylibraft/neighbors/__init__.py create mode 100644 python/pylibraft/pylibraft/neighbors/ivf_pq.pyx rename python/pylibraft/pylibraft/test/{test_interruptible.py => test_z_interruptible.py} (100%) diff --git a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx index 5fb837c114..a6da3dad48 100644 --- a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx +++ b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx @@ -25,6 +25,7 @@ from cython.operator cimport dereference as deref from libcpp cimport bool from .distance_type cimport DistanceType +from pylibraft.common import Handle from pylibraft.common.handle cimport handle_t @@ -58,7 +59,7 @@ cdef extern from "raft_distance/fused_l2_min_arg.hpp" \ bool sqrt) -def fused_l2_nn_argmin(X, Y, output, sqrt=True): +def fused_l2_nn_argmin(X, Y, output, sqrt=True, handle=None): """ Compute the 1-nearest neighbors between X and Y using the L2 distance @@ -68,6 +69,7 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True): X : CUDA array interface compliant matrix shape (m, k) Y : CUDA array interface compliant matrix shape (n, k) output : Writable CUDA array interface matrix shape (m, 1) + handle : Optional RAFT handle for reusing expensive CUDA resources Examples -------- @@ -76,6 +78,7 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True): import cupy as cp + from pylibraft.common import Handle from pylibraft.distance import fused_l2_nn n_samples = 5000 @@ -88,7 +91,9 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True): dtype=cp.float32) output = cp.empty((n_samples, 1), dtype=cp.int32) - fused_l2_nn_argmin(in1, in2, output) + handle = Handle() + fused_l2_nn_argmin(in1, in2, output, handle=handle) + handle.sync() """ x_cai = X.__cuda_array_interface__ @@ -110,7 +115,8 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True): d_ptr = <uintptr_t>output_cai["data"][0] - cdef handle_t *h = new handle_t() + handle = handle if handle != None else Handle() + cdef handle_t *h = <handle_t*><size_t>handle.getHandle() x_dt = np.dtype(x_cai["typestr"]) y_dt = np.dtype(y_cai["typestr"]) diff --git a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx index 8d55402e23..af822698bc 100644 --- a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx +++ b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx @@ -25,6 +25,8 @@ from cython.operator cimport dereference as deref from libcpp cimport bool from .distance_type cimport DistanceType + +from pylibraft.common import Handle from pylibraft.common.handle cimport handle_t @@ -88,7 +90,7 @@ SUPPORTED_DISTANCES = ["euclidean", "l1", "cityblock", "l2", "inner_product", "hamming", "jensenshannon", "cosine", "sqeuclidean"] -def distance(X, Y, dists, metric="euclidean", p=2.0): +def distance(X, Y, dists, metric="euclidean", p=2.0, handle=None): """ Compute pairwise distances between X and Y @@ -106,6 +108,7 @@ def distance(X, Y, dists, metric="euclidean", p=2.0): dists : Writable CUDA array interface matrix shape (m, n) metric : string denoting the metric type (default="euclidean") p : metric parameter (currently used only for "minkowski") + handle : Optional RAFT handle for reusing expensive CUDA resources Examples -------- @@ -114,6 +117,7 @@ def distance(X, Y, dists, metric="euclidean", p=2.0): import cupy as cp + from pylibraft.common import Handle from pylibraft.distance import pairwise_distance n_samples = 5000 @@ -125,7 +129,9 @@ def distance(X, Y, dists, metric="euclidean", p=2.0): dtype=cp.float32) output = cp.empty((n_samples, n_samples), dtype=cp.float32) - pairwise_distance(in1, in2, output, metric="euclidean") + handle = Handle() + pairwise_distance(in1, in2, output, metric="euclidean", handle=handle) + handle.sync() """ x_cai = X.__cuda_array_interface__ @@ -146,7 +152,8 @@ def distance(X, Y, dists, metric="euclidean", p=2.0): y_ptr = <uintptr_t>y_cai["data"][0] d_ptr = <uintptr_t>dists_cai["data"][0] - cdef handle_t *h = new handle_t() + handle = handle if handle != None else Handle() + cdef handle_t *h = <handle_t*><size_t>handle.getHandle() x_dt = np.dtype(x_cai["typestr"]) y_dt = np.dtype(y_cai["typestr"]) diff --git a/python/pylibraft/pylibraft/neighbors/CMakeLists.txt b/python/pylibraft/pylibraft/neighbors/CMakeLists.txt new file mode 100644 index 0000000000..9d7d51c6fd --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/CMakeLists.txt @@ -0,0 +1,28 @@ +# ============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources ivf_pq.pyx) +set(linked_libraries raft::raft raft::distance) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" + MODULE_PREFIX neighbors_) + +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../library") +endforeach() diff --git a/python/pylibraft/pylibraft/neighbors/__init__.pxd b/python/pylibraft/pylibraft/neighbors/__init__.pxd new file mode 100644 index 0000000000..273b4497cc --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/__init__.pxd @@ -0,0 +1,14 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pylibraft/pylibraft/neighbors/__init__.py b/python/pylibraft/pylibraft/neighbors/__init__.py new file mode 100644 index 0000000000..273b4497cc --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq.pyx new file mode 100644 index 0000000000..8d55402e23 --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq.pyx @@ -0,0 +1,192 @@ +# +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +import numpy as np + +from libc.stdint cimport uintptr_t +from cython.operator cimport dereference as deref + +from libcpp cimport bool +from .distance_type cimport DistanceType +from pylibraft.common.handle cimport handle_t + + +def is_c_cont(cai, dt): + return "strides" not in cai or \ + cai["strides"] is None or \ + cai["strides"][1] == dt.itemsize + + +cdef extern from "raft_distance/pairwise_distance.hpp" \ + namespace "raft::distance::runtime": + + cdef void pairwise_distance(const handle_t &handle, + float *x, + float *y, + float *dists, + int m, + int n, + int k, + DistanceType metric, + bool isRowMajor, + float metric_arg) + + cdef void pairwise_distance(const handle_t &handle, + double *x, + double *y, + double *dists, + int m, + int n, + int k, + DistanceType metric, + bool isRowMajor, + float metric_arg) + +DISTANCE_TYPES = { + "l2": DistanceType.L2SqrtUnexpanded, + "sqeuclidean": DistanceType.L2Unexpanded, + "euclidean": DistanceType.L2SqrtUnexpanded, + "l1": DistanceType.L1, + "cityblock": DistanceType.L1, + "inner_product": DistanceType.InnerProduct, + "chebyshev": DistanceType.Linf, + "canberra": DistanceType.Canberra, + "cosine": DistanceType.CosineExpanded, + "lp": DistanceType.LpUnexpanded, + "correlation": DistanceType.CorrelationExpanded, + "jaccard": DistanceType.JaccardExpanded, + "hellinger": DistanceType.HellingerExpanded, + "braycurtis": DistanceType.BrayCurtis, + "jensenshannon": DistanceType.JensenShannon, + "hamming": DistanceType.HammingUnexpanded, + "kl_divergence": DistanceType.KLDivergence, + "minkowski": DistanceType.LpUnexpanded, + "russellrao": DistanceType.RusselRaoExpanded, + "dice": DistanceType.DiceExpanded +} + +SUPPORTED_DISTANCES = ["euclidean", "l1", "cityblock", "l2", "inner_product", + "chebyshev", "minkowski", "canberra", "kl_divergence", + "correlation", "russellrao", "hellinger", "lp", + "hamming", "jensenshannon", "cosine", "sqeuclidean"] + + +def distance(X, Y, dists, metric="euclidean", p=2.0): + """ + Compute pairwise distances between X and Y + + Valid values for metric: + ["euclidean", "l2", "l1", "cityblock", "inner_product", + "chebyshev", "canberra", "lp", "hellinger", "jensenshannon", + "kl_divergence", "russellrao", "minkowski", "correlation", + "cosine"] + + Parameters + ---------- + + X : CUDA array interface compliant matrix shape (m, k) + Y : CUDA array interface compliant matrix shape (n, k) + dists : Writable CUDA array interface matrix shape (m, n) + metric : string denoting the metric type (default="euclidean") + p : metric parameter (currently used only for "minkowski") + + Examples + -------- + + .. code-block:: python + + import cupy as cp + + from pylibraft.distance import pairwise_distance + + n_samples = 5000 + n_features = 50 + + in1 = cp.random.random_sample((n_samples, n_features), + dtype=cp.float32) + in2 = cp.random.random_sample((n_samples, n_features), + dtype=cp.float32) + output = cp.empty((n_samples, n_samples), dtype=cp.float32) + + pairwise_distance(in1, in2, output, metric="euclidean") + """ + + x_cai = X.__cuda_array_interface__ + y_cai = Y.__cuda_array_interface__ + dists_cai = dists.__cuda_array_interface__ + + m = x_cai["shape"][0] + n = y_cai["shape"][0] + + x_k = x_cai["shape"][1] + y_k = y_cai["shape"][1] + + if x_k != y_k: + raise ValueError("Inputs must have same number of columns. " + "a=%s, b=%s" % (x_k, y_k)) + + x_ptr = <uintptr_t>x_cai["data"][0] + y_ptr = <uintptr_t>y_cai["data"][0] + d_ptr = <uintptr_t>dists_cai["data"][0] + + cdef handle_t *h = new handle_t() + + x_dt = np.dtype(x_cai["typestr"]) + y_dt = np.dtype(y_cai["typestr"]) + d_dt = np.dtype(dists_cai["typestr"]) + + x_c_contiguous = is_c_cont(x_cai, x_dt) + y_c_contiguous = is_c_cont(y_cai, y_dt) + + if x_c_contiguous != y_c_contiguous: + raise ValueError("Inputs must have matching strides") + + if metric not in SUPPORTED_DISTANCES: + raise ValueError("metric %s is not supported" % metric) + + cdef DistanceType distance_type = DISTANCE_TYPES[metric] + + if x_dt != y_dt or x_dt != d_dt: + raise ValueError("Inputs must have the same dtypes") + + if x_dt == np.float32: + pairwise_distance(deref(h), + <float*> x_ptr, + <float*> y_ptr, + <float*> d_ptr, + <int>m, + <int>n, + <int>x_k, + <DistanceType>distance_type, + <bool>x_c_contiguous, + <float>p) + elif x_dt == np.float64: + pairwise_distance(deref(h), + <double*> x_ptr, + <double*> y_ptr, + <double*> d_ptr, + <int>m, + <int>n, + <int>x_k, + <DistanceType>distance_type, + <bool>x_c_contiguous, + <float>p) + else: + raise ValueError("dtype %s not supported" % x_dt) diff --git a/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx b/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx index ea28357201..6cef28d150 100644 --- a/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx +++ b/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx @@ -22,6 +22,7 @@ import numpy as np from libc.stdint cimport uintptr_t, int64_t from cython.operator cimport dereference as deref +from pylibraft.common import Handle from pylibraft.common.handle cimport handle_t from .rng_state cimport RngState @@ -72,7 +73,7 @@ cdef extern from "raft_distance/random/rmat_rectangular_generator.hpp" \ RngState& r) -def rmat(out, theta, r_scale, c_scale, seed=12345): +def rmat(out, theta, r_scale, c_scale, seed=12345, handle=None): """ Generate RMAT adjacency list based on the input distribution. @@ -87,6 +88,7 @@ def rmat(out, theta, r_scale, c_scale, seed=12345): r_scale: log2 of number of source nodes c_scale: log2 of number of destination nodes seed: random seed used for reproducibility + handle : Optional RAFT handle for reusing expensive CUDA resources Examples -------- @@ -95,6 +97,7 @@ def rmat(out, theta, r_scale, c_scale, seed=12345): import cupy as cp + from pylibraft.common import Handle from pylibraft.random import rmat n_edges = 5000 @@ -105,7 +108,9 @@ def rmat(out, theta, r_scale, c_scale, seed=12345): out = cp.empty((n_edges, 2), dtype=cp.int32) theta = cp.random.random_sample(theta_len, dtype=cp.float32) - rmat(out, theta, r_scale, c_scale) + handle = Handle() + rmat(out, theta, r_scale, c_scale, handle=handle) + handle.sync() """ if theta is None: @@ -123,7 +128,9 @@ def rmat(out, theta, r_scale, c_scale, seed=12345): theta_dt = np.dtype(theta_cai["typestr"]) cdef RngState *rng = new RngState(seed) - cdef handle_t *h = new handle_t() + + handle = handle if handle is not None else Handle() + cdef handle_t *h = <handle_t*><size_t>handle.getHandle() if out_dt == np.int32 and theta_dt == np.float32: rmat_rectangular_gen(deref(h), diff --git a/python/pylibraft/pylibraft/test/test_distance.py b/python/pylibraft/pylibraft/test/test_distance.py index b9b4ba9e30..7f35a25493 100644 --- a/python/pylibraft/pylibraft/test/test_distance.py +++ b/python/pylibraft/pylibraft/test/test_distance.py @@ -17,6 +17,7 @@ import pytest import numpy as np +from pylibraft.common import Handle from pylibraft.distance import pairwise_distance from pylibraft.testing.utils import TestDeviceBuffer @@ -53,7 +54,10 @@ def test_distance(n_rows, n_cols, metric, order, dtype): input1_device = TestDeviceBuffer(input1, order) output_device = TestDeviceBuffer(output, order) + handle = Handle() pairwise_distance(input1_device, input1_device, output_device, metric) + handle.sync() + actual = output_device.copy_to_host() actual[actual <= 1e-5] = 0.0 diff --git a/python/pylibraft/pylibraft/test/test_fused_l2_argmin.py b/python/pylibraft/pylibraft/test/test_fused_l2_argmin.py index b12cc30472..1ce1ee2d1f 100644 --- a/python/pylibraft/pylibraft/test/test_fused_l2_argmin.py +++ b/python/pylibraft/pylibraft/test/test_fused_l2_argmin.py @@ -17,6 +17,7 @@ import pytest import numpy as np +from pylibraft.common import Handle from pylibraft.distance import fused_l2_nn_argmin from pylibraft.testing.utils import TestDeviceBuffer @@ -41,7 +42,10 @@ def test_fused_l2_nn_minarg(n_rows, n_cols, n_clusters, dtype): input2_device = TestDeviceBuffer(input2, "C") output_device = TestDeviceBuffer(output, "C") - fused_l2_nn_argmin(input1_device, input2_device, output_device, True) + handle = Handle() + fused_l2_nn_argmin(input1_device, input2_device, output_device, + True, handle=handle) + handle.sync() actual = output_device.copy_to_host() assert np.allclose(expected, actual, rtol=1e-4) diff --git a/python/pylibraft/pylibraft/test/test_random.py b/python/pylibraft/pylibraft/test/test_random.py index 8a04f707de..e0b7140f1c 100644 --- a/python/pylibraft/pylibraft/test/test_random.py +++ b/python/pylibraft/pylibraft/test/test_random.py @@ -16,6 +16,7 @@ import pytest import numpy as np +from pylibraft.common import Handle from pylibraft.random import rmat from pylibraft.testing.utils import TestDeviceBuffer @@ -46,14 +47,18 @@ def test_rmat(n_edges, r_scale, c_scale, dtype): theta, theta_device = generate_theta(r_scale, c_scale) out_buff = np.empty((n_edges, 2), dtype=dtype) output_device = TestDeviceBuffer(out_buff, "C") - rmat(output_device, theta_device, r_scale, c_scale, 12345) + + handle = Handle() + rmat(output_device, theta_device, r_scale, c_scale, 12345, handle=handle) + handle.sync() output = output_device.copy_to_host() # a more rigorous tests have been done at the c++ level assert np.all(output[:, 0] >= 0) assert np.all(output[:, 0] < 2**r_scale) assert np.all(output[:, 1] >= 0) assert np.all(output[:, 1] < 2**c_scale) - rmat(output_device, theta_device, r_scale, c_scale, 12345) + rmat(output_device, theta_device, r_scale, c_scale, 12345, handle=handle) + handle.sync() output1 = output_device.copy_to_host() assert np.all(np.equal(output, output1)) diff --git a/python/pylibraft/pylibraft/test/test_interruptible.py b/python/pylibraft/pylibraft/test/test_z_interruptible.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_interruptible.py rename to python/pylibraft/pylibraft/test/test_z_interruptible.py From 1b9f8c8614111715ba253e07a6a75036dace4a1e Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" <cjnolet@gmail.com> Date: Mon, 24 Oct 2022 15:09:40 -0400 Subject: [PATCH 2/5] Fixing style --- python/pylibraft/pylibraft/distance/fused_l2_nn.pyx | 2 +- python/pylibraft/pylibraft/distance/pairwise_distance.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx index a6da3dad48..152b69f96c 100644 --- a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx +++ b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx @@ -115,7 +115,7 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True, handle=None): d_ptr = <uintptr_t>output_cai["data"][0] - handle = handle if handle != None else Handle() + handle = handle if handle is not None else Handle() cdef handle_t *h = <handle_t*><size_t>handle.getHandle() x_dt = np.dtype(x_cai["typestr"]) diff --git a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx index af822698bc..347703ebcd 100644 --- a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx +++ b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx @@ -152,7 +152,7 @@ def distance(X, Y, dists, metric="euclidean", p=2.0, handle=None): y_ptr = <uintptr_t>y_cai["data"][0] d_ptr = <uintptr_t>dists_cai["data"][0] - handle = handle if handle != None else Handle() + handle = handle if handle is not None else Handle() cdef handle_t *h = <handle_t*><size_t>handle.getHandle() x_dt = np.dtype(x_cai["typestr"]) From 25cdb3a54542ab63a15eddcbd4f9095e659dbde1 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" <cjnolet@gmail.com> Date: Mon, 24 Oct 2022 15:18:05 -0400 Subject: [PATCH 3/5] Updating examples --- python/pylibraft/pylibraft/distance/fused_l2_nn.pyx | 8 +++++++- python/pylibraft/pylibraft/distance/pairwise_distance.pyx | 7 ++++++- .../pylibraft/random/rmat_rectangular_generator.pyx | 6 ++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx index 152b69f96c..106cd3f56f 100644 --- a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx +++ b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx @@ -91,9 +91,15 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True, handle=None): dtype=cp.float32) output = cp.empty((n_samples, 1), dtype=cp.int32) + # A single RAFT handle can optionally be used across + # pylibraft functions. handle = Handle() + ... fused_l2_nn_argmin(in1, in2, output, handle=handle) - handle.sync() + ... + # pylibraft functions are often asynchronous so the + # handle needs to be explicitly synchronized + handle.sync() # """ x_cai = X.__cuda_array_interface__ diff --git a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx index 347703ebcd..664d77462b 100644 --- a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx +++ b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx @@ -129,9 +129,14 @@ def distance(X, Y, dists, metric="euclidean", p=2.0, handle=None): dtype=cp.float32) output = cp.empty((n_samples, n_samples), dtype=cp.float32) + # A single RAFT handle can optionally be used across + # pylibraft functions. handle = Handle() + ... pairwise_distance(in1, in2, output, metric="euclidean", handle=handle) - handle.sync() + ... + # pylibraft functions are often asynchronous so the + # handle needs to be explicitly synchronized """ x_cai = X.__cuda_array_interface__ diff --git a/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx b/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx index 6cef28d150..b2605bb9e1 100644 --- a/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx +++ b/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx @@ -108,8 +108,14 @@ def rmat(out, theta, r_scale, c_scale, seed=12345, handle=None): out = cp.empty((n_edges, 2), dtype=cp.int32) theta = cp.random.random_sample(theta_len, dtype=cp.float32) + # A single RAFT handle can optionally be used across + # pylibraft functions. handle = Handle() + ... rmat(out, theta, r_scale, c_scale, handle=handle) + ... + # pylibraft functions are often asynchronous so the + # handle needs to be explicitly synchronized handle.sync() """ From e0cb66d791e847bbb5fba7c17a679e6bfb8a1c57 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" <cjnolet@gmail.com> Date: Mon, 24 Oct 2022 15:30:09 -0400 Subject: [PATCH 4/5] Removing accidentally checked in files --- .../pylibraft/distance/fused_l2_nn.pyx | 2 +- .../pylibraft/distance/pairwise_distance.pyx | 2 +- .../pylibraft/neighbors/CMakeLists.txt | 28 --- .../pylibraft/neighbors/__init__.pxd | 14 -- .../pylibraft/pylibraft/neighbors/__init__.py | 14 -- .../pylibraft/pylibraft/neighbors/ivf_pq.pyx | 192 ------------------ .../random/rmat_rectangular_generator.pyx | 2 +- 7 files changed, 3 insertions(+), 251 deletions(-) delete mode 100644 python/pylibraft/pylibraft/neighbors/CMakeLists.txt delete mode 100644 python/pylibraft/pylibraft/neighbors/__init__.pxd delete mode 100644 python/pylibraft/pylibraft/neighbors/__init__.py delete mode 100644 python/pylibraft/pylibraft/neighbors/ivf_pq.pyx diff --git a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx index 106cd3f56f..e312050aaa 100644 --- a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx +++ b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx @@ -91,7 +91,7 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True, handle=None): dtype=cp.float32) output = cp.empty((n_samples, 1), dtype=cp.int32) - # A single RAFT handle can optionally be used across + # A single RAFT handle can optionally be reused across # pylibraft functions. handle = Handle() ... diff --git a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx index 664d77462b..7fc3ce10cf 100644 --- a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx +++ b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx @@ -129,7 +129,7 @@ def distance(X, Y, dists, metric="euclidean", p=2.0, handle=None): dtype=cp.float32) output = cp.empty((n_samples, n_samples), dtype=cp.float32) - # A single RAFT handle can optionally be used across + # A single RAFT handle can optionally be reused across # pylibraft functions. handle = Handle() ... diff --git a/python/pylibraft/pylibraft/neighbors/CMakeLists.txt b/python/pylibraft/pylibraft/neighbors/CMakeLists.txt deleted file mode 100644 index 9d7d51c6fd..0000000000 --- a/python/pylibraft/pylibraft/neighbors/CMakeLists.txt +++ /dev/null @@ -1,28 +0,0 @@ -# ============================================================================= -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. -# ============================================================================= - -# Set the list of Cython files to build -set(cython_sources ivf_pq.pyx) -set(linked_libraries raft::raft raft::distance) - -# Build all of the Cython targets -rapids_cython_create_modules( - CXX - SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" - MODULE_PREFIX neighbors_) - -foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../library") -endforeach() diff --git a/python/pylibraft/pylibraft/neighbors/__init__.pxd b/python/pylibraft/pylibraft/neighbors/__init__.pxd deleted file mode 100644 index 273b4497cc..0000000000 --- a/python/pylibraft/pylibraft/neighbors/__init__.pxd +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/python/pylibraft/pylibraft/neighbors/__init__.py b/python/pylibraft/pylibraft/neighbors/__init__.py deleted file mode 100644 index 273b4497cc..0000000000 --- a/python/pylibraft/pylibraft/neighbors/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq.pyx deleted file mode 100644 index 8d55402e23..0000000000 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq.pyx +++ /dev/null @@ -1,192 +0,0 @@ -# -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# cython: profile=False -# distutils: language = c++ -# cython: embedsignature = True -# cython: language_level = 3 - -import numpy as np - -from libc.stdint cimport uintptr_t -from cython.operator cimport dereference as deref - -from libcpp cimport bool -from .distance_type cimport DistanceType -from pylibraft.common.handle cimport handle_t - - -def is_c_cont(cai, dt): - return "strides" not in cai or \ - cai["strides"] is None or \ - cai["strides"][1] == dt.itemsize - - -cdef extern from "raft_distance/pairwise_distance.hpp" \ - namespace "raft::distance::runtime": - - cdef void pairwise_distance(const handle_t &handle, - float *x, - float *y, - float *dists, - int m, - int n, - int k, - DistanceType metric, - bool isRowMajor, - float metric_arg) - - cdef void pairwise_distance(const handle_t &handle, - double *x, - double *y, - double *dists, - int m, - int n, - int k, - DistanceType metric, - bool isRowMajor, - float metric_arg) - -DISTANCE_TYPES = { - "l2": DistanceType.L2SqrtUnexpanded, - "sqeuclidean": DistanceType.L2Unexpanded, - "euclidean": DistanceType.L2SqrtUnexpanded, - "l1": DistanceType.L1, - "cityblock": DistanceType.L1, - "inner_product": DistanceType.InnerProduct, - "chebyshev": DistanceType.Linf, - "canberra": DistanceType.Canberra, - "cosine": DistanceType.CosineExpanded, - "lp": DistanceType.LpUnexpanded, - "correlation": DistanceType.CorrelationExpanded, - "jaccard": DistanceType.JaccardExpanded, - "hellinger": DistanceType.HellingerExpanded, - "braycurtis": DistanceType.BrayCurtis, - "jensenshannon": DistanceType.JensenShannon, - "hamming": DistanceType.HammingUnexpanded, - "kl_divergence": DistanceType.KLDivergence, - "minkowski": DistanceType.LpUnexpanded, - "russellrao": DistanceType.RusselRaoExpanded, - "dice": DistanceType.DiceExpanded -} - -SUPPORTED_DISTANCES = ["euclidean", "l1", "cityblock", "l2", "inner_product", - "chebyshev", "minkowski", "canberra", "kl_divergence", - "correlation", "russellrao", "hellinger", "lp", - "hamming", "jensenshannon", "cosine", "sqeuclidean"] - - -def distance(X, Y, dists, metric="euclidean", p=2.0): - """ - Compute pairwise distances between X and Y - - Valid values for metric: - ["euclidean", "l2", "l1", "cityblock", "inner_product", - "chebyshev", "canberra", "lp", "hellinger", "jensenshannon", - "kl_divergence", "russellrao", "minkowski", "correlation", - "cosine"] - - Parameters - ---------- - - X : CUDA array interface compliant matrix shape (m, k) - Y : CUDA array interface compliant matrix shape (n, k) - dists : Writable CUDA array interface matrix shape (m, n) - metric : string denoting the metric type (default="euclidean") - p : metric parameter (currently used only for "minkowski") - - Examples - -------- - - .. code-block:: python - - import cupy as cp - - from pylibraft.distance import pairwise_distance - - n_samples = 5000 - n_features = 50 - - in1 = cp.random.random_sample((n_samples, n_features), - dtype=cp.float32) - in2 = cp.random.random_sample((n_samples, n_features), - dtype=cp.float32) - output = cp.empty((n_samples, n_samples), dtype=cp.float32) - - pairwise_distance(in1, in2, output, metric="euclidean") - """ - - x_cai = X.__cuda_array_interface__ - y_cai = Y.__cuda_array_interface__ - dists_cai = dists.__cuda_array_interface__ - - m = x_cai["shape"][0] - n = y_cai["shape"][0] - - x_k = x_cai["shape"][1] - y_k = y_cai["shape"][1] - - if x_k != y_k: - raise ValueError("Inputs must have same number of columns. " - "a=%s, b=%s" % (x_k, y_k)) - - x_ptr = <uintptr_t>x_cai["data"][0] - y_ptr = <uintptr_t>y_cai["data"][0] - d_ptr = <uintptr_t>dists_cai["data"][0] - - cdef handle_t *h = new handle_t() - - x_dt = np.dtype(x_cai["typestr"]) - y_dt = np.dtype(y_cai["typestr"]) - d_dt = np.dtype(dists_cai["typestr"]) - - x_c_contiguous = is_c_cont(x_cai, x_dt) - y_c_contiguous = is_c_cont(y_cai, y_dt) - - if x_c_contiguous != y_c_contiguous: - raise ValueError("Inputs must have matching strides") - - if metric not in SUPPORTED_DISTANCES: - raise ValueError("metric %s is not supported" % metric) - - cdef DistanceType distance_type = DISTANCE_TYPES[metric] - - if x_dt != y_dt or x_dt != d_dt: - raise ValueError("Inputs must have the same dtypes") - - if x_dt == np.float32: - pairwise_distance(deref(h), - <float*> x_ptr, - <float*> y_ptr, - <float*> d_ptr, - <int>m, - <int>n, - <int>x_k, - <DistanceType>distance_type, - <bool>x_c_contiguous, - <float>p) - elif x_dt == np.float64: - pairwise_distance(deref(h), - <double*> x_ptr, - <double*> y_ptr, - <double*> d_ptr, - <int>m, - <int>n, - <int>x_k, - <DistanceType>distance_type, - <bool>x_c_contiguous, - <float>p) - else: - raise ValueError("dtype %s not supported" % x_dt) diff --git a/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx b/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx index b2605bb9e1..cef19295ac 100644 --- a/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx +++ b/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx @@ -108,7 +108,7 @@ def rmat(out, theta, r_scale, c_scale, seed=12345, handle=None): out = cp.empty((n_edges, 2), dtype=cp.int32) theta = cp.random.random_sample(theta_len, dtype=cp.float32) - # A single RAFT handle can optionally be used across + # A single RAFT handle can optionally be reused across # pylibraft functions. handle = Handle() ... From a6780b4e3e7f1464f67c99592e7010523f495be8 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" <cjnolet@gmail.com> Date: Mon, 24 Oct 2022 19:16:23 -0400 Subject: [PATCH 5/5] Fixing code blocks --- python/pylibraft/pylibraft/distance/fused_l2_nn.pyx | 2 +- python/pylibraft/pylibraft/distance/pairwise_distance.pyx | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx index e312050aaa..880bb46a05 100644 --- a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx +++ b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx @@ -99,7 +99,7 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True, handle=None): ... # pylibraft functions are often asynchronous so the # handle needs to be explicitly synchronized - handle.sync() # + handle.sync() """ x_cai = X.__cuda_array_interface__ diff --git a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx index 7fc3ce10cf..0f7626e8d1 100644 --- a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx +++ b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx @@ -137,6 +137,7 @@ def distance(X, Y, dists, metric="euclidean", p=2.0, handle=None): ... # pylibraft functions are often asynchronous so the # handle needs to be explicitly synchronized + handle.sync() """ x_cai = X.__cuda_array_interface__