From c14602d54096c513edf1c90d6a86cfb538891f0a Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 24 Oct 2022 15:04:56 -0400 Subject: [PATCH 01/27] Adding optional handle to each public API function (along with example) --- .../pylibraft/distance/fused_l2_nn.pyx | 12 +- .../pylibraft/distance/pairwise_distance.pyx | 13 +- .../pylibraft/neighbors/CMakeLists.txt | 28 +++ .../pylibraft/neighbors/__init__.pxd | 14 ++ .../pylibraft/pylibraft/neighbors/__init__.py | 14 ++ .../pylibraft/pylibraft/neighbors/ivf_pq.pyx | 192 ++++++++++++++++++ .../random/rmat_rectangular_generator.pyx | 13 +- .../pylibraft/pylibraft/test/test_distance.py | 4 + .../pylibraft/test/test_fused_l2_argmin.py | 6 +- .../pylibraft/pylibraft/test/test_random.py | 9 +- ...terruptible.py => test_z_interruptible.py} | 0 11 files changed, 293 insertions(+), 12 deletions(-) create mode 100644 python/pylibraft/pylibraft/neighbors/CMakeLists.txt create mode 100644 python/pylibraft/pylibraft/neighbors/__init__.pxd create mode 100644 python/pylibraft/pylibraft/neighbors/__init__.py create mode 100644 python/pylibraft/pylibraft/neighbors/ivf_pq.pyx rename python/pylibraft/pylibraft/test/{test_interruptible.py => test_z_interruptible.py} (100%) diff --git a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx index 5fb837c114..a6da3dad48 100644 --- a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx +++ b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx @@ -25,6 +25,7 @@ from cython.operator cimport dereference as deref from libcpp cimport bool from .distance_type cimport DistanceType +from pylibraft.common import Handle from pylibraft.common.handle cimport handle_t @@ -58,7 +59,7 @@ cdef extern from "raft_distance/fused_l2_min_arg.hpp" \ bool sqrt) -def fused_l2_nn_argmin(X, Y, output, sqrt=True): +def fused_l2_nn_argmin(X, Y, output, sqrt=True, handle=None): """ Compute the 1-nearest neighbors between X and Y using the L2 distance @@ -68,6 
+69,7 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True): X : CUDA array interface compliant matrix shape (m, k) Y : CUDA array interface compliant matrix shape (n, k) output : Writable CUDA array interface matrix shape (m, 1) + handle : Optional RAFT handle for reusing expensive CUDA resources Examples -------- @@ -76,6 +78,7 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True): import cupy as cp + from pylibraft.common import Handle from pylibraft.distance import fused_l2_nn n_samples = 5000 @@ -88,7 +91,9 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True): dtype=cp.float32) output = cp.empty((n_samples, 1), dtype=cp.int32) - fused_l2_nn_argmin(in1, in2, output) + handle = Handle() + fused_l2_nn_argmin(in1, in2, output, handle=handle) + handle.sync() """ x_cai = X.__cuda_array_interface__ @@ -110,7 +115,8 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True): d_ptr = output_cai["data"][0] - cdef handle_t *h = new handle_t() + handle = handle if handle != None else Handle() + cdef handle_t *h = handle.getHandle() x_dt = np.dtype(x_cai["typestr"]) y_dt = np.dtype(y_cai["typestr"]) diff --git a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx index 8d55402e23..af822698bc 100644 --- a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx +++ b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx @@ -25,6 +25,8 @@ from cython.operator cimport dereference as deref from libcpp cimport bool from .distance_type cimport DistanceType + +from pylibraft.common import Handle from pylibraft.common.handle cimport handle_t @@ -88,7 +90,7 @@ SUPPORTED_DISTANCES = ["euclidean", "l1", "cityblock", "l2", "inner_product", "hamming", "jensenshannon", "cosine", "sqeuclidean"] -def distance(X, Y, dists, metric="euclidean", p=2.0): +def distance(X, Y, dists, metric="euclidean", p=2.0, handle=None): """ Compute pairwise distances between X and Y @@ -106,6 +108,7 @@ def distance(X, Y, dists, metric="euclidean", p=2.0): 
dists : Writable CUDA array interface matrix shape (m, n) metric : string denoting the metric type (default="euclidean") p : metric parameter (currently used only for "minkowski") + handle : Optional RAFT handle for reusing expensive CUDA resources Examples -------- @@ -114,6 +117,7 @@ def distance(X, Y, dists, metric="euclidean", p=2.0): import cupy as cp + from pylibraft.common import Handle from pylibraft.distance import pairwise_distance n_samples = 5000 @@ -125,7 +129,9 @@ def distance(X, Y, dists, metric="euclidean", p=2.0): dtype=cp.float32) output = cp.empty((n_samples, n_samples), dtype=cp.float32) - pairwise_distance(in1, in2, output, metric="euclidean") + handle = Handle() + pairwise_distance(in1, in2, output, metric="euclidean", handle=handle) + handle.sync() """ x_cai = X.__cuda_array_interface__ @@ -146,7 +152,8 @@ def distance(X, Y, dists, metric="euclidean", p=2.0): y_ptr = y_cai["data"][0] d_ptr = dists_cai["data"][0] - cdef handle_t *h = new handle_t() + handle = handle if handle != None else Handle() + cdef handle_t *h = handle.getHandle() x_dt = np.dtype(x_cai["typestr"]) y_dt = np.dtype(y_cai["typestr"]) diff --git a/python/pylibraft/pylibraft/neighbors/CMakeLists.txt b/python/pylibraft/pylibraft/neighbors/CMakeLists.txt new file mode 100644 index 0000000000..9d7d51c6fd --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/CMakeLists.txt @@ -0,0 +1,28 @@ +# ============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. 
See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources ivf_pq.pyx) +set(linked_libraries raft::raft raft::distance) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" + MODULE_PREFIX neighbors_) + +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../library") +endforeach() diff --git a/python/pylibraft/pylibraft/neighbors/__init__.pxd b/python/pylibraft/pylibraft/neighbors/__init__.pxd new file mode 100644 index 0000000000..273b4497cc --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/__init__.pxd @@ -0,0 +1,14 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pylibraft/pylibraft/neighbors/__init__.py b/python/pylibraft/pylibraft/neighbors/__init__.py new file mode 100644 index 0000000000..273b4497cc --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq.pyx new file mode 100644 index 0000000000..8d55402e23 --- /dev/null +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq.pyx @@ -0,0 +1,192 @@ +# +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +import numpy as np + +from libc.stdint cimport uintptr_t +from cython.operator cimport dereference as deref + +from libcpp cimport bool +from .distance_type cimport DistanceType +from pylibraft.common.handle cimport handle_t + + +def is_c_cont(cai, dt): + return "strides" not in cai or \ + cai["strides"] is None or \ + cai["strides"][1] == dt.itemsize + + +cdef extern from "raft_distance/pairwise_distance.hpp" \ + namespace "raft::distance::runtime": + + cdef void pairwise_distance(const handle_t &handle, + float *x, + float *y, + float *dists, + int m, + int n, + int k, + DistanceType metric, + bool isRowMajor, + float metric_arg) + + cdef void pairwise_distance(const handle_t &handle, + double *x, + double *y, + double *dists, + int m, + int n, + int k, + DistanceType metric, + bool isRowMajor, + float metric_arg) + +DISTANCE_TYPES = { + "l2": DistanceType.L2SqrtUnexpanded, + "sqeuclidean": DistanceType.L2Unexpanded, + "euclidean": DistanceType.L2SqrtUnexpanded, + "l1": DistanceType.L1, + "cityblock": DistanceType.L1, + "inner_product": DistanceType.InnerProduct, + "chebyshev": DistanceType.Linf, + "canberra": DistanceType.Canberra, + "cosine": DistanceType.CosineExpanded, + "lp": DistanceType.LpUnexpanded, + "correlation": DistanceType.CorrelationExpanded, + "jaccard": DistanceType.JaccardExpanded, + "hellinger": DistanceType.HellingerExpanded, + "braycurtis": DistanceType.BrayCurtis, + "jensenshannon": DistanceType.JensenShannon, + "hamming": DistanceType.HammingUnexpanded, + "kl_divergence": DistanceType.KLDivergence, + "minkowski": DistanceType.LpUnexpanded, + "russellrao": DistanceType.RusselRaoExpanded, + "dice": DistanceType.DiceExpanded +} + +SUPPORTED_DISTANCES = ["euclidean", "l1", "cityblock", "l2", "inner_product", + "chebyshev", "minkowski", "canberra", "kl_divergence", + "correlation", "russellrao", "hellinger", "lp", + 
"hamming", "jensenshannon", "cosine", "sqeuclidean"] + + +def distance(X, Y, dists, metric="euclidean", p=2.0): + """ + Compute pairwise distances between X and Y + + Valid values for metric: + ["euclidean", "l2", "l1", "cityblock", "inner_product", + "chebyshev", "canberra", "lp", "hellinger", "jensenshannon", + "kl_divergence", "russellrao", "minkowski", "correlation", + "cosine"] + + Parameters + ---------- + + X : CUDA array interface compliant matrix shape (m, k) + Y : CUDA array interface compliant matrix shape (n, k) + dists : Writable CUDA array interface matrix shape (m, n) + metric : string denoting the metric type (default="euclidean") + p : metric parameter (currently used only for "minkowski") + + Examples + -------- + + .. code-block:: python + + import cupy as cp + + from pylibraft.distance import pairwise_distance + + n_samples = 5000 + n_features = 50 + + in1 = cp.random.random_sample((n_samples, n_features), + dtype=cp.float32) + in2 = cp.random.random_sample((n_samples, n_features), + dtype=cp.float32) + output = cp.empty((n_samples, n_samples), dtype=cp.float32) + + pairwise_distance(in1, in2, output, metric="euclidean") + """ + + x_cai = X.__cuda_array_interface__ + y_cai = Y.__cuda_array_interface__ + dists_cai = dists.__cuda_array_interface__ + + m = x_cai["shape"][0] + n = y_cai["shape"][0] + + x_k = x_cai["shape"][1] + y_k = y_cai["shape"][1] + + if x_k != y_k: + raise ValueError("Inputs must have same number of columns. 
" + "a=%s, b=%s" % (x_k, y_k)) + + x_ptr = x_cai["data"][0] + y_ptr = y_cai["data"][0] + d_ptr = dists_cai["data"][0] + + cdef handle_t *h = new handle_t() + + x_dt = np.dtype(x_cai["typestr"]) + y_dt = np.dtype(y_cai["typestr"]) + d_dt = np.dtype(dists_cai["typestr"]) + + x_c_contiguous = is_c_cont(x_cai, x_dt) + y_c_contiguous = is_c_cont(y_cai, y_dt) + + if x_c_contiguous != y_c_contiguous: + raise ValueError("Inputs must have matching strides") + + if metric not in SUPPORTED_DISTANCES: + raise ValueError("metric %s is not supported" % metric) + + cdef DistanceType distance_type = DISTANCE_TYPES[metric] + + if x_dt != y_dt or x_dt != d_dt: + raise ValueError("Inputs must have the same dtypes") + + if x_dt == np.float32: + pairwise_distance(deref(h), + x_ptr, + y_ptr, + d_ptr, + m, + n, + x_k, + distance_type, + x_c_contiguous, + p) + elif x_dt == np.float64: + pairwise_distance(deref(h), + x_ptr, + y_ptr, + d_ptr, + m, + n, + x_k, + distance_type, + x_c_contiguous, + p) + else: + raise ValueError("dtype %s not supported" % x_dt) diff --git a/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx b/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx index ea28357201..6cef28d150 100644 --- a/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx +++ b/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx @@ -22,6 +22,7 @@ import numpy as np from libc.stdint cimport uintptr_t, int64_t from cython.operator cimport dereference as deref +from pylibraft.common import Handle from pylibraft.common.handle cimport handle_t from .rng_state cimport RngState @@ -72,7 +73,7 @@ cdef extern from "raft_distance/random/rmat_rectangular_generator.hpp" \ RngState& r) -def rmat(out, theta, r_scale, c_scale, seed=12345): +def rmat(out, theta, r_scale, c_scale, seed=12345, handle=None): """ Generate RMAT adjacency list based on the input distribution. 
@@ -87,6 +88,7 @@ def rmat(out, theta, r_scale, c_scale, seed=12345): r_scale: log2 of number of source nodes c_scale: log2 of number of destination nodes seed: random seed used for reproducibility + handle : Optional RAFT handle for reusing expensive CUDA resources Examples -------- @@ -95,6 +97,7 @@ def rmat(out, theta, r_scale, c_scale, seed=12345): import cupy as cp + from pylibraft.common import Handle from pylibraft.random import rmat n_edges = 5000 @@ -105,7 +108,9 @@ def rmat(out, theta, r_scale, c_scale, seed=12345): out = cp.empty((n_edges, 2), dtype=cp.int32) theta = cp.random.random_sample(theta_len, dtype=cp.float32) - rmat(out, theta, r_scale, c_scale) + handle = Handle() + rmat(out, theta, r_scale, c_scale, handle=handle) + handle.sync() """ if theta is None: @@ -123,7 +128,9 @@ def rmat(out, theta, r_scale, c_scale, seed=12345): theta_dt = np.dtype(theta_cai["typestr"]) cdef RngState *rng = new RngState(seed) - cdef handle_t *h = new handle_t() + + handle = handle if handle is not None else Handle() + cdef handle_t *h = handle.getHandle() if out_dt == np.int32 and theta_dt == np.float32: rmat_rectangular_gen(deref(h), diff --git a/python/pylibraft/pylibraft/test/test_distance.py b/python/pylibraft/pylibraft/test/test_distance.py index b9b4ba9e30..7f35a25493 100644 --- a/python/pylibraft/pylibraft/test/test_distance.py +++ b/python/pylibraft/pylibraft/test/test_distance.py @@ -17,6 +17,7 @@ import pytest import numpy as np +from pylibraft.common import Handle from pylibraft.distance import pairwise_distance from pylibraft.testing.utils import TestDeviceBuffer @@ -53,7 +54,10 @@ def test_distance(n_rows, n_cols, metric, order, dtype): input1_device = TestDeviceBuffer(input1, order) output_device = TestDeviceBuffer(output, order) + handle = Handle() pairwise_distance(input1_device, input1_device, output_device, metric) + handle.sync() + actual = output_device.copy_to_host() actual[actual <= 1e-5] = 0.0 diff --git 
a/python/pylibraft/pylibraft/test/test_fused_l2_argmin.py b/python/pylibraft/pylibraft/test/test_fused_l2_argmin.py index b12cc30472..1ce1ee2d1f 100644 --- a/python/pylibraft/pylibraft/test/test_fused_l2_argmin.py +++ b/python/pylibraft/pylibraft/test/test_fused_l2_argmin.py @@ -17,6 +17,7 @@ import pytest import numpy as np +from pylibraft.common import Handle from pylibraft.distance import fused_l2_nn_argmin from pylibraft.testing.utils import TestDeviceBuffer @@ -41,7 +42,10 @@ def test_fused_l2_nn_minarg(n_rows, n_cols, n_clusters, dtype): input2_device = TestDeviceBuffer(input2, "C") output_device = TestDeviceBuffer(output, "C") - fused_l2_nn_argmin(input1_device, input2_device, output_device, True) + handle = Handle() + fused_l2_nn_argmin(input1_device, input2_device, output_device, + True, handle=handle) + handle.sync() actual = output_device.copy_to_host() assert np.allclose(expected, actual, rtol=1e-4) diff --git a/python/pylibraft/pylibraft/test/test_random.py b/python/pylibraft/pylibraft/test/test_random.py index 8a04f707de..e0b7140f1c 100644 --- a/python/pylibraft/pylibraft/test/test_random.py +++ b/python/pylibraft/pylibraft/test/test_random.py @@ -16,6 +16,7 @@ import pytest import numpy as np +from pylibraft.common import Handle from pylibraft.random import rmat from pylibraft.testing.utils import TestDeviceBuffer @@ -46,14 +47,18 @@ def test_rmat(n_edges, r_scale, c_scale, dtype): theta, theta_device = generate_theta(r_scale, c_scale) out_buff = np.empty((n_edges, 2), dtype=dtype) output_device = TestDeviceBuffer(out_buff, "C") - rmat(output_device, theta_device, r_scale, c_scale, 12345) + + handle = Handle() + rmat(output_device, theta_device, r_scale, c_scale, 12345, handle=handle) + handle.sync() output = output_device.copy_to_host() # a more rigorous tests have been done at the c++ level assert np.all(output[:, 0] >= 0) assert np.all(output[:, 0] < 2**r_scale) assert np.all(output[:, 1] >= 0) assert np.all(output[:, 1] < 2**c_scale) - 
rmat(output_device, theta_device, r_scale, c_scale, 12345) + rmat(output_device, theta_device, r_scale, c_scale, 12345, handle=handle) + handle.sync() output1 = output_device.copy_to_host() assert np.all(np.equal(output, output1)) diff --git a/python/pylibraft/pylibraft/test/test_interruptible.py b/python/pylibraft/pylibraft/test/test_z_interruptible.py similarity index 100% rename from python/pylibraft/pylibraft/test/test_interruptible.py rename to python/pylibraft/pylibraft/test/test_z_interruptible.py From 1b9f8c8614111715ba253e07a6a75036dace4a1e Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 24 Oct 2022 15:09:40 -0400 Subject: [PATCH 02/27] Fixing style --- python/pylibraft/pylibraft/distance/fused_l2_nn.pyx | 2 +- python/pylibraft/pylibraft/distance/pairwise_distance.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx index a6da3dad48..152b69f96c 100644 --- a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx +++ b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx @@ -115,7 +115,7 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True, handle=None): d_ptr = output_cai["data"][0] - handle = handle if handle != None else Handle() + handle = handle if handle is not None else Handle() cdef handle_t *h = handle.getHandle() x_dt = np.dtype(x_cai["typestr"]) diff --git a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx index af822698bc..347703ebcd 100644 --- a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx +++ b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx @@ -152,7 +152,7 @@ def distance(X, Y, dists, metric="euclidean", p=2.0, handle=None): y_ptr = y_cai["data"][0] d_ptr = dists_cai["data"][0] - handle = handle if handle != None else Handle() + handle = handle if handle is not None else Handle() cdef handle_t *h = handle.getHandle() x_dt = 
np.dtype(x_cai["typestr"]) From 25cdb3a54542ab63a15eddcbd4f9095e659dbde1 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 24 Oct 2022 15:18:05 -0400 Subject: [PATCH 03/27] Updating examples --- python/pylibraft/pylibraft/distance/fused_l2_nn.pyx | 8 +++++++- python/pylibraft/pylibraft/distance/pairwise_distance.pyx | 7 ++++++- .../pylibraft/random/rmat_rectangular_generator.pyx | 6 ++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx index 152b69f96c..106cd3f56f 100644 --- a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx +++ b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx @@ -91,9 +91,15 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True, handle=None): dtype=cp.float32) output = cp.empty((n_samples, 1), dtype=cp.int32) + # A single RAFT handle can optionally be used across + # pylibraft functions. handle = Handle() + ... fused_l2_nn_argmin(in1, in2, output, handle=handle) - handle.sync() + ... + # pylibraft functions are often asynchronous so the + # handle needs to be explicitly synchronized + handle.sync() # """ x_cai = X.__cuda_array_interface__ diff --git a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx index 347703ebcd..664d77462b 100644 --- a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx +++ b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx @@ -129,9 +129,14 @@ def distance(X, Y, dists, metric="euclidean", p=2.0, handle=None): dtype=cp.float32) output = cp.empty((n_samples, n_samples), dtype=cp.float32) + # A single RAFT handle can optionally be used across + # pylibraft functions. handle = Handle() + ... pairwise_distance(in1, in2, output, metric="euclidean", handle=handle) - handle.sync() + ... 
+ # pylibraft functions are often asynchronous so the + # handle needs to be explicitly synchronized """ x_cai = X.__cuda_array_interface__ diff --git a/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx b/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx index 6cef28d150..b2605bb9e1 100644 --- a/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx +++ b/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx @@ -108,8 +108,14 @@ def rmat(out, theta, r_scale, c_scale, seed=12345, handle=None): out = cp.empty((n_edges, 2), dtype=cp.int32) theta = cp.random.random_sample(theta_len, dtype=cp.float32) + # A single RAFT handle can optionally be used across + # pylibraft functions. handle = Handle() + ... rmat(out, theta, r_scale, c_scale, handle=handle) + ... + # pylibraft functions are often asynchronous so the + # handle needs to be explicitly synchronized handle.sync() """ From e0cb66d791e847bbb5fba7c17a679e6bfb8a1c57 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Mon, 24 Oct 2022 15:30:09 -0400 Subject: [PATCH 04/27] Removing accidentally checked in files --- .../pylibraft/distance/fused_l2_nn.pyx | 2 +- .../pylibraft/distance/pairwise_distance.pyx | 2 +- .../pylibraft/neighbors/CMakeLists.txt | 28 --- .../pylibraft/neighbors/__init__.pxd | 14 -- .../pylibraft/pylibraft/neighbors/__init__.py | 14 -- .../pylibraft/pylibraft/neighbors/ivf_pq.pyx | 192 ------------------ .../random/rmat_rectangular_generator.pyx | 2 +- 7 files changed, 3 insertions(+), 251 deletions(-) delete mode 100644 python/pylibraft/pylibraft/neighbors/CMakeLists.txt delete mode 100644 python/pylibraft/pylibraft/neighbors/__init__.pxd delete mode 100644 python/pylibraft/pylibraft/neighbors/__init__.py delete mode 100644 python/pylibraft/pylibraft/neighbors/ivf_pq.pyx diff --git a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx index 106cd3f56f..e312050aaa 100644 --- a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx +++ b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx @@ -91,7 +91,7 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True, handle=None): dtype=cp.float32) output = cp.empty((n_samples, 1), dtype=cp.int32) - # A single RAFT handle can optionally be used across + # A single RAFT handle can optionally be reused across # pylibraft functions. handle = Handle() ... diff --git a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx index 664d77462b..7fc3ce10cf 100644 --- a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx +++ b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx @@ -129,7 +129,7 @@ def distance(X, Y, dists, metric="euclidean", p=2.0, handle=None): dtype=cp.float32) output = cp.empty((n_samples, n_samples), dtype=cp.float32) - # A single RAFT handle can optionally be used across + # A single RAFT handle can optionally be reused across # pylibraft functions. handle = Handle() ... 
diff --git a/python/pylibraft/pylibraft/neighbors/CMakeLists.txt b/python/pylibraft/pylibraft/neighbors/CMakeLists.txt deleted file mode 100644 index 9d7d51c6fd..0000000000 --- a/python/pylibraft/pylibraft/neighbors/CMakeLists.txt +++ /dev/null @@ -1,28 +0,0 @@ -# ============================================================================= -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. -# ============================================================================= - -# Set the list of Cython files to build -set(cython_sources ivf_pq.pyx) -set(linked_libraries raft::raft raft::distance) - -# Build all of the Cython targets -rapids_cython_create_modules( - CXX - SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" - MODULE_PREFIX neighbors_) - -foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) - set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../library") -endforeach() diff --git a/python/pylibraft/pylibraft/neighbors/__init__.pxd b/python/pylibraft/pylibraft/neighbors/__init__.pxd deleted file mode 100644 index 273b4497cc..0000000000 --- a/python/pylibraft/pylibraft/neighbors/__init__.pxd +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/python/pylibraft/pylibraft/neighbors/__init__.py b/python/pylibraft/pylibraft/neighbors/__init__.py deleted file mode 100644 index 273b4497cc..0000000000 --- a/python/pylibraft/pylibraft/neighbors/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq.pyx deleted file mode 100644 index 8d55402e23..0000000000 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq.pyx +++ /dev/null @@ -1,192 +0,0 @@ -# -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# cython: profile=False -# distutils: language = c++ -# cython: embedsignature = True -# cython: language_level = 3 - -import numpy as np - -from libc.stdint cimport uintptr_t -from cython.operator cimport dereference as deref - -from libcpp cimport bool -from .distance_type cimport DistanceType -from pylibraft.common.handle cimport handle_t - - -def is_c_cont(cai, dt): - return "strides" not in cai or \ - cai["strides"] is None or \ - cai["strides"][1] == dt.itemsize - - -cdef extern from "raft_distance/pairwise_distance.hpp" \ - namespace "raft::distance::runtime": - - cdef void pairwise_distance(const handle_t &handle, - float *x, - float *y, - float *dists, - int m, - int n, - int k, - DistanceType metric, - bool isRowMajor, - float metric_arg) - - cdef void pairwise_distance(const handle_t &handle, - double *x, - double *y, - double *dists, - int m, - int n, - int k, - DistanceType metric, - bool isRowMajor, - float metric_arg) - -DISTANCE_TYPES = { - "l2": DistanceType.L2SqrtUnexpanded, - "sqeuclidean": DistanceType.L2Unexpanded, - "euclidean": DistanceType.L2SqrtUnexpanded, - "l1": DistanceType.L1, - "cityblock": DistanceType.L1, - "inner_product": DistanceType.InnerProduct, - "chebyshev": DistanceType.Linf, - "canberra": DistanceType.Canberra, - "cosine": DistanceType.CosineExpanded, - "lp": DistanceType.LpUnexpanded, - "correlation": DistanceType.CorrelationExpanded, - "jaccard": DistanceType.JaccardExpanded, - "hellinger": DistanceType.HellingerExpanded, - "braycurtis": DistanceType.BrayCurtis, - "jensenshannon": DistanceType.JensenShannon, - 
"hamming": DistanceType.HammingUnexpanded, - "kl_divergence": DistanceType.KLDivergence, - "minkowski": DistanceType.LpUnexpanded, - "russellrao": DistanceType.RusselRaoExpanded, - "dice": DistanceType.DiceExpanded -} - -SUPPORTED_DISTANCES = ["euclidean", "l1", "cityblock", "l2", "inner_product", - "chebyshev", "minkowski", "canberra", "kl_divergence", - "correlation", "russellrao", "hellinger", "lp", - "hamming", "jensenshannon", "cosine", "sqeuclidean"] - - -def distance(X, Y, dists, metric="euclidean", p=2.0): - """ - Compute pairwise distances between X and Y - - Valid values for metric: - ["euclidean", "l2", "l1", "cityblock", "inner_product", - "chebyshev", "canberra", "lp", "hellinger", "jensenshannon", - "kl_divergence", "russellrao", "minkowski", "correlation", - "cosine"] - - Parameters - ---------- - - X : CUDA array interface compliant matrix shape (m, k) - Y : CUDA array interface compliant matrix shape (n, k) - dists : Writable CUDA array interface matrix shape (m, n) - metric : string denoting the metric type (default="euclidean") - p : metric parameter (currently used only for "minkowski") - - Examples - -------- - - .. code-block:: python - - import cupy as cp - - from pylibraft.distance import pairwise_distance - - n_samples = 5000 - n_features = 50 - - in1 = cp.random.random_sample((n_samples, n_features), - dtype=cp.float32) - in2 = cp.random.random_sample((n_samples, n_features), - dtype=cp.float32) - output = cp.empty((n_samples, n_samples), dtype=cp.float32) - - pairwise_distance(in1, in2, output, metric="euclidean") - """ - - x_cai = X.__cuda_array_interface__ - y_cai = Y.__cuda_array_interface__ - dists_cai = dists.__cuda_array_interface__ - - m = x_cai["shape"][0] - n = y_cai["shape"][0] - - x_k = x_cai["shape"][1] - y_k = y_cai["shape"][1] - - if x_k != y_k: - raise ValueError("Inputs must have same number of columns. 
" - "a=%s, b=%s" % (x_k, y_k)) - - x_ptr = x_cai["data"][0] - y_ptr = y_cai["data"][0] - d_ptr = dists_cai["data"][0] - - cdef handle_t *h = new handle_t() - - x_dt = np.dtype(x_cai["typestr"]) - y_dt = np.dtype(y_cai["typestr"]) - d_dt = np.dtype(dists_cai["typestr"]) - - x_c_contiguous = is_c_cont(x_cai, x_dt) - y_c_contiguous = is_c_cont(y_cai, y_dt) - - if x_c_contiguous != y_c_contiguous: - raise ValueError("Inputs must have matching strides") - - if metric not in SUPPORTED_DISTANCES: - raise ValueError("metric %s is not supported" % metric) - - cdef DistanceType distance_type = DISTANCE_TYPES[metric] - - if x_dt != y_dt or x_dt != d_dt: - raise ValueError("Inputs must have the same dtypes") - - if x_dt == np.float32: - pairwise_distance(deref(h), - x_ptr, - y_ptr, - d_ptr, - m, - n, - x_k, - distance_type, - x_c_contiguous, - p) - elif x_dt == np.float64: - pairwise_distance(deref(h), - x_ptr, - y_ptr, - d_ptr, - m, - n, - x_k, - distance_type, - x_c_contiguous, - p) - else: - raise ValueError("dtype %s not supported" % x_dt) diff --git a/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx b/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx index b2605bb9e1..cef19295ac 100644 --- a/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx +++ b/python/pylibraft/pylibraft/random/rmat_rectangular_generator.pyx @@ -108,7 +108,7 @@ def rmat(out, theta, r_scale, c_scale, seed=12345, handle=None): out = cp.empty((n_edges, 2), dtype=cp.int32) theta = cp.random.random_sample(theta_len, dtype=cp.float32) - # A single RAFT handle can optionally be used across + # A single RAFT handle can optionally be reused across # pylibraft functions. handle = Handle() ... From a6780b4e3e7f1464f67c99592e7010523f495be8 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Mon, 24 Oct 2022 19:16:23 -0400 Subject: [PATCH 05/27] Fixing code blocks --- python/pylibraft/pylibraft/distance/fused_l2_nn.pyx | 2 +- python/pylibraft/pylibraft/distance/pairwise_distance.pyx | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx index e312050aaa..880bb46a05 100644 --- a/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx +++ b/python/pylibraft/pylibraft/distance/fused_l2_nn.pyx @@ -99,7 +99,7 @@ def fused_l2_nn_argmin(X, Y, output, sqrt=True, handle=None): ... # pylibraft functions are often asynchronous so the # handle needs to be explicitly synchronized - handle.sync() # + handle.sync() """ x_cai = X.__cuda_array_interface__ diff --git a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx index 7fc3ce10cf..0f7626e8d1 100644 --- a/python/pylibraft/pylibraft/distance/pairwise_distance.pyx +++ b/python/pylibraft/pylibraft/distance/pairwise_distance.pyx @@ -137,6 +137,7 @@ def distance(X, Y, dists, metric="euclidean", p=2.0, handle=None): ... # pylibraft functions are often asynchronous so the # handle needs to be explicitly synchronized + handle.sync() """ x_cai = X.__cuda_array_interface__ From 7015cca6ee49b1333dfc43adea06651511bbbc14 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 25 Oct 2022 15:44:07 -0400 Subject: [PATCH 06/27] Cleaning up kmeans internals (removing explicit const for mdspan, using const for underlying pointers where appropriate). Pulling centroid update logic into its own function. 
--- cpp/include/raft/cluster/detail/kmeans.cuh | 342 ++++++++++++------ .../raft/cluster/detail/kmeans_common.cuh | 48 +-- 2 files changed, 264 insertions(+), 126 deletions(-) diff --git a/cpp/include/raft/cluster/detail/kmeans.cuh b/cpp/include/raft/cluster/detail/kmeans.cuh index 94fee3edbf..134cc1c45d 100644 --- a/cpp/include/raft/cluster/detail/kmeans.cuh +++ b/cpp/include/raft/cluster/detail/kmeans.cuh @@ -58,8 +58,8 @@ namespace detail { template void initRandom(const raft::handle_t& handle, const KMeansParams& params, - const raft::device_matrix_view& X, - const raft::device_matrix_view& centroids) + raft::device_matrix_view X, + raft::device_matrix_view centroids) { cudaStream_t stream = handle.get_stream(); auto n_clusters = params.n_clusters; @@ -83,8 +83,8 @@ void initRandom(const raft::handle_t& handle, template void kmeansPlusPlus(const raft::handle_t& handle, const KMeansParams& params, - const raft::device_matrix_view& X, - const raft::device_matrix_view& centroidsRawData, + raft::device_matrix_view X, + raft::device_matrix_view centroidsRawData, rmm::device_uvector& workspace) { cudaStream_t stream = handle.get_stream(); @@ -257,15 +257,134 @@ void kmeansPlusPlus(const raft::handle_t& handle, } /// <<<< Step-5 >>> } +/** + * + * @tparam DataT + * @tparam IndexT + * @param handle + * @param[in] X + * @param[in] weight + * @param[in] cur_centroids + * @param[in] l2norm_x + * @param[out] min_cluster_and_dist + * @param[out] new_centroids + * @param[out] weight_in_cluster + * @param[inout] workspace + */ +template +void update_centroids( + const raft::handle_t& handle, + const KMeansParams& params, + raft::device_matrix_view X, + raft::device_vector_view weight, + raft::device_matrix_view cur_centroids, + raft::device_vector_view l2norm_x, + raft::device_vector_view, IndexT> min_cluster_and_dist, + raft::device_matrix_view new_centroids, + raft::device_vector_view weight_in_cluster, + rmm::device_uvector& L2NormBuf_OR_DistBuf, + rmm::device_uvector& 
workspace) +{ + auto n_clusters = cur_centroids.extent(0); + auto n_samples = X.extent(0); + + // computes minClusterAndDistance[0:n_samples) where + // minClusterAndDistance[i] is a pair where + // 'key' is index to a sample in 'centroids' (index of the nearest + // centroid) and 'value' is the distance between the sample 'X[i]' and the + // 'centroid[key]' + detail::minClusterAndDistanceCompute(handle, + params, + X, + cur_centroids, + min_cluster_and_dist, + l2norm_x, + L2NormBuf_OR_DistBuf, + workspace); + + // Using TransformInputIteratorT to dereference an array of + // raft::KeyValuePair and converting them to just return the Key to be used + // in reduce_rows_by_key prims + detail::KeyValueIndexOp conversion_op; + cub::TransformInputIterator, + raft::KeyValuePair*> + itr(min_cluster_and_dist.data_handle(), conversion_op); + + workspace.resize(n_samples, handle.get_stream()); + + // Calculates weighted sum of all the samples assigned to cluster-i and store the + // result in newCentroids[i] + raft::linalg::reduce_rows_by_key((DataT*)X.data_handle(), + X.extent(1), + itr, + weight.data_handle(), + workspace.data(), + X.extent(0), + X.extent(1), + n_clusters, + new_centroids.data_handle(), + handle.get_stream()); + + // Reduce weights by key to compute weight in each cluster + raft::linalg::reduce_cols_by_key(weight.data_handle(), + itr, + weight_in_cluster.data_handle(), + (IndexT)1, + (IndexT)weight.extent(0), + (IndexT)n_clusters, + handle.get_stream()); + + // Computes newCentroids[i] = newCentroids[i]/wtInCluster[i] where + // newCentroids[n_clusters x n_features] - 2D array, newCentroids[i] has sum of all the + // samples assigned to cluster-i wtInCluster[n_clusters] - 1D array, wtInCluster[i] contains # + // of samples in cluster-i. 
+ // Note - when wtInCluster[i] is 0, newCentroid[i] is reset to 0 + raft::linalg::matrixVectorOp( + new_centroids.data_handle(), + new_centroids.data_handle(), + weight_in_cluster.data_handle(), + new_centroids.extent(1), + new_centroids.extent(0), + true, + false, + [=] __device__(DataT mat, DataT vec) { + if (vec == 0) + return DataT(0); + else + return mat / vec; + }, + handle.get_stream()); + + // copy centroids[i] to newCentroids[i] when wtInCluster[i] is 0 + cub::ArgIndexInputIterator itr_wt(weight_in_cluster.data_handle()); + raft::matrix::gather_if( + const_cast(cur_centroids.data_handle()), + static_cast(cur_centroids.extent(1)), + static_cast(cur_centroids.extent(0)), + itr_wt, + itr_wt, + static_cast(weight_in_cluster.size()), + new_centroids.data_handle(), + [=] __device__(raft::KeyValuePair map) { // predicate + // copy when the # of samples in the cluster is 0 + return map.value == 0; + }, + [=] __device__(raft::KeyValuePair map) { // map + return map.key; + }, + handle.get_stream()); +} + // TODO: Resizing is needed to use mdarray instead of rmm::device_uvector template void kmeans_fit_main(const raft::handle_t& handle, const KMeansParams& params, - const raft::device_matrix_view& X, - const raft::device_vector_view& weight, - const raft::device_matrix_view& centroidsRawData, - const raft::host_scalar_view& inertia, - const raft::host_scalar_view& n_iter, + raft::device_matrix_view X, + raft::device_vector_view weight, + raft::device_matrix_view centroidsRawData, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter, rmm::device_uvector& workspace) { logger::get(RAFT_NAME).set_level(params.verbosity); @@ -297,6 +416,9 @@ void kmeans_fit_main(const raft::handle_t& handle, // L2 norm of X: ||x||^2 auto L2NormX = raft::make_device_vector(handle, n_samples); + auto l2normx_view = + raft::make_device_vector_view(L2NormX.data_handle(), n_samples); + if (metric == raft::distance::DistanceType::L2Expanded || metric == 
raft::distance::DistanceType::L2SqrtExpanded) { raft::linalg::rowNorm(L2NormX.data_handle(), @@ -323,95 +445,109 @@ void kmeans_fit_main(const raft::handle_t& handle, auto centroids = raft::make_device_matrix_view( centroidsRawData.data_handle(), n_clusters, n_features); - // computes minClusterAndDistance[0:n_samples) where - // minClusterAndDistance[i] is a pair where - // 'key' is index to a sample in 'centroids' (index of the nearest - // centroid) and 'value' is the distance between the sample 'X[i]' and the - // 'centroid[key]' - detail::minClusterAndDistanceCompute(handle, - params, - X, - centroids, - minClusterAndDistance.view(), - L2NormX.view(), - L2NormBuf_OR_DistBuf, - workspace); - - // Using TransformInputIteratorT to dereference an array of - // raft::KeyValuePair and converting them to just return the Key to be used - // in reduce_rows_by_key prims - detail::KeyValueIndexOp conversion_op; - cub::TransformInputIterator, - raft::KeyValuePair*> - itr(minClusterAndDistance.data_handle(), conversion_op); - - workspace.resize(n_samples, stream); - - // Calculates weighted sum of all the samples assigned to cluster-i and store the - // result in newCentroids[i] - raft::linalg::reduce_rows_by_key((DataT*)X.data_handle(), - X.extent(1), - itr, - weight.data_handle(), - workspace.data(), - X.extent(0), - X.extent(1), - n_clusters, - newCentroids.data_handle(), - stream); - - // Reduce weights by key to compute weight in each cluster - raft::linalg::reduce_cols_by_key(weight.data_handle(), - itr, - wtInCluster.data_handle(), - (IndexT)1, - (IndexT)weight.extent(0), - (IndexT)n_clusters, - stream); - - // Computes newCentroids[i] = newCentroids[i]/wtInCluster[i] where - // newCentroids[n_clusters x n_features] - 2D array, newCentroids[i] has sum of all the - // samples assigned to cluster-i wtInCluster[n_clusters] - 1D array, wtInCluster[i] contains # - // of samples in cluster-i. 
- // Note - when wtInCluster[i] is 0, newCentroid[i] is reset to 0 - raft::linalg::matrixVectorOp( - newCentroids.data_handle(), - newCentroids.data_handle(), - wtInCluster.data_handle(), - newCentroids.extent(1), - newCentroids.extent(0), - true, - false, - [=] __device__(DataT mat, DataT vec) { - if (vec == 0) - return DataT(0); - else - return mat / vec; - }, - stream); - - // copy centroids[i] to newCentroids[i] when wtInCluster[i] is 0 - cub::ArgIndexInputIterator itr_wt(wtInCluster.data_handle()); - raft::matrix::gather_if( - centroids.data_handle(), - centroids.extent(1), - centroids.extent(0), - itr_wt, - itr_wt, - wtInCluster.size(), - newCentroids.data_handle(), - [=] __device__(raft::KeyValuePair map) { // predicate - // copy when the # of samples in the cluster is 0 - if (map.value == 0) - return true; - else - return false; - }, - [=] __device__(raft::KeyValuePair map) { // map - return map.key; - }, - stream); + update_centroids(handle, + params, + X, + weight, + raft::make_device_matrix_view( + centroidsRawData.data_handle(), n_clusters, n_features), + l2normx_view, + minClusterAndDistance.view(), + newCentroids.view(), + wtInCluster.view(), + L2NormBuf_OR_DistBuf, + workspace); + + // // computes minClusterAndDistance[0:n_samples) where + // // minClusterAndDistance[i] is a pair where + // // 'key' is index to a sample in 'centroids' (index of the nearest + // // centroid) and 'value' is the distance between the sample 'X[i]' and the + // // 'centroid[key]' + // detail::minClusterAndDistanceCompute(handle, + // params, + // X, + // centroids, + // minClusterAndDistance.view(), + // L2NormX.view(), + // L2NormBuf_OR_DistBuf, + // workspace); + // + // // Using TransformInputIteratorT to dereference an array of + // // raft::KeyValuePair and converting them to just return the Key to be used + // // in reduce_rows_by_key prims + // detail::KeyValueIndexOp conversion_op; + // cub::TransformInputIterator, + // raft::KeyValuePair*> + // 
itr(minClusterAndDistance.data_handle(), conversion_op); + // + // workspace.resize(n_samples, stream); + // + // // Calculates weighted sum of all the samples assigned to cluster-i and store the + // // result in newCentroids[i] + // raft::linalg::reduce_rows_by_key((DataT*)X.data_handle(), + // X.extent(1), + // itr, + // weight.data_handle(), + // workspace.data(), + // X.extent(0), + // X.extent(1), + // n_clusters, + // newCentroids.data_handle(), + // stream); + // + // // Reduce weights by key to compute weight in each cluster + // raft::linalg::reduce_cols_by_key(weight.data_handle(), + // itr, + // wtInCluster.data_handle(), + // (IndexT)1, + // (IndexT)weight.extent(0), + // (IndexT)n_clusters, + // stream); + // + // // Computes newCentroids[i] = newCentroids[i]/wtInCluster[i] where + // // newCentroids[n_clusters x n_features] - 2D array, newCentroids[i] has sum of all the + // // samples assigned to cluster-i wtInCluster[n_clusters] - 1D array, wtInCluster[i] + // contains # + // // of samples in cluster-i. 
+ // // Note - when wtInCluster[i] is 0, newCentroid[i] is reset to 0 + // raft::linalg::matrixVectorOp( + // newCentroids.data_handle(), + // newCentroids.data_handle(), + // wtInCluster.data_handle(), + // newCentroids.extent(1), + // newCentroids.extent(0), + // true, + // false, + // [=] __device__(DataT mat, DataT vec) { + // if (vec == 0) + // return DataT(0); + // else + // return mat / vec; + // }, + // stream); + // + // // copy centroids[i] to newCentroids[i] when wtInCluster[i] is 0 + // cub::ArgIndexInputIterator itr_wt(wtInCluster.data_handle()); + // raft::matrix::gather_if( + // centroids.data_handle(), + // centroids.extent(1), + // centroids.extent(0), + // itr_wt, + // itr_wt, + // wtInCluster.size(), + // newCentroids.data_handle(), + // [=] __device__(raft::KeyValuePair map) { // predicate + // // copy when the # of samples in the cluster is 0 + // if (map.value == 0) + // return true; + // else + // return false; + // }, + // [=] __device__(raft::KeyValuePair map) { // map + // return map.key; + // }, + // stream); // compute the squared norm between the newCentroids and the original // centroids, destructor releases the resource @@ -480,7 +616,7 @@ void kmeans_fit_main(const raft::handle_t& handle, X, centroids, minClusterAndDistance.view(), - L2NormX.view(), + l2normx_view, L2NormBuf_OR_DistBuf, workspace); @@ -543,8 +679,8 @@ void kmeans_fit_main(const raft::handle_t& handle, template void initScalableKMeansPlusPlus(const raft::handle_t& handle, const KMeansParams& params, - const raft::device_matrix_view X, - const raft::device_matrix_view centroidsRawData, + raft::device_matrix_view X, + raft::device_matrix_view centroidsRawData, rmm::device_uvector& workspace) { cudaStream_t stream = handle.get_stream(); @@ -992,12 +1128,14 @@ void kmeans_predict(handle_t const& handle, // 'key' is index to a sample in 'centroids' (index of the nearest // centroid) and 'value' is the distance between the sample 'X[i]' and the // 'centroid[key]' + auto 
l2normx_view = + raft::make_device_vector_view(L2NormX.data_handle(), n_samples); detail::minClusterAndDistanceCompute(handle, params, X, centroids, minClusterAndDistance.view(), - L2NormX.view(), + l2normx_view, L2NormBuf_OR_DistBuf, workspace); diff --git a/cpp/include/raft/cluster/detail/kmeans_common.cuh b/cpp/include/raft/cluster/detail/kmeans_common.cuh index d4dd565ea0..4626df4157 100644 --- a/cpp/include/raft/cluster/detail/kmeans_common.cuh +++ b/cpp/include/raft/cluster/detail/kmeans_common.cuh @@ -126,7 +126,7 @@ void countLabels(const raft::handle_t& handle, template void checkWeight(const raft::handle_t& handle, - const raft::device_vector_view& weight, + raft::device_vector_view weight, rmm::device_uvector& workspace) { cudaStream_t stream = handle.get_stream(); @@ -181,9 +181,9 @@ IndexT getCentroidsBatchSize(const KMeansParams& params, IndexT n_local_clusters template void computeClusterCost(const raft::handle_t& handle, - const raft::device_vector_view& minClusterDistance, + raft::device_vector_view minClusterDistance, rmm::device_uvector& workspace, - const raft::device_scalar_view& clusterCost, + raft::device_scalar_view clusterCost, ReductionOpT reduction_op) { cudaStream_t stream = handle.get_stream(); @@ -211,9 +211,9 @@ void computeClusterCost(const raft::handle_t& handle, template void sampleCentroids(const raft::handle_t& handle, - const raft::device_matrix_view& X, - const raft::device_vector_view& minClusterDistance, - const raft::device_vector_view& isSampleCentroid, + raft::device_matrix_view X, + raft::device_vector_view minClusterDistance, + raft::device_vector_view isSampleCentroid, SamplingOp& select_op, rmm::device_uvector& inRankCp, rmm::device_uvector& workspace) @@ -277,9 +277,9 @@ void sampleCentroids(const raft::handle_t& handle, // result will be stored in 'pairwiseDistance[n x k]' template void pairwise_distance_kmeans(const raft::handle_t& handle, - const raft::device_matrix_view X, - const raft::device_matrix_view 
centroids, - const raft::device_matrix_view pairwiseDistance, + raft::device_matrix_view X, + raft::device_matrix_view centroids, + raft::device_matrix_view pairwiseDistance, rmm::device_uvector& workspace, raft::distance::DistanceType metric) { @@ -305,8 +305,8 @@ void pairwise_distance_kmeans(const raft::handle_t& handle, // in 'out' does not modify the input template void shuffleAndGather(const raft::handle_t& handle, - const raft::device_matrix_view& in, - const raft::device_matrix_view& out, + raft::device_matrix_view in, + raft::device_matrix_view out, uint32_t n_samples_to_gather, uint64_t seed, rmm::device_uvector* workspace = nullptr) @@ -354,10 +354,10 @@ template void minClusterAndDistanceCompute( const raft::handle_t& handle, const KMeansParams& params, - const raft::device_matrix_view X, - const raft::device_matrix_view centroids, - const raft::device_vector_view, IndexT> minClusterAndDistance, - const raft::device_vector_view L2NormX, + raft::device_matrix_view X, + raft::device_matrix_view centroids, + raft::device_vector_view, IndexT> minClusterAndDistance, + raft::device_vector_view L2NormX, rmm::device_uvector& L2NormBuf_OR_DistBuf, rmm::device_uvector& workspace) { @@ -414,7 +414,7 @@ void minClusterAndDistanceCompute( minClusterAndDistance.data_handle() + dIdx, ns); auto L2NormXView = - raft::make_device_vector_view(L2NormX.data_handle() + dIdx, ns); + raft::make_device_vector_view(L2NormX.data_handle() + dIdx, ns); if (metric == raft::distance::DistanceType::L2Expanded || metric == raft::distance::DistanceType::L2SqrtExpanded) { @@ -484,10 +484,10 @@ void minClusterAndDistanceCompute( template void minClusterDistanceCompute(const raft::handle_t& handle, const KMeansParams& params, - const raft::device_matrix_view& X, - const raft::device_matrix_view& centroids, - const raft::device_vector_view& minClusterDistance, - const raft::device_vector_view& L2NormX, + raft::device_matrix_view X, + raft::device_matrix_view centroids, + 
raft::device_vector_view minClusterDistance, + raft::device_vector_view L2NormX, rmm::device_uvector& L2NormBuf_OR_DistBuf, rmm::device_uvector& workspace) { @@ -609,11 +609,11 @@ void minClusterDistanceCompute(const raft::handle_t& handle, template void countSamplesInCluster(const raft::handle_t& handle, const KMeansParams& params, - const raft::device_matrix_view& X, - const raft::device_vector_view L2NormX, - const raft::device_matrix_view centroids, + raft::device_matrix_view X, + raft::device_vector_view L2NormX, + raft::device_matrix_view centroids, rmm::device_uvector& workspace, - const raft::device_vector_view sampleCountInCluster) + raft::device_vector_view sampleCountInCluster) { cudaStream_t stream = handle.get_stream(); auto n_samples = X.extent(0); From ef230d0a886099c5b40bf9ac407e9d59b3877dc3 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 25 Oct 2022 15:48:36 -0400 Subject: [PATCH 07/27] Removing commented out code --- cpp/include/raft/cluster/detail/kmeans.cuh | 91 ---------------------- 1 file changed, 91 deletions(-) diff --git a/cpp/include/raft/cluster/detail/kmeans.cuh b/cpp/include/raft/cluster/detail/kmeans.cuh index 134cc1c45d..338954efb6 100644 --- a/cpp/include/raft/cluster/detail/kmeans.cuh +++ b/cpp/include/raft/cluster/detail/kmeans.cuh @@ -458,97 +458,6 @@ void kmeans_fit_main(const raft::handle_t& handle, L2NormBuf_OR_DistBuf, workspace); - // // computes minClusterAndDistance[0:n_samples) where - // // minClusterAndDistance[i] is a pair where - // // 'key' is index to a sample in 'centroids' (index of the nearest - // // centroid) and 'value' is the distance between the sample 'X[i]' and the - // // 'centroid[key]' - // detail::minClusterAndDistanceCompute(handle, - // params, - // X, - // centroids, - // minClusterAndDistance.view(), - // L2NormX.view(), - // L2NormBuf_OR_DistBuf, - // workspace); - // - // // Using TransformInputIteratorT to dereference an array of - // // raft::KeyValuePair and converting them to just 
return the Key to be used - // // in reduce_rows_by_key prims - // detail::KeyValueIndexOp conversion_op; - // cub::TransformInputIterator, - // raft::KeyValuePair*> - // itr(minClusterAndDistance.data_handle(), conversion_op); - // - // workspace.resize(n_samples, stream); - // - // // Calculates weighted sum of all the samples assigned to cluster-i and store the - // // result in newCentroids[i] - // raft::linalg::reduce_rows_by_key((DataT*)X.data_handle(), - // X.extent(1), - // itr, - // weight.data_handle(), - // workspace.data(), - // X.extent(0), - // X.extent(1), - // n_clusters, - // newCentroids.data_handle(), - // stream); - // - // // Reduce weights by key to compute weight in each cluster - // raft::linalg::reduce_cols_by_key(weight.data_handle(), - // itr, - // wtInCluster.data_handle(), - // (IndexT)1, - // (IndexT)weight.extent(0), - // (IndexT)n_clusters, - // stream); - // - // // Computes newCentroids[i] = newCentroids[i]/wtInCluster[i] where - // // newCentroids[n_clusters x n_features] - 2D array, newCentroids[i] has sum of all the - // // samples assigned to cluster-i wtInCluster[n_clusters] - 1D array, wtInCluster[i] - // contains # - // // of samples in cluster-i. 
- // // Note - when wtInCluster[i] is 0, newCentroid[i] is reset to 0 - // raft::linalg::matrixVectorOp( - // newCentroids.data_handle(), - // newCentroids.data_handle(), - // wtInCluster.data_handle(), - // newCentroids.extent(1), - // newCentroids.extent(0), - // true, - // false, - // [=] __device__(DataT mat, DataT vec) { - // if (vec == 0) - // return DataT(0); - // else - // return mat / vec; - // }, - // stream); - // - // // copy centroids[i] to newCentroids[i] when wtInCluster[i] is 0 - // cub::ArgIndexInputIterator itr_wt(wtInCluster.data_handle()); - // raft::matrix::gather_if( - // centroids.data_handle(), - // centroids.extent(1), - // centroids.extent(0), - // itr_wt, - // itr_wt, - // wtInCluster.size(), - // newCentroids.data_handle(), - // [=] __device__(raft::KeyValuePair map) { // predicate - // // copy when the # of samples in the cluster is 0 - // if (map.value == 0) - // return true; - // else - // return false; - // }, - // [=] __device__(raft::KeyValuePair map) { // map - // return map.key; - // }, - // stream); - // compute the squared norm between the newCentroids and the original // centroids, destructor releases the resource auto sqrdNorm = raft::make_device_scalar(handle, DataT(0)); From af0910e209c275e3d4e3d3426bbb8a11124f1215 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 25 Oct 2022 17:31:52 -0400 Subject: [PATCH 08/27] Adding kmeans module to pylibraft for `update_centroids` function. 
--- cpp/CMakeLists.txt | 2 + cpp/include/raft/cluster/detail/kmeans.cuh | 66 +++--- .../raft/cluster/detail/kmeans_common.cuh | 33 +-- cpp/include/raft/cluster/kmeans.cuh | 111 ++++++++++- cpp/include/raft_distance/kmeans.hpp | 56 ++++++ cpp/src/distance/update_centroids.cuh | 62 ++++++ cpp/src/distance/update_centroids_double.cu | 43 ++++ cpp/src/distance/update_centroids_float.cu | 43 ++++ .../pylibraft/cluster/CMakeLists.txt | 28 +++ .../pylibraft/pylibraft/cluster/__init__.pxd | 14 ++ .../pylibraft/pylibraft/cluster/__init__.py | 14 ++ python/pylibraft/pylibraft/cluster/kmeans.pyx | 188 ++++++++++++++++++ 12 files changed, 612 insertions(+), 48 deletions(-) create mode 100644 cpp/include/raft_distance/kmeans.hpp create mode 100644 cpp/src/distance/update_centroids.cuh create mode 100644 cpp/src/distance/update_centroids_double.cu create mode 100644 cpp/src/distance/update_centroids_float.cu create mode 100644 python/pylibraft/pylibraft/cluster/CMakeLists.txt create mode 100644 python/pylibraft/pylibraft/cluster/__init__.pxd create mode 100644 python/pylibraft/pylibraft/cluster/__init__.py create mode 100644 python/pylibraft/pylibraft/cluster/kmeans.pyx diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 58006d69e7..97bd6ebebb 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -245,6 +245,8 @@ if(RAFT_COMPILE_DIST_LIBRARY) add_library(raft_distance_lib src/distance/pairwise_distance.cu src/distance/fused_l2_min_arg.cu + src/distance/update_centroids_float.cu + src/distance/update_centroids_double.cu src/distance/specializations/detail/canberra.cu src/distance/specializations/detail/chebyshev.cu src/distance/specializations/detail/correlation.cu diff --git a/cpp/include/raft/cluster/detail/kmeans.cuh b/cpp/include/raft/cluster/detail/kmeans.cuh index 338954efb6..b213af6255 100644 --- a/cpp/include/raft/cluster/detail/kmeans.cuh +++ b/cpp/include/raft/cluster/detail/kmeans.cuh @@ -103,7 +103,7 @@ void kmeansPlusPlus(const raft::handle_t& handle, 
n_samples, n_trials); - auto dataBatchSize = getDataBatchSize(params, n_samples); + auto dataBatchSize = getDataBatchSize(params.batch_samples, n_samples); // temporary buffers std::vector h_wt(n_samples); @@ -149,12 +149,14 @@ void kmeansPlusPlus(const raft::handle_t& handle, // Calculate cluster distance, d^2(x, C), for all the points x in X to the nearest centroid detail::minClusterDistanceCompute(handle, - params, X, centroids, minClusterDistance.view(), L2NormX.view(), L2NormBuf_OR_DistBuf, + params.metric, + params.batch_samples, + params.batch_centroids, workspace); RAFT_LOG_DEBUG(" k-means++ - Sampled %d/%d centroids", n_clusters_picked, n_clusters); @@ -262,27 +264,29 @@ void kmeansPlusPlus(const raft::handle_t& handle, * @tparam DataT * @tparam IndexT * @param handle - * @param[in] X - * @param[in] weight - * @param[in] cur_centroids + * @param[in] X input matrix (size n_samples, n_features) + * @param[in] weight number of samples currently assigned to each centroid + * @param[in] cur_centroids matrix of current centroids (size n_clusters, n_features) * @param[in] l2norm_x * @param[out] min_cluster_and_dist * @param[out] new_centroids - * @param[out] weight_in_cluster + * @param[out] new_weight * @param[inout] workspace */ template void update_centroids( const raft::handle_t& handle, - const KMeansParams& params, raft::device_matrix_view X, raft::device_vector_view weight, raft::device_matrix_view cur_centroids, raft::device_vector_view l2norm_x, raft::device_vector_view, IndexT> min_cluster_and_dist, raft::device_matrix_view new_centroids, - raft::device_vector_view weight_in_cluster, + raft::device_vector_view new_weight, rmm::device_uvector& L2NormBuf_OR_DistBuf, + raft::distance::DistanceType metric, + int batch_samples, + int batch_centroids, rmm::device_uvector& workspace) { auto n_clusters = cur_centroids.extent(0); @@ -294,12 +298,14 @@ void update_centroids( // centroid) and 'value' is the distance between the sample 'X[i]' and the // 
'centroid[key]' detail::minClusterAndDistanceCompute(handle, - params, X, cur_centroids, min_cluster_and_dist, l2norm_x, L2NormBuf_OR_DistBuf, + metric, + batch_samples, + batch_centroids, workspace); // Using TransformInputIteratorT to dereference an array of @@ -314,7 +320,7 @@ void update_centroids( workspace.resize(n_samples, handle.get_stream()); // Calculates weighted sum of all the samples assigned to cluster-i and store the - // result in newCentroids[i] + // result in new_centroids[i] raft::linalg::reduce_rows_by_key((DataT*)X.data_handle(), X.extent(1), itr, @@ -329,21 +335,21 @@ void update_centroids( // Reduce weights by key to compute weight in each cluster raft::linalg::reduce_cols_by_key(weight.data_handle(), itr, - weight_in_cluster.data_handle(), + new_weight.data_handle(), (IndexT)1, (IndexT)weight.extent(0), (IndexT)n_clusters, handle.get_stream()); - // Computes newCentroids[i] = newCentroids[i]/wtInCluster[i] where - // newCentroids[n_clusters x n_features] - 2D array, newCentroids[i] has sum of all the - // samples assigned to cluster-i wtInCluster[n_clusters] - 1D array, wtInCluster[i] contains # + // Computes new_centroids[i] = new_centroids[i]/new_weight[i] where + // new_centroids[n_clusters x n_features] - 2D array, new_centroids[i] has sum of all the + // samples assigned to cluster-i new_weight[n_clusters] - 1D array, new_weight[i] contains # // of samples in cluster-i. 
- // Note - when wtInCluster[i] is 0, newCentroid[i] is reset to 0 + // Note - when new_weight[i] is 0, new_centroids[i] is reset to 0 raft::linalg::matrixVectorOp( new_centroids.data_handle(), new_centroids.data_handle(), - weight_in_cluster.data_handle(), + new_weight.data_handle(), new_centroids.extent(1), new_centroids.extent(0), true, @@ -356,15 +362,15 @@ void update_centroids( }, handle.get_stream()); - // copy centroids[i] to newCentroids[i] when wtInCluster[i] is 0 - cub::ArgIndexInputIterator itr_wt(weight_in_cluster.data_handle()); + // copy centroids[i] to new_centroids[i] when new_weight[i] is 0 + cub::ArgIndexInputIterator itr_wt(new_weight.data_handle()); raft::matrix::gather_if( const_cast(cur_centroids.data_handle()), static_cast(cur_centroids.extent(1)), static_cast(cur_centroids.extent(0)), itr_wt, itr_wt, - static_cast(weight_in_cluster.size()), + static_cast(new_weight.size()), new_centroids.data_handle(), [=] __device__(raft::KeyValuePair map) { // predicate // copy when the # of samples in the cluster is 0 @@ -446,7 +452,6 @@ void kmeans_fit_main(const raft::handle_t& handle, centroidsRawData.data_handle(), n_clusters, n_features); update_centroids(handle, - params, X, weight, raft::make_device_matrix_view( @@ -456,6 +461,9 @@ void kmeans_fit_main(const raft::handle_t& handle, newCentroids.view(), wtInCluster.view(), L2NormBuf_OR_DistBuf, + params.metric, + params.batch_samples, + params.batch_centroids, workspace); // compute the squared norm between the newCentroids and the original @@ -521,12 +529,14 @@ void kmeans_fit_main(const raft::handle_t& handle, centroidsRawData.data_handle(), n_clusters, n_features); detail::minClusterAndDistanceCompute(handle, - params, X, centroids, minClusterAndDistance.view(), l2normx_view, L2NormBuf_OR_DistBuf, + params.metric, + params.batch_samples, + params.batch_centroids, workspace); // TODO: add different templates for InType of binaryOp to avoid thrust transform @@ -651,12 +661,14 @@ void 
initScalableKMeansPlusPlus(const raft::handle_t& handle, // <<< Step-2 >>>: psi <- phi_X (C) detail::minClusterDistanceCompute(handle, - params, X, potentialCentroids, minClusterDistanceVec.view(), L2NormX.view(), L2NormBuf_OR_DistBuf, + params.metric, + params.batch_samples, + params.batch_centroids, workspace); // compute partial cluster cost from the samples in rank @@ -682,12 +694,14 @@ void initScalableKMeansPlusPlus(const raft::handle_t& handle, potentialCentroids.extent(0)); detail::minClusterDistanceCompute(handle, - params, X, potentialCentroids, minClusterDistanceVec.view(), L2NormX.view(), L2NormBuf_OR_DistBuf, + params.metric, + params.batch_samples, + params.batch_centroids, workspace); detail::computeClusterCost(handle, @@ -1040,12 +1054,14 @@ void kmeans_predict(handle_t const& handle, auto l2normx_view = raft::make_device_vector_view(L2NormX.data_handle(), n_samples); detail::minClusterAndDistanceCompute(handle, - params, X, centroids, minClusterAndDistance.view(), l2normx_view, L2NormBuf_OR_DistBuf, + params.metric, + params.batch_samples, + params.batch_centroids, workspace); // calculate cluster cost phi_x(C) @@ -1198,7 +1214,7 @@ void kmeans_transform(const raft::handle_t& handle, // Device-accessible allocation of expandable storage used as temorary buffers rmm::device_uvector workspace(0, stream); - auto dataBatchSize = getDataBatchSize(params, n_samples); + auto dataBatchSize = getDataBatchSize(params.batch_samples, n_samples); // tile over the input data and calculate distance matrix [n_samples x // n_clusters] diff --git a/cpp/include/raft/cluster/detail/kmeans_common.cuh b/cpp/include/raft/cluster/detail/kmeans_common.cuh index 4626df4157..a3005ce0d4 100644 --- a/cpp/include/raft/cluster/detail/kmeans_common.cuh +++ b/cpp/include/raft/cluster/detail/kmeans_common.cuh @@ -166,16 +166,16 @@ void checkWeight(const raft::handle_t& handle, } template -IndexT getDataBatchSize(const KMeansParams& params, IndexT n_samples) +IndexT 
getDataBatchSize(int batch_samples, IndexT n_samples) { - auto minVal = std::min(static_cast(params.batch_samples), n_samples); + auto minVal = std::min(static_cast(batch_samples), n_samples); return (minVal == 0) ? n_samples : minVal; } template -IndexT getCentroidsBatchSize(const KMeansParams& params, IndexT n_local_clusters) +IndexT getCentroidsBatchSize(int batch_centroids, IndexT n_local_clusters) { - auto minVal = std::min(static_cast(params.batch_centroids), n_local_clusters); + auto minVal = std::min(static_cast(batch_centroids), n_local_clusters); return (minVal == 0) ? n_local_clusters : minVal; } @@ -353,21 +353,22 @@ void shuffleAndGather(const raft::handle_t& handle, template void minClusterAndDistanceCompute( const raft::handle_t& handle, - const KMeansParams& params, raft::device_matrix_view X, raft::device_matrix_view centroids, raft::device_vector_view, IndexT> minClusterAndDistance, raft::device_vector_view L2NormX, rmm::device_uvector& L2NormBuf_OR_DistBuf, + raft::distance::DistanceType metric, + int batch_samples, + int batch_centroids, rmm::device_uvector& workspace) { cudaStream_t stream = handle.get_stream(); auto n_samples = X.extent(0); auto n_features = X.extent(1); auto n_clusters = centroids.extent(0); - auto metric = params.metric; - auto dataBatchSize = getDataBatchSize(params, n_samples); - auto centroidsBatchSize = getCentroidsBatchSize(params, n_clusters); + auto dataBatchSize = getDataBatchSize(batch_samples, n_samples); + auto centroidsBatchSize = getCentroidsBatchSize(batch_centroids, n_clusters); if (metric == raft::distance::DistanceType::L2Expanded || metric == raft::distance::DistanceType::L2SqrtExpanded) { @@ -380,6 +381,9 @@ void minClusterAndDistanceCompute( true, stream); } else { + // TODO: Unless pool allocator is used, passing in a workspace for this + // isn't really increasing performance because this needs to do a re-allocation + // anyways. 
L2NormBuf_OR_DistBuf.resize(dataBatchSize * centroidsBatchSize, stream); } @@ -483,22 +487,23 @@ void minClusterAndDistanceCompute( template void minClusterDistanceCompute(const raft::handle_t& handle, - const KMeansParams& params, raft::device_matrix_view X, raft::device_matrix_view centroids, raft::device_vector_view minClusterDistance, raft::device_vector_view L2NormX, rmm::device_uvector& L2NormBuf_OR_DistBuf, + raft::distance::DistanceType metric, + int batch_samples, + int batch_centroids, rmm::device_uvector& workspace) { cudaStream_t stream = handle.get_stream(); auto n_samples = X.extent(0); auto n_features = X.extent(1); auto n_clusters = centroids.extent(0); - auto metric = params.metric; - auto dataBatchSize = getDataBatchSize(params, n_samples); - auto centroidsBatchSize = getCentroidsBatchSize(params, n_clusters); + auto dataBatchSize = getDataBatchSize(batch_samples, n_samples); + auto centroidsBatchSize = getCentroidsBatchSize(batch_centroids, n_clusters); if (metric == raft::distance::DistanceType::L2Expanded || metric == raft::distance::DistanceType::L2SqrtExpanded) { @@ -635,12 +640,14 @@ void countSamplesInCluster(const raft::handle_t& handle, // centroid) and 'value' is the distance between the sample 'X[i]' and the // 'centroid[key]' detail::minClusterAndDistanceCompute(handle, - params, X, (raft::device_matrix_view)centroids, minClusterAndDistance.view(), L2NormX, L2NormBuf_OR_DistBuf, + params.metric, + params.batch_samples, + params.batch_centroids, workspace); // Using TransformInputIteratorT to dereference an array of raft::KeyValuePair diff --git a/cpp/include/raft/cluster/kmeans.cuh b/cpp/include/raft/cluster/kmeans.cuh index ef1fb44dfd..a3ebc07dd6 100644 --- a/cpp/include/raft/cluster/kmeans.cuh +++ b/cpp/include/raft/cluster/kmeans.cuh @@ -317,6 +317,61 @@ void cluster_cost(const raft::handle_t& handle, handle, minClusterDistance, workspace, clusterCost, reduction_op); } +/** + * @brief Update centroids given current centroids and 
number of points assigned to each centroid. + * This function also produces a vector of RAFT key/value pairs containing the cluster assignment + * for each point and its distance. + * + * @tparam DataT + * @tparam IndexT + * @param[in] handle: Raft handle to use for managing library resources + * @param[in] X: input matrix (size n_samples, n_features) + * @param[in] weight: number of samples currently assigned to each centroid (size n_clusters) + * @param[in] cur_centroids: matrix of current centroids (size n_clusters, n_features) + * @param[in] l2norm_x: optional array of l2 norms for each input data sample (size n_samples) + * @param[out] min_cluster_and_dist: output vector to store key/value pairs of min cluster indices + * and distances (size n_clusters) + * @param[out] new_centroids: output matrix of updated centroids (size n_clusters, n_features) + * @param[out] new_weight: number of samples assigned to each new centroid (size n_clusters) + * @param[in] metric: distance metric to use. Must be either L2Expanded, L2SqrtExpanded, + * L2Unexpanded, or L2SqrtUnexpanded + * @param[in] batch_samples: batch size for data samples when computing distances + * @param[in] batch_centroids: batch size for centroids when computing distances + */ +template +void update_centroids( + const raft::handle_t& handle, + raft::device_matrix_view X, + raft::device_vector_view weight, + raft::device_matrix_view cur_centroids, + std::optional> l2norm_x, + raft::device_vector_view, IndexT> min_cluster_and_dist, + raft::device_matrix_view new_centroids, + raft::device_vector_view new_weight, + raft::distance::DistanceType metric, + int batch_samples, + int batch_centroids) +{ + // TODO: Passing these into the algorithm doesn't really present much of a benefit + // because they are being resized anyways. 
+ rmm::device_uvector dist_workspace(0, handle.get_stream()); + rmm::device_uvector workspace(0, handle.get_stream()); + + detail::update_centroids(handle, + X, + weight, + cur_centroids, + l2norm_x.value(), + min_cluster_and_dist, + new_centroids, + new_weight, + dist_workspace, + metric, + batch_samples, + batch_centroids, + workspace); +} + /** * @brief Compute distance for every sample to it's nearest centroid * @@ -340,16 +395,26 @@ void cluster_cost(const raft::handle_t& handle, */ template void min_cluster_distance(const raft::handle_t& handle, - const KMeansParams& params, raft::device_matrix_view X, raft::device_matrix_view centroids, raft::device_vector_view minClusterDistance, raft::device_vector_view L2NormX, rmm::device_uvector& L2NormBuf_OR_DistBuf, + raft::distance::DistanceType metric, + int batch_samples, + int batch_centroids, rmm::device_uvector& workspace) { - detail::minClusterDistanceCompute( - handle, params, X, centroids, minClusterDistance, L2NormX, L2NormBuf_OR_DistBuf, workspace); + detail::minClusterDistanceCompute(handle, + X, + centroids, + minClusterDistance, + L2NormX, + L2NormBuf_OR_DistBuf, + metric, + batch_samples, + batch_centroids, + workspace); } /** @@ -379,16 +444,26 @@ void min_cluster_distance(const raft::handle_t& handle, template void min_cluster_and_distance( const raft::handle_t& handle, - const KMeansParams& params, raft::device_matrix_view X, raft::device_matrix_view centroids, raft::device_vector_view, IndexT> minClusterAndDistance, raft::device_vector_view L2NormX, rmm::device_uvector& L2NormBuf_OR_DistBuf, + raft::distance::DistanceType metric, + int batch_samples, + int batch_centroids, rmm::device_uvector& workspace) { - detail::minClusterAndDistanceCompute( - handle, params, X, centroids, minClusterAndDistance, L2NormX, L2NormBuf_OR_DistBuf, workspace); + detail::minClusterAndDistanceCompute(handle, + X, + centroids, + minClusterAndDistance, + L2NormX, + L2NormBuf_OR_DistBuf, + metric, + batch_samples, + 
batch_centroids, + workspace); } /** @@ -821,8 +896,16 @@ void minClusterDistanceCompute(const raft::handle_t& handle, rmm::device_uvector& L2NormBuf_OR_DistBuf, rmm::device_uvector& workspace) { - kmeans::min_cluster_distance( - handle, params, X, centroids, minClusterDistance, L2NormX, L2NormBuf_OR_DistBuf, workspace); + kmeans::min_cluster_distance(handle, + X, + centroids, + minClusterDistance, + L2NormX, + L2NormBuf_OR_DistBuf, + params.metric, + params.batch_samples, + params.batch_centroids, + workspace); } /** @@ -860,8 +943,16 @@ void minClusterAndDistanceCompute( rmm::device_uvector& L2NormBuf_OR_DistBuf, rmm::device_uvector& workspace) { - kmeans::min_cluster_and_distance( - handle, params, X, centroids, minClusterAndDistance, L2NormX, L2NormBuf_OR_DistBuf, workspace); + kmeans::min_cluster_and_distance(handle, + X, + centroids, + minClusterAndDistance, + L2NormX, + L2NormBuf_OR_DistBuf, + params.metric, + params.batch_samples, + params.batch_centroids, + workspace); } /** diff --git a/cpp/include/raft_distance/kmeans.hpp b/cpp/include/raft_distance/kmeans.hpp new file mode 100644 index 0000000000..fea6fffcc8 --- /dev/null +++ b/cpp/include/raft_distance/kmeans.hpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "update_centroids.cuh" +#include +#include +#include + +namespace raft::cluster::kmeans::runtime { + + void update_centroids( + const raft::handle_t& handle, + const float *X, + int n_samples, + int n_features, + int n_clusters, + const float *weight, + const float *cur_centroids, + const float *l2norm_x, + float *new_centroids, + float *new_weight, + raft::distance::DistanceType metric, + int batch_samples, + int batch_centroids); + + + void update_centroids( + const raft::handle_t& handle, + const double *X, + int n_samples, + int n_features, + int n_clusters, + const double *weight, + const double *cur_centroids, + const double *l2norm_x, + double *new_centroids, + double *new_weight, + raft::distance::DistanceType metric, + int batch_samples, + int batch_centroids); + + +} // namespace raft::cluster::kmeans::runtime \ No newline at end of file diff --git a/cpp/src/distance/update_centroids.cuh b/cpp/src/distance/update_centroids.cuh new file mode 100644 index 0000000000..0518933560 --- /dev/null +++ b/cpp/src/distance/update_centroids.cuh @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +namespace raft::cluster::kmeans::runtime { + + + template + void update_centroids( + const raft::handle_t& handle, + const DataT *X, + int n_samples, + int n_features, + int n_clusters, + const DataT *weight, + const DataT *cur_centroids, + const DataT *l2norm_x, + DataT *new_centroids, + DataT *new_weight, + raft::distance::DistanceType metric, + int batch_samples, + int batch_centroids) { + + auto min_cluster_and_dist = raft::make_device_vector, IndexT>(handle, n_samples); + auto X_view = raft::make_device_matrix_view(X, n_samples, n_features); + auto weight_view = raft::make_device_vector_view(weight, n_clusters); + auto cur_centroids_view = raft::make_device_matrix_view(cur_centroids, n_clusters, n_features); + auto l2norm_x_view = raft::make_device_vector_view(l2norm_x, n_samples); + auto new_centroids_view = raft::make_device_matrix_view(new_centroids, n_clusters, n_features); + auto new_weight_view = raft::make_device_vector_view(new_weight, n_clusters); + + raft::cluster::kmeans::update_centroids( + handle, + X_view, + weight_view, + cur_centroids_view, + l2norm_x_view, + min_cluster_and_dist.view(), + new_centroids_view, + new_weight_view, + metric, + batch_samples, + batch_centroids); + } +} // namespace raft::cluster::kmeans::runtime \ No newline at end of file diff --git a/cpp/src/distance/update_centroids_double.cu b/cpp/src/distance/update_centroids_double.cu new file mode 100644 index 0000000000..02955cf8da --- /dev/null +++ b/cpp/src/distance/update_centroids_double.cu @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "update_centroids.cuh" +#include +#include +#include + +namespace raft::cluster::kmeans::runtime { + + void update_centroids( + const raft::handle_t& handle, + const double *X, + int n_samples, + int n_features, + int n_clusters, + const double *weight, + const double *cur_centroids, + const double *l2norm_x, + double *new_centroids, + double *new_weight, + raft::distance::DistanceType metric, + int batch_samples, + int batch_centroids) { + update_centroids(handle, X, n_samples, n_features, n_clusters, weight, cur_centroids, l2norm_x, + new_centroids, new_weight, metric, batch_samples, batch_centroids); + } + + +} // namespace raft::cluster::kmeans::runtime \ No newline at end of file diff --git a/cpp/src/distance/update_centroids_float.cu b/cpp/src/distance/update_centroids_float.cu new file mode 100644 index 0000000000..974846bc0a --- /dev/null +++ b/cpp/src/distance/update_centroids_float.cu @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "update_centroids.cuh" +#include +#include +#include + +namespace raft::cluster::kmeans::runtime { + + void update_centroids( + const raft::handle_t& handle, + const float *X, + int n_samples, + int n_features, + int n_clusters, + const float *weight, + const float *cur_centroids, + const float *l2norm_x, + float *new_centroids, + float *new_weight, + raft::distance::DistanceType metric, + int batch_samples, + int batch_centroids) { + update_centroids(handle, X, n_samples, n_features, n_clusters, weight, cur_centroids, l2norm_x, + new_centroids, new_weight, metric, batch_samples, batch_centroids); + } + + +} // namespace raft::cluster::kmeans::runtime \ No newline at end of file diff --git a/python/pylibraft/pylibraft/cluster/CMakeLists.txt b/python/pylibraft/pylibraft/cluster/CMakeLists.txt new file mode 100644 index 0000000000..44e34e0213 --- /dev/null +++ b/python/pylibraft/pylibraft/cluster/CMakeLists.txt @@ -0,0 +1,28 @@ +# ============================================================================= +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources kmeans.pyx) +set(linked_libraries raft::raft raft::distance) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" + MODULE_PREFIX cluster_) + +foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../library") +endforeach() diff --git a/python/pylibraft/pylibraft/cluster/__init__.pxd b/python/pylibraft/pylibraft/cluster/__init__.pxd new file mode 100644 index 0000000000..273b4497cc --- /dev/null +++ b/python/pylibraft/pylibraft/cluster/__init__.pxd @@ -0,0 +1,14 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pylibraft/pylibraft/cluster/__init__.py b/python/pylibraft/pylibraft/cluster/__init__.py new file mode 100644 index 0000000000..273b4497cc --- /dev/null +++ b/python/pylibraft/pylibraft/cluster/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pylibraft/pylibraft/cluster/kmeans.pyx b/python/pylibraft/pylibraft/cluster/kmeans.pyx new file mode 100644 index 0000000000..2d7f9254d2 --- /dev/null +++ b/python/pylibraft/pylibraft/cluster/kmeans.pyx @@ -0,0 +1,188 @@ +# +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +import numpy as np + +from libc.stdint cimport uintptr_t +from cython.operator cimport dereference as deref + +from libcpp cimport bool +from .distance_type cimport DistanceType + +from pylibraft.common import Handle +from pylibraft.common.handle cimport handle_t + + +def is_c_cont(cai, dt): + return "strides" not in cai or \ + cai["strides"] is None or \ + cai["strides"][1] == dt.itemsize + + +cdef extern from "raft_distance/kmeans.hpp" \ + namespace "raft::cluster::kmeans::runtime": + + cdef void update_centroids( + const handle_t& handle, + const double *X, + int n_samples, + int n_features, + int n_clusters, + const double *weight, + const double *cur_centroids, + const double *l2norm_x, + double *new_centroids, + double *new_weight, + DistanceType metric, + int batch_samples, + int batch_centroids); + + cdef void update_centroids( + const handle_t& handle, + const float *X, + int n_samples, + int n_features, + int n_clusters, + const float *weight, + const float *cur_centroids, + const float *l2norm_x, + float *new_centroids, + float *new_weight, + DistanceType metric, + int batch_samples, + int batch_centroids); + +def update_centroids(X, cur_centroids, weight, l2norm_x, + new_centroids, new_weight, + metric="euclidean", + batch_samples, batch_centroids, + handle=None): + """ + Compute pairwise distances between X and Y + + Valid values for metric: + ["euclidean", "l2", "l1", "cityblock", "inner_product", + "chebyshev", "canberra", "lp", "hellinger", "jensenshannon", + "kl_divergence", "russellrao", "minkowski", "correlation", + "cosine"] + + Parameters + ---------- + + X : CUDA array interface compliant matrix shape (m, k) + Y : CUDA array interface compliant matrix shape (n, k) + dists : Writable CUDA array interface matrix shape (m, n) + metric : string denoting the metric type (default="euclidean") + p : metric parameter (currently 
used only for "minkowski") + handle : Optional RAFT handle for reusing expensive CUDA resources + + Examples + -------- + + .. code-block:: python + + import cupy as cp + + from pylibraft.common import Handle + from pylibraft.distance import pairwise_distance + + n_samples = 5000 + n_features = 50 + + in1 = cp.random.random_sample((n_samples, n_features), + dtype=cp.float32) + in2 = cp.random.random_sample((n_samples, n_features), + dtype=cp.float32) + output = cp.empty((n_samples, n_samples), dtype=cp.float32) + + # A single RAFT handle can optionally be reused across + # pylibraft functions. + handle = Handle() + ... + pairwise_distance(in1, in2, output, metric="euclidean", handle=handle) + ... + # pylibraft functions are often asynchronous so the + # handle needs to be explicitly synchronized + handle.sync() + """ + + x_cai = X.__cuda_array_interface__ + y_cai = Y.__cuda_array_interface__ + dists_cai = dists.__cuda_array_interface__ + + m = x_cai["shape"][0] + n = y_cai["shape"][0] + + x_k = x_cai["shape"][1] + y_k = y_cai["shape"][1] + + if x_k != y_k: + raise ValueError("Inputs must have same number of columns. 
" + "a=%s, b=%s" % (x_k, y_k)) + + x_ptr = x_cai["data"][0] + y_ptr = y_cai["data"][0] + d_ptr = dists_cai["data"][0] + + handle = handle if handle is not None else Handle() + cdef handle_t *h = handle.getHandle() + + x_dt = np.dtype(x_cai["typestr"]) + y_dt = np.dtype(y_cai["typestr"]) + d_dt = np.dtype(dists_cai["typestr"]) + + x_c_contiguous = is_c_cont(x_cai, x_dt) + y_c_contiguous = is_c_cont(y_cai, y_dt) + + if x_c_contiguous != y_c_contiguous: + raise ValueError("Inputs must have matching strides") + + if metric not in SUPPORTED_DISTANCES: + raise ValueError("metric %s is not supported" % metric) + + cdef DistanceType distance_type = DISTANCE_TYPES[metric] + + if x_dt != y_dt or x_dt != d_dt: + raise ValueError("Inputs must have the same dtypes") + + if x_dt == np.float32: + pairwise_distance(deref(h), + x_ptr, + y_ptr, + d_ptr, + m, + n, + x_k, + distance_type, + x_c_contiguous, + p) + elif x_dt == np.float64: + pairwise_distance(deref(h), + x_ptr, + y_ptr, + d_ptr, + m, + n, + x_k, + distance_type, + x_c_contiguous, + p) + else: + raise ValueError("dtype %s not supported" % x_dt) From 6a2a8e7495f1f0bea3828bf37610b8df17ad8699 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 25 Oct 2022 18:54:40 -0400 Subject: [PATCH 09/27] compute_new_centroids test is returning results. Still need to validate if they are correct. 
--- cpp/include/raft/cluster/detail/kmeans.cuh | 14 +- cpp/include/raft/cluster/kmeans.cuh | 4 +- cpp/include/raft_distance/kmeans.hpp | 12 +- cpp/src/distance/update_centroids.cuh | 74 ++++---- cpp/src/distance/update_centroids_double.cu | 46 +++-- cpp/src/distance/update_centroids_float.cu | 46 +++-- python/pylibraft/CMakeLists.txt | 1 + python/pylibraft/pylibraft/cluster/kmeans.pyx | 164 +++++++++++------- .../pylibraft/pylibraft/distance/__init__.py | 3 +- .../pylibraft/pylibraft/test/test_kmeans.py | 86 +++++++++ 10 files changed, 299 insertions(+), 151 deletions(-) create mode 100644 python/pylibraft/pylibraft/test/test_kmeans.py diff --git a/cpp/include/raft/cluster/detail/kmeans.cuh b/cpp/include/raft/cluster/detail/kmeans.cuh index b213af6255..222ebf065a 100644 --- a/cpp/include/raft/cluster/detail/kmeans.cuh +++ b/cpp/include/raft/cluster/detail/kmeans.cuh @@ -277,8 +277,8 @@ template void update_centroids( const raft::handle_t& handle, raft::device_matrix_view X, + raft::device_matrix_view centroids, raft::device_vector_view weight, - raft::device_matrix_view cur_centroids, raft::device_vector_view l2norm_x, raft::device_vector_view, IndexT> min_cluster_and_dist, raft::device_matrix_view new_centroids, @@ -289,7 +289,7 @@ void update_centroids( int batch_centroids, rmm::device_uvector& workspace) { - auto n_clusters = cur_centroids.extent(0); + auto n_clusters = centroids.extent(0); auto n_samples = X.extent(0); // computes minClusterAndDistance[0:n_samples) where @@ -299,7 +299,7 @@ void update_centroids( // 'centroid[key]' detail::minClusterAndDistanceCompute(handle, X, - cur_centroids, + centroids, min_cluster_and_dist, l2norm_x, L2NormBuf_OR_DistBuf, @@ -365,9 +365,9 @@ void update_centroids( // copy centroids[i] to new_centroids[i] when new_weight[i] is 0 cub::ArgIndexInputIterator itr_wt(new_weight.data_handle()); raft::matrix::gather_if( - const_cast(cur_centroids.data_handle()), - static_cast(cur_centroids.extent(1)), - 
static_cast(cur_centroids.extent(0)), + const_cast(centroids.data_handle()), + static_cast(centroids.extent(1)), + static_cast(centroids.extent(0)), itr_wt, itr_wt, static_cast(new_weight.size()), @@ -453,9 +453,9 @@ void kmeans_fit_main(const raft::handle_t& handle, update_centroids(handle, X, - weight, raft::make_device_matrix_view( centroidsRawData.data_handle(), n_clusters, n_features), + weight, l2normx_view, minClusterAndDistance.view(), newCentroids.view(), diff --git a/cpp/include/raft/cluster/kmeans.cuh b/cpp/include/raft/cluster/kmeans.cuh index a3ebc07dd6..b9a4e442fc 100644 --- a/cpp/include/raft/cluster/kmeans.cuh +++ b/cpp/include/raft/cluster/kmeans.cuh @@ -342,8 +342,8 @@ template void update_centroids( const raft::handle_t& handle, raft::device_matrix_view X, + raft::device_matrix_view centroids, raft::device_vector_view weight, - raft::device_matrix_view cur_centroids, std::optional> l2norm_x, raft::device_vector_view, IndexT> min_cluster_and_dist, raft::device_matrix_view new_centroids, @@ -359,8 +359,8 @@ void update_centroids( detail::update_centroids(handle, X, + centroids, weight, - cur_centroids, l2norm_x.value(), min_cluster_and_dist, new_centroids, diff --git a/cpp/include/raft_distance/kmeans.hpp b/cpp/include/raft_distance/kmeans.hpp index fea6fffcc8..c093559a62 100644 --- a/cpp/include/raft_distance/kmeans.hpp +++ b/cpp/include/raft_distance/kmeans.hpp @@ -14,21 +14,19 @@ * limitations under the License. 
*/ -#include "update_centroids.cuh" #include -#include -#include +#include namespace raft::cluster::kmeans::runtime { void update_centroids( - const raft::handle_t& handle, + raft::handle_t const &handle, const float *X, int n_samples, int n_features, int n_clusters, + const float *centroids, const float *weight, - const float *cur_centroids, const float *l2norm_x, float *new_centroids, float *new_weight, @@ -38,13 +36,13 @@ namespace raft::cluster::kmeans::runtime { void update_centroids( - const raft::handle_t& handle, + raft::handle_t const &handle, const double *X, int n_samples, int n_features, int n_clusters, + const double *centroids, const double *weight, - const double *cur_centroids, const double *l2norm_x, double *new_centroids, double *new_weight, diff --git a/cpp/src/distance/update_centroids.cuh b/cpp/src/distance/update_centroids.cuh index 0518933560..6085b51d13 100644 --- a/cpp/src/distance/update_centroids.cuh +++ b/cpp/src/distance/update_centroids.cuh @@ -21,42 +21,42 @@ namespace raft::cluster::kmeans::runtime { +template +void update_centroids(raft::handle_t const& handle, + const DataT* X, + int n_samples, + int n_features, + int n_clusters, + const DataT* centroids, + const DataT* weight, + const DataT* l2norm_x, + DataT* new_centroids, + DataT* new_weight, + raft::distance::DistanceType metric, + int batch_samples, + int batch_centroids) +{ + auto min_cluster_and_dist = + raft::make_device_vector, IndexT>(handle, n_samples); + auto X_view = raft::make_device_matrix_view(X, n_samples, n_features); + auto centroids_view = + raft::make_device_matrix_view(centroids, n_clusters, n_features); + auto weight_view = raft::make_device_vector_view(weight, n_clusters); + auto l2norm_x_view = raft::make_device_vector_view(l2norm_x, n_samples); + auto new_centroids_view = + raft::make_device_matrix_view(new_centroids, n_clusters, n_features); + auto new_weight_view = raft::make_device_vector_view(new_weight, n_clusters); - template - void 
update_centroids( - const raft::handle_t& handle, - const DataT *X, - int n_samples, - int n_features, - int n_clusters, - const DataT *weight, - const DataT *cur_centroids, - const DataT *l2norm_x, - DataT *new_centroids, - DataT *new_weight, - raft::distance::DistanceType metric, - int batch_samples, - int batch_centroids) { - - auto min_cluster_and_dist = raft::make_device_vector, IndexT>(handle, n_samples); - auto X_view = raft::make_device_matrix_view(X, n_samples, n_features); - auto weight_view = raft::make_device_vector_view(weight, n_clusters); - auto cur_centroids_view = raft::make_device_matrix_view(cur_centroids, n_clusters, n_features); - auto l2norm_x_view = raft::make_device_vector_view(l2norm_x, n_samples); - auto new_centroids_view = raft::make_device_matrix_view(new_centroids, n_clusters, n_features); - auto new_weight_view = raft::make_device_vector_view(new_weight, n_clusters); - - raft::cluster::kmeans::update_centroids( - handle, - X_view, - weight_view, - cur_centroids_view, - l2norm_x_view, - min_cluster_and_dist.view(), - new_centroids_view, - new_weight_view, - metric, - batch_samples, - batch_centroids); - } + raft::cluster::kmeans::update_centroids(handle, + X_view, + centroids_view, + weight_view, + l2norm_x_view, + min_cluster_and_dist.view(), + new_centroids_view, + new_weight_view, + metric, + batch_samples, + batch_centroids); +} } // namespace raft::cluster::kmeans::runtime \ No newline at end of file diff --git a/cpp/src/distance/update_centroids_double.cu b/cpp/src/distance/update_centroids_double.cu index 02955cf8da..b508e0d9b7 100644 --- a/cpp/src/distance/update_centroids_double.cu +++ b/cpp/src/distance/update_centroids_double.cu @@ -21,23 +21,33 @@ namespace raft::cluster::kmeans::runtime { - void update_centroids( - const raft::handle_t& handle, - const double *X, - int n_samples, - int n_features, - int n_clusters, - const double *weight, - const double *cur_centroids, - const double *l2norm_x, - double *new_centroids, - 
double *new_weight, - raft::distance::DistanceType metric, - int batch_samples, - int batch_centroids) { - update_centroids(handle, X, n_samples, n_features, n_clusters, weight, cur_centroids, l2norm_x, - new_centroids, new_weight, metric, batch_samples, batch_centroids); - } - +void update_centroids(raft::handle_t const& handle, + const double* X, + int n_samples, + int n_features, + int n_clusters, + const double* centroids, + const double* weight, + const double* l2norm_x, + double* new_centroids, + double* new_weight, + raft::distance::DistanceType metric, + int batch_samples, + int batch_centroids) +{ + update_centroids(handle, + X, + n_samples, + n_features, + n_clusters, + centroids, + weight, + l2norm_x, + new_centroids, + new_weight, + metric, + batch_samples, + batch_centroids); +} } // namespace raft::cluster::kmeans::runtime \ No newline at end of file diff --git a/cpp/src/distance/update_centroids_float.cu b/cpp/src/distance/update_centroids_float.cu index 974846bc0a..8974d32e20 100644 --- a/cpp/src/distance/update_centroids_float.cu +++ b/cpp/src/distance/update_centroids_float.cu @@ -21,23 +21,33 @@ namespace raft::cluster::kmeans::runtime { - void update_centroids( - const raft::handle_t& handle, - const float *X, - int n_samples, - int n_features, - int n_clusters, - const float *weight, - const float *cur_centroids, - const float *l2norm_x, - float *new_centroids, - float *new_weight, - raft::distance::DistanceType metric, - int batch_samples, - int batch_centroids) { - update_centroids(handle, X, n_samples, n_features, n_clusters, weight, cur_centroids, l2norm_x, - new_centroids, new_weight, metric, batch_samples, batch_centroids); - } - +void update_centroids(raft::handle_t const& handle, + const float* X, + int n_samples, + int n_features, + int n_clusters, + const float* centroids, + const float* weight, + const float* l2norm_x, + float* new_centroids, + float* new_weight, + raft::distance::DistanceType metric, + int batch_samples, + int 
batch_centroids) +{ + update_centroids(handle, + X, + n_samples, + n_features, + n_clusters, + centroids, + weight, + l2norm_x, + new_centroids, + new_weight, + metric, + batch_samples, + batch_centroids); +} } // namespace raft::cluster::kmeans::runtime \ No newline at end of file diff --git a/python/pylibraft/CMakeLists.txt b/python/pylibraft/CMakeLists.txt index b90465ce32..f3dd7eaaa2 100644 --- a/python/pylibraft/CMakeLists.txt +++ b/python/pylibraft/CMakeLists.txt @@ -71,3 +71,4 @@ rapids_cython_init() add_subdirectory(pylibraft/common) add_subdirectory(pylibraft/distance) add_subdirectory(pylibraft/random) +add_subdirectory(pylibraft/cluster) diff --git a/python/pylibraft/pylibraft/cluster/kmeans.pyx b/python/pylibraft/pylibraft/cluster/kmeans.pyx index 2d7f9254d2..c52271ae00 100644 --- a/python/pylibraft/pylibraft/cluster/kmeans.pyx +++ b/python/pylibraft/pylibraft/cluster/kmeans.pyx @@ -24,11 +24,13 @@ from libc.stdint cimport uintptr_t from cython.operator cimport dereference as deref from libcpp cimport bool -from .distance_type cimport DistanceType +from pylibraft.distance.distance_type cimport DistanceType from pylibraft.common import Handle from pylibraft.common.handle cimport handle_t +from pylibraft.distance import DISTANCE_TYPES + def is_c_cont(cai, dt): return "strides" not in cai or \ @@ -45,8 +47,8 @@ cdef extern from "raft_distance/kmeans.hpp" \ int n_samples, int n_features, int n_clusters, + const double *centroids, const double *weight, - const double *cur_centroids, const double *l2norm_x, double *new_centroids, double *new_weight, @@ -60,8 +62,8 @@ cdef extern from "raft_distance/kmeans.hpp" \ int n_samples, int n_features, int n_clusters, + const float *centroids, const float *weight, - const float *cur_centroids, const float *l2norm_x, float *new_centroids, float *new_weight, @@ -69,28 +71,38 @@ cdef extern from "raft_distance/kmeans.hpp" \ int batch_samples, int batch_centroids); -def update_centroids(X, cur_centroids, weight, l2norm_x, 
- new_centroids, new_weight, +def compute_new_centroids(X, + centroids, + weight, + l2norm_x, + new_centroids, + new_weight, + batch_samples, + batch_centroids, metric="euclidean", - batch_samples, batch_centroids, handle=None): """ - Compute pairwise distances between X and Y + Compute new centroids given an input matrix and existing centroids Valid values for metric: - ["euclidean", "l2", "l1", "cityblock", "inner_product", - "chebyshev", "canberra", "lp", "hellinger", "jensenshannon", - "kl_divergence", "russellrao", "minkowski", "correlation", - "cosine"] + ["euclidean", "sqeuclidean"] Parameters ---------- - X : CUDA array interface compliant matrix shape (m, k) - Y : CUDA array interface compliant matrix shape (n, k) - dists : Writable CUDA array interface matrix shape (m, n) - metric : string denoting the metric type (default="euclidean") - p : metric parameter (currently used only for "minkowski") + X : Input CUDA array interface compliant matrix shape (m, k) + centroids : Input CUDA array interface compliant matrix shape + (n_clusters, k) + weight : Input CUDA array interface compliant matrix shape (n_clusters, 1) + l2norm_x : Input CUDA array interface compliant matrix shape (m, 1) + new_centroids : Writable CUDA array interface compliant matrix shape + (n_clusters, k) + new_weight : Writable CUDA array interface compliant matrix shape + (n_clusters, 1) + batch_samples : integer specifying the batch size for X to compute + distances in batches + batch_centroids : integer specifying the batch size for centroids + to compute distances in batches handle : Optional RAFT handle for reusing expensive CUDA resources Examples @@ -101,88 +113,118 @@ def update_centroids(X, cur_centroids, weight, l2norm_x, import cupy as cp from pylibraft.common import Handle - from pylibraft.distance import pairwise_distance + from pylibaft.cluster.kmeans import update_centroids + from pylibraft.distance import fused_l2_nn_argmin + + # A single RAFT handle can optionally be reused 
across + # pylibraft functions. + handle = Handle() n_samples = 5000 n_features = 50 + n_clusters = 3 - in1 = cp.random.random_sample((n_samples, n_features), - dtype=cp.float32) - in2 = cp.random.random_sample((n_samples, n_features), + X = cp.random.random_sample((n_samples, n_features), dtype=cp.float32) - output = cp.empty((n_samples, n_samples), dtype=cp.float32) + centroids = cp.random.random_sample((n_clusters, n_features), + dtype=cp.float32) + + argmin = cp.empty((n_samples, ), dtype=cp.int32) + + l2norm_x = cupy.linalg.norm(X, axis=0, ord=2) + + fused_l2_nn_argmin(centroids, X, argmin, handle=handle) + + weight, _ = cp.histogram(argmin, bins=cp.arange(0, n_clusters+1)).astype(cp.float32) + + new_weight = cp.empty((n_clusters, ), dtype=cp.float32) + new_centroids = cp.empty((n_clusters, n_features), dtype=cp.float32) + + compute_new_centroids(X, centroids, weight, l2norm_x, new_centroids, new_weight, n_samples, n_clusters) - # A single RAFT handle can optionally be reused across - # pylibraft functions. - handle = Handle() - ... - pairwise_distance(in1, in2, output, metric="euclidean", handle=handle) - ... # pylibraft functions are often asynchronous so the # handle needs to be explicitly synchronized handle.sync() """ x_cai = X.__cuda_array_interface__ - y_cai = Y.__cuda_array_interface__ - dists_cai = dists.__cuda_array_interface__ + centroids_cai = centroids.__cuda_array_interface__ + weight_cai = weight.__cuda_array_interface__ + l2norm_x_cai = l2norm_x.__cuda_array_interface__ + new_centroids_cai = new_centroids.__cuda_array_interface__ + new_weight_cai = new_weight.__cuda_array_interface__ m = x_cai["shape"][0] - n = y_cai["shape"][0] + n_clusters = centroids_cai["shape"][0] x_k = x_cai["shape"][1] - y_k = y_cai["shape"][1] + centroids_k = centroids_cai["shape"][1] + new_centroids_k = centroids_cai["shape"][1] - if x_k != y_k: + if x_k != centroids_k: raise ValueError("Inputs must have same number of columns. 
" - "a=%s, b=%s" % (x_k, y_k)) + "a=%s, b=%s" % (x_k, centroids_k)) x_ptr = x_cai["data"][0] - y_ptr = y_cai["data"][0] - d_ptr = dists_cai["data"][0] + centroids_ptr = centroids_cai["data"][0] + weight_ptr = weight_cai["data"][0] + l2norm_x_ptr = l2norm_x_cai["data"][0] + new_centroids_ptr = new_centroids_cai["data"][0] + new_weight_ptr = new_weight_cai["data"][0] handle = handle if handle is not None else Handle() cdef handle_t *h = handle.getHandle() x_dt = np.dtype(x_cai["typestr"]) - y_dt = np.dtype(y_cai["typestr"]) - d_dt = np.dtype(dists_cai["typestr"]) + centroids_dt = np.dtype(centroids_cai["typestr"]) + weight_dt = np.dtype(weight_cai["typestr"]) + new_centroids_dt = np.dtype(new_centroids_cai["typestr"]) + new_weight_dt = np.dtype(new_weight_cai["typestr"]) + l2norm_x_dt = np.dtype(l2norm_x_cai["typestr"]) x_c_contiguous = is_c_cont(x_cai, x_dt) - y_c_contiguous = is_c_cont(y_cai, y_dt) - - if x_c_contiguous != y_c_contiguous: - raise ValueError("Inputs must have matching strides") + centroids_c_contiguous = is_c_cont(centroids_cai, centroids_dt) + new_centroids_c_contiguous = is_c_cont(new_centroids_cai, new_centroids_dt) - if metric not in SUPPORTED_DISTANCES: - raise ValueError("metric %s is not supported" % metric) + if not x_c_contiguous or not centroids_c_contiguous \ + or not new_centroids_c_contiguous: + raise ValueError("Inputs must all be c contiguous") cdef DistanceType distance_type = DISTANCE_TYPES[metric] - if x_dt != y_dt or x_dt != d_dt: - raise ValueError("Inputs must have the same dtypes") + if x_dt != centroids_dt or x_dt != weight_dt \ + or x_dt != new_centroids_dt or x_dt != new_weight_dt \ + or x_dt != l2norm_x_dt: + raise ValueError("Inputs must all have the same dtypes " + "(float32 or float64)") if x_dt == np.float32: - pairwise_distance(deref(h), + update_centroids(deref(h), x_ptr, - y_ptr, - d_ptr, - m, - n, - x_k, + m, + x_k, + n_clusters, + centroids_ptr, + weight_ptr, + l2norm_x_ptr, + new_centroids_ptr, + new_weight_ptr, 
distance_type, - x_c_contiguous, - p) + batch_samples, + batch_centroids) elif x_dt == np.float64: - pairwise_distance(deref(h), + update_centroids(deref(h), x_ptr, - y_ptr, - d_ptr, - m, - n, - x_k, + m, + x_k, + n_clusters, + centroids_ptr, + weight_ptr, + l2norm_x_ptr, + new_centroids_ptr, + new_weight_ptr, distance_type, - x_c_contiguous, - p) + batch_samples, + batch_centroids) else: raise ValueError("dtype %s not supported" % x_dt) diff --git a/python/pylibraft/pylibraft/distance/__init__.py b/python/pylibraft/pylibraft/distance/__init__.py index a3c4e2229b..7d68f02936 100644 --- a/python/pylibraft/pylibraft/distance/__init__.py +++ b/python/pylibraft/pylibraft/distance/__init__.py @@ -14,4 +14,5 @@ # from .fused_l2_nn import fused_l2_nn_argmin -from .pairwise_distance import distance as pairwise_distance \ No newline at end of file +from .pairwise_distance import distance as pairwise_distance +from .pairwise_distance import DISTANCE_TYPES \ No newline at end of file diff --git a/python/pylibraft/pylibraft/test/test_kmeans.py b/python/pylibraft/pylibraft/test/test_kmeans.py new file mode 100644 index 0000000000..4b512a166d --- /dev/null +++ b/python/pylibraft/pylibraft/test/test_kmeans.py @@ -0,0 +1,86 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from scipy.spatial.distance import cdist +import pytest +import numpy as np + +from pylibraft.common import Handle +from pylibraft.cluster.kmeans import compute_new_centroids +from pylibraft.distance import fused_l2_nn_argmin + +from pylibraft.testing.utils import TestDeviceBuffer + + +@pytest.mark.parametrize("n_rows", [100]) +@pytest.mark.parametrize("n_cols", [100]) +@pytest.mark.parametrize("n_clusters", [5]) +@pytest.mark.parametrize("metric", ["euclidean", "sqeuclidean"]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_compute_new_centroids(n_rows, n_cols, metric, n_clusters, dtype): + + order = "C" + + # A single RAFT handle can optionally be reused across + # pylibraft functions. + handle = Handle() + + X = np.random.random_sample((n_rows, n_cols)).astype(np.float32) + centroids = np.random.random_sample((n_clusters, n_cols)).astype(np.float32) + + + l2norm_x = np.linalg.norm(X, axis=0, ord=2) + + + + new_weight = np.empty((n_clusters, ), dtype=np.float32) + new_centroids = np.empty((n_clusters, n_cols), dtype=np.float32) + + X_device = TestDeviceBuffer(X, order) + centroids_device = TestDeviceBuffer(centroids, order) + + argmin = np.empty((n_rows, ), dtype=np.int32) + argmin_device = TestDeviceBuffer(argmin, order) + + weight, _ = np.histogram(argmin_device.copy_to_host(), bins=np.arange(0, n_clusters+1)) + weight = weight.astype(np.float32) + + weight_device = TestDeviceBuffer(weight, order) + + fused_l2_nn_argmin(centroids_device, X_device, argmin_device, handle=handle) + + new_weight_device = TestDeviceBuffer(new_weight, order) + new_centroids_device = TestDeviceBuffer(new_centroids, order) + l2norm_x_device = TestDeviceBuffer(l2norm_x, order) + + compute_new_centroids(X_device, + centroids_device, + weight_device, + l2norm_x_device, + new_centroids_device, + new_weight_device, + n_rows, + n_clusters) + + # pylibraft functions are often asynchronous so the + # handle needs to be explicitly synchronized + handle.sync() + + 
print(str(new_centroids)) + print(str(new_weight)) + + # actual[actual <= 1e-5] = 0.0 + # + # assert np.allclose(expected, actual, rtol=1e-4) From 223a4a485c61fe683bffaed4f061fc912234d105 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 25 Oct 2022 19:48:28 -0400 Subject: [PATCH 10/27] Fixing sample weight --- cpp/include/raft/cluster/detail/kmeans.cuh | 26 +++---- cpp/include/raft/cluster/kmeans.cuh | 22 +++--- cpp/include/raft_distance/kmeans.hpp | 12 ++-- cpp/src/distance/update_centroids.cuh | 18 ++--- cpp/src/distance/update_centroids_double.cu | 12 ++-- cpp/src/distance/update_centroids_float.cu | 12 ++-- python/pylibraft/pylibraft/cluster/kmeans.pyx | 67 +++++++++---------- .../pylibraft/pylibraft/test/test_kmeans.py | 48 ++++++------- 8 files changed, 104 insertions(+), 113 deletions(-) diff --git a/cpp/include/raft/cluster/detail/kmeans.cuh b/cpp/include/raft/cluster/detail/kmeans.cuh index 222ebf065a..cb81bcd744 100644 --- a/cpp/include/raft/cluster/detail/kmeans.cuh +++ b/cpp/include/raft/cluster/detail/kmeans.cuh @@ -277,12 +277,12 @@ template void update_centroids( const raft::handle_t& handle, raft::device_matrix_view X, - raft::device_matrix_view centroids, - raft::device_vector_view weight, + raft::device_vector_view sample_weights, raft::device_vector_view l2norm_x, + raft::device_matrix_view centroids, raft::device_vector_view, IndexT> min_cluster_and_dist, + raft::device_vector_view weight_per_cluster, raft::device_matrix_view new_centroids, - raft::device_vector_view new_weight, rmm::device_uvector& L2NormBuf_OR_DistBuf, raft::distance::DistanceType metric, int batch_samples, @@ -324,7 +324,7 @@ void update_centroids( raft::linalg::reduce_rows_by_key((DataT*)X.data_handle(), X.extent(1), itr, - weight.data_handle(), + sample_weights.data_handle(), workspace.data(), X.extent(0), X.extent(1), @@ -333,11 +333,11 @@ void update_centroids( handle.get_stream()); // Reduce weights by key to compute weight in each cluster - 
raft::linalg::reduce_cols_by_key(weight.data_handle(), + raft::linalg::reduce_cols_by_key(sample_weights.data_handle(), itr, - new_weight.data_handle(), + weight_per_cluster.data_handle(), (IndexT)1, - (IndexT)weight.extent(0), + (IndexT)sample_weights.extent(0), (IndexT)n_clusters, handle.get_stream()); @@ -349,7 +349,7 @@ void update_centroids( raft::linalg::matrixVectorOp( new_centroids.data_handle(), new_centroids.data_handle(), - new_weight.data_handle(), + weight_per_cluster.data_handle(), new_centroids.extent(1), new_centroids.extent(0), true, @@ -363,14 +363,14 @@ void update_centroids( handle.get_stream()); // copy centroids[i] to new_centroids[i] when new_weight[i] is 0 - cub::ArgIndexInputIterator itr_wt(new_weight.data_handle()); + cub::ArgIndexInputIterator itr_wt(weight_per_cluster.data_handle()); raft::matrix::gather_if( const_cast(centroids.data_handle()), static_cast(centroids.extent(1)), static_cast(centroids.extent(0)), itr_wt, itr_wt, - static_cast(new_weight.size()), + static_cast(weight_per_cluster.size()), new_centroids.data_handle(), [=] __device__(raft::KeyValuePair map) { // predicate // copy when the # of samples in the cluster is 0 @@ -453,13 +453,13 @@ void kmeans_fit_main(const raft::handle_t& handle, update_centroids(handle, X, - raft::make_device_matrix_view( - centroidsRawData.data_handle(), n_clusters, n_features), weight, l2normx_view, + raft::make_device_matrix_view( + centroidsRawData.data_handle(), n_clusters, n_features), minClusterAndDistance.view(), - newCentroids.view(), wtInCluster.view(), + newCentroids.view(), L2NormBuf_OR_DistBuf, params.metric, params.batch_samples, diff --git a/cpp/include/raft/cluster/kmeans.cuh b/cpp/include/raft/cluster/kmeans.cuh index b9a4e442fc..71142bb9f9 100644 --- a/cpp/include/raft/cluster/kmeans.cuh +++ b/cpp/include/raft/cluster/kmeans.cuh @@ -326,13 +326,13 @@ void cluster_cost(const raft::handle_t& handle, * @tparam IndexT * @param[in] handle: Raft handle to use for managing library 
resources * @param[in] X: input matrix (size n_samples, n_features) - * @param[in] weight: number of samples currently assigned to each centroid (size n_clusters) - * @param[in] cur_centroids: matrix of current centroids (size n_clusters, n_features) + * @param[in] sample_weights: number of samples currently assigned to each centroid (size n_samples) * @param[in] l2norm_x: optional array of l2 norms for each input data sample (size n_samples) + * @param[in] centroids: matrix of current centroids (size n_clusters, n_features) * @param[out] min_cluster_and_dist: output vector to store key/value pairs of min cluster indices * and distances (size n_clusters) * @param[out] new_centroids: output matrix of updated centroids (size n_clusters, n_features) - * @param[out] new_weight: number of samples assigned to each new centroid (size n_clusters) + * @param[out] weight_per_cluster: sum of sample weights per cluster (size n_clusters) * @param[in] metric: distance metric to use. Must be either L2Expanded, L2SqrtExpanded, * L2Unexpanded, or L2SqrtUnexpanded * @param[in] batch_samples: batch size for data samples when computing distances @@ -342,12 +342,12 @@ template void update_centroids( const raft::handle_t& handle, raft::device_matrix_view X, - raft::device_matrix_view centroids, - raft::device_vector_view weight, + raft::device_vector_view sample_weights, std::optional> l2norm_x, + raft::device_matrix_view centroids, raft::device_vector_view, IndexT> min_cluster_and_dist, + raft::device_vector_view weight_per_cluster, raft::device_matrix_view new_centroids, - raft::device_vector_view new_weight, raft::distance::DistanceType metric, int batch_samples, int batch_centroids) @@ -359,12 +359,12 @@ void update_centroids( detail::update_centroids(handle, X, - centroids, - weight, + sample_weights, l2norm_x.value(), + centroids, min_cluster_and_dist, + weight_per_cluster, new_centroids, - new_weight, dist_workspace, metric, batch_samples, @@ -580,14 +580,14 @@ template void 
fit_main(const raft::handle_t& handle, const KMeansParams& params, raft::device_matrix_view X, - raft::device_vector_view weight, + raft::device_vector_view sample_weights, raft::device_matrix_view centroids, raft::host_scalar_view inertia, raft::host_scalar_view n_iter, rmm::device_uvector& workspace) { detail::kmeans_fit_main( - handle, params, X, weight, centroids, inertia, n_iter, workspace); + handle, params, X, sample_weights, centroids, inertia, n_iter, workspace); } }; // end namespace raft::cluster::kmeans diff --git a/cpp/include/raft_distance/kmeans.hpp b/cpp/include/raft_distance/kmeans.hpp index c093559a62..b57db3590f 100644 --- a/cpp/include/raft_distance/kmeans.hpp +++ b/cpp/include/raft_distance/kmeans.hpp @@ -25,11 +25,11 @@ namespace raft::cluster::kmeans::runtime { int n_samples, int n_features, int n_clusters, - const float *centroids, - const float *weight, + const float *sample_weights, const float *l2norm_x, + const float *centroids, float *new_centroids, - float *new_weight, + float *weight_per_cluster, raft::distance::DistanceType metric, int batch_samples, int batch_centroids); @@ -41,11 +41,11 @@ namespace raft::cluster::kmeans::runtime { int n_samples, int n_features, int n_clusters, - const double *centroids, - const double *weight, + const double *sample_weights, const double *l2norm_x, + const double *centroids, double *new_centroids, - double *new_weight, + double *weight_per_cluster, raft::distance::DistanceType metric, int batch_samples, int batch_centroids); diff --git a/cpp/src/distance/update_centroids.cuh b/cpp/src/distance/update_centroids.cuh index 6085b51d13..c6d0d7c461 100644 --- a/cpp/src/distance/update_centroids.cuh +++ b/cpp/src/distance/update_centroids.cuh @@ -27,11 +27,11 @@ void update_centroids(raft::handle_t const& handle, int n_samples, int n_features, int n_clusters, - const DataT* centroids, - const DataT* weight, + const DataT* sample_weights, const DataT* l2norm_x, + const DataT* centroids, DataT* 
new_centroids, - DataT* new_weight, + DataT* weight_per_cluster, raft::distance::DistanceType metric, int batch_samples, int batch_centroids) @@ -41,20 +41,22 @@ void update_centroids(raft::handle_t const& handle, auto X_view = raft::make_device_matrix_view(X, n_samples, n_features); auto centroids_view = raft::make_device_matrix_view(centroids, n_clusters, n_features); - auto weight_view = raft::make_device_vector_view(weight, n_clusters); + auto sample_weights_view = + raft::make_device_vector_view(sample_weights, n_clusters); auto l2norm_x_view = raft::make_device_vector_view(l2norm_x, n_samples); auto new_centroids_view = raft::make_device_matrix_view(new_centroids, n_clusters, n_features); - auto new_weight_view = raft::make_device_vector_view(new_weight, n_clusters); + auto weight_per_cluster_view = + raft::make_device_vector_view(weight_per_cluster, n_clusters); raft::cluster::kmeans::update_centroids(handle, X_view, - centroids_view, - weight_view, + sample_weights_view, l2norm_x_view, + centroids_view, min_cluster_and_dist.view(), + weight_per_cluster_view, new_centroids_view, - new_weight_view, metric, batch_samples, batch_centroids); diff --git a/cpp/src/distance/update_centroids_double.cu b/cpp/src/distance/update_centroids_double.cu index b508e0d9b7..c16ff00345 100644 --- a/cpp/src/distance/update_centroids_double.cu +++ b/cpp/src/distance/update_centroids_double.cu @@ -26,11 +26,11 @@ void update_centroids(raft::handle_t const& handle, int n_samples, int n_features, int n_clusters, - const double* centroids, - const double* weight, + const double* sample_weights, const double* l2norm_x, + const double* centroids, double* new_centroids, - double* new_weight, + double* weight_per_cluster, raft::distance::DistanceType metric, int batch_samples, int batch_centroids) @@ -40,11 +40,11 @@ void update_centroids(raft::handle_t const& handle, n_samples, n_features, n_clusters, - centroids, - weight, + sample_weights, l2norm_x, + centroids, new_centroids, - 
new_weight, + weight_per_cluster, metric, batch_samples, batch_centroids); diff --git a/cpp/src/distance/update_centroids_float.cu b/cpp/src/distance/update_centroids_float.cu index 8974d32e20..d7d5e6f263 100644 --- a/cpp/src/distance/update_centroids_float.cu +++ b/cpp/src/distance/update_centroids_float.cu @@ -26,11 +26,11 @@ void update_centroids(raft::handle_t const& handle, int n_samples, int n_features, int n_clusters, - const float* centroids, - const float* weight, + const float* sample_weights, const float* l2norm_x, + const float* centroids, float* new_centroids, - float* new_weight, + float* weight_per_cluster, raft::distance::DistanceType metric, int batch_samples, int batch_centroids) @@ -40,11 +40,11 @@ void update_centroids(raft::handle_t const& handle, n_samples, n_features, n_clusters, - centroids, - weight, + sample_weights, l2norm_x, + centroids, new_centroids, - new_weight, + weight_per_cluster, metric, batch_samples, batch_centroids); diff --git a/python/pylibraft/pylibraft/cluster/kmeans.pyx b/python/pylibraft/pylibraft/cluster/kmeans.pyx index c52271ae00..c890a24f89 100644 --- a/python/pylibraft/pylibraft/cluster/kmeans.pyx +++ b/python/pylibraft/pylibraft/cluster/kmeans.pyx @@ -47,11 +47,11 @@ cdef extern from "raft_distance/kmeans.hpp" \ int n_samples, int n_features, int n_clusters, - const double *centroids, - const double *weight, + const double *sample_weights, const double *l2norm_x, + const double *centroids, double *new_centroids, - double *new_weight, + double *weight_per_cluster, DistanceType metric, int batch_samples, int batch_centroids); @@ -62,21 +62,21 @@ cdef extern from "raft_distance/kmeans.hpp" \ int n_samples, int n_features, int n_clusters, - const float *centroids, - const float *weight, + const float *sample_weights, const float *l2norm_x, + const float *centroids, float *new_centroids, - float *new_weight, + float *weight_per_cluster, DistanceType metric, int batch_samples, int batch_centroids); def 
compute_new_centroids(X, - centroids, - weight, + sample_weights, l2norm_x, + centroids, new_centroids, - new_weight, + weight_per_cluster, batch_samples, batch_centroids, metric="euclidean", @@ -91,14 +91,14 @@ def compute_new_centroids(X, ---------- X : Input CUDA array interface compliant matrix shape (m, k) + sample_weights : Input CUDA array interface compliant matrix shape (n_clusters, 1) + l2norm_x : Input CUDA array interface compliant matrix shape (m, 1) centroids : Input CUDA array interface compliant matrix shape (n_clusters, k) - weight : Input CUDA array interface compliant matrix shape (n_clusters, 1) - l2norm_x : Input CUDA array interface compliant matrix shape (m, 1) new_centroids : Writable CUDA array interface compliant matrix shape (n_clusters, k) - new_weight : Writable CUDA array interface compliant matrix shape - (n_clusters, 1) + weight_per_cluster : Writable CUDA array interface compliant matrix shape + (n_clusters, 1) batch_samples : integer specifying the batch size for X to compute distances in batches batch_centroids : integer specifying the batch size for centroids @@ -126,21 +126,20 @@ def compute_new_centroids(X, X = cp.random.random_sample((n_samples, n_features), dtype=cp.float32) + centroids = cp.random.random_sample((n_clusters, n_features), dtype=cp.float32) - argmin = cp.empty((n_samples, ), dtype=cp.int32) - l2norm_x = cupy.linalg.norm(X, axis=0, ord=2) - fused_l2_nn_argmin(centroids, X, argmin, handle=handle) + sample_weights = cp.ones((n_samples,)).astype(cp.float32) - weight, _ = cp.histogram(argmin, bins=cp.arange(0, n_clusters+1)).astype(cp.float32) - - new_weight = cp.empty((n_clusters, ), dtype=cp.float32) + weight_per_cluster = cp.empty((n_clusters, ), dtype=cp.float32) new_centroids = cp.empty((n_clusters, n_features), dtype=cp.float32) - compute_new_centroids(X, centroids, weight, l2norm_x, new_centroids, new_weight, n_samples, n_clusters) + compute_new_centroids(X, sample_weights, l2norm_x, centroids, + 
new_centroids, weight_per_cluster, + n_samples, n_clusters) # pylibraft functions are often asynchronous so the # handle needs to be explicitly synchronized @@ -149,10 +148,10 @@ def compute_new_centroids(X, x_cai = X.__cuda_array_interface__ centroids_cai = centroids.__cuda_array_interface__ - weight_cai = weight.__cuda_array_interface__ + sample_weights_cai = sample_weights.__cuda_array_interface__ l2norm_x_cai = l2norm_x.__cuda_array_interface__ new_centroids_cai = new_centroids.__cuda_array_interface__ - new_weight_cai = new_weight.__cuda_array_interface__ + weight_per_cluster_cai = weight_per_cluster.__cuda_array_interface__ m = x_cai["shape"][0] n_clusters = centroids_cai["shape"][0] @@ -167,19 +166,19 @@ def compute_new_centroids(X, x_ptr = x_cai["data"][0] centroids_ptr = centroids_cai["data"][0] - weight_ptr = weight_cai["data"][0] + sample_weights_ptr = sample_weights_cai["data"][0] l2norm_x_ptr = l2norm_x_cai["data"][0] new_centroids_ptr = new_centroids_cai["data"][0] - new_weight_ptr = new_weight_cai["data"][0] + weight_per_cluster_ptr = weight_per_cluster_cai["data"][0] handle = handle if handle is not None else Handle() cdef handle_t *h = handle.getHandle() x_dt = np.dtype(x_cai["typestr"]) centroids_dt = np.dtype(centroids_cai["typestr"]) - weight_dt = np.dtype(weight_cai["typestr"]) + sample_weights_dt = np.dtype(sample_weights_cai["typestr"]) new_centroids_dt = np.dtype(new_centroids_cai["typestr"]) - new_weight_dt = np.dtype(new_weight_cai["typestr"]) + weight_per_cluster_dt = np.dtype(weight_per_cluster_cai["typestr"]) l2norm_x_dt = np.dtype(l2norm_x_cai["typestr"]) x_c_contiguous = is_c_cont(x_cai, x_dt) @@ -192,8 +191,8 @@ def compute_new_centroids(X, cdef DistanceType distance_type = DISTANCE_TYPES[metric] - if x_dt != centroids_dt or x_dt != weight_dt \ - or x_dt != new_centroids_dt or x_dt != new_weight_dt \ + if x_dt != centroids_dt or x_dt != sample_weights_dt \ + or x_dt != new_centroids_dt or x_dt != weight_per_cluster_dt \ or x_dt != 
l2norm_x_dt: raise ValueError("Inputs must all have the same dtypes " "(float32 or float64)") @@ -204,11 +203,11 @@ def compute_new_centroids(X, m, x_k, n_clusters, - centroids_ptr, - weight_ptr, + sample_weights_ptr, l2norm_x_ptr, + centroids_ptr, new_centroids_ptr, - new_weight_ptr, + weight_per_cluster_ptr, distance_type, batch_samples, batch_centroids) @@ -218,11 +217,11 @@ def compute_new_centroids(X, m, x_k, n_clusters, - centroids_ptr, - weight_ptr, + sample_weights_ptr, l2norm_x_ptr, + centroids_ptr, new_centroids_ptr, - new_weight_ptr, + weight_per_cluster_ptr, distance_type, batch_samples, batch_centroids) diff --git a/python/pylibraft/pylibraft/test/test_kmeans.py b/python/pylibraft/pylibraft/test/test_kmeans.py index 4b512a166d..b053838195 100644 --- a/python/pylibraft/pylibraft/test/test_kmeans.py +++ b/python/pylibraft/pylibraft/test/test_kmeans.py @@ -13,7 +13,6 @@ # limitations under the License. # -from scipy.spatial.distance import cdist import pytest import numpy as np @@ -37,49 +36,40 @@ def test_compute_new_centroids(n_rows, n_cols, metric, n_clusters, dtype): # pylibraft functions. 
handle = Handle() - X = np.random.random_sample((n_rows, n_cols)).astype(np.float32) - centroids = np.random.random_sample((n_clusters, n_cols)).astype(np.float32) - - - l2norm_x = np.linalg.norm(X, axis=0, ord=2) - - - - new_weight = np.empty((n_clusters, ), dtype=np.float32) - new_centroids = np.empty((n_clusters, n_cols), dtype=np.float32) - + X = np.random.random_sample((n_rows, n_cols)).astype(dtype) X_device = TestDeviceBuffer(X, order) + + centroids = np.random.random_sample((n_clusters, n_cols)).astype(dtype) centroids_device = TestDeviceBuffer(centroids, order) - argmin = np.empty((n_rows, ), dtype=np.int32) - argmin_device = TestDeviceBuffer(argmin, order) + l2norm_x = np.linalg.norm(X, axis=0, ord=2) - weight, _ = np.histogram(argmin_device.copy_to_host(), bins=np.arange(0, n_clusters+1)) - weight = weight.astype(np.float32) + weight_per_cluster = np.empty((n_clusters, ), dtype=dtype) + weight_per_cluster_device = TestDeviceBuffer(weight_per_cluster, order) - weight_device = TestDeviceBuffer(weight, order) + new_centroids = np.empty((n_clusters, n_cols), dtype=dtype) + new_centroids_device = TestDeviceBuffer(new_centroids, order) - fused_l2_nn_argmin(centroids_device, X_device, argmin_device, handle=handle) + sample_weights = np.ones((n_rows,)).astype(dtype) / n_rows + sample_weights_device = TestDeviceBuffer(sample_weights, order) - new_weight_device = TestDeviceBuffer(new_weight, order) - new_centroids_device = TestDeviceBuffer(new_centroids, order) l2norm_x_device = TestDeviceBuffer(l2norm_x, order) compute_new_centroids(X_device, - centroids_device, - weight_device, - l2norm_x_device, - new_centroids_device, - new_weight_device, - n_rows, - n_clusters) + sample_weights_device, + l2norm_x_device, + centroids_device, + new_centroids_device, + weight_per_cluster_device, + n_rows, + n_clusters) # pylibraft functions are often asynchronous so the # handle needs to be explicitly synchronized handle.sync() - print(str(new_centroids)) - 
print(str(new_weight)) + print(str(new_centroids_device.copy_to_host())) + print(str(weight_per_cluster_device.copy_to_host())) # actual[actual <= 1e-5] = 0.0 # From 68d7fb274e28c7997af3853627b14beed7cdbb09 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 25 Oct 2022 20:30:43 -0400 Subject: [PATCH 11/27] Allowing most of the outputs to be optional --- cpp/include/raft_distance/kmeans.hpp | 58 +++++----- cpp/src/distance/update_centroids.cuh | 40 ++++++- python/pylibraft/pylibraft/cluster/kmeans.pyx | 100 ++++++++++-------- .../pylibraft/pylibraft/test/test_kmeans.py | 29 +++-- python/raft-dask/record.txt | 24 +++++ 5 files changed, 159 insertions(+), 92 deletions(-) create mode 100644 python/raft-dask/record.txt diff --git a/cpp/include/raft_distance/kmeans.hpp b/cpp/include/raft_distance/kmeans.hpp index b57db3590f..aa01e8b4e2 100644 --- a/cpp/include/raft_distance/kmeans.hpp +++ b/cpp/include/raft_distance/kmeans.hpp @@ -14,41 +14,37 @@ * limitations under the License. */ -#include #include +#include namespace raft::cluster::kmeans::runtime { - void update_centroids( - raft::handle_t const &handle, - const float *X, - int n_samples, - int n_features, - int n_clusters, - const float *sample_weights, - const float *l2norm_x, - const float *centroids, - float *new_centroids, - float *weight_per_cluster, - raft::distance::DistanceType metric, - int batch_samples, - int batch_centroids); - - - void update_centroids( - raft::handle_t const &handle, - const double *X, - int n_samples, - int n_features, - int n_clusters, - const double *sample_weights, - const double *l2norm_x, - const double *centroids, - double *new_centroids, - double *weight_per_cluster, - raft::distance::DistanceType metric, - int batch_samples, - int batch_centroids); +void update_centroids(raft::handle_t const& handle, + const float* X, + int n_samples, + int n_features, + int n_clusters, + const float* sample_weights, + const float* l2norm_x, + const float* centroids, + float* 
new_centroids, + float* weight_per_cluster, + raft::distance::DistanceType metric, + int batch_samples, + int batch_centroids); +void update_centroids(raft::handle_t const& handle, + const double* X, + int n_samples, + int n_features, + int n_clusters, + const double* sample_weights, + const double* l2norm_x, + const double* centroids, + double* new_centroids, + double* weight_per_cluster, + raft::distance::DistanceType metric, + int batch_samples, + int batch_centroids); } // namespace raft::cluster::kmeans::runtime \ No newline at end of file diff --git a/cpp/src/distance/update_centroids.cuh b/cpp/src/distance/update_centroids.cuh index c6d0d7c461..6781040cb3 100644 --- a/cpp/src/distance/update_centroids.cuh +++ b/cpp/src/distance/update_centroids.cuh @@ -18,6 +18,7 @@ #include #include #include +#include namespace raft::cluster::kmeans::runtime { @@ -41,13 +42,42 @@ void update_centroids(raft::handle_t const& handle, auto X_view = raft::make_device_matrix_view(X, n_samples, n_features); auto centroids_view = raft::make_device_matrix_view(centroids, n_clusters, n_features); - auto sample_weights_view = - raft::make_device_vector_view(sample_weights, n_clusters); - auto l2norm_x_view = raft::make_device_vector_view(l2norm_x, n_samples); + + rmm::device_uvector sample_weights_uvec(0, handle.get_stream()); + if (sample_weights == nullptr) { + sample_weights_uvec.resize(n_samples, handle.get_stream()); + DataT weight = 1.0 / n_samples; + thrust::fill(handle.get_thrust_policy(), + sample_weights_uvec.data(), + sample_weights_uvec.data() + n_samples, + weight); + } + auto sample_weights_view = raft::make_device_vector_view( + sample_weights == nullptr ? 
sample_weights_uvec.data() : sample_weights, n_clusters); + + rmm::device_uvector l2norm_x_uvec(0, handle.get_stream()); + if (l2norm_x == nullptr) { + l2norm_x_uvec.resize(n_samples, handle.get_stream()); + raft::linalg::rowNorm(l2norm_x_uvec.data(), + X, + n_samples, + n_features, + raft::linalg::L2Norm, + true, + handle.get_stream()); + } + auto l2norm_x_view = raft::make_device_vector_view( + l2norm_x == nullptr ? l2norm_x_uvec.data() : l2norm_x, n_samples); + auto new_centroids_view = raft::make_device_matrix_view(new_centroids, n_clusters, n_features); - auto weight_per_cluster_view = - raft::make_device_vector_view(weight_per_cluster, n_clusters); + rmm::device_uvector weight_per_cluster_uvec(0, handle.get_stream()); + if (weight_per_cluster == nullptr) { + weight_per_cluster_uvec.resize(n_clusters, handle.get_stream()); + } + auto weight_per_cluster_view = raft::make_device_vector_view( + weight_per_cluster == nullptr ? weight_per_cluster_uvec.data() : weight_per_cluster, + n_clusters); raft::cluster::kmeans::update_centroids(handle, X_view, diff --git a/python/pylibraft/pylibraft/cluster/kmeans.pyx b/python/pylibraft/pylibraft/cluster/kmeans.pyx index c890a24f89..1aea00ad0c 100644 --- a/python/pylibraft/pylibraft/cluster/kmeans.pyx +++ b/python/pylibraft/pylibraft/cluster/kmeans.pyx @@ -24,6 +24,8 @@ from libc.stdint cimport uintptr_t from cython.operator cimport dereference as deref from libcpp cimport bool +from libcpp cimport nullptr + from pylibraft.distance.distance_type cimport DistanceType from pylibraft.common import Handle @@ -71,16 +73,15 @@ cdef extern from "raft_distance/kmeans.hpp" \ int batch_samples, int batch_centroids); -def compute_new_centroids(X, - sample_weights, - l2norm_x, - centroids, - new_centroids, - weight_per_cluster, - batch_samples, - batch_centroids, - metric="euclidean", - handle=None): +def compute_new_centroids(X, centroids, + new_centroids, + sample_weights=None, + l2norm_x=None, + weight_per_cluster=None, + 
batch_samples=None, + batch_centroids=None, + metric="euclidean", + handle=None): """ Compute new centroids given an input matrix and existing centroids @@ -91,18 +92,18 @@ def compute_new_centroids(X, ---------- X : Input CUDA array interface compliant matrix shape (m, k) - sample_weights : Input CUDA array interface compliant matrix shape (n_clusters, 1) - l2norm_x : Input CUDA array interface compliant matrix shape (m, 1) centroids : Input CUDA array interface compliant matrix shape (n_clusters, k) new_centroids : Writable CUDA array interface compliant matrix shape (n_clusters, k) - weight_per_cluster : Writable CUDA array interface compliant matrix shape - (n_clusters, 1) - batch_samples : integer specifying the batch size for X to compute - distances in batches - batch_centroids : integer specifying the batch size for centroids - to compute distances in batches + sample_weights : Optional input CUDA array interface compliant matrix shape (n_clusters, 1) default: None + l2norm_x : Optional input CUDA array interface compliant matrix shape (m, 1) default: None + weight_per_cluster : Optional writable CUDA array interface compliant matrix shape + (n_clusters, 1) default: None + batch_samples : Optional integer specifying the batch size for X to compute + distances in batches. default: m + batch_centroids : Optional integer specifying the batch size for centroids + to compute distances in batches. 
default: n_clusters handle : Optional RAFT handle for reusing expensive CUDA resources Examples @@ -130,16 +131,9 @@ def compute_new_centroids(X, centroids = cp.random.random_sample((n_clusters, n_features), dtype=cp.float32) - l2norm_x = cupy.linalg.norm(X, axis=0, ord=2) - - sample_weights = cp.ones((n_samples,)).astype(cp.float32) - - weight_per_cluster = cp.empty((n_clusters, ), dtype=cp.float32) new_centroids = cp.empty((n_clusters, n_features), dtype=cp.float32) - compute_new_centroids(X, sample_weights, l2norm_x, centroids, - new_centroids, weight_per_cluster, - n_samples, n_clusters) + compute_new_centroids(X, centroids, new_centroids) # pylibraft functions are often asynchronous so the # handle needs to be explicitly synchronized @@ -148,39 +142,57 @@ def compute_new_centroids(X, x_cai = X.__cuda_array_interface__ centroids_cai = centroids.__cuda_array_interface__ - sample_weights_cai = sample_weights.__cuda_array_interface__ - l2norm_x_cai = l2norm_x.__cuda_array_interface__ new_centroids_cai = new_centroids.__cuda_array_interface__ - weight_per_cluster_cai = weight_per_cluster.__cuda_array_interface__ m = x_cai["shape"][0] n_clusters = centroids_cai["shape"][0] - x_k = x_cai["shape"][1] + + if batch_samples is None: + batch_samples = m + + if batch_centroids is None: + batch_centroids = n_clusters + centroids_k = centroids_cai["shape"][1] new_centroids_k = centroids_cai["shape"][1] + x_dt = np.dtype(x_cai["typestr"]) + centroids_dt = np.dtype(centroids_cai["typestr"]) + new_centroids_dt = np.dtype(new_centroids_cai["typestr"]) + if x_k != centroids_k: raise ValueError("Inputs must have same number of columns. 
" "a=%s, b=%s" % (x_k, centroids_k)) x_ptr = x_cai["data"][0] centroids_ptr = centroids_cai["data"][0] - sample_weights_ptr = sample_weights_cai["data"][0] - l2norm_x_ptr = l2norm_x_cai["data"][0] new_centroids_ptr = new_centroids_cai["data"][0] - weight_per_cluster_ptr = weight_per_cluster_cai["data"][0] + + if sample_weights is not None: + sample_weights_cai = sample_weights.__cuda_array_interface__ + sample_weights_ptr = sample_weights_cai["data"][0] + sample_weights_dt = np.dtype(sample_weights_cai["typestr"]) + else: + sample_weights_ptr = nullptr + + if l2norm_x is not None: + l2norm_x_cai = l2norm_x.__cuda_array_interface__ + l2norm_x_ptr = l2norm_x_cai["data"][0] + l2norm_x_dt = np.dtype(l2norm_x_cai["typestr"]) + else: + l2norm_x_ptr = nullptr + + if weight_per_cluster is not None: + weight_per_cluster_cai = weight_per_cluster.__cuda_array_interface__ + weight_per_cluster_ptr = weight_per_cluster_cai["data"][0] + weight_per_cluster_dt = np.dtype(weight_per_cluster_cai["typestr"]) + else: + weight_per_cluster_ptr = nullptr handle = handle if handle is not None else Handle() cdef handle_t *h = handle.getHandle() - x_dt = np.dtype(x_cai["typestr"]) - centroids_dt = np.dtype(centroids_cai["typestr"]) - sample_weights_dt = np.dtype(sample_weights_cai["typestr"]) - new_centroids_dt = np.dtype(new_centroids_cai["typestr"]) - weight_per_cluster_dt = np.dtype(weight_per_cluster_cai["typestr"]) - l2norm_x_dt = np.dtype(l2norm_x_cai["typestr"]) - x_c_contiguous = is_c_cont(x_cai, x_dt) centroids_c_contiguous = is_c_cont(centroids_cai, centroids_dt) new_centroids_c_contiguous = is_c_cont(new_centroids_cai, new_centroids_dt) @@ -191,11 +203,9 @@ def compute_new_centroids(X, cdef DistanceType distance_type = DISTANCE_TYPES[metric] - if x_dt != centroids_dt or x_dt != sample_weights_dt \ - or x_dt != new_centroids_dt or x_dt != weight_per_cluster_dt \ - or x_dt != l2norm_x_dt: - raise ValueError("Inputs must all have the same dtypes " - "(float32 or float64)") + if x_dt 
!= centroids_dt or x_dt != new_centroids_dt: + raise ValueError("Inputs must all have the same dtypes " + "(float32 or float64)") if x_dt == np.float32: update_centroids(deref(h), diff --git a/python/pylibraft/pylibraft/test/test_kmeans.py b/python/pylibraft/pylibraft/test/test_kmeans.py index b053838195..6262fe8775 100644 --- a/python/pylibraft/pylibraft/test/test_kmeans.py +++ b/python/pylibraft/pylibraft/test/test_kmeans.py @@ -28,7 +28,9 @@ @pytest.mark.parametrize("n_clusters", [5]) @pytest.mark.parametrize("metric", ["euclidean", "sqeuclidean"]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_compute_new_centroids(n_rows, n_cols, metric, n_clusters, dtype): +@pytest.mark.parametrize("additional_args", [True, False]) +def test_compute_new_centroids(n_rows, n_cols, metric, n_clusters, dtype, + additional_args): order = "C" @@ -43,33 +45,38 @@ def test_compute_new_centroids(n_rows, n_cols, metric, n_clusters, dtype): centroids_device = TestDeviceBuffer(centroids, order) l2norm_x = np.linalg.norm(X, axis=0, ord=2) + l2norm_x_device = TestDeviceBuffer(l2norm_x, order) \ + if additional_args else None weight_per_cluster = np.empty((n_clusters, ), dtype=dtype) - weight_per_cluster_device = TestDeviceBuffer(weight_per_cluster, order) + weight_per_cluster_device = TestDeviceBuffer(weight_per_cluster, order) \ + if additional_args else None new_centroids = np.empty((n_clusters, n_cols), dtype=dtype) new_centroids_device = TestDeviceBuffer(new_centroids, order) - sample_weights = np.ones((n_rows,)).astype(dtype) / n_rows - sample_weights_device = TestDeviceBuffer(sample_weights, order) + sample_weights = np.ones((n_rows,)).astype(dtype) + sample_weights_device = TestDeviceBuffer(sample_weights, order) \ + if additional_args else None - l2norm_x_device = TestDeviceBuffer(l2norm_x, order) compute_new_centroids(X_device, - sample_weights_device, - l2norm_x_device, centroids_device, new_centroids_device, - weight_per_cluster_device, - n_rows, - 
n_clusters) + sample_weights=sample_weights_device, + l2norm_x=l2norm_x_device, + weight_per_cluster=weight_per_cluster_device, + batch_samples=n_rows, + batch_centroids=n_clusters) # pylibraft functions are often asynchronous so the # handle needs to be explicitly synchronized handle.sync() print(str(new_centroids_device.copy_to_host())) - print(str(weight_per_cluster_device.copy_to_host())) + + if(additional_args): + print(str(weight_per_cluster_device.copy_to_host())) # actual[actual <= 1e-5] = 0.0 # diff --git a/python/raft-dask/record.txt b/python/raft-dask/record.txt new file mode 100644 index 0000000000..9856937ae6 --- /dev/null +++ b/python/raft-dask/record.txt @@ -0,0 +1,24 @@ +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/__init__.py +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/_version.py +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/include_test/__init__.py +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/__init__.py +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/comms.py +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/utils.py +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/ucx.py +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/include_test/raft_include_test.cpython-39-x86_64-linux-gnu.so +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/__init__.pxd +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/comms_utils.cpython-39-x86_64-linux-gnu.so +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/nccl.cpython-39-x86_64-linux-gnu.so 
+/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/__pycache__/__init__.cpython-39.pyc +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/__pycache__/_version.cpython-39.pyc +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/include_test/__pycache__/__init__.cpython-39.pyc +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/__pycache__/__init__.cpython-39.pyc +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/__pycache__/comms.cpython-39.pyc +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/__pycache__/utils.cpython-39.pyc +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/__pycache__/ucx.cpython-39.pyc +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask-22.10.0a0+46.gd9c7aa979-py3.9.egg-info/top_level.txt +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask-22.10.0a0+46.gd9c7aa979-py3.9.egg-info/PKG-INFO +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask-22.10.0a0+46.gd9c7aa979-py3.9.egg-info/SOURCES.txt +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask-22.10.0a0+46.gd9c7aa979-py3.9.egg-info/requires.txt +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask-22.10.0a0+46.gd9c7aa979-py3.9.egg-info/dependency_links.txt +/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask-22.10.0a0+46.gd9c7aa979-py3.9.egg-info/not-zip-safe From af401aedaab3f5f00615eb190aabec93eb91b7f1 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Tue, 25 Oct 2022 20:33:25 -0400 Subject: [PATCH 12/27] Fixing python astyle --- python/pylibraft/pylibraft/test/test_kmeans.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/pylibraft/pylibraft/test/test_kmeans.py b/python/pylibraft/pylibraft/test/test_kmeans.py index 6262fe8775..f4a0d9d062 100644 --- a/python/pylibraft/pylibraft/test/test_kmeans.py +++ b/python/pylibraft/pylibraft/test/test_kmeans.py @@ -18,7 +18,6 @@ from pylibraft.common import Handle from pylibraft.cluster.kmeans import compute_new_centroids -from pylibraft.distance import fused_l2_nn_argmin from pylibraft.testing.utils import TestDeviceBuffer @@ -59,7 +58,6 @@ def test_compute_new_centroids(n_rows, n_cols, metric, n_clusters, dtype, sample_weights_device = TestDeviceBuffer(sample_weights, order) \ if additional_args else None - compute_new_centroids(X_device, centroids_device, new_centroids_device, From 8de21273bb4bc7e3084ae109fc29f90e09cfe4c3 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 25 Oct 2022 21:01:19 -0400 Subject: [PATCH 13/27] Style checks --- python/pylibraft/pylibraft/cluster/kmeans.pyx | 56 +++++++++---------- .../pylibraft/pylibraft/test/test_kmeans.py | 3 +- 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/python/pylibraft/pylibraft/cluster/kmeans.pyx b/python/pylibraft/pylibraft/cluster/kmeans.pyx index 1aea00ad0c..354b91650f 100644 --- a/python/pylibraft/pylibraft/cluster/kmeans.pyx +++ b/python/pylibraft/pylibraft/cluster/kmeans.pyx @@ -44,34 +44,34 @@ cdef extern from "raft_distance/kmeans.hpp" \ namespace "raft::cluster::kmeans::runtime": cdef void update_centroids( - const handle_t& handle, - const double *X, - int n_samples, - int n_features, - int n_clusters, - const double *sample_weights, - const double *l2norm_x, - const double *centroids, - double *new_centroids, - double *weight_per_cluster, - DistanceType metric, - int batch_samples, - int batch_centroids); - - cdef void update_centroids( - const handle_t& 
handle, - const float *X, - int n_samples, - int n_features, - int n_clusters, - const float *sample_weights, - const float *l2norm_x, - const float *centroids, - float *new_centroids, - float *weight_per_cluster, - DistanceType metric, - int batch_samples, - int batch_centroids); + const handle_t& handle, + const double *X, + int n_samples, + int n_features, + int n_clusters, + const double *sample_weights, + const double *l2norm_x, + const double *centroids, + double *new_centroids, + double *weight_per_cluster, + DistanceType metric, + int batch_samples, + int batch_centroids); + + cdef void update_centroids(v + const handle_t& handle, + const float *X, + int n_samples, + int n_features, + int n_clusters, + const float *sample_weights, + const float *l2norm_x, + const float *centroids, + float *new_centroids, + float *weight_per_cluster, + DistanceType metric, + int batch_samples, + int batch_centroids); def compute_new_centroids(X, centroids, new_centroids, diff --git a/python/pylibraft/pylibraft/test/test_kmeans.py b/python/pylibraft/pylibraft/test/test_kmeans.py index f4a0d9d062..cef838de99 100644 --- a/python/pylibraft/pylibraft/test/test_kmeans.py +++ b/python/pylibraft/pylibraft/test/test_kmeans.py @@ -65,7 +65,8 @@ def test_compute_new_centroids(n_rows, n_cols, metric, n_clusters, dtype, l2norm_x=l2norm_x_device, weight_per_cluster=weight_per_cluster_device, batch_samples=n_rows, - batch_centroids=n_clusters) + batch_centroids=n_clusters, + handle=handle) # pylibraft functions are often asynchronous so the # handle needs to be explicitly synchronized From 27d51fafdc5d624b23518603e7730a0ec8588c29 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Tue, 25 Oct 2022 21:06:13 -0400 Subject: [PATCH 14/27] Fixing style --- python/pylibraft/.flake8 | 9 +++ python/pylibraft/.flake8.cython | 28 +++++++ python/pylibraft/pylibraft/cluster/kmeans.pyx | 73 ++++++++++--------- 3 files changed, 75 insertions(+), 35 deletions(-) create mode 100644 python/pylibraft/.flake8 create mode 100644 python/pylibraft/.flake8.cython diff --git a/python/pylibraft/.flake8 b/python/pylibraft/.flake8 new file mode 100644 index 0000000000..ef2e5a8495 --- /dev/null +++ b/python/pylibraft/.flake8 @@ -0,0 +1,9 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +[flake8] +exclude = __init__.py +ignore = + # line break before binary operator + W503 + # whitespace before : + E203 \ No newline at end of file diff --git a/python/pylibraft/.flake8.cython b/python/pylibraft/.flake8.cython new file mode 100644 index 0000000000..3cd436d3f3 --- /dev/null +++ b/python/pylibraft/.flake8.cython @@ -0,0 +1,28 @@ +# +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +[flake8] +filename = *.pyx, *.pxd +exclude = *.egg, build, docs, .git +ignore = E999, E225, E226, E227, W503, W504 + +# Rules ignored: +# E999: invalid syntax (works for Python, not Cython) +# E225: Missing whitespace around operators (breaks cython casting syntax like ) +# E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*) +# E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax) +# W503: line break before binary operator (breaks lines that start with a pointer) +# W504: line break after binary operator (breaks lines that end with a pointer) diff --git a/python/pylibraft/pylibraft/cluster/kmeans.pyx b/python/pylibraft/pylibraft/cluster/kmeans.pyx index 354b91650f..54a1dcbe6b 100644 --- a/python/pylibraft/pylibraft/cluster/kmeans.pyx +++ b/python/pylibraft/pylibraft/cluster/kmeans.pyx @@ -56,9 +56,9 @@ cdef extern from "raft_distance/kmeans.hpp" \ double *weight_per_cluster, DistanceType metric, int batch_samples, - int batch_centroids); + int batch_centroids) - cdef void update_centroids(v + cdef void update_centroids( const handle_t& handle, const float *X, int n_samples, @@ -71,7 +71,8 @@ cdef extern from "raft_distance/kmeans.hpp" \ float *weight_per_cluster, DistanceType metric, int batch_samples, - int batch_centroids); + int batch_centroids) + def compute_new_centroids(X, centroids, new_centroids, @@ -96,10 +97,12 @@ def compute_new_centroids(X, centroids, (n_clusters, k) new_centroids : Writable CUDA array interface compliant matrix shape (n_clusters, k) - sample_weights : Optional input CUDA array interface compliant matrix shape (n_clusters, 1) default: None - l2norm_x : Optional input CUDA array interface compliant matrix shape (m, 1) default: None - weight_per_cluster : Optional writable CUDA array interface compliant matrix shape - (n_clusters, 1) default: None + sample_weights : Optional input CUDA array interface compliant matrix shape + (n_clusters, 1) default: None 
+ l2norm_x : Optional input CUDA array interface compliant matrix shape + (m, 1) default: None + weight_per_cluster : Optional writable CUDA array interface compliant + matrix shape (n_clusters, 1) default: None batch_samples : Optional integer specifying the batch size for X to compute distances in batches. default: m batch_centroids : Optional integer specifying the batch size for centroids @@ -198,42 +201,42 @@ def compute_new_centroids(X, centroids, new_centroids_c_contiguous = is_c_cont(new_centroids_cai, new_centroids_dt) if not x_c_contiguous or not centroids_c_contiguous \ - or not new_centroids_c_contiguous: - raise ValueError("Inputs must all be c contiguous") + or not new_centroids_c_contiguous: + raise ValueError("Inputs must all be c contiguous") cdef DistanceType distance_type = DISTANCE_TYPES[metric] if x_dt != centroids_dt or x_dt != new_centroids_dt: - raise ValueError("Inputs must all have the same dtypes " - "(float32 or float64)") + raise ValueError("Inputs must all have the same dtypes " + "(float32 or float64)") if x_dt == np.float32: update_centroids(deref(h), - x_ptr, - m, - x_k, - n_clusters, - sample_weights_ptr, - l2norm_x_ptr, - centroids_ptr, - new_centroids_ptr, - weight_per_cluster_ptr, - distance_type, - batch_samples, - batch_centroids) + x_ptr, + m, + x_k, + n_clusters, + sample_weights_ptr, + l2norm_x_ptr, + centroids_ptr, + new_centroids_ptr, + weight_per_cluster_ptr, + distance_type, + batch_samples, + batch_centroids) elif x_dt == np.float64: update_centroids(deref(h), - x_ptr, - m, - x_k, - n_clusters, - sample_weights_ptr, - l2norm_x_ptr, - centroids_ptr, - new_centroids_ptr, - weight_per_cluster_ptr, - distance_type, - batch_samples, - batch_centroids) + x_ptr, + m, + x_k, + n_clusters, + sample_weights_ptr, + l2norm_x_ptr, + centroids_ptr, + new_centroids_ptr, + weight_per_cluster_ptr, + distance_type, + batch_samples, + batch_centroids) else: raise ValueError("dtype %s not supported" % x_dt) From 
6c8b3b4a62a0ebcd7d58914c35a039ff1a3793f8 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 25 Oct 2022 23:43:45 -0400 Subject: [PATCH 15/27] Adding assertion to pytest for naive solution --- cpp/include/raft/cluster/detail/kmeans.cuh | 15 +++---- cpp/src/distance/update_centroids.cuh | 2 +- python/pylibraft/pylibraft/cluster/kmeans.pyx | 4 +- .../pylibraft/pylibraft/test/test_kmeans.py | 41 ++++++++++++------- 4 files changed, 38 insertions(+), 24 deletions(-) diff --git a/cpp/include/raft/cluster/detail/kmeans.cuh b/cpp/include/raft/cluster/detail/kmeans.cuh index cb81bcd744..1e17561254 100644 --- a/cpp/include/raft/cluster/detail/kmeans.cuh +++ b/cpp/include/raft/cluster/detail/kmeans.cuh @@ -319,7 +319,7 @@ void update_centroids( workspace.resize(n_samples, handle.get_stream()); - // Calculates weighted sum of all the samples assigned to cluster-i and store the + // Calculates weighted sum of all the samples assigned to cluster-i and stores the // result in new_centroids[i] raft::linalg::reduce_rows_by_key((DataT*)X.data_handle(), X.extent(1), @@ -341,11 +341,12 @@ void update_centroids( (IndexT)n_clusters, handle.get_stream()); - // Computes new_centroids[i] = new_centroids[i]/new_weight[i] where + // Computes new_centroids[i] = new_centroids[i]/weight_per_cluster[i] where // new_centroids[n_clusters x n_features] - 2D array, new_centroids[i] has sum of all the - // samples assigned to cluster-i new_weight[n_clusters] - 1D array, new_weight[i] contains # - // of samples in cluster-i. - // Note - when new_weight[i] is 0, new_centroids[i] is reset to 0 + // samples assigned to cluster-i + // weight_per_cluster[n_clusters] - 1D array, weight_per_cluster[i] contains sum of weights in + // cluster-i. 
+ // Note - when weight_per_cluster[i] is 0, new_centroids[i] is reset to 0 raft::linalg::matrixVectorOp( new_centroids.data_handle(), new_centroids.data_handle(), @@ -362,7 +363,7 @@ void update_centroids( }, handle.get_stream()); - // copy centroids[i] to new_centroids[i] when new_weight[i] is 0 + // copy centroids[i] to new_centroids[i] when weight_per_cluster[i] is 0 cub::ArgIndexInputIterator itr_wt(weight_per_cluster.data_handle()); raft::matrix::gather_if( const_cast(centroids.data_handle()), @@ -373,7 +374,7 @@ void update_centroids( static_cast(weight_per_cluster.size()), new_centroids.data_handle(), [=] __device__(raft::KeyValuePair map) { // predicate - // copy when the # of samples in the cluster is 0 + // copy when the sum of weights in the cluster is 0 return map.value == 0; }, [=] __device__(raft::KeyValuePair map) { // map diff --git a/cpp/src/distance/update_centroids.cuh b/cpp/src/distance/update_centroids.cuh index 6781040cb3..2f6183abd9 100644 --- a/cpp/src/distance/update_centroids.cuh +++ b/cpp/src/distance/update_centroids.cuh @@ -53,7 +53,7 @@ void update_centroids(raft::handle_t const& handle, weight); } auto sample_weights_view = raft::make_device_vector_view( - sample_weights == nullptr ? sample_weights_uvec.data() : sample_weights, n_clusters); + sample_weights == nullptr ? 
sample_weights_uvec.data() : sample_weights, n_samples); rmm::device_uvector l2norm_x_uvec(0, handle.get_stream()); if (l2norm_x == nullptr) { diff --git a/python/pylibraft/pylibraft/cluster/kmeans.pyx b/python/pylibraft/pylibraft/cluster/kmeans.pyx index 54a1dcbe6b..ab267e7b95 100644 --- a/python/pylibraft/pylibraft/cluster/kmeans.pyx +++ b/python/pylibraft/pylibraft/cluster/kmeans.pyx @@ -136,7 +136,7 @@ def compute_new_centroids(X, centroids, new_centroids = cp.empty((n_clusters, n_features), dtype=cp.float32) - compute_new_centroids(X, centroids, new_centroids) + compute_new_centroids(X, centroids, new_centroids, handle=handle) # pylibraft functions are often asynchronous so the # handle needs to be explicitly synchronized @@ -148,8 +148,8 @@ def compute_new_centroids(X, centroids, new_centroids_cai = new_centroids.__cuda_array_interface__ m = x_cai["shape"][0] - n_clusters = centroids_cai["shape"][0] x_k = x_cai["shape"][1] + n_clusters = centroids_cai["shape"][0] if batch_samples is None: batch_samples = m diff --git a/python/pylibraft/pylibraft/test/test_kmeans.py b/python/pylibraft/pylibraft/test/test_kmeans.py index cef838de99..482f40287b 100644 --- a/python/pylibraft/pylibraft/test/test_kmeans.py +++ b/python/pylibraft/pylibraft/test/test_kmeans.py @@ -18,13 +18,14 @@ from pylibraft.common import Handle from pylibraft.cluster.kmeans import compute_new_centroids +from pylibraft.distance import pairwise_distance from pylibraft.testing.utils import TestDeviceBuffer @pytest.mark.parametrize("n_rows", [100]) -@pytest.mark.parametrize("n_cols", [100]) -@pytest.mark.parametrize("n_clusters", [5]) +@pytest.mark.parametrize("n_cols", [5, 25]) +@pytest.mark.parametrize("n_clusters", [5, 15]) @pytest.mark.parametrize("metric", ["euclidean", "sqeuclidean"]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("additional_args", [True, False]) @@ -40,24 +41,40 @@ def test_compute_new_centroids(n_rows, n_cols, metric, n_clusters, dtype, 
X = np.random.random_sample((n_rows, n_cols)).astype(dtype) X_device = TestDeviceBuffer(X, order) - centroids = np.random.random_sample((n_clusters, n_cols)).astype(dtype) + centroids = X[:n_clusters] centroids_device = TestDeviceBuffer(centroids, order) - l2norm_x = np.linalg.norm(X, axis=0, ord=2) + l2norm_x = np.sum(X**2, axis=1) l2norm_x_device = TestDeviceBuffer(l2norm_x, order) \ if additional_args else None - weight_per_cluster = np.empty((n_clusters, ), dtype=dtype) + weight_per_cluster = np.zeros((n_clusters, ), dtype=dtype) weight_per_cluster_device = TestDeviceBuffer(weight_per_cluster, order) \ if additional_args else None - new_centroids = np.empty((n_clusters, n_cols), dtype=dtype) + new_centroids = np.zeros((n_clusters, n_cols), dtype=dtype) new_centroids_device = TestDeviceBuffer(new_centroids, order) - sample_weights = np.ones((n_rows,)).astype(dtype) + sample_weights = np.ones((n_rows,)).astype(dtype) / n_rows sample_weights_device = TestDeviceBuffer(sample_weights, order) \ if additional_args else None + + # Compute new centroids naively + dists = np.zeros((n_rows, n_clusters), dtype=dtype) + dists_device = TestDeviceBuffer(dists, order) + pairwise_distance(X_device, centroids_device, dists_device, metric=metric) + handle.sync() + + labels = np.argmin(dists_device.copy_to_host(), axis=1) + expected_centers = np.empty((n_clusters, n_cols), dtype=dtype) + expected_wX = X * sample_weights.reshape((-1, 1)) + for i in range(n_clusters): + j = expected_wX[labels == i] + j = j.sum(axis=0) + g = sample_weights[labels == i].sum() + expected_centers[i, :] = j / g + compute_new_centroids(X_device, centroids_device, new_centroids_device, @@ -66,17 +83,13 @@ def test_compute_new_centroids(n_rows, n_cols, metric, n_clusters, dtype, weight_per_cluster=weight_per_cluster_device, batch_samples=n_rows, batch_centroids=n_clusters, + metric=metric, handle=handle) # pylibraft functions are often asynchronous so the # handle needs to be explicitly synchronized 
handle.sync() - print(str(new_centroids_device.copy_to_host())) - - if(additional_args): - print(str(weight_per_cluster_device.copy_to_host())) + actual_centers = new_centroids_device.copy_to_host() - # actual[actual <= 1e-5] = 0.0 - # - # assert np.allclose(expected, actual, rtol=1e-4) + assert np.allclose(expected_centers, actual_centers, rtol=1e-6) From ec6a18b3959f899e16ca1110ef58a9f864573824 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 25 Oct 2022 23:44:44 -0400 Subject: [PATCH 16/27] Forcing batch size --- python/pylibraft/pylibraft/test/test_kmeans.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pylibraft/pylibraft/test/test_kmeans.py b/python/pylibraft/pylibraft/test/test_kmeans.py index 482f40287b..94784f0f24 100644 --- a/python/pylibraft/pylibraft/test/test_kmeans.py +++ b/python/pylibraft/pylibraft/test/test_kmeans.py @@ -81,8 +81,8 @@ def test_compute_new_centroids(n_rows, n_cols, metric, n_clusters, dtype, sample_weights=sample_weights_device, l2norm_x=l2norm_x_device, weight_per_cluster=weight_per_cluster_device, - batch_samples=n_rows, - batch_centroids=n_clusters, + batch_samples=n_rows/2, + batch_centroids=n_clusters/2, metric=metric, handle=handle) From 4a61ec21b269edb3903cdda2a4ac91c0c4e48b44 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Tue, 25 Oct 2022 23:48:36 -0400 Subject: [PATCH 17/27] Fixing style --- python/pylibraft/pylibraft/test/test_kmeans.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/pylibraft/pylibraft/test/test_kmeans.py b/python/pylibraft/pylibraft/test/test_kmeans.py index 94784f0f24..154622d196 100644 --- a/python/pylibraft/pylibraft/test/test_kmeans.py +++ b/python/pylibraft/pylibraft/test/test_kmeans.py @@ -58,8 +58,7 @@ def test_compute_new_centroids(n_rows, n_cols, metric, n_clusters, dtype, sample_weights = np.ones((n_rows,)).astype(dtype) / n_rows sample_weights_device = TestDeviceBuffer(sample_weights, order) \ if additional_args else None - - +2 # Compute new centroids naively dists = np.zeros((n_rows, n_clusters), dtype=dtype) dists_device = TestDeviceBuffer(dists, order) From d5b8c49680efcce71f12ed0b3b99ca69f0d9561b Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 25 Oct 2022 23:50:41 -0400 Subject: [PATCH 18/27] Typo --- python/pylibraft/pylibraft/test/test_kmeans.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pylibraft/pylibraft/test/test_kmeans.py b/python/pylibraft/pylibraft/test/test_kmeans.py index 154622d196..e2f7b4a5a7 100644 --- a/python/pylibraft/pylibraft/test/test_kmeans.py +++ b/python/pylibraft/pylibraft/test/test_kmeans.py @@ -58,7 +58,7 @@ def test_compute_new_centroids(n_rows, n_cols, metric, n_clusters, dtype, sample_weights = np.ones((n_rows,)).astype(dtype) / n_rows sample_weights_device = TestDeviceBuffer(sample_weights, order) \ if additional_args else None -2 + # Compute new centroids naively dists = np.zeros((n_rows, n_clusters), dtype=dtype) dists_device = TestDeviceBuffer(dists, order) From 0cfd7d98bce9d387ff896ad6c138b7789a9d3515 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Wed, 26 Oct 2022 10:43:21 -0400 Subject: [PATCH 19/27] Removing argmin computation from `compute_new-centroids` --- cpp/include/raft/cluster/detail/kmeans.cuh | 87 +++++++++---------- cpp/include/raft/cluster/kmeans.cuh | 37 +++----- cpp/include/raft_distance/kmeans.hpp | 14 +-- cpp/src/distance/update_centroids.cuh | 31 +------ cpp/src/distance/update_centroids_double.cu | 14 +-- cpp/src/distance/update_centroids_float.cu | 14 +-- python/pylibraft/pylibraft/cluster/kmeans.pyx | 63 ++++---------- .../pylibraft/pylibraft/test/test_kmeans.py | 13 ++- 8 files changed, 89 insertions(+), 184 deletions(-) diff --git a/cpp/include/raft/cluster/detail/kmeans.cuh b/cpp/include/raft/cluster/detail/kmeans.cuh index 1e17561254..f1c95070c9 100644 --- a/cpp/include/raft/cluster/detail/kmeans.cuh +++ b/cpp/include/raft/cluster/detail/kmeans.cuh @@ -273,57 +273,28 @@ void kmeansPlusPlus(const raft::handle_t& handle, * @param[out] new_weight * @param[inout] workspace */ -template -void update_centroids( - const raft::handle_t& handle, - raft::device_matrix_view X, - raft::device_vector_view sample_weights, - raft::device_vector_view l2norm_x, - raft::device_matrix_view centroids, - raft::device_vector_view, IndexT> min_cluster_and_dist, - raft::device_vector_view weight_per_cluster, - raft::device_matrix_view new_centroids, - rmm::device_uvector& L2NormBuf_OR_DistBuf, - raft::distance::DistanceType metric, - int batch_samples, - int batch_centroids, - rmm::device_uvector& workspace) +template +void update_centroids(const raft::handle_t& handle, + raft::device_matrix_view X, + raft::device_vector_view sample_weights, + raft::device_matrix_view centroids, + + // TODO: Figure out how to best wrap iterator types in mdspan + LabelsIterator cluster_labels, + raft::device_vector_view weight_per_cluster, + raft::device_matrix_view new_centroids, + rmm::device_uvector& workspace) { auto n_clusters = centroids.extent(0); auto n_samples = X.extent(0); - // computes 
minClusterAndDistance[0:n_samples) where - // minClusterAndDistance[i] is a pair where - // 'key' is index to a sample in 'centroids' (index of the nearest - // centroid) and 'value' is the distance between the sample 'X[i]' and the - // 'centroid[key]' - detail::minClusterAndDistanceCompute(handle, - X, - centroids, - min_cluster_and_dist, - l2norm_x, - L2NormBuf_OR_DistBuf, - metric, - batch_samples, - batch_centroids, - workspace); - - // Using TransformInputIteratorT to dereference an array of - // raft::KeyValuePair and converting them to just return the Key to be used - // in reduce_rows_by_key prims - detail::KeyValueIndexOp conversion_op; - cub::TransformInputIterator, - raft::KeyValuePair*> - itr(min_cluster_and_dist.data_handle(), conversion_op); - workspace.resize(n_samples, handle.get_stream()); // Calculates weighted sum of all the samples assigned to cluster-i and stores the // result in new_centroids[i] raft::linalg::reduce_rows_by_key((DataT*)X.data_handle(), X.extent(1), - itr, + cluster_labels, sample_weights.data_handle(), workspace.data(), X.extent(0), @@ -334,7 +305,7 @@ void update_centroids( // Reduce weights by key to compute weight in each cluster raft::linalg::reduce_cols_by_key(sample_weights.data_handle(), - itr, + cluster_labels, weight_per_cluster.data_handle(), (IndexT)1, (IndexT)sample_weights.extent(0), @@ -452,19 +423,39 @@ void kmeans_fit_main(const raft::handle_t& handle, auto centroids = raft::make_device_matrix_view( centroidsRawData.data_handle(), n_clusters, n_features); + // computes minClusterAndDistance[0:n_samples) where + // minClusterAndDistance[i] is a pair where + // 'key' is index to a sample in 'centroids' (index of the nearest + // centroid) and 'value' is the distance between the sample 'X[i]' and the + // 'centroid[key]' + detail::minClusterAndDistanceCompute(handle, + X, + centroids, + minClusterAndDistance.view(), + l2normx_view, + L2NormBuf_OR_DistBuf, + params.metric, + params.batch_samples, + 
params.batch_centroids, + workspace); + + // Using TransformInputIteratorT to dereference an array of + // raft::KeyValuePair and converting them to just return the Key to be used + // in reduce_rows_by_key prims + detail::KeyValueIndexOp conversion_op; + cub::TransformInputIterator, + raft::KeyValuePair*> + itr(minClusterAndDistance.data_handle(), conversion_op); + update_centroids(handle, X, weight, - l2normx_view, raft::make_device_matrix_view( centroidsRawData.data_handle(), n_clusters, n_features), - minClusterAndDistance.view(), + itr, wtInCluster.view(), newCentroids.view(), - L2NormBuf_OR_DistBuf, - params.metric, - params.batch_samples, - params.batch_centroids, workspace); // compute the squared norm between the newCentroids and the original diff --git a/cpp/include/raft/cluster/kmeans.cuh b/cpp/include/raft/cluster/kmeans.cuh index 71142bb9f9..17a8467274 100644 --- a/cpp/include/raft/cluster/kmeans.cuh +++ b/cpp/include/raft/cluster/kmeans.cuh @@ -338,38 +338,21 @@ void cluster_cost(const raft::handle_t& handle, * @param[in] batch_samples: batch size for data samples when computing distances * @param[in] batch_centroids: batch size for centroids when computing distances */ -template -void update_centroids( - const raft::handle_t& handle, - raft::device_matrix_view X, - raft::device_vector_view sample_weights, - std::optional> l2norm_x, - raft::device_matrix_view centroids, - raft::device_vector_view, IndexT> min_cluster_and_dist, - raft::device_vector_view weight_per_cluster, - raft::device_matrix_view new_centroids, - raft::distance::DistanceType metric, - int batch_samples, - int batch_centroids) +template +void update_centroids(const raft::handle_t& handle, + raft::device_matrix_view X, + raft::device_vector_view sample_weights, + raft::device_matrix_view centroids, + LabelsIterator labels, + raft::device_vector_view weight_per_cluster, + raft::device_matrix_view new_centroids) { // TODO: Passing these into the algorithm doesn't really present much of 
a benefit // because they are being resized anyways. - rmm::device_uvector dist_workspace(0, handle.get_stream()); rmm::device_uvector workspace(0, handle.get_stream()); - detail::update_centroids(handle, - X, - sample_weights, - l2norm_x.value(), - centroids, - min_cluster_and_dist, - weight_per_cluster, - new_centroids, - dist_workspace, - metric, - batch_samples, - batch_centroids, - workspace); + detail::update_centroids( + handle, X, sample_weights, centroids, labels, weight_per_cluster, new_centroids, workspace); } /** diff --git a/cpp/include/raft_distance/kmeans.hpp b/cpp/include/raft_distance/kmeans.hpp index aa01e8b4e2..19f92dd977 100644 --- a/cpp/include/raft_distance/kmeans.hpp +++ b/cpp/include/raft_distance/kmeans.hpp @@ -25,13 +25,10 @@ void update_centroids(raft::handle_t const& handle, int n_features, int n_clusters, const float* sample_weights, - const float* l2norm_x, const float* centroids, + const int* labels, float* new_centroids, - float* weight_per_cluster, - raft::distance::DistanceType metric, - int batch_samples, - int batch_centroids); + float* weight_per_cluster); void update_centroids(raft::handle_t const& handle, const double* X, @@ -39,12 +36,9 @@ void update_centroids(raft::handle_t const& handle, int n_features, int n_clusters, const double* sample_weights, - const double* l2norm_x, const double* centroids, + const int* labels, double* new_centroids, - double* weight_per_cluster, - raft::distance::DistanceType metric, - int batch_samples, - int batch_centroids); + double* weight_per_cluster); } // namespace raft::cluster::kmeans::runtime \ No newline at end of file diff --git a/cpp/src/distance/update_centroids.cuh b/cpp/src/distance/update_centroids.cuh index 2f6183abd9..91f3e1e2a3 100644 --- a/cpp/src/distance/update_centroids.cuh +++ b/cpp/src/distance/update_centroids.cuh @@ -29,16 +29,11 @@ void update_centroids(raft::handle_t const& handle, int n_features, int n_clusters, const DataT* sample_weights, - const DataT* l2norm_x, 
const DataT* centroids, + const IndexT* labels, DataT* new_centroids, - DataT* weight_per_cluster, - raft::distance::DistanceType metric, - int batch_samples, - int batch_centroids) + DataT* weight_per_cluster) { - auto min_cluster_and_dist = - raft::make_device_vector, IndexT>(handle, n_samples); auto X_view = raft::make_device_matrix_view(X, n_samples, n_features); auto centroids_view = raft::make_device_matrix_view(centroids, n_clusters, n_features); @@ -55,20 +50,6 @@ void update_centroids(raft::handle_t const& handle, auto sample_weights_view = raft::make_device_vector_view( sample_weights == nullptr ? sample_weights_uvec.data() : sample_weights, n_samples); - rmm::device_uvector l2norm_x_uvec(0, handle.get_stream()); - if (l2norm_x == nullptr) { - l2norm_x_uvec.resize(n_samples, handle.get_stream()); - raft::linalg::rowNorm(l2norm_x_uvec.data(), - X, - n_samples, - n_features, - raft::linalg::L2Norm, - true, - handle.get_stream()); - } - auto l2norm_x_view = raft::make_device_vector_view( - l2norm_x == nullptr ? 
l2norm_x_uvec.data() : l2norm_x, n_samples); - auto new_centroids_view = raft::make_device_matrix_view(new_centroids, n_clusters, n_features); rmm::device_uvector weight_per_cluster_uvec(0, handle.get_stream()); @@ -82,13 +63,9 @@ void update_centroids(raft::handle_t const& handle, raft::cluster::kmeans::update_centroids(handle, X_view, sample_weights_view, - l2norm_x_view, centroids_view, - min_cluster_and_dist.view(), + labels, weight_per_cluster_view, - new_centroids_view, - metric, - batch_samples, - batch_centroids); + new_centroids_view); } } // namespace raft::cluster::kmeans::runtime \ No newline at end of file diff --git a/cpp/src/distance/update_centroids_double.cu b/cpp/src/distance/update_centroids_double.cu index c16ff00345..fe741ddb78 100644 --- a/cpp/src/distance/update_centroids_double.cu +++ b/cpp/src/distance/update_centroids_double.cu @@ -27,13 +27,10 @@ void update_centroids(raft::handle_t const& handle, int n_features, int n_clusters, const double* sample_weights, - const double* l2norm_x, const double* centroids, + const int* labels, double* new_centroids, - double* weight_per_cluster, - raft::distance::DistanceType metric, - int batch_samples, - int batch_centroids) + double* weight_per_cluster) { update_centroids(handle, X, @@ -41,13 +38,10 @@ void update_centroids(raft::handle_t const& handle, n_features, n_clusters, sample_weights, - l2norm_x, centroids, + labels, new_centroids, - weight_per_cluster, - metric, - batch_samples, - batch_centroids); + weight_per_cluster); } } // namespace raft::cluster::kmeans::runtime \ No newline at end of file diff --git a/cpp/src/distance/update_centroids_float.cu b/cpp/src/distance/update_centroids_float.cu index d7d5e6f263..ebb06376ff 100644 --- a/cpp/src/distance/update_centroids_float.cu +++ b/cpp/src/distance/update_centroids_float.cu @@ -27,13 +27,10 @@ void update_centroids(raft::handle_t const& handle, int n_features, int n_clusters, const float* sample_weights, - const float* l2norm_x, const 
float* centroids, + const int* labels, float* new_centroids, - float* weight_per_cluster, - raft::distance::DistanceType metric, - int batch_samples, - int batch_centroids) + float* weight_per_cluster) { update_centroids(handle, X, @@ -41,13 +38,10 @@ void update_centroids(raft::handle_t const& handle, n_features, n_clusters, sample_weights, - l2norm_x, centroids, + labels, new_centroids, - weight_per_cluster, - metric, - batch_samples, - batch_centroids); + weight_per_cluster); } } // namespace raft::cluster::kmeans::runtime \ No newline at end of file diff --git a/python/pylibraft/pylibraft/cluster/kmeans.pyx b/python/pylibraft/pylibraft/cluster/kmeans.pyx index ab267e7b95..50337ee0e7 100644 --- a/python/pylibraft/pylibraft/cluster/kmeans.pyx +++ b/python/pylibraft/pylibraft/cluster/kmeans.pyx @@ -26,8 +26,6 @@ from cython.operator cimport dereference as deref from libcpp cimport bool from libcpp cimport nullptr -from pylibraft.distance.distance_type cimport DistanceType - from pylibraft.common import Handle from pylibraft.common.handle cimport handle_t @@ -50,13 +48,10 @@ cdef extern from "raft_distance/kmeans.hpp" \ int n_features, int n_clusters, const double *sample_weights, - const double *l2norm_x, const double *centroids, + const int* labels, double *new_centroids, - double *weight_per_cluster, - DistanceType metric, - int batch_samples, - int batch_centroids) + double *weight_per_cluster) cdef void update_centroids( const handle_t& handle, @@ -65,23 +60,18 @@ cdef extern from "raft_distance/kmeans.hpp" \ int n_features, int n_clusters, const float *sample_weights, - const float *l2norm_x, const float *centroids, + const int* labels, float *new_centroids, - float *weight_per_cluster, - DistanceType metric, - int batch_samples, - int batch_centroids) + float *weight_per_cluster) -def compute_new_centroids(X, centroids, +def compute_new_centroids(X, + centroids, + labels, new_centroids, sample_weights=None, - l2norm_x=None, weight_per_cluster=None, - 
batch_samples=None, - batch_centroids=None, - metric="euclidean", handle=None): """ Compute new centroids given an input matrix and existing centroids @@ -95,12 +85,12 @@ def compute_new_centroids(X, centroids, X : Input CUDA array interface compliant matrix shape (m, k) centroids : Input CUDA array interface compliant matrix shape (n_clusters, k) + labels : Input CUDA array interface compliant matrix shape + (m, 1) new_centroids : Writable CUDA array interface compliant matrix shape (n_clusters, k) sample_weights : Optional input CUDA array interface compliant matrix shape (n_clusters, 1) default: None - l2norm_x : Optional input CUDA array interface compliant matrix shape - (m, 1) default: None weight_per_cluster : Optional writable CUDA array interface compliant matrix shape (n_clusters, 1) default: None batch_samples : Optional integer specifying the batch size for X to compute @@ -134,6 +124,9 @@ def compute_new_centroids(X, centroids, centroids = cp.random.random_sample((n_clusters, n_features), dtype=cp.float32) + labels = cp.random.randint(0, high=n_clusters, size=n_samples, + dtype=cp.int32) + new_centroids = cp.empty((n_clusters, n_features), dtype=cp.float32) compute_new_centroids(X, centroids, new_centroids, handle=handle) @@ -146,23 +139,19 @@ def compute_new_centroids(X, centroids, x_cai = X.__cuda_array_interface__ centroids_cai = centroids.__cuda_array_interface__ new_centroids_cai = new_centroids.__cuda_array_interface__ + labels_cai = labels.__cuda_array_interface__ m = x_cai["shape"][0] x_k = x_cai["shape"][1] n_clusters = centroids_cai["shape"][0] - if batch_samples is None: - batch_samples = m - - if batch_centroids is None: - batch_centroids = n_clusters - centroids_k = centroids_cai["shape"][1] new_centroids_k = centroids_cai["shape"][1] x_dt = np.dtype(x_cai["typestr"]) centroids_dt = np.dtype(centroids_cai["typestr"]) new_centroids_dt = np.dtype(new_centroids_cai["typestr"]) + labels_dt = np.dtype(labels_cai["typestr"]) if x_k != 
centroids_k: raise ValueError("Inputs must have same number of columns. " @@ -171,6 +160,7 @@ def compute_new_centroids(X, centroids, x_ptr = x_cai["data"][0] centroids_ptr = centroids_cai["data"][0] new_centroids_ptr = new_centroids_cai["data"][0] + labels_ptr = labels_cai["data"][0] if sample_weights is not None: sample_weights_cai = sample_weights.__cuda_array_interface__ @@ -179,13 +169,6 @@ def compute_new_centroids(X, centroids, else: sample_weights_ptr = nullptr - if l2norm_x is not None: - l2norm_x_cai = l2norm_x.__cuda_array_interface__ - l2norm_x_ptr = l2norm_x_cai["data"][0] - l2norm_x_dt = np.dtype(l2norm_x_cai["typestr"]) - else: - l2norm_x_ptr = nullptr - if weight_per_cluster is not None: weight_per_cluster_cai = weight_per_cluster.__cuda_array_interface__ weight_per_cluster_ptr = weight_per_cluster_cai["data"][0] @@ -204,8 +187,6 @@ def compute_new_centroids(X, centroids, or not new_centroids_c_contiguous: raise ValueError("Inputs must all be c contiguous") - cdef DistanceType distance_type = DISTANCE_TYPES[metric] - if x_dt != centroids_dt or x_dt != new_centroids_dt: raise ValueError("Inputs must all have the same dtypes " "(float32 or float64)") @@ -217,13 +198,10 @@ def compute_new_centroids(X, centroids, x_k, n_clusters, sample_weights_ptr, - l2norm_x_ptr, centroids_ptr, + labels_ptr, new_centroids_ptr, - weight_per_cluster_ptr, - distance_type, - batch_samples, - batch_centroids) + weight_per_cluster_ptr) elif x_dt == np.float64: update_centroids(deref(h), x_ptr, @@ -231,12 +209,9 @@ def compute_new_centroids(X, centroids, x_k, n_clusters, sample_weights_ptr, - l2norm_x_ptr, centroids_ptr, + labels_ptr, new_centroids_ptr, - weight_per_cluster_ptr, - distance_type, - batch_samples, - batch_centroids) + weight_per_cluster_ptr) else: raise ValueError("dtype %s not supported" % x_dt) diff --git a/python/pylibraft/pylibraft/test/test_kmeans.py b/python/pylibraft/pylibraft/test/test_kmeans.py index e2f7b4a5a7..c45359d316 100644 --- 
a/python/pylibraft/pylibraft/test/test_kmeans.py +++ b/python/pylibraft/pylibraft/test/test_kmeans.py @@ -44,9 +44,7 @@ def test_compute_new_centroids(n_rows, n_cols, metric, n_clusters, dtype, centroids = X[:n_clusters] centroids_device = TestDeviceBuffer(centroids, order) - l2norm_x = np.sum(X**2, axis=1) - l2norm_x_device = TestDeviceBuffer(l2norm_x, order) \ - if additional_args else None + weight_per_cluster = np.zeros((n_clusters, ), dtype=dtype) weight_per_cluster_device = TestDeviceBuffer(weight_per_cluster, order) \ @@ -65,7 +63,9 @@ def test_compute_new_centroids(n_rows, n_cols, metric, n_clusters, dtype, pairwise_distance(X_device, centroids_device, dists_device, metric=metric) handle.sync() - labels = np.argmin(dists_device.copy_to_host(), axis=1) + labels = np.argmin(dists_device.copy_to_host(), axis=1).astype(np.int32) + labels_device = TestDeviceBuffer(labels, order) + expected_centers = np.empty((n_clusters, n_cols), dtype=dtype) expected_wX = X * sample_weights.reshape((-1, 1)) for i in range(n_clusters): @@ -76,13 +76,10 @@ def test_compute_new_centroids(n_rows, n_cols, metric, n_clusters, dtype, compute_new_centroids(X_device, centroids_device, + labels_device, new_centroids_device, sample_weights=sample_weights_device, - l2norm_x=l2norm_x_device, weight_per_cluster=weight_per_cluster_device, - batch_samples=n_rows/2, - batch_centroids=n_clusters/2, - metric=metric, handle=handle) # pylibraft functions are often asynchronous so the From e0ed9c0fad4452d5573ae69324e040db480ae183 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Wed, 26 Oct 2022 11:01:53 -0400 Subject: [PATCH 20/27] Adding some validation --- python/pylibraft/pylibraft/cluster/kmeans.pyx | 12 ++-- .../pylibraft/common/input_validation.py | 58 +++++++++++++++++++ 2 files changed, 65 insertions(+), 5 deletions(-) create mode 100644 python/pylibraft/pylibraft/common/input_validation.py diff --git a/python/pylibraft/pylibraft/cluster/kmeans.pyx b/python/pylibraft/pylibraft/cluster/kmeans.pyx index 50337ee0e7..c2d445f970 100644 --- a/python/pylibraft/pylibraft/cluster/kmeans.pyx +++ b/python/pylibraft/pylibraft/cluster/kmeans.pyx @@ -28,7 +28,7 @@ from libcpp cimport nullptr from pylibraft.common import Handle from pylibraft.common.handle cimport handle_t - +from pylibraft.common.input_validation import * from pylibraft.distance import DISTANCE_TYPES @@ -153,9 +153,11 @@ def compute_new_centroids(X, new_centroids_dt = np.dtype(new_centroids_cai["typestr"]) labels_dt = np.dtype(labels_cai["typestr"]) - if x_k != centroids_k: - raise ValueError("Inputs must have same number of columns. 
" - "a=%s, b=%s" % (x_k, centroids_k)) + if not do_cols_match(X, centroids): + raise ValueError("X and centroids must have same number of columns.") + + if not do_rows_match(X, labels): + raise ValueError("X and labels must have same number of rows") x_ptr = x_cai["data"][0] centroids_ptr = centroids_cai["data"][0] @@ -187,7 +189,7 @@ def compute_new_centroids(X, or not new_centroids_c_contiguous: raise ValueError("Inputs must all be c contiguous") - if x_dt != centroids_dt or x_dt != new_centroids_dt: + if not do_dtypes_match(X, centroids, new_centroids): raise ValueError("Inputs must all have the same dtypes " "(float32 or float64)") diff --git a/python/pylibraft/pylibraft/common/input_validation.py b/python/pylibraft/pylibraft/common/input_validation.py new file mode 100644 index 0000000000..fb4e4d81a0 --- /dev/null +++ b/python/pylibraft/pylibraft/common/input_validation.py @@ -0,0 +1,58 @@ +# +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + + +def do_dtypes_match(*cais): + last_dtype = cais[0].__cuda_array_interface__["typestr"] + for cai in cais: + typestr = cai.__cuda_array_interface__["typestr"] + if last_dtype != typestr: + return False + last_dtype = typestr + return True + + +def do_rows_match(*cais): + last_row = cais[0].__cuda_array_interface__["shape"][0] + for cai in cais: + rows = cai.__cuda_array_interface__["shape"][0] + if last_row != rows: + return False + last_row = rows + return True + + +def do_cols_match(*cais): + last_col = cais[0].__cuda_array_interface__["shape"][1] + for cai in cais: + cols = cai.__cuda_array_interface__["shape"][1] + if last_col != cols: + return False + last_col = cols + return True + +def do_shapes_match(*cais): + last_shape = cais[0].__cuda_array_interface__["shape"] + for cai in cais: + shape = cai.__cuda_array_interface__["shape"] + if last_shape != shape: + return False + last_shape = shape + return True From c717d61bfe20b65ba326a52d3735cc1274cab638 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Wed, 26 Oct 2022 12:16:45 -0400 Subject: [PATCH 21/27] Fixing style --- python/pylibraft/pylibraft/common/input_validation.py | 1 + python/pylibraft/pylibraft/test/test_kmeans.py | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/python/pylibraft/pylibraft/common/input_validation.py b/python/pylibraft/pylibraft/common/input_validation.py index fb4e4d81a0..d5556a79dc 100644 --- a/python/pylibraft/pylibraft/common/input_validation.py +++ b/python/pylibraft/pylibraft/common/input_validation.py @@ -48,6 +48,7 @@ def do_cols_match(*cais): last_col = cols return True + def do_shapes_match(*cais): last_shape = cais[0].__cuda_array_interface__["shape"] for cai in cais: diff --git a/python/pylibraft/pylibraft/test/test_kmeans.py b/python/pylibraft/pylibraft/test/test_kmeans.py index c45359d316..1d65470a82 100644 --- a/python/pylibraft/pylibraft/test/test_kmeans.py +++ b/python/pylibraft/pylibraft/test/test_kmeans.py @@ -44,8 +44,6 @@ def test_compute_new_centroids(n_rows, n_cols, metric, n_clusters, dtype, centroids = X[:n_clusters] centroids_device = TestDeviceBuffer(centroids, order) - - weight_per_cluster = np.zeros((n_clusters, ), dtype=dtype) weight_per_cluster_device = TestDeviceBuffer(weight_per_cluster, order) \ if additional_args else None From d1173ca9f7af6175cd2ec699a02172f600f4e7b1 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 27 Oct 2022 12:04:28 -0400 Subject: [PATCH 22/27] Fixing doc issue --- cpp/include/raft/cluster/kmeans.cuh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/cluster/kmeans.cuh b/cpp/include/raft/cluster/kmeans.cuh index 17a8467274..0c36dd19c0 100644 --- a/cpp/include/raft/cluster/kmeans.cuh +++ b/cpp/include/raft/cluster/kmeans.cuh @@ -409,7 +409,6 @@ void min_cluster_distance(const raft::handle_t& handle, * @tparam IndexT the type of data used for indexing. 
* * @param[in] handle The raft handle - * @param[in] params The parameters for KMeans * @param[in] X The data in row-major format * [dim = n_samples x n_features] * @param[in] centroids Centroids data @@ -421,7 +420,10 @@ void min_cluster_distance(const raft::handle_t& handle, * [dim = n_samples] * @param[out] L2NormBuf_OR_DistBuf Resizable buffer to store L2 norm of centroids or distance * matrix - * @param[in] workspace Temporary workspace buffer which can get resized + * @param[in] metric distance metric + * @param[in] batch_samples batch size of data samples + * @param[in] batch_centroids batch size of centroids + * @param[in] workspace Temporary workspace buffer which can get resized * */ template From 4ad7294b14d0e9c8039f5ca4d77864638bdb8889 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 27 Oct 2022 13:51:17 -0400 Subject: [PATCH 23/27] Removing record.txt from raft-dask --- .gitignore | 2 +- python/raft-dask/record.txt | 24 ------------------------ 2 files changed, 1 insertion(+), 25 deletions(-) delete mode 100644 python/raft-dask/record.txt diff --git a/.gitignore b/.gitignore index 1c37c197dd..22c0e8a4a0 100644 --- a/.gitignore +++ b/.gitignore @@ -17,7 +17,7 @@ build/ build_prims/ dist/ python/**/**/*.cpp -python/raft/record.txt +python/raft-dask/record.txt python/pylibraft/record.txt log .ipynb_checkpoints diff --git a/python/raft-dask/record.txt b/python/raft-dask/record.txt deleted file mode 100644 index 9856937ae6..0000000000 --- a/python/raft-dask/record.txt +++ /dev/null @@ -1,24 +0,0 @@ -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/__init__.py -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/_version.py -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/include_test/__init__.py -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/__init__.py 
-/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/comms.py -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/utils.py -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/ucx.py -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/include_test/raft_include_test.cpython-39-x86_64-linux-gnu.so -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/__init__.pxd -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/comms_utils.cpython-39-x86_64-linux-gnu.so -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/nccl.cpython-39-x86_64-linux-gnu.so -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/__pycache__/__init__.cpython-39.pyc -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/__pycache__/_version.cpython-39.pyc -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/include_test/__pycache__/__init__.cpython-39.pyc -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/__pycache__/__init__.cpython-39.pyc -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/__pycache__/comms.cpython-39.pyc -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/__pycache__/utils.cpython-39.pyc -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask/common/__pycache__/ucx.cpython-39.pyc -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask-22.10.0a0+46.gd9c7aa979-py3.9.egg-info/top_level.txt -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask-22.10.0a0+46.gd9c7aa979-py3.9.egg-info/PKG-INFO 
-/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask-22.10.0a0+46.gd9c7aa979-py3.9.egg-info/SOURCES.txt -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask-22.10.0a0+46.gd9c7aa979-py3.9.egg-info/requires.txt -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask-22.10.0a0+46.gd9c7aa979-py3.9.egg-info/dependency_links.txt -/home/cjnolet/miniconda3/envs/cuml_2210_082622/lib/python3.9/site-packages/raft_dask-22.10.0a0+46.gd9c7aa979-py3.9.egg-info/not-zip-safe From 5dd37453aa58767c0532cf07694b03cccc550319 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 27 Oct 2022 19:24:25 -0400 Subject: [PATCH 24/27] Updating docs --- cpp/include/raft/cluster/kmeans.cuh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/include/raft/cluster/kmeans.cuh b/cpp/include/raft/cluster/kmeans.cuh index 0c36dd19c0..034de85824 100644 --- a/cpp/include/raft/cluster/kmeans.cuh +++ b/cpp/include/raft/cluster/kmeans.cuh @@ -362,7 +362,6 @@ void update_centroids(const raft::handle_t& handle, * @tparam IndexT the type of data used for indexing. * * @param[in] handle The raft handle - * @param[in] params The parameters for KMeans * @param[in] X The data in row-major format * [dim = n_samples x n_features] * @param[in] centroids Centroids data @@ -373,6 +372,9 @@ void update_centroids(const raft::handle_t& handle, * [dim = n_samples] * @param[out] L2NormBuf_OR_DistBuf Resizable buffer to store L2 norm of centroids or distance * matrix + * @param[in] metric Distance metric to use + * @param[in] batch_samples batch size for input data samples + * @param[in] batch_centroids batch size for input centroids * @param[in] workspace Temporary workspace buffer which can get resized * */ From fa2c8edc9b7e7908e3a44b2d0e498033f2fbc750 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Thu, 27 Oct 2022 19:26:44 -0400 Subject: [PATCH 25/27] Style --- cpp/include/raft/cluster/detail/kmeans_common.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/cluster/detail/kmeans_common.cuh b/cpp/include/raft/cluster/detail/kmeans_common.cuh index 19045b2c58..bf8b276f4b 100644 --- a/cpp/include/raft/cluster/detail/kmeans_common.cuh +++ b/cpp/include/raft/cluster/detail/kmeans_common.cuh @@ -357,7 +357,7 @@ void minClusterAndDistanceCompute( // todo(lsugy): change batch size computation when using fusedL2NN! bool is_fused = metric == raft::distance::DistanceType::L2Expanded || metric == raft::distance::DistanceType::L2SqrtExpanded; - auto dataBatchSize = is_fused ? (IndexT)n_samples : getDataBatchSize(batch_samples, n_samples); + auto dataBatchSize = is_fused ? (IndexT)n_samples : getDataBatchSize(batch_samples, n_samples); auto centroidsBatchSize = getCentroidsBatchSize(batch_centroids, n_clusters); if (is_fused) { @@ -492,7 +492,7 @@ void minClusterDistanceCompute(const raft::handle_t& handle, bool is_fused = metric == raft::distance::DistanceType::L2Expanded || metric == raft::distance::DistanceType::L2SqrtExpanded; - auto dataBatchSize = is_fused ? (IndexT)n_samples : getDataBatchSize(batch_samples, n_samples); + auto dataBatchSize = is_fused ? (IndexT)n_samples : getDataBatchSize(batch_samples, n_samples); auto centroidsBatchSize = getCentroidsBatchSize(batch_centroids, n_clusters); if (is_fused) { From 4d8c0f51e4a0cc8af3b2281d880ff3835c1b24fd Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Thu, 27 Oct 2022 19:30:27 -0400 Subject: [PATCH 26/27] Fixing docs --- cpp/include/raft/cluster/kmeans.cuh | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/cpp/include/raft/cluster/kmeans.cuh b/cpp/include/raft/cluster/kmeans.cuh index 5d6a850904..3cf8c2f985 100644 --- a/cpp/include/raft/cluster/kmeans.cuh +++ b/cpp/include/raft/cluster/kmeans.cuh @@ -327,16 +327,10 @@ void cluster_cost(const raft::handle_t& handle, * @param[in] handle: Raft handle to use for managing library resources * @param[in] X: input matrix (size n_samples, n_features) * @param[in] sample_weights: number of samples currently assigned to each centroid (size n_samples) - * @param[in] l2norm_x: optional array of l2 norms for each input data sample (size n_samples) * @param[in] centroids: matrix of current centroids (size n_clusters, n_features) - * @param[out] min_cluster_and_dist: output vector to store key/value pairs of min cluster indices - * and distances (size n_clusters) - * @param[out] new_centroids: output matrix of updated centroids (size n_clusters, n_features) + * @param[in] labels: Iterator of labels (can also be a raw pointer) * @param[out] weight_per_cluster: sum of sample weights per cluster (size n_clusters) - * @param[in] metric: distance metric to use. 
Must be either L2Expanded, L2SqrtExpanded, - * L2Unexpanded, or L2SqrtUnexpanded - * @param[in] batch_samples: batch size for data samples when computing distances - * @param[in] batch_centroids: batch size for centroids when computing distances + * @param[out] new_centroids: output matrix of updated centroids (size n_clusters, n_features) */ template void update_centroids(const raft::handle_t& handle, @@ -413,7 +407,7 @@ void min_cluster_distance(const raft::handle_t& handle, * @param[in] handle The raft handle * @param[in] X The data in row-major format * [dim = n_samples x n_features] - * @param[in] centroids Centroids data +-c * @param[in] centroids Centroids data * [dim = n_cluster x n_features] * @param[out] minClusterAndDistance Distance vector that contains for every sample, the nearest * centroid and it's distance @@ -467,6 +461,7 @@ void min_cluster_and_distance( * [dim = n_samples_to_gather x n_features] * @param[in] n_samples_to_gather Number of sample to gather * @param[in] seed Seed for the shuffle + * @param[in] workspace Temporary workspace buffer which can get resized * */ template @@ -474,9 +469,10 @@ void shuffle_and_gather(const raft::handle_t& handle, raft::device_matrix_view in, raft::device_matrix_view out, uint32_t n_samples_to_gather, - uint64_t seed) + uint64_t seed, + rmm::device_uvector* workspace = nullptr) { - detail::shuffleAndGather(handle, in, out, n_samples_to_gather, seed); + detail::shuffleAndGather(handle, in, out, n_samples_to_gather, seed, workspace); } /** @@ -954,6 +950,7 @@ void minClusterAndDistanceCompute( * [dim = n_samples_to_gather x n_features] * @param[in] n_samples_to_gather Number of sample to gather * @param[in] seed Seed for the shuffle + * @param[in] workspace Temporary workspace buffer which can get resized * */ template @@ -961,9 +958,10 @@ void shuffleAndGather(const raft::handle_t& handle, raft::device_matrix_view in, raft::device_matrix_view out, uint32_t n_samples_to_gather, - uint64_t seed) + uint64_t 
seed, + rmm::device_uvector* workspace = nullptr) { - kmeans::shuffle_and_gather(handle, in, out, n_samples_to_gather, seed); + kmeans::shuffle_and_gather(handle, in, out, n_samples_to_gather, seed, workspace); } /** From e98bbdfc52cb063fda8aa3ce55a984d45cd3b715 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 28 Oct 2022 13:54:06 -0400 Subject: [PATCH 27/27] Review feedback --- cpp/include/raft/cluster/detail/kmeans_common.cuh | 2 +- cpp/include/raft/cluster/kmeans.cuh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/include/raft/cluster/detail/kmeans_common.cuh b/cpp/include/raft/cluster/detail/kmeans_common.cuh index bf8b276f4b..2973be8c23 100644 --- a/cpp/include/raft/cluster/detail/kmeans_common.cuh +++ b/cpp/include/raft/cluster/detail/kmeans_common.cuh @@ -372,7 +372,7 @@ void minClusterAndDistanceCompute( } else { // TODO: Unless pool allocator is used, passing in a workspace for this // isn't really increasing performance because this needs to do a re-allocation - // anyways. + // anyways. ref https://github.com/rapidsai/raft/issues/930 L2NormBuf_OR_DistBuf.resize(dataBatchSize * centroidsBatchSize, stream); } diff --git a/cpp/include/raft/cluster/kmeans.cuh b/cpp/include/raft/cluster/kmeans.cuh index 3cf8c2f985..794366e7b9 100644 --- a/cpp/include/raft/cluster/kmeans.cuh +++ b/cpp/include/raft/cluster/kmeans.cuh @@ -343,6 +343,7 @@ void update_centroids(const raft::handle_t& handle, { // TODO: Passing these into the algorithm doesn't really present much of a benefit // because they are being resized anyways. + // ref https://github.com/rapidsai/raft/issues/930 rmm::device_uvector workspace(0, handle.get_stream()); detail::update_centroids(