Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix UMAP issues with large inputs #6245

Open
wants to merge 11 commits into
base: branch-25.02
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions cpp/include/cuml/manifold/common.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,6 +16,8 @@

#pragma once

#include <stdint.h>

namespace ML {

// Dense input uses int64_t until FAISS is updated
Expand Down Expand Up @@ -55,8 +57,8 @@ struct knn_graph {
template <typename T>
struct manifold_inputs_t {
T* y;
int n;
int d;
uint64_t n;
viclafargue marked this conversation as resolved.
Show resolved Hide resolved
uint64_t d;

manifold_inputs_t(T* y_, int n_, int d_) : y(y_), n(n_), d(d_) {}

Expand Down
3 changes: 2 additions & 1 deletion cpp/src/tsne/tsne_runner.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

#include <cuvs/distance/distance.hpp>
#include <pca/pca.cuh>
#include <stdint.h>

namespace ML {

Expand Down Expand Up @@ -167,7 +168,7 @@ class TSNE_runner {
{
distance_and_perplexity();

const auto NNZ = COO_Matrix.nnz;
const auto NNZ = static_cast<value_idx>(COO_Matrix.nnz);
auto* VAL = COO_Matrix.vals();
const auto* COL = COO_Matrix.cols();
const auto* ROW = COO_Matrix.rows();
Expand Down
32 changes: 18 additions & 14 deletions cpp/src/umap/fuzzy_simpl_set/naive.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

#include <cuda_runtime.h>

#include <stdint.h>
#include <stdio.h>

#include <string>
Expand Down Expand Up @@ -92,7 +93,8 @@ CUML_KERNEL void smooth_knn_dist_kernel(const value_t* knn_dists,
{
// row-based matrix 1 thread per row
int row = (blockIdx.x * TPB_X) + threadIdx.x;
int i = row * n_neighbors; // each thread processes one row of the dist matrix
uint64_t i =
static_cast<uint64_t>(row) * n_neighbors; // each thread processes one row of the dist matrix

if (row < n) {
float target = __log2f(n_neighbors) * bandwidth;
Expand Down Expand Up @@ -190,7 +192,7 @@ CUML_KERNEL void smooth_knn_dist_kernel(const value_t* knn_dists,
*
* Descriptions adapted from: https://github.com/lmcinnes/umap/blob/master/umap/umap_.py
*/
template <int TPB_X, typename value_idx, typename value_t>
template <uint64_t TPB_X, typename value_idx, typename value_t>
CUML_KERNEL void compute_membership_strength_kernel(
const value_idx* knn_indices,
const float* knn_dists, // nn outputs
Expand All @@ -199,14 +201,14 @@ CUML_KERNEL void compute_membership_strength_kernel(
value_t* vals,
int* rows,
int* cols, // result coo
int n,
int n_neighbors)
int n_neighbors,
uint64_t to_process)
{ // model params

// row-based matrix is best
int idx = (blockIdx.x * TPB_X) + threadIdx.x;
uint64_t idx = (blockIdx.x * TPB_X) + threadIdx.x;

if (idx < n * n_neighbors) {
if (idx < to_process) {
int row = idx / n_neighbors; // one neighbor per thread

double cur_rho = rhos[row];
Expand Down Expand Up @@ -237,8 +239,8 @@ CUML_KERNEL void compute_membership_strength_kernel(
/*
* Sets up and runs the knn dist smoothing
*/
template <int TPB_X, typename value_idx, typename value_t>
void smooth_knn_dist(int n,
template <uint64_t TPB_X, typename value_idx, typename value_t>
void smooth_knn_dist(uint64_t n,
const value_idx* knn_indices,
const float* knn_dists,
value_t* rhos,
Expand All @@ -253,7 +255,8 @@ void smooth_knn_dist(int n,

rmm::device_uvector<value_t> dist_means_dev(n_neighbors, stream);

raft::stats::mean(dist_means_dev.data(), knn_dists, 1, n_neighbors * n, false, false, stream);
raft::stats::mean(
dist_means_dev.data(), knn_dists, uint64_t{1}, n * n_neighbors, false, false, stream);
RAFT_CUDA_TRY(cudaPeekAtLastError());

value_t mean_dist = 0.0;
Expand Down Expand Up @@ -284,8 +287,8 @@ void smooth_knn_dist(int n,
* @param params UMAPParams config object
* @param stream cuda stream to use for device operations
*/
template <int TPB_X, typename value_idx, typename value_t>
void launcher(int n,
template <uint64_t TPB_X, typename value_idx, typename value_t>
void launcher(uint64_t n,
const value_idx* knn_indices,
const value_t* knn_dists,
int n_neighbors,
Expand Down Expand Up @@ -328,7 +331,8 @@ void launcher(int n,
* Compute graph of membership strengths
*/

dim3 grid_elm(raft::ceildiv(n * n_neighbors, TPB_X), 1, 1);
uint64_t to_process = static_cast<uint64_t>(in.n_rows) * n_neighbors;
dim3 grid_elm(raft::ceildiv(to_process, TPB_X), 1, 1);
dim3 blk_elm(TPB_X, 1, 1);

compute_membership_strength_kernel<TPB_X><<<grid_elm, blk_elm, 0, stream>>>(knn_indices,
Expand All @@ -338,8 +342,8 @@ void launcher(int n,
in.vals(),
in.rows(),
in.cols(),
in.n_rows,
n_neighbors);
n_neighbors,
to_process);
RAFT_CUDA_TRY(cudaPeekAtLastError());

if (ML::default_logger().should_log(ML::level_enum::debug)) {
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/umap/init_embed/random_algo.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -20,14 +20,16 @@

#include <raft/random/rng.cuh>

#include <stdint.h>

namespace UMAPAlgo {
namespace InitEmbed {
namespace RandomInit {

using namespace ML;

template <typename T>
void launcher(int n, int d, UMAPParams* params, T* embedding, cudaStream_t stream)
void launcher(uint64_t n, int d, UMAPParams* params, T* embedding, cudaStream_t stream)
{
uint64_t seed = params->random_state;

Expand Down
9 changes: 6 additions & 3 deletions cpp/src/umap/init_embed/spectral_algo.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -29,6 +29,8 @@
#include <thrust/execution_policy.h>
#include <thrust/extrema.h>

#include <stdint.h>

#include <iostream>

namespace UMAPAlgo {
Expand All @@ -44,15 +46,16 @@ using namespace ML;
*/
template <typename T>
void launcher(const raft::handle_t& handle,
int n,
uint64_t n,
int d,
raft::sparse::COO<float>* coo,
UMAPParams* params,
T* embedding)
{
cudaStream_t stream = handle.get_stream();

ASSERT(n > params->n_components, "Spectral layout requires n_samples > n_components");
ASSERT(n > static_cast<uint64_t>(params->n_components),
"Spectral layout requires n_samples > n_components");

rmm::device_uvector<T> tmp_storage(n * params->n_components, stream);

Expand Down
8 changes: 5 additions & 3 deletions cpp/src/umap/knn_graph/algo.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -35,6 +35,7 @@

#include <cuvs/distance/distance.hpp>
#include <cuvs/neighbors/brute_force.hpp>
#include <stdint.h>

#include <iostream>

Expand Down Expand Up @@ -126,11 +127,12 @@ inline void launcher(const raft::handle_t& handle,

RAFT_EXPECTS(graph.distances().has_value(),
"return_distances for nn descent should be set to true to be used for UMAP");
auto out_knn_dists_view = raft::make_device_matrix_view(out.knn_dists, inputsA.n, n_neighbors);
auto out_knn_dists_view =
raft::make_device_matrix_view(out.knn_dists, inputsA.n, static_cast<uint64_t>(n_neighbors));
raft::matrix::slice<float, int64_t, raft::row_major>(
handle, raft::make_const_mdspan(graph.distances().value()), out_knn_dists_view, coords);
auto out_knn_indices_view =
raft::make_device_matrix_view(out.knn_indices, inputsA.n, n_neighbors);
raft::make_device_matrix_view(out.knn_indices, inputsA.n, static_cast<uint64_t>(n_neighbors));
raft::matrix::slice<int64_t, int64_t, raft::row_major>(
handle, raft::make_const_mdspan(indices_d.view()), out_knn_indices_view, coords);
}
Expand Down
18 changes: 8 additions & 10 deletions cpp/src/umap/runner.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
#include <thrust/scan.h>
#include <thrust/system/cuda/execution_policy.h>

#include <stdint.h>

#include <memory>

namespace UMAPAlgo {
Expand Down Expand Up @@ -348,7 +350,7 @@ void _fit_supervised(const raft::handle_t& handle,
/**
*
*/
template <typename value_idx, typename value_t, typename umap_inputs, int TPB_X>
template <typename value_idx, typename value_t, typename umap_inputs, uint64_t TPB_X>
void _transform(const raft::handle_t& handle,
const umap_inputs& inputs,
umap_inputs& orig_x_inputs,
Expand Down Expand Up @@ -425,7 +427,7 @@ void _transform(const raft::handle_t& handle,
* Compute graph of membership strengths
*/

int nnz = inputs.n * params->n_neighbors;
uint64_t nnz = uint64_t{inputs.n} * params->n_neighbors;

dim3 grid_nnz(raft::ceildiv(nnz, TPB_X), 1, 1);

Expand All @@ -437,6 +439,7 @@ void _transform(const raft::handle_t& handle,

raft::sparse::COO<value_t> graph_coo(stream, nnz, inputs.n, inputs.n);

uint64_t to_process = static_cast<uint64_t>(graph_coo.n_rows) * params->n_neighbors;
FuzzySimplSetImpl::compute_membership_strength_kernel<TPB_X>
<<<grid_nnz, blk, 0, stream>>>(knn_graph.knn_indices,
knn_graph.knn_dists,
Expand All @@ -445,15 +448,13 @@ void _transform(const raft::handle_t& handle,
graph_coo.vals(),
graph_coo.rows(),
graph_coo.cols(),
graph_coo.n_rows,
params->n_neighbors);
params->n_neighbors,
to_process);
RAFT_CUDA_TRY(cudaPeekAtLastError());

rmm::device_uvector<int> row_ind(inputs.n, stream);
rmm::device_uvector<int> ia(inputs.n, stream);
rmm::device_uvector<uint64_t> row_ind(inputs.n, stream);

raft::sparse::convert::sorted_coo_to_csr(&graph_coo, row_ind.data(), stream);
raft::sparse::linalg::coo_degree(&graph_coo, ia.data(), stream);

rmm::device_uvector<value_t> vals_normed(graph_coo.nnz, stream);
RAFT_CUDA_TRY(cudaMemsetAsync(vals_normed.data(), 0, graph_coo.nnz * sizeof(value_t), stream));
Expand All @@ -471,9 +472,6 @@ void _transform(const raft::handle_t& handle,
params->n_components,
transformed,
params->n_neighbors);
RAFT_CUDA_TRY(cudaPeekAtLastError());

RAFT_CUDA_TRY(cudaMemsetAsync(ia.data(), 0.0, ia.size() * sizeof(int), stream));

RAFT_CUDA_TRY(cudaPeekAtLastError());

Expand Down
Loading
Loading