Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Forward-merge branch-25.02 into branch-25.04 #6285

Merged
merged 3 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ jobs:
# Please keep pr-builder as the top job here
pr-builder:
needs:
- check-nightly-ci
- changed-files
- checks
- clang-tidy
Expand Down Expand Up @@ -43,6 +44,18 @@ jobs:
- name: Telemetry setup
if: ${{ vars.TELEMETRY_ENABLED == 'true' }}
uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main
check-nightly-ci:
# Switch to ubuntu-latest once it defaults to a version of Ubuntu that
# provides at least Python 3.11 (see
# https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat)
runs-on: ubuntu-24.04
env:
RAPIDS_GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
steps:
- name: Check if nightly CI is passing
uses: rapidsai/shared-actions/check_nightly_success/dispatch@main
with:
repo: cuml
changed-files:
secrets: inherit
needs: telemetry-setup
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ dependencies:
- sphinx-markdown-tables
- statsmodels
- sysroot_linux-64==2.28
- treelite==4.3.0
- treelite==4.4.1
- umap-learn==0.5.6
- xgboost>=2.1.0
name: all_cuda-118_arch-x86_64
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-128_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ dependencies:
- sphinx-markdown-tables
- statsmodels
- sysroot_linux-64==2.28
- treelite==4.3.0
- treelite==4.4.1
- umap-learn==0.5.6
- xgboost>=2.1.0
name: all_cuda-128_arch-x86_64
2 changes: 1 addition & 1 deletion conda/recipes/cuml/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ c_stdlib_version:
- "=2.28"

treelite_version:
- "=4.3.0"
- "=4.4.1"
2 changes: 1 addition & 1 deletion conda/recipes/libcuml/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ spdlog_version:
- ">=1.14.1,<1.15"

treelite_version:
- "=4.3.0"
- "=4.4.1"

# The CTK libraries below are missing from the conda-forge::cudatoolkit package
# for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages
Expand Down
4 changes: 2 additions & 2 deletions cpp/cmake/thirdparty/get_treelite.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ function(find_and_configure_treelite)
rapids_export_find_package_root(BUILD Treelite [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cuml-exports)
endfunction()

find_and_configure_treelite(VERSION 4.3.0
PINNED_TAG 575e4208f2b18e40d818c338ecb95d7a26e69aab
find_and_configure_treelite(VERSION 4.4.1
PINNED_TAG 386bd0de99f5a66584c7e58221ee38ce606ad1ae
EXCLUDE_FROM_ALL ${CUML_EXCLUDE_TREELITE_FROM_ALL}
BUILD_STATIC_LIBS ${CUML_USE_TREELITE_STATIC})
2 changes: 1 addition & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ dependencies:
- output_types: [conda, requirements, pyproject]
packages:
- &cython cython>=3.0.0
- &treelite treelite==4.3.0
- &treelite treelite==4.4.1

py_run_cuml:
common:
Expand Down
7 changes: 6 additions & 1 deletion python/cuml/cuml/cluster/kmeans.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ np = cpu_only_import('numpy')
from cuml.internals.safe_imports import gpu_only_import
rmm = gpu_only_import('rmm')
from cuml.internals.safe_imports import safe_import_from, return_false
from cuml.internals.utils import check_random_seed
import typing

IF GPUBUILD == 1:
Expand Down Expand Up @@ -209,8 +210,11 @@ class KMeans(UniversalBase,
params.init = self._params_init
params.max_iter = <int>self.max_iter
params.tol = <double>self.tol
# After transferring from one device to another, `_seed` might not be set,
# so we need to pass a dummy value here. Its value does not matter because
# the seed is only used during fitting.
params.rng_state.seed = <int>getattr(self, "_seed", 0)
params.verbosity = <raft_level_enum>(<int>self.verbose)
params.rng_state.seed = self.random_state
params.metric = DistanceType.L2Expanded # distance metric as squared L2: @todo - support other metrics # noqa: E501
params.batch_samples = <int>self.max_samples_per_batch
params.oversampling_factor = <double>self.oversampling_factor
Expand Down Expand Up @@ -307,6 +311,7 @@ class KMeans(UniversalBase,
else None),
check_dtype=check_dtype)

self._seed = check_random_seed(self.random_state)
self.feature_names_in_ = _X_m.index

IF GPUBUILD == 1:
Expand Down
5 changes: 4 additions & 1 deletion python/cuml/cuml/cluster/kmeans_mg.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Copyright (c) 2019-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -32,6 +32,7 @@ from cuml.common import input_to_cuml_array

from cuml.cluster import KMeans
from cuml.cluster.kmeans_utils cimport params as KMeansParams
from cuml.internals.utils import check_random_seed


cdef extern from "cuml/cluster/kmeans_mg.hpp" \
Expand Down Expand Up @@ -129,6 +130,8 @@ class KMeansMG(KMeans):

cdef uintptr_t sample_weight_ptr = sample_weight_m.ptr

self._seed = check_random_seed(self.random_state)

if (self.init in ['scalable-k-means++', 'k-means||', 'random']):
self.cluster_centers_ = CumlArray.zeros(shape=(self.n_clusters,
self.n_cols),
Expand Down
10 changes: 3 additions & 7 deletions python/cuml/cuml/decomposition/pca.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Copyright (c) 2019-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -209,9 +209,6 @@ class PCA(UniversalBase,

``n_components = min(n_samples, n_features)``

random_state : int / None (default = None)
If you want results to be the same when you restart Python, select a
state.
svd_solver : 'full' or 'jacobi' or 'auto' (default = 'full')
Full uses an eigendecomposition of the covariance matrix then discards
components.
Expand Down Expand Up @@ -292,7 +289,7 @@ class PCA(UniversalBase,

@device_interop_preparation
def __init__(self, *, copy=True, handle=None, iterated_power=15,
n_components=None, random_state=None, svd_solver='auto',
n_components=None, svd_solver='auto',
tol=1e-7, verbose=False, whiten=False,
output_type=None):
# parameters
Expand All @@ -302,7 +299,6 @@ class PCA(UniversalBase,
self.copy = copy
self.iterated_power = iterated_power
self.n_components = n_components
self.random_state = random_state
self.svd_solver = svd_solver
self.tol = tol
self.whiten = whiten
Expand Down Expand Up @@ -739,7 +735,7 @@ class PCA(UniversalBase,
def _get_param_names(cls):
return super()._get_param_names() + \
["copy", "iterated_power", "n_components", "svd_solver", "tol",
"whiten", "random_state"]
"whiten"]

def _check_is_fitted(self, attr):
if not hasattr(self, attr) or (getattr(self, attr) is None):
Expand Down
3 changes: 2 additions & 1 deletion python/cuml/cuml/ensemble/randomforestclassifier.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import cuml.internals
from cuml.common.doc_utils import generate_docstring
from cuml.common.doc_utils import insert_into_docstring
from cuml.common import input_to_cuml_array
from cuml.internals.utils import check_random_seed

from cuml.internals.logger cimport level_enum
from cuml.ensemble.randomforest_common import BaseRandomForestModel
Expand Down Expand Up @@ -451,7 +452,7 @@ class RandomForestClassifier(BaseRandomForestModel,
if self.random_state is None:
seed_val = <uintptr_t>NULL
else:
seed_val = <uintptr_t>self.random_state
seed_val = <uintptr_t>check_random_seed(self.random_state)

rf_params = set_rf_params(<int> self.max_depth,
<int> self.max_leaves,
Expand Down
3 changes: 2 additions & 1 deletion python/cuml/cuml/ensemble/randomforestregressor.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ from cuml.internals.logger cimport level_enum
from cuml.common.doc_utils import generate_docstring
from cuml.common.doc_utils import insert_into_docstring
from cuml.common import input_to_cuml_array
from cuml.internals.utils import check_random_seed

from cuml.ensemble.randomforest_common import BaseRandomForestModel
from cuml.ensemble.randomforest_common import _obtain_fil_model
Expand Down Expand Up @@ -438,7 +439,7 @@ class RandomForestRegressor(BaseRandomForestModel,
if self.random_state is None:
seed_val = <uintptr_t>NULL
else:
seed_val = <uintptr_t>self.random_state
seed_val = <uintptr_t>check_random_seed(self.random_state)

rf_params = set_rf_params(<int> self.max_depth,
<int> self.max_leaves,
Expand Down
39 changes: 39 additions & 0 deletions python/cuml/cuml/internals/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import numbers
import numpy as np


def check_random_seed(seed):
    """Turn a seed-like value into an integer seed.

    Parameters
    ----------
    seed : None | int | instance of RandomState
        If seed is None, return a random int as seed.
        If seed is an int, return it.
        If seed is a RandomState instance, derive a seed from it.
        Otherwise raise ValueError.

    Returns
    -------
    seed : int or numpy.uint32
        ``seed`` itself when it is already integral; otherwise a value drawn
        from the given (or freshly created) RandomState in the half-open
        range ``[0, np.iinfo(np.uint32).max)``.

    Raises
    ------
    ValueError
        If ``seed`` is not None, an integer, or a RandomState instance.
    """
    if seed is None:
        # No seed requested: fall through to the RandomState branch below
        # with an unseeded generator so a random seed is drawn.
        seed = np.random.RandomState(None)

    if isinstance(seed, numbers.Integral):
        return seed
    if isinstance(seed, np.random.RandomState):
        return seed.randint(
            low=0, high=np.iinfo(np.uint32).max, dtype=np.uint32
        )
    # Use an f-string rather than ``"%r ..." % seed``: the % operator treats
    # a tuple operand as the argument list, so a tuple ``seed`` would raise
    # TypeError (or be misreported) instead of the documented ValueError.
    raise ValueError(f"{seed!r} cannot be used to create a seed.")
4 changes: 2 additions & 2 deletions python/cuml/cuml/manifold/t_sne.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ from cuml.internals.base import UniversalBase
from pylibraft.common.handle cimport handle_t
from cuml.internals.api_decorators import device_interop_preparation
from cuml.internals.api_decorators import enable_device_interop
from cuml.internals.utils import check_random_seed
from cuml.internals import logger
from cuml.internals cimport logger


from cuml.internals.array import CumlArray
from cuml.internals.array_sparse import SparseCumlArray
from cuml.common.sparse_utils import is_sparse
Expand Down Expand Up @@ -596,7 +596,7 @@ class TSNE(UniversalBase,
def _build_tsne_params(self, algo):
cdef long long seed = -1
if self.random_state is not None:
seed = self.random_state
seed = check_random_seed(self.random_state)

cdef TSNEParams* params = new TSNEParams()
params.dim = <int> self.n_components
Expand Down
Loading
Loading