From 2b5aa3e1d132e3b5441dcff6c1361abcc46be60f Mon Sep 17 00:00:00 2001
From: Jiaming Yuan <jm.yuan@outlook.com>
Date: Thu, 26 Oct 2023 07:35:15 +0800
Subject: [PATCH 01/20] Fix type hint in split function. (#5625)

Authors:
  - Jiaming Yuan (https://github.com/trivialfis)

Approvers:
  - Simon Adorf (https://github.com/csadorf)

URL: https://github.com/rapidsai/cuml/pull/5625
---
 python/cuml/model_selection/_split.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/python/cuml/model_selection/_split.py b/python/cuml/model_selection/_split.py
index 21800959fd..cb58db4f5f 100644
--- a/python/cuml/model_selection/_split.py
+++ b/python/cuml/model_selection/_split.py
@@ -13,12 +13,15 @@
 # limitations under the License.
 #
 
-from typing import Union
-from cuml.internals.safe_imports import gpu_only_import_from
+from typing import Optional, Union
+
 from cuml.common import input_to_cuml_array
 from cuml.internals.array import array_to_memory_order
-from cuml.internals.safe_imports import cpu_only_import
-from cuml.internals.safe_imports import gpu_only_import
+from cuml.internals.safe_imports import (
+    cpu_only_import,
+    gpu_only_import,
+    gpu_only_import_from,
+)
 
 cudf = gpu_only_import("cudf")
 cp = gpu_only_import("cupy")
@@ -138,7 +141,6 @@ def _stratify_split(
         if hasattr(X, "__cuda_array_interface__") or isinstance(
             X, cupyx.scipy.sparse.csr_matrix
         ):
-
             X_train_i = cp.array(
                 X[perm_indices_class_i[: n_i[i]]], order=x_order
             )
@@ -244,11 +246,11 @@ def _approximate_mode(class_counts, n_draws, rng):
 def train_test_split(
     X,
     y=None,
-    test_size: Union[float, int] = None,
-    train_size: Union[float, int] = None,
+    test_size: Optional[Union[float, int]] = None,
+    train_size: Optional[Union[float, int]] = None,
     shuffle: bool = True,
-    random_state: Union[
-        int, cp.random.RandomState, np.random.RandomState
+    random_state: Optional[
+        Union[int, cp.random.RandomState, np.random.RandomState]
     ] = None,
     stratify=None,
 ):

From ff635fc2530c83729960621fcbea2e7bd7dc2244 Mon Sep 17 00:00:00 2001
From: Simon Adorf <sadorf@nvidia.com>
Date: Thu, 26 Oct 2023 22:19:22 +0200
Subject: [PATCH 02/20] Temporarily avoid pydata-sphinx-theme version 0.14.2.
 (#5629)

See https://github.com/pydata/pydata-sphinx-theme/issues/1539 for additional context.

Authors:
  - Simon Adorf (https://github.com/csadorf)

Approvers:
  - William Hicks (https://github.com/wphicks)
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: https://github.com/rapidsai/cuml/pull/5629
---
 conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +-
 conda/environments/all_cuda-120_arch-x86_64.yaml | 2 +-
 dependencies.yaml                                | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 6038499d0c..d8aed3b10c 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -53,7 +53,7 @@ dependencies:
 - numpydoc
 - nvcc_linux-64=11.8
 - pip
-- pydata-sphinx-theme
+- pydata-sphinx-theme!=0.14.2
 - pylibraft==23.12.*
 - pynndescent==0.5.8
 - pytest
diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml
index 2f81709f6e..ba1b625c25 100644
--- a/conda/environments/all_cuda-120_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -49,7 +49,7 @@ dependencies:
 - numba>=0.57
 - numpydoc
 - pip
-- pydata-sphinx-theme
+- pydata-sphinx-theme!=0.14.2
 - pylibraft==23.12.*
 - pynndescent==0.5.8
 - pytest
diff --git a/dependencies.yaml b/dependencies.yaml
index 86307617b4..0b95d00971 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -294,7 +294,8 @@ dependencies:
           - ipykernel
           - nbsphinx
           - numpydoc
-          - pydata-sphinx-theme
+          # https://github.com/pydata/pydata-sphinx-theme/issues/1539
+          - pydata-sphinx-theme!=0.14.2
           - recommonmark
           - &scikit_learn scikit-learn==1.2
           - sphinx<6

From 79aa4909117dac55648ddc2e6ebde89869147b78 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Mon, 30 Oct 2023 13:17:12 -0500
Subject: [PATCH 03/20] Unpin `dask` and `distributed` for `23.12` development
 (#5627)

This PR relaxes `dask` and `distributed` versions pinning for `23.12` development.

xref: https://github.com/rapidsai/cudf/pull/14320

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Simon Adorf (https://github.com/csadorf)

Approvers:
  - Simon Adorf (https://github.com/csadorf)
  - https://github.com/jakirkham
  - Peter Andreas Entschev (https://github.com/pentschev)
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/cuml/pull/5627
---
 ci/test_wheel.sh                                 | 2 +-
 conda/environments/all_cuda-118_arch-x86_64.yaml | 6 +++---
 conda/environments/all_cuda-120_arch-x86_64.yaml | 6 +++---
 conda/recipes/cuml/meta.yaml                     | 6 +++---
 dependencies.yaml                                | 6 +++---
 python/README.md                                 | 4 ++--
 python/pyproject.toml                            | 4 ++--
 7 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh
index bf3b3845f7..f6c61eabac 100755
--- a/ci/test_wheel.sh
+++ b/ci/test_wheel.sh
@@ -13,7 +13,7 @@ if [[ "$(arch)" == "aarch64" ]]; then
 fi
 
 # Always install latest dask for testing
-python -m pip install git+https://github.com/dask/dask.git@2023.9.2 git+https://github.com/dask/distributed.git@2023.9.2 git+https://github.com/rapidsai/dask-cuda.git@branch-23.12
+python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.12
 
 # echo to expand wildcard before adding `[extra]` requires for pip
 python -m pip install $(echo ./dist/cuml*.whl)[test]
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index d8aed3b10c..abbf54a4c2 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -16,12 +16,12 @@ dependencies:
 - cupy>=12.0.0
 - cxx-compiler
 - cython>=3.0.0
-- dask-core==2023.9.2
+- dask-core>=2023.9.2
 - dask-cuda==23.12.*
 - dask-cudf==23.12.*
 - dask-ml
-- dask==2023.9.2
-- distributed==2023.9.2
+- dask>=2023.9.2
+- distributed>=2023.9.2
 - doxygen=1.9.1
 - gcc_linux-64=11.*
 - gmock>=1.13.0
diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml
index ba1b625c25..d2c8747d35 100644
--- a/conda/environments/all_cuda-120_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -18,12 +18,12 @@ dependencies:
 - cupy>=12.0.0
 - cxx-compiler
 - cython>=3.0.0
-- dask-core==2023.9.2
+- dask-core>=2023.9.2
 - dask-cuda==23.12.*
 - dask-cudf==23.12.*
 - dask-ml
-- dask==2023.9.2
-- distributed==2023.9.2
+- dask>=2023.9.2
+- distributed>=2023.9.2
 - doxygen=1.9.1
 - gcc_linux-64=11.*
 - gmock>=1.13.0
diff --git a/conda/recipes/cuml/meta.yaml b/conda/recipes/cuml/meta.yaml
index 817776fa13..15e9a708b4 100644
--- a/conda/recipes/cuml/meta.yaml
+++ b/conda/recipes/cuml/meta.yaml
@@ -76,9 +76,9 @@ requirements:
     - cudf ={{ minor_version }}
     - cupy >=12.0.0
     - dask-cudf ={{ minor_version }}
-    - dask ==2023.9.2
-    - dask-core==2023.9.2
-    - distributed ==2023.9.2
+    - dask >=2023.9.2
+    - dask-core>=2023.9.2
+    - distributed >=2023.9.2
     - joblib >=0.11
     - libcuml ={{ version }}
     - libcumlprims ={{ minor_version }}
diff --git a/dependencies.yaml b/dependencies.yaml
index 0b95d00971..4387a85fdd 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -175,10 +175,10 @@ dependencies:
       - output_types: [conda, requirements, pyproject]
         packages:
           - cudf==23.12.*
-          - dask==2023.9.2
+          - dask>=2023.9.2
           - dask-cuda==23.12.*
           - dask-cudf==23.12.*
-          - distributed==2023.9.2
+          - distributed>=2023.9.2
           - joblib>=0.11
           - numba>=0.57
             # TODO: Is scipy really a hard dependency, or should
@@ -192,7 +192,7 @@ dependencies:
           - cupy>=12.0.0
       - output_types: conda
         packages:
-          - dask-core==2023.9.2
+          - dask-core>=2023.9.2
       - output_types: pyproject
         packages:
           - *treelite_runtime
diff --git a/python/README.md b/python/README.md
index 41a1d366cd..342ad177ab 100644
--- a/python/README.md
+++ b/python/README.md
@@ -70,8 +70,8 @@ Packages required for multigpu algorithms*:
 - ucx-py version matching the cuML version
 - dask-cudf version matching the cuML version
 - nccl>=2.5
-- dask==2023.9.2
-- distributed==2023.9.2
+- dask>=2023.9.2
+- distributed>=2023.9.2
 
 * this can be avoided with `--singlegpu` argument flag.
 
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 32f1b7a59e..013658ff4c 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -61,8 +61,8 @@ dependencies = [
     "cupy-cuda11x>=12.0.0",
     "dask-cuda==23.12.*",
     "dask-cudf==23.12.*",
-    "dask==2023.9.2",
-    "distributed==2023.9.2",
+    "dask>=2023.9.2",
+    "distributed>=2023.9.2",
     "joblib>=0.11",
     "numba>=0.57",
     "raft-dask==23.12.*",

From 04348ed9e8994fa2e481168e92fd76da965af0e7 Mon Sep 17 00:00:00 2001
From: Peter Andreas Entschev <peter@entschev.com>
Date: Mon, 30 Oct 2023 23:28:49 +0100
Subject: [PATCH 04/20] Increase `Nanny` close timeout in `LocalCUDACluster`
 tests (#5636)

Tests in CI may fail at times, possibly only under high load, due to the `Nanny` close timeout: its internal mechanism for establishing the timeout before killing processes may leave too little time for the process to shut down properly.

Dask-CUDA introduced a new `IncreasedCloseTimeoutNanny` class intended to be used with `LocalCUDACluster` in tests to reduce the chance that such timeouts occur. This new class is now used in tests to improve the situation in CI.

Authors:
  - Peter Andreas Entschev (https://github.com/pentschev)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Simon Adorf (https://github.com/csadorf)

URL: https://github.com/rapidsai/cuml/pull/5636
---
 python/cuml/benchmark/automated/dask/conftest.py | 8 +++++++-
 python/cuml/tests/dask/conftest.py               | 8 +++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/python/cuml/benchmark/automated/dask/conftest.py b/python/cuml/benchmark/automated/dask/conftest.py
index 4e406ed5a6..8d2bb4e49c 100644
--- a/python/cuml/benchmark/automated/dask/conftest.py
+++ b/python/cuml/benchmark/automated/dask/conftest.py
@@ -18,6 +18,7 @@
 
 from dask_cuda import initialize
 from dask_cuda import LocalCUDACluster
+from dask_cuda.utils_test import IncreasedCloseTimeoutNanny
 from dask.distributed import Client
 
 enable_tcp_over_ucx = True
@@ -28,7 +29,11 @@
 @pytest.fixture(scope="module")
 def cluster():
 
-    cluster = LocalCUDACluster(protocol="tcp", scheduler_port=0)
+    cluster = LocalCUDACluster(
+        protocol="tcp",
+        scheduler_port=0,
+        worker_class=IncreasedCloseTimeoutNanny,
+    )
     yield cluster
     cluster.close()
 
@@ -54,6 +59,7 @@ def ucx_cluster():
         enable_tcp_over_ucx=enable_tcp_over_ucx,
         enable_nvlink=enable_nvlink,
         enable_infiniband=enable_infiniband,
+        worker_class=IncreasedCloseTimeoutNanny,
     )
     yield cluster
     cluster.close()
diff --git a/python/cuml/tests/dask/conftest.py b/python/cuml/tests/dask/conftest.py
index 29f09a44c9..3c6311dc03 100644
--- a/python/cuml/tests/dask/conftest.py
+++ b/python/cuml/tests/dask/conftest.py
@@ -4,6 +4,7 @@
 
 from dask_cuda import initialize
 from dask_cuda import LocalCUDACluster
+from dask_cuda.utils_test import IncreasedCloseTimeoutNanny
 from dask.distributed import Client
 
 enable_tcp_over_ucx = True
@@ -14,7 +15,11 @@
 @pytest.fixture(scope="module")
 def cluster():
 
-    cluster = LocalCUDACluster(protocol="tcp", scheduler_port=0)
+    cluster = LocalCUDACluster(
+        protocol="tcp",
+        scheduler_port=0,
+        worker_class=IncreasedCloseTimeoutNanny,
+    )
     yield cluster
     cluster.close()
 
@@ -40,6 +45,7 @@ def ucx_cluster():
         enable_tcp_over_ucx=enable_tcp_over_ucx,
         enable_nvlink=enable_nvlink,
         enable_infiniband=enable_infiniband,
+        worker_class=IncreasedCloseTimeoutNanny,
     )
     yield cluster
     cluster.close()

From eb48a6eabb9111f46ba65542dfecaf8f0d4f0f55 Mon Sep 17 00:00:00 2001
From: Nick Becker <nickb500@gmail.com>
Date: Mon, 30 Oct 2023 21:49:42 -0400
Subject: [PATCH 05/20] Update interoperability docs (#5633)

This PR makes cosmetic updates to some of the interoperability documentation

Authors:
  - Nick Becker (https://github.com/beckernick)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: https://github.com/rapidsai/cuml/pull/5633
---
 .../execution_device_interoperability.ipynb   | 55 ++++++++++---------
 1 file changed, 28 insertions(+), 27 deletions(-)

diff --git a/docs/source/execution_device_interoperability.ipynb b/docs/source/execution_device_interoperability.ipynb
index f267a9198c..1738ee99d0 100644
--- a/docs/source/execution_device_interoperability.ipynb
+++ b/docs/source/execution_device_interoperability.ipynb
@@ -6,14 +6,16 @@
    "source": [
     "# cuML on GPU and CPU\n",
     "\n",
-    "cuML is a Scikit-learn-based suite of fast, GPU-accelerated machine learning algorithms designed for data science and analytical tasks. Starting with version 23.10, a new version of cuML can also be run on CPU systems, increasing its ease of use (without code changes) in the following manners: \n",
+    "cuML is a Scikit-learn-like suite of fast, GPU-accelerated machine learning algorithms designed for data science and analytical tasks.\n",
     "\n",
-    "- Allow users to prototype in systems without GPUs. \n",
-    "- Allow library integrations without the need of dispatching and boilerplate code. \n",
-    "- Allow users to train on one type of system and infer with the other in a subset of estimators (that will grow with each version). \n",
-    "- Provide compatibility with the GPU/CPU open source pydata ecosystem.\n",
+    "Starting with version 23.10, cuML provides both GPU-based and CPU-based execution capabilities with zero code change required to switch between them. This unified CPU/GPU cuML: \n",
     "\n",
-    "The majority of estimators of cuML can run in both CPU and GPU systems, with a subset of them allowing exporting models between GPU and CPU systems. The following table shows support for the most common estimators: \n",
+    "- Allows users to prototype in systems without GPUs. \n",
+    "- Allows library integrations without the need for dispatching and boilerplate code. \n",
+    "- Allows users to train on one type of system and infer with the other for a subset of estimators (that will expand over time). \n",
+    "- Provides compatibility with the broader GPU/CPU open source pydata ecosystem.\n",
+    "\n",
+    "The majority of estimators of cuML can run in both CPU and GPU systems, with a subset of them supporting exporting models between GPU and CPU systems. The following table shows support for the most common estimators: \n",
     "\n",
     "| Category | Algorithm | Supports Execution on CPU | Supports Exporting between CPU and GPU | \n",
     "| --- | --- | --- | --- |\n",
@@ -45,7 +47,9 @@
     "| **Time Series** | Holt-Winters Exponential Smoothing | No | No |\n",
     "|  | Auto-regressive Integrated Moving Average (ARIMA) | No | No |\n",
     "\n",
-    "This allows the same code to be guaranteed to run in both GPU and CPU systems. Version 23.12 is scheduled to add the following algorithms: Random Forest and Support Vector Machine estimators. \n",
+    "This allows the same code to be guaranteed to run in both GPU and CPU systems. Version 23.12 is scheduled to add the following algorithms:\n",
+    "- Random Forest\n",
+    "- Support Vector Machine estimators\n",
     "\n"
    ]
   },
@@ -57,7 +61,7 @@
     "\n",
     "## Installation\n",
     "\n",
-    "For GPU systems, cuML still follows the [RAPIDS requirements] and nothing has changed for installing it. The cuML package and wheels are universal and can run in both GPU and CPU modes. For installing in CPU systems, similar to other packages it can be installed from conda/mamba with:\n",
+    "For GPU systems, cuML still follows the [RAPIDS requirements](https://rapids.ai/#quick-start). The cuML package and wheels are universal and can run in both GPU and CPU modes. To use cuML in CPU-only systems, you can install using conda/mamba with:\n",
     "\n",
     "```bash\n",
     "mamba install -c rapidsai -c nvidia -c conda-forge cuml-cpu=23.10 \n",
@@ -65,7 +69,7 @@
     "```\n",
     "\n",
     "- cuML 23.10 supports Linux and WSL2 on GPU and CPU systems using conda. \n",
-    "- cuML 23.12 will bring support for pip wheels and macos support for CPU execution. \n",
+    "- cuML 23.12 will bring support for pip wheels and macOS support for CPU execution. \n",
     "\n",
     "### How to Use\n",
     "\n",
@@ -73,7 +77,7 @@
     "\n",
     "#### 1. Using CPU Package directly\n",
     "\n",
-    "The CPU package, `cuml-cpu` is a subset of the `cuml` package, so besides the difference in installation there is no changes needed to the code of supported estimators to run code. For example, the following script can be run both in a system with GPU and `cuml`, as well as a system without GPU and `cuml-cpu`:"
+    "The CPU package, `cuml-cpu`, is a subset of the `cuml` package, so there are zero code changes required to run the code when using a CPU-only system. For example, the following script can be run both in a system with GPU and `cuml`, as well as a system without GPU and `cuml-cpu`:"
    ]
   },
   {
@@ -110,7 +114,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "This allows to prototype on CPU systems and then run code on GPU servers, or the other way around. Some estimators support training on one type of system and then exporting models to the other type, as can be seen in [the corresponding section](#Cross-Device-Training-and-Inference-Serialization)."
+    "This allows easy prototyping on CPU systems and running production code on GPU servers, or the other way around. Some estimators support training on one type of system and then exporting models to the other type, as noted above and explained by example in [the corresponding section](#Cross-Device-Training-and-Inference-Serialization)."
    ]
   },
   {
@@ -119,7 +123,7 @@
    "source": [
     "#### 2. Managing Execution Platform with GPU package\n",
     "\n",
-    "Additionally to allowing the same code to be run in CPU systems, users can control which device executes parts of the code. So in addition to the first example that can just be run in a CPU system with `cuml-cpu`, a system with the full cuML can execute in CPU mode as well. \n",
+    "In addition to allowing zero-code-change execution on CPU systems, users can also manually control which device executes parts of the code when using a system with the full cuML.\n",
     "\n",
     "For example, using the following data: "
    ]
@@ -155,7 +159,7 @@
    "source": [
     "There are two ways to control the execution of the code:\n",
     "\n",
-    "#### a) `using_device_type` context manager:"
+    "#### a) `using_device_type` context manager"
    ]
   },
   {
@@ -177,9 +181,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "This allows to prototype but also to run different estimators on different devices, for example in the case where data is small so that moving the data around wouldn't allow the GPU to accelerate an estimator.  \n",
+    "This makes it easy to prototype and run different estimators on different devices, for example in the case where data is small so that moving the data around wouldn't allow the GPU to accelerate an estimator.  \n",
     "\n",
-    "Additionally, it allows to run estimators using unsupported parameter: "
+    "It also allows running estimators using unsupported parameters: "
    ]
   },
   {
@@ -201,14 +205,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "An upcoming feature will allow for this to also dispatch automatically. This can be very useful for library integrators, so that if users use parameters not supported on GPUs, the code automatically will dispatch to a CPU implementation. "
+    "An upcoming feature will allow this dispatch to occur automatically under the hood. This can be very useful when integrating cuML into other libraries, so that if users use parameters not supported on GPUs, the code will automatically dispatch to a CPU implementation. "
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "#### b) Global configuration. "
+    "#### b) Global configuration with `set_global_device_type`"
    ]
   },
   {
@@ -248,7 +252,7 @@
    "source": [
     "## Cross Device Training and Inference Serialization\n",
     "\n",
-    "As stated before, a subset of the estimators that can be executed on the CPU, also allow to serialize estimators trained on one type of device (CPU or GPU) and then deserialize it on the other one. \n",
+    "As stated above, a subset of the estimators support training on one type of device (CPU or GPU), serializing the trained model, and then deserializing and executing it on the other type of device. \n",
     "\n",
     "To do this, a simple API is provided. For example, To train a model on GPU but deploy it on CPU, first, train the estimator on device and save it to disk:"
    ]
@@ -291,20 +295,17 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Conclusions\n",
+    "## Conclusion\n",
     "\n",
-    "cuML's CPU capabilities are designed to facilitate different usecases, lower the requirements to use the capabilities of cuML, as well as increasing the flexibility and capabilities of integration and deployment of the library. \n",
+    "cuML's CPU capabilities are designed to facilitate different use cases, lower the barriers to using the capabilities of cuML, and streamline integrating cuML into other tools and deploying models. \n",
     "\n",
-    "Upcoming versions of cuML will increase the supported estimators, both for CPU execution as well as serializing/exporting models between systems with and without GPUs. "
+    "Upcoming versions of cuML will expand the supported estimators, both for CPU execution as well as serializing/exporting models between systems with and without GPUs. "
    ]
   }
  ],
  "metadata": {
-  "interpreter": {
-   "hash": "35840739db47a5016f18b089945bf3e154a2dca6d71cfb13687d370b69a146e3"
-  },
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3.10.12 ('cuml_dev')",
    "language": "python",
    "name": "python3"
   },
@@ -318,11 +319,11 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.13"
+   "version": "3.10.12"
   },
   "vscode": {
    "interpreter": {
-    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+    "hash": "975233ed6ddd7eb5f50db124c7eb6e9abd7f2428099fbb1c703209662350014b"
    }
   }
  },

From a01181f46e5dab6eb87f86910822169ab6bdcd89 Mon Sep 17 00:00:00 2001
From: yosider <yosider.on.earth@gmail.com>
Date: Tue, 31 Oct 2023 23:37:41 +0900
Subject: [PATCH 06/20] Fix trying to get pointer to None in svm/linear.pyx
 (#5615)

This PR fixes the following bug in `svm/linear.pyx`:

After fitting `LinearSVC` and saving it using pickle, attempting to load and run `predict()` (or `decision_function()` and `predict_proba()` that call `__sync_model()`) results in the following error:
```
AttributeError                            Traceback (most recent call last)
----> 1 lin_svc_load.predict(X)

File .../lib/python3.10/site-packages/cuml/internals/api_decorators.py:188, in _make_decorator_function.<locals>.decorator_function.<locals>.decorator_closure.<locals>.wrapper(*args, **kwargs)
    185     set_api_output_dtype(output_dtype)
    187 if process_return:
--> 188     ret = func(*args, **kwargs)
    189 else:
    190     return func(*args, **kwargs)

File linear.pyx:693, in cuml.svm.linear.LinearSVM.predict()

File linear.pyx:667, in cuml.svm.linear.LinearSVM.__sync_model()

File linear.pyx:303, in cuml.svm.linear.LinearSVMWrapper.__cinit__()

AttributeError: 'NoneType' object has no attribute 'ptr'
```

I identified that the issue is caused by attempting to obtain a pointer to a variable with a None value in `LinearSVMWrapper.__cinit__()`. After fixing this and rebuilding, the code worked as expected.

Minimal code to reproduce the bug:
```python
import pickle
import numpy as np
from cuml.svm import LinearSVC

X = np.random.randn(10, 2)
y = np.random.randint(0, 2, 10)

svc = LinearSVC()
svc.fit(X, y)

with open("tmp.pkl", "wb") as f:
    pickle.dump(svc, f)
with open("tmp.pkl", "rb") as f:
    svc_load = pickle.load(f)

svc_load.predict(X)
```

If this is not the appropriate way to fix the issue, I would appreciate any corrections.

Authors:
  - yosider (https://github.com/yosider)
  - Simon Adorf (https://github.com/csadorf)
  - Dante Gama Dessavre (https://github.com/dantegd)

Approvers:
  - Simon Adorf (https://github.com/csadorf)

URL: https://github.com/rapidsai/cuml/pull/5615
---
 python/cuml/svm/linear.pyx       |  4 ++--
 python/cuml/tests/test_pickle.py | 37 ++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/python/cuml/svm/linear.pyx b/python/cuml/svm/linear.pyx
index a0bcf5d3a5..470e867f60 100644
--- a/python/cuml/svm/linear.pyx
+++ b/python/cuml/svm/linear.pyx
@@ -300,8 +300,8 @@ cdef class LinearSVMWrapper:
         if self.dtype != np.float32 and self.dtype != np.float64:
             raise TypeError('Input data type must be float32 or float64')
 
-        cdef uintptr_t Xptr = <uintptr_t>X.ptr
-        cdef uintptr_t yptr = <uintptr_t>y.ptr
+        cdef uintptr_t Xptr = <uintptr_t>X.ptr if X is not None else 0
+        cdef uintptr_t yptr = <uintptr_t>y.ptr if y is not None else 0
         cdef uintptr_t swptr = <uintptr_t>sampleWeight.ptr \
             if sampleWeight is not None else 0
         cdef size_t nCols = 0
diff --git a/python/cuml/tests/test_pickle.py b/python/cuml/tests/test_pickle.py
index 9d99aab208..950b00a612 100644
--- a/python/cuml/tests/test_pickle.py
+++ b/python/cuml/tests/test_pickle.py
@@ -740,6 +740,43 @@ def assert_model(pickled_model, data):
     pickle_save_load(tmpdir, create_mod, assert_model)
 
 
+@pytest.mark.parametrize("datatype", [np.float32, np.float64])
+@pytest.mark.parametrize(
+    "params", [{"probability": True}, {"probability": False}]
+)
+@pytest.mark.parametrize("multiclass", [True, False])
+def test_linear_svc_pickle(tmpdir, datatype, params, multiclass):
+    result = {}
+
+    def create_mod():
+        model = cuml.svm.LinearSVC(**params)
+        iris = load_iris()
+        iris_selection = np.random.RandomState(42).choice(
+            [True, False], 150, replace=True, p=[0.75, 0.25]
+        )
+        X_train = iris.data[iris_selection]
+        y_train = iris.target[iris_selection]
+        if not multiclass:
+            y_train = (y_train > 0).astype(datatype)
+        data = [X_train, y_train]
+        result["model"] = model.fit(X_train, y_train)
+        return model, data
+
+    def assert_model(pickled_model, data):
+        if result["model"].probability:
+            print("Comparing probabilistic LinearSVC")
+            compare_probabilistic_svm(
+                result["model"], pickled_model, data[0], data[1], 0, 0
+            )
+        else:
+            print("comparing base LinearSVC")
+            pred_before = result["model"].predict(data[0])
+            pred_after = pickled_model.predict(data[0])
+            assert array_equal(pred_before, pred_after)
+
+    pickle_save_load(tmpdir, create_mod, assert_model)
+
+
 @pytest.mark.parametrize("datatype", [np.float32, np.float64])
 @pytest.mark.parametrize("nrows", [unit_param(500)])
 @pytest.mark.parametrize("ncols", [unit_param(16)])

From 4c36b19ae1e91af6746f01da3645a63fd9f66233 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 31 Oct 2023 09:45:45 -0700
Subject: [PATCH 07/20] Allow nightly dependencies and set up consistent
 nightly versions for conda and pip packages (#5607)

This PR applies analogous changes to https://github.com/rapidsai/rmm/pull/1335 and https://github.com/rapidsai/rmm/pull/1347.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Divye Gala (https://github.com/divyegala)
  - Simon Adorf (https://github.com/csadorf)
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: https://github.com/rapidsai/cuml/pull/5607
---
 VERSION                                 |  1 +
 ci/build_cpp.sh                         |  4 ++-
 ci/build_python.sh                      | 10 ++++++
 ci/build_wheel.sh                       | 41 ++++++++++++++++++++-----
 ci/release/apply_wheel_modifications.sh | 22 -------------
 ci/release/update-version.sh            |  7 +++--
 conda/recipes/cuml-cpu/meta.yaml        |  4 +--
 conda/recipes/cuml/meta.yaml            |  4 +--
 conda/recipes/libcuml/meta.yaml         |  4 +--
 python/cuml/VERSION                     |  1 +
 python/cuml/__init__.py                 |  4 +--
 python/cuml/_version.py                 | 21 +++++++++++++
 python/pyproject.toml                   |  5 ++-
 python/setup.py                         |  2 +-
 14 files changed, 86 insertions(+), 44 deletions(-)
 create mode 100644 VERSION
 delete mode 100755 ci/release/apply_wheel_modifications.sh
 create mode 120000 python/cuml/VERSION
 create mode 100644 python/cuml/_version.py

diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000000..a193fff41e
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+23.12.00
diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index bc6b2f23b4..0a6a649fd0 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -9,8 +9,10 @@ export CMAKE_GENERATOR=Ninja
 
 rapids-print-env
 
+version=$(rapids-generate-version)
+
 rapids-logger "Begin cpp build"
 
-rapids-conda-retry mambabuild conda/recipes/libcuml
+RAPIDS_PACKAGE_VERSION=${version} rapids-conda-retry mambabuild conda/recipes/libcuml
 
 rapids-upload-conda-to-s3 cpp
diff --git a/ci/build_python.sh b/ci/build_python.sh
index 9bee12371c..1332062770 100755
--- a/ci/build_python.sh
+++ b/ci/build_python.sh
@@ -9,6 +9,16 @@ export CMAKE_GENERATOR=Ninja
 
 rapids-print-env
 
+package_name="cuml"
+package_dir="python"
+
+version=$(rapids-generate-version)
+git_commit=$(git rev-parse HEAD)
+export RAPIDS_PACKAGE_VERSION=${version} 
+
+echo "${version}" > VERSION
+sed -i "/^__git_commit__/ s/= .*/= \"${git_commit}\"/g" "${package_dir}/${package_name}/_version.py"
+
 rapids-logger "Begin py build"
 
 CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index 979a17014b..0231726b82 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -3,20 +3,47 @@
 
 set -euo pipefail
 
+package_name="cuml"
+package_dir="python"
+
 source rapids-configure-sccache
 source rapids-date-string
 
-# Use gha-tools rapids-pip-wheel-version to generate wheel version then
-# update the necessary files
-version_override="$(rapids-pip-wheel-version ${RAPIDS_DATE_STRING})"
+version=$(rapids-generate-version)
+git_commit=$(git rev-parse HEAD)
 
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
-ci/release/apply_wheel_modifications.sh ${version_override} "-${RAPIDS_PY_CUDA_SUFFIX}"
-echo "The package name and/or version was modified in the package source. The git diff is:"
-git diff
+# This is the version of the suffix with a preceding hyphen. It's used
+# everywhere except in the final wheel name.
+PACKAGE_CUDA_SUFFIX="-${RAPIDS_PY_CUDA_SUFFIX}"
+
+# Patch project metadata files to include the CUDA version suffix and version override.
+pyproject_file="${package_dir}/pyproject.toml"
+
+sed -i "/^name.*cuml/ s/= \"cuml\"/= \"cuml${PACKAGE_CUDA_SUFFIX}\"/g" ${pyproject_file}
+echo "${version}" > VERSION
+sed -i "/^__git_commit__ / s/= .*/= \"${git_commit}\"/g" "${package_dir}/${package_name}/_version.py"
+
+# For nightlies we want to ensure that we're pulling in alphas as well. The
+# easiest way to do so is to augment the spec with a constraint containing a
+# min alpha version that doesn't affect the version bounds but does allow usage
+# of alpha versions for that dependency without --pre
+alpha_spec=''
+if ! rapids-is-release-build; then
+    alpha_spec=',>=0.0.0a0'
+fi
+
+for dep in cudf pylibraft raft-dask rmm; do
+    sed -r -i "s/${dep}==(.*)\"/${dep}${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file}
+done
+
+if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then
+    sed -i "s/cuda-python[<=>\.,0-9]*/cuda-python>=12.0,<13.0/g" ${pyproject_file}
+    sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file}
+fi
 
-cd python/
+cd ${package_dir}
 
 SKBUILD_CONFIGURE_OPTIONS="-DCUML_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DDISABLE_DEPRECATION_WARNINGS=ON -DCPM_cumlprims_mg_SOURCE=${GITHUB_WORKSPACE}/cumlprims_mg/" \
   python -m pip wheel . \
diff --git a/ci/release/apply_wheel_modifications.sh b/ci/release/apply_wheel_modifications.sh
deleted file mode 100755
index fb5971fa5e..0000000000
--- a/ci/release/apply_wheel_modifications.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2023, NVIDIA CORPORATION.
-#
-# Usage: bash apply_wheel_modifications.sh <new_version>
-
-VERSION=${1}
-CUDA_SUFFIX=${2}
-
-# pyproject.toml versions
-sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/pyproject.toml
-
-# pyproject.toml cuda suffixes
-sed -i "s/^name = \"cuml\"/name = \"cuml${CUDA_SUFFIX}\"/g" python/pyproject.toml
-sed -i "s/cudf/cudf${CUDA_SUFFIX}/g" python/pyproject.toml
-sed -i "s/pylibraft/pylibraft${CUDA_SUFFIX}/g" python/pyproject.toml
-sed -i "s/raft-dask/raft-dask${CUDA_SUFFIX}/g" python/pyproject.toml
-sed -i "s/rmm/rmm${CUDA_SUFFIX}/g" python/pyproject.toml
-
-if [[ $CUDA_SUFFIX == "-cu12" ]]; then
-    sed -i "s/cuda-python[<=>\.,0-9]*/cuda-python>=12.0,<13.0/g" python/pyproject.toml
-    sed -i "s/cupy-cuda11x/cupy-cuda12x/g" python/pyproject.toml
-fi
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index 4162f19ce9..9b07a58476 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -35,9 +35,10 @@ function sed_runner() {
 }
 
 
-# __init__.py and pyproject.toml versions
-sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cuml/__init__.py
-sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/pyproject.toml
+# Centralized version file update
+echo "${NEXT_FULL_TAG}" > VERSION
+
+# pyproject.toml versions
 sed_runner "s/rmm==.*\",/rmm==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/pyproject.toml
 sed_runner "s/cudf==.*\",/cudf==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/pyproject.toml
 sed_runner "s/pylibraft==.*\",/pylibraft==${NEXT_SHORT_TAG_PEP440}.*\",/g" python/pyproject.toml
diff --git a/conda/recipes/cuml-cpu/meta.yaml b/conda/recipes/cuml-cpu/meta.yaml
index d4497a65fb..cb88ac22b7 100644
--- a/conda/recipes/cuml-cpu/meta.yaml
+++ b/conda/recipes/cuml-cpu/meta.yaml
@@ -2,7 +2,7 @@
 
 # Usage:
 #   conda build . -c conda-forge -c numba -c rapidsai -c pytorch
-{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
+{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v')  + environ.get('VERSION_SUFFIX', '') %}
 {% set py_version = environ['CONDA_PY'] %}
 {% set date_string = environ['RAPIDS_DATE_STRING'] %}
 
@@ -11,7 +11,7 @@ package:
   version: {{ version }}
 
 source:
-  git_url: ../../..
+  path: ../../..
 
 build:
   number: {{ GIT_DESCRIBE_NUMBER }}
diff --git a/conda/recipes/cuml/meta.yaml b/conda/recipes/cuml/meta.yaml
index 15e9a708b4..d4767a00bc 100644
--- a/conda/recipes/cuml/meta.yaml
+++ b/conda/recipes/cuml/meta.yaml
@@ -1,6 +1,6 @@
 # Copyright (c) 2018-2023, NVIDIA CORPORATION.
 
-{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
+{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
 {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
 {% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
 {% set cuda_major = cuda_version.split('.')[0] %}
@@ -12,7 +12,7 @@ package:
   version: {{ version }}
 
 source:
-  git_url: ../../..
+  path: ../../..
 
 build:
   number: {{ GIT_DESCRIBE_NUMBER }}
diff --git a/conda/recipes/libcuml/meta.yaml b/conda/recipes/libcuml/meta.yaml
index 34d3521869..b190a39625 100644
--- a/conda/recipes/libcuml/meta.yaml
+++ b/conda/recipes/libcuml/meta.yaml
@@ -1,6 +1,6 @@
 # Copyright (c) 2018-2023, NVIDIA CORPORATION.
 
-{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %}
+{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
 {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
 {% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
 {% set cuda_major = cuda_version.split('.')[0] %}
@@ -11,7 +11,7 @@ package:
   name: libcuml-split
 
 source:
-  git_url: ../../..
+  path: ../../..
 
 build:
   ignore_run_exports_from:
diff --git a/python/cuml/VERSION b/python/cuml/VERSION
new file mode 120000
index 0000000000..558194c5a5
--- /dev/null
+++ b/python/cuml/VERSION
@@ -0,0 +1 @@
+../../VERSION
\ No newline at end of file
diff --git a/python/cuml/__init__.py b/python/cuml/__init__.py
index 6c25b5d2d8..62ab93c1b4 100644
--- a/python/cuml/__init__.py
+++ b/python/cuml/__init__.py
@@ -114,9 +114,7 @@
 from cuml.solvers.cd import CD
 from cuml.solvers.sgd import SGD
 from cuml.solvers.qn import QN
-
-# Version configuration
-__version__ = "23.12.00"
+from cuml._version import __version__, __git_commit__
 
 
 def __getattr__(name):
diff --git a/python/cuml/_version.py b/python/cuml/_version.py
new file mode 100644
index 0000000000..87cb6a74d5
--- /dev/null
+++ b/python/cuml/_version.py
@@ -0,0 +1,21 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import importlib.resources
+
+__version__ = (
+    importlib.resources.files("cuml").joinpath("VERSION").read_text().strip()
+)
+__git_commit__ = ""
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 013658ff4c..346f270395 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -48,7 +48,7 @@ filterwarnings = [
 
 [project]
 name = "cuml"
-version = "23.12.00"
+dynamic = ["version"]
 description = "cuML - RAPIDS ML Algorithms"
 readme = { file = "README.md", content-type = "text/markdown" }
 authors = [
@@ -104,6 +104,9 @@ Documentation = "https://docs.rapids.ai/api/cuml/stable/"
 [tool.setuptools]
 license-files = ["LICENSE"]
 
+[tool.setuptools.dynamic]
+version = {file = "cuml/VERSION"}
+
 [tool.black]
 line-length = 79
 target-version = ["py39"]
diff --git a/python/setup.py b/python/setup.py
index 8e6615a2ef..5a30d78201 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -109,6 +109,6 @@ def clean_folder(path):
 packages = find_packages(include=["cuml*"])
 setup(
     packages=packages,
-    package_data={key: ["*.pxd"] for key in packages},
+    package_data={key: ["VERSION", "*.pxd"] for key in packages},
     zip_safe=False,
 )

From c392704f275dcc60e5f00efb6e425574b3d5d8ae Mon Sep 17 00:00:00 2001
From: Simon Adorf <sadorf@nvidia.com>
Date: Tue, 31 Oct 2023 20:08:20 +0100
Subject: [PATCH 08/20] Update instructions for creating a conda build
 environment (#5628)

Authors:
  - Simon Adorf (https://github.com/csadorf)
  - Dante Gama Dessavre (https://github.com/dantegd)

Approvers:
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: https://github.com/rapidsai/cuml/pull/5628
---
 BUILD.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/BUILD.md b/BUILD.md
index e2d54310af..afdcfc86dd 100644
--- a/BUILD.md
+++ b/BUILD.md
@@ -19,8 +19,8 @@ It is recommended to use conda for environment/package management. If doing so,
 
 ```bash
 conda create -n cuml_dev python=3.10
+conda env update -n cuml_dev --file=conda/environments/all_cuda-118_arch-x86_64.yaml
 conda activate cuml_dev
-conda env update --file=conda/environments/all_cuda-118_arch-x86_64.yaml
 ```
 
 ## Installing from Source:

From 02960433bfe597dca371f6e4a0d645fae041d132 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 3 Nov 2023 12:39:56 -0700
Subject: [PATCH 09/20] Use drop_duplicates instead of unique for cudf's pandas
 compatibility mode (#5639)

In pandas, `Series.unique` returns a numpy array (for non-extension types) while `Series.drop_duplicates` returns a `Series`. The two results should otherwise contain the same set of values. In cudf, historically both methods returned a `Series`, and at these stages in cuml's pipeline it knows that it is working with cudf objects. However, if cudf has pandas compatibility mode enabled, then `unique` will return an array to match pandas behavior. In this scenario, the method chaining no longer works because cuml is calling methods on the result of `unique` assuming that it will be a `Series`. To fix this, cuml needs to call `drop_duplicates` instead.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Simon Adorf (https://github.com/csadorf)

URL: https://github.com/rapidsai/cuml/pull/5639
---
 python/cuml/feature_extraction/_vectorizers.py | 4 +++-
 python/cuml/preprocessing/LabelEncoder.py      | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/cuml/feature_extraction/_vectorizers.py b/python/cuml/feature_extraction/_vectorizers.py
index 78172ec690..0133195b20 100644
--- a/python/cuml/feature_extraction/_vectorizers.py
+++ b/python/cuml/feature_extraction/_vectorizers.py
@@ -598,7 +598,9 @@ def fit_transform(self, raw_documents, y=None):
         if self._fixed_vocabulary:
             self.vocabulary_ = self.vocabulary
         else:
-            self.vocabulary_ = tokenized_df["token"].unique().sort_values()
+            self.vocabulary_ = (
+                tokenized_df["token"].drop_duplicates().sort_values()
+            )
 
         count_df = self._count_vocab(tokenized_df)
 
diff --git a/python/cuml/preprocessing/LabelEncoder.py b/python/cuml/preprocessing/LabelEncoder.py
index 882e552511..c8221ff951 100644
--- a/python/cuml/preprocessing/LabelEncoder.py
+++ b/python/cuml/preprocessing/LabelEncoder.py
@@ -180,7 +180,7 @@ def fit(self, y, _classes=None):
         if _classes is not None:
             self.classes_ = _classes
         else:
-            self.classes_ = y.unique().sort_values(
+            self.classes_ = y.drop_duplicates().sort_values(
                 ignore_index=True
             )  # dedupe and sort
 

From b79c09f6ea59cf9dfe48da888281c4048db560e5 Mon Sep 17 00:00:00 2001
From: Simon Adorf <sadorf@nvidia.com>
Date: Wed, 8 Nov 2023 21:15:06 +0100
Subject: [PATCH 10/20] CI: Fix expected ValueError and dask-glm
 incompatibility (#5644)

- Expect all supported types and shapes to work with host mem type.
- Pin dask-glm to 0.3.0.

Authors:
  - Simon Adorf (https://github.com/csadorf)

Approvers:
  - Jake Awe (https://github.com/AyodeAwe)
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: https://github.com/rapidsai/cuml/pull/5644
---
 conda/environments/all_cuda-118_arch-x86_64.yaml |  2 +-
 conda/environments/all_cuda-120_arch-x86_64.yaml |  2 +-
 dependencies.yaml                                |  6 ++----
 python/cuml/tests/test_array.py                  | 13 +++----------
 python/pyproject.toml                            |  2 +-
 5 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index abbf54a4c2..8d4b9ab3ce 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -77,5 +77,5 @@ dependencies:
 - treelite==3.9.1
 - umap-learn==0.5.3
 - pip:
-  - git+https://github.com/dask/dask-glm@main
+  - dask-glm==0.3.0
 name: all_cuda-118_arch-x86_64
diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml
index d2c8747d35..af119ecb72 100644
--- a/conda/environments/all_cuda-120_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -73,5 +73,5 @@ dependencies:
 - treelite==3.9.1
 - umap-learn==0.5.3
 - pip:
-  - git+https://github.com/dask/dask-glm@main
+  - dask-glm==0.3.0
 name: all_cuda-120_arch-x86_64
diff --git a/dependencies.yaml b/dependencies.yaml
index 4387a85fdd..568781a45f 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -346,15 +346,13 @@ dependencies:
         packages:
           - pip
           - pip:
-              # TODO: Figure out what to do with this dependency
-              # since the repo is now archived.
-              - git+https://github.com/dask/dask-glm@main
+              - dask-glm==0.3.0
           # TODO: remove pin once a release that includes fixes for the error
           # is released: https://github.com/rapidsai/cuml/issues/5514
           - hdbscan<=0.8.30
       - output_types: pyproject
         packages:
-          - dask-glm @ git+https://github.com/dask/dask-glm@main
+          - dask-glm==0.3.0
             # TODO: Can we stop pulling from the master branch now that there was a release in October?
           - hdbscan @ git+https://github.com/scikit-learn-contrib/hdbscan.git@master
   test_notebooks:
diff --git a/python/cuml/tests/test_array.py b/python/cuml/tests/test_array.py
index e2e02367c3..c4c479506c 100644
--- a/python/cuml/tests/test_array.py
+++ b/python/cuml/tests/test_array.py
@@ -201,10 +201,10 @@ def test_array_init_from_bytes(data_type, dtype, shape, order, mem_type):
     mem_type=cuml_array_mem_types(),
 )
 @settings(deadline=None)
-def test_array_init_bad(input_type, dtype, shape, order, mem_type):
+def test_array_mem_type(input_type, dtype, shape, order, mem_type):
     """
-    This test ensures that we assert on incorrect combinations of arguments
-    when creating CumlArray
+    Test whether we can create CumlArray from all supported types and array
+    shapes on all supported mem types.
     """
     mem_type = MemoryType.from_str(mem_type)
 
@@ -214,13 +214,6 @@ def test_array_init_bad(input_type, dtype, shape, order, mem_type):
         # Ensure the array is creatable
         array = CumlArray(input_array)
 
-        with pytest.raises(ValueError):
-            bad_dtype = np.float16 if dtype != np.float16 else np.float32
-            CumlArray(input_array, dtype=bad_dtype)
-
-        with pytest.raises(ValueError):
-            CumlArray(input_array, shape=(*array.shape, 1))
-
         input_mem_type = determine_array_memtype(input_array)
         if input_mem_type.is_device_accessible:
             joint_mem_type = input_mem_type
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 346f270395..ed9b4fd45c 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -79,7 +79,7 @@ classifiers = [
 
 [project.optional-dependencies]
 test = [
-    "dask-glm @ git+https://github.com/dask/dask-glm@main",
+    "dask-glm==0.3.0",
     "dask-ml",
     "hdbscan @ git+https://github.com/scikit-learn-contrib/hdbscan.git@master",
     "hypothesis>=6.0,<7",

From 9fed69eb3ca676e607f656ec6831decfe0d816f0 Mon Sep 17 00:00:00 2001
From: Tim Head <betatim@gmail.com>
Date: Thu, 9 Nov 2023 16:27:05 +0100
Subject: [PATCH 11/20] Add rich HTML representation to estimators (#5630)

This adds a Jupyter (and other notebook) rich display hook that produces an HTML widget to represent an estimator in notebooks.

This adds the basics of having estimators displayed as HTML widgets in notebooks and other editors that use the Jupyter notebook "rich display" system.

<img width="304" alt="Screenshot 2023-10-26 at 15 33 37" src="https://github.com/rapidsai/cuml/assets/1448859/9eccb547-d37f-44b9-b284-fe284707764c">

This doesn't yet contain the cool feature of changing colour depending on fit status or the link to the documentation. For that we'd have to depend on a newer version of scikit-learn (or vendor the logic). In this case "newer" actually means "the next version to be released".

WDYT?

Authors:
  - Tim Head (https://github.com/betatim)
  - Simon Adorf (https://github.com/csadorf)
  - Dante Gama Dessavre (https://github.com/dantegd)

Approvers:
  - Simon Adorf (https://github.com/csadorf)

URL: https://github.com/rapidsai/cuml/pull/5630
---
 python/cuml/internals/base.pyx | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/python/cuml/internals/base.pyx b/python/cuml/internals/base.pyx
index 4fb03fdac9..09dda66064 100644
--- a/python/cuml/internals/base.pyx
+++ b/python/cuml/internals/base.pyx
@@ -28,6 +28,8 @@ from cuml.internals.safe_imports import (
 np = cpu_only_import('numpy')
 nvtx_annotate = gpu_only_import_from("nvtx", "annotate", alt=null_decorator)
 
+from sklearn.utils import estimator_html_repr
+
 import cuml
 import cuml.common
 import cuml.internals.logger as logger
@@ -443,6 +445,12 @@ class Base(TagsMixin,
             return {'preserves_dtype': [self.dtype]}
         return {}
 
+    def _repr_mimebundle_(self, **kwargs):
+        """Prepare representations used by jupyter kernels to display estimator"""
+        output = {"text/plain": repr(self)}
+        output["text/html"] = estimator_html_repr(self)
+        return output
+
     def set_nvtx_annotations(self):
         for func_name in ['fit', 'transform', 'predict', 'fit_transform',
                           'fit_predict']:

From 6d5118595db3b9e6800937f1e664e3ce41feedb7 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 9 Nov 2023 12:59:54 -0800
Subject: [PATCH 12/20] Flatten cupy array before feeding to cudf.Series
 (#5651)

Previously, it seems that cudf silently flattened 2D arrays that were passed to the `cudf.Series` constructor, but that is no longer supported, so the test code needs to be updated.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Simon Adorf (https://github.com/csadorf)

URL: https://github.com/rapidsai/cuml/pull/5651
---
 python/cuml/tests/test_input_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cuml/tests/test_input_utils.py b/python/cuml/tests/test_input_utils.py
index a3aef28446..fbbcde2105 100644
--- a/python/cuml/tests/test_input_utils.py
+++ b/python/cuml/tests/test_input_utils.py
@@ -396,7 +396,7 @@ def get_input(
         result = cudf.DataFrame(rand_mat, index=index)
 
     if type == "cudf-series":
-        result = cudf.Series(rand_mat, index=index)
+        result = cudf.Series(rand_mat.reshape(nrows), index=index)
 
     if type == "pandas":
         result = pdDF(cp.asnumpy(rand_mat), index=index)

From 325a7e65e0b3dd9dd0e0e6fbd061fc0f11fcd87c Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Mon, 13 Nov 2023 10:27:26 -0600
Subject: [PATCH 13/20] Use new `rapids-dask-dependency` metapackage for
 managing `dask` versions (#5649)

Currently dask versions are pinned as part of every release cycle and then unpinned for the next development cycle across all of RAPIDS. This introduces a great deal of churn. To centralize the dependency, we have created a metapackage to manage the required dask version and this PR introduces that metapackage as a dependency of cuml.


xref: https://github.com/rapidsai/cudf/pull/14364

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Simon Adorf (https://github.com/csadorf)

Approvers:
  - Simon Adorf (https://github.com/csadorf)
  - Jake Awe (https://github.com/AyodeAwe)

URL: https://github.com/rapidsai/cuml/pull/5649
---
 ci/build_wheel.sh                                |  4 ++++
 ci/release/update-version.sh                     | 11 ++++-------
 ci/test_wheel.sh                                 |  3 ---
 conda/environments/all_cuda-118_arch-x86_64.yaml |  4 +---
 conda/environments/all_cuda-120_arch-x86_64.yaml |  4 +---
 conda/recipes/cuml/meta.yaml                     |  4 +---
 dependencies.yaml                                |  6 +-----
 python/README.md                                 |  7 +++----
 python/pyproject.toml                            |  3 +--
 9 files changed, 16 insertions(+), 30 deletions(-)

diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index 0231726b82..e4941ad1a8 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -38,6 +38,10 @@ for dep in cudf pylibraft raft-dask rmm; do
     sed -r -i "s/${dep}==(.*)\"/${dep}${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file}
 done
 
+for dep in dask-cuda rapids-dask-dependency; do
+    sed -r -i "s/${dep}==(.*)\"/${dep}==\1${alpha_spec}\"/g" ${pyproject_file}
+done
+
 if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then
     sed -i "s/cuda-python[<=>\.,0-9]*/cuda-python>=12.0,<13.0/g" ${pyproject_file}
     sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file}
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index 9b07a58476..6e8f43cea7 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -73,6 +73,7 @@ DEPENDENCIES=(
   librmm
   pylibraft
   raft-dask
+  rapids-dask-dependency
   rmm
 )
 for FILE in dependencies.yaml conda/environments/*.yaml; do
@@ -81,17 +82,13 @@ for FILE in dependencies.yaml conda/environments/*.yaml; do
   done
 done
 
-sed_runner "s|/branch-.*?/|/branch-${NEXT_SHORT_TAG}/|g" README.md
-sed_runner "s|/branch-.*?/|/branch-${NEXT_SHORT_TAG}/|g" python/README.md
+sed_runner "s|/branch-[^/]*/|/branch-${NEXT_SHORT_TAG}/|g" README.md
+sed_runner "s|/branch-[^/]*/|/branch-${NEXT_SHORT_TAG}/|g" python/README.md
+sed_runner "/- rapids-dask-dependency==/ s/==.*/==${NEXT_SHORT_TAG}\.*/g" python/README.md
 
 # Wheel builds clone cumlprims_mg, update its branch
 sed_runner "s/extra-repo-sha: branch-.*/extra-repo-sha: branch-${NEXT_SHORT_TAG}/g" .github/workflows/*.yaml
 
-# Wheel builds install dask-cuda from source, update its branch
-for FILE in .github/workflows/*.yaml; do
-  sed_runner "s/dask-cuda.git@branch-[^\"\s]\+/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" ${FILE};
-done
-
 # CI files
 for FILE in .github/workflows/*.yaml; do
   sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh
index f6c61eabac..d1cb6e8e27 100755
--- a/ci/test_wheel.sh
+++ b/ci/test_wheel.sh
@@ -12,9 +12,6 @@ if [[ "$(arch)" == "aarch64" ]]; then
     python -m pip install cmake
 fi
 
-# Always install latest dask for testing
-python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.12
-
 # echo to expand wildcard before adding `[extra]` requires for pip
 python -m pip install $(echo ./dist/cuml*.whl)[test]
 
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 8d4b9ab3ce..b650ab412b 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -16,12 +16,9 @@ dependencies:
 - cupy>=12.0.0
 - cxx-compiler
 - cython>=3.0.0
-- dask-core>=2023.9.2
 - dask-cuda==23.12.*
 - dask-cudf==23.12.*
 - dask-ml
-- dask>=2023.9.2
-- distributed>=2023.9.2
 - doxygen=1.9.1
 - gcc_linux-64=11.*
 - gmock>=1.13.0
@@ -63,6 +60,7 @@ dependencies:
 - pytest-xdist
 - python>=3.9,<3.11
 - raft-dask==23.12.*
+- rapids-dask-dependency==23.12.*
 - recommonmark
 - rmm==23.12.*
 - scikit-build>=0.13.1
diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml
index af119ecb72..ffe3e3d0ff 100644
--- a/conda/environments/all_cuda-120_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -18,12 +18,9 @@ dependencies:
 - cupy>=12.0.0
 - cxx-compiler
 - cython>=3.0.0
-- dask-core>=2023.9.2
 - dask-cuda==23.12.*
 - dask-cudf==23.12.*
 - dask-ml
-- dask>=2023.9.2
-- distributed>=2023.9.2
 - doxygen=1.9.1
 - gcc_linux-64=11.*
 - gmock>=1.13.0
@@ -59,6 +56,7 @@ dependencies:
 - pytest-xdist
 - python>=3.9,<3.11
 - raft-dask==23.12.*
+- rapids-dask-dependency==23.12.*
 - recommonmark
 - rmm==23.12.*
 - scikit-build>=0.13.1
diff --git a/conda/recipes/cuml/meta.yaml b/conda/recipes/cuml/meta.yaml
index d4767a00bc..bcafb63bb6 100644
--- a/conda/recipes/cuml/meta.yaml
+++ b/conda/recipes/cuml/meta.yaml
@@ -76,15 +76,13 @@ requirements:
     - cudf ={{ minor_version }}
     - cupy >=12.0.0
     - dask-cudf ={{ minor_version }}
-    - dask >=2023.9.2
-    - dask-core>=2023.9.2
-    - distributed >=2023.9.2
     - joblib >=0.11
     - libcuml ={{ version }}
     - libcumlprims ={{ minor_version }}
     - pylibraft ={{ minor_version }}
     - python x.x
     - raft-dask ={{ minor_version }}
+    - rapids-dask-dependency ={{ minor_version }}
     - treelite {{ treelite_version }}
 
 tests:
diff --git a/dependencies.yaml b/dependencies.yaml
index 568781a45f..d6dfc19714 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -175,10 +175,8 @@ dependencies:
       - output_types: [conda, requirements, pyproject]
         packages:
           - cudf==23.12.*
-          - dask>=2023.9.2
           - dask-cuda==23.12.*
           - dask-cudf==23.12.*
-          - distributed>=2023.9.2
           - joblib>=0.11
           - numba>=0.57
             # TODO: Is scipy really a hard dependency, or should
@@ -186,13 +184,11 @@ dependencies:
             # installation/run_constrained for conda)?
           - scipy>=1.8.0
           - raft-dask==23.12.*
+          - rapids-dask-dependency==23.12.*
           - *treelite
       - output_types: [conda, requirements]
         packages:
           - cupy>=12.0.0
-      - output_types: conda
-        packages:
-          - dask-core>=2023.9.2
       - output_types: pyproject
         packages:
           - *treelite_runtime
diff --git a/python/README.md b/python/README.md
index 342ad177ab..4a19e16e99 100644
--- a/python/README.md
+++ b/python/README.md
@@ -38,7 +38,7 @@ example `setup.py --singlegpu`) are:
 
 RAFT's Python and Cython is located in the [RAFT repository](https://github.com/rapidsai/raft/python). It was designed to be included in projects as opposed to be distributed by itself, so at build time, **setup.py creates a symlink from cuML, located in `/python/cuml/raft/` to the Python folder of RAFT**.
 
-For developers that need to modify RAFT code, please refer to the [RAFT Developer Guide](https://github.com/rapidsai/raft/blob/branch-23.04/BUILD.md#developer-guide) for recommendations.
+For developers that need to modify RAFT code, please refer to the [RAFT Developer Guide](https://github.com/rapidsai/raft/blob/branch-23.12/docs/source/build.md) for recommendations.
 
 To configure RAFT at build time:
 
@@ -50,7 +50,7 @@ The RAFT Python code gets included in the cuML build and distributable artifacts
 
 ### Build Requirements
 
-cuML's convenience [development yaml files](https://github.com/rapidsai/cuml/tree/branch-23.04/environments) includes all dependencies required to build cuML.
+cuML's convenience [development yaml files](https://github.com/rapidsai/cuml/tree/branch-23.12/environments) includes all dependencies required to build cuML.
 
 To build cuML's Python package, the following dependencies are required:
 
@@ -70,8 +70,7 @@ Packages required for multigpu algorithms*:
 - ucx-py version matching the cuML version
 - dask-cudf version matching the cuML version
 - nccl>=2.5
-- dask>=2023.9.2
-- distributed>=2023.9.2
+- rapids-dask-dependency==23.12.*
 
 * this can be avoided with `--singlegpu` argument flag.
 
diff --git a/python/pyproject.toml b/python/pyproject.toml
index ed9b4fd45c..34cad4c705 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -61,11 +61,10 @@ dependencies = [
     "cupy-cuda11x>=12.0.0",
     "dask-cuda==23.12.*",
     "dask-cudf==23.12.*",
-    "dask>=2023.9.2",
-    "distributed>=2023.9.2",
     "joblib>=0.11",
     "numba>=0.57",
     "raft-dask==23.12.*",
+    "rapids-dask-dependency==23.12.*",
     "scipy>=1.8.0",
     "treelite==3.9.1",
     "treelite_runtime==3.9.1",

From a5b839f8f2e2ca6647947286b98e39c2d7399ab9 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 14 Nov 2023 09:03:22 -0800
Subject: [PATCH 14/20] Simplify some logic in LabelEncoder (#5648)

I accidentally committed but forgot to push some changes requested by @csadorf in https://github.com/rapidsai/cuml/pull/5639.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Simon Adorf (https://github.com/csadorf)

Approvers:
  - Simon Adorf (https://github.com/csadorf)

URL: https://github.com/rapidsai/cuml/pull/5648
---
 python/cuml/preprocessing/LabelEncoder.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/python/cuml/preprocessing/LabelEncoder.py b/python/cuml/preprocessing/LabelEncoder.py
index c8221ff951..aceed2766a 100644
--- a/python/cuml/preprocessing/LabelEncoder.py
+++ b/python/cuml/preprocessing/LabelEncoder.py
@@ -171,19 +171,19 @@ def fit(self, y, _classes=None):
             A fitted instance of itself to allow method chaining
 
         """
-        if _classes is None:
-            y = self._to_cudf_series(y)
-
         self._validate_keywords()
 
-        self.dtype = y.dtype if y.dtype != cp.dtype("O") else str
-        if _classes is not None:
-            self.classes_ = _classes
-        else:
-            self.classes_ = y.drop_duplicates().sort_values(
-                ignore_index=True
+        if _classes is None:
+            y = (
+                self._to_cudf_series(y)
+                .drop_duplicates()
+                .sort_values(ignore_index=True)
             )  # dedupe and sort
+            self.classes_ = y
+        else:
+            self.classes_ = _classes
 
+        self.dtype = y.dtype if y.dtype != cp.dtype("O") else str
         self._fitted = True
         return self
 

From f2e9459ad41e068adc1090fd4ce4849c6284650b Mon Sep 17 00:00:00 2001
From: Simon Adorf <sadorf@nvidia.com>
Date: Tue, 14 Nov 2023 20:58:00 +0100
Subject: [PATCH 15/20] Adjust assumption regarding valid cudf.Series
 dimensional input. (#5654)

cudf.Series does not accept any multi-dimensional input anymore.

Authors:
  - Simon Adorf (https://github.com/csadorf)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - William Hicks (https://github.com/wphicks)

URL: https://github.com/rapidsai/cuml/pull/5654
---
 python/cuml/testing/strategies.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/python/cuml/testing/strategies.py b/python/cuml/testing/strategies.py
index 39b2068e5d..a8f849866a 100644
--- a/python/cuml/testing/strategies.py
+++ b/python/cuml/testing/strategies.py
@@ -187,9 +187,7 @@ def create_cuml_array_input(input_type, dtype, shape, order):
 
     input_type = "cupy" if input_type is None else input_type
 
-    multidimensional = (
-        isinstance(shape, tuple) and len([d for d in shape if d > 1]) > 1
-    )
+    multidimensional = isinstance(shape, tuple) and len(shape) > 1
     assume(
         not (
             input_type == "series"

From 8b07b000b550ef2af88910355943d728d0489b8b Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 17 Nov 2023 10:36:46 -0600
Subject: [PATCH 16/20] Enable build concurrency for nightly and merge
 triggers. (#5658)

---
 .github/workflows/build.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 776c7ae761..63bc954711 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -22,7 +22,7 @@ on:
         default: nightly
 
 concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
   cancel-in-progress: true
 
 jobs:

From c48eaa18abf6f083cf8b347c3061cf7492616af6 Mon Sep 17 00:00:00 2001
From: Simon Adorf <sadorf@nvidia.com>
Date: Sat, 18 Nov 2023 10:10:50 +0100
Subject: [PATCH 17/20] CI: Pin clang-tidy to 15.0.7. (#5661)

Most recent version supported by libcudacxx.

Compilation introduced as a transitive dependency from rmm.

Authors:
  - Simon Adorf (https://github.com/csadorf)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Dante Gama Dessavre (https://github.com/dantegd)
  - Jake Awe (https://github.com/AyodeAwe)

URL: https://github.com/rapidsai/cuml/pull/5661
---
 conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml | 4 ++--
 cpp/scripts/run-clang-tidy.py                           | 2 +-
 dependencies.yaml                                       | 5 +++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml b/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml
index 515abd8929..3f63d4b3f6 100644
--- a/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml
@@ -8,8 +8,8 @@ channels:
 - nvidia
 dependencies:
 - c-compiler
-- clang-tools==16.0.6
-- clang==16.0.6
+- clang-tools==15.0.7
+- clang==15.0.7
 - cmake>=3.26.4
 - cuda-version=11.8
 - cudatoolkit
diff --git a/cpp/scripts/run-clang-tidy.py b/cpp/scripts/run-clang-tidy.py
index 678534b899..67189573f9 100755
--- a/cpp/scripts/run-clang-tidy.py
+++ b/cpp/scripts/run-clang-tidy.py
@@ -25,7 +25,7 @@
 
 import tomli
 
-EXPECTED_VERSION = "16.0.6"
+EXPECTED_VERSION = "15.0.7"
 VERSION_REGEX = re.compile(r"  LLVM version ([0-9.]+)")
 GPU_ARCH_REGEX = re.compile(r"sm_(\d+)")
 SPACES = re.compile(r"\s+")
diff --git a/dependencies.yaml b/dependencies.yaml
index d6dfc19714..1a875c75ec 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -95,8 +95,9 @@ dependencies:
     common:
       - output_types: [conda, requirements]
         packages:
-          - clang==16.0.6
-          - clang-tools==16.0.6
+          # clang 15 required by libcudacxx.
+          - clang==15.0.7
+          - clang-tools==15.0.7
           - ninja
           - tomli
   common_build:

From f79d40fe92560033be518d1a2a12d35e42cba9c8 Mon Sep 17 00:00:00 2001
From: Simon Adorf <sadorf@nvidia.com>
Date: Tue, 21 Nov 2023 16:20:50 +0100
Subject: [PATCH 18/20] Avoid hard import of sklearn in base module. (#5663)

Fixes #5662.

Authors:
  - Simon Adorf (https://github.com/csadorf)
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: https://github.com/rapidsai/cuml/pull/5663
---
 python/cuml/internals/base.pyx | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/python/cuml/internals/base.pyx b/python/cuml/internals/base.pyx
index 09dda66064..c00ed17f98 100644
--- a/python/cuml/internals/base.pyx
+++ b/python/cuml/internals/base.pyx
@@ -28,7 +28,11 @@ from cuml.internals.safe_imports import (
 np = cpu_only_import('numpy')
 nvtx_annotate = gpu_only_import_from("nvtx", "annotate", alt=null_decorator)
 
-from sklearn.utils import estimator_html_repr
+try:
+    from sklearn.utils import estimator_html_repr
+except ImportError:
+    estimator_html_repr = None
+
 
 import cuml
 import cuml.common
@@ -447,9 +451,10 @@ class Base(TagsMixin,
 
     def _repr_mimebundle_(self, **kwargs):
         """Prepare representations used by jupyter kernels to display estimator"""
-        output = {"text/plain": repr(self)}
-        output["text/html"] = estimator_html_repr(self)
-        return output
+        if estimator_html_repr is not None:
+            output = {"text/plain": repr(self)}
+            output["text/html"] = estimator_html_repr(self)
+            return output
 
     def set_nvtx_annotations(self):
         for func_name in ['fit', 'transform', 'predict', 'fit_transform',

From 1570ed736402e0edaca479bfb3135e41bca1269a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Malte=20F=C3=B6rster?=
 <97973773+mfoerste4@users.noreply.github.com>
Date: Tue, 21 Nov 2023 22:19:39 +0100
Subject: [PATCH 19/20] Enable multiclass svm for sparse input (#5588)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit enables multiclass SVM for sparse input. Previously this was disabled because the `input_to_host_array` functionality does not support sparse arrays, while the data has to be piped through sklearn classes, which require host data.

@dantegd, this is a local workaround that provides an `input_to_host_array` equivalent for sparse data without the complexity of providing the whole functionality of that function. Please have a look at whether this is an acceptable solution for this use case.

FYI, @tfeher

Authors:
  - Malte Förster (https://github.com/mfoerste4)
  - Tamas Bela Feher (https://github.com/tfeher)
  - Simon Adorf (https://github.com/csadorf)

Approvers:
  - Tamas Bela Feher (https://github.com/tfeher)
  - Simon Adorf (https://github.com/csadorf)

URL: https://github.com/rapidsai/cuml/pull/5588
---
 cpp/src/svm/kernelcache.cuh          | 12 ++++++++++--
 python/cuml/common/__init__.py       |  4 +++-
 python/cuml/internals/input_utils.py | 14 ++++++++++++++
 python/cuml/multiclass/multiclass.py | 19 +++++++++++++++----
 python/cuml/svm/svc.pyx              | 10 ++++------
 python/cuml/tests/test_pickle.py     |  3 ---
 python/cuml/tests/test_svm.py        |  9 +++++++--
 7 files changed, 53 insertions(+), 18 deletions(-)

diff --git a/cpp/src/svm/kernelcache.cuh b/cpp/src/svm/kernelcache.cuh
index e20c4d5d12..4583581d2d 100644
--- a/cpp/src/svm/kernelcache.cuh
+++ b/cpp/src/svm/kernelcache.cuh
@@ -130,8 +130,16 @@ class BatchCache : public raft::cache::Cache<math_t> {
     RAFT_CUDA_TRY(cudaMemsetAsync(tmp_buffer, 0, n_ws * 2 * sizeof(int), stream));
 
     // Init cub buffers
-    cub::DeviceRadixSort::SortKeys(
-      NULL, d_temp_storage_size, tmp_buffer, tmp_buffer, n_ws, 0, sizeof(int) * 8, stream);
+    cub::DeviceRadixSort::SortPairs(NULL,
+                                    d_temp_storage_size,
+                                    tmp_buffer,
+                                    tmp_buffer,
+                                    tmp_buffer,
+                                    tmp_buffer,
+                                    n_ws,
+                                    0,
+                                    sizeof(int) * 8,
+                                    stream);
     d_temp_storage.resize(d_temp_storage_size, stream);
   }
 
diff --git a/python/cuml/common/__init__.py b/python/cuml/common/__init__.py
index 6a46462878..e267bf668b 100644
--- a/python/cuml/common/__init__.py
+++ b/python/cuml/common/__init__.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -31,6 +31,7 @@
 
 from cuml.internals.input_utils import input_to_cuml_array
 from cuml.internals.input_utils import input_to_host_array
+from cuml.internals.input_utils import input_to_host_array_with_sparse_support
 
 from cuml.internals.memory_utils import rmm_cupy_ary
 from cuml.internals.memory_utils import set_global_output_type
@@ -59,6 +60,7 @@
     "has_scipy",
     "input_to_cuml_array",
     "input_to_host_array",
+    "input_to_host_array_with_sparse_support",
     "rmm_cupy_ary",
     "set_global_output_type",
     "using_device_type",
diff --git a/python/cuml/internals/input_utils.py b/python/cuml/internals/input_utils.py
index bb9e8bc3e3..edcbffabaa 100644
--- a/python/cuml/internals/input_utils.py
+++ b/python/cuml/internals/input_utils.py
@@ -497,6 +497,20 @@ def input_to_host_array(
     return out_data._replace(array=out_data.array.to_output("numpy"))
 
 
+def input_to_host_array_with_sparse_support(X):
+    _array_type, is_sparse = determine_array_type_full(X)
+    if is_sparse:
+        if _array_type == "cupy":
+            return SparseCumlArray(X).to_output(output_type="scipy")
+        elif _array_type == "cuml":
+            return X.to_output(output_type="scipy")
+        elif _array_type == "numpy":
+            return X
+        else:
+            raise ValueError(f"Unsupported sparse array type: {_array_type}.")
+    return input_to_host_array(X).array
+
+
 def convert_dtype(X, to_dtype=np.float32, legacy=True, safe_dtype=True):
     """
     Convert X to be of dtype `dtype`, raising a TypeError
diff --git a/python/cuml/multiclass/multiclass.py b/python/cuml/multiclass/multiclass.py
index e97de7256b..65b378a17b 100644
--- a/python/cuml/multiclass/multiclass.py
+++ b/python/cuml/multiclass/multiclass.py
@@ -20,7 +20,15 @@
 from cuml.internals.import_utils import has_sklearn
 from cuml.internals.mixins import ClassifierMixin
 from cuml.common.doc_utils import generate_docstring
-from cuml.common import input_to_host_array
+from cuml.common import (
+    input_to_host_array,
+    input_to_host_array_with_sparse_support,
+)
+from cuml.internals.input_utils import (
+    input_to_cupy_array,
+    determine_array_type_full,
+)
+from cuml.internals.array_sparse import SparseCumlArray
 from cuml.internals import _deprecate_pos_args
 
 
@@ -142,7 +150,9 @@ def fit(self, X, y) -> "MulticlassClassifier":
                 + ", must be one of "
                 '{"ovr", "ovo"}'
             )
-        X = input_to_host_array(X).array
+
+        X = input_to_host_array_with_sparse_support(X)
+
         y = input_to_host_array(y).array
         with cuml.internals.exit_internal_api():
             self.multiclass_estimator.fit(X, y)
@@ -160,7 +170,8 @@ def predict(self, X) -> CumlArray:
         """
         Predict using multi class classifier.
         """
-        X = input_to_host_array(X).array
+        X = input_to_host_array_with_sparse_support(X)
+
         with cuml.internals.exit_internal_api():
             return self.multiclass_estimator.predict(X)
 
@@ -177,7 +188,7 @@ def decision_function(self, X) -> CumlArray:
         """
         Calculate the decision function.
         """
-        X = input_to_host_array(X).array
+        X = input_to_host_array_with_sparse_support(X)
         with cuml.internals.exit_internal_api():
             return self.multiclass_estimator.decision_function(X)
 
diff --git a/python/cuml/svm/svc.pyx b/python/cuml/svm/svc.pyx
index 2fff2672fd..d5d5c35e3e 100644
--- a/python/cuml/svm/svc.pyx
+++ b/python/cuml/svm/svc.pyx
@@ -35,7 +35,7 @@ from cuml.common.doc_utils import generate_docstring
 from cuml.internals.logger import warn
 from pylibraft.common.handle cimport handle_t
 from pylibraft.common.interruptible import cuda_interruptible
-from cuml.common import input_to_cuml_array, input_to_host_array
+from cuml.common import input_to_cuml_array, input_to_host_array, input_to_host_array_with_sparse_support
 from cuml.internals.input_utils import input_to_cupy_array, determine_array_type_full
 from cuml.preprocessing import LabelEncoder
 from libcpp cimport nullptr
@@ -449,7 +449,7 @@ class SVC(SVMBase,
 
         # Currently CalibratedClassifierCV expects data on the host, see
         # https://github.com/rapidsai/cuml/issues/2608
-        X = input_to_host_array(X).array
+        X = input_to_host_array_with_sparse_support(X)
         y = input_to_host_array(y).array
 
         if not has_sklearn():
@@ -485,8 +485,6 @@ class SVC(SVMBase,
             return self._fit_proba(X, y, sample_weight)
 
         if self.n_classes_ > 2:
-            if is_sparse:
-                raise ValueError("Multiclass SVM does not support sparse input.")
             return self._fit_multiclass(X, y, sample_weight)
 
         if is_sparse:
@@ -594,7 +592,7 @@ class SVC(SVMBase,
         if self.probability:
             self._check_is_fitted('prob_svc')
 
-            X = input_to_host_array(X).array
+            X = input_to_host_array_with_sparse_support(X)
 
             with cuml.internals.exit_internal_api():
                 preds = self.prob_svc.predict(X)
@@ -628,7 +626,7 @@ class SVC(SVMBase,
         if self.probability:
             self._check_is_fitted('prob_svc')
 
-            X = input_to_host_array(X).array
+            X = input_to_host_array_with_sparse_support(X)
 
             # Exit the internal API when calling sklearn code (forces numpy
             # conversion)
diff --git a/python/cuml/tests/test_pickle.py b/python/cuml/tests/test_pickle.py
index 950b00a612..e1cfc84609 100644
--- a/python/cuml/tests/test_pickle.py
+++ b/python/cuml/tests/test_pickle.py
@@ -705,9 +705,6 @@ def assert_second_model(pickled_model, X):
 def test_svc_pickle(tmpdir, datatype, params, multiclass, sparse):
     result = {}
 
-    if sparse and multiclass:
-        pytest.skip("Multiclass SVC does not support sparse input")
-
     if sparse and params["probability"]:
         pytest.skip("Probabilistic SVC does not support sparse input")
 
diff --git a/python/cuml/tests/test_svm.py b/python/cuml/tests/test_svm.py
index a0f82f5dc5..5ae8895be1 100644
--- a/python/cuml/tests/test_svm.py
+++ b/python/cuml/tests/test_svm.py
@@ -41,8 +41,8 @@
 np = cpu_only_import("numpy")
 cuda = gpu_only_import_from("numba", "cuda")
 
-
 cudf = gpu_only_import("cudf")
+scipy_sparse = cpu_only_import("scipy.sparse")
 
 IS_ARM = platform.processor() == "aarch64"
 
@@ -176,13 +176,18 @@ def test_svm_skl_cmp_datasets(params, dataset, n_rows, n_cols):
 
 
 @pytest.mark.parametrize("params", [{"kernel": "rbf", "C": 1, "gamma": 1}])
+@pytest.mark.parametrize("sparse", [True, False])
 def test_svm_skl_cmp_multiclass(
-    params, dataset="classification2", n_rows=100, n_cols=6
+    params, sparse, dataset="classification2", n_rows=100, n_cols=6
 ):
     X_train, X_test, y_train, y_test = make_dataset(
         dataset, n_rows, n_cols, n_classes=3, n_informative=6
     )
 
+    if sparse:
+        X_train = scipy_sparse.csr_matrix(X_train)
+        X_test = scipy_sparse.csr_matrix(X_test)
+
     # Default to numpy for testing
     with cuml.using_output_type("numpy"):
 

From 21fbf04d3c60aa0e0958b22c1e1130b24bb31ef9 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan <jm.yuan@outlook.com>
Date: Wed, 22 Nov 2023 05:32:44 +0800
Subject: [PATCH 20/20] Basic implementation of `OrdinalEncoder`. (#5646)

- Implement `OrdinalEncoder`.
- Implement dask version.
- Fix dask transformers with DataFrame input by using `dask_cudf` to construct return df.

Some other scikit-learn features are not available yet, for instance, `encoded_missing_value`, `min_frequency`, and `max_categories`.

The implementation is mostly based on the existing one hot encoder and label encoder.

I'm a bit confused by the `output_type` parameter and not sure how strictly it's enforced. I looked around, and it seems some estimators can ignore this parameter in their returns. It would be great if there were a guideline on how to handle this parameter, along with https://github.com/rapidsai/cuml/issues/5645 .

Close https://github.com/rapidsai/cuml/issues/4456 .

Authors:
  - Jiaming Yuan (https://github.com/trivialfis)
  - Simon Adorf (https://github.com/csadorf)

Approvers:
  - Simon Adorf (https://github.com/csadorf)

URL: https://github.com/rapidsai/cuml/pull/5646
---
 python/cuml/common/doc_utils.py               |   3 +-
 python/cuml/dask/common/base.py               |   3 +-
 python/cuml/dask/preprocessing/__init__.py    |   3 +-
 python/cuml/dask/preprocessing/encoders.py    | 173 +++++--
 python/cuml/preprocessing/__init__.py         |   3 +-
 python/cuml/preprocessing/encoders.py         | 465 +++++++++++++-----
 .../cuml/preprocessing/ordinalencoder_mg.py   |  49 ++
 .../tests/dask/test_dask_ordinal_encoder.py   | 117 +++++
 python/cuml/tests/test_ordinal_encoder.py     | 133 +++++
 9 files changed, 777 insertions(+), 172 deletions(-)
 create mode 100644 python/cuml/preprocessing/ordinalencoder_mg.py
 create mode 100644 python/cuml/tests/dask/test_dask_ordinal_encoder.py
 create mode 100644 python/cuml/tests/test_ordinal_encoder.py

diff --git a/python/cuml/common/doc_utils.py b/python/cuml/common/doc_utils.py
index 5421bbb6d3..03054f0664 100644
--- a/python/cuml/common/doc_utils.py
+++ b/python/cuml/common/doc_utils.py
@@ -94,6 +94,8 @@
     "    Ignored when return_sparse=False.\n"
     "    If True, values in the inverse transform below this parameter\n"
     "    are clipped to 0.",
+    None: "{name} : None\n"
+    "    Ignored. This parameter exists for compatibility only.",
 }
 
 _parameter_possible_values = [
@@ -222,7 +224,6 @@ def deco(func):
         if (
             "X" in params or "y" in params or parameters
         ) and not skip_parameters_heading:
-
             func.__doc__ += "\nParameters\n----------\n"
 
         # Check if we want to prepend the parameters
diff --git a/python/cuml/dask/common/base.py b/python/cuml/dask/common/base.py
index 718056e01c..a9949310be 100644
--- a/python/cuml/dask/common/base.py
+++ b/python/cuml/dask/common/base.py
@@ -36,6 +36,7 @@
 np = cpu_only_import("numpy")
 
 
+dask_cudf = gpu_only_import("dask_cudf")
 dcDataFrame = gpu_only_import_from("dask_cudf.core", "DataFrame")
 
 
@@ -343,7 +344,7 @@ def _run_parallel_func(
             if output_futures:
                 return self.client.compute(preds)
             else:
-                output = dask.dataframe.from_delayed(preds)
+                output = dask_cudf.from_delayed(preds)
                 return output if delayed else output.persist()
         else:
             raise ValueError(
diff --git a/python/cuml/dask/preprocessing/__init__.py b/python/cuml/dask/preprocessing/__init__.py
index 17380238ef..f5959467ae 100644
--- a/python/cuml/dask/preprocessing/__init__.py
+++ b/python/cuml/dask/preprocessing/__init__.py
@@ -13,12 +13,13 @@
 # limitations under the License.
 #
 
+from cuml.dask.preprocessing.encoders import OneHotEncoder, OrdinalEncoder
 from cuml.dask.preprocessing.label import LabelBinarizer
-from cuml.dask.preprocessing.encoders import OneHotEncoder
 from cuml.dask.preprocessing.LabelEncoder import LabelEncoder
 
 __all__ = [
     "LabelBinarizer",
     "OneHotEncoder",
+    "OrdinalEncoder",
     "LabelEncoder",
 ]
diff --git a/python/cuml/dask/preprocessing/encoders.py b/python/cuml/dask/preprocessing/encoders.py
index 0033f89eca..8bf2503578 100644
--- a/python/cuml/dask/preprocessing/encoders.py
+++ b/python/cuml/dask/preprocessing/encoders.py
@@ -12,23 +12,46 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from dask_cudf.core import Series as daskSeries
+from collections.abc import Sequence
+
 from cuml.common import with_cupy_rmm
+from cuml.dask.common.base import (
+    BaseEstimator,
+    DelayedInverseTransformMixin,
+    DelayedTransformMixin,
+)
+from cuml.internals.safe_imports import gpu_only_import_from, gpu_only_import
+from dask_cudf.core import Series as daskSeries
+from toolz import first
 
-from cuml.dask.common.base import BaseEstimator
-from cuml.dask.common.base import DelayedTransformMixin
-from cuml.dask.common.base import DelayedInverseTransformMixin
+dask_cudf = gpu_only_import("dask_cudf")
+dcDataFrame = gpu_only_import_from("dask_cudf.core", "DataFrame")
 
-from toolz import first
 
-from collections.abc import Sequence
-from cuml.internals.safe_imports import gpu_only_import_from
+class DelayedFitTransformMixin:
+    def fit_transform(self, X, delayed=True):
+        """Fit the encoder to X, then transform X. Equivalent to fit(X).transform(X).
 
-dcDataFrame = gpu_only_import_from("dask_cudf.core", "DataFrame")
+        Parameters
+        ----------
+        X : Dask cuDF DataFrame or CuPy backed Dask Array
+            The data to encode.
+        delayed : bool (default = True)
+            Whether to execute as a delayed task or eager.
+
+        Returns
+        -------
+        out : Dask cuDF DataFrame or CuPy backed Dask Array
+            Distributed object containing the transformed data
+        """
+        return self.fit(X).transform(X, delayed=delayed)
 
 
 class OneHotEncoder(
-    BaseEstimator, DelayedTransformMixin, DelayedInverseTransformMixin
+    BaseEstimator,
+    DelayedTransformMixin,
+    DelayedInverseTransformMixin,
+    DelayedFitTransformMixin,
 ):
     """
     Encode categorical features as a one-hot numeric array.
@@ -83,13 +106,9 @@ class OneHotEncoder(
         will be denoted as None.
     """
 
-    def __init__(self, *, client=None, verbose=False, **kwargs):
-        super().__init__(client=client, verbose=verbose, **kwargs)
-
     @with_cupy_rmm
     def fit(self, X):
-        """
-        Fit a multi-node multi-gpu OneHotEncoder to X.
+        """Fit a multi-node multi-gpu OneHotEncoder to X.
 
         Parameters
         ----------
@@ -111,10 +130,9 @@ def fit(self, X):
 
         return self
 
-    def fit_transform(self, X, delayed=True):
-        """
-        Fit OneHotEncoder to X, then transform X.
-        Equivalent to fit(X).transform(X).
+    @with_cupy_rmm
+    def transform(self, X, delayed=True):
+        """Transform X using one-hot encoding.
 
         Parameters
         ----------
@@ -126,52 +144,137 @@ def fit_transform(self, X, delayed=True):
         Returns
         -------
         out : Dask cuDF DataFrame or CuPy backed Dask Array
-            Distributed object containing the transformed data
+            Distributed object containing the transformed input.
         """
-        return self.fit(X).transform(X, delayed=delayed)
+        return self._transform(
+            X,
+            n_dims=2,
+            delayed=delayed,
+            output_dtype=self._get_internal_model().dtype,
+            output_collection_type="cupy",
+        )
 
     @with_cupy_rmm
-    def transform(self, X, delayed=True):
-        """
-        Transform X using one-hot encoding.
+    def inverse_transform(self, X, delayed=True):
+        """Convert the data back to the original representation. In case unknown
+        categories are encountered (all zeros in the one-hot encoding), ``None`` is used
+        to represent this category.
 
         Parameters
         ----------
-        X : Dask cuDF DataFrame or CuPy backed Dask Array
-            The data to encode.
+        X : CuPy backed Dask Array, shape [n_samples, n_encoded_features]
+            The transformed data.
         delayed : bool (default = True)
             Whether to execute as a delayed task or eager.
 
         Returns
         -------
-        out : Dask cuDF DataFrame or CuPy backed Dask Array
-            Distributed object containing the transformed input.
+        X_tr : Dask cuDF DataFrame or CuPy backed Dask Array
+            Distributed object containing the inverse transformed array.
+        """
+        dtype = self._get_internal_model().dtype
+        return self._inverse_transform(
+            X,
+            n_dims=2,
+            delayed=delayed,
+            output_dtype=dtype,
+            output_collection_type=self.datatype,
+        )
+
+
+class OrdinalEncoder(
+    BaseEstimator,
+    DelayedTransformMixin,
+    DelayedInverseTransformMixin,
+    DelayedFitTransformMixin,
+):
+    """Encode categorical features as an integer array.
+
+    The input to this transformer should be a :py:class:`dask_cudf.DataFrame` or a
+    :py:class:`dask.array.Array` backed by cupy, denoting the unique values taken on by
+    categorical (discrete) features. The features are converted to ordinal
+    integers. This results in a single column of integers (0 to n_categories - 1) per
+    feature.
+
+    Parameters
+    ----------
+    categories : :py:class:`cupy.ndarray` or :py:class:`cudf.DataFrame`, default='auto'
+        Categories (unique values) per feature. All categories are expected to
+        fit on one GPU.
+        - 'auto' : Determine categories automatically from the training data.
+        - DataFrame/ndarray : ``categories[col]`` holds the categories expected
+          in the feature col.
+    handle_unknown : {'error', 'ignore'}, default='error'
+        Whether to raise an error or ignore if an unknown categorical feature is
+        present during transform (default is to raise). When this parameter is set
+        to 'ignore' and an unknown category is encountered during transform, the
+        resulting encoded value would be null when output type is cudf
+        dataframe.
+    verbose : int or boolean, default=False
+        Sets logging level. It must be one of `cuml.common.logger.level_*`.  See
+        :ref:`verbosity-levels` for more info.
+    """
+
+    @with_cupy_rmm
+    def fit(self, X):
+        """Fit OrdinalEncoder to X.
+
+        Parameters
+        ----------
+        X : :py:class:`dask_cudf.DataFrame` or a CuPy backed :py:class:`dask.array.Array`.
+            shape = (n_samples, n_features) The data to determine the categories of each
+            feature.
+
+        Returns
+        -------
+        self
+        """
+        from cuml.preprocessing.ordinalencoder_mg import OrdinalEncoderMG
+
+        el = first(X) if isinstance(X, Sequence) else X
+        self.datatype = (
+            "cudf" if isinstance(el, (dcDataFrame, daskSeries)) else "cupy"
+        )
+
+        self._set_internal_model(OrdinalEncoderMG(**self.kwargs).fit(X))
+
+        return self
+
+    @with_cupy_rmm
+    def transform(self, X, delayed=True):
+        """Transform X using ordinal encoding.
+
+        Parameters
+        ----------
+        X : :py:class:`dask_cudf.DataFrame` or cupy backed dask array.  The data to
+            encode.
+
+        Returns
+        -------
+        X_out :
+            Transformed input.
         """
         return self._transform(
             X,
             n_dims=2,
             delayed=delayed,
             output_dtype=self._get_internal_model().dtype,
-            output_collection_type="cupy",
+            output_collection_type=self.datatype,
         )
 
     @with_cupy_rmm
     def inverse_transform(self, X, delayed=True):
-        """
-        Convert the data back to the original representation.
-        In case unknown categories are encountered (all zeros in the
-        one-hot encoding), ``None`` is used to represent this category.
+        """Convert the data back to the original representation.
 
         Parameters
         ----------
-        X : CuPy backed Dask Array, shape [n_samples, n_encoded_features]
-            The transformed data.
+        X : :py:class:`dask_cudf.DataFrame` or cupy backed dask array.
         delayed : bool (default = True)
             Whether to execute as a delayed task or eager.
 
         Returns
         -------
-        X_tr : Dask cuDF DataFrame or CuPy backed Dask Array
+        X_tr :
             Distributed object containing the inverse transformed array.
         """
         dtype = self._get_internal_model().dtype
diff --git a/python/cuml/preprocessing/__init__.py b/python/cuml/preprocessing/__init__.py
index 368c570b09..fc07aba50c 100644
--- a/python/cuml/preprocessing/__init__.py
+++ b/python/cuml/preprocessing/__init__.py
@@ -16,7 +16,7 @@
 from cuml.model_selection import train_test_split
 from cuml.preprocessing.LabelEncoder import LabelEncoder
 from cuml.preprocessing.label import LabelBinarizer, label_binarize
-from cuml.preprocessing.encoders import OneHotEncoder
+from cuml.preprocessing.encoders import OneHotEncoder, OrdinalEncoder
 from cuml.preprocessing.TargetEncoder import TargetEncoder
 from cuml.preprocessing import text
 
@@ -63,6 +63,7 @@
     "MissingIndicator",
     "Normalizer",
     "OneHotEncoder",
+    "OrdinalEncoder",
     "PolynomialFeatures",
     "PowerTransformer",
     "QuantileTransformer",
diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py
index 32a8defc69..272655b552 100644
--- a/python/cuml/preprocessing/encoders.py
+++ b/python/cuml/preprocessing/encoders.py
@@ -13,31 +13,165 @@
 # limitations under the License.
 #
 import warnings
+from typing import List, Optional, TypeVar
+
 import cuml.internals.logger as logger
-from cuml.internals.safe_imports import gpu_only_import_from
 from cudf import DataFrame, Series
-from cuml.preprocessing import LabelEncoder
 from cuml import Base
+from cuml.common.doc_utils import generate_docstring
 from cuml.common.exceptions import NotFittedError
-from cuml.internals.safe_imports import gpu_only_import
-from cuml.internals.safe_imports import cpu_only_import
+from cuml.internals.safe_imports import (
+    cpu_only_import,
+    gpu_only_import,
+    gpu_only_import_from,
+)
+from cuml.preprocessing import LabelEncoder
 
 np = cpu_only_import("numpy")
+cudf = gpu_only_import("cudf")
 cp = gpu_only_import("cupy")
 cupyx = gpu_only_import("cupyx")
 
 GenericIndex = gpu_only_import_from("cudf", "GenericIndex")
 
 
-class OneHotEncoder(Base):
+class CheckFeaturesMixIn:
+    def _check_n_features(self, X, reset: bool = False):
+        """Record ``X.shape[1]`` (and column names) when ``reset`` is true;
+        otherwise raise if it differs from the count recorded earlier."""
+        n_features = X.shape[1]
+        if reset:
+            self.n_features_in_ = n_features
+            if hasattr(X, "columns"):
+                self.feature_names_in_ = [str(c) for c in X.columns]
+            elif hasattr(self, "feature_names_in_"):
+                # Refit on unnamed input: drop names left by an earlier fit.
+                del self.feature_names_in_
+        elif not hasattr(self, "n_features_in_"):
+            raise RuntimeError(
+                "The reset parameter is False but there is no "
+                "n_features_in_ attribute. Is this estimator fitted?"
+            )
+        elif n_features != self.n_features_in_:
+            raise ValueError(
+                f"X has {n_features} features, but this {type(self).__name__} "
+                f"is expecting {self.n_features_in_} features as input."
+            )
+
+
+class BaseEncoder(Base, CheckFeaturesMixIn):
+    """Base implementation for encoding categorical values, uses
+    :py:class:`~cuml.preprocessing.LabelEncoder` for obtaining unique values.
+
+    Parameters
+    ----------
+    handle : cuml.Handle
+        Specifies the cuml.handle that holds internal CUDA state for
+        computations in this model. Most importantly, this specifies the CUDA
+        stream that will be used for the model's computations, so users can
+        run different models concurrently in different streams by creating
+        handles in several streams. If it is None, a new one is created.
+    verbose : int or boolean, default=False
+        Sets logging level. It must be one of `cuml.common.logger.level_*`.
+        See :ref:`verbosity-levels` for more info.
+    output_type : {'input', 'array', 'dataframe', 'series', 'df_obj', \
+        'numba', 'cupy', 'numpy', 'cudf', 'pandas'}, default=None
+        Return results and set estimator attributes to the indicated output
+        type. If None, the output type set at the module level
+        (`cuml.global_settings.output_type`) will be used. See
+        :ref:`output-data-type-configuration` for more info.
+    """
+
+    def _set_input_type(self, value):
+        if self.input_type is None:
+            self.input_type = value
+
+    def _check_input(self, X, is_categories=False):
+        """If input is cupy, convert it to a DataFrame with 0 copies."""
+        if isinstance(X, cp.ndarray):
+            self._set_input_type("array")
+            if is_categories:
+                X = X.transpose()
+            return DataFrame(X)
+        else:
+            self._set_input_type("df")
+            return X
+
+    def _check_input_fit(self, X, is_categories=False):
+        """Helper function used in fit, can be overridden in subclasses."""
+        if not is_categories:
+            # Only the training data defines `n_features_in_`, not categories.
+            self._check_n_features(X, reset=True)
+        return self._check_input(X, is_categories=is_categories)
+
+    def _unique(self, inp):
+        """Helper function used in fit. Can be overridden in subclasses."""
+        # Default implementation passes input through directly since this is
+        # performed in `LabelEncoder.fit()`
+        return inp
+
+    def _has_unknown(self, X_cat, encoder_cat):
+        """Check whether X_cat has categories missing from encoder_cat."""
+        return not X_cat.isin(encoder_cat).all()
+
+    def _fit(self, X, need_drop: bool):
+        """Fit one LabelEncoder per feature and, if asked, set ``drop_idx_``."""
+        X = self._check_input_fit(X)
+        if isinstance(self.categories, str) and self.categories == "auto":
+            self._features = X.columns
+            self._encoders = {
+                feature: LabelEncoder(
+                    handle=self.handle,
+                    verbose=self.verbose,
+                    output_type=self.output_type,
+                    handle_unknown=self.handle_unknown,
+                ).fit(self._unique(X[feature]))
+                for feature in self._features
+            }
+        else:
+            self.categories = self._check_input_fit(self.categories, True)
+            self._features = self.categories.columns
+            if len(self._features) != X.shape[1]:
+                raise ValueError(
+                    "Shape mismatch: if categories is not 'auto',"
+                    " it has to be of shape (n_features, _)."
+                )
+            self._encoders = dict()
+            for feature in self._features:
+                le = LabelEncoder(
+                    handle=self.handle,
+                    verbose=self.verbose,
+                    output_type=self.output_type,
+                    handle_unknown=self.handle_unknown,
+                )
+                self._encoders[feature] = le.fit(self.categories[feature])
+                if self.handle_unknown == "error" and self._has_unknown(
+                    X[feature], self._encoders[feature].classes_
+                ):
+                    raise KeyError(
+                        "Found unknown categories in column {0}"
+                        " during fit".format(feature)
+                    )
+
+        if need_drop:
+            self.drop_idx_ = self._compute_drop_idx()
+        self._fitted = True
+
+    @property
+    def categories_(self):
+        """Returns the categories of each feature, in feature order."""
+        return [self._encoders[f].classes_ for f in self._features]
+
+
+class OneHotEncoder(BaseEncoder):
     """
     Encode categorical features as a one-hot numeric array.
-    The input to this estimator should be a cuDF.DataFrame or a cupy.ndarray,
-    denoting the unique values taken on by categorical (discrete) features.
-    The features are encoded using a one-hot (aka 'one-of-K' or 'dummy')
-    encoding scheme. This creates a binary column for each category and
-    returns a sparse matrix or dense array (depending on the ``sparse``
-    parameter).
+    The input to this estimator should be a :py:class:`cudf.DataFrame` or a
+    :py:class:`cupy.ndarray`, denoting the unique values taken on by categorical
+    (discrete) features.  The features are encoded using a one-hot (aka 'one-of-K' or
+    'dummy') encoding scheme. This creates a binary column for each category and returns
+    a sparse matrix or dense array (depending on the ``sparse`` parameter).
+
     By default, the encoder derives the categories based on the unique values
     in each feature. Alternatively, you can also specify the `categories`
     manually.
@@ -105,7 +239,6 @@ class OneHotEncoder(Base):
         ``drop_idx_[i]`` is the index in ``categories_[i]`` of the category to
         be dropped for each feature. None if all the transformed features will
         be retained.
-
     """
 
     def __init__(
@@ -165,7 +298,7 @@ def _check_is_fitted(self):
             raise NotFittedError(msg)
 
     def _compute_drop_idx(self):
-        """Helper to compute indices to drop from category to drop"""
+        """Helper to compute indices to drop from category to drop."""
         if self.drop is None:
             return None
         elif isinstance(self.drop, str) and self.drop == "first":
@@ -209,141 +342,46 @@ def _compute_drop_idx(self):
             )
             raise ValueError(msg.format(type(self.drop)))
 
-    @property
-    def categories_(self):
-        """
-        Returns categories used for the one hot encoding in the correct order.
-        """
-        return [self._encoders[f].classes_ for f in self._features]
-
-    def _set_input_type(self, value):
-        if self.input_type is None:
-            self.input_type = value
-
-    def _check_input(self, X, is_categories=False):
-        """
-        If input is cupy, convert it to a DataFrame with 0 copies
-        """
-        if isinstance(X, cp.ndarray):
-            self._set_input_type("array")
-            if is_categories:
-                X = X.transpose()
-            return DataFrame(X)
-        else:
-            self._set_input_type("df")
-            return X
-
     def _check_input_fit(self, X, is_categories=False):
         """Helper function used in fit. Can be overridden in subclasses."""
         return self._check_input(X, is_categories=is_categories)
 
-    def _unique(self, inp):
-        """Helper function used in fit. Can be overridden in subclasses."""
-
-        # Default implementation passes input through directly since this is
-        # performed in `LabelEncoder.fit()`
-        return inp
-
     def _has_unknown(self, X_cat, encoder_cat):
-        """Check if X_cat has categories that are not present in encoder_cat"""
+        """Check if X_cat has categories that are not present in encoder_cat."""
         return not X_cat.isin(encoder_cat).all()
 
+    @generate_docstring(y=None)
     def fit(self, X, y=None):
-        """
-        Fit OneHotEncoder to X.
-
-        Parameters
-        ----------
-        X : cuDF.DataFrame or cupy.ndarray, shape = (n_samples, n_features)
-            The data to determine the categories of each feature.
-        y : None
-            Ignored. This parameter exists for compatibility only.
-
-        Returns
-        -------
-        self
-
-        """
+        """Fit OneHotEncoder to X."""
         self._validate_keywords()
-        X = self._check_input_fit(X)
-        if type(self.categories) is str and self.categories == "auto":
-            self._features = X.columns
-            self._encoders = {
-                feature: LabelEncoder(
-                    handle=self.handle,
-                    verbose=self.verbose,
-                    output_type=self.output_type,
-                    handle_unknown=self.handle_unknown,
-                ).fit(self._unique(X[feature]))
-                for feature in self._features
-            }
-        else:
-            self.categories = self._check_input_fit(self.categories, True)
-            self._features = self.categories.columns
-            if len(self._features) != X.shape[1]:
-                raise ValueError(
-                    "Shape mismatch: if categories is not 'auto',"
-                    " it has to be of shape (n_features, _)."
-                )
-            self._encoders = dict()
-            for feature in self._features:
-
-                le = LabelEncoder(
-                    handle=self.handle,
-                    verbose=self.verbose,
-                    output_type=self.output_type,
-                    handle_unknown=self.handle_unknown,
-                )
-
-                self._encoders[feature] = le.fit(self.categories[feature])
-
-                if self.handle_unknown == "error":
-                    if self._has_unknown(
-                        X[feature], self._encoders[feature].classes_
-                    ):
-                        msg = (
-                            "Found unknown categories in column {0}"
-                            " during fit".format(feature)
-                        )
-                        raise KeyError(msg)
-
-        self.drop_idx_ = self._compute_drop_idx()
-        self._fitted = True
+        self._fit(X, True)
         return self
 
+    @generate_docstring(
+        y=None,
+        return_values={
+            "name": "X_out",
+            "description": "Transformed input.",
+            "type": "sparse matrix if sparse=True else a 2-d array",
+        },
+    )
     def fit_transform(self, X, y=None):
         """
-        Fit OneHotEncoder to X, then transform X.
-        Equivalent to fit(X).transform(X).
-
-        Parameters
-        ----------
-        X : cudf.DataFrame or cupy.ndarray, shape = (n_samples, n_features)
-            The data to encode.
-
-        Returns
-        -------
-        X_out : sparse matrix if sparse=True else a 2-d array
-            Transformed input.
+        Fit OneHotEncoder to X, then transform X.  Equivalent to fit(X).transform(X).
 
         """
         X = self._check_input(X)
         return self.fit(X).transform(X)
 
+    @generate_docstring(
+        return_values={
+            "name": "X_out",
+            "description": "Transformed input.",
+            "type": "sparse matrix if sparse=True else a 2-d array",
+        }
+    )
     def transform(self, X):
-        """
-        Transform X using one-hot encoding.
-
-        Parameters
-        ----------
-        X : cudf.DataFrame or cupy.ndarray
-            The data to encode.
-
-        Returns
-        -------
-        X_out : sparse matrix if sparse=True else a 2-d array
-            Transformed input.
-        """
+        """Transform X using one-hot encoding."""
         self._check_is_fitted()
         X = self._check_input(X)
 
@@ -425,10 +463,9 @@ def transform(self, X):
             )
 
     def inverse_transform(self, X):
-        """
-        Convert the data back to the original representation.
-        In case unknown categories are encountered (all zeros in the
-        one-hot encoding), ``None`` is used to represent this category.
+        """Convert the data back to the original representation. In case unknown
+        categories are encountered (all zeros in the one-hot encoding), ``None`` is used
+        to represent this category.
 
         The return type is the same as the type of the input used by the first
         call to fit on this estimator instance.
@@ -544,3 +581,165 @@ def get_param_names(self):
             "dtype",
             "handle_unknown",
         ]
+
+
+def _slice_feat(X, i):
+    """Return column ``i`` of a DataFrame (by label) or a 2-d array."""
+    # Anything exposing ``iloc`` is treated as a frame and indexed by label.
+    return X[i] if hasattr(X, "iloc") else X[:, i]
+
+
+def _get_output(
+    output_type: Optional[str],
+    input_type: Optional[str],
+    out: DataFrame,
+    dtype,
+):
+    """Convert the encoded frame ``out`` to the requested output container.
+
+    "input" follows the type seen at fit time ("array" -> cupy, "df" -> cudf)
+    and ``None`` means cupy; ``dtype`` is applied to cupy/numpy results only.
+    """
+    if output_type == "input":
+        # An unrecognised input type keeps "input" and is rejected below.
+        output_type = {"array": "cupy", "df": "cudf"}.get(input_type, "input")
+    if output_type is None:
+        output_type = "cupy"
+    if output_type == "cudf":
+        return out
+    if output_type == "cupy":
+        return out.astype(dtype).to_cupy(na_value=np.nan)
+    if output_type == "numpy":
+        return cp.asnumpy(out.to_cupy(na_value=np.nan, dtype=dtype))
+    if output_type == "pandas":
+        return out.to_pandas()
+    raise ValueError("Unsupported output type.")
+
+
+class OrdinalEncoder(BaseEncoder):
+    def __init__(
+        self,
+        *,
+        categories="auto",
+        dtype=np.float64,
+        handle_unknown="error",
+        handle=None,
+        verbose=False,
+        output_type=None,
+    ) -> None:
+        """Encode categorical features as an integer array.
+
+        The input to this transformer should be a :py:class:`cudf.DataFrame` or a
+        :py:class:`cupy.ndarray`, denoting the unique values taken on by categorical
+        (discrete) features. The features are converted to ordinal integers. This
+        results in a single column of integers (0 to n_categories - 1) per feature.
+
+        Parameters
+        ----------
+        categories : 'auto', a cupy.ndarray or a cudf.DataFrame, default='auto'
+            Categories (unique values) per feature:
+            - 'auto' : Determine categories automatically from the training data.
+            - DataFrame/ndarray : ``categories[col]`` holds the categories expected
+              in the feature col.
+        dtype : number type, default=np.float64
+            Data type of the result when it is returned as a cupy or numpy array.
+        handle_unknown : {'error', 'ignore'}, default='error'
+            Whether to raise an error or ignore if an unknown categorical feature is
+            present during transform (default is to raise). When this parameter is set
+            to 'ignore' and an unknown category is encountered during transform, the
+            resulting encoded value would be null when output type is cudf dataframe.
+        handle : cuml.Handle
+            Specifies the cuml.handle that holds internal CUDA state for computations in
+            this model. Most importantly, this specifies the CUDA stream that will be
+            used for the model's computations, so users can run different models
+            concurrently in different streams by creating handles in several streams.
+            If it is None, a new one is created.
+        verbose : int or boolean, default=False
+            Sets logging level. It must be one of `cuml.common.logger.level_*`.  See
+            :ref:`verbosity-levels` for more info.
+        output_type : {'input', 'array', 'dataframe', 'series', 'df_obj', \
+            'numba', 'cupy', 'numpy', 'cudf', 'pandas'}, default=None
+            Return results and set estimator attributes to the indicated output
+            type. If None, the output type set at the module level
+            (`cuml.global_settings.output_type`) will be used. See
+            :ref:`output-data-type-configuration` for more info.
+        """
+        super().__init__(
+            handle=handle, verbose=verbose, output_type=output_type
+        )
+
+        self.categories = categories
+        self.dtype = dtype
+        self.handle_unknown = handle_unknown
+
+        self.input_type = None
+
+    @generate_docstring(y=None)
+    def fit(self, X, y=None) -> "OrdinalEncoder":
+        """Fit OrdinalEncoder to X."""
+        self._fit(X, need_drop=False)
+        return self
+
+    @generate_docstring(
+        return_values={
+            "name": "X_out",
+            "description": "Transformed input.",
+            "type": "Type is specified by the `output_type` parameter.",
+        }
+    )
+    def transform(self, X):
+        """Transform X using ordinal encoding."""
+        self._check_n_features(X, reset=False)
+
+        result = {}
+        for feature in self._features:
+            Xi = _slice_feat(X, feature)
+            col_idx = self._encoders[feature].transform(Xi)
+            result[feature] = col_idx
+
+        r = DataFrame(result)
+        return _get_output(self.output_type, self.input_type, r, self.dtype)
+
+    @generate_docstring(
+        y=None,
+        return_values={
+            "name": "X_out",
+            "description": "Transformed input.",
+            "type": "Type is specified by the `output_type` parameter.",
+        },
+    )
+    def fit_transform(self, X, y=None):
+        """Fit OrdinalEncoder to X, then transform X. Equivalent to fit(X).transform(X)."""
+        X = self._check_input(X)
+        return self.fit(X).transform(X)
+
+    def inverse_transform(self, X):
+        """Convert the data back to the original representation.
+
+        Parameters
+        ----------
+        X : cudf.DataFrame or cupy.ndarray, shape [n_samples, n_features]
+            The transformed data.
+
+        Returns
+        -------
+        X_tr : Type is specified by the `output_type` parameter.
+            Inverse transformed array.
+        """
+        self._check_n_features(X, reset=False)
+
+        result = {}
+        for feature in self._features:
+            Xi = _slice_feat(X, feature)
+            inv = self._encoders[feature].inverse_transform(Xi)
+            result[feature] = inv
+
+        r = DataFrame(result)
+        return _get_output(self.output_type, self.input_type, r, self.dtype)
+
+    def get_param_names(self):
+        return super().get_param_names() + [
+            "categories",
+            "dtype",
+            "handle_unknown",
+        ]
diff --git a/python/cuml/preprocessing/ordinalencoder_mg.py b/python/cuml/preprocessing/ordinalencoder_mg.py
new file mode 100644
index 0000000000..8b47f67819
--- /dev/null
+++ b/python/cuml/preprocessing/ordinalencoder_mg.py
@@ -0,0 +1,49 @@
+#
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import cupy as cp
+import dask
+from cuml.dask.common.dask_arr_utils import to_dask_cudf
+from cuml.internals.safe_imports import gpu_only_import, gpu_only_import_from
+from cuml.preprocessing.encoders import OrdinalEncoder
+
+cp = gpu_only_import("cupy")
+DataFrame = gpu_only_import_from("cudf", "DataFrame")
+
+
+class OrdinalEncoderMG(OrdinalEncoder):
+    """Ordinal encoder variant whose fit-time helpers accept dask inputs."""
+
+    def __init__(self, *, client=None, **kwargs):
+        super().__init__(**kwargs)
+        self.client = client
+
+    def _check_input_fit(self, X, is_categories=False):
+        """Record the input kind; turn array input into a (dask) cuDF frame."""
+        if not isinstance(X, (dask.array.core.Array, cp.ndarray)):
+            self._set_input_type("df")
+            return X
+        self._set_input_type("array")
+        if is_categories:
+            X = X.transpose()
+        if isinstance(X, cp.ndarray):
+            return DataFrame(X)
+        return to_dask_cudf(X, client=self.client)
+
+    def _unique(self, inp):
+        return inp.unique().compute()  # materialise the distinct values
+
+    def _has_unknown(self, X_cat, encoder_cat):
+        return not X_cat.isin(encoder_cat).all().compute()
diff --git a/python/cuml/tests/dask/test_dask_ordinal_encoder.py b/python/cuml/tests/dask/test_dask_ordinal_encoder.py
new file mode 100644
index 0000000000..36b5fa92d3
--- /dev/null
+++ b/python/cuml/tests/dask/test_dask_ordinal_encoder.py
@@ -0,0 +1,117 @@
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import cupy as cp
+import dask_cudf
+import numpy as np
+import pandas as pd
+import pytest
+from cudf import DataFrame
+from cuml.dask.preprocessing import OrdinalEncoder
+from distributed import Client
+
+
+@pytest.mark.mg
+def test_ordinal_encoder_df(client: Client) -> None:
+    X = DataFrame({"cat": ["M", "F", "F"], "int": [1, 3, 2]})
+    X = dask_cudf.from_cudf(X, npartitions=2)
+
+    enc = OrdinalEncoder()
+    enc.fit(X)
+    Xt = enc.transform(X)
+
+    X_1 = DataFrame({"cat": ["F", "F"], "int": [1, 2]})
+    X_1 = dask_cudf.from_cudf(X_1, npartitions=2)
+
+    enc = OrdinalEncoder(client=client)
+    enc.fit(X)
+    Xt_1 = enc.transform(X_1)
+
+    Xt_r = Xt.compute()
+    Xt_1_r = Xt_1.compute()
+    assert Xt_1_r.iloc[0, 0] == Xt_r.iloc[1, 0]
+    assert Xt_1_r.iloc[1, 0] == Xt_r.iloc[1, 0]
+    assert Xt_1_r.iloc[0, 1] == Xt_r.iloc[0, 1]
+    assert Xt_1_r.iloc[1, 1] == Xt_r.iloc[2, 1]
+
+    # Turn Int64Index to RangeIndex for testing equality
+    inv_Xt = enc.inverse_transform(Xt).compute().reset_index(drop=True)
+    inv_Xt_1 = enc.inverse_transform(Xt_1).compute().reset_index(drop=True)
+
+    X_r = X.compute()
+    X_1_r = X_1.compute()
+
+    assert inv_Xt.equals(X_r)
+    assert inv_Xt_1.equals(X_1_r)
+
+    assert enc.n_features_in_ == 2
+
+
+@pytest.mark.mg
+def test_ordinal_encoder_array(client: Client) -> None:
+    X = DataFrame({"A": [4, 1, 1], "B": [1, 3, 2]})
+    X = dask_cudf.from_cudf(X, npartitions=2).values
+
+    enc = OrdinalEncoder()
+    enc.fit(X)
+    Xt = enc.transform(X)
+
+    X_1 = DataFrame({"A": [1, 1], "B": [1, 2]})
+    X_1 = dask_cudf.from_cudf(X_1, npartitions=2).values
+
+    enc = OrdinalEncoder(client=client)
+    enc.fit(X)
+    Xt_1 = enc.transform(X_1)
+
+    Xt_r = Xt.compute()
+    Xt_1_r = Xt_1.compute()
+    assert Xt_1_r[0, 0] == Xt_r[1, 0]
+    assert Xt_1_r[1, 0] == Xt_r[1, 0]
+    assert Xt_1_r[0, 1] == Xt_r[0, 1]
+    assert Xt_1_r[1, 1] == Xt_r[2, 1]
+
+    inv_Xt = enc.inverse_transform(Xt)
+    inv_Xt_1 = enc.inverse_transform(Xt_1)
+
+    cp.testing.assert_allclose(X.compute(), inv_Xt.compute())
+    cp.testing.assert_allclose(X_1.compute(), inv_Xt_1.compute())
+
+    assert enc.n_features_in_ == 2
+
+
+@pytest.mark.mg
+@pytest.mark.parametrize("as_array", [True, False], ids=["cupy", "cudf"])
+def test_handle_unknown(client, as_array: bool) -> None:
+    X = DataFrame({"data": [0, 1]})
+    Y = DataFrame({"data": [3, 1]})
+
+    X = dask_cudf.from_cudf(X, npartitions=2)
+    Y = dask_cudf.from_cudf(Y, npartitions=2)
+
+    if as_array:
+        X = X.values
+        Y = Y.values
+    # With "error", transforming the unseen value 3 must raise.
+    enc = OrdinalEncoder(handle_unknown="error")
+    enc = enc.fit(X)
+    with pytest.raises(KeyError):
+        enc.transform(Y).compute()
+    # With "ignore", the unseen value 3 must encode to a missing value.
+    enc = OrdinalEncoder(handle_unknown="ignore")
+    enc = enc.fit(X)
+    encoded = enc.transform(Y).compute()
+    if as_array:
+        assert np.isnan(encoded[0, 0])
+    else:
+        assert pd.isna(encoded.iloc[0, 0])
diff --git a/python/cuml/tests/test_ordinal_encoder.py b/python/cuml/tests/test_ordinal_encoder.py
new file mode 100644
index 0000000000..c9379a43be
--- /dev/null
+++ b/python/cuml/tests/test_ordinal_encoder.py
@@ -0,0 +1,133 @@
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import cupy as cp
+import numpy as np
+import pandas as pd
+import pytest
+from sklearn.preprocessing import OrdinalEncoder as skOrdinalEncoder
+
+from cuml.internals.safe_imports import gpu_only_import_from
+from cuml.preprocessing import OrdinalEncoder
+
+DataFrame = gpu_only_import_from("cudf", "DataFrame")
+
+
+@pytest.fixture
+def test_sample():
+    X = DataFrame({"cat": ["M", "F", "F"], "num": [1, 3, 2]})
+    return X
+
+
+def test_ordinal_encoder_df(test_sample) -> None:
+    X = test_sample
+    enc = OrdinalEncoder()
+    enc.fit(X)
+    Xt = enc.transform(X)
+
+    X_1 = DataFrame({"cat": ["F", "F"], "num": [1, 2]})
+    Xt_1 = enc.transform(X_1)
+
+    assert Xt_1.iloc[0, 0] == Xt.iloc[1, 0]
+    assert Xt_1.iloc[1, 0] == Xt.iloc[1, 0]
+    assert Xt_1.iloc[0, 1] == Xt.iloc[0, 1]
+    assert Xt_1.iloc[1, 1] == Xt.iloc[2, 1]
+
+    inv_Xt = enc.inverse_transform(Xt)
+    inv_Xt_1 = enc.inverse_transform(Xt_1)
+
+    assert inv_Xt.equals(X)
+    assert inv_Xt_1.equals(X_1)
+
+    assert enc.n_features_in_ == 2
+
+
+def test_ordinal_encoder_array() -> None:
+    X = DataFrame({"A": [4, 1, 1], "B": [1, 3, 2]}).values
+    enc = OrdinalEncoder()
+    enc.fit(X)
+    Xt = enc.transform(X)
+
+    X_1 = DataFrame({"A": [1, 1], "B": [1, 2]}).values
+    Xt_1 = enc.transform(X_1)
+
+    assert Xt_1[0, 0] == Xt[1, 0]
+    assert Xt_1[1, 0] == Xt[1, 0]
+    assert Xt_1[0, 1] == Xt[0, 1]
+    assert Xt_1[1, 1] == Xt[2, 1]
+
+    inv_Xt = enc.inverse_transform(Xt)
+    inv_Xt_1 = enc.inverse_transform(Xt_1)
+
+    cp.testing.assert_allclose(X, inv_Xt)
+    cp.testing.assert_allclose(X_1, inv_Xt_1)
+
+    assert enc.n_features_in_ == 2
+
+
+def test_ordinal_array() -> None:
+    X = cp.arange(32).reshape(16, 2)
+
+    enc = OrdinalEncoder()
+    enc.fit(X)
+    Xt = enc.transform(X)
+
+    Xh = cp.asnumpy(X)
+    skenc = skOrdinalEncoder()
+    skenc.fit(Xh)
+    Xt_sk = skenc.transform(Xh)
+
+    cp.testing.assert_allclose(Xt, Xt_sk)
+
+
+def test_output_type(test_sample) -> None:
+    X = test_sample
+    enc = OrdinalEncoder(output_type="cupy").fit(X)
+    assert isinstance(enc.transform(X), cp.ndarray)
+    enc = OrdinalEncoder(output_type="cudf").fit(X)
+    assert isinstance(enc.transform(X), DataFrame)
+    enc = OrdinalEncoder(output_type="pandas").fit(X)
+    assert isinstance(enc.transform(X), pd.DataFrame)
+    enc = OrdinalEncoder(output_type="numpy").fit(X)
+    assert isinstance(enc.transform(X), np.ndarray)
+    # output_type == "input"
+    enc = OrdinalEncoder().fit(X)
+    assert isinstance(enc.transform(X), DataFrame)
+
+
+def test_feature_names(test_sample) -> None:
+    enc = OrdinalEncoder().fit(test_sample)
+    assert enc.feature_names_in_ == ["cat", "num"]
+
+
+@pytest.mark.parametrize("as_array", [True, False], ids=["cupy", "cudf"])
+def test_handle_unknown(as_array: bool) -> None:
+    X = DataFrame({"data": [0, 1]})
+    Y = DataFrame({"data": [3, 1]})
+
+    if as_array:
+        X = X.values
+        Y = Y.values
+    # With "error", transforming the unseen value 3 must raise.
+    enc = OrdinalEncoder(handle_unknown="error")
+    enc = enc.fit(X)
+    with pytest.raises(KeyError):
+        enc.transform(Y)
+    # With "ignore", the unseen value 3 must encode to a missing value.
+    enc = OrdinalEncoder(handle_unknown="ignore")
+    enc = enc.fit(X)
+    encoded = enc.transform(Y)
+    if as_array:
+        assert np.isnan(encoded[0, 0])
+    else:
+        assert pd.isna(encoded.iloc[0, 0])