Commit

Merge branch 'branch-23.12' into update_cccl
bdice authored Dec 11, 2023
2 parents f9471f5 + 8f29a16 commit 3e758fd
Showing 46 changed files with 1,358 additions and 297 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yaml
@@ -22,7 +22,7 @@ on:
default: nightly

concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
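  # Adding the event name separates nightly- and merge-triggered runs into
  # distinct concurrency groups, so one no longer cancels the other (#5658).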
cancel-in-progress: true

jobs:
2 changes: 1 addition & 1 deletion .github/workflows/labeler.yml
@@ -6,6 +6,6 @@ jobs:
triage:
runs-on: ubuntu-latest
steps:
-      - uses: actions/labeler@main
+      - uses: actions/labeler@v4
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
49 changes: 49 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,52 @@
# cuML 23.12.00 (6 Dec 2023)

## 🚨 Breaking Changes

- [LogisticRegressionMG] Support sparse vectors ([#5632](https://github.com/rapidsai/cuml/pull/5632)) [@lijinf2](https://github.com/lijinf2)

## 🐛 Bug Fixes

- Update actions/labeler to v4 ([#5686](https://github.com/rapidsai/cuml/pull/5686)) [@raydouglass](https://github.com/raydouglass)
- updated docs around `make_column_transformer` change from `.preprocessing` to `.compose` ([#5680](https://github.com/rapidsai/cuml/pull/5680)) [@taureandyernv](https://github.com/taureandyernv)
- Skip dask pytest NN hang in CUDA 11.4 CI ([#5665](https://github.com/rapidsai/cuml/pull/5665)) [@dantegd](https://github.com/dantegd)
- Avoid hard import of sklearn in base module. ([#5663](https://github.com/rapidsai/cuml/pull/5663)) [@csadorf](https://github.com/csadorf)
- CI: Pin clang-tidy to 15.0.7. ([#5661](https://github.com/rapidsai/cuml/pull/5661)) [@csadorf](https://github.com/csadorf)
- Adjust assumption regarding valid cudf.Series dimensional input. ([#5654](https://github.com/rapidsai/cuml/pull/5654)) [@csadorf](https://github.com/csadorf)
- Flatten cupy array before feeding to cudf.Series ([#5651](https://github.com/rapidsai/cuml/pull/5651)) [@vyasr](https://github.com/vyasr)
- CI: Fix expected ValueError and dask-glm incompatibility ([#5644](https://github.com/rapidsai/cuml/pull/5644)) [@csadorf](https://github.com/csadorf)
- Use drop_duplicates instead of unique for cudf's pandas compatibility mode ([#5639](https://github.com/rapidsai/cuml/pull/5639)) [@vyasr](https://github.com/vyasr)
- Temporarily avoid pydata-sphinx-theme version 0.14.2. ([#5629](https://github.com/rapidsai/cuml/pull/5629)) [@csadorf](https://github.com/csadorf)
- Fix type hint in split function. ([#5625](https://github.com/rapidsai/cuml/pull/5625)) [@trivialfis](https://github.com/trivialfis)
- Fix trying to get pointer to None in svm/linear.pyx ([#5615](https://github.com/rapidsai/cuml/pull/5615)) [@yosider](https://github.com/yosider)
- Reduce parallelism to avoid OOMs in wheel tests ([#5611](https://github.com/rapidsai/cuml/pull/5611)) [@vyasr](https://github.com/vyasr)

## 📖 Documentation

- Update interoperability docs ([#5633](https://github.com/rapidsai/cuml/pull/5633)) [@beckernick](https://github.com/beckernick)
- Update instructions for creating a conda build environment ([#5628](https://github.com/rapidsai/cuml/pull/5628)) [@csadorf](https://github.com/csadorf)

## 🚀 New Features

- Basic implementation of `OrdinalEncoder`. ([#5646](https://github.com/rapidsai/cuml/pull/5646)) [@trivialfis](https://github.com/trivialfis)

## 🛠️ Improvements

- Build concurrency for nightly and merge triggers ([#5658](https://github.com/rapidsai/cuml/pull/5658)) [@bdice](https://github.com/bdice)
- [LogisticRegressionMG][FEA] Support training when dataset contains only one class ([#5655](https://github.com/rapidsai/cuml/pull/5655)) [@lijinf2](https://github.com/lijinf2)
- Use new `rapids-dask-dependency` metapackage for managing `dask` versions ([#5649](https://github.com/rapidsai/cuml/pull/5649)) [@galipremsagar](https://github.com/galipremsagar)
- Simplify some logic in LabelEncoder ([#5648](https://github.com/rapidsai/cuml/pull/5648)) [@vyasr](https://github.com/vyasr)
- Increase `Nanny` close timeout in `LocalCUDACluster` tests ([#5636](https://github.com/rapidsai/cuml/pull/5636)) [@pentschev](https://github.com/pentschev)
- [LogisticRegressionMG] Support sparse vectors ([#5632](https://github.com/rapidsai/cuml/pull/5632)) [@lijinf2](https://github.com/lijinf2)
- Add rich HTML representation to estimators ([#5630](https://github.com/rapidsai/cuml/pull/5630)) [@betatim](https://github.com/betatim)
- Unpin `dask` and `distributed` for `23.12` development ([#5627](https://github.com/rapidsai/cuml/pull/5627)) [@galipremsagar](https://github.com/galipremsagar)
- Update `shared-action-workflows` references ([#5621](https://github.com/rapidsai/cuml/pull/5621)) [@AyodeAwe](https://github.com/AyodeAwe)
- Use branch-23.12 workflows. ([#5618](https://github.com/rapidsai/cuml/pull/5618)) [@bdice](https://github.com/bdice)
- Update rapids-cmake functions to non-deprecated signatures ([#5616](https://github.com/rapidsai/cuml/pull/5616)) [@robertmaynard](https://github.com/robertmaynard)
- Allow nightly dependencies and set up consistent nightly versions for conda and pip packages ([#5607](https://github.com/rapidsai/cuml/pull/5607)) [@vyasr](https://github.com/vyasr)
- Forward-merge branch-23.10 to branch-23.12 ([#5596](https://github.com/rapidsai/cuml/pull/5596)) [@bdice](https://github.com/bdice)
- Build CUDA 12.0 ARM conda packages. ([#5595](https://github.com/rapidsai/cuml/pull/5595)) [@bdice](https://github.com/bdice)
- Enable multiclass svm for sparse input ([#5588](https://github.com/rapidsai/cuml/pull/5588)) [@mfoerste4](https://github.com/mfoerste4)

# cuML 23.10.00 (11 Oct 2023)

## 🚨 Breaking Changes
4 changes: 4 additions & 0 deletions ci/build_wheel.sh
@@ -38,6 +38,10 @@ for dep in cudf pylibraft raft-dask rmm; do
sed -r -i "s/${dep}==(.*)\"/${dep}${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file}
done

+for dep in dask-cuda rapids-dask-dependency; do
+  sed -r -i "s/${dep}==(.*)\"/${dep}==\1${alpha_spec}\"/g" ${pyproject_file}
+done
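# (The loop above mirrors the cudf/pylibraft loop: these dask packages are
# RAPIDS-versioned but take no CUDA suffix, and still need the nightly
# alpha_spec appended; alpha_spec is defined earlier in the script, outside
# this diff.)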

if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then
sed -i "s/cuda-python[<=>\.,0-9]*/cuda-python>=12.0,<13.0/g" ${pyproject_file}
sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file}
11 changes: 4 additions & 7 deletions ci/release/update-version.sh
@@ -73,6 +73,7 @@ DEPENDENCIES=(
librmm
pylibraft
raft-dask
+  rapids-dask-dependency
rmm
)
for FILE in dependencies.yaml conda/environments/*.yaml; do
@@ -81,17 +82,13 @@ for FILE in dependencies.yaml conda/environments/*.yaml; do
done
done

sed_runner "s|/branch-.*?/|/branch-${NEXT_SHORT_TAG}/|g" README.md
sed_runner "s|/branch-.*?/|/branch-${NEXT_SHORT_TAG}/|g" python/README.md
sed_runner "s|/branch-[^/]*/|/branch-${NEXT_SHORT_TAG}/|g" README.md
sed_runner "s|/branch-[^/]*/|/branch-${NEXT_SHORT_TAG}/|g" python/README.md
sed_runner "/- rapids-dask-dependency==/ s/==.*/==${NEXT_SHORT_TAG}\.*/g" python/README.md

# Wheel builds clone cumlprims_mg, update its branch
sed_runner "s/extra-repo-sha: branch-.*/extra-repo-sha: branch-${NEXT_SHORT_TAG}/g" .github/workflows/*.yaml

-# Wheel builds install dask-cuda from source, update its branch
-for FILE in .github/workflows/*.yaml; do
-  sed_runner "s/dask-cuda.git@branch-[^\"\s]\+/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" ${FILE};
-done

# CI files
for FILE in .github/workflows/*.yaml; do
sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
3 changes: 0 additions & 3 deletions ci/test_wheel.sh
@@ -12,9 +12,6 @@ if [[ "$(arch)" == "aarch64" ]]; then
python -m pip install cmake
fi

-# Always install latest dask for testing
-python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/[email protected]

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/cuml*.whl)[test]

6 changes: 2 additions & 4 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -16,12 +16,9 @@ dependencies:
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
-  - dask-core>=2023.9.2
- dask-cuda==23.12.*
- dask-cudf==23.12.*
- dask-ml
-  - dask>=2023.9.2
-  - distributed>=2023.9.2
- doxygen=1.9.1
- gcc_linux-64=11.*
- gmock>=1.13.0
@@ -63,6 +60,7 @@ dependencies:
- pytest-xdist
- python>=3.9,<3.11
- raft-dask==23.12.*
+  - rapids-dask-dependency==23.12.*
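  # rapids-dask-dependency centralizes the dask/distributed pins that the
  # direct entries removed above used to carry (#5649).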
- recommonmark
- rmm==23.12.*
- scikit-build>=0.13.1
@@ -77,5 +75,5 @@
- treelite==3.9.1
- umap-learn==0.5.3
- pip:
-      - git+https://github.com/dask/dask-glm@main
+      - dask-glm==0.3.0
name: all_cuda-118_arch-x86_64
6 changes: 2 additions & 4 deletions conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -18,12 +18,9 @@ dependencies:
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
-  - dask-core>=2023.9.2
- dask-cuda==23.12.*
- dask-cudf==23.12.*
- dask-ml
-  - dask>=2023.9.2
-  - distributed>=2023.9.2
- doxygen=1.9.1
- gcc_linux-64=11.*
- gmock>=1.13.0
@@ -59,6 +56,7 @@
- pytest-xdist
- python>=3.9,<3.11
- raft-dask==23.12.*
+  - rapids-dask-dependency==23.12.*
- recommonmark
- rmm==23.12.*
- scikit-build>=0.13.1
@@ -73,5 +71,5 @@
- treelite==3.9.1
- umap-learn==0.5.3
- pip:
-      - git+https://github.com/dask/dask-glm@main
+      - dask-glm==0.3.0
name: all_cuda-120_arch-x86_64
4 changes: 2 additions & 2 deletions conda/environments/clang_tidy_cuda-118_arch-x86_64.yaml
@@ -8,8 +8,8 @@ channels:
- nvidia
dependencies:
- c-compiler
-  - clang-tools==16.0.6
-  - clang==16.0.6
+  - clang-tools==15.0.7
+  - clang==15.0.7
- cmake>=3.26.4
- cuda-version=11.8
- cudatoolkit
4 changes: 1 addition & 3 deletions conda/recipes/cuml/meta.yaml
@@ -76,15 +76,13 @@ requirements:
- cudf ={{ minor_version }}
- cupy >=12.0.0
- dask-cudf ={{ minor_version }}
-    - dask >=2023.9.2
-    - dask-core>=2023.9.2
-    - distributed >=2023.9.2
- joblib >=0.11
- libcuml ={{ version }}
- libcumlprims ={{ minor_version }}
- pylibraft ={{ minor_version }}
- python x.x
- raft-dask ={{ minor_version }}
+    - rapids-dask-dependency ={{ minor_version }}
- treelite {{ treelite_version }}

tests:
31 changes: 31 additions & 0 deletions cpp/include/cuml/linear_model/qn_mg.hpp
@@ -63,6 +63,37 @@ void qnFit(raft::handle_t& handle,
float* f,
int* num_iters);

/**
 * @brief support sparse vectors (Compressed Sparse Row format) for MNMG logistic regression fit
 * using quasi-Newton methods
* @param[in] handle: the internal cuml handle object
* @param[in] input_values: vector holding non-zero values of all partitions for that rank
* @param[in] input_cols: vector holding column indices of non-zero values of all partitions for
* that rank
* @param[in] input_row_ids: vector holding row pointers of non-zero values of all partitions for
* that rank
* @param[in] X_nnz: the number of non-zero values of that rank
* @param[in] input_desc: PartDescriptor object for the input
* @param[in] labels: labels data
* @param[out] coef: learned coefficients
* @param[in] pams: model parameters
* @param[in] n_classes: number of outputs (number of classes or `1` for regression)
* @param[out] f: host pointer holding the final objective value
* @param[out] num_iters: host pointer holding the actual number of iterations taken
*/
void qnFitSparse(raft::handle_t& handle,
std::vector<Matrix::Data<float>*>& input_values,
int* input_cols,
int* input_row_ids,
int X_nnz,
Matrix::PartDescriptor& input_desc,
std::vector<Matrix::Data<float>*>& labels,
float* coef,
const qn_params& pams,
int n_classes,
float* f,
int* num_iters);

}; // namespace opg
}; // namespace GLM
}; // namespace ML
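For orientation, a minimal caller-side sketch of the new entry point (not part of this commit). The include path matches the header above; the namespace qualification of Matrix::Data / Matrix::PartDescriptor, the qn_params defaults, and the handle setup are assumptions, and a real multi-GPU run needs RAFT comms (e.g. NCCL) injected into the handle on every rank.

// Hypothetical sketch — not from this commit. Assumes a comms-initialized
// raft::handle_t for this rank, device-resident CSR buffers, and cumlprims'
// OPG matrix types (namespace qualification elided, as in the header above).
#include <cuml/linear_model/qn_mg.hpp>

#include <vector>

void fit_logistic_sparse(raft::handle_t& handle,
                         std::vector<Matrix::Data<float>*>& X_values,  // CSR values, local partitions
                         int* X_cols,                                  // CSR column indices (device)
                         int* X_row_ids,                               // CSR row offsets (device)
                         int X_nnz,                                    // non-zeros on this rank
                         Matrix::PartDescriptor& input_desc,
                         std::vector<Matrix::Data<float>*>& labels,
                         float* coef,                                  // device output, n_classes x dims
                         int n_classes)
{
  qn_params pams;                      // assumed default-constructible
  pams.loss          = QN_LOSS_LOGISTIC;
  pams.fit_intercept = true;

  float objective = 0.0f;              // final objective value (host out-param)
  int num_iters   = 0;                 // iterations actually taken (host out-param)

  ML::GLM::opg::qnFitSparse(handle,
                            X_values, X_cols, X_row_ids, X_nnz,
                            input_desc, labels,
                            coef, pams, n_classes,
                            &objective, &num_iters);
}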
2 changes: 1 addition & 1 deletion cpp/scripts/run-clang-tidy.py
@@ -25,7 +25,7 @@

import tomli

EXPECTED_VERSION = "16.0.6"
EXPECTED_VERSION = "15.0.7"
VERSION_REGEX = re.compile(r" LLVM version ([0-9.]+)")
GPU_ARCH_REGEX = re.compile(r"sm_(\d+)")
SPACES = re.compile(r"\s+")
21 changes: 14 additions & 7 deletions cpp/src/glm/qn/mg/glm_base_mg.cuh
@@ -16,6 +16,7 @@

#include <raft/core/comms.hpp>
#include <raft/core/handle.hpp>
+#include <raft/linalg/add.cuh>
#include <raft/linalg/multiply.cuh>
#include <raft/util/cudart_utils.hpp>

@@ -112,34 +113,42 @@ struct GLMWithDataMG : ML::GLM::detail::GLMWithData<T, GLMObjective> {
T* dev_scalar,
cudaStream_t stream)
{
+    raft::comms::comms_t const& communicator = raft::resource::get_comms(*(this->handle_p));
SimpleDenseMat<T> W(wFlat.data, this->C, this->dims);
SimpleDenseMat<T> G(gradFlat.data, this->C, this->dims);
SimpleVec<T> lossVal(dev_scalar, 1);

+    // Ensure the same coefficients on all GPUs
+    communicator.bcast(wFlat.data, this->C * this->dims, 0, stream);
+    communicator.sync_stream(stream);

// apply regularization
auto regularizer_obj = this->objective;
auto lossFunc = regularizer_obj->loss;
auto reg = regularizer_obj->reg;
G.fill(0, stream);
-    float reg_host = 0;
+    T reg_host = 0;
if (reg->l2_penalty != 0) {
reg->reg_grad(dev_scalar, G, W, lossFunc->fit_intercept, stream);
raft::update_host(&reg_host, dev_scalar, 1, stream);
-    // note: avoid syncing here because there's a sync before reg_host is used.
+    raft::resource::sync_stream(*(this->handle_p));
}

// apply linearFwd, getLossAndDz, linearBwd
ML::GLM::detail::linearFwd(
lossFunc->handle, *(this->Z), *(this->X), W); // linear part: forward pass

-    raft::comms::comms_t const& communicator = raft::resource::get_comms(*(this->handle_p));

lossFunc->getLossAndDZ(dev_scalar, *(this->Z), *(this->y), stream); // loss specific part

// normalize local loss before allreduce sum
T factor = 1.0 * (*this->y).len / this->n_samples;
raft::linalg::multiplyScalar(dev_scalar, dev_scalar, factor, 1, stream);

+    // Each GPU computes reg_host independently, so the values may show tiny
+    // divergence. Use the averaged reg_host to avoid that divergence.
+    T reg_factor = reg_host / this->n_ranks;
+    raft::linalg::addScalar(dev_scalar, dev_scalar, reg_factor, 1, stream);

communicator.allreduce(dev_scalar, dev_scalar, 1, raft::comms::op_t::SUM, stream);
communicator.sync_stream(stream);

@@ -154,11 +163,9 @@ struct GLMWithDataMG : ML::GLM::detail::GLMWithData<T, GLMObjective> {
communicator.allreduce(G.data, G.data, this->C * this->dims, raft::comms::op_t::SUM, stream);
communicator.sync_stream(stream);

-    float loss_host;
+    T loss_host;
raft::update_host(&loss_host, dev_scalar, 1, stream);
raft::resource::sync_stream(*(this->handle_p));
-    loss_host += reg_host;
+    lossVal.fill(loss_host + reg_host, stream);

return loss_host;
}
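Read as math (notation ours, not the commit's): with \(R\) ranks, rank \(r\) holding \(n_r\) of the \(n\) total samples, local mean loss \(L_r\), and per-rank regularization value \(\mathrm{reg}_r\), the scalar that survives the allreduce is

\[
\text{loss} \;=\; \sum_{r=1}^{R}\Big(\frac{n_r}{n}\,L_r + \frac{\mathrm{reg}_r}{R}\Big)
\;=\; \sum_{r=1}^{R}\frac{n_r}{n}\,L_r \;+\; \frac{1}{R}\sum_{r=1}^{R}\mathrm{reg}_r,
\]

i.e. the sample-weighted global data loss plus the rank-averaged regularizer; since every rank computes a numerically near-identical \(\mathrm{reg}_r\), averaging removes the tiny per-GPU divergence the new comment warns about.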
2 changes: 1 addition & 1 deletion cpp/src/glm/qn/mg/qn_mg.cuh
@@ -101,7 +101,7 @@ inline void qn_fit_x_mg(const raft::handle_t& handle,

switch (pams.loss) {
case QN_LOSS_LOGISTIC: {
-      ASSERT(C == 2, "qn_mg.cuh: logistic loss invalid C");
+      ASSERT(C > 0, "qn_mg.cuh: logistic loss invalid C");
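      // Relaxed from C == 2 so a dataset containing a single class is accepted
      // (see #5655 in the changelog above).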
ML::GLM::detail::LogisticLoss<T> loss(handle, D, pams.fit_intercept);
ML::GLM::opg::qn_fit_mg<T, decltype(loss)>(
handle, pams, loss, X, y, Z, w0_data, f, num_iters, n_samples, rank, n_ranks);
Expand Down