From 00ce1d0b91409b2a9136302df1511d28ad185447 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 3 Jul 2023 01:42:12 +0800 Subject: [PATCH 1/2] [skl] Enable cat feature support without specifying tree method. --- python-package/xgboost/sklearn.py | 3 +-- tests/python/test_with_sklearn.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 4cc8a174cd7a..3ae791a7ba54 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -935,8 +935,7 @@ def _duplicated(parameter: str) -> None: callbacks = self.callbacks if self.callbacks is not None else callbacks tree_method = params.get("tree_method", None) - cat_support = {"gpu_hist", "approx", "hist"} - if self.enable_categorical and tree_method not in cat_support: + if self.enable_categorical and tree_method == "exact": raise ValueError( "Experimental support for categorical data is not implemented for" " current tree method yet." diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 26d18493cc7c..45281a2bfb23 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -1390,7 +1390,6 @@ def test_categorical(): X, y = tm.make_categorical(n_samples=32, n_features=2, n_categories=3, onehot=False) ft = ["c"] * X.shape[1] reg = xgb.XGBRegressor( - tree_method="hist", feature_types=ft, max_cat_to_onehot=1, enable_categorical=True, @@ -1409,7 +1408,7 @@ def test_categorical(): onehot, y = tm.make_categorical( n_samples=32, n_features=2, n_categories=3, onehot=True ) - reg = xgb.XGBRegressor(tree_method="hist") + reg = xgb.XGBRegressor() reg.fit(onehot, y, eval_set=[(onehot, y)]) from_enc = reg.evals_result()["validation_0"]["rmse"] predt_enc = reg.predict(onehot) From bc4772b67cac344f78a392b7d57b7995901196e2 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 3 Jul 2023 02:24:56 +0800 Subject: [PATCH 2/2] Dask test. --- tests/test_distributed/test_with_dask/test_with_dask.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_distributed/test_with_dask/test_with_dask.py b/tests/test_distributed/test_with_dask/test_with_dask.py index d6075481f9af..cab4188a831c 100644 --- a/tests/test_distributed/test_with_dask/test_with_dask.py +++ b/tests/test_distributed/test_with_dask/test_with_dask.py @@ -308,7 +308,7 @@ def test_dask_sparse(client: "Client") -> None: def run_categorical(client: "Client", tree_method: str, X, X_onehot, y) -> None: - parameters = {"tree_method": tree_method, "max_cat_to_onehot": 9999} # force onehot + parameters = {"tree_method": tree_method, "max_cat_to_onehot": 9999} # force onehot rounds = 10 m = xgb.dask.DaskDMatrix(client, X_onehot, y, enable_categorical=True) by_etl_results = xgb.dask.train( @@ -364,9 +364,9 @@ def check_model_output(model: xgb.dask.Booster) -> None: check_model_output(reg.get_booster()) reg = xgb.dask.DaskXGBRegressor( - enable_categorical=True, n_estimators=10 + enable_categorical=True, n_estimators=10, tree_method="exact" ) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="categorical data"): reg.fit(X, y) # check partition based reg = xgb.dask.DaskXGBRegressor(