From ef877166a56946eca669d5eb8b4ea19eefc519c3 Mon Sep 17 00:00:00 2001 From: fis Date: Sun, 27 Sep 2020 23:46:06 +0800 Subject: [PATCH 01/14] Initial commit. --- python-package/xgboost/core.py | 39 +++++++++++++++++++++++++++++++ python-package/xgboost/sklearn.py | 5 +++- 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 03d70e0a4ece..2677e6f32395 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -12,6 +12,8 @@ import sys import json import warnings +from functools import wraps +from inspect import signature, Parameter import numpy as np import scipy.sparse @@ -369,6 +371,43 @@ def next(self, input_data): raise NotImplementedError() +def _deprecate_positional_args(f): + """Decorator for methods that issues warnings for positional arguments + + Modifed from sklearn utils.validation. + + Parameters + ---------- + f : function + function to check arguments on + """ + sig = signature(f) + kwonly_args = [] + all_args = [] + + for name, param in sig.parameters.items(): + if param.kind == Parameter.POSITIONAL_OR_KEYWORD: + all_args.append(name) + elif param.kind == Parameter.KEYWORD_ONLY: + kwonly_args.append(name) + + @wraps(f) + def inner_f(*args, **kwargs): + extra_args = len(args) - len(all_args) + if extra_args > 0: + # ignore first 'self' argument for instance methods + args_msg = ['{}={}'.format(name, arg) + for name, arg in zip(kwonly_args[:extra_args], + args[-extra_args:])] + warnings.warn("Pass {} as keyword args. From version 0.25 " + "passing these as positional arguments will " + "result in an error".format(", ".join(args_msg)), + FutureWarning) + kwargs.update({k: arg for k, arg in zip(sig.parameters, args)}) + return f(**kwargs) + return inner_f + + class DMatrix: # pylint: disable=too-many-instance-attributes """Data Matrix used in XGBoost. 
diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index b199ff2d67f5..083cffc73167 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -5,7 +5,7 @@ import warnings import json import numpy as np -from .core import Booster, DMatrix, XGBoostError +from .core import Booster, DMatrix, XGBoostError, _deprecate_positional_args from .training import train from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array @@ -445,6 +445,7 @@ def load_model(self, fname): # Delete the attribute after load self.get_booster().set_attr(scikit_learn=None) + @_deprecate_positional_args def fit(self, X, y, sample_weight=None, base_margin=None, eval_set=None, eval_metric=None, early_stopping_rounds=None, verbose=True, xgb_model=None, sample_weight_eval_set=None, @@ -778,6 +779,7 @@ def __init__(self, objective="binary:logistic", use_label_encoder=True, **kwargs self.use_label_encoder = use_label_encoder super().__init__(objective=objective, **kwargs) + @_deprecate_positional_args def fit(self, X, y, sample_weight=None, base_margin=None, eval_set=None, eval_metric=None, early_stopping_rounds=None, verbose=True, xgb_model=None, @@ -1167,6 +1169,7 @@ def __init__(self, objective='rank:pairwise', **kwargs): if "rank:" not in self.objective: raise ValueError("please use XGBRanker for ranking task") + @_deprecate_positional_args def fit(self, X, y, group, sample_weight=None, base_margin=None, eval_set=None, sample_weight_eval_set=None, eval_group=None, eval_metric=None, From f8bb6743067ea6d30133bb2a17e0f6856921a100 Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 10 Nov 2020 14:26:37 +0800 Subject: [PATCH 02/14] Apply on init method. 
--- python-package/xgboost/sklearn.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 083cffc73167..dc9fbb1ee78f 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -775,6 +775,7 @@ def intercept_(self): ''') class XGBClassifier(XGBModel, XGBClassifierBase): # pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes + @_deprecate_positional_args def __init__(self, objective="binary:logistic", use_label_encoder=True, **kwargs): self.use_label_encoder = use_label_encoder super().__init__(objective=objective, **kwargs) @@ -1065,6 +1066,7 @@ def evals_result(self): ''') class XGBRFClassifier(XGBClassifier): # pylint: disable=missing-docstring + @_deprecate_positional_args def __init__(self, learning_rate=1, subsample=0.8, @@ -1093,6 +1095,7 @@ def get_num_boosting_rounds(self): ['estimators', 'model', 'objective']) class XGBRegressor(XGBModel, XGBRegressorBase): # pylint: disable=missing-docstring + @_deprecate_positional_args def __init__(self, objective="reg:squarederror", **kwargs): super().__init__(objective=objective, **kwargs) @@ -1105,6 +1108,7 @@ def __init__(self, objective="reg:squarederror", **kwargs): ''') class XGBRFRegressor(XGBRegressor): # pylint: disable=missing-docstring + @_deprecate_positional_args def __init__(self, learning_rate=1, subsample=0.8, colsample_bynode=0.8, reg_lambda=1e-5, **kwargs): super().__init__(learning_rate=learning_rate, subsample=subsample, @@ -1161,6 +1165,7 @@ def get_num_boosting_rounds(self): ''') class XGBRanker(XGBModel): # pylint: disable=missing-docstring,too-many-arguments,invalid-name + @_deprecate_positional_args def __init__(self, objective='rank:pairwise', **kwargs): super().__init__(objective=objective, **kwargs) if callable(self.objective): From b489c04e48d884899690a8c8b26f617e32a6f3b9 Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 10 Nov 2020 14:28:57 +0800 Subject: [PATCH 
03/14] Better msg. --- python-package/xgboost/core.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 2677e6f32395..c8759bccc0c4 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -396,15 +396,17 @@ def inner_f(*args, **kwargs): extra_args = len(args) - len(all_args) if extra_args > 0: # ignore first 'self' argument for instance methods - args_msg = ['{}={}'.format(name, arg) - for name, arg in zip(kwonly_args[:extra_args], - args[-extra_args:])] - warnings.warn("Pass {} as keyword args. From version 0.25 " - "passing these as positional arguments will " - "result in an error".format(", ".join(args_msg)), - FutureWarning) + args_msg = [ + '{}={}'.format(name, arg) for name, arg in zip( + kwonly_args[:extra_args], args[-extra_args:]) + ] + warnings.warn( + "Pass {} as keyword args. Passing these as positional " + "arguments will be considered as error in uture releases.". + format(", ".join(args_msg)), FutureWarning) kwargs.update({k: arg for k, arg in zip(sig.parameters, args)}) return f(**kwargs) + return inner_f From 064140a09992b9b8b8cf55aa99d842e9c7bbadc6 Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 10 Nov 2020 14:30:09 +0800 Subject: [PATCH 04/14] Setinfo. --- python-package/xgboost/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index c8759bccc0c4..57dae95ed42b 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -502,6 +502,7 @@ def __del__(self): _check_call(_LIB.XGDMatrixFree(self.handle)) self.handle = None + @_deprecate_positional_args def set_info(self, label=None, weight=None, base_margin=None, group=None, From b802c0812820a294770fbc2b5a07c262934c866f Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 10 Nov 2020 15:11:12 +0800 Subject: [PATCH 05/14] Keyword only. 
--- python-package/xgboost/core.py | 2 +- python-package/xgboost/sklearn.py | 10 +++++----- tests/python/test_with_sklearn.py | 5 +++++ 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 57dae95ed42b..3d3ec0955c69 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -503,7 +503,7 @@ def __del__(self): self.handle = None @_deprecate_positional_args - def set_info(self, + def set_info(self, *, label=None, weight=None, base_margin=None, group=None, label_lower_bound=None, diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index dc9fbb1ee78f..3cca98b3adcb 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -776,7 +776,7 @@ def intercept_(self): class XGBClassifier(XGBModel, XGBClassifierBase): # pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes @_deprecate_positional_args - def __init__(self, objective="binary:logistic", use_label_encoder=True, **kwargs): + def __init__(self, *, objective="binary:logistic", use_label_encoder=True, **kwargs): self.use_label_encoder = use_label_encoder super().__init__(objective=objective, **kwargs) @@ -1067,7 +1067,7 @@ def evals_result(self): class XGBRFClassifier(XGBClassifier): # pylint: disable=missing-docstring @_deprecate_positional_args - def __init__(self, + def __init__(self, *, learning_rate=1, subsample=0.8, colsample_bynode=0.8, @@ -1096,7 +1096,7 @@ def get_num_boosting_rounds(self): class XGBRegressor(XGBModel, XGBRegressorBase): # pylint: disable=missing-docstring @_deprecate_positional_args - def __init__(self, objective="reg:squarederror", **kwargs): + def __init__(self, *, objective="reg:squarederror", **kwargs): super().__init__(objective=objective, **kwargs) @@ -1109,7 +1109,7 @@ def __init__(self, objective="reg:squarederror", **kwargs): class XGBRFRegressor(XGBRegressor): # pylint: disable=missing-docstring 
@_deprecate_positional_args - def __init__(self, learning_rate=1, subsample=0.8, colsample_bynode=0.8, + def __init__(self, *, learning_rate=1, subsample=0.8, colsample_bynode=0.8, reg_lambda=1e-5, **kwargs): super().__init__(learning_rate=learning_rate, subsample=subsample, colsample_bynode=colsample_bynode, @@ -1166,7 +1166,7 @@ def get_num_boosting_rounds(self): class XGBRanker(XGBModel): # pylint: disable=missing-docstring,too-many-arguments,invalid-name @_deprecate_positional_args - def __init__(self, objective='rank:pairwise', **kwargs): + def __init__(self, *, objective='rank:pairwise', **kwargs): super().__init__(objective=objective, **kwargs) if callable(self.objective): raise ValueError( diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 7ffcb67b8216..e0a04dcfde48 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -889,6 +889,11 @@ def test_parameter_validation(): assert len(output) == 0 +def test_deprecate_position_arg(): + with pytest.warns(FutureWarning): + xgb.XGBRegressor(3, learning_rate=0.1) + + @pytest.mark.skipif(**tm.no_pandas()) def test_pandas_input(): import pandas as pd From 362e14916c1b8a467398552aea97b11ba520a4db Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 10 Nov 2020 15:18:00 +0800 Subject: [PATCH 06/14] Remove skl warnings. 
--- tests/python/test_with_sklearn.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index e0a04dcfde48..3343aeb65467 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -32,7 +32,7 @@ def test_binary_classification(): from sklearn.datasets import load_digits from sklearn.model_selection import KFold - digits = load_digits(2) + digits = load_digits(n_class=2) y = digits['target'] X = digits['data'] kf = KFold(n_splits=2, shuffle=True, random_state=rng) @@ -166,7 +166,7 @@ def test_stacking_classification(): def test_feature_importances_weight(): from sklearn.datasets import load_digits - digits = load_digits(2) + digits = load_digits(n_class=2) y = digits['target'] X = digits['data'] xgb_model = xgb.XGBClassifier(random_state=0, @@ -204,7 +204,7 @@ def test_feature_importances_weight(): def test_feature_importances_gain(): from sklearn.datasets import load_digits - digits = load_digits(2) + digits = load_digits(n_class=2) y = digits['target'] X = digits['data'] xgb_model = xgb.XGBClassifier( @@ -243,7 +243,7 @@ def test_feature_importances_gain(): def test_select_feature(): from sklearn.datasets import load_digits from sklearn.feature_selection import SelectFromModel - digits = load_digits(2) + digits = load_digits(n_class=2) y = digits['target'] X = digits['data'] cls = xgb.XGBClassifier() @@ -376,7 +376,7 @@ def logregobj(y_true, y_pred): hess = y_pred * (1.0 - y_pred) return grad, hess - digits = load_digits(2) + digits = load_digits(n_class=2) y = digits['target'] X = digits['data'] kf = KFold(n_splits=2, shuffle=True, random_state=rng) @@ -473,7 +473,7 @@ def test_sklearn_nfolds_cv(): from sklearn.datasets import load_digits from sklearn.model_selection import StratifiedKFold - digits = load_digits(3) + digits = load_digits(n_class=3) X = digits['data'] y = digits['target'] dm = xgb.DMatrix(X, label=y) @@ -505,7 +505,7 
@@ def test_sklearn_nfolds_cv(): def test_split_value_histograms(): from sklearn.datasets import load_digits - digits_2class = load_digits(2) + digits_2class = load_digits(n_class=2) X = digits_2class['data'] y = digits_2class['target'] @@ -591,7 +591,7 @@ def test_sklearn_clone(): def test_sklearn_get_default_params(): from sklearn.datasets import load_digits - digits_2class = load_digits(2) + digits_2class = load_digits(n_class=2) X = digits_2class['data'] y = digits_2class['target'] cls = xgb.XGBClassifier() From 189a78975d6a44edc2625959407096dee5107695 Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 10 Nov 2020 15:50:07 +0800 Subject: [PATCH 07/14] Deduplicate evaluation set configuration. --- python-package/xgboost/sklearn.py | 103 ++++++++++++++---------------- 1 file changed, 47 insertions(+), 56 deletions(-) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 3cca98b3adcb..157044a91dcf 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -248,6 +248,39 @@ def __init__(self, max_depth=None, learning_rate=None, n_estimators=100, self.gpu_id = gpu_id self.validate_parameters = validate_parameters + def _wrap_evaluation_matrices(self, X, y, group, + sample_weight, train_dmatrix: DMatrix, + eval_set, sample_weight_eval_set, eval_group, + label_transform=lambda x: x): + '''Convert array_like evaluation matrices into DMatrix''' + if eval_group is None and eval_set is not None: + eval_group = [None] * len(eval_set) + + if eval_set is not None: + if sample_weight_eval_set is None: + sample_weight_eval_set = [None] * len(eval_set) + else: + assert len(sample_weight_eval_set) == len(eval_set) + + evals = [] + for i in range(len(eval_set)): + if eval_set[i][0] is X and eval_set[i][1] is y and \ + sample_weight_eval_set[i] is sample_weight and eval_group[i] is group: + evals.append(train_dmatrix) + else: + m = DMatrix(eval_set[i][0], + label=label_transform(eval_set[i][1]), + missing=self.missing, 
weight=sample_weight_eval_set[i], + nthread=self.n_jobs) + m.set_info(group=eval_group[i]) + evals.append(m) + nevals = len(evals) + eval_names = ["validation_{}".format(i) for i in range(nevals)] + evals = list(zip(evals, eval_names)) + else: + evals = () + return evals + def _more_tags(self): '''Tags used for scikit-learn data validation.''' return {'allow_nan': True, 'no_validation': True} @@ -524,22 +557,10 @@ def fit(self, X, y, sample_weight=None, base_margin=None, evals_result = {} - if eval_set is not None: - if not isinstance(eval_set[0], (list, tuple)): - raise TypeError('Unexpected input type for `eval_set`') - if sample_weight_eval_set is None: - sample_weight_eval_set = [None] * len(eval_set) - else: - assert len(eval_set) == len(sample_weight_eval_set) - evals = list( - DMatrix(eval_set[i][0], label=eval_set[i][1], missing=self.missing, - weight=sample_weight_eval_set[i], nthread=self.n_jobs) - for i in range(len(eval_set))) - evals = list(zip(evals, ["validation_{}".format(i) for i in - range(len(evals))])) - else: - evals = () - + evals = self._wrap_evaluation_matrices( + X, y, group=None, sample_weight=sample_weight, train_dmatrix=train_dmatrix, + eval_set=eval_set, sample_weight_eval_set=sample_weight_eval_set, + eval_group=None) params = self.get_xgb_params() if callable(self.objective): @@ -854,24 +875,6 @@ def fit(self, X, y, sample_weight=None, base_margin=None, label_transform = (lambda x: x) training_labels = label_transform(y) - if eval_set is not None: - if sample_weight_eval_set is None: - sample_weight_eval_set = [None] * len(eval_set) - else: - assert len(sample_weight_eval_set) == len(eval_set) - evals = list( - DMatrix(eval_set[i][0], - label=label_transform(eval_set[i][1]), - missing=self.missing, weight=sample_weight_eval_set[i], - nthread=self.n_jobs) - for i in range(len(eval_set)) - ) - nevals = len(evals) - eval_names = ["validation_{}".format(i) for i in range(nevals)] - evals = list(zip(evals, eval_names)) - else: - evals = () 
- if len(X.shape) != 2: # Simply raise an error here since there might be many # different ways of reshaping @@ -885,6 +888,10 @@ def fit(self, X, y, sample_weight=None, base_margin=None, base_margin=base_margin, missing=self.missing, nthread=self.n_jobs) train_dmatrix.set_info(feature_weights=feature_weights) + evals = self._wrap_evaluation_matrices( + X, y, group=None, sample_weight=sample_weight, train_dmatrix=train_dmatrix, + eval_set=eval_set, sample_weight_eval_set=sample_weight_eval_set, + eval_group=None, label_transform=label_transform) self._Booster = train(xgb_options, train_dmatrix, self.get_num_boosting_rounds(), @@ -1275,11 +1282,6 @@ def fit(self, X, y, group, sample_weight=None, base_margin=None, raise ValueError( "group is required for all eval datasets for ranking task") - def _dmat_init(group, **params): - ret = DMatrix(**params) - ret.set_group(group) - return ret - self.n_features_in_ = X.shape[1] train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight, @@ -1288,24 +1290,13 @@ def _dmat_init(group, **params): train_dmatrix.set_info(feature_weights=feature_weights) train_dmatrix.set_group(group) - evals_result = {} - - if eval_set is not None: - if sample_weight_eval_set is None: - sample_weight_eval_set = [None] * len(eval_set) - evals = [_dmat_init(eval_group[i], - data=eval_set[i][0], - label=eval_set[i][1], - missing=self.missing, - weight=sample_weight_eval_set[i], - nthread=self.n_jobs) - for i in range(len(eval_set))] - nevals = len(evals) - eval_names = ["eval_{}".format(i) for i in range(nevals)] - evals = list(zip(evals, eval_names)) - else: - evals = () + evals = self._wrap_evaluation_matrices( + X, y, group=group, sample_weight=sample_weight, train_dmatrix=train_dmatrix, + eval_set=eval_set, + sample_weight_eval_set=sample_weight_eval_set, + eval_group=eval_group) + evals_result = {} params = self.get_xgb_params() feval = eval_metric if callable(eval_metric) else None From 372d2e44dfe4f46cbccfa35282aae4b4c568d5ab Mon Sep 17 
00:00:00 2001 From: fis Date: Tue, 10 Nov 2020 16:02:20 +0800 Subject: [PATCH 08/14] Lint. --- python-package/xgboost/core.py | 3 ++- python-package/xgboost/sklearn.py | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 3d3ec0955c69..d819e3107dd2 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -404,7 +404,8 @@ def inner_f(*args, **kwargs): "Pass {} as keyword args. Passing these as positional " "arguments will be considered as error in uture releases.". format(", ".join(args_msg)), FutureWarning) - kwargs.update({k: arg for k, arg in zip(sig.parameters, args)}) + for k, arg in zip(sig.parameters, args): + kwargs[k] = arg return f(**kwargs) return inner_f diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 157044a91dcf..1090f557f2ea 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -263,17 +263,18 @@ def _wrap_evaluation_matrices(self, X, y, group, assert len(sample_weight_eval_set) == len(eval_set) evals = [] - for i in range(len(eval_set)): - if eval_set[i][0] is X and eval_set[i][1] is y and \ + for i, (valid_X, valid_y) in enumerate(eval_set): + if valid_X is X and valid_y is y and \ sample_weight_eval_set[i] is sample_weight and eval_group[i] is group: evals.append(train_dmatrix) else: - m = DMatrix(eval_set[i][0], - label=label_transform(eval_set[i][1]), + m = DMatrix(valid_X, + label=label_transform(valid_y), missing=self.missing, weight=sample_weight_eval_set[i], nthread=self.n_jobs) m.set_info(group=eval_group[i]) evals.append(m) + nevals = len(evals) eval_names = ["validation_{}".format(i) for i in range(nevals)] evals = list(zip(evals, eval_names)) From 49bdb89dcd8b4527c73297abfabcb05475d79675 Mon Sep 17 00:00:00 2001 From: fis Date: Tue, 10 Nov 2020 16:19:36 +0800 Subject: [PATCH 09/14] doc. 
--- python-package/xgboost/core.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index d819e3107dd2..89e4220680b2 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -371,9 +371,21 @@ def next(self, input_data): raise NotImplementedError() +# Notice for `_deprecate_positional_args` +# Authors: Olivier Grisel +# Gael Varoquaux +# Andreas Mueller +# Lars Buitinck +# Alexandre Gramfort +# Nicolas Tresegnie +# Sylvain Marie +# License: BSD 3 clause def _deprecate_positional_args(f): """Decorator for methods that issues warnings for positional arguments + Using the keyword-only argument syntax in PEP 3102, arguments after the + * will issue a warning when passed as a positional argument. + Modifed from sklearn utils.validation. Parameters @@ -402,7 +414,7 @@ def inner_f(*args, **kwargs): ] warnings.warn( "Pass {} as keyword args. Passing these as positional " - "arguments will be considered as error in uture releases.". + "arguments will be considered as error in future releases.". format(", ".join(args_msg)), FutureWarning) for k, arg in zip(sig.parameters, args): kwargs[k] = arg From 2aa695849121da400618abcb6896ef26c9469928 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 12 Nov 2020 13:46:45 +0800 Subject: [PATCH 10/14] Fix `fit`.
--- python-package/xgboost/sklearn.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 1090f557f2ea..3000973d7d2c 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -480,7 +480,7 @@ def load_model(self, fname): self.get_booster().set_attr(scikit_learn=None) @_deprecate_positional_args - def fit(self, X, y, sample_weight=None, base_margin=None, + def fit(self, *, X, y, sample_weight=None, base_margin=None, eval_set=None, eval_metric=None, early_stopping_rounds=None, verbose=True, xgb_model=None, sample_weight_eval_set=None, feature_weights=None, @@ -803,7 +803,7 @@ def __init__(self, *, objective="binary:logistic", use_label_encoder=True, **kwa super().__init__(objective=objective, **kwargs) @_deprecate_positional_args - def fit(self, X, y, sample_weight=None, base_margin=None, + def fit(self, *, X, y, sample_weight=None, base_margin=None, eval_set=None, eval_metric=None, early_stopping_rounds=None, verbose=True, xgb_model=None, sample_weight_eval_set=None, feature_weights=None, callbacks=None): @@ -1183,7 +1183,7 @@ def __init__(self, *, objective='rank:pairwise', **kwargs): raise ValueError("please use XGBRanker for ranking task") @_deprecate_positional_args - def fit(self, X, y, group, sample_weight=None, base_margin=None, + def fit(self, *, X, y, group, sample_weight=None, base_margin=None, eval_set=None, sample_weight_eval_set=None, eval_group=None, eval_metric=None, early_stopping_rounds=None, verbose=False, xgb_model=None, From ae048ecfa587eae686a7b7dea15f3f9a518f6e29 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 12 Nov 2020 13:47:53 +0800 Subject: [PATCH 11/14] Assert. 
--- python-package/xgboost/sklearn.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 3000973d7d2c..661dd9b400dd 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -255,6 +255,8 @@ def _wrap_evaluation_matrices(self, X, y, group, '''Convert array_like evaluation matrices into DMatrix''' if eval_group is None and eval_set is not None: eval_group = [None] * len(eval_set) + else: + assert len(eval_group) == len(eval_set) if eval_set is not None: if sample_weight_eval_set is None: From 82216aa69d91d39ea7cb6c7ac0504bc077730690 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 12 Nov 2020 14:13:35 +0800 Subject: [PATCH 12/14] Tests. --- python-package/xgboost/sklearn.py | 59 +++++++++++++++---------------- tests/python/test_with_sklearn.py | 31 ++++++++++++++++ 2 files changed, 60 insertions(+), 30 deletions(-) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 661dd9b400dd..30d0afdad2fb 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -249,23 +249,32 @@ def __init__(self, max_depth=None, learning_rate=None, n_estimators=100, self.validate_parameters = validate_parameters def _wrap_evaluation_matrices(self, X, y, group, - sample_weight, train_dmatrix: DMatrix, + sample_weight, base_margin, feature_weights, eval_set, sample_weight_eval_set, eval_group, label_transform=lambda x: x): '''Convert array_like evaluation matrices into DMatrix''' - if eval_group is None and eval_set is not None: - eval_group = [None] * len(eval_set) - else: + if sample_weight_eval_set is not None: + assert eval_set is not None + assert len(sample_weight_eval_set) == len(eval_set) + if eval_group is not None: + assert eval_set is not None assert len(eval_group) == len(eval_set) + y = label_transform(y) + train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight, + base_margin=base_margin, + 
missing=self.missing, nthread=self.n_jobs) + train_dmatrix.set_info(feature_weights=feature_weights, group=group) + if eval_set is not None: if sample_weight_eval_set is None: sample_weight_eval_set = [None] * len(eval_set) - else: - assert len(sample_weight_eval_set) == len(eval_set) + if eval_group is None: + eval_group = [None] * len(eval_set) evals = [] for i, (valid_X, valid_y) in enumerate(eval_set): + # Skip the duplicated entry. if valid_X is X and valid_y is y and \ sample_weight_eval_set[i] is sample_weight and eval_group[i] is group: evals.append(train_dmatrix) @@ -282,7 +291,7 @@ def _wrap_evaluation_matrices(self, X, y, group, evals = list(zip(evals, eval_names)) else: evals = () - return evals + return train_dmatrix, evals def _more_tags(self): '''Tags used for scikit-learn data validation.''' @@ -482,7 +491,7 @@ def load_model(self, fname): self.get_booster().set_attr(scikit_learn=None) @_deprecate_positional_args - def fit(self, *, X, y, sample_weight=None, base_margin=None, + def fit(self, X, y, *, sample_weight=None, base_margin=None, eval_set=None, eval_metric=None, early_stopping_rounds=None, verbose=True, xgb_model=None, sample_weight_eval_set=None, feature_weights=None, @@ -560,10 +569,10 @@ def fit(self, *, X, y, sample_weight=None, base_margin=None, evals_result = {} - evals = self._wrap_evaluation_matrices( - X, y, group=None, sample_weight=sample_weight, train_dmatrix=train_dmatrix, - eval_set=eval_set, sample_weight_eval_set=sample_weight_eval_set, - eval_group=None) + train_dmatrix, evals = self._wrap_evaluation_matrices( + X, y, group=None, sample_weight=sample_weight, base_margin=base_margin, + feature_weights=feature_weights, eval_set=eval_set, + sample_weight_eval_set=sample_weight_eval_set, eval_group=None) params = self.get_xgb_params() if callable(self.objective): @@ -805,7 +814,7 @@ def __init__(self, *, objective="binary:logistic", use_label_encoder=True, **kwa super().__init__(objective=objective, **kwargs) 
@_deprecate_positional_args - def fit(self, *, X, y, sample_weight=None, base_margin=None, + def fit(self, X, y, *, sample_weight=None, base_margin=None, eval_set=None, eval_metric=None, early_stopping_rounds=None, verbose=True, xgb_model=None, sample_weight_eval_set=None, feature_weights=None, callbacks=None): @@ -876,7 +885,6 @@ def fit(self, *, X, y, sample_weight=None, base_margin=None, label_transform = self._le.transform else: label_transform = (lambda x: x) - training_labels = label_transform(y) if len(X.shape) != 2: # Simply raise an error here since there might be many @@ -887,12 +895,9 @@ def fit(self, *, X, y, sample_weight=None, base_margin=None, self._features_count = X.shape[1] self.n_features_in_ = self._features_count - train_dmatrix = DMatrix(X, label=training_labels, weight=sample_weight, - base_margin=base_margin, - missing=self.missing, nthread=self.n_jobs) - train_dmatrix.set_info(feature_weights=feature_weights) - evals = self._wrap_evaluation_matrices( - X, y, group=None, sample_weight=sample_weight, train_dmatrix=train_dmatrix, + train_dmatrix, evals = self._wrap_evaluation_matrices( + X, y, group=None, sample_weight=sample_weight, base_margin=base_margin, + feature_weights=feature_weights, eval_set=eval_set, sample_weight_eval_set=sample_weight_eval_set, eval_group=None, label_transform=label_transform) @@ -1185,7 +1190,7 @@ def __init__(self, *, objective='rank:pairwise', **kwargs): raise ValueError("please use XGBRanker for ranking task") @_deprecate_positional_args - def fit(self, *, X, y, group, sample_weight=None, base_margin=None, + def fit(self, X, y, *, group, sample_weight=None, base_margin=None, eval_set=None, sample_weight_eval_set=None, eval_group=None, eval_metric=None, early_stopping_rounds=None, verbose=False, xgb_model=None, @@ -1287,15 +1292,9 @@ def fit(self, *, X, y, group, sample_weight=None, base_margin=None, self.n_features_in_ = X.shape[1] - train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight, - 
base_margin=base_margin, - missing=self.missing, nthread=self.n_jobs) - train_dmatrix.set_info(feature_weights=feature_weights) - train_dmatrix.set_group(group) - - evals = self._wrap_evaluation_matrices( - X, y, group=group, sample_weight=sample_weight, train_dmatrix=train_dmatrix, - eval_set=eval_set, + train_dmatrix, evals = self._wrap_evaluation_matrices( + X, y, group=group, sample_weight=sample_weight, base_margin=base_margin, + feature_weights=feature_weights, eval_set=eval_set, sample_weight_eval_set=sample_weight_eval_set, eval_group=eval_group) diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 3343aeb65467..80378ec966f9 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -890,8 +890,39 @@ def test_parameter_validation(): def test_deprecate_position_arg(): + from sklearn.datasets import load_digits + X, y = load_digits(return_X_y=True, n_class=2) + w = y with pytest.warns(FutureWarning): xgb.XGBRegressor(3, learning_rate=0.1) + model = xgb.XGBRegressor(n_estimators=1) + with pytest.warns(FutureWarning): + model.fit(X, y, w) + + with pytest.warns(FutureWarning): + xgb.XGBClassifier(1, use_label_encoder=False) + model = xgb.XGBClassifier(n_estimators=1, use_label_encoder=False) + with pytest.warns(FutureWarning): + model.fit(X, y, w) + + with pytest.warns(FutureWarning): + xgb.XGBRanker('rank:ndcg', learning_rate=0.1) + model = xgb.XGBRanker(n_estimators=1) + group = np.repeat(1, X.shape[0]) + with pytest.warns(FutureWarning): + model.fit(X, y, group) + + with pytest.warns(FutureWarning): + xgb.XGBRFRegressor(1, learning_rate=0.1) + model = xgb.XGBRFRegressor(n_estimators=1) + with pytest.warns(FutureWarning): + model.fit(X, y, w) + + with pytest.warns(FutureWarning): + xgb.XGBRFClassifier(1, use_label_encoder=True) + model = xgb.XGBRFClassifier(n_estimators=1) + with pytest.warns(FutureWarning): + model.fit(X, y, w) @pytest.mark.skipif(**tm.no_pandas()) From 
d06c104cd5ca22a338e4ee0c1031355615d7be2f Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 12 Nov 2020 14:33:57 +0800 Subject: [PATCH 13/14] On dask interface. --- python-package/xgboost/dask.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py index ba5316800d14..045cb885152e 100644 --- a/python-package/xgboost/dask.py +++ b/python-package/xgboost/dask.py @@ -31,6 +31,7 @@ from .compat import lazy_isinstance from .core import DMatrix, DeviceQuantileDMatrix, Booster, _expect, DataIter +from .core import _deprecate_positional_args from .training import train as worker_train from .tracker import RabitTracker from .sklearn import XGBModel, XGBRegressorBase, XGBClassifierBase @@ -1015,7 +1016,8 @@ class DaskScikitLearnBase(XGBModel): _client = None # pylint: disable=arguments-differ - def fit(self, X, y, + @_deprecate_positional_args + def fit(self, X, y, *, sample_weight=None, base_margin=None, eval_set=None, @@ -1039,6 +1041,8 @@ def fit(self, X, y, sample_weight_eval_set : list, optional A list of the form [L_1, L_2, ..., L_n], where each L_i is a list of group weights on the i-th validation set. + early_stopping_rounds : int + Activates early stopping. 
verbose : bool If `verbose` and an evaluation set is used, writes the evaluation metric measured on the validation set to stderr.''' @@ -1101,9 +1105,11 @@ async def _fit_async(self, X, y, sample_weight, base_margin, eval_set, return self # pylint: disable=missing-docstring + @_deprecate_positional_args def fit(self, X, y, + *, sample_weight=None, base_margin=None, eval_set=None, @@ -1183,9 +1189,11 @@ async def _fit_async(self, X, y, sample_weight, base_margin, eval_set, self.evals_result_ = results['history'] return self + @_deprecate_positional_args def fit(self, X, y, + *, sample_weight=None, base_margin=None, eval_set=None, From f84037800b2a30da69954f6847dbe325894aeaa9 Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 12 Nov 2020 15:10:39 +0800 Subject: [PATCH 14/14] Don't print the argument. --- python-package/xgboost/core.py | 4 ++-- tests/python/test_with_sklearn.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 89e4220680b2..17804715c29d 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -409,11 +409,11 @@ def inner_f(*args, **kwargs): if extra_args > 0: # ignore first 'self' argument for instance methods args_msg = [ - '{}={}'.format(name, arg) for name, arg in zip( + '{}'.format(name) for name, _ in zip( kwonly_args[:extra_args], args[-extra_args:]) ] warnings.warn( - "Pass {} as keyword args. Passing these as positional " + "Pass `{}` as keyword args. Passing these as positional " "arguments will be considered as error in future releases.". 
format(", ".join(args_msg)), FutureWarning) for k, arg in zip(sig.parameters, args): diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 80378ec966f9..de85a12f7361 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -96,7 +96,7 @@ def test_ranking(): 'learning_rate': 0.1, 'gamma': 1.0, 'min_child_weight': 0.1, 'max_depth': 6, 'n_estimators': 4} model = xgb.sklearn.XGBRanker(**params) - model.fit(x_train, y_train, train_group, + model.fit(x_train, y_train, group=train_group, eval_set=[(x_valid, y_valid)], eval_group=[valid_group]) pred = model.predict(x_test)