Deprecate positional arguments. #6365

Merged: 14 commits, Nov 13, 2020
57 changes: 56 additions & 1 deletion python-package/xgboost/core.py
@@ -12,6 +12,8 @@
import sys
import json
import warnings
from functools import wraps
from inspect import signature, Parameter

import numpy as np
import scipy.sparse
@@ -369,6 +371,58 @@ def next(self, input_data):
raise NotImplementedError()


# Notice for `_deprecate_positional_args`
# Authors: Olivier Grisel
# Gael Varoquaux
# Andreas Mueller
# Lars Buitinck
# Alexandre Gramfort
# Nicolas Tresegnie
# Sylvain Marie
# License: BSD 3 clause
def _deprecate_positional_args(f):
"""Decorator for methods that issues warnings for positional arguments

Using the keyword-only argument syntax in pep 3102, arguments after the
* will issue a warning when passed as a positional argument.

Modifed from sklearn utils.validation.

Parameters
----------
f : function
function to check arguments on
"""
sig = signature(f)
kwonly_args = []
all_args = []

for name, param in sig.parameters.items():
if param.kind == Parameter.POSITIONAL_OR_KEYWORD:
all_args.append(name)
elif param.kind == Parameter.KEYWORD_ONLY:
kwonly_args.append(name)

@wraps(f)
def inner_f(*args, **kwargs):
extra_args = len(args) - len(all_args)
if extra_args > 0:
# ignore first 'self' argument for instance methods
args_msg = [
'{}={}'.format(name, arg) for name, arg in zip(
kwonly_args[:extra_args], args[-extra_args:])
]
            warnings.warn(
                "Pass {} as keyword args. Passing these as positional "
                "arguments will be considered an error in future releases.".
                format(", ".join(args_msg)), FutureWarning)
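        # re-map every received positional argument to its parameter name
        # so f is always invoked with keyword arguments only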
for k, arg in zip(sig.parameters, args):
kwargs[k] = arg
return f(**kwargs)

return inner_f


class DMatrix: # pylint: disable=too-many-instance-attributes
"""Data Matrix used in XGBoost.

@@ -461,7 +515,8 @@ def __del__(self):
_check_call(_LIB.XGDMatrixFree(self.handle))
self.handle = None

def set_info(self,
@_deprecate_positional_args
def set_info(self, *,
label=None, weight=None, base_margin=None,
group=None,
label_lower_bound=None,
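With this patch, calling the now keyword-only `DMatrix.set_info` positionally goes through the deprecation path above. A minimal sketch of the behavior, assuming this branch is installed (the data values are made up):

    import numpy as np
    import xgboost as xgb

    X = np.random.randn(10, 4)
    m = xgb.DMatrix(X)

    m.set_info(label=np.ones(10))   # keyword form: no warning

    # FutureWarning: Pass label=... as keyword args. Passing these as
    # positional arguments will be considered an error in future releases.
    m.set_info(np.ones(10))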
124 changes: 62 additions & 62 deletions python-package/xgboost/sklearn.py
@@ -5,7 +5,7 @@
import warnings
import json
import numpy as np
from .core import Booster, DMatrix, XGBoostError
from .core import Booster, DMatrix, XGBoostError, _deprecate_positional_args
from .training import train
from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array

@@ -248,6 +248,40 @@ def __init__(self, max_depth=None, learning_rate=None, n_estimators=100,
self.gpu_id = gpu_id
self.validate_parameters = validate_parameters

def _wrap_evaluation_matrices(self, X, y, group,
sample_weight, train_dmatrix: DMatrix,
eval_set, sample_weight_eval_set, eval_group,
label_transform=lambda x: x):
'''Convert array_like evaluation matrices into DMatrix'''
if eval_group is None and eval_set is not None:
eval_group = [None] * len(eval_set)

if eval_set is not None:
if sample_weight_eval_set is None:
sample_weight_eval_set = [None] * len(eval_set)
else:
assert len(sample_weight_eval_set) == len(eval_set)

evals = []
for i, (valid_X, valid_y) in enumerate(eval_set):
if valid_X is X and valid_y is y and \
sample_weight_eval_set[i] is sample_weight and eval_group[i] is group:
evals.append(train_dmatrix)
else:
m = DMatrix(valid_X,
label=label_transform(valid_y),
missing=self.missing, weight=sample_weight_eval_set[i],
nthread=self.n_jobs)
m.set_info(group=eval_group[i])
evals.append(m)

nevals = len(evals)
eval_names = ["validation_{}".format(i) for i in range(nevals)]
evals = list(zip(evals, eval_names))
else:
evals = ()
return evals
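
For reference, a hypothetical call showing the aliasing shortcut in `_wrap_evaluation_matrices` (a private helper; this snippet is illustrative, not part of the diff):

    import numpy as np
    import xgboost as xgb

    X, y = np.random.randn(20, 4), np.random.randint(0, 2, 20)
    model = xgb.XGBRegressor()
    dtrain = xgb.DMatrix(X, label=y)

    # eval_set[0] aliases (X, y), so the training DMatrix is reused
    # instead of being wrapped a second time.
    evals = model._wrap_evaluation_matrices(
        X, y, group=None, sample_weight=None, train_dmatrix=dtrain,
        eval_set=[(X, y)], sample_weight_eval_set=None, eval_group=None)
    assert evals == [(dtrain, "validation_0")]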

def _more_tags(self):
'''Tags used for scikit-learn data validation.'''
return {'allow_nan': True, 'no_validation': True}
@@ -445,6 +479,7 @@ def load_model(self, fname):
# Delete the attribute after load
self.get_booster().set_attr(scikit_learn=None)

@_deprecate_positional_args
def fit(self, X, y, sample_weight=None, base_margin=None,
eval_set=None, eval_metric=None, early_stopping_rounds=None,
verbose=True, xgb_model=None, sample_weight_eval_set=None,
@@ -523,22 +558,10 @@ def fit(self, X, y, sample_weight=None, base_margin=None,

evals_result = {}

if eval_set is not None:
if not isinstance(eval_set[0], (list, tuple)):
raise TypeError('Unexpected input type for `eval_set`')
if sample_weight_eval_set is None:
sample_weight_eval_set = [None] * len(eval_set)
else:
assert len(eval_set) == len(sample_weight_eval_set)
evals = list(
DMatrix(eval_set[i][0], label=eval_set[i][1], missing=self.missing,
weight=sample_weight_eval_set[i], nthread=self.n_jobs)
for i in range(len(eval_set)))
evals = list(zip(evals, ["validation_{}".format(i) for i in
range(len(evals))]))
else:
evals = ()

evals = self._wrap_evaluation_matrices(
X, y, group=None, sample_weight=sample_weight, train_dmatrix=train_dmatrix,
eval_set=eval_set, sample_weight_eval_set=sample_weight_eval_set,
eval_group=None)
params = self.get_xgb_params()

if callable(self.objective):
@@ -774,10 +797,12 @@ def intercept_(self):
''')
class XGBClassifier(XGBModel, XGBClassifierBase):
# pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes
def __init__(self, objective="binary:logistic", use_label_encoder=True, **kwargs):
@_deprecate_positional_args
def __init__(self, *, objective="binary:logistic", use_label_encoder=True, **kwargs):
self.use_label_encoder = use_label_encoder
super().__init__(objective=objective, **kwargs)
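
With the constructor now keyword-only, positional construction emits a FutureWarning; a short sketch assuming this branch is installed:

    import xgboost as xgb

    xgb.XGBClassifier(objective="binary:logistic")  # keyword form: no warning

    # FutureWarning: Pass objective=binary:logistic as keyword args. ...
    xgb.XGBClassifier("binary:logistic")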

@_deprecate_positional_args
def fit(self, X, y, sample_weight=None, base_margin=None,
eval_set=None, eval_metric=None,
early_stopping_rounds=None, verbose=True, xgb_model=None,
@@ -851,24 +876,6 @@ def fit(self, X, y, sample_weight=None, base_margin=None,
label_transform = (lambda x: x)
training_labels = label_transform(y)

if eval_set is not None:
if sample_weight_eval_set is None:
sample_weight_eval_set = [None] * len(eval_set)
else:
assert len(sample_weight_eval_set) == len(eval_set)
evals = list(
DMatrix(eval_set[i][0],
label=label_transform(eval_set[i][1]),
missing=self.missing, weight=sample_weight_eval_set[i],
nthread=self.n_jobs)
for i in range(len(eval_set))
)
nevals = len(evals)
eval_names = ["validation_{}".format(i) for i in range(nevals)]
evals = list(zip(evals, eval_names))
else:
evals = ()

if len(X.shape) != 2:
# Simply raise an error here since there might be many
# different ways of reshaping
@@ -882,6 +889,10 @@ def fit(self, X, y, sample_weight=None, base_margin=None,
base_margin=base_margin,
missing=self.missing, nthread=self.n_jobs)
train_dmatrix.set_info(feature_weights=feature_weights)
evals = self._wrap_evaluation_matrices(
X, y, group=None, sample_weight=sample_weight, train_dmatrix=train_dmatrix,
eval_set=eval_set, sample_weight_eval_set=sample_weight_eval_set,
eval_group=None, label_transform=label_transform)

self._Booster = train(xgb_options, train_dmatrix,
self.get_num_boosting_rounds(),
@@ -1063,7 +1074,8 @@ def evals_result(self):
''')
class XGBRFClassifier(XGBClassifier):
# pylint: disable=missing-docstring
def __init__(self,
@_deprecate_positional_args
def __init__(self, *,
learning_rate=1,
subsample=0.8,
colsample_bynode=0.8,
@@ -1091,7 +1103,8 @@ def get_num_boosting_rounds(self):
['estimators', 'model', 'objective'])
class XGBRegressor(XGBModel, XGBRegressorBase):
# pylint: disable=missing-docstring
def __init__(self, objective="reg:squarederror", **kwargs):
@_deprecate_positional_args
def __init__(self, *, objective="reg:squarederror", **kwargs):
super().__init__(objective=objective, **kwargs)


Expand All @@ -1103,7 +1116,8 @@ def __init__(self, objective="reg:squarederror", **kwargs):
''')
class XGBRFRegressor(XGBRegressor):
# pylint: disable=missing-docstring
def __init__(self, learning_rate=1, subsample=0.8, colsample_bynode=0.8,
@_deprecate_positional_args
def __init__(self, *, learning_rate=1, subsample=0.8, colsample_bynode=0.8,
reg_lambda=1e-5, **kwargs):
super().__init__(learning_rate=learning_rate, subsample=subsample,
colsample_bynode=colsample_bynode,
@@ -1159,14 +1173,16 @@ def get_num_boosting_rounds(self):
''')
class XGBRanker(XGBModel):
# pylint: disable=missing-docstring,too-many-arguments,invalid-name
def __init__(self, objective='rank:pairwise', **kwargs):
@_deprecate_positional_args
def __init__(self, *, objective='rank:pairwise', **kwargs):
super().__init__(objective=objective, **kwargs)
if callable(self.objective):
raise ValueError(
"custom objective function not supported by XGBRanker")
if "rank:" not in self.objective:
raise ValueError("please use XGBRanker for ranking task")

@_deprecate_positional_args
def fit(self, X, y, group, sample_weight=None, base_margin=None,
eval_set=None, sample_weight_eval_set=None,
eval_group=None, eval_metric=None,
@@ -1267,11 +1283,6 @@ def fit(self, X, y, group, sample_weight=None, base_margin=None,
raise ValueError(
"group is required for all eval datasets for ranking task")

def _dmat_init(group, **params):
ret = DMatrix(**params)
ret.set_group(group)
return ret

self.n_features_in_ = X.shape[1]

train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight,
@@ -1280,24 +1291,13 @@ def _dmat_init(group, **params):
train_dmatrix.set_info(feature_weights=feature_weights)
train_dmatrix.set_group(group)

evals_result = {}

if eval_set is not None:
if sample_weight_eval_set is None:
sample_weight_eval_set = [None] * len(eval_set)
evals = [_dmat_init(eval_group[i],
data=eval_set[i][0],
label=eval_set[i][1],
missing=self.missing,
weight=sample_weight_eval_set[i],
nthread=self.n_jobs)
for i in range(len(eval_set))]
nevals = len(evals)
eval_names = ["eval_{}".format(i) for i in range(nevals)]
evals = list(zip(evals, eval_names))
else:
evals = ()
evals = self._wrap_evaluation_matrices(
X, y, group=group, sample_weight=sample_weight, train_dmatrix=train_dmatrix,
eval_set=eval_set,
sample_weight_eval_set=sample_weight_eval_set,
eval_group=eval_group)

evals_result = {}
params = self.get_xgb_params()

feval = eval_metric if callable(eval_metric) else None
21 changes: 13 additions & 8 deletions tests/python/test_with_sklearn.py
@@ -32,7 +32,7 @@ def test_binary_classification():
from sklearn.datasets import load_digits
from sklearn.model_selection import KFold

digits = load_digits(2)
digits = load_digits(n_class=2)
y = digits['target']
X = digits['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
@@ -166,7 +166,7 @@ def test_stacking_classification():
def test_feature_importances_weight():
from sklearn.datasets import load_digits

digits = load_digits(2)
digits = load_digits(n_class=2)
y = digits['target']
X = digits['data']
xgb_model = xgb.XGBClassifier(random_state=0,
@@ -204,7 +204,7 @@ def test_feature_importances_weight():
def test_feature_importances_gain():
from sklearn.datasets import load_digits

digits = load_digits(2)
digits = load_digits(n_class=2)
y = digits['target']
X = digits['data']
xgb_model = xgb.XGBClassifier(
@@ -243,7 +243,7 @@ def test_feature_importances_gain():
def test_select_feature():
from sklearn.datasets import load_digits
from sklearn.feature_selection import SelectFromModel
digits = load_digits(2)
digits = load_digits(n_class=2)
y = digits['target']
X = digits['data']
cls = xgb.XGBClassifier()
@@ -376,7 +376,7 @@ def logregobj(y_true, y_pred):
hess = y_pred * (1.0 - y_pred)
return grad, hess

digits = load_digits(2)
digits = load_digits(n_class=2)
y = digits['target']
X = digits['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
@@ -473,7 +473,7 @@ def test_sklearn_nfolds_cv():
from sklearn.datasets import load_digits
from sklearn.model_selection import StratifiedKFold

digits = load_digits(3)
digits = load_digits(n_class=3)
X = digits['data']
y = digits['target']
dm = xgb.DMatrix(X, label=y)
@@ -505,7 +505,7 @@ def test_sklearn_nfolds_cv():
def test_split_value_histograms():
from sklearn.datasets import load_digits

digits_2class = load_digits(2)
digits_2class = load_digits(n_class=2)

X = digits_2class['data']
y = digits_2class['target']
@@ -591,7 +591,7 @@ def test_sklearn_clone():

def test_sklearn_get_default_params():
from sklearn.datasets import load_digits
digits_2class = load_digits(2)
digits_2class = load_digits(n_class=2)
X = digits_2class['data']
y = digits_2class['target']
cls = xgb.XGBClassifier()
@@ -889,6 +889,11 @@ def test_parameter_validation():
assert len(output) == 0


def test_deprecate_position_arg():
with pytest.warns(FutureWarning):
xgb.XGBRegressor(3, learning_rate=0.1)
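
A companion test could assert that the keyword form stays silent; a sketch under the same assumptions (hypothetical, not part of this diff):

    def test_keyword_args_no_warning():
        import warnings
        with warnings.catch_warnings():
            # escalate FutureWarning to an error so a stray deprecation fails
            warnings.simplefilter("error", FutureWarning)
            xgb.XGBRegressor(max_depth=3, learning_rate=0.1)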


@pytest.mark.skipif(**tm.no_pandas())
def test_pandas_input():
import pandas as pd