Deprecate positional arguments. #6365

Merged · 14 commits · Nov 13, 2020
57 changes: 56 additions & 1 deletion python-package/xgboost/core.py
@@ -12,6 +12,8 @@
import sys
import json
import warnings
from functools import wraps
from inspect import signature, Parameter

import numpy as np
import scipy.sparse
@@ -369,6 +371,58 @@ def next(self, input_data):
raise NotImplementedError()


# Notice for `_deprecate_positional_args`
# Authors: Olivier Grisel
# Gael Varoquaux
# Andreas Mueller
# Lars Buitinck
# Alexandre Gramfort
# Nicolas Tresegnie
# Sylvain Marie
# License: BSD 3 clause
def _deprecate_positional_args(f):
"""Decorator for methods that issues warnings for positional arguments

Using the keyword-only argument syntax from PEP 3102, arguments after the
* will trigger a warning when passed as positional arguments.

Modified from sklearn's utils.validation.

Parameters
----------
f : function
function to check arguments on
"""
sig = signature(f)
kwonly_args = []
all_args = []

for name, param in sig.parameters.items():
if param.kind == Parameter.POSITIONAL_OR_KEYWORD:
all_args.append(name)
elif param.kind == Parameter.KEYWORD_ONLY:
kwonly_args.append(name)

@wraps(f)
def inner_f(*args, **kwargs):
extra_args = len(args) - len(all_args)
if extra_args > 0:
# ignore first 'self' argument for instance methods
args_msg = [
'{}'.format(name) for name, _ in zip(
kwonly_args[:extra_args], args[-extra_args:])
]
warnings.warn(
"Pass `{}` as keyword args. Passing these as positional "
"arguments will be considered as error in future releases.".
format(", ".join(args_msg)), FutureWarning)
for k, arg in zip(sig.parameters, args):
kwargs[k] = arg
return f(**kwargs)

return inner_f
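
For context, a minimal sketch of the decorator's behaviour once applied; the function `demo` and its arguments are purely illustrative, and the import assumes the patched module is installed:

    import warnings
    from xgboost.core import _deprecate_positional_args

    @_deprecate_positional_args
    def demo(a, *, b=None):
        return a, b

    demo(1, b=2)  # fine: the keyword-only argument is passed by name
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        demo(1, 2)  # still works (2 is remapped to b), but warns
    print(caught[0].category)  # <class 'FutureWarning'>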


class DMatrix: # pylint: disable=too-many-instance-attributes
"""Data Matrix used in XGBoost.

@@ -461,7 +515,8 @@ def __del__(self):
_check_call(_LIB.XGDMatrixFree(self.handle))
self.handle = None

def set_info(self,
@_deprecate_positional_args
def set_info(self, *,
label=None, weight=None, base_margin=None,
group=None,
label_lower_bound=None,
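The signature above is truncated by the diff view; as a hedged sketch of the practical effect on callers (array names are made up):

    import numpy as np
    import xgboost as xgb

    X, y = np.random.rand(8, 3), np.random.rand(8)
    dmat = xgb.DMatrix(X)

    # all set_info arguments must now be named, so this call is unaffected:
    dmat.set_info(label=y, weight=np.ones(8))
    # a positional call such as dmat.set_info(y) emits a FutureWarning
    # and is remapped to dmat.set_info(label=y) by the decorator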
10 changes: 9 additions & 1 deletion python-package/xgboost/dask.py
@@ -31,6 +31,7 @@
from .compat import lazy_isinstance

from .core import DMatrix, DeviceQuantileDMatrix, Booster, _expect, DataIter
from .core import _deprecate_positional_args
from .training import train as worker_train
from .tracker import RabitTracker
from .sklearn import XGBModel, XGBRegressorBase, XGBClassifierBase
@@ -1015,7 +1016,8 @@ class DaskScikitLearnBase(XGBModel):
_client = None

# pylint: disable=arguments-differ
def fit(self, X, y,
@_deprecate_positional_args
def fit(self, X, y, *,
sample_weight=None,
base_margin=None,
eval_set=None,
@@ -1039,6 +1041,8 @@ def fit(self, X, y,
sample_weight_eval_set : list, optional
A list of the form [L_1, L_2, ..., L_n], where each L_i is a list
of group weights on the i-th validation set.
early_stopping_rounds : int
Activates early stopping. Validation metric needs to improve at
least once in every `early_stopping_rounds` round(s) to continue
training.
verbose : bool
If `verbose` and an evaluation set is used, writes the evaluation
metric measured on the validation set to stderr.'''
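
A usage sketch for the Dask estimator under the new signature; the local-cluster setup is illustrative and assumes a working dask.distributed environment:

    import dask.array as da
    from dask.distributed import Client, LocalCluster
    from xgboost.dask import DaskXGBRegressor

    client = Client(LocalCluster(n_workers=2))  # picked up as the default client
    X = da.random.random((100, 4), chunks=(25, 4))
    y = da.random.random(100, chunks=25)

    reg = DaskXGBRegressor(n_estimators=5)
    # sample_weight, eval_set, verbose, etc. are keyword-only after this change:
    reg.fit(X, y, eval_set=[(X, y)], verbose=False)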
@@ -1101,9 +1105,11 @@ async def _fit_async(self, X, y, sample_weight, base_margin, eval_set,
return self

# pylint: disable=missing-docstring
@_deprecate_positional_args
def fit(self,
X,
y,
*,
sample_weight=None,
base_margin=None,
eval_set=None,
@@ -1183,9 +1189,11 @@ async def _fit_async(self, X, y, sample_weight, base_margin, eval_set,
self.evals_result_ = results['history']
return self

@_deprecate_positional_args
def fit(self,
X,
y,
*,
sample_weight=None,
base_margin=None,
eval_set=None,
151 changes: 76 additions & 75 deletions python-package/xgboost/sklearn.py
@@ -5,7 +5,7 @@
import warnings
import json
import numpy as np
from .core import Booster, DMatrix, XGBoostError
from .core import Booster, DMatrix, XGBoostError, _deprecate_positional_args
from .training import train
from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array

@@ -248,6 +248,51 @@ def __init__(self, max_depth=None, learning_rate=None, n_estimators=100,
self.gpu_id = gpu_id
self.validate_parameters = validate_parameters

def _wrap_evaluation_matrices(self, X, y, group,
sample_weight, base_margin, feature_weights,
eval_set, sample_weight_eval_set, eval_group,
label_transform=lambda x: x):
'''Convert array_like evaluation matrices into DMatrix'''
if sample_weight_eval_set is not None:
assert eval_set is not None
assert len(sample_weight_eval_set) == len(eval_set)
if eval_group is not None:
assert eval_set is not None
assert len(eval_group) == len(eval_set)

y = label_transform(y)
train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight,
base_margin=base_margin,
missing=self.missing, nthread=self.n_jobs)
train_dmatrix.set_info(feature_weights=feature_weights, group=group)

if eval_set is not None:
if sample_weight_eval_set is None:
sample_weight_eval_set = [None] * len(eval_set)
if eval_group is None:
eval_group = [None] * len(eval_set)

evals = []
for i, (valid_X, valid_y) in enumerate(eval_set):
# Skip the duplicated entry.
if valid_X is X and valid_y is y and \
sample_weight_eval_set[i] is sample_weight and eval_group[i] is group:
evals.append(train_dmatrix)
else:
m = DMatrix(valid_X,
label=label_transform(valid_y),
missing=self.missing, weight=sample_weight_eval_set[i],
nthread=self.n_jobs)
m.set_info(group=eval_group[i])
evals.append(m)

nevals = len(evals)
eval_names = ["validation_{}".format(i) for i in range(nevals)]
evals = list(zip(evals, eval_names))
else:
evals = ()
return train_dmatrix, evals
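
End to end, the helper gives every entry of eval_set its own DMatrix and a validation_{i} name; a small random-data sketch of the observable result, assuming the patched wrapper:

    import numpy as np
    from xgboost import XGBRegressor

    X, y = np.random.rand(32, 4), np.random.rand(32)
    X_val, y_val = np.random.rand(8, 4), np.random.rand(8)

    reg = XGBRegressor(n_estimators=5)
    # fit routes eval_set through _wrap_evaluation_matrices internally
    reg.fit(X, y, eval_set=[(X_val, y_val)], verbose=False)
    print(list(reg.evals_result_))  # ['validation_0']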

def _more_tags(self):
'''Tags used for scikit-learn data validation.'''
return {'allow_nan': True, 'no_validation': True}
@@ -445,7 +490,8 @@ def load_model(self, fname):
# Delete the attribute after load
self.get_booster().set_attr(scikit_learn=None)

def fit(self, X, y, sample_weight=None, base_margin=None,
@_deprecate_positional_args
def fit(self, X, y, *, sample_weight=None, base_margin=None,
eval_set=None, eval_metric=None, early_stopping_rounds=None,
verbose=True, xgb_model=None, sample_weight_eval_set=None,
feature_weights=None,
@@ -523,22 +569,10 @@ def fit(self, X, y, sample_weight=None, base_margin=None,

evals_result = {}

if eval_set is not None:
if not isinstance(eval_set[0], (list, tuple)):
raise TypeError('Unexpected input type for `eval_set`')
if sample_weight_eval_set is None:
sample_weight_eval_set = [None] * len(eval_set)
else:
assert len(eval_set) == len(sample_weight_eval_set)
evals = list(
DMatrix(eval_set[i][0], label=eval_set[i][1], missing=self.missing,
weight=sample_weight_eval_set[i], nthread=self.n_jobs)
for i in range(len(eval_set)))
evals = list(zip(evals, ["validation_{}".format(i) for i in
range(len(evals))]))
else:
evals = ()

train_dmatrix, evals = self._wrap_evaluation_matrices(
X, y, group=None, sample_weight=sample_weight, base_margin=base_margin,
feature_weights=feature_weights, eval_set=eval_set,
sample_weight_eval_set=sample_weight_eval_set, eval_group=None)
params = self.get_xgb_params()

if callable(self.objective):
@@ -774,11 +808,13 @@ def intercept_(self):
''')
class XGBClassifier(XGBModel, XGBClassifierBase):
# pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes
def __init__(self, objective="binary:logistic", use_label_encoder=True, **kwargs):
@_deprecate_positional_args
def __init__(self, *, objective="binary:logistic", use_label_encoder=True, **kwargs):
self.use_label_encoder = use_label_encoder
super().__init__(objective=objective, **kwargs)
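
A brief sketch of what the keyword-only constructor means for callers (the use_label_encoder value is illustrative):

    from xgboost import XGBClassifier

    # keyword construction is unaffected by the deprecation:
    clf = XGBClassifier(objective="binary:logistic", use_label_encoder=False)

    # XGBClassifier("binary:logistic") would instead emit
    # FutureWarning: Pass `objective` as keyword args. ...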

def fit(self, X, y, sample_weight=None, base_margin=None,
@_deprecate_positional_args
def fit(self, X, y, *, sample_weight=None, base_margin=None,
eval_set=None, eval_metric=None,
early_stopping_rounds=None, verbose=True, xgb_model=None,
sample_weight_eval_set=None, feature_weights=None, callbacks=None):
@@ -849,25 +885,6 @@ def fit(self, X, y, sample_weight=None, base_margin=None,
label_transform = self._le.transform
else:
label_transform = (lambda x: x)
training_labels = label_transform(y)

if eval_set is not None:
if sample_weight_eval_set is None:
sample_weight_eval_set = [None] * len(eval_set)
else:
assert len(sample_weight_eval_set) == len(eval_set)
evals = list(
DMatrix(eval_set[i][0],
label=label_transform(eval_set[i][1]),
missing=self.missing, weight=sample_weight_eval_set[i],
nthread=self.n_jobs)
for i in range(len(eval_set))
)
nevals = len(evals)
eval_names = ["validation_{}".format(i) for i in range(nevals)]
evals = list(zip(evals, eval_names))
else:
evals = ()

if len(X.shape) != 2:
# Simply raise an error here since there might be many
@@ -878,10 +895,11 @@ def fit(self, X, y, sample_weight=None, base_margin=None,
self._features_count = X.shape[1]
self.n_features_in_ = self._features_count

train_dmatrix = DMatrix(X, label=training_labels, weight=sample_weight,
base_margin=base_margin,
missing=self.missing, nthread=self.n_jobs)
train_dmatrix.set_info(feature_weights=feature_weights)
train_dmatrix, evals = self._wrap_evaluation_matrices(
X, y, group=None, sample_weight=sample_weight, base_margin=base_margin,
feature_weights=feature_weights,
eval_set=eval_set, sample_weight_eval_set=sample_weight_eval_set,
eval_group=None, label_transform=label_transform)

self._Booster = train(xgb_options, train_dmatrix,
self.get_num_boosting_rounds(),
@@ -1063,7 +1081,8 @@ def evals_result(self):
''')
class XGBRFClassifier(XGBClassifier):
# pylint: disable=missing-docstring
def __init__(self,
@_deprecate_positional_args
def __init__(self, *,
learning_rate=1,
subsample=0.8,
colsample_bynode=0.8,
@@ -1091,7 +1110,8 @@ def get_num_boosting_rounds(self):
['estimators', 'model', 'objective'])
class XGBRegressor(XGBModel, XGBRegressorBase):
# pylint: disable=missing-docstring
def __init__(self, objective="reg:squarederror", **kwargs):
@_deprecate_positional_args
def __init__(self, *, objective="reg:squarederror", **kwargs):
super().__init__(objective=objective, **kwargs)


@@ -1103,7 +1123,8 @@ def __init__(self, objective="reg:squarederror", **kwargs):
''')
class XGBRFRegressor(XGBRegressor):
# pylint: disable=missing-docstring
def __init__(self, learning_rate=1, subsample=0.8, colsample_bynode=0.8,
@_deprecate_positional_args
def __init__(self, *, learning_rate=1, subsample=0.8, colsample_bynode=0.8,
reg_lambda=1e-5, **kwargs):
super().__init__(learning_rate=learning_rate, subsample=subsample,
colsample_bynode=colsample_bynode,
@@ -1159,15 +1180,17 @@ def get_num_boosting_rounds(self):
''')
class XGBRanker(XGBModel):
# pylint: disable=missing-docstring,too-many-arguments,invalid-name
def __init__(self, objective='rank:pairwise', **kwargs):
@_deprecate_positional_args
def __init__(self, *, objective='rank:pairwise', **kwargs):
super().__init__(objective=objective, **kwargs)
if callable(self.objective):
raise ValueError(
"custom objective function not supported by XGBRanker")
if "rank:" not in self.objective:
raise ValueError("please use XGBRanker for ranking task")

def fit(self, X, y, group, sample_weight=None, base_margin=None,
@_deprecate_positional_args
def fit(self, X, y, *, group, sample_weight=None, base_margin=None,
eval_set=None, sample_weight_eval_set=None,
eval_group=None, eval_metric=None,
early_stopping_rounds=None, verbose=False, xgb_model=None,
@@ -1267,37 +1290,15 @@ def fit(self, X, y, group, sample_weight=None, base_margin=None,
raise ValueError(
"group is required for all eval datasets for ranking task")

def _dmat_init(group, **params):
ret = DMatrix(**params)
ret.set_group(group)
return ret

self.n_features_in_ = X.shape[1]

train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight,
base_margin=base_margin,
missing=self.missing, nthread=self.n_jobs)
train_dmatrix.set_info(feature_weights=feature_weights)
train_dmatrix.set_group(group)
train_dmatrix, evals = self._wrap_evaluation_matrices(
X, y, group=group, sample_weight=sample_weight, base_margin=base_margin,
feature_weights=feature_weights, eval_set=eval_set,
sample_weight_eval_set=sample_weight_eval_set,
eval_group=eval_group)

evals_result = {}

if eval_set is not None:
if sample_weight_eval_set is None:
sample_weight_eval_set = [None] * len(eval_set)
evals = [_dmat_init(eval_group[i],
data=eval_set[i][0],
label=eval_set[i][1],
missing=self.missing,
weight=sample_weight_eval_set[i],
nthread=self.n_jobs)
for i in range(len(eval_set))]
nevals = len(evals)
eval_names = ["eval_{}".format(i) for i in range(nevals)]
evals = list(zip(evals, eval_names))
else:
evals = ()

params = self.get_xgb_params()

feval = eval_metric if callable(eval_metric) else None
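
Finally, a hedged sketch of the XGBRanker change: `group` is still required, but must now be passed by name (the data below is random and purely illustrative):

    import numpy as np
    from xgboost import XGBRanker

    X = np.random.rand(12, 3)
    y = np.random.randint(0, 3, size=12)  # graded relevance labels
    group = np.array([4, 4, 4])           # three query groups of four rows

    ranker = XGBRanker(n_estimators=5)
    # ranker.fit(X, y, group) would emit the FutureWarning; pass it by name:
    ranker.fit(X, y, group=group)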