Deprecate positional arguments. (#6365)
Deprecate positional arguments in the following functions:

- `__init__` for all classes in the sklearn module.
- the `fit` method for all classes in the sklearn module.
- the dask interface.
- `set_info` for the `DMatrix` class.

Refactor the handling of evaluation matrices.
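
A minimal sketch of the effect (illustrative code, not part of the diff): arguments that are now keyword-only can still be passed positionally for the time being, but doing so emits a FutureWarning.

import warnings
import xgboost as xgb

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    xgb.XGBClassifier("binary:logistic")            # positional: FutureWarning
    xgb.XGBClassifier(objective="binary:logistic")  # keyword: no warning
assert any(c.category is FutureWarning for c in caught)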
trivialfis authored Nov 13, 2020
1 parent e5193c2 commit fcfeb49
Showing 4 changed files with 186 additions and 86 deletions.
57 changes: 56 additions & 1 deletion python-package/xgboost/core.py
@@ -12,6 +12,8 @@
import sys
import json
import warnings
from functools import wraps
from inspect import signature, Parameter

import numpy as np
import scipy.sparse
@@ -369,6 +371,58 @@ def next(self, input_data):
raise NotImplementedError()


# Notice for `_deprecate_positional_args`
# Authors: Olivier Grisel
# Gael Varoquaux
# Andreas Mueller
# Lars Buitinck
# Alexandre Gramfort
# Nicolas Tresegnie
# Sylvain Marie
# License: BSD 3 clause
def _deprecate_positional_args(f):
"""Decorator for methods that issues warnings for positional arguments
Using the keyword-only argument syntax in pep 3102, arguments after the
* will issue a warning when passed as a positional argument.
Modifed from sklearn utils.validation.
Parameters
----------
f : function
function to check arguments on
"""
sig = signature(f)
kwonly_args = []
all_args = []

for name, param in sig.parameters.items():
if param.kind == Parameter.POSITIONAL_OR_KEYWORD:
all_args.append(name)
elif param.kind == Parameter.KEYWORD_ONLY:
kwonly_args.append(name)

@wraps(f)
def inner_f(*args, **kwargs):
extra_args = len(args) - len(all_args)
if extra_args > 0:
# ignore first 'self' argument for instance methods
args_msg = [
'{}'.format(name) for name, _ in zip(
kwonly_args[:extra_args], args[-extra_args:])
]
warnings.warn(
"Pass `{}` as keyword args. Passing these as positional "
"arguments will be considered as error in future releases.".
format(", ".join(args_msg)), FutureWarning)
for k, arg in zip(sig.parameters, args):
kwargs[k] = arg
return f(**kwargs)

return inner_f
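
To make the mechanism concrete, a small sketch of the decorator applied to a toy function (`scale` is made up for illustration): positional values supplied for keyword-only parameters are remapped to keywords and a FutureWarning is emitted, so existing call sites keep working.

@_deprecate_positional_args
def scale(x, *, factor=2.0, offset=0.0):
    return x * factor + offset

scale(3.0, 1.5)         # warns: "Pass `factor` as keyword args. ..."
scale(3.0, factor=1.5)  # preferred form, no warning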


class DMatrix: # pylint: disable=too-many-instance-attributes
"""Data Matrix used in XGBoost.
@@ -461,7 +515,8 @@ def __del__(self):
_check_call(_LIB.XGDMatrixFree(self.handle))
self.handle = None

def set_info(self,
@_deprecate_positional_args
def set_info(self, *,
label=None, weight=None, base_margin=None,
group=None,
label_lower_bound=None,
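
With the decorator applied, `set_info` accepts its metadata only as keywords; a hedged sketch (array shapes illustrative):

import numpy as np
import xgboost as xgb

dtrain = xgb.DMatrix(np.random.rand(8, 3))
dtrain.set_info(label=np.random.randint(0, 2, size=8))  # keyword: no warning
dtrain.set_info(np.random.randint(0, 2, size=8))        # positional: FutureWarning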
10 changes: 9 additions & 1 deletion python-package/xgboost/dask.py
@@ -31,6 +31,7 @@
from .compat import lazy_isinstance

from .core import DMatrix, DeviceQuantileDMatrix, Booster, _expect, DataIter
from .core import _deprecate_positional_args
from .training import train as worker_train
from .tracker import RabitTracker
from .sklearn import XGBModel, XGBRegressorBase, XGBClassifierBase
@@ -1026,7 +1027,8 @@ class DaskScikitLearnBase(XGBModel):
_client = None

# pylint: disable=arguments-differ
def fit(self, X, y,
@_deprecate_positional_args
def fit(self, X, y, *,
sample_weight=None,
base_margin=None,
eval_set=None,
@@ -1050,6 +1052,8 @@ def fit(self, X, y,
sample_weight_eval_set : list, optional
A list of the form [L_1, L_2, ..., L_n], where each L_i is a list
of group weights on the i-th validation set.
early_stopping_rounds : int
Activates early stopping.
verbose : bool
If `verbose` and an evaluation set is used, writes the evaluation
metric measured on the validation set to stderr.'''
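
A hedged sketch of the dask interface after this change, assuming a local dask cluster is available; shapes and parameters below are illustrative only:

import dask.array as da
import xgboost as xgb
from dask.distributed import Client

client = Client()  # local cluster, for illustration
X = da.random.random((1000, 10), chunks=(100, 10))
y = da.random.randint(0, 2, size=1000, chunks=100)

clf = xgb.dask.DaskXGBClassifier(n_estimators=10)
clf.fit(X, y, eval_set=[(X, y)], early_stopping_rounds=2)  # keywords only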
@@ -1112,9 +1116,11 @@ async def _fit_async(self, X, y, sample_weight, base_margin, eval_set,
return self

# pylint: disable=missing-docstring
@_deprecate_positional_args
def fit(self,
X,
y,
*,
sample_weight=None,
base_margin=None,
eval_set=None,
@@ -1195,9 +1201,11 @@ async def _fit_async(self, X, y, sample_weight, base_margin, eval_set,
self.evals_result_ = results['history']
return self

@_deprecate_positional_args
def fit(self,
X,
y,
*,
sample_weight=None,
base_margin=None,
eval_set=None,
151 changes: 76 additions & 75 deletions python-package/xgboost/sklearn.py
@@ -5,7 +5,7 @@
import warnings
import json
import numpy as np
from .core import Booster, DMatrix, XGBoostError
from .core import Booster, DMatrix, XGBoostError, _deprecate_positional_args
from .training import train
from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array

@@ -248,6 +248,51 @@ def __init__(self, max_depth=None, learning_rate=None, n_estimators=100,
self.gpu_id = gpu_id
self.validate_parameters = validate_parameters

def _wrap_evaluation_matrices(self, X, y, group,
sample_weight, base_margin, feature_weights,
eval_set, sample_weight_eval_set, eval_group,
label_transform=lambda x: x):
'''Convert array_like evaluation matrices into DMatrix'''
if sample_weight_eval_set is not None:
assert eval_set is not None
assert len(sample_weight_eval_set) == len(eval_set)
if eval_group is not None:
assert eval_set is not None
assert len(eval_group) == len(eval_set)

y = label_transform(y)
train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight,
base_margin=base_margin,
missing=self.missing, nthread=self.n_jobs)
train_dmatrix.set_info(feature_weights=feature_weights, group=group)

if eval_set is not None:
if sample_weight_eval_set is None:
sample_weight_eval_set = [None] * len(eval_set)
if eval_group is None:
eval_group = [None] * len(eval_set)

evals = []
for i, (valid_X, valid_y) in enumerate(eval_set):
# Skip the duplicated entry.
if valid_X is X and valid_y is y and \
sample_weight_eval_set[i] is sample_weight and eval_group[i] is group:
evals.append(train_dmatrix)
else:
m = DMatrix(valid_X,
label=label_transform(valid_y),
missing=self.missing, weight=sample_weight_eval_set[i],
nthread=self.n_jobs)
m.set_info(group=eval_group[i])
evals.append(m)

nevals = len(evals)
eval_names = ["validation_{}".format(i) for i in range(nevals)]
evals = list(zip(evals, eval_names))
else:
evals = ()
return train_dmatrix, evals
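
A sketch of the deduplication this helper performs; it calls a private method purely for illustration, and `reg`, `X`, `y` are made up:

import numpy as np
import xgboost as xgb

X, y = np.random.rand(100, 4), np.random.rand(100)
reg = xgb.XGBRegressor()
dtrain, evals = reg._wrap_evaluation_matrices(
    X, y, group=None, sample_weight=None, base_margin=None,
    feature_weights=None, eval_set=[(X, y)],
    sample_weight_eval_set=None, eval_group=None)
assert evals[0][0] is dtrain  # identical eval entry reuses the training DMatrix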

def _more_tags(self):
'''Tags used for scikit-learn data validation.'''
return {'allow_nan': True, 'no_validation': True}
@@ -445,7 +490,8 @@ def load_model(self, fname):
# Delete the attribute after load
self.get_booster().set_attr(scikit_learn=None)

def fit(self, X, y, sample_weight=None, base_margin=None,
@_deprecate_positional_args
def fit(self, X, y, *, sample_weight=None, base_margin=None,
eval_set=None, eval_metric=None, early_stopping_rounds=None,
verbose=True, xgb_model=None, sample_weight_eval_set=None,
feature_weights=None,
@@ -524,22 +570,10 @@ def fit(self, X, y, sample_weight=None, base_margin=None,

evals_result = {}

if eval_set is not None:
if not isinstance(eval_set[0], (list, tuple)):
raise TypeError('Unexpected input type for `eval_set`')
if sample_weight_eval_set is None:
sample_weight_eval_set = [None] * len(eval_set)
else:
assert len(eval_set) == len(sample_weight_eval_set)
evals = list(
DMatrix(eval_set[i][0], label=eval_set[i][1], missing=self.missing,
weight=sample_weight_eval_set[i], nthread=self.n_jobs)
for i in range(len(eval_set)))
evals = list(zip(evals, ["validation_{}".format(i) for i in
range(len(evals))]))
else:
evals = ()

train_dmatrix, evals = self._wrap_evaluation_matrices(
X, y, group=None, sample_weight=sample_weight, base_margin=base_margin,
feature_weights=feature_weights, eval_set=eval_set,
sample_weight_eval_set=sample_weight_eval_set, eval_group=None)
params = self.get_xgb_params()

if callable(self.objective):
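
The net effect on `fit` (a hedged, illustrative sketch): metadata passed positionally still trains, but now emits a FutureWarning:

import warnings
import numpy as np
import xgboost as xgb

X, y, w = np.random.rand(20, 3), np.random.rand(20), np.ones(20)
reg = xgb.XGBRegressor(n_estimators=2)
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    reg.fit(X, y, w)  # `sample_weight` passed positionally
assert any(c.category is FutureWarning for c in caught)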
@@ -775,11 +809,13 @@ def intercept_(self):
''')
class XGBClassifier(XGBModel, XGBClassifierBase):
# pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes
def __init__(self, objective="binary:logistic", use_label_encoder=True, **kwargs):
@_deprecate_positional_args
def __init__(self, *, objective="binary:logistic", use_label_encoder=True, **kwargs):
self.use_label_encoder = use_label_encoder
super().__init__(objective=objective, **kwargs)

def fit(self, X, y, sample_weight=None, base_margin=None,
@_deprecate_positional_args
def fit(self, X, y, *, sample_weight=None, base_margin=None,
eval_set=None, eval_metric=None,
early_stopping_rounds=None, verbose=True, xgb_model=None,
sample_weight_eval_set=None, feature_weights=None, callbacks=None):
@@ -850,25 +886,6 @@ def fit(self, X, y, sample_weight=None, base_margin=None,
label_transform = self._le.transform
else:
label_transform = (lambda x: x)
training_labels = label_transform(y)

if eval_set is not None:
if sample_weight_eval_set is None:
sample_weight_eval_set = [None] * len(eval_set)
else:
assert len(sample_weight_eval_set) == len(eval_set)
evals = list(
DMatrix(eval_set[i][0],
label=label_transform(eval_set[i][1]),
missing=self.missing, weight=sample_weight_eval_set[i],
nthread=self.n_jobs)
for i in range(len(eval_set))
)
nevals = len(evals)
eval_names = ["validation_{}".format(i) for i in range(nevals)]
evals = list(zip(evals, eval_names))
else:
evals = ()

if len(X.shape) != 2:
# Simply raise an error here since there might be many
@@ -879,10 +896,11 @@ def fit(self, X, y, sample_weight=None, base_margin=None,
self._features_count = X.shape[1]
self.n_features_in_ = self._features_count

train_dmatrix = DMatrix(X, label=training_labels, weight=sample_weight,
base_margin=base_margin,
missing=self.missing, nthread=self.n_jobs)
train_dmatrix.set_info(feature_weights=feature_weights)
train_dmatrix, evals = self._wrap_evaluation_matrices(
X, y, group=None, sample_weight=sample_weight, base_margin=base_margin,
feature_weights=feature_weights,
eval_set=eval_set, sample_weight_eval_set=sample_weight_eval_set,
eval_group=None, label_transform=label_transform)

self._Booster = train(xgb_options, train_dmatrix,
self.get_num_boosting_rounds(),
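
A hedged sketch of the classifier path: with the default label encoder, string labels in both the training data and `eval_set` pass through `label_transform` before being wrapped into DMatrix objects (data illustrative):

import numpy as np
import xgboost as xgb

X = np.random.rand(10, 2)
y = np.array(["spam", "ham"] * 5)
clf = xgb.XGBClassifier(n_estimators=2)
clf.fit(X, y, eval_set=[(X, y)], verbose=False)  # eval labels are encoded too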
@@ -1064,7 +1082,8 @@ def evals_result(self):
''')
class XGBRFClassifier(XGBClassifier):
# pylint: disable=missing-docstring
def __init__(self,
@_deprecate_positional_args
def __init__(self, *,
learning_rate=1,
subsample=0.8,
colsample_bynode=0.8,
@@ -1092,7 +1111,8 @@ def get_num_boosting_rounds(self):
['estimators', 'model', 'objective'])
class XGBRegressor(XGBModel, XGBRegressorBase):
# pylint: disable=missing-docstring
def __init__(self, objective="reg:squarederror", **kwargs):
@_deprecate_positional_args
def __init__(self, *, objective="reg:squarederror", **kwargs):
super().__init__(objective=objective, **kwargs)


Expand All @@ -1104,7 +1124,8 @@ def __init__(self, objective="reg:squarederror", **kwargs):
''')
class XGBRFRegressor(XGBRegressor):
# pylint: disable=missing-docstring
def __init__(self, learning_rate=1, subsample=0.8, colsample_bynode=0.8,
@_deprecate_positional_args
def __init__(self, *, learning_rate=1, subsample=0.8, colsample_bynode=0.8,
reg_lambda=1e-5, **kwargs):
super().__init__(learning_rate=learning_rate, subsample=subsample,
colsample_bynode=colsample_bynode,
@@ -1160,15 +1181,17 @@ def get_num_boosting_rounds(self):
''')
class XGBRanker(XGBModel):
# pylint: disable=missing-docstring,too-many-arguments,invalid-name
def __init__(self, objective='rank:pairwise', **kwargs):
@_deprecate_positional_args
def __init__(self, *, objective='rank:pairwise', **kwargs):
super().__init__(objective=objective, **kwargs)
if callable(self.objective):
raise ValueError(
"custom objective function not supported by XGBRanker")
if "rank:" not in self.objective:
raise ValueError("please use XGBRanker for ranking task")

def fit(self, X, y, group, sample_weight=None, base_margin=None,
@_deprecate_positional_args
def fit(self, X, y, *, group, sample_weight=None, base_margin=None,
eval_set=None, sample_weight_eval_set=None,
eval_group=None, eval_metric=None,
early_stopping_rounds=None, verbose=False, xgb_model=None,
@@ -1269,37 +1292,15 @@ def fit(self, X, y, group, sample_weight=None, base_margin=None,
raise ValueError(
"group is required for all eval datasets for ranking task")

def _dmat_init(group, **params):
ret = DMatrix(**params)
ret.set_group(group)
return ret

self.n_features_in_ = X.shape[1]

train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight,
base_margin=base_margin,
missing=self.missing, nthread=self.n_jobs)
train_dmatrix.set_info(feature_weights=feature_weights)
train_dmatrix.set_group(group)
train_dmatrix, evals = self._wrap_evaluation_matrices(
X, y, group=group, sample_weight=sample_weight, base_margin=base_margin,
feature_weights=feature_weights, eval_set=eval_set,
sample_weight_eval_set=sample_weight_eval_set,
eval_group=eval_group)

evals_result = {}

if eval_set is not None:
if sample_weight_eval_set is None:
sample_weight_eval_set = [None] * len(eval_set)
evals = [_dmat_init(eval_group[i],
data=eval_set[i][0],
label=eval_set[i][1],
missing=self.missing,
weight=sample_weight_eval_set[i],
nthread=self.n_jobs)
for i in range(len(eval_set))]
nevals = len(evals)
eval_names = ["eval_{}".format(i) for i in range(nevals)]
evals = list(zip(evals, eval_names))
else:
evals = ()

params = self.get_xgb_params()

feval = eval_metric if callable(eval_metric) else None
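
For the ranker, `group` is now keyword-only as well; a minimal sketch with made-up data:

import numpy as np
import xgboost as xgb

X = np.random.rand(8, 3)
y = np.random.randint(0, 3, size=8)
ranker = xgb.XGBRanker(n_estimators=2)
ranker.fit(X, y, group=[4, 4])  # `group` must now be passed by keyword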