From 0121181e8d3e8230cebf5d322e31c7890fb8c86a Mon Sep 17 00:00:00 2001 From: BenEfrati Date: Sun, 22 Jan 2023 20:44:15 +0000 Subject: [PATCH 1/3] Add sample_weight to eval_metric --- python-package/xgboost/sklearn.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 15d93fd38bb2..38da6e57c2e9 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -123,6 +123,24 @@ def inner(y_score: np.ndarray, dmatrix: DMatrix) -> Tuple[str, float]: return inner +def _sample_weight_metric_decorator(func: Callable) -> Metric: + """Decorate a metric function from sklearn. + + Converts an metric function that uses the typical sklearn metric signature so that it + is compatible with :py:func:`train` + + """ + + def inner(y_score: np.ndarray, dmatrix: DMatrix) -> Tuple[str, float]: + y_true = dmatrix.get_label() + sample_weight = dmatrix.get_weight() + try: + return func.__name__, func(y_true, y_score, sample_weight=sample_weight) + except TypeError: + #func has no sample_weight + return func.__name__, func(y_true, y_score) + + return inner __estimator_doc = """ n_estimators : int @@ -819,6 +837,7 @@ def _configure_fit( params: Dict[str, Any], early_stopping_rounds: Optional[int], callbacks: Optional[Sequence[TrainingCallback]], + sample_weight_metric: Optional[bool], ) -> Tuple[ Optional[Union[Booster, str, "XGBModel"]], Optional[Metric], @@ -865,7 +884,10 @@ def _duplicated(parameter: str) -> None: metric = eval_metric elif callable(eval_metric): # Parameter from constructor or set_params - metric = _metric_decorator(eval_metric) + if sample_weight_metric: + metric = _sample_weight_metric_decorator(eval_metric) + else: + metric = _metric_decorator(eval_metric) else: params.update({"eval_metric": eval_metric}) @@ -1029,7 +1051,7 @@ def fit( early_stopping_rounds, callbacks, ) = self._configure_fit( - xgb_model, eval_metric, params, early_stopping_rounds, callbacks + xgb_model, eval_metric, params, early_stopping_rounds, callbacks, sample_weight != None ) self._Booster = train( params, @@ -1475,7 +1497,7 @@ def fit( early_stopping_rounds, callbacks, ) = self._configure_fit( - xgb_model, eval_metric, params, early_stopping_rounds, callbacks + xgb_model, eval_metric, params, early_stopping_rounds, callbacks, sample_weight != None ) train_dmatrix, evals = _wrap_evaluation_matrices( missing=self.missing, @@ -1974,7 +1996,7 @@ def fit( early_stopping_rounds, callbacks, ) = self._configure_fit( - xgb_model, eval_metric, params, early_stopping_rounds, callbacks + xgb_model, eval_metric, params, early_stopping_rounds, callbacks, sample_weight != None ) if callable(metric): raise ValueError( From 130129b3e373972c3304f1c76f30a71712556cf6 Mon Sep 17 00:00:00 2001 From: BenEfrati Date: Sun, 29 Jan 2023 20:32:51 +0000 Subject: [PATCH 2/3] check if weight is empty instead 2 decorators --- python-package/xgboost/sklearn.py | 36 ++++++++----------------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 38da6e57c2e9..e1f8e087dde6 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -119,29 +119,15 @@ def _metric_decorator(func: Callable) -> Metric: def inner(y_score: np.ndarray, dmatrix: DMatrix) -> Tuple[str, float]: y_true = dmatrix.get_label() - return func.__name__, func(y_true, y_score) - - return inner - -def _sample_weight_metric_decorator(func: Callable) -> Metric: - """Decorate a metric function from sklearn. - - Converts an metric function that uses the typical sklearn metric signature so that it - is compatible with :py:func:`train` - - """ - - def inner(y_score: np.ndarray, dmatrix: DMatrix) -> Tuple[str, float]: - y_true = dmatrix.get_label() - sample_weight = dmatrix.get_weight() - try: - return func.__name__, func(y_true, y_score, sample_weight=sample_weight) - except TypeError: - #func has no sample_weight + weight = dmatrix.get_weight() + if weight.size == 0: return func.__name__, func(y_true, y_score) + else: + return func.__name__, func(y_true, y_score, sample_weight=weight) return inner + __estimator_doc = """ n_estimators : int Number of gradient boosted trees. Equivalent to number of boosting @@ -837,7 +823,6 @@ def _configure_fit( params: Dict[str, Any], early_stopping_rounds: Optional[int], callbacks: Optional[Sequence[TrainingCallback]], - sample_weight_metric: Optional[bool], ) -> Tuple[ Optional[Union[Booster, str, "XGBModel"]], Optional[Metric], @@ -884,10 +869,7 @@ def _duplicated(parameter: str) -> None: metric = eval_metric elif callable(eval_metric): # Parameter from constructor or set_params - if sample_weight_metric: - metric = _sample_weight_metric_decorator(eval_metric) - else: - metric = _metric_decorator(eval_metric) + metric = _metric_decorator(eval_metric) else: params.update({"eval_metric": eval_metric}) @@ -1051,7 +1033,7 @@ def fit( early_stopping_rounds, callbacks, ) = self._configure_fit( - xgb_model, eval_metric, params, early_stopping_rounds, callbacks, sample_weight != None + xgb_model, eval_metric, params, early_stopping_rounds, callbacks ) self._Booster = train( params, @@ -1497,7 +1479,7 @@ def fit( early_stopping_rounds, callbacks, ) = self._configure_fit( - xgb_model, eval_metric, params, early_stopping_rounds, callbacks, sample_weight != None + xgb_model, eval_metric, params, early_stopping_rounds, callbacks ) train_dmatrix, evals = _wrap_evaluation_matrices( missing=self.missing, @@ -1996,7 +1978,7 @@ def fit( early_stopping_rounds, callbacks, ) = self._configure_fit( - xgb_model, eval_metric, params, early_stopping_rounds, callbacks, sample_weight != None + xgb_model, eval_metric, params, early_stopping_rounds, callbacks ) if callable(metric): raise ValueError( From 233f0506044de193a0ac41683dbc29ff75fb7f4f Mon Sep 17 00:00:00 2001 From: BenEfrati Date: Thu, 2 Feb 2023 12:32:59 +0000 Subject: [PATCH 3/3] Add test --- tests/python/test_with_sklearn.py | 38 +++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 8dfb5cab2267..4f627cd34e85 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -1417,3 +1417,41 @@ def merror(y_true: np.ndarray, predt: np.ndarray): with pytest.raises(AssertionError): # shape check inside the `merror` function clf.fit(X, y, eval_set=[(X, y)]) + +def test_weighted_evaluation_metric(): + from sklearn.datasets import make_hastie_10_2 + from sklearn.metrics import log_loss + X, y = make_hastie_10_2(n_samples=2000, random_state=42) + labels, y = np.unique(y, return_inverse=True) + X_train, X_test = X[:1600], X[1600:] + y_train, y_test = y[:1600], y[1600:] + weights_eval_set = np.random.choice([1, 2], len(X_test)) + + np.random.seed(0) + weights_train = np.random.choice([1, 2], len(X_train)) + + clf = xgb.XGBClassifier( + tree_method="hist", + eval_metric=log_loss, + n_estimators=16, + objective="binary:logistic", + ) + clf.fit(X_train, y_train, sample_weight=weights_train, eval_set=[(X_test, y_test)], + sample_weight_eval_set=[weights_eval_set]) + custom = clf.evals_result() + + clf = xgb.XGBClassifier( + tree_method="hist", + eval_metric="logloss", + n_estimators=16, + objective="binary:logistic" + ) + clf.fit(X_train, y_train, sample_weight=weights_train, eval_set=[(X_test, y_test)], + sample_weight_eval_set=[weights_eval_set]) + internal = clf.evals_result() + + np.testing.assert_allclose( + custom["validation_0"]["log_loss"], + internal["validation_0"]["logloss"], + atol=1e-6 + )