From 0121181e8d3e8230cebf5d322e31c7890fb8c86a Mon Sep 17 00:00:00 2001
From: BenEfrati <BenEfrati@users.noreply.github.com>
Date: Sun, 22 Jan 2023 20:44:15 +0000
Subject: [PATCH 1/3] Add sample_weight to eval_metric

---
 python-package/xgboost/sklearn.py | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index 15d93fd38bb2..38da6e57c2e9 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -123,6 +123,24 @@ def inner(y_score: np.ndarray, dmatrix: DMatrix) -> Tuple[str, float]:
 
     return inner
 
+def _sample_weight_metric_decorator(func: Callable) -> Metric:
+    """Decorate a metric function from sklearn.
+
+    Converts an metric function that uses the typical sklearn metric signature so that it
+    is compatible with :py:func:`train`
+
+    """
+
+    def inner(y_score: np.ndarray, dmatrix: DMatrix) -> Tuple[str, float]:
+        y_true = dmatrix.get_label()
+        sample_weight = dmatrix.get_weight()
+        try:
+            return func.__name__, func(y_true, y_score, sample_weight=sample_weight)
+        except TypeError:
+            #func has no sample_weight
+            return func.__name__, func(y_true, y_score)
+
+    return inner
 
 __estimator_doc = """
     n_estimators : int
@@ -819,6 +837,7 @@ def _configure_fit(
         params: Dict[str, Any],
         early_stopping_rounds: Optional[int],
         callbacks: Optional[Sequence[TrainingCallback]],
+        sample_weight_metric: Optional[bool],
     ) -> Tuple[
         Optional[Union[Booster, str, "XGBModel"]],
         Optional[Metric],
@@ -865,7 +884,10 @@ def _duplicated(parameter: str) -> None:
                 metric = eval_metric
             elif callable(eval_metric):
                 # Parameter from constructor or set_params
-                metric = _metric_decorator(eval_metric)
+                if sample_weight_metric:
+                    metric = _sample_weight_metric_decorator(eval_metric)
+                else:
+                    metric = _metric_decorator(eval_metric)
             else:
                 params.update({"eval_metric": eval_metric})
 
@@ -1029,7 +1051,7 @@ def fit(
                 early_stopping_rounds,
                 callbacks,
             ) = self._configure_fit(
-                xgb_model, eval_metric, params, early_stopping_rounds, callbacks
+                xgb_model, eval_metric, params, early_stopping_rounds, callbacks, sample_weight != None
             )
             self._Booster = train(
                 params,
@@ -1475,7 +1497,7 @@ def fit(
                 early_stopping_rounds,
                 callbacks,
             ) = self._configure_fit(
-                xgb_model, eval_metric, params, early_stopping_rounds, callbacks
+                xgb_model, eval_metric, params, early_stopping_rounds, callbacks, sample_weight != None
             )
             train_dmatrix, evals = _wrap_evaluation_matrices(
                 missing=self.missing,
@@ -1974,7 +1996,7 @@ def fit(
                 early_stopping_rounds,
                 callbacks,
             ) = self._configure_fit(
-                xgb_model, eval_metric, params, early_stopping_rounds, callbacks
+                xgb_model, eval_metric, params, early_stopping_rounds, callbacks, sample_weight != None
             )
             if callable(metric):
                 raise ValueError(

From 130129b3e373972c3304f1c76f30a71712556cf6 Mon Sep 17 00:00:00 2001
From: BenEfrati <BenEfrati@users.noreply.github.com>
Date: Sun, 29 Jan 2023 20:32:51 +0000
Subject: [PATCH 2/3] Check if weight is empty instead of using 2 decorators

---
 python-package/xgboost/sklearn.py | 36 ++++++++-----------------------
 1 file changed, 9 insertions(+), 27 deletions(-)

diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index 38da6e57c2e9..e1f8e087dde6 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -119,29 +119,15 @@ def _metric_decorator(func: Callable) -> Metric:
 
     def inner(y_score: np.ndarray, dmatrix: DMatrix) -> Tuple[str, float]:
         y_true = dmatrix.get_label()
-        return func.__name__, func(y_true, y_score)
-
-    return inner
-
-def _sample_weight_metric_decorator(func: Callable) -> Metric:
-    """Decorate a metric function from sklearn.
-
-    Converts an metric function that uses the typical sklearn metric signature so that it
-    is compatible with :py:func:`train`
-
-    """
-
-    def inner(y_score: np.ndarray, dmatrix: DMatrix) -> Tuple[str, float]:
-        y_true = dmatrix.get_label()
-        sample_weight = dmatrix.get_weight()
-        try:
-            return func.__name__, func(y_true, y_score, sample_weight=sample_weight)
-        except TypeError:
-            #func has no sample_weight
+        weight = dmatrix.get_weight()
+        if weight.size == 0:
             return func.__name__, func(y_true, y_score)
+        # Weights were supplied, so the metric must accept ``sample_weight``.
+        return func.__name__, func(y_true, y_score, sample_weight=weight)
 
     return inner
 
+
 __estimator_doc = """
     n_estimators : int
         Number of gradient boosted trees.  Equivalent to number of boosting
@@ -837,7 +823,6 @@ def _configure_fit(
         params: Dict[str, Any],
         early_stopping_rounds: Optional[int],
         callbacks: Optional[Sequence[TrainingCallback]],
-        sample_weight_metric: Optional[bool],
     ) -> Tuple[
         Optional[Union[Booster, str, "XGBModel"]],
         Optional[Metric],
@@ -884,10 +869,7 @@ def _duplicated(parameter: str) -> None:
                 metric = eval_metric
             elif callable(eval_metric):
                 # Parameter from constructor or set_params
-                if sample_weight_metric:
-                    metric = _sample_weight_metric_decorator(eval_metric)
-                else:
-                    metric = _metric_decorator(eval_metric)
+                metric = _metric_decorator(eval_metric)
             else:
                 params.update({"eval_metric": eval_metric})
 
@@ -1051,7 +1033,7 @@ def fit(
                 early_stopping_rounds,
                 callbacks,
             ) = self._configure_fit(
-                xgb_model, eval_metric, params, early_stopping_rounds, callbacks, sample_weight != None
+                xgb_model, eval_metric, params, early_stopping_rounds, callbacks
             )
             self._Booster = train(
                 params,
@@ -1497,7 +1479,7 @@ def fit(
                 early_stopping_rounds,
                 callbacks,
             ) = self._configure_fit(
-                xgb_model, eval_metric, params, early_stopping_rounds, callbacks, sample_weight != None
+                xgb_model, eval_metric, params, early_stopping_rounds, callbacks
             )
             train_dmatrix, evals = _wrap_evaluation_matrices(
                 missing=self.missing,
@@ -1996,7 +1978,7 @@ def fit(
                 early_stopping_rounds,
                 callbacks,
             ) = self._configure_fit(
-                xgb_model, eval_metric, params, early_stopping_rounds, callbacks, sample_weight != None
+                xgb_model, eval_metric, params, early_stopping_rounds, callbacks
             )
             if callable(metric):
                 raise ValueError(

From 233f0506044de193a0ac41683dbc29ff75fb7f4f Mon Sep 17 00:00:00 2001
From: BenEfrati <BenEfrati@users.noreply.github.com>
Date: Thu, 2 Feb 2023 12:32:59 +0000
Subject: [PATCH 3/3] Add test

---
 tests/python/test_with_sklearn.py | 38 +++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index 8dfb5cab2267..4f627cd34e85 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -1417,3 +1417,41 @@ def merror(y_true: np.ndarray, predt: np.ndarray):
     with pytest.raises(AssertionError):
         # shape check inside the `merror` function
         clf.fit(X, y, eval_set=[(X, y)])
+
+def test_weighted_evaluation_metric():
+    """A callable eval_metric must be scored with the eval-set sample weights."""
+    from sklearn.datasets import make_hastie_10_2
+    from sklearn.metrics import log_loss
+
+    X, y = make_hastie_10_2(n_samples=2000, random_state=42)
+    # Re-encode the labels as consecutive integers starting from 0.
+    y = np.unique(y, return_inverse=True)[1]
+    X_train, X_test = X[:1600], X[1600:]
+    y_train, y_test = y[:1600], y[1600:]
+    # Draw both weight vectors from one locally seeded generator so the test is
+    # fully reproducible and numpy's global random state is left untouched.
+    weight_rng = np.random.RandomState(0)
+    weights_train = weight_rng.choice([1, 2], len(X_train))
+    weights_eval_set = weight_rng.choice([1, 2], len(X_test))
+
+    # Train the same model twice; only the way the metric is specified differs.
+    def fit_and_eval(eval_metric):
+        clf = xgb.XGBClassifier(
+            tree_method="hist",
+            eval_metric=eval_metric,
+            n_estimators=16,
+            objective="binary:logistic",
+        )
+        clf.fit(
+            X_train,
+            y_train,
+            sample_weight=weights_train,
+            eval_set=[(X_test, y_test)],
+            sample_weight_eval_set=[weights_eval_set],
+        )
+        return clf.evals_result()["validation_0"]
+
+    # sklearn's weighted log_loss has to agree with the built-in "logloss".
+    custom = fit_and_eval(log_loss)["log_loss"]
+    internal = fit_and_eval("logloss")["logloss"]
+    np.testing.assert_allclose(custom, internal, atol=1e-6)