Commit

add regression example
RektPunk committed Aug 7, 2024
1 parent 1d03e14 commit 0bdc4d7
Showing 6 changed files with 99 additions and 28 deletions.
32 changes: 32 additions & 0 deletions examples/nonnegative_regression.py
@@ -0,0 +1,32 @@
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

from rektgbm import RektDataset, RektGBM, RektOptimizer

X, y = make_regression(n_samples=10_000, n_features=10, n_informative=5)
y = np.maximum(y, 0)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)
dtrain = RektDataset(data=X_train, label=y_train)
dtest = RektDataset(data=X_test, label=y_test)

rekt_optimizer = RektOptimizer(
    method="both",  # Optimization method: options are both, lightgbm, xgboost
    task_type="regression",  # Type of task: regression
    objective="gamma",  # Objective function
)

rekt_optimizer.optimize_params(
    dataset=dtrain,
    n_trials=10,
)
print(rekt_optimizer.best_params)

rekt_gbm = RektGBM(**rekt_optimizer.best_params)
rekt_gbm.fit(
    dataset=dtrain,
)
preds = rekt_gbm.predict(dtest)
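Not part of this commit: a short sanity check that could follow the script, assuming preds comes back as a NumPy array aligned with X_test (the return type of RektGBM.predict is not shown on this page).

from sklearn.metrics import mean_absolute_error

# Compare held-out labels with the model's predictions.
print("MAE:", mean_absolute_error(y_test, preds))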
33 changes: 33 additions & 0 deletions examples/quantile_regression.py
@@ -0,0 +1,33 @@
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

from rektgbm import RektDataset, RektGBM, RektOptimizer

X, y = make_regression(n_samples=10_000, n_features=10, n_informative=5)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)
dtrain = RektDataset(data=X_train, label=y_train)
dtest = RektDataset(data=X_test, label=y_test)


rekt_optimizer = RektOptimizer(
    method="both",  # Optimization method: options are both, lightgbm, xgboost
    task_type="regression",  # Type of task: regression
    objective="quantile",  # Objective function
    additional_params={
        "alpha": 0.5,  # Additional parameter for quantile; "quantile_alpha" can also be used
    },
)

rekt_optimizer.optimize_params(
    dataset=dtrain,
    n_trials=10,
)
print(rekt_optimizer.best_params)

rekt_gbm = RektGBM(**rekt_optimizer.best_params)
rekt_gbm.fit(
    dataset=dtrain,
)
preds = rekt_gbm.predict(dtest)
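Not part of this commit: a possible follow-up check, assuming preds is a NumPy array of median predictions for X_test and scikit-learn >= 1.0 is available.

from sklearn.metrics import mean_pinball_loss

# Pinball loss at the same alpha used for training (0.5, i.e. the median).
print("Pinball loss:", mean_pinball_loss(y_test, preds, alpha=0.5))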
32 changes: 32 additions & 0 deletions examples/regression.py
@@ -0,0 +1,32 @@
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

from rektgbm import RektDataset, RektGBM, RektOptimizer

X, y = make_regression(n_samples=10_000, n_features=10, n_informative=5)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)
dtrain = RektDataset(data=X_train, label=y_train)
dtest = RektDataset(data=X_test, label=y_test)

rekt_optimizer = RektOptimizer(
    method="both",  # Optimization method: options are both, lightgbm, xgboost
    task_type="regression",  # Type of task: regression
    objective="huber",  # Objective function: options are rmse, mae, huber
    additional_params={
        "huber_slope": 0.5  # Additional parameter for huber; "alpha" can also be used
    },
)

rekt_optimizer.optimize_params(
    dataset=dtrain,
    n_trials=10,
)
print(rekt_optimizer.best_params)

rekt_gbm = RektGBM(**rekt_optimizer.best_params)
rekt_gbm.fit(
    dataset=dtrain,
)
preds = rekt_gbm.predict(dtest)
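Not part of this commit: a small evaluation snippet that could be appended, assuming preds is a NumPy array aligned with X_test.

import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

# MAE and RMSE on the held-out split.
print("MAE:", mean_absolute_error(y_test, preds))
print("RMSE:", np.sqrt(mean_squared_error(y_test, preds)))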
15 changes: 0 additions & 15 deletions rektgbm/metric.py
@@ -13,8 +13,6 @@ class MetricName(BaseEnum):
     huber: str = "huber"
     gamma: str = "gamma"
     gamma_deviance: str = "gamma_deviance"
-    poisson: str = "poisson"
-    tweedie: str = "tweedie"
     quantile: str = "quantile"
     logloss: str = "logloss"
     auc: str = "auc"
@@ -43,7 +41,6 @@ class XgbMetricName(BaseEnum):
     gamma_nloglik: str = "gamma-nloglik"
     gamma_deviance: str = "gamma-deviance"
     poisson_nloglik: str = "poisson-nloglik"
-    poisson_deviance: str = "poisson-deviance"
     tweedie_nloglik: str = "tweedie-nloglik"
     aft_nloglik: str = "aft-nloglik"
     interval_regression_accuracy: str = "interval-regression-accuracy"
@@ -84,9 +81,7 @@ class LgbMetricName(BaseEnum):
         MetricName.mape,
         MetricName.gamma,
         MetricName.gamma_deviance,
-        MetricName.poisson,
         MetricName.quantile,
-        MetricName.tweedie,
     ],
     TaskType.binary: [
         MetricName.logloss,
@@ -106,10 +101,8 @@ class LgbMetricName(BaseEnum):
     ObjectiveName.rmse: MetricName.rmse,
     ObjectiveName.mae: MetricName.mae,
     ObjectiveName.huber: MetricName.huber,
-    ObjectiveName.poisson: MetricName.poisson,
     ObjectiveName.quantile: MetricName.quantile,
     ObjectiveName.gamma: MetricName.gamma,
-    ObjectiveName.tweedie: MetricName.tweedie,
     ObjectiveName.binary: MetricName.logloss,
     ObjectiveName.multiclass: MetricName.mlogloss,
     ObjectiveName.lambdarank: MetricName.ndcg,
@@ -159,14 +152,6 @@ class LgbMetricName(BaseEnum):
         MethodName.lightgbm: LgbMetricName.gamma_deviance.value,
         MethodName.xgboost: XgbMetricName.gamma_deviance.value,
     },
-    MetricName.poisson: {
-        MethodName.lightgbm: LgbMetricName.poisson.value,
-        MethodName.xgboost: XgbMetricName.poisson_nloglik.value,
-    },
-    MetricName.tweedie: {
-        MethodName.lightgbm: LgbMetricName.tweedie.value,
-        MethodName.xgboost: XgbMetricName.tweedie_nloglik.value,
-    },
     MetricName.quantile: {
         MethodName.lightgbm: LgbMetricName.quantile.value,
         MethodName.xgboost: XgbMetricName.quantile.value,
12 changes: 0 additions & 12 deletions rektgbm/objective.py
@@ -11,10 +11,8 @@ class ObjectiveName(BaseEnum):
     rmse: str = "rmse"
     mae: str = "mae"
     huber: str = "huber"
-    poisson: str = "poisson"
     quantile: str = "quantile"
     gamma: str = "gamma"
-    tweedie: str = "tweedie"
     binary: str = "binary"
     multiclass: str = "multiclass"
     lambdarank: str = "lambdarank"
@@ -66,10 +64,8 @@ class LgbObjectiveName(BaseEnum):
         ObjectiveName.rmse,
         ObjectiveName.mae,
         ObjectiveName.huber,
-        ObjectiveName.poisson,
         ObjectiveName.quantile,
         ObjectiveName.gamma,
-        ObjectiveName.tweedie,
     ],
     TaskType.binary: [
         ObjectiveName.binary,
@@ -97,10 +93,6 @@ class LgbObjectiveName(BaseEnum):
         MethodName.lightgbm: LgbObjectiveName.huber.value,
         MethodName.xgboost: XgbObjectiveName.pseudohubererror.value,
     },
-    ObjectiveName.poisson: {
-        MethodName.lightgbm: LgbObjectiveName.poisson.value,
-        MethodName.xgboost: XgbObjectiveName.poisson.value,
-    },
     ObjectiveName.quantile: {
         MethodName.lightgbm: LgbObjectiveName.quantile.value,
         MethodName.xgboost: XgbObjectiveName.quantileerror.value,
@@ -109,10 +101,6 @@ class LgbObjectiveName(BaseEnum):
         MethodName.lightgbm: LgbObjectiveName.gamma.value,
         MethodName.xgboost: XgbObjectiveName.gamma.value,
     },
-    ObjectiveName.tweedie: {
-        MethodName.lightgbm: LgbObjectiveName.tweedie.value,
-        MethodName.xgboost: XgbObjectiveName.tweedie.value,
-    },
     ObjectiveName.binary: {
         MethodName.lightgbm: LgbObjectiveName.binary.value,
         MethodName.xgboost: XgbObjectiveName.logistic.value,
3 changes: 2 additions & 1 deletion rektgbm/optimizer.py
@@ -58,7 +58,8 @@ def optimize_params(
             task_type=self._task_type,
         )
         self.rekt_objective = RektObjective(
-            task_type=self.task_type, objective=self.objective
+            task_type=self.task_type,
+            objective=self.objective,
         )
         self.rekt_metric = RektMetric(
             task_type=self.task_type,
