Commit

add regression example
RektPunk committed Aug 7, 2024
1 parent 1d03e14 commit 0bdc4d7
Showing 6 changed files with 99 additions and 28 deletions.
32 changes: 32 additions & 0 deletions examples/nonnegative_regression.py
@@ -0,0 +1,32 @@
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

from rektgbm import RektDataset, RektGBM, RektOptimizer

X, y = make_regression(n_samples=10_000, n_features=10, n_informative=5)
y = np.maximum(y, 0)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)
dtrain = RektDataset(data=X_train, label=y_train)
dtest = RektDataset(data=X_test, label=y_test)

rekt_optimizer = RektOptimizer(
    method="both",  # Optimization method: options are both, lightgbm, xgboost
    task_type="regression",  # Type of task: regression
    objective="gamma",  # Objective function
)

rekt_optimizer.optimize_params(
    dataset=dtrain,
    n_trials=10,
)
print(rekt_optimizer.best_params)

rekt_gbm = RektGBM(**rekt_optimizer.best_params)
rekt_gbm.fit(
    dataset=dtrain,
)
preds = rekt_gbm.predict(dtest)
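Not part of this commit: a short sanity check that could follow the script, assuming preds comes back as a NumPy array aligned with X_test (the return type of RektGBM.predict is not shown on this page).

from sklearn.metrics import mean_absolute_error

# Compare held-out labels with the model's predictions.
print("MAE:", mean_absolute_error(y_test, preds))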
33 changes: 33 additions & 0 deletions examples/quantile_regression.py
@@ -0,0 +1,33 @@
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

from rektgbm import RektDataset, RektGBM, RektOptimizer

X, y = make_regression(n_samples=10_000, n_features=10, n_informative=5)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)
dtrain = RektDataset(data=X_train, label=y_train)
dtest = RektDataset(data=X_test, label=y_test)


rekt_optimizer = RektOptimizer(
    method="both",  # Optimization method: options are both, lightgbm, xgboost
    task_type="regression",  # Type of task: regression
    objective="quantile",  # Objective function
    additional_params={
        "alpha": 0.5,  # Additional parameter for quantile; "quantile_alpha" can also be used
    },
)

rekt_optimizer.optimize_params(
    dataset=dtrain,
    n_trials=10,
)
print(rekt_optimizer.best_params)

rekt_gbm = RektGBM(**rekt_optimizer.best_params)
rekt_gbm.fit(
    dataset=dtrain,
)
preds = rekt_gbm.predict(dtest)
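Not part of this commit: a possible follow-up check, assuming preds is a NumPy array of median predictions for X_test and scikit-learn >= 1.0 is available.

from sklearn.metrics import mean_pinball_loss

# Pinball loss at the same alpha used for training (0.5, i.e. the median).
print("Pinball loss:", mean_pinball_loss(y_test, preds, alpha=0.5))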
32 changes: 32 additions & 0 deletions examples/regression.py
@@ -0,0 +1,32 @@
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

from rektgbm import RektDataset, RektGBM, RektOptimizer

X, y = make_regression(n_samples=10_000, n_features=10, n_informative=5)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)
dtrain = RektDataset(data=X_train, label=y_train)
dtest = RektDataset(data=X_test, label=y_test)

rekt_optimizer = RektOptimizer(
    method="both",  # Optimization method: options are both, lightgbm, xgboost
    task_type="regression",  # Type of task: regression
    objective="huber",  # Objective function: options are rmse, mae, huber
    additional_params={
        "huber_slope": 0.5  # Additional parameter for huber; "alpha" can also be used
    },
)

rekt_optimizer.optimize_params(
    dataset=dtrain,
    n_trials=10,
)
print(rekt_optimizer.best_params)

rekt_gbm = RektGBM(**rekt_optimizer.best_params)
rekt_gbm.fit(
    dataset=dtrain,
)
preds = rekt_gbm.predict(dtest)
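Not part of this commit: a small evaluation snippet that could be appended, assuming preds is a NumPy array aligned with X_test.

import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

# MAE and RMSE on the held-out split.
print("MAE:", mean_absolute_error(y_test, preds))
print("RMSE:", np.sqrt(mean_squared_error(y_test, preds)))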
15 changes: 0 additions & 15 deletions rektgbm/metric.py
@@ -13,8 +13,6 @@ class MetricName(BaseEnum):
     huber: str = "huber"
     gamma: str = "gamma"
     gamma_deviance: str = "gamma_deviance"
-    poisson: str = "poisson"
-    tweedie: str = "tweedie"
     quantile: str = "quantile"
     logloss: str = "logloss"
     auc: str = "auc"
@@ -43,7 +41,6 @@ class XgbMetricName(BaseEnum):
     gamma_nloglik: str = "gamma-nloglik"
     gamma_deviance: str = "gamma-deviance"
     poisson_nloglik: str = "poisson-nloglik"
-    poisson_deviance: str = "poisson-deviance"
     tweedie_nloglik: str = "tweedie-nloglik"
     aft_nloglik: str = "aft-nloglik"
     interval_regression_accuracy: str = "interval-regression-accuracy"
@@ -84,9 +81,7 @@ class LgbMetricName(BaseEnum):
         MetricName.mape,
         MetricName.gamma,
         MetricName.gamma_deviance,
-        MetricName.poisson,
         MetricName.quantile,
-        MetricName.tweedie,
     ],
     TaskType.binary: [
         MetricName.logloss,
@@ -106,10 +101,8 @@ class LgbMetricName(BaseEnum):
     ObjectiveName.rmse: MetricName.rmse,
     ObjectiveName.mae: MetricName.mae,
     ObjectiveName.huber: MetricName.huber,
-    ObjectiveName.poisson: MetricName.poisson,
     ObjectiveName.quantile: MetricName.quantile,
     ObjectiveName.gamma: MetricName.gamma,
-    ObjectiveName.tweedie: MetricName.tweedie,
     ObjectiveName.binary: MetricName.logloss,
     ObjectiveName.multiclass: MetricName.mlogloss,
     ObjectiveName.lambdarank: MetricName.ndcg,
@@ -159,14 +152,6 @@ class LgbMetricName(BaseEnum):
         MethodName.lightgbm: LgbMetricName.gamma_deviance.value,
         MethodName.xgboost: XgbMetricName.gamma_deviance.value,
     },
-    MetricName.poisson: {
-        MethodName.lightgbm: LgbMetricName.poisson.value,
-        MethodName.xgboost: XgbMetricName.poisson_nloglik.value,
-    },
-    MetricName.tweedie: {
-        MethodName.lightgbm: LgbMetricName.tweedie.value,
-        MethodName.xgboost: XgbMetricName.tweedie_nloglik.value,
-    },
     MetricName.quantile: {
         MethodName.lightgbm: LgbMetricName.quantile.value,
         MethodName.xgboost: XgbMetricName.quantile.value,
12 changes: 0 additions & 12 deletions rektgbm/objective.py
@@ -11,10 +11,8 @@ class ObjectiveName(BaseEnum):
     rmse: str = "rmse"
     mae: str = "mae"
     huber: str = "huber"
-    poisson: str = "poisson"
     quantile: str = "quantile"
     gamma: str = "gamma"
-    tweedie: str = "tweedie"
     binary: str = "binary"
     multiclass: str = "multiclass"
     lambdarank: str = "lambdarank"
@@ -66,10 +64,8 @@ class LgbObjectiveName(BaseEnum):
         ObjectiveName.rmse,
         ObjectiveName.mae,
         ObjectiveName.huber,
-        ObjectiveName.poisson,
         ObjectiveName.quantile,
         ObjectiveName.gamma,
-        ObjectiveName.tweedie,
     ],
     TaskType.binary: [
         ObjectiveName.binary,
@@ -97,10 +93,6 @@ class LgbObjectiveName(BaseEnum):
         MethodName.lightgbm: LgbObjectiveName.huber.value,
         MethodName.xgboost: XgbObjectiveName.pseudohubererror.value,
     },
-    ObjectiveName.poisson: {
-        MethodName.lightgbm: LgbObjectiveName.poisson.value,
-        MethodName.xgboost: XgbObjectiveName.poisson.value,
-    },
     ObjectiveName.quantile: {
         MethodName.lightgbm: LgbObjectiveName.quantile.value,
         MethodName.xgboost: XgbObjectiveName.quantileerror.value,
@@ -109,10 +101,6 @@ class LgbObjectiveName(BaseEnum):
         MethodName.lightgbm: LgbObjectiveName.gamma.value,
         MethodName.xgboost: XgbObjectiveName.gamma.value,
     },
-    ObjectiveName.tweedie: {
-        MethodName.lightgbm: LgbObjectiveName.tweedie.value,
-        MethodName.xgboost: XgbObjectiveName.tweedie.value,
-    },
     ObjectiveName.binary: {
         MethodName.lightgbm: LgbObjectiveName.binary.value,
         MethodName.xgboost: XgbObjectiveName.logistic.value,
3 changes: 2 additions & 1 deletion rektgbm/optimizer.py
@@ -58,7 +58,8 @@ def optimize_params(
             task_type=self._task_type,
         )
         self.rekt_objective = RektObjective(
-            task_type=self.task_type, objective=self.objective
+            task_type=self.task_type,
+            objective=self.objective,
         )
         self.rekt_metric = RektMetric(
             task_type=self.task_type,
