Commit 287b33e

check that the result is the same with dict input or PreprocessorConfig input

LeoGrin committed Jan 21, 2025
1 parent 1802f0b
Showing 2 changed files with 112 additions and 0 deletions.
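The tests in this commit pin down the contract that a plain dict entry in inference_config["PREPROCESS_TRANSFORMS"] is interchangeable with a PreprocessorConfig object. A minimal sketch of that equivalence, assuming PreprocessorConfig is a dataclass with field-wise equality whose fields match the dict keys (the coercion inside TabPFN itself is not part of this diff):

from tabpfn.preprocessing import PreprocessorConfig

# Illustrative only: a dict whose keys mirror the dataclass fields should
# describe the same preprocessing as the object form. Assumes default
# dataclass equality; the actual dict normalization lives inside TabPFN.
cfg = {
    "name": "quantile_uni_coarse",
    "append_original": False,
    "categorical_name": "ordinal_very_common_categories_shuffled",
    "global_transformer_name": "svd",
    "subsample_features": -1,
}
assert PreprocessorConfig(**cfg) == PreprocessorConfig(
    name="quantile_uni_coarse",
    append_original=False,
    categorical_name="ordinal_very_common_categories_shuffled",
    global_transformer_name="svd",
    subsample_features=-1,
)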
tests/test_classifier_interface.py (49 additions, 0 deletions)
@@ -13,6 +13,7 @@
from sklearn.utils.estimator_checks import parametrize_with_checks

from tabpfn import TabPFNClassifier
from tabpfn.preprocessing import PreprocessorConfig

devices = ["cpu"]
if torch.cuda.is_available():
@@ -169,3 +170,51 @@ def test_classifier_in_pipeline(X_y: tuple[np.ndarray, np.ndarray]) -> None:
        expected_mean,
        rtol=0.1,
    ), "Class probabilities are not properly balanced in pipeline"

def test_dict_vs_object_preprocessor_config(X_y: tuple[np.ndarray, np.ndarray]) -> None:
    """Test that dict configs behave identically to PreprocessorConfig objects."""
    X, y = X_y

    # Define the same config as both a dict and an object
    dict_config = {
        "name": "quantile_uni_coarse",
        "append_original": False,  # changed from default
        "categorical_name": "ordinal_very_common_categories_shuffled",
        "global_transformer_name": "svd",
        "subsample_features": -1,
    }

    object_config = PreprocessorConfig(
        name="quantile_uni_coarse",
        append_original=False,  # changed from default
        categorical_name="ordinal_very_common_categories_shuffled",
        global_transformer_name="svd",
        subsample_features=-1,
    )

    # Create two models with the same random state
    model_dict = TabPFNClassifier(
        inference_config={"PREPROCESS_TRANSFORMS": [dict_config]},
        n_estimators=2,
        random_state=42,
    )

    model_obj = TabPFNClassifier(
        inference_config={"PREPROCESS_TRANSFORMS": [object_config]},
        n_estimators=2,
        random_state=42,
    )

    # Fit both models
    model_dict.fit(X, y)
    model_obj.fit(X, y)

    # Compare predictions
    pred_dict = model_dict.predict(X)
    pred_obj = model_obj.predict(X)
    np.testing.assert_array_equal(pred_dict, pred_obj)

    # Compare probabilities
    prob_dict = model_dict.predict_proba(X)
    prob_obj = model_obj.predict_proba(X)
    np.testing.assert_array_almost_equal(prob_dict, prob_obj)
tests/test_regressor_interface.py (63 additions, 0 deletions)
@@ -13,6 +13,7 @@
from sklearn.utils.estimator_checks import parametrize_with_checks

from tabpfn import TabPFNRegressor
from tabpfn.preprocessing import PreprocessorConfig

devices = ["cpu"]
if torch.cuda.is_available():
@@ -155,3 +156,65 @@ def test_regressor_in_pipeline(X_y: tuple[np.ndarray, np.ndarray]) -> None:
    assert quantiles[0].shape == (
        X.shape[0],
    ), "Quantile predictions shape is incorrect"

def test_dict_vs_object_preprocessor_config(X_y: tuple[np.ndarray, np.ndarray]) -> None:
    """Test that dict configs behave identically to PreprocessorConfig objects."""
    X, y = X_y

    # Define the same config as both a dict and an object
    dict_config = {
        "name": "quantile_uni",
        "append_original": False,  # changed from default
        "categorical_name": "ordinal_very_common_categories_shuffled",
        "global_transformer_name": "svd",
        "subsample_features": -1,
    }

    object_config = PreprocessorConfig(
        name="quantile_uni",
        append_original=False,  # changed from default
        categorical_name="ordinal_very_common_categories_shuffled",
        global_transformer_name="svd",
        subsample_features=-1,
    )

    # Create two models with the same random state
    model_dict = TabPFNRegressor(
        inference_config={"PREPROCESS_TRANSFORMS": [dict_config]},
        n_estimators=2,
        random_state=42,
    )

    model_obj = TabPFNRegressor(
        inference_config={"PREPROCESS_TRANSFORMS": [object_config]},
        n_estimators=2,
        random_state=42,
    )

    # Fit both models
    model_dict.fit(X, y)
    model_obj.fit(X, y)

    # Compare predictions for different output types
    for output_type in ["mean", "median", "mode"]:
        pred_dict = model_dict.predict(X, output_type=output_type)
        pred_obj = model_obj.predict(X, output_type=output_type)
        np.testing.assert_array_almost_equal(
            pred_dict,
            pred_obj,
            err_msg=f"Predictions differ for output_type={output_type}",
        )

    # Compare quantile predictions
    quantiles = [0.1, 0.5, 0.9]
    quant_dict = model_dict.predict(X, output_type="quantiles", quantiles=quantiles)
    quant_obj = model_obj.predict(X, output_type="quantiles", quantiles=quantiles)

    for q, q_dict, q_obj in zip(quantiles, quant_dict, quant_obj):
        np.testing.assert_array_almost_equal(
            q_dict,
            q_obj,
            err_msg=f"Quantile predictions differ for quantile {q}",
        )

