Skip to content

Commit

Permalink
Fix Multi-output when max_samples_leaf > 1 (#30)
Browse files (browse the repository at this point in the history)
* Fixes and tests for multi-output when max_samples_leaf > 1
  • Loading branch information
reidjohnson authored Feb 14, 2024
1 parent b3f39e4 commit 3821294
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 11 deletions.
2 changes: 1 addition & 1 deletion quantile_forest/_quantile_forest.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -323,7 +323,7 @@ def _get_y_train_leaves(self, X, y_dim, sorter=None, sample_weight=None):
y_indices = random.sample(y_indices, max_samples_leaf)

if sorter is not None:
- y_indices = np.asarray(y_indices).reshape(y_dim, -1)
+ y_indices = np.asarray(y_indices).reshape(-1, y_dim).swapaxes(0, 1)

for j in range(y_dim):
y_train_leaves[i, leaf_idx, j, : len(y_indices[j])] = y_indices[j]
Expand Down
24 changes: 14 additions & 10 deletions quantile_forest/tests/test_quantile_forest.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,6 +33,8 @@
generate_unsampled_indices,
)

np.random.seed(0)

rng = check_random_state(0)

# Load the California Housing Prices dataset.
Expand Down Expand Up @@ -448,27 +450,29 @@ def check_predict_quantiles(
assert_allclose(y_pred_1, y_pred_2)

# Check multi-target outputs.
X = np.arange(20).reshape(2, 10).T
y = np.arange(30).reshape(3, 10).T
X = np.linspace(-1, 0.3, 500)
y = np.empty((len(X), 2))
y[:, 0] = (X**3) + 3 * np.exp(-6 * (X - 0.3) ** 2)
y[:, 0] += np.random.normal(0, 0.2 * np.abs(X), len(X))
y[:, 1] = np.log1p(X + 1)
y[:, 1] += np.log1p(X + 1) * np.random.uniform(size=len(X))

est = ForestRegressor(n_estimators=1, max_samples_leaf=max_samples_leaf, random_state=0)
est.fit(X, y)
est.fit(X.reshape(-1, 1), y)

with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
y_pred = est.predict(
X,
X.reshape(-1, 1),
quantiles=quantiles,
weighted_quantile=weighted_quantile,
aggregate_leaves_first=aggregate_leaves_first,
)
score = est.score(X, y, quantiles=0.5)
score = est.score(X.reshape(-1, 1), y, quantiles=0.5)
assert y_pred.ndim == (3 if isinstance(quantiles, list) else 2)
assert y_pred.shape[-1] == y.shape[1]
assert np.any(y_pred[..., 0] != y_pred[..., 1])
assert np.any(y_pred[..., 1] != y_pred[..., 2])
assert score > 0.9

est = ForestRegressor(n_estimators=1, max_samples_leaf=max_samples_leaf, random_state=0)
est.fit(X_train, y_train)
assert score > 0.97

# Check that specifying `quantiles` overwrites `default_quantiles`.
est1 = ForestRegressor(n_estimators=1, max_samples_leaf=max_samples_leaf, random_state=0)
Expand Down

0 comments on commit 3821294

Please sign in to comment.