kernels working on a given set of features #476

Merged · 45 commits · Jan 16, 2025

Commits
ac7de64  kernels working on a given set of features (e-dorigatti, Dec 4, 2024)
a806dc0  pre-commit (e-dorigatti, Dec 4, 2024)
c3f1899  test map singletaskgp with additive kernel (R-M-Lee, Dec 4, 2024)
86b6ad6  test active_dims of mapped kernels (R-M-Lee, Dec 4, 2024)
b4458fb  add features_to_idx_mapper to outlier detection tutorial (R-M-Lee, Dec 4, 2024)
d243ad0  correctly handling categorical mol features (e-dorigatti, Dec 4, 2024)
842797f  validating mol features transforms (e-dorigatti, Dec 4, 2024)
22c4382  verifying proper type (e-dorigatti, Dec 6, 2024)
17d8350  custom hamming kernel enabling single task gp on categorical features (e-dorigatti, Dec 19, 2024)
6ad1dfd  removed unnecessary parameter from data model (e-dorigatti, Dec 19, 2024)
4a2a547  testing equivalence of mixed gp and single gp with custom kernel (e-dorigatti, Dec 19, 2024)
3750827  (temporary) running on all py versions (e-dorigatti, Dec 19, 2024)
7162983  (temporary) debug github actions by printing (e-dorigatti, Dec 19, 2024)
01a01e1  more printing (e-dorigatti, Dec 19, 2024)
1cd2776  Revert "testing equivalence of mixed gp and single gp with custom ker… (e-dorigatti, Dec 19, 2024)
8400fdb  Revert "removed unnecessary parameter from data model" (e-dorigatti, Dec 19, 2024)
2e29852  Revert "custom hamming kernel enabling single task gp on categorical … (e-dorigatti, Dec 19, 2024)
7e455b7  Revert "Revert "custom hamming kernel enabling single task gp on cate… (e-dorigatti, Dec 19, 2024)
25f947b  Revert "Revert "testing equivalence of mixed gp and single gp with cu… (e-dorigatti, Dec 19, 2024)
2c145b6  removed test debug and restored to latest implemented features (e-dorigatti, Dec 19, 2024)
30dd123  pinning compatible version of formulaic (e-dorigatti, Dec 19, 2024)
065824f  Merge branch 'main' into 474-kernels-on-feature-subsets (e-dorigatti, Dec 19, 2024)
b53d3bb  pinning compatible version of formulaic (e-dorigatti, Dec 19, 2024)
8d47cbd  removed old code (e-dorigatti, Dec 19, 2024)
ce38428  lint (e-dorigatti, Dec 19, 2024)
16bdc1f  removed scratch file (e-dorigatti, Dec 19, 2024)
e306d16  removed old code again (e-dorigatti, Dec 19, 2024)
9d5dfc6  silencing pyright false positive (e-dorigatti, Dec 19, 2024)
62ba2c2  compatibility with py39 (e-dorigatti, Dec 19, 2024)
d2c1f5d  pin compatible version of formulaic (e-dorigatti, Dec 19, 2024)
966bf8b  restored old code (e-dorigatti, Dec 19, 2024)
231f9f6  pinning sklearn (e-dorigatti, Dec 19, 2024)
6a7c9d7  pinning sklearn (e-dorigatti, Dec 19, 2024)
6576547  pinning scikit everywhere (e-dorigatti, Dec 19, 2024)
3e79e31  Merge branch '488-tests-failing-with-formulaic=11' into 474-kernels-o… (e-dorigatti, Dec 19, 2024)
e70cc16  not testing for prediction quality (e-dorigatti, Dec 20, 2024)
54b3c7f  matching lengthscale constraints in hamming kernel (e-dorigatti, Dec 20, 2024)
9b32536  removed equivalence test (e-dorigatti, Dec 20, 2024)
831a03e  testing hamming kernel (e-dorigatti, Dec 20, 2024)
561ac20  added test for mol features in single task gp (e-dorigatti, Jan 13, 2025)
1867e7b  categorical onehot kernel uses the right lengthscale for multiple fea… (e-dorigatti, Jan 13, 2025)
f30ed6d  removed redundant check (e-dorigatti, Jan 13, 2025)
7afcd7c  more descriptive name for base kernel (e-dorigatti, Jan 13, 2025)
d6e2957  updated docstring (e-dorigatti, Jan 13, 2025)
16d831c  improved tests and comments (e-dorigatti, Jan 14, 2025)

Changes from all commits

1 change: 1 addition & 0 deletions bofire/data_models/domain/features.py
@@ -622,6 +622,7 @@ def _validate_transform_specs(
                 raise ValueError(
                     f"Forbidden transform type for feature with key {key}",
                 )
+
         return specs

     def get_bounds(
8 changes: 4 additions & 4 deletions bofire/data_models/kernels/aggregation.py
@@ -3,13 +3,13 @@

 from bofire.data_models.kernels.categorical import HammingDistanceKernel
 from bofire.data_models.kernels.continuous import LinearKernel, MaternKernel, RBFKernel
-from bofire.data_models.kernels.kernel import Kernel
+from bofire.data_models.kernels.kernel import AggregationKernel
 from bofire.data_models.kernels.molecular import TanimotoKernel
 from bofire.data_models.kernels.shape import WassersteinKernel
 from bofire.data_models.priors.api import AnyGeneralPrior


-class AdditiveKernel(Kernel):
+class AdditiveKernel(AggregationKernel):
     type: Literal["AdditiveKernel"] = "AdditiveKernel"
     kernels: Sequence[
         Union[
@@ -26,7 +26,7 @@ class AdditiveKernel(Kernel):
     type: Literal["AdditiveKernel"] = "AdditiveKernel"


-class MultiplicativeKernel(Kernel):
+class MultiplicativeKernel(AggregationKernel):
     type: Literal["MultiplicativeKernel"] = "MultiplicativeKernel"
     kernels: Sequence[
         Union[
@@ -42,7 +42,7 @@ class MultiplicativeKernel(Kernel):
     ]


-class ScaleKernel(Kernel):
+class ScaleKernel(AggregationKernel):
     type: Literal["ScaleKernel"] = "ScaleKernel"
     base_kernel: Union[
         RBFKernel,
15 changes: 13 additions & 2 deletions bofire/data_models/kernels/api.py
@@ -17,12 +17,23 @@

     PolynomialKernel,
     RBFKernel,
 )
-from bofire.data_models.kernels.kernel import Kernel
+from bofire.data_models.kernels.kernel import (
+    AggregationKernel,
+    FeatureSpecificKernel,
+    Kernel,
+)
 from bofire.data_models.kernels.molecular import MolecularKernel, TanimotoKernel
 from bofire.data_models.kernels.shape import WassersteinKernel


-AbstractKernel = Union[Kernel, CategoricalKernel, ContinuousKernel, MolecularKernel]
+AbstractKernel = Union[
+    Kernel,
+    CategoricalKernel,
+    ContinuousKernel,
+    MolecularKernel,
+    FeatureSpecificKernel,
+    AggregationKernel,
+]

 AnyContinuousKernel = Union[
     MaternKernel,
4 changes: 2 additions & 2 deletions bofire/data_models/kernels/categorical.py
@@ -1,9 +1,9 @@
 from typing import Literal

-from bofire.data_models.kernels.kernel import Kernel
+from bofire.data_models.kernels.kernel import FeatureSpecificKernel


-class CategoricalKernel(Kernel):
+class CategoricalKernel(FeatureSpecificKernel):
     pass
9 changes: 5 additions & 4 deletions bofire/data_models/kernels/continuous.py
@@ -1,12 +1,12 @@
-from typing import Literal, Optional
+from typing import List, Literal, Optional

 from pydantic import PositiveInt, field_validator

-from bofire.data_models.kernels.kernel import Kernel
+from bofire.data_models.kernels.kernel import FeatureSpecificKernel
 from bofire.data_models.priors.api import AnyGeneralPrior, AnyPrior


-class ContinuousKernel(Kernel):
+class ContinuousKernel(FeatureSpecificKernel):
     pass
@@ -40,6 +40,7 @@ class PolynomialKernel(ContinuousKernel):
     power: int = 2


-class InfiniteWidthBNNKernel(Kernel):
+class InfiniteWidthBNNKernel(ContinuousKernel):
+    features: Optional[List[str]] = None
     type: Literal["InfiniteWidthBNNKernel"] = "InfiniteWidthBNNKernel"
     depth: PositiveInt = 3
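
With this change, InfiniteWidthBNNKernel is an ordinary ContinuousKernel carrying its own optional features list, so it can be restricted to a subset of the inputs like the other continuous kernels. A minimal sketch, assuming the class is re-exported from bofire.data_models.kernels.api and using hypothetical feature names:

    from bofire.data_models.kernels.api import InfiniteWidthBNNKernel

    # depth: number of hidden layers of the infinite-width BNN (default 3);
    # features: hypothetical input names the kernel should act on.
    bnn_kernel = InfiniteWidthBNNKernel(depth=3, features=["x1", "x2"])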
10 changes: 10 additions & 0 deletions bofire/data_models/kernels/kernel.py
@@ -1,5 +1,15 @@
+from typing import List, Optional
+
 from bofire.data_models.base import BaseModel


 class Kernel(BaseModel):
     type: str
+
+
+class AggregationKernel(Kernel):
+    pass
+
+
+class FeatureSpecificKernel(Kernel):
+    features: Optional[List[str]] = None
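
The two new base classes separate concerns: FeatureSpecificKernel carries the optional list of feature keys a kernel operates on, while AggregationKernel marks kernels that combine other kernels. A minimal sketch of the intended composition, assuming these pydantic data models accept features as a constructor keyword and are re-exported from bofire.data_models.kernels.api (feature names are hypothetical):

    from bofire.data_models.kernels.api import (
        AdditiveKernel,
        HammingDistanceKernel,
        RBFKernel,
    )

    # Each sub-kernel is restricted to its own named inputs; the surrogate
    # mapper resolves the names to tensor indices (cf. features_to_idx_mapper
    # in the commit log).
    kernel = AdditiveKernel(
        kernels=[
            RBFKernel(features=["temperature", "pressure"]),
            HammingDistanceKernel(features=["catalyst"]),
        ]
    )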
4 changes: 2 additions & 2 deletions bofire/data_models/kernels/molecular.py
@@ -1,9 +1,9 @@
 from typing import Literal

-from bofire.data_models.kernels.kernel import Kernel
+from bofire.data_models.kernels.kernel import FeatureSpecificKernel


-class MolecularKernel(Kernel):
+class MolecularKernel(FeatureSpecificKernel):
     pass
70 changes: 70 additions & 0 deletions bofire/kernels/categorical.py
@@ -0,0 +1,70 @@
+from typing import Dict
+
+import torch
+from botorch.models.transforms.input import OneHotToNumeric
+from gpytorch.kernels.kernel import Kernel
+from torch import Tensor
+
+
+class HammingKernelWithOneHots(Kernel):
+    r"""
+    A kernel for one-hot encoded categorical features. The inputs
+    may contain more than one categorical feature.
+
+    This kernel mimics the functionality of CategoricalKernel from
+    botorch, but assumes categorical features encoded as one-hot variables.
+    Computes `exp(-dist(x1, x2) / lengthscale)`, where
+    `dist(x1, x2)` is zero if `x1` and `x2` correspond to the
+    same category, and one otherwise. If the last dimension
+    is not a batch dimension, then the mean is considered.
+
+    Note: This kernel is NOT differentiable w.r.t. the inputs.
+    """
+
+    has_lengthscale = True
+
+    def __init__(self, categorical_features: Dict[int, int], *args, **kwargs):
+        """
+        Initialize.
+
+        Args:
+            categorical_features: A dictionary mapping the starting index of each
+                categorical feature to its cardinality. This assumes that categoricals
+                are one-hot encoded.
+            *args, **kwargs: Passed to gpytorch.kernels.kernel.Kernel.__init__
+        """
+        super().__init__(*args, **kwargs)
+
+        onehot_dim = sum(categorical_features.values())
+        self.trx = OneHotToNumeric(
+            onehot_dim, categorical_features=categorical_features
+        )
+
+    def forward(
+        self,
+        x1: Tensor,
+        x2: Tensor,
+        diag: bool = False,
+        last_dim_is_batch: bool = False,
+        **params,
+    ) -> Tensor:
+        x1 = self.trx(x1)
+        x2 = self.trx(x2)
+
+        delta = x1.unsqueeze(-2) != x2.unsqueeze(-3)
+        if self.ard_num_dims is not None:
+            # botorch forces ard_num_dims to be the total size of the one-hot
+            # encoded features, but here we only need one lengthscale per
+            # categorical feature
+            ls = self.lengthscale[..., : delta.shape[-1]]
+        else:
+            ls = self.lengthscale
+
+        dists = delta / ls.unsqueeze(-2)
+        if last_dim_is_batch:
+            dists = dists.transpose(-3, -1)
+        else:
+            dists = dists.mean(-1)
+        res = torch.exp(-dists)
+        if diag:
+            res = torch.diagonal(res, dim1=-1, dim2=-2)
+        return res
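
As a usage sketch (values illustrative, not from the PR): two categorical features with three and two categories are one-hot encoded into five columns, so categorical_features={0: 3, 3: 2} declares that a cardinality-3 feature starts at column 0 and a cardinality-2 feature at column 3.

    import torch

    from bofire.kernels.categorical import HammingKernelWithOneHots

    kernel = HammingKernelWithOneHots(categorical_features={0: 3, 3: 2})

    x1 = torch.tensor([[1.0, 0.0, 0.0, 0.0, 1.0]])  # categories (0, 1)
    x2 = torch.tensor([[1.0, 0.0, 0.0, 1.0, 0.0]])  # categories (0, 0)

    # The first feature matches and the second differs, so the mean Hamming
    # distance is 0.5 and the covariance is exp(-0.5 / lengthscale).
    cov = kernel.forward(x1, x2)  # tensor of shape (1, 1)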