Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Classification surrogates #297

Merged
merged 38 commits into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
11cc40a
Begin refactor
gmancino Sep 22, 2023
9b688e0
Update bug fixes
gmancino Sep 22, 2023
1e35bad
Merge branch 'main' into classification_surrogates
gmancino Sep 22, 2023
51ea59d
Update based on main
gmancino Sep 22, 2023
e1414c5
Start fixing constraint output checks
gmancino Sep 22, 2023
bb14774
Sync categories and objectives
gmancino Sep 28, 2023
acdbbf2
Add categorical objective
gmancino Oct 3, 2023
53e61da
Update validators, fix bugs, link categorical objectives and weights
gmancino Oct 5, 2023
89df3a5
Pre-merge commit
gmancino Dec 21, 2023
0b5f083
Merge remote-tracking branch 'origin/main' into classification_surrog…
gmancino Dec 21, 2023
b1f5147
Refactor classification surrogates
gmancino Dec 21, 2023
ecbd410
Merge remote-tracking branch 'origin/main' into classification_surrog…
gmancino Jan 17, 2024
9caecc7
Address previous PR issues
gmancino Jan 24, 2024
090dcfe
Initial test fixes
gmancino Jan 26, 2024
4f75694
Fix type changes and tutorials
gmancino Jan 26, 2024
dbeb6b8
Fix tests
gmancino Jan 30, 2024
c8aa1af
Merge remote-tracking branch 'origin/main' into classification_surrog…
gmancino Jan 30, 2024
2021d73
Update Tanimoto GP
gmancino Jan 30, 2024
20ec136
Fix black version
gmancino Jan 30, 2024
b8a99fc
Update DOE
gmancino Feb 1, 2024
d1f7b6b
Merge remote-tracking branch 'origin/main' into classification_surrog…
gmancino Feb 1, 2024
a2287d8
Fix type checks
gmancino Feb 1, 2024
6cb4676
More type fixes
gmancino Feb 1, 2024
93d5675
Fix MLP loss function issue
gmancino Feb 1, 2024
3ca1d34
Formatting
gmancino Feb 1, 2024
5ed04e9
Format MLP loss function
gmancino Feb 1, 2024
417fb0f
Type checking fix
gmancino Feb 1, 2024
dc7dac0
Start fixes
gmancino Feb 8, 2024
6a792e1
Merge remote-tracking branch 'origin/main' into classification_surrog…
gmancino Feb 8, 2024
3d2465e
Update PR to include type changes and tests
gmancino Feb 9, 2024
18d0f13
Type checking
gmancino Feb 12, 2024
c19411c
Fix constraint tests
gmancino Feb 12, 2024
8266240
Fix tests and update naming convention script
gmancino Feb 16, 2024
db06fe3
Merge remote-tracking branch 'origin/main' into classification_surrog…
gmancino Feb 16, 2024
e009509
Remove comments from MLP file
gmancino Feb 26, 2024
f025988
Fix tests
gmancino Feb 26, 2024
8dc5ee0
Fix MLP scalers
gmancino Feb 26, 2024
dd341d6
Remove CategoricalObjective
gmancino Feb 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 28 additions & 22 deletions bofire/benchmarks/single.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,18 +175,20 @@ def __init__(self, dim: int = 6, allowed_k: Optional[int] = None, **kwargs) -> N
outputs=Outputs(
features=[ContinuousOutput(key="y", objective=MinimizeObjective())]
),
constraints=Constraints(
constraints=[
NChooseKConstraint(
features=[f"x_{i}" for i in range(dim)],
min_count=0,
max_count=allowed_k,
none_also_valid=True,
)
]
)
if allowed_k
else Constraints(),
constraints=(
Constraints(
constraints=[
NChooseKConstraint(
features=[f"x_{i}" for i in range(dim)],
min_count=0,
max_count=allowed_k,
none_also_valid=True,
)
]
)
if allowed_k
else Constraints()
),
)
self._hartmann = botorch_hartmann(dim=dim)

Expand Down Expand Up @@ -227,21 +229,25 @@ def __init__(self, locality_factor: Optional[float] = None, **kwargs) -> None:
key="x_1",
bounds=(-5.0, 10),
local_relative_bounds=(
0.5 * locality_factor,
0.5 * locality_factor,
)
if locality_factor is not None
else (math.inf, math.inf),
(
0.5 * locality_factor,
0.5 * locality_factor,
)
if locality_factor is not None
else (math.inf, math.inf)
),
),
ContinuousInput(
key="x_2",
bounds=(0.0, 15.0),
local_relative_bounds=(
1.5 * locality_factor,
1.5 * locality_factor,
)
if locality_factor is not None
else (math.inf, math.inf),
(
1.5 * locality_factor,
1.5 * locality_factor,
)
if locality_factor is not None
else (math.inf, math.inf)
),
),
]
),
Expand Down
59 changes: 45 additions & 14 deletions bofire/data_models/domain/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
CategoricalDescriptorInput,
CategoricalInput,
CategoricalMolecularInput,
CategoricalOutput,
ContinuousInput,
ContinuousOutput,
DiscreteInput,
Expand All @@ -29,7 +30,11 @@
)
from bofire.data_models.filters import filter_by_attribute, filter_by_class
from bofire.data_models.molfeatures.api import MolFeatures
from bofire.data_models.objectives.api import AbstractObjective, Objective
from bofire.data_models.objectives.api import (
AbstractObjective,
CategoricalObjective,
Objective,
)
from bofire.data_models.types import TInputTransformSpecs

FeatureSequence = Union[List[AnyFeature], Tuple[AnyFeature]]
Expand Down Expand Up @@ -565,9 +570,11 @@ def get_bounds(
lo, up = feat.get_bounds(
transform_type=specs.get(feat.key), # type: ignore
values=experiments[feat.key] if experiments is not None else None,
reference_value=reference_experiment[feat.key] # type: ignore
if reference_experiment is not None
else None,
reference_value=(
reference_experiment[feat.key] # type: ignore
if reference_experiment is not None
else None
),
)
lower += lo
upper += up
Expand Down Expand Up @@ -635,8 +642,8 @@ def get_by_objective(
return Outputs(
features=sorted(
filter_by_attribute(
self.get(ContinuousOutput).features,
lambda of: of.objective, # type: ignore
self.get([ContinuousOutput, CategoricalOutput]).features,
jduerholt marked this conversation as resolved.
Show resolved Hide resolved
lambda of: of.objective,
includes,
excludes, # type: ignore
exact,
Expand All @@ -652,7 +659,9 @@ def get_keys_by_objective(
Type[Objective],
] = Objective,
excludes: Union[
List[Type[AbstractObjective]], Type[AbstractObjective], None
List[Type[AbstractObjective]],
Type[AbstractObjective],
None,
] = None,
exact: bool = False,
) -> List[str]:
Expand Down Expand Up @@ -687,6 +696,16 @@ def __call__(
feat(experiments[f"{feat.key}_pred" if predictions else feat.key]) # type: ignore
for feat in self.features
if feat.objective is not None
and not isinstance(feat, CategoricalOutput)
]
+ [
(
pd.Series(data=feat(experiments.filter(regex=f"{feat.key}(.*)_prob")), name=f"{feat.key}_pred") # type: ignore
if predictions
else experiments[feat.key]
)
for feat in self.features
jduerholt marked this conversation as resolved.
Show resolved Hide resolved
if feat.objective is not None and isinstance(feat, CategoricalOutput)
],
axis=1,
)
Expand Down Expand Up @@ -734,24 +753,24 @@ def validate_experiments(self, experiments: pd.DataFrame) -> pd.DataFrame:

def validate_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame:
# for each continuous output feature with an attached objective object
# ToDo: adjust it for the CategoricalOutput
cols = list(
continuous_cols = list(
itertools.chain.from_iterable(
[
[f"{key}_pred", f"{key}_sd", f"{key}_des"]
for key in self.get_keys_by_objective(Objective)
[f"{feat.key}_pred", f"{feat.key}_sd", f"{feat.key}_des"]
for feat in self.get_by_objective(
includes=Objective, excludes=CategoricalObjective
)
]
+ [
[f"{key}_pred", f"{key}_sd"]
for key in self.get_keys_by_objective(
gmancino marked this conversation as resolved.
Show resolved Hide resolved
excludes=Objective,
includes=None, # type: ignore
excludes=Objective, includes=None # type: ignore
gmancino marked this conversation as resolved.
Show resolved Hide resolved
)
]
)
)
# check that pred, sd, and des cols are specified and numerical
for col in cols:
for col in continuous_cols:
if col not in candidates:
raise ValueError(f"missing column {col}")
try:
Expand All @@ -762,6 +781,18 @@ def validate_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame:
raise ValueError(f"Not all values of column `{col}` are numerical.")
if candidates[col].isnull().to_numpy().any():
raise ValueError(f"Nan values are present in {col}.")
# Loop over the individual features so that the categories can be checked per objective
for feat in self.get(CategoricalOutput):
jduerholt marked this conversation as resolved.
Show resolved Hide resolved
cols = [f"{feat.key}_pred", f"{feat.key}_des"]
for col in cols:
if col not in candidates:
raise ValueError(f"missing column {col}")
if col == f"{feat.key}_pred":
feat.validate_experimental(candidates[col])
else:
# Check the desirability column for missing values (`cols` contains no `_sd` entry)
if candidates[col].isnull().to_numpy().any():
raise ValueError(f"Nan values are present in {col}.")
return candidates

def preprocess_experiments_one_valid_output(
Expand Down
7 changes: 7 additions & 0 deletions bofire/data_models/enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ class CategoricalEncodingEnum(Enum):
DESCRIPTOR = "DESCRIPTOR" # only possible for categorical with descriptors


class ClassificationMetricsEnum(Enum):
    """Enumeration of classification metrics.

    Each member's value is the same upper-case string as the member's name.
    """

    # Member values are plain strings identical to the member names.
    ACCURACY = "ACCURACY"
    F1 = "F1"


class OutputFilteringEnum(Enum):
ALL = "ALL"
ANY = "ANY"
Expand Down
47 changes: 27 additions & 20 deletions bofire/data_models/features/categorical.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from typing import ClassVar, Dict, List, Literal, Optional, Tuple, Union
from typing import Annotated, ClassVar, List, Literal, Optional, Tuple, Union

import numpy as np
import pandas as pd
from pydantic import Field, field_validator, model_validator
from typing_extensions import Annotated

from bofire.data_models.enum import CategoricalEncodingEnum
from bofire.data_models.features.feature import _CAT_SEP, Input, Output, TTransform
from bofire.data_models.objectives.api import AnyCategoricalObjective
from bofire.data_models.types import TCategoryVals


Expand Down Expand Up @@ -336,19 +336,30 @@ class CategoricalOutput(Output):
order_id: ClassVar[int] = 9

categories: TCategoryVals
objective: Annotated[List[Annotated[float, Field(ge=0, le=1)]], Field(min_length=2)]
objective: AnyCategoricalObjective

@field_validator("objective")
@classmethod
def validate_objective(cls, objective, info):
if len(objective) != len(info.data["categories"]):
raise ValueError("Length of objectives and categories do not match.")
for o in objective:
if o > 1:
raise ValueError("Objective values has to be smaller equal than 1.")
if o < 0:
raise ValueError("Objective values has to be larger equal than zero")
return objective
@model_validator(mode="after")
def validate_objective_categories(self):
    """Validate that the objective's categories match this output's categories.

    Registered as an "after" model validator, so it receives the instance
    itself and must return it.

    Raises:
        ValueError: If `self.objective.categories` is not equal to
            `self.categories`.

    Returns:
        self: The instance, unmodified, when the categories match.
    """
    # NOTE(review): the check is a plain `!=` comparison; presumably this is
    # order-sensitive if both sides are sequences -- confirm against the
    # definition of `TCategoryVals`.
    if self.objective.categories != self.categories: # type: ignore
        raise ValueError("categories must match to objective categories")
    return self
gmancino marked this conversation as resolved.
Show resolved Hide resolved

def __call__(self, values: pd.Series) -> pd.Series:
if self.objective is None:
return pd.Series(
data=[np.nan for _ in range(len(values))],
index=values.index,
gmancino marked this conversation as resolved.
Show resolved Hide resolved
name=values.name,
)
return self.objective(values) # type: ignore

def validate_experimental(self, values: pd.Series) -> pd.Series:
values = values.map(str)
Expand All @@ -358,9 +369,5 @@ def validate_experimental(self, values: pd.Series) -> pd.Series:
)
return values

def to_dict(self) -> Dict:
"""Returns the catergories and corresponding objective values as dictionary"""
return dict(zip(self.categories, self.objective))

def __call__(self, values: pd.Series) -> pd.Series:
return values.map(self.to_dict()).astype(float)
def __str__(self) -> str:
    """Return the fixed string label ``CategoricalOutputFeature``."""
    return "CategoricalOutputFeature"
8 changes: 5 additions & 3 deletions bofire/data_models/features/molecular.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,11 @@ def get_bounds(
# else we return the complete bounds
data = self.to_descriptor_encoding(
transform_type=transform_type,
values=pd.Series(self.get_allowed_categories())
if values is None
else pd.Series(self.categories),
values=(
pd.Series(self.get_allowed_categories())
if values is None
else pd.Series(self.categories)
),
)
lower = data.min(axis=0).values.tolist()
upper = data.max(axis=0).values.tolist()
Expand Down
8 changes: 8 additions & 0 deletions bofire/data_models/objectives/api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from typing import Union

from bofire.data_models.objectives.categorical import (
CategoricalObjective,
ConstrainedCategoricalObjective,
)
from bofire.data_models.objectives.identity import (
IdentityObjective,
MaximizeObjective,
Expand All @@ -22,8 +26,11 @@
IdentityObjective,
SigmoidObjective,
ConstrainedObjective,
CategoricalObjective,
]

AnyCategoricalObjective = ConstrainedCategoricalObjective

AnyConstraintObjective = Union[
MaximizeSigmoidObjective,
MinimizeSigmoidObjective,
Expand All @@ -39,4 +46,5 @@
MinimizeSigmoidObjective,
TargetObjective,
CloseToTargetObjective,
ConstrainedCategoricalObjective,
]
Loading
Loading