experimental-design · jduerholt · Feb 27, 2024 · Sep 22, 2023 · Sep 22, 2023 · Sep 22, 2023
diff --git a/bofire/benchmarks/single.py b/bofire/benchmarks/single.py
@@ -175,18 +175,20 @@ def __init__(self, dim: int = 6, allowed_k: Optional[int] = None, **kwargs) -> N
             outputs=Outputs(
                 features=[ContinuousOutput(key="y", objective=MinimizeObjective())]
             ),
-            constraints=Constraints(
-                constraints=[
-                    NChooseKConstraint(
-                        features=[f"x_{i}" for i in range(dim)],
-                        min_count=0,
-                        max_count=allowed_k,
-                        none_also_valid=True,
-                    )
-                ]
-            )
-            if allowed_k
-            else Constraints(),
+            constraints=(
+                Constraints(
+                    constraints=[
+                        NChooseKConstraint(
+                            features=[f"x_{i}" for i in range(dim)],
+                            min_count=0,
+                            max_count=allowed_k,
+                            none_also_valid=True,
+                        )
+                    ]
+                )
+                if allowed_k
+                else Constraints()
+            ),
         )
         self._hartmann = botorch_hartmann(dim=dim)
 
@@ -227,21 +229,25 @@ def __init__(self, locality_factor: Optional[float] = None, **kwargs) -> None:
                         key="x_1",
                         bounds=(-5.0, 10),
                         local_relative_bounds=(
-                            0.5 * locality_factor,
-                            0.5 * locality_factor,
-                        )
-                        if locality_factor is not None
-                        else (math.inf, math.inf),
+                            (
+                                0.5 * locality_factor,
+                                0.5 * locality_factor,
+                            )
+                            if locality_factor is not None
+                            else (math.inf, math.inf)
+                        ),
                     ),
                     ContinuousInput(
                         key="x_2",
                         bounds=(0.0, 15.0),
                         local_relative_bounds=(
-                            1.5 * locality_factor,
-                            1.5 * locality_factor,
-                        )
-                        if locality_factor is not None
-                        else (math.inf, math.inf),
+                            (
+                                1.5 * locality_factor,
+                                1.5 * locality_factor,
+                            )
+                            if locality_factor is not None
+                            else (math.inf, math.inf)
+                        ),
                     ),
                 ]
             ),

diff --git a/bofire/data_models/domain/features.py b/bofire/data_models/domain/features.py
@@ -20,6 +20,7 @@
     CategoricalDescriptorInput,
     CategoricalInput,
     CategoricalMolecularInput,
+    CategoricalOutput,
     ContinuousInput,
     ContinuousOutput,
     DiscreteInput,
@@ -29,7 +30,11 @@
 )
 from bofire.data_models.filters import filter_by_attribute, filter_by_class
 from bofire.data_models.molfeatures.api import MolFeatures
-from bofire.data_models.objectives.api import AbstractObjective, Objective
+from bofire.data_models.objectives.api import (
+    AbstractObjective,
+    CategoricalObjective,
+    Objective,
+)
 from bofire.data_models.types import TInputTransformSpecs
 
 FeatureSequence = Union[List[AnyFeature], Tuple[AnyFeature]]
@@ -565,9 +570,11 @@ def get_bounds(
             lo, up = feat.get_bounds(
                 transform_type=specs.get(feat.key),  # type: ignore
                 values=experiments[feat.key] if experiments is not None else None,
-                reference_value=reference_experiment[feat.key]  # type: ignore
-                if reference_experiment is not None
-                else None,
+                reference_value=(
+                    reference_experiment[feat.key]  # type: ignore
+                    if reference_experiment is not None
+                    else None
+                ),
             )
             lower += lo
             upper += up
@@ -635,8 +642,8 @@ def get_by_objective(
             return Outputs(
                 features=sorted(
                     filter_by_attribute(
-                        self.get(ContinuousOutput).features,
-                        lambda of: of.objective,  # type: ignore
+                        self.get([ContinuousOutput, CategoricalOutput]).features,
+                        lambda of: of.objective,
                         includes,
                         excludes,  # type: ignore
                         exact,
@@ -652,7 +659,9 @@ def get_keys_by_objective(
             Type[Objective],
         ] = Objective,
         excludes: Union[
-            List[Type[AbstractObjective]], Type[AbstractObjective], None
+            List[Type[AbstractObjective]],
+            Type[AbstractObjective],
+            None,
         ] = None,
         exact: bool = False,
     ) -> List[str]:
@@ -687,6 +696,16 @@ def __call__(
                 feat(experiments[f"{feat.key}_pred" if predictions else feat.key])  # type: ignore
                 for feat in self.features
                 if feat.objective is not None
+                and not isinstance(feat, CategoricalOutput)
+            ]
+            + [
+                (
+                    pd.Series(data=feat(experiments.filter(regex=f"{feat.key}(.*)_prob")), name=f"{feat.key}_pred")  # type: ignore
+                    if predictions
+                    else experiments[feat.key]
+                )
+                for feat in self.features
+                if feat.objective is not None and isinstance(feat, CategoricalOutput)
             ],
             axis=1,
         )
@@ -734,24 +753,24 @@ def validate_experiments(self, experiments: pd.DataFrame) -> pd.DataFrame:
 
     def validate_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame:
         # for each continuous output feature with an attached objective object
-        # ToDo: adjust it for the CategoricalOutput
-        cols = list(
+        continuous_cols = list(
             itertools.chain.from_iterable(
                 [
-                    [f"{key}_pred", f"{key}_sd", f"{key}_des"]
-                    for key in self.get_keys_by_objective(Objective)
+                    [f"{feat.key}_pred", f"{feat.key}_sd", f"{feat.key}_des"]
+                    for feat in self.get_by_objective(
+                        includes=Objective, excludes=CategoricalObjective
+                    )
                 ]
                 + [
                     [f"{key}_pred", f"{key}_sd"]
                     for key in self.get_keys_by_objective(
-                        excludes=Objective,
-                        includes=None,  # type: ignore
+                        excludes=Objective, includes=None  # type: ignore
                     )
                 ]
             )
         )
         # check that pred, sd, and des cols are specified and numerical
-        for col in cols:
+        for col in continuous_cols:
             if col not in candidates:
                 raise ValueError(f"missing column {col}")
             try:
@@ -762,6 +781,18 @@ def validate_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame:
                 raise ValueError(f"Not all values of column `{col}` are numerical.")
             if candidates[col].isnull().to_numpy().any():
                 raise ValueError(f"Nan values are present in {col}.")
+        # Validate per feature so each `_pred` column is checked by its own output
+        for feat in self.get(CategoricalOutput):
+            cols = [f"{feat.key}_pred", f"{feat.key}_des"]
+            for col in cols:
+                if col not in candidates:
+                    raise ValueError(f"missing column {col}")
+                if col == f"{feat.key}_pred":
+                    feat.validate_experimental(candidates[col])
+                else:
+                    # Only `_des` gets here (`cols` has no `_sd`); reject NaNs
+                    if candidates[col].isnull().to_numpy().any():
+                        raise ValueError(f"Nan values are present in {col}.")
         return candidates
 
     def preprocess_experiments_one_valid_output(

diff --git a/bofire/data_models/enum.py b/bofire/data_models/enum.py
@@ -28,6 +28,13 @@ class CategoricalEncodingEnum(Enum):
     DESCRIPTOR = "DESCRIPTOR"  # only possible for categorical with descriptors
 
 
+class ClassificationMetricsEnum(Enum):
+    """Classification metric identifiers; each value equals its member name."""
+
+    ACCURACY = "ACCURACY"
+    F1 = "F1"
+
+
 class OutputFilteringEnum(Enum):
     ALL = "ALL"
     ANY = "ANY"

diff --git a/bofire/data_models/features/categorical.py b/bofire/data_models/features/categorical.py
@@ -1,12 +1,12 @@
-from typing import ClassVar, Dict, List, Literal, Optional, Tuple, Union
+from typing import Annotated, ClassVar, List, Literal, Optional, Tuple, Union
 
 import numpy as np
 import pandas as pd
 from pydantic import Field, field_validator, model_validator
-from typing_extensions import Annotated
 
 from bofire.data_models.enum import CategoricalEncodingEnum
 from bofire.data_models.features.feature import _CAT_SEP, Input, Output, TTransform
+from bofire.data_models.objectives.api import AnyCategoricalObjective
 from bofire.data_models.types import TCategoryVals
 
 
@@ -336,19 +336,30 @@ class CategoricalOutput(Output):
     order_id: ClassVar[int] = 9
 
     categories: TCategoryVals
-    objective: Annotated[List[Annotated[float, Field(ge=0, le=1)]], Field(min_length=2)]
+    objective: AnyCategoricalObjective
 
-    @field_validator("objective")
-    @classmethod
-    def validate_objective(cls, objective, info):
-        if len(objective) != len(info.data["categories"]):
-            raise ValueError("Length of objectives and categories do not match.")
-        for o in objective:
-            if o > 1:
-                raise ValueError("Objective values has to be smaller equal than 1.")
-            if o < 0:
-                raise ValueError("Objective values has to be larger equal than zero")
-        return objective
+    @model_validator(mode="after")
+    def validate_objective_categories(self):
+        """Validate that the objective's categories equal this output's categories.
+
+        Raises:
+            ValueError: If `self.objective.categories != self.categories`.
+
+        Returns:
+            self: The validated instance, unchanged.
+        """
+        if self.objective.categories != self.categories:  # type: ignore
+            raise ValueError("categories must match to objective categories")
+        return self
+
+    def __call__(self, values: pd.Series) -> pd.Series:
+        if self.objective is None:  # NOTE(review): field is non-optional; reachable?
+            return pd.Series(  # all-NaN result aligned to the input's index and name
+                data=[np.nan for _ in range(len(values))],
+                index=values.index,
+                name=values.name,
+            )
+        return self.objective(values)  # type: ignore
 
     def validate_experimental(self, values: pd.Series) -> pd.Series:
         values = values.map(str)
@@ -358,9 +369,5 @@ def validate_experimental(self, values: pd.Series) -> pd.Series:
             )
         return values
 
-    def to_dict(self) -> Dict:
-        """Returns the catergories and corresponding objective values as dictionary"""
-        return dict(zip(self.categories, self.objective))
-
-    def __call__(self, values: pd.Series) -> pd.Series:
-        return values.map(self.to_dict()).astype(float)
+    def __str__(self) -> str:
+        return "CategoricalOutputFeature"  # constant label; does not include `key`
diff --git a/bofire/data_models/features/molecular.py b/bofire/data_models/features/molecular.py
@@ -163,9 +163,11 @@ def get_bounds(
             # else we return the complete bounds
             data = self.to_descriptor_encoding(
                 transform_type=transform_type,
-                values=pd.Series(self.get_allowed_categories())
-                if values is None
-                else pd.Series(self.categories),
+                values=(
+                    pd.Series(self.get_allowed_categories())
+                    if values is None
+                    else pd.Series(self.categories)
+                ),
             )
         lower = data.min(axis=0).values.tolist()
         upper = data.max(axis=0).values.tolist()

diff --git a/bofire/data_models/objectives/api.py b/bofire/data_models/objectives/api.py
@@ -1,5 +1,9 @@
 from typing import Union
 
+from bofire.data_models.objectives.categorical import (
+    CategoricalObjective,
+    ConstrainedCategoricalObjective,
+)
 from bofire.data_models.objectives.identity import (
     IdentityObjective,
     MaximizeObjective,
@@ -22,8 +26,11 @@
     IdentityObjective,
     SigmoidObjective,
     ConstrainedObjective,
+    CategoricalObjective,
 ]
 
+AnyCategoricalObjective = ConstrainedCategoricalObjective
+
 AnyConstraintObjective = Union[
     MaximizeSigmoidObjective,
     MinimizeSigmoidObjective,
@@ -39,4 +46,5 @@
     MinimizeSigmoidObjective,
     TargetObjective,
     CloseToTargetObjective,
+    ConstrainedCategoricalObjective,
 ]