Skip to content

Commit

Permalink
Add pandas data manipulation transform (#460)
Browse files Browse the repository at this point in the history
* add __len__ for dataframes

* add new transform

* add missing files

* fix 3.9 compatibility issue

* fix typing
  • Loading branch information
jduerholt authored Nov 18, 2024
1 parent 6318d0e commit 1b99e8a
Show file tree
Hide file tree
Showing 16 changed files with 269 additions and 31 deletions.
3 changes: 3 additions & 0 deletions bofire/data_models/dataframes/dataframes.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,9 @@ def from_pandas(row: pd.Series, domain: Domain) -> "CandidateRow":
class DataFrame(BaseModel, Generic[TRow]):
rows: Sequence[TRow]

# Length of the dataframe model: the number of entries held in `rows`.
def __len__(self):
return len(self.rows)

@field_validator("rows")
def validate_rows(cls, rows):
if len({tuple(sorted(row.input_keys)) for row in rows}) > 1:
Expand Down
6 changes: 5 additions & 1 deletion bofire/data_models/strategies/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,11 @@
)
from bofire.data_models.strategies.stepwise.stepwise import Step, StepwiseStrategy
from bofire.data_models.strategies.strategy import Strategy
from bofire.data_models.transforms.api import AnyTransform, DropDataTransform
from bofire.data_models.transforms.api import (
AnyTransform,
DropDataTransform,
ManipulateDataTransform,
)


AbstractStrategy = Union[
Expand Down
58 changes: 45 additions & 13 deletions bofire/data_models/strategies/stepwise/stepwise.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from typing import Annotated, List, Literal, Optional, Type

from pydantic import Field, field_validator
from pydantic import Field, model_validator

from bofire.data_models.base import BaseModel
from bofire.data_models.constraints.api import Constraint
from bofire.data_models.features.api import Feature
from bofire.data_models.domain.api import Domain
from bofire.data_models.features.api import CategoricalInput, CategoricalOutput, Feature
from bofire.data_models.strategies.actual_strategy_type import ActualStrategy
from bofire.data_models.strategies.stepwise.conditions import (
AlwaysTrueCondition,
Expand All @@ -21,23 +22,54 @@ class Step(BaseModel):
transform: Optional[AnyTransform] = None


class StepwiseStrategy(Strategy):
type: Literal["StepwiseStrategy"] = "StepwiseStrategy"
steps: Annotated[List[Step], Field(min_length=2)]
def validate_domain_compatibility(domain1: Domain, domain2: Domain):
"""Validates if two domains are compatible to each other.
@field_validator("steps")
@classmethod
def validate_steps(cls, v: List[Step], info):
for i, step in enumerate(v):
if step.strategy_data.domain != info.data["domain"]:
To be compatible it is necessary that they have the same number
of features, the same feature keys and that the
features with the same key have the same type and categories.
The bounds and allowed categories of the features can vary.
Args:
domain1 (Domain): First domain to be compared.
domain2 (Domain): Second domain to be compared.
Raises:
ValueError: If one of the conditions mentioned above is not met.
"""
features1 = domain1.inputs + domain1.outputs
features2 = domain2.inputs + domain2.outputs
if len(features1) != len(features2):
raise ValueError("Domains have different number of features.")
if features1.get_keys() != features2.get_keys():
raise ValueError("Domains have different feature keys.")
for feature1, feature2 in zip(features1.get(), features2.get()):
if feature1.__class__ != feature2.__class__:
raise ValueError(f"Features with key {feature1.key} have different types.")
if isinstance(feature1, (CategoricalInput, CategoricalOutput)) and isinstance(
feature2, (CategoricalInput, CategoricalOutput)
):
if feature1.categories != feature2.categories:
raise ValueError(
f"Domain of step {i} is incompatible to domain of StepwiseStrategy.",
f"Features with key {feature1.key} have different categories."
)
if i < len(v) - 1 and isinstance(step.condition, AlwaysTrueCondition):


class StepwiseStrategy(Strategy):
type: Literal["StepwiseStrategy"] = "StepwiseStrategy" # type: ignore
steps: Annotated[List[Step], Field(min_length=2)]

@model_validator(mode="after")
def validate_steps(self):
for i, step in enumerate(self.steps):
validate_domain_compatibility(self.domain, step.strategy_data.domain)
if i < len(self.steps) - 1 and isinstance(
step.condition, AlwaysTrueCondition
):
raise ValueError(
"`AlwaysTrueCondition` is only allowed for the last step.",
)
return v
return self

@classmethod
def is_feature_implemented(cls, my_type: Type[Feature]) -> bool:
Expand Down
5 changes: 4 additions & 1 deletion bofire/data_models/transforms/api.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from typing import Union

from bofire.data_models.transforms.drop_data import DropDataTransform
from bofire.data_models.transforms.manipulate_data import ManipulateDataTransform


AnyTransform = DropDataTransform
AnyTransform = Union[DropDataTransform, ManipulateDataTransform]
4 changes: 2 additions & 2 deletions bofire/data_models/transforms/drop_data.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from typing import List, Literal, Optional

from pydantic import BaseModel
from bofire.data_models.transforms.transform import Transform


class DropDataTransform(BaseModel):
class DropDataTransform(Transform):
type: Literal["DropDataTransform"] = "DropDataTransform"
to_be_removed_experiments: Optional[List[int]] = None
to_be_removed_candidates: Optional[List[int]] = None
38 changes: 38 additions & 0 deletions bofire/data_models/transforms/manipulate_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from typing import List, Literal, Optional

from pydantic import Field, model_validator

from bofire.data_models.transforms.transform import Transform


class ManipulateDataTransform(Transform):
    """Data model of a transform that manipulates experiments/candidates with pandas expressions.

    Every entry is an expression string in the syntax described here:
    https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.eval.html#pandas.DataFrame.eval

    Attributes:
        experiment_transforms: Expressions to be applied to the experiments.
        candidate_transforms: Expressions to be applied to the candidates.
        candidate_untransforms: Expressions to be applied to untransform the
            generated candidates.
    """

    type: Literal["ManipulateDataTransform"] = "ManipulateDataTransform"
    experiment_transforms: Optional[List[str]] = Field(None, min_length=1)
    candidate_transforms: Optional[List[str]] = Field(None, min_length=1)
    candidate_untransforms: Optional[List[str]] = Field(None, min_length=1)

    @model_validator(mode="after")
    def validate_transformations(self):
        """Ensure that at least one of the three expression lists is set.

        Raises:
            ValueError: If all three lists are unset.
        """
        something_configured = bool(
            self.experiment_transforms
            or self.candidate_transforms
            or self.candidate_untransforms
        )
        if something_configured:
            return self

        raise ValueError(
            "At least one of experiment_transforms, candidate_transforms, or candidate_untransforms must be provided."
        )
7 changes: 7 additions & 0 deletions bofire/data_models/transforms/transform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from typing import Any

from bofire.data_models.base import BaseModel


# Shared base data model for the transform data models.
# `type` is left as `Any` here; the subclasses shown in this commit
# (DropDataTransform, ManipulateDataTransform) narrow it to a `Literal` tag.
class Transform(BaseModel):
type: Any
7 changes: 3 additions & 4 deletions bofire/strategies/stepwise/stepwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,9 @@ def _ask(self, candidate_count: Optional[PositiveInt]) -> pd.DataFrame: # type:

candidate_count = candidate_count or 1

# handle a possible transform
tf_domain = _apply_tf(self.domain, transform, "domain")
transformed_domain = tf_domain or self.domain
strategy.domain = transformed_domain
# handle a possible transform, no need to apply transforms to domains, as domains
# do not have to be exactly the same for each step, they only have to be compatible
# to the master domain of the stepwise strategy
tf_exp = _apply_tf(self.experiments, transform, "experiments")
transformed_experiments = self.experiments if tf_exp is None else tf_exp
tf_cand = _apply_tf(self.candidates, transform, "candidates")
Expand Down
2 changes: 2 additions & 0 deletions bofire/transforms/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

import bofire.data_models.transforms.api as data_models
from bofire.transforms.drop_data import DropDataTransform
from bofire.transforms.manipulate_data import ManipulateDataTransform
from bofire.transforms.transform import Transform


# Lookup table from a transform data model class to the functional transform
# class that implements it; each new transform type needs an entry here.
TRANSFORM_MAP: Dict[Type[data_models.AnyTransform], Type[Transform]] = {
data_models.DropDataTransform: DropDataTransform,
data_models.ManipulateDataTransform: ManipulateDataTransform,
}


Expand Down
31 changes: 31 additions & 0 deletions bofire/transforms/manipulate_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import pandas as pd

from bofire.data_models.transforms.api import ManipulateDataTransform as DataModel
from bofire.transforms.transform import Transform


class ManipulateDataTransform(Transform):
    """Apply pandas ``DataFrame.eval`` expressions to experiments and candidates.

    The expression lists come from the data model. A list that is unset on the
    data model is treated as empty, so the corresponding dataframe is passed
    through unchanged.
    """

    def __init__(self, data_model: DataModel):
        # Normalize unset (None) lists to empty lists so that "nothing to do"
        # is handled uniformly in `_apply_pd_transforms`.
        self.experiment_transforms = data_model.experiment_transforms or []
        self.candidate_transforms = data_model.candidate_transforms or []
        self.candidate_untransforms = data_model.candidate_untransforms or []

    def _apply_pd_transforms(self, df: pd.DataFrame, transforms: list) -> pd.DataFrame:
        """Apply every expression in ``transforms`` to ``df``, in order.

        Args:
            df: Dataframe to manipulate; it is never modified in place.
            transforms: Expression strings, each handed to ``DataFrame.eval``.

        Returns:
            ``df`` itself if ``transforms`` is empty, otherwise a transformed copy.
        """
        if not transforms:
            return df
        # Work on a copy so the caller's dataframe stays untouched.
        transformed_df = df.copy()
        # NOTE(security): the strings are evaluated by pandas' expression
        # engine; they must come from a trusted strategy definition.
        for tr in transforms:
            transformed_df.eval(tr, inplace=True)

        return transformed_df

    def transform_experiments(self, experiments: pd.DataFrame) -> pd.DataFrame:
        """Return ``experiments`` with the experiment expressions applied."""
        return self._apply_pd_transforms(experiments, self.experiment_transforms)

    def transform_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame:
        """Return ``candidates`` with the candidate expressions applied."""
        return self._apply_pd_transforms(candidates, self.candidate_transforms)

    def untransform_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame:
        """Return ``candidates`` with the untransform expressions applied."""
        return self._apply_pd_transforms(candidates, self.candidate_untransforms)
9 changes: 2 additions & 7 deletions bofire/transforms/transform.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import pandas as pd

from bofire.data_models.domain.api import Domain


class Transform:
def __init__(self, *_args, **_kwargs) -> None:
Expand All @@ -13,8 +11,5 @@ def transform_experiments(self, experiments: pd.DataFrame) -> pd.DataFrame:
def transform_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame:
return candidates

def transform_domain(self, domain: Domain) -> Domain:
return domain

def untransform_candidates(self, experiments: pd.DataFrame) -> pd.DataFrame:
return experiments
def untransform_candidates(self, candidates: pd.DataFrame) -> pd.DataFrame:
return candidates
2 changes: 2 additions & 0 deletions tests/bofire/data_models/dataframes/test_dataframes.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
def test_experiments_to_pandas():
experiments: Experiments = dataframe_spec.valid(Experiments).obj()
df_experiments = experiments.to_pandas()
assert len(df_experiments) == len(experiments)
domain = Domain.from_lists(
inputs=[
ContinuousInput(key="a", bounds=(0, 2)),
Expand All @@ -37,6 +38,7 @@ def test_experiments_to_pandas():
def test_candidates_to_pandas():
candidates: Candidates = dataframe_spec.valid(Candidates).obj()
df_candidates = candidates.to_pandas()
assert len(df_candidates) == len(candidates)
domain = Domain.from_lists(
inputs=[
ContinuousInput(key="a", bounds=(0, 2)),
Expand Down
18 changes: 17 additions & 1 deletion tests/bofire/data_models/specs/transforms.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pydantic import ValidationError

from bofire.data_models.strategies.api import DropDataTransform
from bofire.data_models.transforms.api import DropDataTransform, ManipulateDataTransform
from tests.bofire.data_models.specs.specs import Specs


Expand Down Expand Up @@ -35,3 +35,19 @@
lambda: {"to_be_removed_exp": None, "to_be_removed_cand": None},
error=ValidationError,
)

# Valid spec: a single populated list (here `experiment_transforms`) is passed.
specs.add_valid(
ManipulateDataTransform,
lambda: {"experiment_transforms": ["a=b+c"]},
)

# Invalid spec: all three lists explicitly set to None; expected to fail with
# the message raised by the model's `validate_transformations` validator.
specs.add_invalid(
ManipulateDataTransform,
lambda: {
"experiment_transforms": None,
"candidate_transforms": None,
"candidate_untransforms": None,
},
error=ValueError,
message="At least one of experiment_transforms, candidate_transforms, or candidate_untransforms must be provided.",
)
75 changes: 75 additions & 0 deletions tests/bofire/strategies/stepwise/test_manipulate_data_transform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from copy import deepcopy

from pandas.testing import assert_frame_equal, assert_series_equal

import bofire.strategies.api as strategies
import bofire.transforms.api as transforms
from bofire.benchmarks.api import Himmelblau
from bofire.data_models.strategies.predictives.sobo import SoboStrategy
from bofire.data_models.strategies.random import RandomStrategy
from bofire.data_models.strategies.stepwise.conditions import (
AlwaysTrueCondition,
NumberOfExperimentsCondition,
)
from bofire.data_models.strategies.stepwise.stepwise import Step, StepwiseStrategy
from bofire.data_models.transforms.api import ManipulateDataTransform


def test_dropdata_transform():
    """Check that ManipulateDataTransform applies its expressions and round-trips candidates.

    NOTE(review): the name says "dropdata" but this test exercises
    ManipulateDataTransform; consider renaming it.
    """
    bench = Himmelblau()
    candidates = bench.domain.inputs.sample(10)
    experiments = bench.f(bench.domain.inputs.sample(10), return_complete=True)

    transform_data = ManipulateDataTransform(
        experiment_transforms=["x_1 = x_1 + 100", "x_2 = x_2 / 2.0"],
        candidate_transforms=["x_1 = x_1 -20", "x_2 = x_2 / 2.0"],
        candidate_untransforms=["x_1 = x_1 + 20", "x_2 = x_2 * 2.0"],
    )

    transform = transforms.map(transform_data)

    transformed_experiments = transform.transform_experiments(experiments)
    transformed_candidates = transform.transform_candidates(candidates)
    untransformed_candidates = transform.untransform_candidates(transformed_candidates)

    assert_series_equal(experiments.x_1 + 100, transformed_experiments.x_1)
    assert_series_equal(experiments.x_2 / 2.0, transformed_experiments.x_2)

    # The candidate transform shifts x_1 by -20, so the transformed frame must
    # differ from the input. The previous try/except around assert_frame_equal
    # swallowed the AssertionError and therefore could never fail.
    assert not candidates.equals(transformed_candidates)

    # Applying the untransform expressions must restore the original candidates.
    assert_frame_equal(candidates, untransformed_candidates)


def test_stepwise():
    """Run a two-step StepwiseStrategy whose first step untransforms its candidates.

    The master domain uses widened bounds for x_1 and x_2, while both steps
    use the benchmark domain. The candidates returned by `ask` are expected to
    carry the +200 / -200 shift of the untransform expressions.
    """
    bench = Himmelblau()
    candidates = bench.domain.inputs.sample(10)

    shift_transform = ManipulateDataTransform(
        candidate_untransforms=["x_1 = x_1 + 200", "x_2 = x_2 - 200"],
    )

    # Master domain: same features as the benchmark, only the bounds differ.
    master_domain = deepcopy(bench.domain)
    master_domain.inputs.get_by_key("x_1").bounds = (-6, 300)
    master_domain.inputs.get_by_key("x_2").bounds = (-300, 6)

    random_step = Step(
        condition=NumberOfExperimentsCondition(n_experiments=5),
        strategy_data=RandomStrategy(domain=bench.domain),
        transform=shift_transform,
    )
    fallback_step = Step(
        condition=AlwaysTrueCondition(),
        strategy_data=SoboStrategy(domain=bench.domain),
    )
    strategy_data = StepwiseStrategy(
        domain=master_domain,
        steps=[random_step, fallback_step],
    )

    strategy = strategies.map(strategy_data)
    candidates = strategy.ask(candidate_count=1)
    assert all(candidates.x_1 >= 150)
    assert all(candidates.x_2 <= -150)
Loading

0 comments on commit 1b99e8a

Please sign in to comment.