Refactor API: ComposerRequirements and ComposerBuilder (#852)
* minor refactorings 1

* Simplify handling of ``init_cache`` in ApiComposer

* Better logging string for best models in archive (add metric names)

* Reorganize parameters:

n_jobs, start_depth, show_progress: PipelineComposerRequirements -> ComposerRequirements
duplicated max_pipeline_fit_time: drop from pipeline_composer_requirements.py

* Reorganize parameters between ComposerRequirements & GraphOptimizerParams

Move most of GraphOptimizerParams into ComposerRequirements.
Make the former a dataclass

* Rename with_auto_depth_configuration -> adaptive_depth

* Renames:

with_auto_depth_configuration -> adaptive_depth
depth_increase_step -> adaptive_depth_max_stagnation

* Renames:

stopping_after_n_generation -> early_stopping_generations

* Move graph depth/arity fields to pipeline_composer_requirements.py

* Add general non-negative check for pipeline values

* Minor fixes: renames, imports

* Drop duplicated field in requirements

* Fix postinit of default selection operator in GraphOptimizerParameters

* Move infrastructure-related options from pipeline requirements to composer_requirements.py

* Do not overwrite optimizer_params.multi_objective

* WIP

* Remove unused init in OptNodeFactory. Add signatures.

* WIP move adaptive depth to ComposerRequirements

* WIP move offspring_rate to basic requirements

* Revert multi_objective fix in composer_builder.py

* Separate infrastructural & algorithm options

* WIP: remove is_multi_objective from GraphOptimizerParameters.

Determine it automatically based on the number of metrics in ComposerBuilder,
or, in the case of manual setup, in the Objective class.

* Simplify usages of ComposerBuilder given changes in interface

Drop meaningless tests for ComposerBuilder

* Set up seed() data fixture in composer tests

* Fix boosting mutation after rebase

* fixup rebase

* fix pep8 issues

* fix review comments (renames, docstrings)

* pep8 fixes

* minor comment changes

* fix docstrings & move primary/secondary

* WIP move optimizer-related params to GraphOptimizerParameters

* WIP

* Make hyperparameters static dict

* Fix update of requirements & gp parameters in evo operators

* fix n_jobs in obj eval

* Move self.log to Operator base

* Move genetic parameters to GPGraphOptimizerParameters

* Minor rearrangements

* Fix tests & examples due to parameter rearrangement

* Remove build of GraphOptimizerParameters from composer_builder.py

* Fix incorrectly passed optimizer params in parameter_change_mutation

* Rebase fixes

* pep8 import fixes

* pep8 import fixes

* fix GraphOptimizerParameters import

* Fix elitism operator with incorrect sorting direction

* Revert elitism fix

* Set `with_history` in ComposerBuilder by default

* Move additional params from mutation operator to GraphOptParams
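
To make the net effect of the refactoring concrete, the snippet below is a minimal sketch of the reorganized API, distilled from the updated example scripts in this commit; the task type, operation names, timeout and metric are illustrative placeholders rather than part of the diff.

import datetime

from fedot.core.composer.composer_builder import ComposerBuilder
from fedot.core.optimisers.gp_comp.gp_optimizer import GeneticSchemeTypesEnum
from fedot.core.optimisers.gp_comp.gp_params import GPGraphOptimizerParameters
from fedot.core.optimisers.gp_comp.pipeline_composer_requirements import PipelineComposerRequirements
from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum

task = Task(TaskTypesEnum.classification)
# Illustrative operation names; any operations suitable for the task can be listed here
available_model_types = ['logit', 'knn', 'rf']

# Search space and infrastructure settings stay in the composer requirements
composer_requirements = PipelineComposerRequirements(
    primary=available_model_types,
    secondary=available_model_types,
    max_arity=3, max_depth=3,
    num_of_generations=20,
    timeout=datetime.timedelta(minutes=5)
)

# Settings of the evolutionary algorithm itself move to the optimizer parameters
optimizer_parameters = GPGraphOptimizerParameters(
    pop_size=20,
    mutation_prob=0.8, crossover_prob=0.8,
    genetic_scheme_type=GeneticSchemeTypesEnum.steady_state
)

# Metrics are passed directly as enum values; multi-objective mode is inferred from their number
composer = ComposerBuilder(task=task). \
    with_requirements(composer_requirements). \
    with_optimizer_params(optimizer_parameters). \
    with_metrics(ClassificationMetricsEnum.ROCAUC). \
    build()

# pipeline = composer.compose_pipeline(data=train_data)  # train_data is an InputData instance

The key change is the split of responsibilities: PipelineComposerRequirements keeps the search space and infrastructure options, GPGraphOptimizerParameters (now imported from gp_params) holds the settings of the evolutionary algorithm, and with_optimizer_params replaces the former with_optimiser_params on ComposerBuilder.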
gkirgizov authored Sep 13, 2022
1 parent 3db5464 commit fd2daed
Showing 49 changed files with 871 additions and 950 deletions.
33 changes: 16 additions & 17 deletions cases/credit_scoring/credit_scoring_problem_multiobj.py
@@ -7,7 +7,8 @@
from cases.credit_scoring.credit_scoring_problem import get_scoring_data
from fedot.core.composer.composer_builder import ComposerBuilder
from fedot.core.data.data import InputData
from fedot.core.optimisers.gp_comp.gp_optimizer import GPGraphOptimizerParameters, GeneticSchemeTypesEnum
from fedot.core.optimisers.gp_comp.gp_optimizer import GeneticSchemeTypesEnum
from fedot.core.optimisers.gp_comp.gp_params import GPGraphOptimizerParameters
from fedot.core.optimisers.gp_comp.operators.selection import SelectionTypesEnum
from fedot.core.optimisers.gp_comp.pipeline_composer_requirements import PipelineComposerRequirements
from fedot.core.pipelines.pipeline import Pipeline
@@ -55,22 +56,20 @@ def run_credit_scoring_problem(train_file_path, test_file_path,
# the choice and initialisation of the GP search
composer_requirements = PipelineComposerRequirements(
primary=available_model_types,
secondary=available_model_types, max_arity=3,
max_depth=3, pop_size=20, num_of_generations=20,
crossover_prob=0.8, mutation_prob=0.8, timeout=timeout,
start_depth=2)

# GP optimiser parameters choice
scheme_type = GeneticSchemeTypesEnum.parameter_free
optimiser_parameters = GPGraphOptimizerParameters(genetic_scheme_type=scheme_type,
selection_types=[SelectionTypesEnum.spea2])

# Create builder for composer and set composer params
builder = ComposerBuilder(task=task).with_requirements(composer_requirements).with_metrics(
metrics).with_optimiser_params(parameters=optimiser_parameters)

# Create GP-based composer
composer = builder.build()
secondary=available_model_types,
timeout=timeout
)
params = GPGraphOptimizerParameters(
selection_types=[SelectionTypesEnum.spea2],
genetic_scheme_type=GeneticSchemeTypesEnum.parameter_free,
)

# Create composer with required composer params
composer = ComposerBuilder(task=task). \
with_optimizer_params(params). \
with_requirements(composer_requirements). \
with_metrics(metrics). \
build()

# the optimal pipeline generation by composition - the most time-consuming task
pipelines_evo_composed = composer.compose_pipeline(data=dataset_to_compose)
30 changes: 18 additions & 12 deletions cases/river_levels_prediction/river_level_case_composer.py
@@ -8,13 +8,14 @@
from fedot.core.composer.composer_builder import ComposerBuilder
from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.optimisers.gp_comp.gp_params import GPGraphOptimizerParameters
from fedot.core.optimisers.gp_comp.pipeline_composer_requirements import PipelineComposerRequirements
from fedot.core.pipelines.node import PrimaryNode, SecondaryNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.pipelines.tuning.unified import PipelineTuner
from fedot.core.repository.quality_metrics_repository import \
MetricsRepository, RegressionMetricsEnum
RegressionMetricsEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum

warnings.filterwarnings('ignore')
@@ -81,7 +82,7 @@ def run_river_composer_experiment(file_path, init_pipeline, file_to_save,
'pca', 'ransac_non_lin_reg',
'rfe_non_lin_reg', 'normalization']
available_primary_operations = ['one_hot_encoding']

metric_function = RegressionMetricsEnum.MAE
# Report arrays
obtained_pipelines = []
depths = []
@@ -91,17 +92,22 @@

composer_requirements = PipelineComposerRequirements(
primary=available_primary_operations,
secondary=available_secondary_operations, max_arity=3,
max_depth=8, pop_size=10, num_of_generations=5,
crossover_prob=0.8, mutation_prob=0.8,
timeout=datetime.timedelta(minutes=5))

metric_function = MetricsRepository().metric_by_id(
RegressionMetricsEnum.MAE)
builder = ComposerBuilder(task=data.task). \
secondary=available_secondary_operations,
max_arity=3, max_depth=8,
num_of_generations=5,
timeout=datetime.timedelta(minutes=5)
)

optimizer_parameters = GPGraphOptimizerParameters(
pop_size=10, mutation_prob=0.8, crossover_prob=0.8,
)

composer = ComposerBuilder(task=data.task). \
with_requirements(composer_requirements). \
with_metrics(metric_function).with_initial_pipelines([init_pipeline])
composer = builder.build()
with_optimizer_params(optimizer_parameters). \
with_metrics(metric_function). \
with_initial_pipelines([init_pipeline]). \
build()

obtained_pipeline = composer.compose_pipeline(data=train_input)

@@ -10,9 +10,9 @@
from fedot.core.optimisers.adapters import DirectAdapter
from fedot.core.optimisers.gp_comp.gp_optimizer import (
EvoGraphOptimizer,
GeneticSchemeTypesEnum,
GPGraphOptimizerParameters
GeneticSchemeTypesEnum
)
from fedot.core.optimisers.gp_comp.gp_params import GPGraphOptimizerParameters
from fedot.core.optimisers.gp_comp.pipeline_composer_requirements import PipelineComposerRequirements
from fedot.core.optimisers.gp_comp.operators.crossover import CrossoverTypesEnum
from fedot.core.optimisers.gp_comp.operators.regularization import RegularizationTypesEnum
@@ -84,11 +84,16 @@ def run_custom_example(timeout: datetime.timedelta = None):

requirements = PipelineComposerRequirements(
primary=nodes_types,
secondary=nodes_types, max_arity=10,
max_depth=10, pop_size=5, num_of_generations=5,
crossover_prob=0.8, mutation_prob=0.9, timeout=timeout)
secondary=nodes_types,
max_arity=10,
max_depth=10,
num_of_generations=5,
timeout=timeout
)

optimiser_parameters = GPGraphOptimizerParameters(
pop_size=5,
crossover_prob=0.8, mutation_prob=0.9,
genetic_scheme_type=GeneticSchemeTypesEnum.steady_state,
mutation_types=[custom_mutation],
crossover_types=[CrossoverTypesEnum.none],
35 changes: 19 additions & 16 deletions examples/advanced/pipeline_sensitivity.py
@@ -6,9 +6,9 @@
from fedot.core.composer.composer_builder import ComposerBuilder
from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.optimisers.gp_comp.gp_optimizer import GPGraphOptimizerParameters
from fedot.core.optimisers.gp_comp.pipeline_composer_requirements import PipelineComposerRequirements
from fedot.core.optimisers.gp_comp.gp_params import GPGraphOptimizerParameters
from fedot.core.optimisers.gp_comp.operators.inheritance import GeneticSchemeTypesEnum
from fedot.core.optimisers.gp_comp.pipeline_composer_requirements import PipelineComposerRequirements
from fedot.core.repository.operation_types_repository import get_operations_for_task
from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum, MetricsRepository, \
RegressionMetricsEnum
@@ -25,20 +25,23 @@ def get_composed_pipeline(dataset_to_compose, task, metric_function):
# the choice and initialisation of the GP search
composer_requirements = PipelineComposerRequirements(
primary=available_model_types,
secondary=available_model_types, max_arity=3,
max_depth=3, pop_size=20, num_of_generations=20,
crossover_prob=0.8, mutation_prob=0.8)

# GP optimiser parameters choice
scheme_type = GeneticSchemeTypesEnum.steady_state
optimiser_parameters = GPGraphOptimizerParameters(genetic_scheme_type=scheme_type)

# Create builder for composer and set composer params
builder = ComposerBuilder(task=task).with_requirements(composer_requirements).with_metrics(
metric_function).with_optimiser_params(parameters=optimiser_parameters)

# Create GP-based composer
composer = builder.build()
secondary=available_model_types,
max_arity=3, max_depth=3,
num_of_generations=20,
)

optimizer_parameters = GPGraphOptimizerParameters(
pop_size=15,
mutation_prob=0.8, crossover_prob=0.8,
genetic_scheme_type=GeneticSchemeTypesEnum.steady_state,
)

# Create composer with required composer params
composer = ComposerBuilder(task=task). \
with_requirements(composer_requirements). \
with_optimizer_params(optimizer_parameters). \
with_metrics(metric_function). \
build()

# the optimal pipeline generation by composition - the most time-consuming task
pipeline_evo_composed = composer.compose_pipeline(data=dataset_to_compose)
30 changes: 14 additions & 16 deletions examples/advanced/sensitivity_analysis/pipelines_access.py
@@ -1,7 +1,7 @@
from fedot.core.composer.composer_builder import ComposerBuilder
from fedot.core.optimisers.gp_comp.gp_optimizer import GPGraphOptimizerParameters
from fedot.core.optimisers.gp_comp.pipeline_composer_requirements import PipelineComposerRequirements
from fedot.core.optimisers.gp_comp.gp_params import GPGraphOptimizerParameters
from fedot.core.optimisers.gp_comp.operators.inheritance import GeneticSchemeTypesEnum
from fedot.core.optimisers.gp_comp.pipeline_composer_requirements import PipelineComposerRequirements
from fedot.core.pipelines.node import PrimaryNode, SecondaryNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.repository.operation_types_repository import get_operations_for_task
@@ -43,20 +43,18 @@ def get_composed_pipeline(dataset_to_compose, task, metric_function):
# the choice and initialisation of the GP search
composer_requirements = PipelineComposerRequirements(
primary=available_model_types,
secondary=available_model_types, max_arity=3,
max_depth=3, pop_size=20, num_of_generations=20,
crossover_prob=0.8, mutation_prob=0.8)

# GP optimiser parameters choice
scheme_type = GeneticSchemeTypesEnum.steady_state
optimiser_parameters = GPGraphOptimizerParameters(genetic_scheme_type=scheme_type)

# Create builder for composer and set composer params
builder = ComposerBuilder(task=task).with_requirements(composer_requirements).with_metrics(
metric_function).with_optimiser_params(parameters=optimiser_parameters)

# Create GP-based composer
composer = builder.build()
secondary=available_model_types,
)
params = GPGraphOptimizerParameters(
genetic_scheme_type=GeneticSchemeTypesEnum.steady_state
)

# Create composer with required composer params
composer = ComposerBuilder(task=task). \
with_requirements(composer_requirements). \
with_optimizer_params(params). \
with_metrics(metric_function). \
build()

# the optimal pipeline generation by composition - the most time-consuming task
pipeline_evo_composed = composer.compose_pipeline(data=dataset_to_compose)
40 changes: 22 additions & 18 deletions examples/advanced/time_series_forecasting/composing_pipelines.py
@@ -6,18 +6,18 @@
from matplotlib import pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error

from examples.simple.time_series_forecasting.ts_pipelines import *
from examples.simple.time_series_forecasting.ts_pipelines import ts_complex_ridge_pipeline
from fedot.core.composer.composer_builder import ComposerBuilder
from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation
from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.optimisers.gp_comp.gp_optimizer import GPGraphOptimizerParameters
from fedot.core.optimisers.gp_comp.pipeline_composer_requirements import PipelineComposerRequirements
from fedot.core.optimisers.gp_comp.gp_params import GPGraphOptimizerParameters
from fedot.core.optimisers.gp_comp.operators.mutation import MutationTypesEnum
from fedot.core.optimisers.gp_comp.pipeline_composer_requirements import PipelineComposerRequirements
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.quality_metrics_repository import \
MetricsRepository, RegressionMetricsEnum
RegressionMetricsEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams
from fedot.core.utils import fedot_project_root

@@ -90,23 +90,27 @@ def run_composing(dataset: str, pipeline: Pipeline, len_forecast=250):
# Composer parameters
composer_requirements = PipelineComposerRequirements(
primary=primary_operations,
secondary=secondary_operations, max_arity=3,
max_depth=8, pop_size=10, num_of_generations=10,
crossover_prob=0.8, mutation_prob=0.8,
secondary=secondary_operations,
max_arity=3, max_depth=8,
num_of_generations=10,
timeout=datetime.timedelta(minutes=10),
cv_folds=2,
validation_blocks=2)

mutation_types = [parameter_change_mutation, MutationTypesEnum.growth, MutationTypesEnum.reduce,
MutationTypesEnum.simple]
optimiser_parameters = GPGraphOptimizerParameters(mutation_types=mutation_types)

metric_function = MetricsRepository().metric_by_id(RegressionMetricsEnum.RMSE)
builder = ComposerBuilder(task=task). \
with_optimiser_params(parameters=optimiser_parameters). \
validation_blocks=2
)
optimizer_parameters = GPGraphOptimizerParameters(
pop_size=10,
crossover_prob=0.8, mutation_prob=0.8,
mutation_types=[parameter_change_mutation,
MutationTypesEnum.growth,
MutationTypesEnum.reduce,
MutationTypesEnum.simple]
)
composer = ComposerBuilder(task). \
with_requirements(composer_requirements). \
with_metrics(metric_function).with_initial_pipelines([pipeline])
composer = builder.build()
with_optimizer_params(optimizer_parameters). \
with_metrics(RegressionMetricsEnum.RMSE). \
with_initial_pipelines([pipeline]). \
build()

obtained_pipeline = composer.compose_pipeline(data=train_data)

@@ -9,13 +9,13 @@
from examples.simple.time_series_forecasting.ts_pipelines import ts_complex_ridge_smoothing_pipeline
from fedot.core.composer.composer_builder import ComposerBuilder
from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation
from fedot.core.optimisers.gp_comp.gp_optimizer import GPGraphOptimizerParameters
from fedot.core.optimisers.gp_comp.gp_params import GPGraphOptimizerParameters
from fedot.core.optimisers.gp_comp.operators.mutation import MutationTypesEnum
from fedot.core.optimisers.gp_comp.pipeline_composer_requirements import PipelineComposerRequirements
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.pipelines.tuning.unified import PipelineTuner
from fedot.core.repository.quality_metrics_repository import \
MetricsRepository, RegressionMetricsEnum
RegressionMetricsEnum


def calculate_metrics(target, predicted):
@@ -36,21 +36,24 @@ def compose_pipeline(pipeline, train_data, task):
primary_operations, secondary_operations = get_available_operations()
composer_requirements = PipelineComposerRequirements(
primary=primary_operations,
secondary=secondary_operations, max_arity=3,
max_depth=5, pop_size=15, num_of_generations=30,
crossover_prob=0.8, mutation_prob=0.8,
secondary=secondary_operations,
max_arity=3, max_depth=5,
num_of_generations=30,
timeout=datetime.timedelta(minutes=10))
mutation_types = [parameter_change_mutation,
MutationTypesEnum.single_change,
MutationTypesEnum.single_drop,
MutationTypesEnum.single_add]
optimiser_parameters = GPGraphOptimizerParameters(mutation_types=mutation_types)
metric_function = MetricsRepository().metric_by_id(RegressionMetricsEnum.MAE)
builder = ComposerBuilder(task=task). \
with_optimiser_params(parameters=optimiser_parameters). \
optimizer_parameters = GPGraphOptimizerParameters(
pop_size=15,
mutation_prob=0.8, crossover_prob=0.8,
mutation_types=[parameter_change_mutation,
MutationTypesEnum.single_change,
MutationTypesEnum.single_drop,
MutationTypesEnum.single_add]
)
composer = ComposerBuilder(task=task). \
with_optimizer_params(optimizer_parameters). \
with_requirements(composer_requirements). \
with_metrics(metric_function).with_initial_pipelines([pipeline])
composer = builder.build()
with_metrics(RegressionMetricsEnum.MAE). \
with_initial_pipelines([pipeline]). \
build()
obtained_pipeline = composer.compose_pipeline(data=train_data)
obtained_pipeline.show()
return obtained_pipeline