diff --git a/cases/credit_scoring/credit_scoring_problem_multiobj.py b/cases/credit_scoring/credit_scoring_problem_multiobj.py
index dd2e81fa16..64ed3cf9d6 100644
--- a/cases/credit_scoring/credit_scoring_problem_multiobj.py
+++ b/cases/credit_scoring/credit_scoring_problem_multiobj.py
@@ -26,10 +26,10 @@ def results_visualization(history, composed_pipelines):
-    visualiser = OptHistoryExtraVisualizer()
-    visualiser.visualise_history(history)
-    visualiser.pareto_gif_create(history.archive_history, history.individuals)
-    visualiser.boxplots_gif_create(history.individuals)
+    visualiser = OptHistoryExtraVisualizer(history)
+    visualiser.visualise_history()
+    visualiser.pareto_gif_create()
+    visualiser.boxplots_gif_create()
     for pipeline_evo_composed in composed_pipelines:
         pipeline_evo_composed.show()
diff --git a/cases/river_levels_prediction/river_level_case_composer.py b/cases/river_levels_prediction/river_level_case_composer.py
index 0dd7697d7f..138dd1efc9 100644
--- a/cases/river_levels_prediction/river_level_case_composer.py
+++ b/cases/river_levels_prediction/river_level_case_composer.py
@@ -168,6 +168,6 @@ def run_river_composer_experiment(file_path, init_pipeline, file_to_save,
     # Available tuners for application: SimultaneousTuner, SequentialTuner
     run_river_composer_experiment(file_path='../data/river_levels/station_levels.csv',
                                   init_pipeline=init_pipeline,
-                                  file_to_save='data/river_levels/old_composer_new_preprocessing_report.csv',
+                                  file_to_save='../data/river_levels/old_composer_new_preprocessing_report.csv',
                                   iterations=20,
                                   tuner=SimultaneousTuner)
diff --git a/docs/source/advanced/hyperparameters_tuning.rst b/docs/source/advanced/hyperparameters_tuning.rst
index 7eecdbe94e..93a2b2ae97 100644
--- a/docs/source/advanced/hyperparameters_tuning.rst
+++ b/docs/source/advanced/hyperparameters_tuning.rst
@@ -2,14 +2,70 @@ Tuning of Hyperparameters
 =========================
 To tune pipeline hyperparameters you can use GOLEM. There are two ways:
-1. Tuning of all models hyperparameters simultaneously. Implemented via ``SimultaneousTuner`` class.
+1. Tuning of the hyperparameters of all models simultaneously. Implemented via the ``SimultaneousTuner`` and ``IOptTuner`` classes.
-2. Tuning of models hyperparameters sequentially node by node optimizing metric value for the whole pipeline. Implemented via ``SequentialTuner`` class.
+2. Tuning of model hyperparameters sequentially, node by node, optimizing the metric value for the whole pipeline, or tuning
+   the hyperparameters of only one node. Implemented via the ``SequentialTuner`` class.
 More information about these approaches can be found `here `_.
-If ``with_tuning`` flag is set to ``True`` when using :doc:`FEDOT API `, simultaneous hyperparameters tuning is applied for composed pipeline and ``metric`` value is used as a metric for tuning.
+If the ``with_tuning`` flag is set to ``True`` when using the :doc:`FEDOT API `, simultaneous hyperparameter tuning
+using ``SimultaneousTuner`` is applied to the composed pipeline, and the ``metric`` value is used as the metric for tuning.
+
+FEDOT uses the tuner implementations from GOLEM; see the `GOLEM documentation`_ for more information.
+
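+For instance, tuning at the API level can be enabled as follows (a minimal sketch; the dataset paths and the target
+column name are hypothetical):
+
+.. code-block:: python
+
+    from fedot.api.main import Fedot
+
+    # with_tuning=True launches SimultaneousTuner for the composed pipeline
+    model = Fedot(problem='classification', timeout=5, with_tuning=True)
+    model.fit(features='train.csv', target='target')
+    prediction = model.predict(features='test.csv')
+
+.. 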
list-table:: Tuners comparison
+   :widths: 10 30 30 30
+   :header-rows: 1
+
+   * -
+     - ``SimultaneousTuner``
+     - ``SequentialTuner``
+     - ``IOptTuner``
+   * - Based on
+     - Hyperopt
+     - Hyperopt
+     - iOpt
+   * - Type of tuning
+     - Simultaneous
+     - | Sequential or
+       | for one node only
+     - Simultaneous
+   * - | Optimized
+       | parameters
+     - | categorical
+       | discrete
+       | continuous
+     - | categorical
+       | discrete
+       | continuous
+     - | discrete
+       | continuous
+   * - Algorithm type
+     - stochastic
+     - stochastic
+     - deterministic
+   * - | Supported
+       | constraints
+     - | timeout
+       | iterations
+       | early_stopping_rounds
+       | eval_time_constraint
+     - | timeout
+       | iterations
+       | early_stopping_rounds
+       | eval_time_constraint
+     - | iterations
+       | eval_time_constraint
+   * - | Supports initial
+       | point
+     - Yes
+     - No
+     - No
+
+Hyperopt-based tuners usually take less time per iteration, but ``IOptTuner`` is able to obtain much more stable results.
+
 Simple example
 ~~~~~~~~~~~~~~
@@ -45,7 +101,7 @@ To initialize a tuner you can use ``TunerBuilder``.
 * with_timeout_
 * with_eval_time_constraint_
 * with_search_space_
-* with_algo_
+* with_additional_params_
 
 Tuner class
 -----------
@@ -183,15 +239,31 @@ To customize search space use ``PipelineSearchSpace`` class.
     custom_search_space = {
         'logit': {
-            'C': (hp.uniform, [0.01, 5.0])
+            'C': {
+                'hyperopt-dist': hp.uniform,
+                'sampling-scope': [1e-1, 5.0],
+                'type': 'continuous'}
         },
         'pca': {
-            'n_components': (hp.uniform, [0.2, 0.8])
+            'n_components': {
+                'hyperopt-dist': hp.uniform,
+                'sampling-scope': [0.1, 0.5],
+                'type': 'continuous'}
         },
         'knn': {
-            'n_neighbors': (hp.uniformint, [1, 6]),
-            'weights': (hp.choice, [["uniform", "distance"]]),
-            'p': (hp.choice, [[1, 2]])}
+            'n_neighbors': {
+                'hyperopt-dist': hp.uniformint,
+                'sampling-scope': [1, 20],
+                'type': 'discrete'},
+            'weights': {
+                'hyperopt-dist': hp.choice,
+                'sampling-scope': [["uniform", "distance"]],
+                'type': 'categorical'},
+            'p': {
+                'hyperopt-dist': hp.choice,
+                'sampling-scope': [[1, 2]],
+                'type': 'categorical'}
+        }
     }
     search_space = PipelineSearchSpace(custom_search_space=custom_search_space, replace_default_search_space=True)
@@ -201,20 +273,35 @@
 
     tuned_pipeline = pipeline_tuner.tune(pipeline)
 
-Algorithm
----------
+Additional parameters
+---------------------
+
+.. _with_additional_params:
+
+If there is no ``TunerBuilder`` function to set a specific parameter of a tuner, use ``.with_additional_params()``.
+
+The possible additional parameters are listed in the `GOLEM documentation`_.
 
-.. _with_algo:
+For example, you can set an algorithm with a signature similar to ``hyperopt.tpe.suggest`` for ``SimultaneousTuner`` or
+``SequentialTuner``.
 
-You can set algorithm for hyperparameters optimization with signature similar to ``hyperopt.tse.suggest``. By default, ``hyperopt.tse.suggest`` is used.
 
 .. code-block:: python
 
-    algo = hyperopt.rand.suggest
+    pipeline_tuner = TunerBuilder(Task(TaskTypesEnum.classification)) \
+        .with_additional_params(algo=hyperopt.rand.suggest) \
+        .build(train_data)
+
+    tuned_pipeline = pipeline_tuner.tune(pipeline)
+
+For ``IOptTuner``, such parameters as ``r``, ``evolvent_density`` and ``eps_r`` can be set.
+
+.. 
code-block:: python
 
     pipeline_tuner = TunerBuilder(Task(TaskTypesEnum.classification)) \
-        .with_algo(algo) \
+        .with_tuner(IOptTuner) \
+        .with_additional_params(r=1, evolvent_density=5) \
         .build(train_data)
 
     tuned_pipeline = pipeline_tuner.tune(pipeline)
@@ -260,8 +347,9 @@ Constraints
 Examples
 ~~~~~~~~
 
-Tuning all hyperparameters simultaniously
+Tuning all hyperparameters simultaneously
 -----------------------------------------
+Example for ``SimultaneousTuner``:
 
 .. code-block:: python
 
@@ -295,12 +383,24 @@ Tuning all hyperparameters simultaniously
 
     custom_search_space = {
         'logit': {
-            'C': (hp.uniform, [0.01, 5.0])
+            'C': {
+                'hyperopt-dist': hp.uniform,
+                'sampling-scope': [0.01, 5.0],
+                'type': 'continuous'}
         },
         'knn': {
-            'n_neighbors': (hp.uniformint, [1, 6]),
-            'weights': (hp.choice, [["uniform", "distance"]]),
-            'p': (hp.choice, [[1, 2]])}
+            'n_neighbors': {
+                'hyperopt-dist': hp.uniformint,
+                'sampling-scope': [1, 20],
+                'type': 'discrete'},
+            'weights': {
+                'hyperopt-dist': hp.choice,
+                'sampling-scope': [["uniform", "distance"]],
+                'type': 'categorical'},
+            'p': {
+                'hyperopt-dist': hp.choice,
+                'sampling-scope': [[1, 2]],
+                'type': 'categorical'}}
     }
     search_space = PipelineSearchSpace(custom_search_space=custom_search_space, replace_default_search_space=True)
@@ -319,7 +419,7 @@ Tuning all hyperparameters simultaniously
         .with_early_stopping_rounds(early_stopping_rounds) \
         .with_timeout(timeout) \
         .with_search_space(search_space) \
-        .with_algo(algo) \
+        .with_additional_params(algo=algo) \
         .with_eval_time_constraint(eval_time_constraint) \
         .build(train_data)
 
@@ -339,6 +439,60 @@ Tuned pipeline structure:
     rf - {'n_jobs': 1, 'bootstrap': True, 'criterion': 'entropy', 'max_features': 0.46348491415788157, 'min_samples_leaf': 11,
     'min_samples_split': 2, 'n_estimators': 100}
     logit - {'C': 3.056080157518786}
+
+Example for ``IOptTuner``:
+
+.. code-block:: python
+
+    import datetime
+    from golem.core.tuning.iopt_tuner import IOptTuner
+    from fedot.core.data.data import InputData
+    from fedot.core.pipelines.pipeline_builder import PipelineBuilder
+    from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements
+    from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
+    from fedot.core.repository.quality_metrics_repository import RegressionMetricsEnum
+    from fedot.core.repository.tasks import TaskTypesEnum, Task
+
+    task = Task(TaskTypesEnum.regression)
+
+    tuner = IOptTuner
+
+    requirements = PipelineComposerRequirements(cv_folds=2, n_jobs=2)
+
+    metric = RegressionMetricsEnum.MSE
+
+    iterations = 100
+
+    eval_time_constraint = datetime.timedelta(seconds=30)
+
+    train_data = InputData.from_csv('train_data.csv', task=task)
+
+    pipeline = PipelineBuilder().add_node('knnreg', branch_idx=0).add_branch('rfr', branch_idx=1) \
+        .join_branches('knnreg').build()
+
+    pipeline_tuner = TunerBuilder(task) \
+        .with_tuner(tuner) \
+        .with_requirements(requirements) \
+        .with_metric(metric) \
+        .with_iterations(iterations) \
+        .with_additional_params(eps=0.02, r=1, refine_solution=True) \
+        .with_eval_time_constraint(eval_time_constraint) \
+        .build(train_data)
+
+    tuned_pipeline = pipeline_tuner.tune(pipeline)
+
+    tuned_pipeline.print_structure()
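+
+Note that, as the comparison table above shows, ``IOptTuner`` optimizes only discrete and continuous parameters, so
+categorical hyperparameters are not varied by this tuner.
+
+Tuned pipeline structure:
+
+.. 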
code-block:: python + + Pipeline structure: + {'depth': 2, 'length': 3, 'nodes': [knnreg, knnreg, rfr]} + knnreg - {'n_neighbors': 51} + knnreg - {'n_neighbors': 40} + rfr - {'n_jobs': 1, 'max_features': 0.05324707031250003, 'min_samples_split': 12, 'min_samples_leaf': 11} + Sequential tuning ----------------- @@ -476,3 +630,5 @@ Another examples can be found here: **Multitask** * `Multitask pipeline: classification and regression `_ + +.. _GOLEM documentation: https://thegolem.readthedocs.io/en/latest/api/tuning.html diff --git a/examples/advanced/time_series_forecasting/custom_model_tuning.py b/examples/advanced/time_series_forecasting/custom_model_tuning.py index 40f4709029..4b807553cf 100644 --- a/examples/advanced/time_series_forecasting/custom_model_tuning.py +++ b/examples/advanced/time_series_forecasting/custom_model_tuning.py @@ -94,16 +94,23 @@ def run_pipeline_tuning(time_series, len_forecast, pipeline_type): # Setting custom search space for tuner (necessary) # model and output_type should be wrapped into hyperopt custom_search_space = {'custom': { - 'alpha': (hp.uniform, [0.01, 10]), - 'model_predict': (hp.choice, [[custom_ml_model_imitation_predict]]), - 'model_fit': (hp.choice, [[custom_ml_model_imitation_fit]])}} + 'alpha': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.01, 10], + 'type': 'continuous'}}} elif pipeline_type == 'without_fit': pipeline = get_domain_pipeline() # Setting custom search space for tuner (necessary) # model and output_type should be wrapped into hyperopt - custom_search_space = {'custom': {'a': (hp.uniform, [-100, 100]), - 'b': (hp.uniform, [0, 1000]), - 'model_predict': (hp.choice, [[domain_model_imitation_predict]])}} + custom_search_space = {'custom': { + 'a': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [-100, 100], + 'type': 'continuous'}, + 'b': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0, 1000], + 'type': 'continuous'}}} pipeline.fit_from_scratch(train_input) pipeline.print_structure() # Get prediction with initial approximation diff --git a/examples/simple/classification/image_classification_problem.py b/examples/simple/classification/image_classification_problem.py index 29f98270d1..c4ea3b51e8 100644 --- a/examples/simple/classification/image_classification_problem.py +++ b/examples/simple/classification/image_classification_problem.py @@ -6,7 +6,7 @@ try: import tensorflow as tf except ModuleNotFoundError: - warn_requirement('tensorflow') + warn_requirement('tensorflow', 'fedot[extra]') from sklearn.metrics import roc_auc_score as roc_auc diff --git a/examples/simple/pipeline_tuning_with_iopt.py b/examples/simple/pipeline_tuning_with_iopt.py new file mode 100644 index 0000000000..1665514c81 --- /dev/null +++ b/examples/simple/pipeline_tuning_with_iopt.py @@ -0,0 +1,62 @@ +from golem.core.tuning.iopt_tuner import IOptTuner + +from fedot.core.composer.metrics import MSE +from fedot.core.data.data import InputData +from fedot.core.data.data_split import train_test_data_setup +from fedot.core.pipelines.pipeline import Pipeline +from fedot.core.pipelines.pipeline_builder import PipelineBuilder +from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements +from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder +from fedot.core.repository.quality_metrics_repository import RegressionMetricsEnum +from fedot.core.repository.tasks import TaskTypesEnum, Task +from fedot.core.utils import fedot_project_root + + +def tune_pipeline(pipeline: Pipeline, + train_data: InputData, + 
test_data: InputData, + tuner_iter_num: int = 100): + + task = Task(TaskTypesEnum.regression) + requirements = PipelineComposerRequirements(cv_folds=3, n_jobs=-1) + metric = RegressionMetricsEnum.MSE + + # Fit initial pipeline + pipeline.fit(train_data) + before_tuning_predicted = pipeline.predict(test_data) + # Obtain test metric before tuning + metric_before_tuning = MSE().metric(test_data, before_tuning_predicted) + + pipeline_tuner = TunerBuilder(task) \ + .with_tuner(IOptTuner) \ + .with_requirements(requirements) \ + .with_metric(metric) \ + .with_iterations(tuner_iter_num) \ + .with_additional_params(eps=0.02, r=1.5, refine_solution=True) \ + .build(train_data) + + tuned_pipeline = pipeline_tuner.tune(pipeline) + + # Fit tuned pipeline + tuned_pipeline.fit(train_data) + after_tuning_predicted = tuned_pipeline.predict(test_data) + # Obtain test metric after tuning + metric_after_tuning = MSE().metric(test_data, after_tuning_predicted) + + print(f'\nMetric before tuning: {metric_before_tuning}') + print(f'Metric after tuning: {metric_after_tuning}') + return tuned_pipeline + + +if __name__ == '__main__': + pipeline = (PipelineBuilder() + .add_node('dtreg', 0) + .add_node('knnreg', 1) + .join_branches('rfr') + .build()) + data_path = f'{fedot_project_root()}/cases/data/cholesterol/cholesterol.csv' + + data = InputData.from_csv(data_path, + task=Task(TaskTypesEnum.regression)) + train_data, test_data = train_test_data_setup(data) + tuned_pipeline = tune_pipeline(pipeline, train_data, test_data, tuner_iter_num=200) diff --git a/fedot/api/api_utils/api_composer.py b/fedot/api/api_utils/api_composer.py index f570bacc0a..8c10d40aa5 100644 --- a/fedot/api/api_utils/api_composer.py +++ b/fedot/api/api_utils/api_composer.py @@ -146,13 +146,13 @@ def tune_final_pipeline(self, train_data: InputData, pipeline_gp_composed: Pipel """ Launch tuning procedure for obtained pipeline by composer """ timeout_for_tuning = abs(self.timer.determine_resources_for_tuning()) / 60 tuner = (TunerBuilder(self.params.task) - .with_tuner(SimultaneousTuner) - .with_metric(self.metrics.metric_functions) - .with_iterations(DEFAULT_TUNING_ITERATIONS_NUMBER) - .with_timeout(datetime.timedelta(minutes=timeout_for_tuning)) - .with_eval_time_constraint(self.params.composer_requirements.max_graph_fit_time) - .with_requirements(self.params.composer_requirements) - .build(train_data)) + .with_tuner(SimultaneousTuner) + .with_metric(self.metrics.metric_functions) + .with_iterations(DEFAULT_TUNING_ITERATIONS_NUMBER) + .with_timeout(datetime.timedelta(minutes=timeout_for_tuning)) + .with_eval_time_constraint(self.params.composer_requirements.max_graph_fit_time) + .with_requirements(self.params.composer_requirements) + .build(train_data)) if self.timer.have_time_for_tuning(): # Tune all nodes in the pipeline diff --git a/fedot/api/help.py b/fedot/api/help.py index 052f314496..6205590f64 100644 --- a/fedot/api/help.py +++ b/fedot/api/help.py @@ -18,7 +18,7 @@ def print_models_info(task_name): search_space = PipelineSearchSpace() for model in repository_operations_list: if model.id != 'custom': - hyperparameters = search_space.get_operation_parameter_range(str(model.id)) + hyperparameters = search_space.get_parameters_for_operation(str(model.id)) implementation_info = model.current_strategy(task)(model.id).implementation_info info_lst = [ f"Model name - '{model.id}'", @@ -41,7 +41,7 @@ def print_data_operations_info(task_name): repository_operations_list = _filter_operations_by_type(repository, task) search_space = 
PipelineSearchSpace() for operation in repository_operations_list: - hyperparameters = search_space.get_operation_parameter_range(str(operation.id)) + hyperparameters = search_space.get_parameters_for_operation(str(operation.id)) implementation_info = operation.current_strategy(task)(operation.id).implementation_info info_lst = [ f"Data operation name - '{operation.id}'", diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index 9d983ddc8c..7cd007afdf 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -14,7 +14,7 @@ try: import cv2 except ModuleNotFoundError: - warn_requirement('opencv-python') + warn_requirement('opencv-python', 'fedot[extra]') cv2 = None from fedot.core.data.array_utilities import atleast_2d diff --git a/fedot/core/operations/evaluation/gpu/classification.py b/fedot/core/operations/evaluation/gpu/classification.py index 2d4eec5356..87ed3d5ab2 100644 --- a/fedot/core/operations/evaluation/gpu/classification.py +++ b/fedot/core/operations/evaluation/gpu/classification.py @@ -5,7 +5,7 @@ try: import cudf except ModuleNotFoundError: - warn_requirement('cudf') + warn_requirement('cudf', 'cudf') cudf = None from fedot.core.data.data import InputData, OutputData diff --git a/fedot/core/operations/evaluation/gpu/clustering.py b/fedot/core/operations/evaluation/gpu/clustering.py index 779330ad70..c46964f874 100644 --- a/fedot/core/operations/evaluation/gpu/clustering.py +++ b/fedot/core/operations/evaluation/gpu/clustering.py @@ -9,7 +9,7 @@ from cuml import KMeans import cudf except ModuleNotFoundError: - warn_requirement('cudf / cuml') + warn_requirement('cudf / cuml', 'cudf / cuml') cudf = None KMeans = None diff --git a/fedot/core/operations/evaluation/gpu/common.py b/fedot/core/operations/evaluation/gpu/common.py index 986e669448..1917093feb 100644 --- a/fedot/core/operations/evaluation/gpu/common.py +++ b/fedot/core/operations/evaluation/gpu/common.py @@ -16,7 +16,7 @@ from cuml import LinearRegression as CuMlLinReg, SGD as CuMlSGD, \ MultinomialNB as CuMlMultinomialNB except ModuleNotFoundError: - warn_requirement('cudf / cuml') + warn_requirement('cudf / cuml', 'cudf / cuml') cudf = None cuml = None diff --git a/fedot/core/operations/evaluation/gpu/regression.py b/fedot/core/operations/evaluation/gpu/regression.py index 38040d1859..fe0a59a038 100644 --- a/fedot/core/operations/evaluation/gpu/regression.py +++ b/fedot/core/operations/evaluation/gpu/regression.py @@ -3,7 +3,7 @@ try: import cudf except ModuleNotFoundError: - warn_requirement('cudf') + warn_requirement('cudf', 'cudf / cuml') cudf = None from fedot.core.data.data import InputData, OutputData diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/text_preprocessing.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/text_preprocessing.py index b576309531..929bfe5897 100644 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/text_preprocessing.py +++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/text_preprocessing.py @@ -10,7 +10,7 @@ try: import nltk except ModuleNotFoundError: - warn_requirement('nltk') + warn_requirement('nltk', 'fedot[extra]') nltk = None from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import ( diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/text_pretrained.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/text_pretrained.py index 
cdf4529961..70f6cf8f9c 100644
--- a/fedot/core/operations/evaluation/operation_implementations/data_operations/text_pretrained.py
+++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/text_pretrained.py
@@ -15,7 +15,7 @@
     import gensim.downloader as api
     from gensim.models import KeyedVectors
 except ModuleNotFoundError:
-    warn_requirement('gensim')
+    warn_requirement('gensim', 'fedot[extra]')
     api = None
     KeyedVectors = None
diff --git a/fedot/core/operations/evaluation/operation_implementations/models/keras.py b/fedot/core/operations/evaluation/operation_implementations/models/keras.py
index eadd635890..e38bd68aac 100644
--- a/fedot/core/operations/evaluation/operation_implementations/models/keras.py
+++ b/fedot/core/operations/evaluation/operation_implementations/models/keras.py
@@ -10,7 +10,7 @@
 try:
     import tensorflow as tf
 except ModuleNotFoundError:
-    warn_requirement('tensorflow')
+    warn_requirement('tensorflow', 'fedot[extra]')
     tf = None
 
 from fedot.core.data.data import InputData, OutputData
diff --git a/fedot/core/operations/evaluation/operation_implementations/models/ts_implementations/cgru.py b/fedot/core/operations/evaluation/operation_implementations/models/ts_implementations/cgru.py
index 6c3162f879..19b1f15ea8 100644
--- a/fedot/core/operations/evaluation/operation_implementations/models/ts_implementations/cgru.py
+++ b/fedot/core/operations/evaluation/operation_implementations/models/ts_implementations/cgru.py
@@ -18,7 +18,7 @@ class TorchMock:
     from torch.optim.lr_scheduler import MultiStepLR
     from torch.utils.data import DataLoader, TensorDataset
 except ModuleNotFoundError:
-    warn_requirement('torch')
+    warn_requirement('torch', 'fedot[extra]')
     torch = object()
     nn = TorchMock
diff --git a/fedot/core/pipelines/tuning/hyperparams.py b/fedot/core/pipelines/tuning/hyperparams.py
index 25e0bcf7fa..0bc5cbc52c 100644
--- a/fedot/core/pipelines/tuning/hyperparams.py
+++ b/fedot/core/pipelines/tuning/hyperparams.py
@@ -1,6 +1,7 @@
 import random
 
 from golem.core.log import default_log
+from golem.core.tuning.hyperopt_tuner import get_parameter_hyperopt_space
 from hyperopt.pyll.stochastic import sample as hp_sample
 
 from fedot.core.pipelines.tuning.search_space import PipelineSearchSpace
@@ -23,9 +24,9 @@ def get_new_operation_params(self):
         """ Function return a dictionary with new parameters values """
 
         # Get available parameters for operation
-        params_list = PipelineSearchSpace().get_operation_parameter_range(self.operation_name)
+        params_list = PipelineSearchSpace().get_parameters_for_operation(self.operation_name)
 
-        if params_list is None:
+        if not params_list:
             params_dict = None
         else:
             # Get new values for all parameters
@@ -66,17 +67,11 @@ def new_params_dict(self, params_list):
         return params_dict
 
     def _get_current_parameter_value(self, parameter_name):
-
-        if isinstance(self.current_params, str):
-            # TODO 'default_params' - need to process
+        try:
+            current_value = self.current_params.get(parameter_name)
+        except Exception as error:
+            self.logger.warning(f'The following error occurred during the hyperparameter configuration: {error}')
             current_value = None
-        else:
-            # Dictionary with parameters
-            try:
-                current_value = self.current_params.get(parameter_name)
-            except Exception as exec:
-                self.logger.warning(f'The following error occurred during the hyperparameter configuration.{exec}')
-                current_value = None
 
         return current_value
 
@@ -84,9 +79,10 @@ def _random_change(parameter_name, **kwargs):
         """ Randomly 
selects a parameter value from a specified range """ - space = PipelineSearchSpace().get_operation_parameter_range(operation_name=kwargs['operation_name'], - parameter_name=parameter_name, - label=parameter_name) + space = get_parameter_hyperopt_space(PipelineSearchSpace(), + operation_name=kwargs['operation_name'], + parameter_name=parameter_name, + label=parameter_name) # Randomly choose new value new_value = hp_sample(space) return {parameter_name: new_value} diff --git a/fedot/core/pipelines/tuning/search_space.py b/fedot/core/pipelines/tuning/search_space.py index 611323081a..b6c383d2b0 100644 --- a/fedot/core/pipelines/tuning/search_space.py +++ b/fedot/core/pipelines/tuning/search_space.py @@ -1,7 +1,7 @@ -from typing import Optional, Dict, Tuple, Callable, List +from functools import partial +from typing import Optional -import numpy as np -from golem.core.tuning.search_space import SearchSpace +from golem.core.tuning.search_space import SearchSpace, OperationParametersMapping from hyperopt import hp @@ -15,7 +15,7 @@ class PipelineSearchSpace(SearchSpace): """ def __init__(self, - custom_search_space: Optional[Dict[str, Dict[str, Tuple[Callable, List]]]] = None, + custom_search_space: Optional[OperationParametersMapping] = None, replace_default_search_space: bool = False): self.custom_search_space = custom_search_space self.replace_default_search_space = replace_default_search_space @@ -25,270 +25,706 @@ def __init__(self, def get_parameters_dict(self): parameters_per_operation = { 'kmeans': { - 'n_clusters': (hp.uniformint, [2, 7]) + 'n_clusters': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 7], + 'type': 'discrete'} }, 'adareg': { - - 'learning_rate': (hp.loguniform, [np.log(1e-3), np.log(1)]), - 'loss': (hp.choice, [["linear", "square", "exponential"]]) + 'learning_rate': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-3, 1], + 'type': 'continuous'}, + 'loss': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [["linear", "square", "exponential"]], + 'type': 'categorical'} }, 'gbr': { - - 'loss': (hp.choice, [["ls", "lad", "huber", "quantile"]]), - 'learning_rate': (hp.loguniform, [np.log(1e-3), np.log(1)]), - 'max_depth': (hp.uniformint, [1, 11]), - 'min_samples_split': (hp.uniformint, [2, 21]), - 'min_samples_leaf': (hp.uniformint, [1, 21]), - 'subsample': (hp.uniform, [0.05, 1.0]), - 'max_features': (hp.uniform, [0.05, 1.0]), - 'alpha': (hp.uniform, [0.75, 0.99]) + 'loss': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [["ls", "lad", "huber", "quantile"]], + 'type': 'categorical'}, + 'learning_rate': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-3, 1], + 'type': 'continuous'}, + 'max_depth': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 11], + 'type': 'discrete'}, + 'min_samples_split': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 21], + 'type': 'discrete'}, + 'min_samples_leaf': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 21], + 'type': 'discrete'}, + 'subsample': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.05, 1.0], + 'type': 'continuous'}, + 'max_features': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.05, 1.0], + 'type': 'continuous'}, + 'alpha': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.75, 0.99], + 'type': 'continuous'} }, 'logit': { - 'C': (hp.uniform, [1e-2, 10.0]) + 'C': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [1e-2, 10.0], + 'type': 'continuous'} }, 'rf': { - 'criterion': (hp.choice, [["gini", "entropy"]]), - 'max_features': 
(hp.uniform, [0.05, 1.0]), - 'min_samples_split': (hp.uniformint, [2, 10]), - 'min_samples_leaf': (hp.uniformint, [1, 15]), - 'bootstrap': (hp.choice, [[True, False]]) - }, - 'lasso': { - 'alpha': (hp.uniform, [0.01, 10.0]) + 'criterion': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [["gini", "entropy"]], + 'type': 'categorical'}, + 'max_features': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.05, 1.0], + 'type': 'continuous'}, + 'min_samples_split': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 10], + 'type': 'discrete'}, + 'min_samples_leaf': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 15], + 'type': 'discrete'}, + 'bootstrap': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[True, False]], + 'type': 'categorical'} }, 'ridge': { - 'alpha': (hp.uniform, [0.01, 10.0]) + 'alpha': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.01, 10.0], + 'type': 'continuous'} + }, + 'lasso': { + 'alpha': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.01, 10.0], + 'type': 'continuous'} }, 'rfr': { - - 'max_features': (hp.uniform, [0.05, 1.0]), - 'min_samples_split': (hp.uniformint, [2, 21]), - 'min_samples_leaf': (hp.uniformint, [1, 21]), - 'bootstrap': (hp.choice, [[True, False]]) + 'max_features': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.05, 1.0], + 'type': 'continuous'}, + 'min_samples_split': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 21], + 'type': 'discrete'}, + 'min_samples_leaf': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 15], + 'type': 'discrete'}, + 'bootstrap': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[True, False]], + 'type': 'categorical'} }, 'xgbreg': { - - 'max_depth': (hp.uniformint, [1, 11]), - 'learning_rate': (hp.loguniform, [np.log(1e-3), np.log(1)]), - 'subsample': (hp.uniform, [0.05, 1.0]), - 'min_child_weight': (hp.uniformint, [1, 21]), - 'objective': (hp.choice, [['reg:squarederror']]) + 'max_depth': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 11], + 'type': 'discrete'}, + 'learning_rate': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-3, 1], + 'type': 'continuous'}, + 'subsample': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.05, 1.0], + 'type': 'continuous'}, + 'min_child_weight': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 21], + 'type': 'discrete'}, }, 'xgboost': { - - 'max_depth': (hp.uniformint, [1, 7]), - 'learning_rate': (hp.loguniform, [np.log(1e-3), np.log(1)]), - 'subsample': (hp.uniform, [0.05, 0.99]), - 'min_child_weight': (hp.uniform, [1, 21]) + 'max_depth': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 7], + 'type': 'discrete'}, + 'learning_rate': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-3, 1], + 'type': 'continuous'}, + 'subsample': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.05, 0.99], + 'type': 'continuous'}, + 'min_child_weight': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 21], + 'type': 'discrete'} }, 'svr': { - 'loss': (hp.choice, [["epsilon_insensitive", "squared_epsilon_insensitive"]]), - 'tol': (hp.loguniform, [np.log(1e-5), np.log(1e-1)]), - 'C': (hp.uniform, [1e-4, 25.0]), - 'epsilon': (hp.uniform, [1e-4, 1.0]) + 'C': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [1e-4, 25.0], + 'type': 'continuous'}, + 'epsilon': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [1e-4, 1], + 'type': 'continuous'}, + 'tol': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-5, 1e-1], + 'type': 'continuous'}, + 
'loss': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [["epsilon_insensitive", "squared_epsilon_insensitive"]], + 'type': 'categorical'} }, 'dtreg': { - 'max_depth': (hp.uniformint, [1, 11]), - 'min_samples_split': (hp.uniformint, [2, 21]), - 'min_samples_leaf': (hp.uniformint, [1, 21]) + 'max_depth': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 11], + 'type': 'discrete'}, + 'min_samples_split': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 21], + 'type': 'discrete'}, + 'min_samples_leaf': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 21], + 'type': 'discrete'} }, 'treg': { - - 'max_features': (hp.uniform, [0.05, 1.0]), - 'min_samples_split': (hp.uniformint, [2, 21]), - 'min_samples_leaf': (hp.uniformint, [1, 21]), - 'bootstrap': (hp.choice, [[True, False]]) + 'max_features': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.05, 1.0], + 'type': 'continuous'}, + 'min_samples_split': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 21], + 'type': 'discrete'}, + 'min_samples_leaf': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 21], + 'type': 'discrete'}, + 'bootstrap': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[True, False]], + 'type': 'categorical'} }, 'dt': { - 'max_depth': (hp.uniformint, [1, 11]), - 'min_samples_split': (hp.uniformint, [2, 21]), - 'min_samples_leaf': (hp.uniformint, [1, 21]) + 'max_depth': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 11], + 'type': 'discrete'}, + 'min_samples_split': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 21], + 'type': 'discrete'}, + 'min_samples_leaf': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 21], + 'type': 'discrete'} }, 'knnreg': { - 'n_neighbors': (hp.uniformint, [1, 50]), - 'weights': (hp.choice, [["uniform", "distance"]]), - 'p': (hp.choice, [[1, 2]]) + 'n_neighbors': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 50], + 'type': 'discrete'}, + 'weights': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [["uniform", "distance"]], + 'type': 'categorical'}, + 'p': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[1, 2]], + 'type': 'categorical'} }, 'knn': { - 'n_neighbors': (hp.uniformint, [1, 50]), - 'weights': (hp.choice, [["uniform", "distance"]]), - 'p': (hp.choice, [[1, 2]]) + 'n_neighbors': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 50], + 'type': 'discrete'}, + 'weights': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [["uniform", "distance"]], + 'type': 'categorical'}, + 'p': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[1, 2]], + 'type': 'categorical'} }, 'arima': { - 'p': (hp.uniformint, [1, 7]), - 'd': (hp.uniformint, [0, 2]), - 'q': (hp.uniformint, [1, 5]) + 'p': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 7], + 'type': 'discrete'}, + 'd': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [0, 2], + 'type': 'discrete'}, + 'q': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 5], + 'type': 'discrete'} }, 'stl_arima': { - 'p': (hp.uniformint, [1, 7]), - 'd': (hp.uniformint, [0, 2]), - 'q': (hp.uniformint, [1, 5]), - 'period': (hp.uniformint, [1, 365]) + 'p': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 7], + 'type': 'discrete'}, + 'd': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [0, 2], + 'type': 'discrete'}, + 'q': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 5], + 'type': 'discrete'}, + 'period': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 365], + 'type': 
'discrete'} }, 'ar': { - 'lag_1': (hp.uniform, [2, 200]), - 'lag_2': (hp.uniform, [2, 800]) + 'lag_1': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [2, 200], + 'type': 'continuous'}, + 'lag_2': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [2, 800], + 'type': 'continuous'} }, 'ets': { - 'error': (hp.choice, [['add', 'mul']]), - 'trend': (hp.choice, [[None, 'add', 'mul']]), - 'seasonal': (hp.choice, [[None, 'add', 'mul']]), - 'damped_trend': (hp.choice, [[True, False]]), - 'seasonal_periods': (hp.uniform, [1, 100]) - }, - 'glm': {'nested_space': (hp.choice, [[ - { - 'family': 'gaussian', - 'link': hp.choice('link_gaussian', ['identity', - 'inverse_power', - 'log']) - }, - { - 'family': 'gamma', - 'link': hp.choice('link_gamma', ['identity', - 'inverse_power', - 'log']) - }, - { - 'family': 'inverse_gaussian', - 'link': hp.choice('link_inv_gaussian', ['identity', - 'inverse_power']) - } + 'error': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [["add", "mul"]], + 'type': 'categorical'}, + 'trend': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[None, "add", "mul"]], + 'type': 'categorical'}, + 'seasonal': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[None, "add", "mul"]], + 'type': 'categorical'}, + 'damped_trend': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[True, False]], + 'type': 'categorical'}, + 'seasonal_periods': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [1, 100], + 'type': 'continuous'} + }, + 'glm': { + 'nested_space': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[ + { + 'family': 'gaussian', + 'link': hp.choice('link_gaussian', ['identity', + 'inverse_power', + 'log']) + }, + { + 'family': 'gamma', + 'link': hp.choice('link_gamma', ['identity', + 'inverse_power', + 'log']) + }, + { + 'family': 'inverse_gaussian', + 'link': hp.choice('link_inv_gaussian', ['identity', + 'inverse_power']) + } - ]])}, + ]], + 'type': 'categorical'} + }, 'cgru': { - 'hidden_size': (hp.uniform, [20, 200]), - 'learning_rate': (hp.uniform, [0.0005, 0.005]), - 'cnn1_kernel_size': (hp.uniformint, [3, 8]), - 'cnn1_output_size': (hp.choice, [[8, 16, 32, 64]]), - 'cnn2_kernel_size': (hp.uniformint, [3, 8]), - 'cnn2_output_size': (hp.choice, [[8, 16, 32, 64]]), - 'batch_size': (hp.choice, [[64, 128]]), - 'num_epochs': (hp.choice, [[10, 20, 50, 100]]), - 'optimizer': (hp.choice, [['adamw', 'sgd']]), - 'loss': (hp.choice, [['mae', 'mse']]) + 'hidden_size': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [20, 200], + 'type': 'continuous'}, + 'learning_rate': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.0005, 0.005], + 'type': 'continuous'}, + 'cnn1_kernel_size': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [3, 8], + 'type': 'discrete'}, + 'cnn1_output_size': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[8, 16, 32, 64]], + 'type': 'categorical'}, + 'cnn2_kernel_size': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [3, 8], + 'type': 'discrete'}, + 'cnn2_output_size': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[8, 16, 32, 64]], + 'type': 'categorical'}, + 'batch_size': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[64, 128]], + 'type': 'categorical'}, + 'num_epochs': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[10, 20, 50, 100]], + 'type': 'categorical'}, + 'optimizer': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['adamw', 'sgd']], + 'type': 'categorical'}, + 'loss': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['mae', 'mse']], + 'type': 'categorical'}, }, 'pca': { - 
'n_components': (hp.uniform, [0.1, 0.99]), - 'svd_solver': (hp.choice, [['full']]) + 'n_components': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.1, 0.99], + 'type': 'continuous'} }, 'kernel_pca': { - 'n_components': (hp.uniformint, [1, 20]), - 'kernel': (hp.choice, [['linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed']]) + 'n_components': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 20], + 'type': 'discrete'}, + 'kernel': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed']], + 'type': 'categorical'} }, 'fast_ica': { - 'n_components': (hp.uniformint, [1, 20]), - 'fun': (hp.choice, [['logcosh', 'exp', 'cube']]) + 'n_components': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 20], + 'type': 'discrete'}, + 'fun': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['logcosh', 'exp', 'cube']], + 'type': 'categorical'} }, 'ransac_lin_reg': { - 'min_samples': (hp.uniform, [0.1, 0.9]), - 'residual_threshold': (hp.loguniform, [np.log(0.1), np.log(1000)]), - 'max_trials': (hp.uniform, [50, 500]), - 'max_skips': (hp.uniform, [50, 500000]) + 'min_samples': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.1, 0.9], + 'type': 'continuous'}, + 'residual_threshold': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [0.1, 1000], + 'type': 'continuous'}, + 'max_trials': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [50, 500], + 'type': 'continuous'}, + 'max_skips': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [50, 500000], + 'type': 'continuous'} }, 'ransac_non_lin_reg': { - 'min_samples': (hp.uniform, [0.1, 0.9]), - 'residual_threshold': (hp.loguniform, [np.log(0.1), np.log(1000)]), - 'max_trials': (hp.uniform, [50, 500]), - 'max_skips': (hp.uniform, [50, 500000]) + 'min_samples': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.1, 0.9], + 'type': 'continuous'}, + 'residual_threshold': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [0.1, 1000], + 'type': 'continuous'}, + 'max_trials': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [50, 500], + 'type': 'continuous'}, + 'max_skips': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [50, 500000], + 'type': 'continuous'} }, 'isolation_forest_reg': { - 'max_samples': (hp.uniform, [0.05, 0.99]), - 'max_features': (hp.uniform, [0.05, 0.99]), - 'bootstrap': (hp.choice, [[True, False]]) + 'max_samples': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.05, 0.99], + 'type': 'continuous'}, + 'max_features': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.05, 0.99], + 'type': 'continuous'}, + 'bootstrap': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[True, False]], + 'type': 'categorical'} }, 'isolation_forest_class': { - 'max_samples': (hp.uniform, [0.05, 0.99]), - 'max_features': (hp.uniform, [0.05, 0.99]), - 'bootstrap': (hp.choice, [[True, False]]) + 'max_samples': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.05, 0.99], + 'type': 'continuous'}, + 'max_features': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.05, 0.99], + 'type': 'continuous'}, + 'bootstrap': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[True, False]], + 'type': 'categorical'} }, 'rfe_lin_reg': { - 'n_features_to_select': (hp.uniform, [0.5, 0.9]), - 'step': (hp.uniform, [0.1, 0.2]) + 'n_features_to_select': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.5, 0.9], + 'type': 'continuous'}, + 'step': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.1, 0.2], + 'type': 
'continuous'} }, 'rfe_non_lin_reg': { - 'n_features_to_select': (hp.uniform, [0.5, 0.9]), - 'step': (hp.uniform, [0.1, 0.2]) + 'n_features_to_select': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.5, 0.9], + 'type': 'continuous'}, + 'step': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.1, 0.2], + 'type': 'continuous'} }, 'poly_features': { - 'degree': (hp.uniformint, [2, 5]), - 'interaction_only': (hp.choice, [[True, False]]) + 'degree': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 5], + 'type': 'discrete'}, + 'interaction_only': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[True, False]], + 'type': 'categorical'} }, 'polyfit': { - 'degree': (hp.uniformint, [1, 6]) + 'degree': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 6], + 'type': 'discrete'} }, 'lagged': { - 'window_size': (hp.uniformint, [5, 500]) + 'window_size': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [5, 500], + 'type': 'discrete'} }, 'sparse_lagged': { - 'window_size': (hp.uniformint, [5, 500]), - 'n_components': (hp.uniform, [0, 0.5]), - 'use_svd': (hp.choice, [[True, False]]) + 'window_size': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [5, 500], + 'type': 'discrete'}, + 'n_components': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0, 0.5], + 'type': 'continuous'}, + 'use_svd': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[True, False]], + 'type': 'categorical'} }, 'smoothing': { - 'window_size': (hp.uniformint, [2, 20]) + 'window_size': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 20], + 'type': 'discrete'} }, 'gaussian_filter': { - 'sigma': (hp.uniform, [1, 5]) + 'sigma': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [1, 5], + 'type': 'continuous'} }, 'diff_filter': { - 'poly_degree': (hp.uniformint, [1, 5]), - 'order': (hp.uniform, [1, 3]), - 'window_size': (hp.uniform, [3, 20]) + 'poly_degree': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 5], + 'type': 'discrete'}, + 'order': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [1, 3], + 'type': 'continuous'}, + 'window_size': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [3, 20], + 'type': 'continuous'} }, 'cut': { - 'cut_part': (hp.uniform, [0, 0.9]) + 'cut_part': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0, 0.9], + 'type': 'continuous'} }, 'lgbm': { - 'class_weight': (hp.choice, [[None, 'balanced']]), - 'num_leaves': (hp.uniformint, [2, 256]), - 'learning_rate': (hp.loguniform, [np.log(0.01), np.log(0.2)]), - 'colsample_bytree': (hp.uniform, [0.4, 1]), - 'subsample': (hp.uniform, [0.4, 1]), - 'reg_alpha': (hp.loguniform, [np.log(1e-8), np.log(10)]), - 'reg_lambda': (hp.loguniform, [np.log(1e-8), np.log(10)]) + 'class_weight': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[None, 'balanced']], + 'type': 'categorical'}, + 'num_leaves': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 256], + 'type': 'discrete'}, + 'learning_rate': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [0.01, 0.2], + 'type': 'continuous'}, + 'colsample_bytree': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.4, 1], + 'type': 'continuous'}, + 'subsample': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.4, 1], + 'type': 'continuous'}, + 'reg_alpha': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-8, 10], + 'type': 'continuous'}, + 'reg_lambda': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-8, 10], + 'type': 'continuous'} }, 'lgbmreg': { - 'num_leaves': (hp.uniformint, [2, 256]), 
- 'learning_rate': (hp.loguniform, [np.log(0.01), np.log(0.2)]), - 'colsample_bytree': (hp.uniform, [0.4, 1]), - 'subsample': (hp.uniform, [0.4, 1]), - 'reg_alpha': (hp.loguniform, [np.log(1e-8), np.log(10)]), - 'reg_lambda': (hp.loguniform, [np.log(1e-8), np.log(10)]) + 'num_leaves': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 256], + 'type': 'discrete'}, + 'learning_rate': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [0.01, 0.2], + 'type': 'continuous'}, + 'colsample_bytree': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.4, 1], + 'type': 'continuous'}, + 'subsample': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.4, 1], + 'type': 'continuous'}, + 'reg_alpha': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-8, 10], + 'type': 'continuous'}, + 'reg_lambda': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-8, 10], + 'type': 'continuous'} }, 'catboost': { - 'max_depth': (hp.uniformint, [1, 11]), - 'learning_rate': (hp.loguniform, [np.log(0.01), np.log(0.2)]), - 'min_data_in_leaf': (hp.qloguniform, [0, 6, 1]), - 'border_count': (hp.uniformint, [2, 255]), - 'l2_leaf_reg': (hp.loguniform, [np.log(1e-8), np.log(10)]) + 'max_depth': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 11], + 'type': 'discrete'}, + 'learning_rate': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [0.01, 0.2], + 'type': 'continuous'}, + 'min_data_in_leaf': { + 'hyperopt-dist': partial(hp.qloguniform, q=1), + 'sampling-scope': [0, 6], + 'type': 'discrete'}, + 'border_count': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 255], + 'type': 'discrete'}, + 'l2_leaf_reg': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-8, 10], + 'type': 'continuous'} }, 'catboostreg': { - 'max_depth': (hp.uniformint, [1, 11]), - 'learning_rate': (hp.loguniform, [np.log(0.01), np.log(0.2)]), - 'min_data_in_leaf': (hp.qloguniform, [0, 6, 1]), - 'border_count': (hp.uniformint, [2, 255]), - 'l2_leaf_reg': (hp.loguniform, [np.log(1e-8), np.log(10)]) + 'max_depth': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 11], + 'type': 'discrete'}, + 'learning_rate': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [0.01, 0.2], + 'type': 'continuous'}, + 'min_data_in_leaf': { + 'hyperopt-dist': partial(hp.qloguniform, q=1), + 'sampling-scope': [0, 6], + 'type': 'discrete'}, + 'border_count': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 255], + 'type': 'discrete'}, + 'l2_leaf_reg': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-8, 10], + 'type': 'continuous'} }, 'resample': { - 'balance': (hp.choice, [['expand_minority', 'reduce_majority']]), - 'replace': (hp.choice, [[True, False]]), - 'balance_ratio': (hp.uniform, [0.3, 1]) + 'balance': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['expand_minority', 'reduce_majority']], + 'type': 'categorical'}, + 'replace': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[True, False]], + 'type': 'categorical'}, + 'balance_ratio': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.3, 1], + 'type': 'continuous'} }, 'lda': { - 'solver': (hp.choice, [['svd', 'lsqr', 'eigen']]), - 'shrinkage': (hp.uniform, [0.1, 0.9]) + 'solver': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['svd', 'lsqr', 'eigen']], + 'type': 'categorical'}, + 'shrinkage': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.1, 0.9], + 'type': 'continuous'} + }, + 'ts_naive_average': { + 'part_for_averaging': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.1, 1], + 
'type': 'continuous'} + }, + 'locf': { + 'part_for_repeat': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.01, 0.5], + 'type': 'continuous'} }, - 'ts_naive_average': {'part_for_averaging': (hp.uniform, [0.1, 1])}, - 'locf': {'part_for_repeat': (hp.uniform, [0.01, 0.5])}, 'word2vec_pretrained': { - 'model_name': (hp.choice, [['glove-twitter-25', 'glove-twitter-50', - 'glove-wiki-gigaword-100', 'word2vec-ruscorpora-300']]) + 'model_name': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['glove-twitter-25', 'glove-twitter-50', + 'glove-wiki-gigaword-100', 'word2vec-ruscorpora-300']], + 'type': 'categorical'} }, 'tfidf': { - 'ngram_range': (hp.choice, [[(1, 1), (1, 2), (1, 3)]]), - 'min_df': (hp.uniform, [0.0001, 0.1]), - 'max_df': (hp.uniform, [0.9, 0.99]) + 'ngram_range': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[(1, 1), (1, 2), (1, 3)]], + 'type': 'categorical'}, + 'min_df': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.0001, 0.1], + 'type': 'continuous'}, + 'max_df': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.9, 0.99], + 'type': 'continuous'} }, } diff --git a/fedot/core/pipelines/tuning/tuner_builder.py b/fedot/core/pipelines/tuning/tuner_builder.py index edeaa55dad..fa889e0026 100644 --- a/fedot/core/pipelines/tuning/tuner_builder.py +++ b/fedot/core/pipelines/tuning/tuner_builder.py @@ -1,9 +1,8 @@ from datetime import timedelta -from typing import Callable, Type, Union +from typing import Type, Union from golem.core.tuning.simultaneous import SimultaneousTuner -from golem.core.tuning.tuner_interface import HyperoptTuner -from hyperopt import tpe +from golem.core.tuning.tuner_interface import BaseTuner from fedot.core.constants import DEFAULT_TUNING_ITERATIONS_NUMBER from fedot.core.data.data import InputData @@ -29,11 +28,11 @@ def __init__(self, task: Task): self.early_stopping_rounds = None self.timeout = timedelta(minutes=5) self.search_space = PipelineSearchSpace() - self.algo = tpe.suggest self.eval_time_constraint = None + self.additional_params = {} self.adapter = PipelineAdapter() - def with_tuner(self, tuner: Type[HyperoptTuner]): + def with_tuner(self, tuner: Type[BaseTuner]): self.tuner_class = tuner return self @@ -85,15 +84,15 @@ def with_search_space(self, search_space: PipelineSearchSpace): self.search_space = search_space return self - def with_algo(self, algo: Callable): - self.algo = algo - return self - def with_adapter(self, adapter): self.adapter = adapter return self - def build(self, data: InputData) -> HyperoptTuner: + def with_additional_params(self, **parameters): + self.additional_params = parameters + return self + + def build(self, data: InputData) -> BaseTuner: objective = MetricsObjective(self.metric) data_producer = DataSourceSplitter(self.cv_folds, self.validation_blocks).build(data) objective_evaluate = PipelineObjectiveEvaluate(objective, data_producer, @@ -106,6 +105,6 @@ def build(self, data: InputData) -> HyperoptTuner: early_stopping_rounds=self.early_stopping_rounds, timeout=self.timeout, search_space=self.search_space, - algo=self.algo, - n_jobs=self.n_jobs) + n_jobs=self.n_jobs, + **self.additional_params) return tuner diff --git a/other_requirements/extra.txt b/other_requirements/extra.txt index 676f9caf87..d896e3d2a6 100644 --- a/other_requirements/extra.txt +++ b/other_requirements/extra.txt @@ -3,7 +3,6 @@ tensorflow >= 2.8.0; python_version >= '3.8' torch >= 1.9.0 # Images -imageio >= 2.8, <= 2.10 opencv-python >= 4.5.5.64 Pillow >= 8.2.0 diff --git 
a/test/integration/api_params/test_main_api_params.py b/test/integration/api_params/test_main_api_params.py index a7e0eac13c..4ef7f18818 100644 --- a/test/integration/api_params/test_main_api_params.py +++ b/test/integration/api_params/test_main_api_params.py @@ -74,5 +74,5 @@ def test_timeout(case: TimeoutParams): @pytest.mark.parametrize('input_params', [{'use_input_preprocessing': False}]) def test_main_api_params_of_type(input_params: dict): model = Fedot(problem='ts_forecasting', **input_params) - parsed_params = model.params.to_dict() + parsed_params = model.params assert input_params.items() <= parsed_params.items() diff --git a/test/integration/classification/test_classification.py b/test/integration/classification/test_classification.py index 99db05825f..3ecd399fe0 100644 --- a/test/integration/classification/test_classification.py +++ b/test/integration/classification/test_classification.py @@ -5,7 +5,7 @@ except ModuleNotFoundError: from golem.utilities.requirements_notificator import warn_requirement - warn_requirement('tensorflow') + warn_requirement('tensorflow', 'fedot[extra]') from test.unit.common_tests import is_predict_ignores_target from test.unit.tasks.test_classification import get_image_classification_data diff --git a/test/integration/pipelines/tuning/test_pipeline_tuning.py b/test/integration/pipelines/tuning/test_pipeline_tuning.py index d027c884bf..ff0a7df811 100644 --- a/test/integration/pipelines/tuning/test_pipeline_tuning.py +++ b/test/integration/pipelines/tuning/test_pipeline_tuning.py @@ -4,9 +4,11 @@ import numpy as np import pytest +from golem.core.tuning.hyperopt_tuner import get_node_parameters_for_hyperopt +from golem.core.tuning.iopt_tuner import IOptTuner from golem.core.tuning.sequential import SequentialTuner from golem.core.tuning.simultaneous import SimultaneousTuner -from hyperopt import hp, tpe, rand +from hyperopt import hp from hyperopt.pyll.stochastic import sample as hp_sample from sklearn.metrics import mean_squared_error as mse, accuracy_score as acc @@ -131,27 +133,56 @@ def get_class_losses(): def get_not_default_search_space(): custom_search_space = { 'logit': { - 'C': (hp.uniform, [0.01, 5.0]) + 'C': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [1e-1, 5.0], + 'type': 'continuous'} }, 'ridge': { - 'alpha': (hp.uniform, [0.01, 5.0]) + 'alpha': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.01, 5.0], + 'type': 'continuous'} }, 'lgbmreg': { - 'min_samples_leaf': (hp.uniform, [1e-3, 0.5]), - 'max_depth': (hp.choice, [[2.5, 3.5, 4.5]]), - 'learning_rate': (hp.choice, [[1e-3, 1e-2, 1e-1]]), - 'subsample': (hp.uniform, [0.15, 1]) + 'learning_rate': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [0.05, 0.1], + 'type': 'continuous'}, + 'colsample_bytree': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.2, 0.8], + 'type': 'continuous'}, + 'subsample': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.1, 0.8], + 'type': 'continuous'} }, 'dt': { - 'min_samples_leaf': (hp.uniform, [1e-3, 0.5]), - 'max_depth': (hp.choice, [[-1]]) + 'max_depth': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 5], + 'type': 'discrete'}, + 'min_samples_split': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [10, 25], + 'type': 'discrete'} }, 'ar': { - 'lag_1': (hp.uniform, [2, 100]), - 'lag_2': (hp.uniform, [2, 500]) + 'lag_1': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [2, 100], + 'type': 'continuous'}, + 'lag_2': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [2, 500], + 'type': 
'continuous'} }, 'pca': { - 'n_components': (hp.uniform, [0.2, 0.8]) + 'n_components': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.1, 0.5], + 'type': 'continuous'} } } return PipelineSearchSpace(custom_search_space=custom_search_space) @@ -170,54 +201,30 @@ def custom_minimized_metrics(real_data: InputData, pred_data: OutputData): def run_pipeline_tuner(train_data, pipeline, loss_function, + tuner=SimultaneousTuner, search_space=PipelineSearchSpace(), cv=None, - algo=tpe.suggest, iterations=1, early_stopping_rounds=None): # Pipeline tuning pipeline_tuner = TunerBuilder(train_data.task) \ - .with_tuner(SimultaneousTuner) \ + .with_tuner(tuner) \ .with_metric(loss_function) \ .with_cv_folds(cv) \ .with_iterations(iterations) \ .with_early_stopping_rounds(early_stopping_rounds) \ .with_search_space(search_space) \ - .with_algo(algo) \ .build(train_data) tuned_pipeline = pipeline_tuner.tune(pipeline) return pipeline_tuner, tuned_pipeline -def run_sequential_tuner(train_data, - pipeline, - loss_function, - search_space=PipelineSearchSpace(), - cv=None, - algo=tpe.suggest, - iterations=1, - early_stopping_rounds=None): - # Pipeline tuning - sequential_tuner = TunerBuilder(train_data.task) \ - .with_tuner(SequentialTuner) \ - .with_metric(loss_function) \ - .with_cv_folds(cv) \ - .with_iterations(iterations) \ - .with_early_stopping_rounds(early_stopping_rounds) \ - .with_search_space(search_space) \ - .with_algo(algo) \ - .build(train_data) - tuned_pipeline = sequential_tuner.tune(pipeline) - return sequential_tuner, tuned_pipeline - - def run_node_tuner(train_data, pipeline, loss_function, search_space=PipelineSearchSpace(), cv=None, node_index=0, - algo=tpe.suggest, iterations=1, early_stopping_rounds=None): # Pipeline tuning @@ -226,7 +233,6 @@ def run_node_tuner(train_data, .with_metric(loss_function) \ .with_cv_folds(cv) \ .with_iterations(iterations) \ - .with_algo(algo) \ .with_search_space(search_space) \ .with_early_stopping_rounds(early_stopping_rounds) \ .build(train_data) @@ -252,15 +258,17 @@ def test_custom_params_setter(data_fixture, request): [('regression_dataset', get_regr_pipelines(), get_regr_losses()), ('classification_dataset', get_class_pipelines(), get_class_losses()), ('multi_classification_dataset', get_class_pipelines(), get_class_losses())]) -def test_pipeline_tuner_correct(data_fixture, pipelines, loss_functions, request): - """ Test SimultaneousTuner for pipeline based on hyperopt library """ +@pytest.mark.parametrize('tuner', [SimultaneousTuner, SequentialTuner, IOptTuner]) +def test_pipeline_tuner_correct(data_fixture, pipelines, loss_functions, request, tuner): + """ Test all tuners for pipeline """ data = request.getfixturevalue(data_fixture) cvs = [None, 2] for pipeline in pipelines: for loss_function in loss_functions: for cv in cvs: - pipeline_tuner, tuned_pipeline = run_pipeline_tuner(train_data=data, + pipeline_tuner, tuned_pipeline = run_pipeline_tuner(tuner=tuner, + train_data=data, pipeline=pipeline, loss_function=loss_function, cv=cv) @@ -272,9 +280,11 @@ def test_pipeline_tuner_correct(data_fixture, pipelines, loss_functions, request assert is_tuning_finished -def test_pipeline_tuner_with_no_parameters_to_tune(classification_dataset): +@pytest.mark.parametrize('tuner', [SimultaneousTuner, SequentialTuner, IOptTuner]) +def test_pipeline_tuner_with_no_parameters_to_tune(classification_dataset, tuner): pipeline = get_pipeline_with_no_params_to_tune() - pipeline_tuner, tuned_pipeline = run_pipeline_tuner(train_data=classification_dataset, + 
+    pipeline_tuner, tuned_pipeline = run_pipeline_tuner(tuner=tuner,
+                                                        train_data=classification_dataset,
                                                         pipeline=pipeline,
                                                         loss_function=ClassificationMetricsEnum.ROCAUC,
                                                         iterations=20)
@@ -283,14 +293,16 @@ def test_pipeline_tuner_with_no_parameters_to_tune(classification_dataset):
     assert not tuned_pipeline.is_fitted


-def test_pipeline_tuner_with_initial_params(classification_dataset):
-    """ Test SimultaneousTuner based on hyperopt library for pipeline with initial parameters """
+@pytest.mark.parametrize('tuner', [SimultaneousTuner, SequentialTuner, IOptTuner])
+def test_pipeline_tuner_with_initial_params(classification_dataset, tuner):
+    """ Test all tuners for pipeline with initial parameters """
     # a model
     node = PipelineNode(content={'name': 'xgboost', 'params': {'max_depth': 3,
                                                                'learning_rate': 0.03,
                                                                'min_child_weight': 2}})
     pipeline = Pipeline(node)
-    pipeline_tuner, tuned_pipeline = run_pipeline_tuner(train_data=classification_dataset,
+    pipeline_tuner, tuned_pipeline = run_pipeline_tuner(tuner=tuner,
+                                                        train_data=classification_dataset,
                                                         pipeline=pipeline,
                                                         loss_function=ClassificationMetricsEnum.ROCAUC,
                                                         iterations=20)
@@ -302,14 +314,16 @@
                          [('regression_dataset', get_regr_pipelines(), get_regr_losses()),
                           ('classification_dataset', get_class_pipelines(), get_class_losses()),
                           ('multi_classification_dataset', get_class_pipelines(), get_class_losses())])
-def test_pipeline_tuner_with_custom_search_space(data_fixture, pipelines, loss_functions, request):
-    """ Test SimultaneousTuner with different search spaces """
+@pytest.mark.parametrize('tuner', [SimultaneousTuner, SequentialTuner, IOptTuner])
+def test_pipeline_tuner_with_custom_search_space(data_fixture, pipelines, loss_functions, request, tuner):
+    """ Test tuners with different search spaces """
     data = request.getfixturevalue(data_fixture)
     train_data, test_data = train_test_data_setup(data=data)
     search_spaces = [PipelineSearchSpace(), get_not_default_search_space()]

     for search_space in search_spaces:
-        pipeline_tuner, _ = run_pipeline_tuner(train_data=train_data,
+        pipeline_tuner, _ = run_pipeline_tuner(tuner=tuner,
+                                               train_data=train_data,
                                                pipeline=pipelines[0],
                                                loss_function=loss_functions[0],
                                                search_space=search_space)
@@ -320,52 +334,6 @@ def test_pipeline_tuner_with_custom_search_space(data_fixture, pipelines, loss_f
     assert is_tuning_finished


-@pytest.mark.parametrize('data_fixture, pipelines, loss_functions',
-                         [('regression_dataset', get_regr_pipelines(), get_regr_losses()),
-                          ('classification_dataset', get_class_pipelines(), get_class_losses()),
-                          ('multi_classification_dataset', get_class_pipelines(), get_class_losses())])
-def test_sequential_tuner_correct(data_fixture, pipelines, loss_functions, request):
-    """ Test SequentialTuner for pipeline based on hyperopt library """
-    data = request.getfixturevalue(data_fixture)
-    cvs = [None, 2]
-
-    for pipeline in pipelines:
-        for loss_function in loss_functions:
-            for cv in cvs:
-                sequential_tuner, tuned_pipeline = run_sequential_tuner(train_data=data,
-                                                                        pipeline=pipeline,
-                                                                        loss_function=loss_function,
-                                                                        cv=cv)
-                assert sequential_tuner.obtained_metric is not None
-                assert not tuned_pipeline.is_fitted
-
-    is_tuning_finished = True
-
-    assert is_tuning_finished
-
-
-@pytest.mark.parametrize('data_fixture, pipelines, loss_functions',
-                         [('regression_dataset', get_regr_pipelines(), get_regr_losses()),
-                          ('classification_dataset', get_class_pipelines(), get_class_losses()),
-                          ('multi_classification_dataset', get_class_pipelines(), get_class_losses())])
-def test_sequential_tuner_with_custom_search_space(data_fixture, pipelines, loss_functions, request):
-    """ Test SequentialTuner with different search spaces """
-    data = request.getfixturevalue(data_fixture)
-    train_data, test_data = train_test_data_setup(data=data)
-    search_spaces = [PipelineSearchSpace(), get_not_default_search_space()]
-
-    for search_space in search_spaces:
-        sequential_tuner, _ = run_sequential_tuner(train_data=train_data,
-                                                   pipeline=pipelines[0],
-                                                   loss_function=loss_functions[0],
-                                                   search_space=search_space)
-        assert sequential_tuner.obtained_metric is not None
-
-    is_tuning_finished = True
-
-    assert is_tuning_finished
-
-
 @pytest.mark.parametrize('data_fixture, pipelines, loss_functions',
                          [('regression_dataset', get_regr_pipelines(), get_regr_losses()),
                           ('classification_dataset', get_class_pipelines(), get_class_losses()),
@@ -413,8 +381,9 @@
 @pytest.mark.parametrize('n_steps', [100, 133, 217, 300])
-def test_ts_pipeline_with_stats_model(n_steps):
-    """ Tests SimultaneousTuner for time series forecasting task with AR model """
+@pytest.mark.parametrize('tuner', [SimultaneousTuner, SequentialTuner, IOptTuner])
+def test_ts_pipeline_with_stats_model(n_steps, tuner):
+    """ Tests tuners for time series forecasting task with AR model """
     train_data, test_data = get_ts_data(n_steps=n_steps, forecast_length=5)

     ar_pipeline = Pipeline(PipelineNode('ar'))
@@ -422,10 +391,10 @@
     for search_space in [PipelineSearchSpace(), get_not_default_search_space()]:
         # Tune AR model
         tuner_ar = TunerBuilder(train_data.task) \
-            .with_tuner(SimultaneousTuner) \
+            .with_tuner(tuner) \
             .with_metric(RegressionMetricsEnum.MSE) \
             .with_iterations(3) \
-            .with_search_space(search_space).with_algo(rand.suggest).build(train_data)
+            .with_search_space(search_space).build(train_data)
         _ = tuner_ar.tune(ar_pipeline)

     is_tuning_finished = True
@@ -439,7 +408,8 @@ def test_early_stop_in_tuning(data_fixture, request):
     train_data, test_data = train_test_data_setup(data=data)

     start_pipeline_tuner = time()
-    _ = run_pipeline_tuner(train_data=train_data,
+    _ = run_pipeline_tuner(tuner=SimultaneousTuner,
+                           train_data=train_data,
                            pipeline=get_class_pipelines()[0],
                            loss_function=ClassificationMetricsEnum.ROCAUC,
                            iterations=1000,
@@ -447,11 +417,12 @@
     assert time() - start_pipeline_tuner < 1

     start_sequential_tuner = time()
-    _ = run_sequential_tuner(train_data=train_data,
-                             pipeline=get_class_pipelines()[0],
-                             loss_function=ClassificationMetricsEnum.ROCAUC,
-                             iterations=1000,
-                             early_stopping_rounds=1)
+    _ = run_pipeline_tuner(tuner=SequentialTuner,
+                           train_data=train_data,
+                           pipeline=get_class_pipelines()[0],
+                           loss_function=ClassificationMetricsEnum.ROCAUC,
+                           iterations=1000,
+                           early_stopping_rounds=1)
     assert time() - start_sequential_tuner < 1

     start_node_tuner = time()
@@ -466,18 +437,24 @@
 def test_search_space_correctness_after_customization():
     default_search_space = PipelineSearchSpace()

-    custom_search_space = {'gbr': {'max_depth': (hp.choice, [[3, 7, 31, 127, 8191, 131071]])}}
+    custom_search_space = {'gbr': {'max_depth': {
+        'hyperopt-dist': hp.choice,
+        'sampling-scope': [[3, 7, 31, 127, 8191, 131071]],
+        'type': 'categorical'}}}
     custom_search_space_without_replace = PipelineSearchSpace(custom_search_space=custom_search_space,
                                                               replace_default_search_space=False)
     custom_search_space_with_replace = PipelineSearchSpace(custom_search_space=custom_search_space,
                                                            replace_default_search_space=True)

-    default_params = default_search_space.get_node_params(node_id=0,
-                                                          operation_name='gbr')
-    custom_without_replace_params = custom_search_space_without_replace.get_node_params(node_id=0,
-                                                                                        operation_name='gbr')
-    custom_with_replace_params = custom_search_space_with_replace.get_node_params(node_id=0,
-                                                                                  operation_name='gbr')
+    default_params = get_node_parameters_for_hyperopt(default_search_space,
+                                                      node_id=0,
+                                                      operation_name='gbr')
+    custom_without_replace_params = get_node_parameters_for_hyperopt(custom_search_space_without_replace,
+                                                                     node_id=0,
+                                                                     operation_name='gbr')
+    custom_with_replace_params = get_node_parameters_for_hyperopt(custom_search_space_with_replace,
+                                                                  node_id=0,
+                                                                  operation_name='gbr')

     assert default_params.keys() == custom_without_replace_params.keys()
     assert default_params.keys() != custom_with_replace_params.keys()
@@ -490,15 +467,18 @@ def test_search_space_get_operation_parameter_range():
     gbr_operations = ['loss', 'learning_rate', 'max_depth', 'min_samples_split',
                       'min_samples_leaf', 'subsample', 'max_features', 'alpha']

-    custom_search_space = {'gbr': {'max_depth': (hp.choice, [[3, 7, 31, 127, 8191, 131071]])}}
+    custom_search_space = {'gbr': {'max_depth': {
+        'hyperopt-dist': hp.choice,
+        'sampling-scope': [[3, 7, 31, 127, 8191, 131071]],
+        'type': 'categorical'}}}
     custom_search_space_without_replace = PipelineSearchSpace(custom_search_space=custom_search_space,
                                                               replace_default_search_space=False)
     custom_search_space_with_replace = PipelineSearchSpace(custom_search_space=custom_search_space,
                                                            replace_default_search_space=True)

-    default_operations = default_search_space.get_operation_parameter_range('gbr')
-    custom_without_replace_operations = custom_search_space_without_replace.get_operation_parameter_range('gbr')
-    custom_with_replace_operations = custom_search_space_with_replace.get_operation_parameter_range('gbr')
+    default_operations = default_search_space.get_parameters_for_operation('gbr')
+    custom_without_replace_operations = custom_search_space_without_replace.get_parameters_for_operation('gbr')
+    custom_with_replace_operations = custom_search_space_with_replace.get_parameters_for_operation('gbr')

     assert default_operations == gbr_operations
     assert custom_without_replace_operations == gbr_operations
@@ -510,18 +490,19 @@ def test_complex_search_space():
     for i in range(20):
         operation_parameters = space.parameters_per_operation.get("glm")
         new_value = hp_sample(operation_parameters["nested_space"])
-        for params in new_value[1][0]:
+        for params in new_value['sampling-scope'][0]:
             assert params['link'] in GLMImplementation.family_distribution[params['family']]['available_links']


-def test_complex_search_space_tuning_correct():
-    """ Tests SimultaneousTuner for time series forecasting task with GLM model
-    that has a complex glm search space"""
+@pytest.mark.parametrize('tuner', [SimultaneousTuner, SequentialTuner, IOptTuner])
+def test_complex_search_space_tuning_correct(tuner):
+    """ Tests tuners for time series forecasting task with GLM model
+    that has a complex glm search space"""
     train_data, test_data = get_ts_data(n_steps=200, forecast_length=5)
     glm_pipeline = Pipeline(PipelineNode('glm'))
     glm_custom_params = glm_pipeline.nodes[0].parameters
     tuner = TunerBuilder(train_data.task) \
-        .with_tuner(SimultaneousTuner) \
+        .with_tuner(tuner) \
         .with_metric(RegressionMetricsEnum.MSE) \
         .with_iterations(100) \
         .build(train_data)
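An illustrative aside (not part of the patch): each entry in the new search-space format bundles a hyperopt distribution with its sampling scope and a declared type, so one description serves both the hyperopt-based tuners and IOptTuner. A minimal sketch of how such an entry maps onto plain hyperopt, mirroring the hp_sample usage in the tests above:

from hyperopt import hp
from hyperopt.pyll.stochastic import sample as hp_sample

# One parameter description in the new dict-based format.
entry = {'hyperopt-dist': hp.uniform,
         'sampling-scope': [1e-1, 5.0],
         'type': 'continuous'}

# 'hyperopt-dist' is a factory taking a label plus the sampling scope.
expression = entry['hyperopt-dist']('C', *entry['sampling-scope'])
value = hp_sample(expression)
assert 1e-1 <= value <= 5.0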
diff --git a/test/integration/pipelines/tuning/test_tuner_builder.py b/test/integration/pipelines/tuning/test_tuner_builder.py
index 978353dbfa..0694cc850c 100644
--- a/test/integration/pipelines/tuning/test_tuner_builder.py
+++ b/test/integration/pipelines/tuning/test_tuner_builder.py
@@ -3,10 +3,11 @@
 import numpy as np
 import pytest
+from golem.core.tuning.hyperopt_tuner import HyperoptTuner
+from golem.core.tuning.iopt_tuner import IOptTuner
 from golem.core.tuning.sequential import SequentialTuner
 from golem.core.tuning.simultaneous import SimultaneousTuner
-from golem.core.tuning.tuner_interface import HyperoptTuner
-from hyperopt import tpe, rand
+from hyperopt import tpe

 from fedot.core.constants import DEFAULT_TUNING_ITERATIONS_NUMBER
 from fedot.core.data.data import InputData
@@ -43,7 +44,7 @@ def test_tuner_builder_with_default_params():
     assert tuner.max_seconds == 300


-@pytest.mark.parametrize('tuner_class', [SimultaneousTuner, SequentialTuner])
+@pytest.mark.parametrize('tuner_class', [SimultaneousTuner, SequentialTuner, IOptTuner])
 def test_tuner_builder_with_custom_params(tuner_class):
     data = get_classification_data()
     pipeline = pipeline_first_test()
@@ -55,7 +56,6 @@ def test_tuner_builder_with_custom_params(tuner_class):
     timeout = timedelta(minutes=2)
     early_stopping = 100
     iterations = 10
-    algo = rand.suggest
     search_space = get_not_default_search_space()

     tuner = (
@@ -67,7 +67,6 @@
         .with_timeout(timeout)
         .with_early_stopping_rounds(early_stopping)
         .with_iterations(iterations)
-        .with_algo(algo)
         .with_search_space(search_space)
         .build(data)
     )
@@ -76,5 +75,4 @@
     assert np.isclose(tuner.objective_evaluate(pipeline).value, objective_evaluate.evaluate(pipeline).value)
     assert tuner.search_space == search_space
     assert tuner.iterations == iterations
-    assert tuner.algo == algo
-    assert tuner.max_seconds == int(timeout.seconds)
+    assert tuner.timeout.seconds == int(timeout.seconds)
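To close, a hedged end-to-end sketch of the builder flow these tests exercise. The GOLEM import paths come from the diff above; the FEDOT-side paths (InputData aside, which the diff imports itself) are assumed from FEDOT's usual layout, so treat them as illustrative rather than authoritative:

import numpy as np
from golem.core.tuning.iopt_tuner import IOptTuner
from hyperopt import hp

from fedot.core.data.data import InputData
from fedot.core.pipelines.node import PipelineNode            # assumed path
from fedot.core.pipelines.pipeline import Pipeline            # assumed path
from fedot.core.pipelines.tuning.search_space import PipelineSearchSpace  # assumed path
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder        # assumed path
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum

# Toy classification data, only to keep the sketch self-contained.
rng = np.random.default_rng(42)
features = rng.normal(size=(128, 5))
target = (features[:, 0] + features[:, 1] > 0).astype(int)
train_data = InputData(idx=np.arange(128), features=features, target=target,
                       task=Task(TaskTypesEnum.classification),
                       data_type=DataTypesEnum.table)

# Custom search space in the new dict-based format shown in the diff.
search_space = PipelineSearchSpace(
    custom_search_space={'logit': {'C': {'hyperopt-dist': hp.uniform,
                                         'sampling-scope': [1e-2, 10.0],
                                         'type': 'continuous'}}},
    replace_default_search_space=False)

pipeline = Pipeline(PipelineNode('logit'))
tuner = (TunerBuilder(train_data.task)
         .with_tuner(IOptTuner)  # any of the three parametrized tuner classes fits here
         .with_metric(ClassificationMetricsEnum.ROCAUC)
         .with_iterations(10)
         .with_search_space(search_space)
         .build(train_data))
tuned_pipeline = tuner.tune(pipeline)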