Add IOptTuner (#1102)
* Refactor search space

* Fix ParametersChanges

* Edit tuner builder

* PEP 8

* Fix examples and correct tuning docs

* Add IOpt example to docs

* Add simple IOpt example

* Fix integration tests (6 still do not work)

* Fix integration test test_tuner_builder_with_custom_params (6 still do not work)

* Fix pep8

* Add tuners comparison in docs

* Update golem version in requirements

* Fix warn_requirements

* Fix warn_requirements

* Fix table in tuning docs

* Fix credit_scoring_problem_multiobj.py

* Fix extra requirements

* Edit docs

* Fix table in docs

* Change requirements

* Test requirements

* Set stable branch
YamLyubov authored Jun 15, 2023
1 parent 40312cb commit 16d507f
Showing 25 changed files with 1,022 additions and 388 deletions.
8 changes: 4 additions & 4 deletions cases/credit_scoring/credit_scoring_problem_multiobj.py
@@ -26,10 +26,10 @@


def results_visualization(history, composed_pipelines):
-visualiser = OptHistoryExtraVisualizer()
-visualiser.visualise_history(history)
-visualiser.pareto_gif_create(history.archive_history, history.individuals)
-visualiser.boxplots_gif_create(history.individuals)
+visualiser = OptHistoryExtraVisualizer(history)
+visualiser.visualise_history()
+visualiser.pareto_gif_create()
+visualiser.boxplots_gif_create()
for pipeline_evo_composed in composed_pipelines:
pipeline_evo_composed.show()

2 changes: 1 addition & 1 deletion cases/river_levels_prediction/river_level_case_composer.py
@@ -168,6 +168,6 @@ def run_river_composer_experiment(file_path, init_pipeline, file_to_save,
# Available tuners for application: SimultaneousTuner, SequentialTuner
run_river_composer_experiment(file_path='../data/river_levels/station_levels.csv',
init_pipeline=init_pipeline,
-file_to_save='data/river_levels/old_composer_new_preprocessing_report.csv',
+file_to_save='../data/river_levels/old_composer_new_preprocessing_report.csv',
iterations=20,
tuner=SimultaneousTuner)
198 changes: 177 additions & 21 deletions docs/source/advanced/hyperparameters_tuning.rst
@@ -2,14 +2,70 @@ Tuning of Hyperparameters
=========================
To tune pipeline hyperparameters you can use GOLEM. There are two ways:

-1. Tuning of all models hyperparameters simultaneously. Implemented via ``SimultaneousTuner`` class.
+1. Tuning of all models' hyperparameters simultaneously. Implemented via the ``SimultaneousTuner`` and ``IOptTuner`` classes.

-2. Tuning of models hyperparameters sequentially node by node optimizing metric value for the whole pipeline. Implemented via ``SequentialTuner`` class.
+2. Tuning of models' hyperparameters sequentially, node by node, optimizing the metric value for the whole pipeline, or tuning
+   the hyperparameters of a single node only. Implemented via the ``SequentialTuner`` class.

More information about these approaches can be found
`here <https://towardsdatascience.com/hyperparameters-tuning-for-machine-learning-model-ensembles-8051782b538b>`_.

-If ``with_tuning`` flag is set to ``True`` when using :doc:`FEDOT API </api/api>`, simultaneous hyperparameters tuning is applied for composed pipeline and ``metric`` value is used as a metric for tuning.
+If the ``with_tuning`` flag is set to ``True`` when using the :doc:`FEDOT API </api/api>`, simultaneous hyperparameter tuning
+with ``SimultaneousTuner`` is applied to the composed pipeline, and the ``metric`` value is used as the tuning metric.
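
For illustration (an editorial sketch, not part of this diff), enabling tuning through the API could look as
follows; the dataset paths, target column and metric value are assumptions:

.. code-block:: python

    from fedot.api.main import Fedot

    # Hypothetical CSV paths; any classification dataset with a 'target' column works.
    model = Fedot(problem='classification', metric='roc_auc', with_tuning=True, timeout=5)
    model.fit(features='train.csv', target='target')
    prediction = model.predict(features='test.csv')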

FEDOT uses the tuner implementations from GOLEM; see the `GOLEM documentation`_ for more information.

.. list-table:: Tuners comparison
:widths: 10 30 30 30
:header-rows: 1

* -
- ``SimultaneousTuner``
- ``SequentialTuner``
- ``IOptTuner``
* - Based on
- Hyperopt
- Hyperopt
- iOpt
* - Type of tuning
- Simultaneous
- | Sequential or
| for one node only
- Simultaneous
* - | Optimized
| parameters
- | categorical
| discrete
| continuous
- | categorical
| discrete
| continuous
- | discrete
| continuous
* - Algorithm type
- stochastic
- stochastic
- deterministic
* - | Supported
| constraints
- | timeout
| iterations
| early_stopping_rounds
| eval_time_constraint
- | timeout
| iterations
| early_stopping_rounds
| eval_time_constraint
- | iterations
| eval_time_constraint
* - | Supports initial
| point
- Yes
- No
- No

Hyperopt-based tuners usually take less time per iteration, but ``IOptTuner`` is able to obtain much more stable results.


Simple example
~~~~~~~~~~~~~~
@@ -45,7 +101,7 @@ To initialize a tuner you can use ``TunerBuilder``.
* with_timeout_
* with_eval_time_constraint_
* with_search_space_
-* with_algo_
+* with_additional_params_

Tuner class
-----------
@@ -183,15 +239,31 @@ To customize search space use ``PipelineSearchSpace`` class.
    custom_search_space = {
        'logit': {
-           'C': (hp.uniform, [0.01, 5.0])
+           'C': {
+               'hyperopt-dist': hp.uniform,
+               'sampling-scope': [1e-1, 5.0],
+               'type': 'continuous'}
        },
        'pca': {
-           'n_components': (hp.uniform, [0.2, 0.8])
+           'n_components': {
+               'hyperopt-dist': hp.uniform,
+               'sampling-scope': [0.1, 0.5],
+               'type': 'continuous'}
        },
        'knn': {
-           'n_neighbors': (hp.uniformint, [1, 6]),
-           'weights': (hp.choice, [["uniform", "distance"]]),
-           'p': (hp.choice, [[1, 2]])}
+           'n_neighbors': {
+               'hyperopt-dist': hp.uniformint,
+               'sampling-scope': [1, 20],
+               'type': 'discrete'},
+           'weights': {
+               'hyperopt-dist': hp.choice,
+               'sampling-scope': [["uniform", "distance"]],
+               'type': 'categorical'},
+           'p': {
+               'hyperopt-dist': hp.choice,
+               'sampling-scope': [[1, 2]],
+               'type': 'categorical'}
+       }
    }

    search_space = PipelineSearchSpace(custom_search_space=custom_search_space, replace_default_search_space=True)
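
(Editorial sketch: the lines collapsed below presumably pass this search space to a tuner through
``TunerBuilder``, roughly as follows; the tuner choice here is an assumption.)

.. code-block:: python

    # Hedged sketch, not the collapsed original: wire the custom space into a tuner.
    pipeline_tuner = TunerBuilder(Task(TaskTypesEnum.classification)) \
        .with_tuner(SimultaneousTuner) \
        .with_search_space(search_space) \
        .build(train_data)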
@@ -201,20 +273,35 @@
tuned_pipeline = pipeline_tuner.tune(pipeline)
-Algorithm
----------
+Additional parameters
+---------------------

.. _with_additional_params:

If there is no ``TunerBuilder`` function to set a specific parameter of a tuner, use ``.with_additional_params()``.

Possible additional parameters are listed in the `GOLEM documentation`_.

-.. _with_algo:

-You can set algorithm for hyperparameters optimization with signature similar to ``hyperopt.tse.suggest``.
+For example, for ``SimultaneousTuner`` or ``SequentialTuner`` you can set an optimization algorithm with a signature
+similar to ``hyperopt.tpe.suggest``.
By default, ``hyperopt.tpe.suggest`` is used.

.. code-block:: python

-   algo = hyperopt.rand.suggest
    pipeline_tuner = TunerBuilder(Task(TaskTypesEnum.classification)) \
+       .with_additional_params(algo=hyperopt.rand.suggest) \
        .build(train_data)
    tuned_pipeline = pipeline_tuner.tune(pipeline)
For ``IOptTuner``, parameters such as ``r``, ``evolvent_density`` and ``eps_r`` can be set.

.. code-block:: python

    pipeline_tuner = TunerBuilder(Task(TaskTypesEnum.classification)) \
-       .with_algo(algo) \
+       .with_tuner(IOptTuner) \
+       .with_additional_params(r=1, evolvent_density=5) \
        .build(train_data)
    tuned_pipeline = pipeline_tuner.tune(pipeline)
@@ -260,8 +347,9 @@ Constraints
Examples
~~~~~~~~

-Tuning all hyperparameters simultaniously
+Tuning all hyperparameters simultaneously
-----------------------------------------
Example for ``SimultaneousTuner``:

.. code-block:: python
@@ -295,12 +383,24 @@ Tuning all hyperparameters simultaneously
    custom_search_space = {
        'logit': {
-           'C': (hp.uniform, [0.01, 5.0])
+           'C': {
+               'hyperopt-dist': hp.uniform,
+               'sampling-scope': [0.01, 5.0],
+               'type': 'continuous'}
        },
        'knn': {
-           'n_neighbors': (hp.uniformint, [1, 6]),
-           'weights': (hp.choice, [["uniform", "distance"]]),
-           'p': (hp.choice, [[1, 2]])}
+           'n_neighbors': {
+               'hyperopt-dist': hp.uniformint,
+               'sampling-scope': [1, 20],
+               'type': 'discrete'},
+           'weights': {
+               'hyperopt-dist': hp.choice,
+               'sampling-scope': [["uniform", "distance"]],
+               'type': 'categorical'},
+           'p': {
+               'hyperopt-dist': hp.choice,
+               'sampling-scope': [[1, 2]],
+               'type': 'categorical'}}
    }

    search_space = PipelineSearchSpace(custom_search_space=custom_search_space, replace_default_search_space=True)
@@ -319,7 +419,7 @@ Tuning all hyperparameters simultaneously
.with_early_stopping_rounds(early_stopping_rounds) \
.with_timeout(timeout) \
.with_search_space(search_space) \
-.with_algo(algo) \
+.with_additional_params(algo=algo) \
.with_eval_time_constraint(eval_time_constraint) \
.build(train_data)
@@ -339,6 +439,60 @@ Tuned pipeline structure:
rf - {'n_jobs': 1, 'bootstrap': True, 'criterion': 'entropy', 'max_features': 0.46348491415788157, 'min_samples_leaf': 11, 'min_samples_split': 2, 'n_estimators': 100}
logit - {'C': 3.056080157518786}
Example for ``IOptTuner``:

.. code-block:: python

    import datetime

    from golem.core.tuning.iopt_tuner import IOptTuner

    from fedot.core.data.data import InputData
    from fedot.core.pipelines.pipeline_builder import PipelineBuilder
    from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements
    from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
    from fedot.core.repository.quality_metrics_repository import RegressionMetricsEnum
    from fedot.core.repository.tasks import TaskTypesEnum, Task

    task = Task(TaskTypesEnum.regression)
    tuner = IOptTuner
    requirements = PipelineComposerRequirements(cv_folds=2, n_jobs=2)
    metric = RegressionMetricsEnum.MSE
    iterations = 100
    eval_time_constraint = datetime.timedelta(seconds=30)

    # Pass the Task object defined above rather than a raw string.
    train_data = InputData.from_csv('train_data.csv', task=task)

    pipeline = PipelineBuilder().add_node('knnreg', branch_idx=0).add_branch('rfr', branch_idx=1) \
        .join_branches('knnreg').build()

    pipeline_tuner = TunerBuilder(task) \
        .with_tuner(tuner) \
        .with_requirements(requirements) \
        .with_metric(metric) \
        .with_iterations(iterations) \
        .with_additional_params(eps=0.02, r=1, refine_solution=True) \
        .with_eval_time_constraint(eval_time_constraint) \
        .build(train_data)

    tuned_pipeline = pipeline_tuner.tune(pipeline)
    tuned_pipeline.print_structure()
Tuned pipeline structure:

.. code-block:: python
Pipeline structure:
{'depth': 2, 'length': 3, 'nodes': [knnreg, knnreg, rfr]}
knnreg - {'n_neighbors': 51}
knnreg - {'n_neighbors': 40}
rfr - {'n_jobs': 1, 'max_features': 0.05324707031250003, 'min_samples_split': 12, 'min_samples_leaf': 11}
Sequential tuning
-----------------
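
The body of this section is collapsed in this diff. For orientation, a minimal hedged sketch of sequential tuning
follows; the ``SequentialTuner`` import path is assumed by analogy with the ``IOptTuner`` import above, and
``task``, ``metric``, ``train_data`` and ``pipeline`` are defined as in the previous example:

.. code-block:: python

    from golem.core.tuning.sequential import SequentialTuner  # assumed module path

    # Tune hyperparameters node by node, scoring each change on the whole pipeline.
    pipeline_tuner = TunerBuilder(task) \
        .with_tuner(SequentialTuner) \
        .with_metric(metric) \
        .with_iterations(100) \
        .build(train_data)
    tuned_pipeline = pipeline_tuner.tune(pipeline)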

@@ -476,3 +630,5 @@ Other examples can be found here:
**Multitask**

* `Multitask pipeline: classification and regression <https://github.com/nccr-itmo/FEDOT/blob/master/examples/advanced/multitask_classification_regression.py>`_

.. _GOLEM documentation: https://thegolem.readthedocs.io/en/latest/api/tuning.html
19 changes: 13 additions & 6 deletions examples/advanced/time_series_forecasting/custom_model_tuning.py
@@ -94,16 +94,23 @@ def run_pipeline_tuning(time_series, len_forecast, pipeline_type):
# Setting custom search space for tuner (necessary)
# model and output_type should be wrapped into hyperopt
custom_search_space = {'custom': {
-    'alpha': (hp.uniform, [0.01, 10]),
-    'model_predict': (hp.choice, [[custom_ml_model_imitation_predict]]),
-    'model_fit': (hp.choice, [[custom_ml_model_imitation_fit]])}}
+    'alpha': {
+        'hyperopt-dist': hp.uniform,
+        'sampling-scope': [0.01, 10],
+        'type': 'continuous'}}}
elif pipeline_type == 'without_fit':
pipeline = get_domain_pipeline()
# Setting custom search space for tuner (necessary)
# model and output_type should be wrapped into hyperopt
-custom_search_space = {'custom': {'a': (hp.uniform, [-100, 100]),
-                                  'b': (hp.uniform, [0, 1000]),
-                                  'model_predict': (hp.choice, [[domain_model_imitation_predict]])}}
+custom_search_space = {'custom': {
+    'a': {
+        'hyperopt-dist': hp.uniform,
+        'sampling-scope': [-100, 100],
+        'type': 'continuous'},
+    'b': {
+        'hyperopt-dist': hp.uniform,
+        'sampling-scope': [0, 1000],
+        'type': 'continuous'}}}
pipeline.fit_from_scratch(train_input)
pipeline.print_structure()
# Get prediction with initial approximation
@@ -6,7 +6,7 @@
try:
import tensorflow as tf
except ModuleNotFoundError:
-warn_requirement('tensorflow')
+warn_requirement('tensorflow', 'fedot[extra]')

from sklearn.metrics import roc_auc_score as roc_auc

