Bugfixes (#1153)
1. Fix #1148 by guarding the denominator in the CGRU scaler against zero std, and add a test for the new code
2. Fix #1151 by setting n_jobs=1 by default for some operations
3. Add an initial assumption with AR (#1074) and enable AR (#1137)
4. Check and add a test in accordance with #739
5. Fix the integration test `test_result_changing`
kasyanovse authored Sep 3, 2023
1 parent cebd493 commit 5da1447
Showing 9 changed files with 137 additions and 17 deletions.
5 changes: 4 additions & 1 deletion fedot/api/api_utils/assumptions/task_assumptions.py
@@ -58,7 +58,10 @@ def builders(self):
PipelineBuilder()
.add_branch('polyfit', 'lagged')
.grow_branches(None, 'ridge')
.join_branches('ridge')
.join_branches('ridge'),
'smoothing_ar':
PipelineBuilder()
.add_sequence('smoothing', 'ar'),
}

def ensemble_operation(self) -> str:
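
The new 'smoothing_ar' entry gives forecasting tasks a second, deliberately simple starting point alongside the polyfit/ridge assumption above. A minimal sketch of building that pipeline with the builder API used in this diff (the variable name is illustrative):

from fedot.core.pipelines.pipeline_builder import PipelineBuilder

# The initial assumption added above: smoothing followed by an AR model
smoothing_ar = PipelineBuilder().add_sequence('smoothing', 'ar').build()
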
@@ -150,7 +150,7 @@ def _fit_transform_scaler(self, data: InputData):
return f_scaled, t_scaled

def _scale(self, array: np.ndarray):
return (array - self.mu) / self.std
return (array - self.mu) / (self.std + 1e-6)

def _inverse_scale(self, array: np.ndarray):
return array * self.std + self.mu
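
The new epsilon matters because a constant series (#1148) has zero standard deviation, so the old denominator turned the z-score into 0 / 0 and produced NaNs. A standalone sketch of the failure mode, not the CGRU implementation itself:

import numpy as np

array = np.full(10, 5.0)             # constant series, std == 0
mu, std = array.mean(), array.std()
# (array - mu) / std evaluates to 0 / 0 here and yields NaNs;
# the added epsilon keeps the transform finite:
scaled = (array - mu) / (std + 1e-6)
assert np.isfinite(scaled).all()
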
4 changes: 0 additions & 4 deletions fedot/core/pipelines/pipeline.py
@@ -404,10 +404,6 @@ def replace_n_jobs_in_nodes(self, n_jobs: int):
for param in ['n_jobs', 'num_threads']:
if param in node.content['params']:
node.content['params'][param] = n_jobs
# workaround for lgbm paramaters
if node.content['name'] == 'lgbm':
node.content['params']['num_threads'] = n_jobs
node.content['params']['n_jobs'] = n_jobs

@copy_doc(Graph.show)
def show(self, save_path: Optional[Union[PathLike, str]] = None, engine: Optional[str] = None,
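
The deleted branch special-cased lgbm only; with 'n_jobs' now listed among lgbm's default parameters (see default_operation_params.json below), the generic loop above already rewrites it. A small illustration with a hypothetical params dict:

# Hypothetical node params once the JSON defaults are applied
params = {'n_jobs': 1, 'num_leaves': 32}
for name in ['n_jobs', 'num_threads']:
    if name in params:
        params[name] = 4  # the requested n_jobs
assert params == {'n_jobs': 4, 'num_leaves': 32}
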
10 changes: 9 additions & 1 deletion fedot/core/pipelines/pipeline_node_factory.py
@@ -1,5 +1,5 @@
from random import choice
from typing import Optional
from typing import Optional, List

from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements
from golem.core.optimisers.graph import OptNode
@@ -59,3 +59,11 @@ def _return_node(candidates) -> Optional[OptNode]:
@staticmethod
def filter_specific_candidates(candidates: list):
return sorted(list(filter(lambda x: not check_for_specific_operations(x), candidates)))

def get_all_available_operations(self) -> Optional[List[str]]:
"""
Returns all available models and data operations.
"""
# TODO: get_all_available_operations is an abstract method in OptNodeFactory;
# PipelineOptNodeFactory cannot be instantiated without that method
raise NotImplementedError()
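
The stub is needed because Python refuses to instantiate a class that leaves an abstract method undefined; defining the method, even as a NotImplementedError stub, restores instantiability. A minimal reproduction with hypothetical class names:

from abc import ABC, abstractmethod

class BaseFactory(ABC):
    @abstractmethod
    def get_all_available_operations(self): ...

class ConcreteFactory(BaseFactory):
    pass  # abstract method left undefined

try:
    ConcreteFactory()
except TypeError as err:
    print(err)  # Can't instantiate abstract class ConcreteFactory ...
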
15 changes: 10 additions & 5 deletions fedot/core/repository/data/default_operation_params.json
@@ -7,31 +7,36 @@
},
"xgboost": {
"eval_metric": "mlogloss",
"nthread": -1
"nthread": 1,
"n_jobs": 1
},
"catboost": {
"allow_writing_files": false,
"verbose": false
"verbose": false,
"thread_count": 1
},
"catboostreg": {
"allow_writing_files": false,
"verbose": false
"verbose": false,
"thread_count": 1
},
"lgbm": {
"num_leaves": 32,
"colsample_bytree": 0.8,
"subsample": 0.8,
"subsample_freq": 10,
"learning_rate": 0.03,
"n_estimators": 100
"n_estimators": 100,
"n_jobs": 1
},
"lgbmreg": {
"num_leaves": 32,
"colsample_bytree": 0.8,
"subsample": 0.8,
"subsample_freq": 10,
"learning_rate": 0.03,
"n_estimators": 100
"n_estimators": 100,
"n_jobs": 1
},
"lagged": {
"window_size": 10
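
Pinning nthread, n_jobs, and thread_count to 1 makes these libraries single-threaded by default (#1151); replace_n_jobs_in_nodes (pipeline.py above) still rewrites the values whenever a different n_jobs is requested. A quick sanity check on the file, assuming it is run from the repository root:

import json

with open('fedot/core/repository/data/default_operation_params.json') as f:
    defaults = json.load(f)
assert defaults['lgbm']['n_jobs'] == 1
assert defaults['catboost']['thread_count'] == 1
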
5 changes: 2 additions & 3 deletions fedot/core/repository/data/model_repository.json
@@ -78,7 +78,7 @@
],
"description": "Implementations of the regression models from scikit-learn framework",
"forbidden_node_types": "[]",
"input_type": "[DataTypesEnum.table, DataTypesEnum.multi_ts]",
"input_type": "[DataTypesEnum.table]",
"output_type": "[DataTypesEnum.table]",
"strategies": [
"fedot.core.operations.evaluation.regression",
@@ -136,8 +136,7 @@
"interpretable",
"non_lagged",
"linear",
"correct_params",
"non-default"
"correct_params"
],
"input_type": "[DataTypesEnum.ts]"
},
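
Dropping the 'non-default' tag is what enables AR (#1137): operations carrying that tag are left out when FEDOT assembles its default search space. A hypothetical illustration of the filtering effect, not the actual repository API:

operation_tags = {'ar': ['interpretable', 'non_lagged', 'linear', 'correct_params'],
                  'custom_model': ['non-default']}
default_operations = [name for name, tags in operation_tags.items()
                      if 'non-default' not in tags]
assert 'ar' in default_operations
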
77 changes: 76 additions & 1 deletion test/integration/models/test_model.py
@@ -23,7 +23,7 @@
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.operation_types_repository import OperationTypesRepository
from fedot.core.repository.operation_types_repository import OperationTypesRepository, AVAILABLE_REPO_NAMES
from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams
from test.unit.common_tests import is_predict_ignores_target
from test.unit.data_operations.test_time_series_operations import synthetic_univariate_ts
@@ -40,6 +40,49 @@ def check_predict_correct(model, fitted_operation, test_data):
)


def get_data_for_testing(task_type, data_type, length=100, features_count=1,
value=0, random=True, random_seed=0):
allowed_data_type = {TaskTypesEnum.ts_forecasting: [DataTypesEnum.ts, DataTypesEnum.multi_ts],
TaskTypesEnum.classification: [DataTypesEnum.table],
TaskTypesEnum.regression: [DataTypesEnum.table]}
if task_type not in allowed_data_type or data_type not in allowed_data_type[task_type]:
return None

if task_type is TaskTypesEnum.ts_forecasting:
task = Task(task_type, TsForecastingParams(max(length // 10, 2)))
if data_type is DataTypesEnum.ts:
features = np.zeros(length) + value
else:
features = np.zeros((length, features_count)) + value
if data_type is DataTypesEnum.table:
target = np.zeros(length) + value
else:
target = features

else:
task = Task(task_type)
data_type = DataTypesEnum.table
features = np.zeros((length, features_count)) + value
target = np.zeros(length) + value
if task_type is TaskTypesEnum.classification:
target[:int(len(target) // 2)] = 2 * value + 1

if random and task_type is not TaskTypesEnum.classification:
generator = np.random.RandomState(random_seed)
features += generator.rand(*features.shape)
if task_type is TaskTypesEnum.ts_forecasting:
target = features
else:
target += generator.rand(*target.shape)

data = InputData(idx=np.arange(length),
features=features,
target=target,
data_type=data_type,
task=task)
return data


def get_roc_auc(valid_data: InputData, predicted_data: OutputData) -> float:
n_classes = valid_data.num_classes
if n_classes > 2:
@@ -372,3 +415,35 @@ def test_locf_forecast_correctly():
assert np.array_equal(fit_forecast.idx, np.array([3, 4, 5, 6, 7, 8, 9, 10]))
# Repeated pattern (3 elements to repeat and 4 forecast horizon)
assert np.array_equal(predict_forecast.predict, np.array([[110, 120, 130, 110]]))


def test_models_does_not_fall_on_constant_data():
""" Run models on constant data """
# models that raise exception
to_skip = ['custom', 'arima', 'catboost', 'catboostreg',
'lda', 'fast_ica', 'decompose', 'class_decompose']

for repo_name in AVAILABLE_REPO_NAMES:
operation_repo = OperationTypesRepository(repo_name)
if operation_repo._repo is None:
continue
for operation in operation_repo._repo:
if operation.id in to_skip:
continue
for task_type in operation.task_type:
for data_type in operation.input_types:
data = get_data_for_testing(task_type, data_type,
length=100, features_count=2,
random=False)
if data is not None:
try:
nodes_from = []
if task_type is TaskTypesEnum.ts_forecasting:
if 'non_lagged' not in operation.tags:
nodes_from = [PipelineNode('lagged')]
node = PipelineNode(operation.id, nodes_from=nodes_from)
pipeline = Pipeline(node)
pipeline.fit(data)
assert pipeline.predict(data) is not None
except NotImplementedError:
pass
33 changes: 33 additions & 0 deletions test/integration/multimodal/test_multimodal.py
@@ -1,9 +1,14 @@
import pytest

from examples.advanced.multi_modal_pipeline import prepare_multi_modal_data
from fedot.core.data.multi_modal import MultiModalData
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum
from fedot.core.utils import fedot_project_root
from test.integration.models.test_model import get_data_for_testing


def generate_multi_modal_pipeline(data: MultiModalData):
@@ -31,6 +36,15 @@ def generate_multi_modal_pipeline(data: MultiModalData):
return pipeline


def get_simple_multimodal_data(task_type, data_type):
data = MultiModalData()
for i in range(2):
type_name = 'ts' if data_type is DataTypesEnum.multi_ts else data_type.name
data[f"data_source_{type_name}/{i}"] = get_data_for_testing(task_type=task_type, data_type=data_type,
length=200, features_count=2)
return data


def test_multi_modal_pipeline():
path = fedot_project_root().joinpath('test', 'data', 'multi_modal')
task = Task(TaskTypesEnum.classification)
@@ -43,3 +57,22 @@ def test_multi_modal_pipeline():
prediction = pipeline.predict(fit_data)

assert prediction is not None


@pytest.mark.parametrize(['task_type', 'data_type', 'pipeline'],
[(TaskTypesEnum.ts_forecasting,
DataTypesEnum.multi_ts,
(PipelineBuilder().add_branch('data_source_ts/0', 'data_source_ts/1')
.grow_branches('lagged', 'lagged')
.join_branches('ridge')
.build()
)
),
]
)
def test_multimodaldata_with_pipeline(task_type, data_type, pipeline):
""" Test pipeline with MultiModalData """
data = get_simple_multimodal_data(task_type, data_type)
pipeline.fit(data)
prediction = pipeline.predict(data)
assert prediction is not None
@@ -31,7 +31,8 @@ def get_fitted_fedot(forecast_length, train_data, **kwargs):
'timeout': None,
'pop_size': 50,
'num_of_generations': 5}
fedot = Fedot(**(params | kwargs))
params.update(kwargs)
fedot = Fedot(**params)
fedot.fit(train_data)
return fedot

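The dict union operator used previously requires Python 3.9+ (PEP 584); dict.update performs the same merge on every supported version, mutating params in place. A quick comparison with illustrative values:

params = {'pop_size': 50, 'timeout': None}
kwargs = {'pop_size': 10}

# merged = params | kwargs    # Python 3.9+ only
params.update(kwargs)         # portable equivalent; kwargs wins on conflicts
assert params['pop_size'] == 10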
