Bugfixes (#1153)
1. Fix #1148 by guarding the denominator in the CGRU scaler against zero std, and add a test for the new code
2. Fix #1151 by setting n_jobs=1 by default for some operations
3. Add an initial assumption with AR (#1074) and enable AR (#1137)
4. Check and add a test in accordance with #739
5. Fix the integration test `test_result_changing`
kasyanovse authored Sep 3, 2023
1 parent cebd493 commit 5da1447
Showing 9 changed files with 137 additions and 17 deletions.
5 changes: 4 additions & 1 deletion fedot/api/api_utils/assumptions/task_assumptions.py
@@ -58,7 +58,10 @@ def builders(self):
PipelineBuilder()
.add_branch('polyfit', 'lagged')
.grow_branches(None, 'ridge')
.join_branches('ridge')
.join_branches('ridge'),
'smoothing_ar':
PipelineBuilder()
.add_sequence('smoothing', 'ar'),
}

def ensemble_operation(self) -> str:
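
The new 'smoothing_ar' entry gives forecasting tasks a second, deliberately simple starting point alongside the polyfit/ridge assumption above. A minimal sketch of building that pipeline with the builder API used in this diff (the variable name is illustrative):

from fedot.core.pipelines.pipeline_builder import PipelineBuilder

# The initial assumption added above: smoothing followed by an AR model
smoothing_ar = PipelineBuilder().add_sequence('smoothing', 'ar').build()
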
@@ -150,7 +150,7 @@ def _fit_transform_scaler(self, data: InputData):
return f_scaled, t_scaled

def _scale(self, array: np.ndarray):
return (array - self.mu) / self.std
return (array - self.mu) / (self.std + 1e-6)

def _inverse_scale(self, array: np.ndarray):
return array * self.std + self.mu
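
The new epsilon matters because a constant series (#1148) has zero standard deviation, so the old denominator turned the z-score into 0 / 0 and produced NaNs. A standalone sketch of the failure mode, not the CGRU implementation itself:

import numpy as np

array = np.full(10, 5.0)             # constant series, std == 0
mu, std = array.mean(), array.std()
# (array - mu) / std evaluates to 0 / 0 here and yields NaNs;
# the added epsilon keeps the transform finite:
scaled = (array - mu) / (std + 1e-6)
assert np.isfinite(scaled).all()
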
4 changes: 0 additions & 4 deletions fedot/core/pipelines/pipeline.py
@@ -404,10 +404,6 @@ def replace_n_jobs_in_nodes(self, n_jobs: int):
for param in ['n_jobs', 'num_threads']:
if param in node.content['params']:
node.content['params'][param] = n_jobs
# workaround for lgbm paramaters
if node.content['name'] == 'lgbm':
node.content['params']['num_threads'] = n_jobs
node.content['params']['n_jobs'] = n_jobs

@copy_doc(Graph.show)
def show(self, save_path: Optional[Union[PathLike, str]] = None, engine: Optional[str] = None,
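
The deleted branch special-cased lgbm only; with 'n_jobs' now listed among lgbm's default parameters (see default_operation_params.json below), the generic loop above already rewrites it. A small illustration with a hypothetical params dict:

# Hypothetical node params once the JSON defaults are applied
params = {'n_jobs': 1, 'num_leaves': 32}
for name in ['n_jobs', 'num_threads']:
    if name in params:
        params[name] = 4  # the requested n_jobs
assert params == {'n_jobs': 4, 'num_leaves': 32}
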
10 changes: 9 additions & 1 deletion fedot/core/pipelines/pipeline_node_factory.py
@@ -1,5 +1,5 @@
from random import choice
from typing import Optional
from typing import Optional, List

from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements
from golem.core.optimisers.graph import OptNode
@@ -59,3 +59,11 @@ def _return_node(candidates) -> Optional[OptNode]:
@staticmethod
def filter_specific_candidates(candidates: list):
return sorted(list(filter(lambda x: not check_for_specific_operations(x), candidates)))

def get_all_available_operations(self) -> Optional[List[str]]:
"""
Returns all available models and data operations.
"""
# TODO: get_all_available_operations is an abstract method in OptNodeFactory;
# PipelineOptNodeFactory cannot be instantiated without that method
raise NotImplementedError()
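
The stub is needed because Python refuses to instantiate a class that leaves an abstract method undefined; defining the method, even as a NotImplementedError stub, restores instantiability. A minimal reproduction with hypothetical class names:

from abc import ABC, abstractmethod

class BaseFactory(ABC):
    @abstractmethod
    def get_all_available_operations(self): ...

class ConcreteFactory(BaseFactory):
    pass  # abstract method left undefined

try:
    ConcreteFactory()
except TypeError as err:
    print(err)  # Can't instantiate abstract class ConcreteFactory ...
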
15 changes: 10 additions & 5 deletions fedot/core/repository/data/default_operation_params.json
@@ -7,31 +7,36 @@
},
"xgboost": {
"eval_metric": "mlogloss",
"nthread": -1
"nthread": 1,
"n_jobs": 1
},
"catboost": {
"allow_writing_files": false,
"verbose": false
"verbose": false,
"thread_count": 1
},
"catboostreg": {
"allow_writing_files": false,
"verbose": false
"verbose": false,
"thread_count": 1
},
"lgbm": {
"num_leaves": 32,
"colsample_bytree": 0.8,
"subsample": 0.8,
"subsample_freq": 10,
"learning_rate": 0.03,
"n_estimators": 100
"n_estimators": 100,
"n_jobs": 1
},
"lgbmreg": {
"num_leaves": 32,
"colsample_bytree": 0.8,
"subsample": 0.8,
"subsample_freq": 10,
"learning_rate": 0.03,
"n_estimators": 100
"n_estimators": 100,
"n_jobs": 1
},
"lagged": {
"window_size": 10
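
Pinning nthread, n_jobs, and thread_count to 1 makes these libraries single-threaded by default (#1151); replace_n_jobs_in_nodes (pipeline.py above) still rewrites the values whenever a different n_jobs is requested. A quick sanity check on the file, assuming it is run from the repository root:

import json

with open('fedot/core/repository/data/default_operation_params.json') as f:
    defaults = json.load(f)
assert defaults['lgbm']['n_jobs'] == 1
assert defaults['catboost']['thread_count'] == 1
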
5 changes: 2 additions & 3 deletions fedot/core/repository/data/model_repository.json
@@ -78,7 +78,7 @@
],
"description": "Implementations of the regression models from scikit-learn framework",
"forbidden_node_types": "[]",
"input_type": "[DataTypesEnum.table, DataTypesEnum.multi_ts]",
"input_type": "[DataTypesEnum.table]",
"output_type": "[DataTypesEnum.table]",
"strategies": [
"fedot.core.operations.evaluation.regression",
@@ -136,8 +136,7 @@
"interpretable",
"non_lagged",
"linear",
"correct_params",
"non-default"
"correct_params"
],
"input_type": "[DataTypesEnum.ts]"
},
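
Dropping the 'non-default' tag is what enables AR (#1137): operations carrying that tag are left out when FEDOT assembles its default search space. A hypothetical illustration of the filtering effect, not the actual repository API:

operation_tags = {'ar': ['interpretable', 'non_lagged', 'linear', 'correct_params'],
                  'custom_model': ['non-default']}
default_operations = [name for name, tags in operation_tags.items()
                      if 'non-default' not in tags]
assert 'ar' in default_operations
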
77 changes: 76 additions & 1 deletion test/integration/models/test_model.py
@@ -23,7 +23,7 @@
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.operation_types_repository import OperationTypesRepository
from fedot.core.repository.operation_types_repository import OperationTypesRepository, AVAILABLE_REPO_NAMES
from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams
from test.unit.common_tests import is_predict_ignores_target
from test.unit.data_operations.test_time_series_operations import synthetic_univariate_ts
@@ -40,6 +40,49 @@ def check_predict_correct(model, fitted_operation, test_data):
)


def get_data_for_testing(task_type, data_type, length=100, features_count=1,
value=0, random=True, random_seed=0):
allowed_data_type = {TaskTypesEnum.ts_forecasting: [DataTypesEnum.ts, DataTypesEnum.multi_ts],
TaskTypesEnum.classification: [DataTypesEnum.table],
TaskTypesEnum.regression: [DataTypesEnum.table]}
if task_type not in allowed_data_type or data_type not in allowed_data_type[task_type]:
return None

if task_type is TaskTypesEnum.ts_forecasting:
task = Task(task_type, TsForecastingParams(max(length // 10, 2)))
if data_type is DataTypesEnum.ts:
features = np.zeros(length) + value
else:
features = np.zeros((length, features_count)) + value
if data_type is DataTypesEnum.table:
target = np.zeros(length) + value
else:
target = features

else:
task = Task(task_type)
data_type = DataTypesEnum.table
features = np.zeros((length, features_count)) + value
target = np.zeros(length) + value
if task_type is TaskTypesEnum.classification:
target[:int(len(target) // 2)] = 2 * value + 1

if random and task_type is not TaskTypesEnum.classification:
generator = np.random.RandomState(random_seed)
features += generator.rand(*features.shape)
if task_type is TaskTypesEnum.ts_forecasting:
target = features
else:
target += generator.rand(*target.shape)

data = InputData(idx=np.arange(length),
features=features,
target=target,
data_type=data_type,
task=task)
return data


def get_roc_auc(valid_data: InputData, predicted_data: OutputData) -> float:
n_classes = valid_data.num_classes
if n_classes > 2:
@@ -372,3 +415,35 @@ def test_locf_forecast_correctly():
assert np.array_equal(fit_forecast.idx, np.array([3, 4, 5, 6, 7, 8, 9, 10]))
# Repeated pattern (3 elements to repeat and 4 forecast horizon)
assert np.array_equal(predict_forecast.predict, np.array([[110, 120, 130, 110]]))


def test_models_does_not_fall_on_constant_data():
""" Run models on constant data """
# models that raise exception
to_skip = ['custom', 'arima', 'catboost', 'catboostreg',
'lda', 'fast_ica', 'decompose', 'class_decompose']

for repo_name in AVAILABLE_REPO_NAMES:
operation_repo = OperationTypesRepository(repo_name)
if operation_repo._repo is None:
continue
for operation in operation_repo._repo:
if operation.id in to_skip:
continue
for task_type in operation.task_type:
for data_type in operation.input_types:
data = get_data_for_testing(task_type, data_type,
length=100, features_count=2,
random=False)
if data is not None:
try:
nodes_from = []
if task_type is TaskTypesEnum.ts_forecasting:
if 'non_lagged' not in operation.tags:
nodes_from = [PipelineNode('lagged')]
node = PipelineNode(operation.id, nodes_from=nodes_from)
pipeline = Pipeline(node)
pipeline.fit(data)
assert pipeline.predict(data) is not None
except NotImplementedError:
pass
33 changes: 33 additions & 0 deletions test/integration/multimodal/test_multimodal.py
@@ -1,9 +1,14 @@
import pytest

from examples.advanced.multi_modal_pipeline import prepare_multi_modal_data
from fedot.core.data.multi_modal import MultiModalData
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum
from fedot.core.utils import fedot_project_root
from test.integration.models.test_model import get_data_for_testing


def generate_multi_modal_pipeline(data: MultiModalData):
@@ -31,6 +36,15 @@ def generate_multi_modal_pipeline(data: MultiModalData):
return pipeline


def get_simple_multimodal_data(task_type, data_type):
data = MultiModalData()
for i in range(2):
type_name = 'ts' if data_type is DataTypesEnum.multi_ts else data_type.name
data[f"data_source_{type_name}/{i}"] = get_data_for_testing(task_type=task_type, data_type=data_type,
length=200, features_count=2)
return data


def test_multi_modal_pipeline():
path = fedot_project_root().joinpath('test', 'data', 'multi_modal')
task = Task(TaskTypesEnum.classification)
@@ -43,3 +57,22 @@ def test_multi_modal_pipeline():
prediction = pipeline.predict(fit_data)

assert prediction is not None


@pytest.mark.parametrize(['task_type', 'data_type', 'pipeline'],
[(TaskTypesEnum.ts_forecasting,
DataTypesEnum.multi_ts,
(PipelineBuilder().add_branch('data_source_ts/0', 'data_source_ts/1')
.grow_branches('lagged', 'lagged')
.join_branches('ridge')
.build()
)
),
]
)
def test_multimodaldata_with_pipeline(task_type, data_type, pipeline):
""" Test pipeline with MultiModalData """
data = get_simple_multimodal_data(task_type, data_type)
pipeline.fit(data)
prediction = pipeline.predict(data)
assert prediction is not None
@@ -31,7 +31,8 @@ def get_fitted_fedot(forecast_length, train_data, **kwargs):
'timeout': None,
'pop_size': 50,
'num_of_generations': 5}
fedot = Fedot(**(params | kwargs))
params.update(kwargs)
fedot = Fedot(**params)
fedot.fit(train_data)
return fedot

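The dict union operator used previously requires Python 3.9+ (PEP 584); dict.update performs the same merge on every supported version, mutating params in place. A quick comparison with illustrative values:

params = {'pop_size': 50, 'timeout': None}
kwargs = {'pop_size': 10}

# merged = params | kwargs    # Python 3.9+ only
params.update(kwargs)         # portable equivalent; kwargs wins on conflicts
assert params['pop_size'] == 10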
