Remove redundant metrics mapping (#1167)
* Remove redundant metrics mapping

* Convert str to MetricsEnum

* Refactor MetricsObjective

* Remove ApiMetrics

* Fix test prediction intervals

* Fix tests

* PEP 8

* Fix paths

* Move to init

* PEP 8

* Add MASE metric (#1166)

The MASE metric (Mean Absolute Scaled Error, https://en.wikipedia.org/wiki/Mean_absolute_scaled_error) was implemented for the time series forecasting task,
and a TimeSeriesForecastingMetricsEnum was created. A brief sketch of the formula follows the commit message below.

* Review fixes

* Remove redundant metrics mapping

* Convert str to MetricsEnum

* Refactor MetricsObjective

* Remove ApiMetrics

* Fix test prediction intervals

* Fix tests

* PEP 8

* Fix paths

* Move to init

* PEP 8

* Review fixes

* Convert str to MetricsEnum

* Refactor MetricsObjective

---------

Co-authored-by: Valerii Pokrovskii <[email protected]>
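
For reference, a minimal sketch of the non-seasonal MASE formula mentioned above; the function name and signature are illustrative and this is not the implementation added in this commit.

```python
import numpy as np


def mase(y_true, y_pred, y_train):
    """Mean Absolute Scaled Error, non-seasonal case (illustrative sketch only)."""
    y_true, y_pred, y_train = map(np.asarray, (y_true, y_pred, y_train))
    # Forecast error on the test horizon
    mae_forecast = np.mean(np.abs(y_true - y_pred))
    # Scale: in-sample MAE of the naive one-step-ahead forecast on the training series
    naive_scale = np.mean(np.abs(np.diff(y_train)))
    return mae_forecast / naive_scale
```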
YamLyubov and valer1435 authored Sep 14, 2023
1 parent 86813fa commit 17b2ecd
Showing 17 changed files with 99 additions and 147 deletions.
2 changes: 1 addition & 1 deletion examples/simple/classification/api_classification.py
@@ -14,7 +14,7 @@ def run_classification_example(timeout: float = None, visualization=False, with_
print(baseline_model.get_metrics())

auto_model = Fedot(problem=problem, timeout=timeout, n_jobs=-1, preset='best_quality',
max_pipeline_fit_time=5, metric='roc_auc', with_tuning=with_tuning)
max_pipeline_fit_time=5, metric=['roc_auc', 'precision'], with_tuning=with_tuning)
auto_model.fit(features=train_data_path, target='target')
prediction = auto_model.predict_proba(features=test_data_path)

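
The example above now passes a list of metric names instead of a single string. A minimal usage sketch of that pattern; the data paths and timeout below are placeholders, not values from the repository.

```python
from fedot.api.main import Fedot

# Passing several metric names turns composing into a multi-objective search;
# 'train.csv' and 'test.csv' are hypothetical data paths.
auto_model = Fedot(problem='classification', timeout=1.0, preset='best_quality',
                   metric=['roc_auc', 'precision'])
auto_model.fit(features='train.csv', target='target')
prediction = auto_model.predict_proba(features='test.csv')
print(auto_model.get_metrics())
```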
12 changes: 6 additions & 6 deletions fedot/api/api_utils/api_composer.py
@@ -1,13 +1,12 @@
import datetime
import gc
from typing import List, Optional, Sequence, Tuple
from typing import List, Optional, Sequence, Tuple, Union

from golem.core.log import default_log
from golem.core.optimisers.opt_history_objects.opt_history import OptHistory
from golem.core.tuning.simultaneous import SimultaneousTuner

from fedot.api.api_utils.assumptions.assumptions_handler import AssumptionsHandler
from fedot.api.api_utils.metrics import ApiMetrics
from fedot.api.api_utils.params import ApiParams
from fedot.api.time import ApiTime
from fedot.core.caching.pipelines_cache import OperationsCache
@@ -18,11 +17,12 @@
from fedot.core.data.data import InputData
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.quality_metrics_repository import MetricType


class ApiComposer:

def __init__(self, api_params: ApiParams, metrics: ApiMetrics):
def __init__(self, api_params: ApiParams, metrics: Union[str, MetricType, Sequence]):
self.log = default_log(self)
self.params = api_params
self.metrics = metrics
@@ -58,7 +58,7 @@ def obtain_model(self, train_data: InputData) -> Tuple[Pipeline, Sequence[Pipeli

initial_assumption, fitted_assumption = self.propose_and_fit_initial_assumption(train_data)

multi_objective = len(self.metrics.metric_functions) > 1
multi_objective = len(self.metrics) > 1
self.params.init_params_for_composing(self.timer.timedelta_composing, multi_objective)

self.log.message(f"AutoML configured."
@@ -117,7 +117,7 @@ def compose_pipeline(self, train_data: InputData, initial_assumption: Sequence[P
.with_initial_pipelines(initial_assumption)
.with_optimizer(self.params.get('optimizer'))
.with_optimizer_params(parameters=self.params.optimizer_params)
.with_metrics(self.metrics.metric_functions)
.with_metrics(self.metrics)
.with_cache(self.pipelines_cache, self.preprocessing_cache)
.with_graph_generation_param(self.params.graph_generation_params)
.build())
@@ -147,7 +147,7 @@ def tune_final_pipeline(self, train_data: InputData, pipeline_gp_composed: Pipel
timeout_for_tuning = abs(self.timer.determine_resources_for_tuning()) / 60
tuner = (TunerBuilder(self.params.task)
.with_tuner(SimultaneousTuner)
.with_metric(self.metrics.metric_functions)
.with_metric(self.metrics[0])
.with_iterations(DEFAULT_TUNING_ITERATIONS_NUMBER)
.with_timeout(datetime.timedelta(minutes=timeout_for_tuning))
.with_eval_time_constraint(self.params.composer_requirements.max_graph_fit_time)
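
After this change ApiComposer consumes a plain sequence of metrics rather than an ApiMetrics wrapper. A small sketch of the resulting logic, with an illustrative helper name that is not part of the FEDOT codebase:

```python
from typing import Sequence


def summarize_metrics(metrics: Sequence):
    """Illustrative helper mirroring how ApiComposer derives its settings
    from the plain metric sequence (a sketch, not the real method)."""
    multi_objective = len(metrics) > 1   # more than one metric -> multi-objective composing
    tuning_metric = metrics[0]           # final pipeline tuning uses only the first metric
    return multi_objective, tuning_metric
```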
72 changes: 0 additions & 72 deletions fedot/api/api_utils/metrics.py

This file was deleted.

18 changes: 11 additions & 7 deletions fedot/api/main.py
@@ -8,13 +8,13 @@
from golem.core.log import Log, default_log
from golem.core.optimisers.opt_history_objects.opt_history import OptHistory
from golem.core.tuning.simultaneous import SimultaneousTuner
from golem.core.utilities.data_structures import ensure_wrapped_in_sequence
from golem.visualisation.opt_viz_extra import visualise_pareto

from fedot.api.api_utils.api_composer import ApiComposer
from fedot.api.api_utils.api_data import ApiDataProcessor
from fedot.api.api_utils.data_definition import FeaturesType, TargetType
from fedot.api.api_utils.input_analyser import InputAnalyser
from fedot.api.api_utils.metrics import ApiMetrics
from fedot.api.api_utils.params import ApiParams
from fedot.api.api_utils.predefined_model import PredefinedModel
from fedot.core.constants import DEFAULT_API_TIMEOUT_MINUTES, DEFAULT_TUNING_ITERATIONS_NUMBER
@@ -32,6 +32,7 @@
from fedot.explainability.explainers import explain_pipeline
from fedot.preprocessing.base_preprocessing import BasePreprocessor
from fedot.remote.remote_evaluator import RemoteEvaluator
from fedot.utilities.define_metric_by_task import MetricByTask
from fedot.utilities.memory import MemoryAnalytics
from fedot.utilities.project_import_export import export_project_to_zip, import_project_from_zip

@@ -178,9 +179,12 @@ def __init__(self,
set_random_seed(seed)
self.log = self._init_logger(logging_level)

# Classes for dealing with metrics, data sources and hyperparameters
# Attributes for dealing with metrics, data sources and hyperparameters
self.params = ApiParams(composer_tuner_params, problem, task_params, n_jobs, timeout)
self.metrics = ApiMetrics(self.params.task, self.params.get('metric'))

default_metrics = MetricByTask.get_default_quality_metrics(self.params.task.task_type)
passed_metrics = self.params.get('metric')
self.metrics = ensure_wrapped_in_sequence(passed_metrics) if passed_metrics else default_metrics

self.api_composer = ApiComposer(self.params, self.metrics)

@@ -299,7 +303,7 @@ def tune(self,
cv_folds = cv_folds or self.params.get('cv_folds')
n_jobs = n_jobs or self.params.n_jobs

metric = self.metrics.obtain_metrics(metric_name)[0] if metric_name else self.metrics.metric_functions[0]
metric = metric_name if metric_name else self.metrics[0]

pipeline_tuner = (TunerBuilder(self.params.task)
.with_tuner(SimultaneousTuner)
@@ -437,7 +441,7 @@ def load(self, path):
self.data_processor.preprocessor = self.current_pipeline.preprocessor

def plot_pareto(self):
metric_names = self.metrics.metric_names
metric_names = [str(metric) for metric in self.metrics]
# archive_history stores archives of the best models.
# Each archive is sorted from the best to the worst model,
# so the best_candidates is sorted too.
@@ -505,8 +509,8 @@ def get_metrics(self,
else:
self.test_data.target = target[:len(self.prediction.predict)]

metrics = self.metrics.obtain_metrics(metric_names) if metric_names else self.metrics.metric_functions
metric_names = self.metrics.get_metric_names(metrics)
metrics = ensure_wrapped_in_sequence(metric_names) if metric_names else self.metrics
metric_names = [str(metric) for metric in metrics]

in_sample = in_sample if in_sample is not None else self._is_in_sample_prediction

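
The Fedot constructor now resolves metrics itself: user-passed metrics are wrapped into a sequence, otherwise the task's default quality metrics are used. A hedged sketch of that resolution step, assuming MetricByTask.get_default_quality_metrics returns a sequence of metric identifiers; the helper name is hypothetical.

```python
from golem.core.utilities.data_structures import ensure_wrapped_in_sequence

from fedot.core.repository.tasks import Task, TaskTypesEnum
from fedot.utilities.define_metric_by_task import MetricByTask


def resolve_metrics(passed_metrics, task: Task):
    """Hypothetical helper mirroring the normalization in Fedot.__init__."""
    if passed_metrics:
        # A single metric (str or enum) becomes a one-element sequence
        return ensure_wrapped_in_sequence(passed_metrics)
    # No metric given: fall back to the default quality metrics for the task
    return MetricByTask.get_default_quality_metrics(task.task_type)


metrics = resolve_metrics('roc_auc', Task(TaskTypesEnum.classification))
```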
21 changes: 11 additions & 10 deletions fedot/core/optimisers/objective/metrics_objective.py
@@ -3,8 +3,7 @@
from golem.core.optimisers.objective import Objective
from golem.core.utilities.data_structures import ensure_wrapped_in_sequence

from fedot.core.repository.quality_metrics_repository import \
MetricType, MetricsEnum, MetricsRepository, ComplexityMetricsEnum
from fedot.core.repository.quality_metrics_repository import MetricType, MetricsRepository, ComplexityMetricsEnum


class MetricsObjective(Objective):
@@ -15,15 +14,17 @@ def __init__(self,
complexity_metrics = {}

for metric in ensure_wrapped_in_sequence(metrics):
if isinstance(metric, MetricsEnum):
metric_func = MetricsRepository().metric_by_id(metric)

if isinstance(metric, ComplexityMetricsEnum):
complexity_metrics[metric] = metric_func
else:
quality_metrics[metric] = metric_func
elif isinstance(metric, Callable):
if callable(metric):
metric_id = str(metric)
quality_metrics[metric_id] = metric
else:
metric_func = MetricsRepository.metric_by_id(metric)
if metric_func:
if ComplexityMetricsEnum.has_value(metric):
complexity_metrics[metric] = metric_func
else:
quality_metrics[metric] = metric_func
else:
raise ValueError(f'Incorrect metric {metric}')

super().__init__(quality_metrics, complexity_metrics, is_multi_objective)
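
The refactored constructor dispatches each metric by type: custom callables become quality metrics keyed by their string representation, and everything else is resolved through the repository and routed to either the complexity or quality group. A standalone sketch of that dispatch; the helper and its parameters are illustrative stand-ins, not the real repository API.

```python
from typing import Dict


def split_metrics(metrics, metric_by_id, is_complexity):
    """Illustrative stand-in for the dispatch in MetricsObjective.__init__."""
    quality: Dict = {}
    complexity: Dict = {}
    for metric in metrics:
        if callable(metric):
            quality[str(metric)] = metric          # custom callables count as quality metrics
            continue
        metric_func = metric_by_id(metric)         # look the id up in the metrics repository
        if metric_func is None:
            raise ValueError(f'Incorrect metric {metric}')
        target = complexity if is_complexity(metric) else quality
        target[metric] = metric_func
    return quality, complexity
```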
4 changes: 4 additions & 0 deletions fedot/core/repository/quality_metrics_repository.py
@@ -13,6 +13,10 @@ class MetricsEnum(Enum):
def __str__(self):
return self.value

@classmethod
def has_value(cls, value):
return value in cls._value2member_map_


G = TypeVar('G', bound=Graph, covariant=True)
MetricCallable = Callable[[G], Real]
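
The new has_value classmethod lets calling code check whether a raw value (e.g. a metric identifier) maps onto an enum member. A minimal, self-contained example of the same pattern using a throwaway enum:

```python
from enum import Enum


class ExampleEnum(Enum):   # throwaway enum, not part of FEDOT
    rmse = 'rmse'
    mae = 'mae'

    @classmethod
    def has_value(cls, value):
        return value in cls._value2member_map_


assert ExampleEnum.has_value('rmse')
assert not ExampleEnum.has_value('accuracy')
```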
Binary file not shown.
6 changes: 6 additions & 0 deletions test/unit/pipelines/prediction_intervals/__init__.py
@@ -0,0 +1,6 @@
import pathlib
import platform

plt = platform.system()
if plt == 'Linux':
pathlib.WindowsPath = pathlib.PosixPath
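
A likely purpose of this shim (an assumption, not stated in the diff): the pickled test model appears to have been serialized on Windows, so it may contain WindowsPath objects that cannot be instantiated on Linux unless they are remapped to PosixPath before unpickling. A minimal sketch of the loading pattern, with a placeholder path:

```python
import pathlib
import pickle
import platform

if platform.system() == 'Linux':
    # Let WindowsPath objects inside the pickle deserialize as PosixPath
    pathlib.WindowsPath = pathlib.PosixPath

with open('pred_ints_model_test.pickle', 'rb') as f:   # placeholder path
    model = pickle.load(f)
```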
Empty file.
@@ -1,27 +1,19 @@
# This script generates train ts, test ts and fedot model for testing prediction intervals
# This script generates fedot model for testing prediction intervals

import numpy as np
import matplotlib.pyplot as plt
import pickle

from numpy import genfromtxt

from fedot.core.repository.tasks import TsForecastingParams, Task, TaskTypesEnum
from fedot.core.data.data import InputData
from fedot.api.main import Fedot
from fedot.core.repository.dataset_types import DataTypesEnum


def synthetic_series(start, end):

trend = np.array([5 * np.sin(x / 20) + 0.1 * x - 2 * np.sqrt(x) for x in range(start, end)])
noise = np.random.normal(loc=0, scale=1, size=end - start)

return trend + noise


ts_train = synthetic_series(0, 200)
ts_test = synthetic_series(200, 220)
np.savetxt("train_ts.csv", ts_train, delimiter=",")
np.savetxt("test_ts.csv", ts_test, delimiter=",")
ts_train = genfromtxt('train_ts.csv', delimiter=',')
ts_test = genfromtxt('test_ts.csv', delimiter=',')

fig, ax = plt.subplots()
ax.plot(range(200), ts_train)
Expand All @@ -42,5 +34,5 @@ def synthetic_series(start, end):
model.fit(train_input)
model.forecast()

with open('prediction_intervals_fedot_model.pickle', 'wb') as f:
with open('pred_ints_model_test.pickle', 'wb') as f:
pickle.dump(model, f)
17 changes: 17 additions & 0 deletions test/unit/pipelines/prediction_intervals/data/generate_ts.py
@@ -0,0 +1,17 @@
# This script generates train ts, test ts for testing prediction intervals

import numpy as np


def synthetic_series(start, end):

trend = np.array([5 * np.sin(x / 20) + 0.1 * x - 2 * np.sqrt(x) for x in range(start, end)])
noise = np.random.normal(loc=0, scale=1, size=end - start)

return trend + noise


ts_train = synthetic_series(0, 200)
ts_test = synthetic_series(200, 220)
np.savetxt("train_ts.csv", ts_train, delimiter=",")
np.savetxt("test_ts.csv", ts_test, delimiter=",")
Binary file not shown.
15 changes: 7 additions & 8 deletions test/unit/pipelines/prediction_intervals/test_mutations.py
@@ -1,23 +1,22 @@
import pytest
import itertools
import pickle
from typing import List
import itertools

from golem.core.optimisers.opt_history_objects.individual import Individual
import pytest
from golem.core.log import default_log, Log

from fedot.core.utils import fedot_project_root
from fedot.core.pipelines.prediction_intervals.ts_mutation import get_ts_mutation, get_different_mutations
from fedot.core.pipelines.prediction_intervals.utils import get_last_generations
from golem.core.optimisers.opt_history_objects.individual import Individual

from fedot.core.pipelines.prediction_intervals.graph_distance import get_distance_between
from fedot.core.pipelines.prediction_intervals.params import PredictionIntervalsParams
from fedot.core.pipelines.prediction_intervals.ts_mutation import get_ts_mutation, get_different_mutations
from fedot.core.pipelines.prediction_intervals.utils import get_last_generations
from fedot.core.utils import fedot_project_root


@pytest.fixture
def params():

model_name = f'{fedot_project_root()}/test/unit/data/prediction_intervals/pred_ints_model_test.pickle'
model_name = f'{fedot_project_root()}/test/unit/pipelines/prediction_intervals/data/pred_ints_model_test.pickle'
with open(model_name, 'rb') as f:
model = pickle.load(f)

@@ -1,24 +1,25 @@
import pytest
import pickle

import numpy as np
import pytest

from fedot.core.utils import fedot_project_root
from fedot.core.data.data import InputData
from fedot.core.repository.tasks import TsForecastingParams, Task, TaskTypesEnum
from fedot.core.repository.dataset_types import DataTypesEnum

from fedot.core.pipelines.prediction_intervals.main import PredictionIntervals
from fedot.core.pipelines.prediction_intervals.params import PredictionIntervalsParams
from fedot.core.pipelines.prediction_intervals.metrics import interval_score, picp
from fedot.core.pipelines.prediction_intervals.params import PredictionIntervalsParams
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import TsForecastingParams, Task, TaskTypesEnum
from fedot.core.utils import fedot_project_root


@pytest.fixture
def params():

with open(f'{fedot_project_root()}/test/unit/data/prediction_intervals/pred_ints_model_test.pickle', 'rb') as f:
with open(f'{fedot_project_root()}'
f'/test/unit/pipelines/prediction_intervals/data/pred_ints_model_test.pickle', 'rb') as f:
model = pickle.load(f)
ts_train = np.genfromtxt(f'{fedot_project_root()}/test/unit/data/prediction_intervals/train_ts.csv')
ts_test = np.genfromtxt(f'{fedot_project_root()}/test/unit/data/prediction_intervals/test_ts.csv')
ts_train = np.genfromtxt(f'{fedot_project_root()}/test/unit/pipelines/prediction_intervals/data/train_ts.csv')
ts_test = np.genfromtxt(f'{fedot_project_root()}/test/unit/pipelines/prediction_intervals/data/test_ts.csv')
task = Task(TaskTypesEnum.ts_forecasting, TsForecastingParams(forecast_length=20))
idx = np.arange(len(ts_train))
train_input = InputData(idx=idx,