From bf2bd9175ade12c42f2686305865002e455203f6 Mon Sep 17 00:00:00 2001 From: chrislisbon Date: Wed, 18 Oct 2023 19:46:42 +0300 Subject: [PATCH 1/6] add InputData from pd and numpy --- fedot/core/data/data.py | 51 +++++++++++++++++++++++++++++++++++++ test/unit/data/test_data.py | 7 +++-- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index c75c0e9483..1bfdf7c553 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -53,6 +53,57 @@ class Data: # Object with supplementary info supplementary_data: SupplementaryData = field(default_factory=SupplementaryData) + @classmethod + def from_numpy(cls, + features_array: np.ndarray, + target_array: np.ndarray, + idx: Optional[np.ndarray] = None, + task: Task = Task(TaskTypesEnum.classification), + data_type: Optional[DataTypesEnum] = None) -> InputData: + """Import data from numpy array. + + Args: + features_array: numpy array with features. + target_array: numpy array with target. + task: the :obj:`Task` to solve with the data. + data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`. + + Returns: + data + """ + return array_to_input_data(features_array, target_array, idx, task, data_type) + + @classmethod + def from_dataframe(cls, + df: pd.DataFrame, + task: Union[Task, str] = 'classification', + data_type: DataTypesEnum = DataTypesEnum.table, + target_columns: Union[str, List[Union[str, int]]] = '') -> InputData: + """Import data from pandas DataFrame. + + Args: + df: loaded pandas DataFrame. + task: the :obj:`Task` to solve with the data. + data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`. + target_columns: name of the target column (the last column if empty and no target if ``None``). 
+ + Returns: + data + """ + + if isinstance(task, str): + task = Task(TaskTypesEnum(task)) + + idx = df.index.to_numpy() + if not target_columns: + features_names = df.columns.to_numpy()[:-1] + else: + features_names = df.drop(target_columns, axis=1).columns.to_numpy() + features, target = process_target_and_features(df, target_columns) + + return InputData(idx=idx, features=features, target=target, task=task, data_type=data_type, + features_names=features_names) + @classmethod def from_csv(cls, file_path: PathType, diff --git a/test/unit/data/test_data.py b/test/unit/data/test_data.py index a3eb55bfaa..20e86ae448 100644 --- a/test/unit/data/test_data.py +++ b/test/unit/data/test_data.py @@ -59,9 +59,12 @@ def test_data_from_csv(): idx=idx, task=task, data_type=DataTypesEnum.table).features - actual_features = InputData.from_csv( + actual_features_from_csv = InputData.from_csv( os.path.join(test_file_path, file)).features - assert np.array_equal(expected_features, actual_features) + assert np.array_equal(expected_features, actual_features_from_csv) + df.set_index('ID', drop=True, inplace=True) + actual_features_from_df = InputData.from_dataframe(df).features + assert np.array_equal(expected_features, actual_features_from_df) def test_with_custom_target(): From 496b7a6e6568d204b22fcb1c6d3bd552bc711442 Mon Sep 17 00:00:00 2001 From: chrislisbon Date: Thu, 19 Oct 2023 11:57:58 +0300 Subject: [PATCH 2/6] numpy_ts add, fixes --- fedot/core/data/data.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index 1bfdf7c553..230fbc9029 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -58,19 +58,45 @@ def from_numpy(cls, features_array: np.ndarray, target_array: np.ndarray, idx: Optional[np.ndarray] = None, - task: Task = Task(TaskTypesEnum.classification), - data_type: Optional[DataTypesEnum] = None) -> InputData: + task: Union[Task, str] = 'classification', 
+ data_type: Optional[DataTypesEnum] = DataTypesEnum.table) -> InputData: """Import data from numpy array. Args: features_array: numpy array with features. target_array: numpy array with target. + idx: indices of arrays. + task: the :obj:`Task` to solve with the data. + data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`. + + Returns: + data + """ + if isinstance(task, str): + task = Task(TaskTypesEnum(task)) + return array_to_input_data(features_array, target_array, idx, task, data_type) + + @classmethod + def from_numpy_time_series(cls, + features_array: np.ndarray, + target_array: np.ndarray, + idx: Optional[np.ndarray] = None, + task: Union[Task, str] = 'ts_forecasting', + data_type: Optional[DataTypesEnum] = DataTypesEnum.ts) -> InputData: + """Import time series from numpy array. + + Args: + features_array: numpy array with features. + target_array: numpy array with target. + idx: indices of arrays. task: the :obj:`Task` to solve with the data. data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`. 
Returns: data """ + if isinstance(task, str): + task = Task(TaskTypesEnum(task)) return array_to_input_data(features_array, target_array, idx, task, data_type) @classmethod From 61c155dca4f932205b0848c2ab26e8a16df4ff1b Mon Sep 17 00:00:00 2001 From: chrislisbon Date: Thu, 19 Oct 2023 19:12:21 +0300 Subject: [PATCH 3/6] X, Y from_dataframe fix --- fedot/core/data/data.py | 19 +++++++++---------- test/unit/data/test_data.py | 4 +++- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index 230fbc9029..b3cb4deb3a 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -101,17 +101,17 @@ def from_numpy_time_series(cls, @classmethod def from_dataframe(cls, - df: pd.DataFrame, + features_df: pd.DataFrame, + target_df: pd.DataFrame, task: Union[Task, str] = 'classification', - data_type: DataTypesEnum = DataTypesEnum.table, - target_columns: Union[str, List[Union[str, int]]] = '') -> InputData: + data_type: DataTypesEnum = DataTypesEnum.table) -> InputData: """Import data from pandas DataFrame. Args: - df: loaded pandas DataFrame. + features_df: loaded pandas DataFrame with features. + target_df: loaded pandas DataFrame with target. task: the :obj:`Task` to solve with the data. data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`. - target_columns: name of the target column (the last column if empty and no target if ``None``). 
Returns: data @@ -120,11 +120,10 @@ def from_dataframe(cls, if isinstance(task, str): task = Task(TaskTypesEnum(task)) - idx = df.index.to_numpy() - if not target_columns: - features_names = df.columns.to_numpy()[:-1] - else: - features_names = df.drop(target_columns, axis=1).columns.to_numpy() + idx = features_df.index.to_numpy() + target_columns = target_df.columns.to_list() + features_names = features_df.columns.to_numpy() + df = pd.concat([features_df, target_df], axis=1) features, target = process_target_and_features(df, target_columns) return InputData(idx=idx, features=features, target=target, task=task, data_type=data_type, diff --git a/test/unit/data/test_data.py b/test/unit/data/test_data.py index 20e86ae448..404f66c854 100644 --- a/test/unit/data/test_data.py +++ b/test/unit/data/test_data.py @@ -63,7 +63,9 @@ def test_data_from_csv(): os.path.join(test_file_path, file)).features assert np.array_equal(expected_features, actual_features_from_csv) df.set_index('ID', drop=True, inplace=True) - actual_features_from_df = InputData.from_dataframe(df).features + features = df[df.columns.to_numpy()[:-1]] + target = pd.DataFrame(df[df.columns.to_numpy()[-1]]) + actual_features_from_df = InputData.from_dataframe(features, target).features assert np.array_equal(expected_features, actual_features_from_df) From 8543b441560273e7c8401e37f0408ee769bc8701 Mon Sep 17 00:00:00 2001 From: chrislisbon Date: Fri, 20 Oct 2023 11:50:09 +0300 Subject: [PATCH 4/6] fixes --- fedot/core/data/data.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index b3cb4deb3a..e05d995289 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -79,15 +79,15 @@ def from_numpy(cls, @classmethod def from_numpy_time_series(cls, features_array: np.ndarray, - target_array: np.ndarray, + target_array: Optional[np.ndarray] = None, idx: Optional[np.ndarray] = None, task: Union[Task, str] = 'ts_forecasting', 
data_type: Optional[DataTypesEnum] = DataTypesEnum.ts) -> InputData: """Import time series from numpy array. Args: - features_array: numpy array with features. - target_array: numpy array with target. + features_array: numpy array with features time series. + target_array: numpy array with target time series (if ``None``, ``features_array`` is used as the target). idx: indices of arrays. task: the :obj:`Task` to solve with the data. data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`. @@ -97,6 +97,8 @@ def from_numpy_time_series(cls, """ if isinstance(task, str): task = Task(TaskTypesEnum(task)) + if target_array is None: + target_array = features_array return array_to_input_data(features_array, target_array, idx, task, data_type) @classmethod From 822bb136eb3331bc2a165c96cb50106971cccd20 Mon Sep 17 00:00:00 2001 From: chrislisbon Date: Sat, 21 Oct 2023 16:24:15 +0300 Subject: [PATCH 5/6] pd.Series add --- fedot/core/data/data.py | 12 ++++++++---- test/unit/data/test_data.py | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index e05d995289..709f9dfd91 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -103,15 +103,15 @@ def from_numpy_time_series(cls, @classmethod def from_dataframe(cls, - features_df: pd.DataFrame, - target_df: pd.DataFrame, + features_df: Union[pd.DataFrame, pd.Series], + target_df: Union[pd.DataFrame, pd.Series], task: Union[Task, str] = 'classification', data_type: DataTypesEnum = DataTypesEnum.table) -> InputData: """Import data from pandas DataFrame. Args: - features_df: loaded pandas DataFrame with features. - target_df: loaded pandas DataFrame with target. + features_df: loaded pandas DataFrame or Series with features. + target_df: loaded pandas DataFrame or Series with target. task: the :obj:`Task` to solve with the data. data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`. 
@@ -121,6 +121,10 @@ def from_dataframe(cls, if isinstance(task, str): task = Task(TaskTypesEnum(task)) + if isinstance(features_df, pd.Series): + features_df = pd.DataFrame(features_df) + if isinstance(target_df, pd.Series): + target_df = pd.DataFrame(target_df) idx = features_df.index.to_numpy() target_columns = target_df.columns.to_list() diff --git a/test/unit/data/test_data.py b/test/unit/data/test_data.py index 404f66c854..5a96f3ca63 100644 --- a/test/unit/data/test_data.py +++ b/test/unit/data/test_data.py @@ -64,7 +64,7 @@ def test_data_from_csv(): assert np.array_equal(expected_features, actual_features_from_csv) df.set_index('ID', drop=True, inplace=True) features = df[df.columns.to_numpy()[:-1]] - target = pd.DataFrame(df[df.columns.to_numpy()[-1]]) + target = df[df.columns.to_numpy()[-1]] actual_features_from_df = InputData.from_dataframe(features, target).features assert np.array_equal(expected_features, actual_features_from_df) From a9e04c3d728e6adecb633e92fcc5a4bcd23b0b05 Mon Sep 17 00:00:00 2001 From: chrislisbon Date: Sat, 21 Oct 2023 16:29:00 +0300 Subject: [PATCH 6/6] no_numpy fix --- test/unit/data/test_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/data/test_data.py b/test/unit/data/test_data.py index 5a96f3ca63..3f23fb289f 100644 --- a/test/unit/data/test_data.py +++ b/test/unit/data/test_data.py @@ -63,8 +63,8 @@ def test_data_from_csv(): os.path.join(test_file_path, file)).features assert np.array_equal(expected_features, actual_features_from_csv) df.set_index('ID', drop=True, inplace=True) - features = df[df.columns.to_numpy()[:-1]] - target = df[df.columns.to_numpy()[-1]] + features = df[df.columns[:-1]] + target = df[df.columns[-1]] actual_features_from_df = InputData.from_dataframe(features, target).features assert np.array_equal(expected_features, actual_features_from_df)