From 9cc0e353d4cef1806d1c2b42a40180e61c19403a Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 19 May 2023 14:01:12 +0200 Subject: [PATCH 01/26] added RangeScaler class --- .../tabular/transformation/_rangeScaler.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 src/safeds/data/tabular/transformation/_rangeScaler.py diff --git a/src/safeds/data/tabular/transformation/_rangeScaler.py b/src/safeds/data/tabular/transformation/_rangeScaler.py new file mode 100644 index 000000000..fd77786a0 --- /dev/null +++ b/src/safeds/data/tabular/transformation/_rangeScaler.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from typing import Any + +import pandas as pd + +from sklearn import MinMaxScaler +from safeds.data.tabular.containers import Table +from safeds.data.tabular.transformation._table_transformer import TableTransformer + + +class RangeScaler(TableTransformer): + """ + Normalize Values in a Table + + Parameters + ---------- + + Examples + -------- + """ + pass From f4233accd172183c640ec7f140d510c7223a530b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 19 May 2023 14:02:31 +0200 Subject: [PATCH 02/26] Refactor naming scheme to camel case --- .../tabular/transformation/{_rangeScaler.py => _range_scaler.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/safeds/data/tabular/transformation/{_rangeScaler.py => _range_scaler.py} (100%) diff --git a/src/safeds/data/tabular/transformation/_rangeScaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py similarity index 100% rename from src/safeds/data/tabular/transformation/_rangeScaler.py rename to src/safeds/data/tabular/transformation/_range_scaler.py From 13d861c426e64fe09f77a5e5a5e637d99a7e7262 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 19 May 2023 14:09:43 +0200 Subject: [PATCH 03/26] added __init__ and imports --- .../data/tabular/transformation/_rangeScaler.py | 14 
++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_rangeScaler.py b/src/safeds/data/tabular/transformation/_rangeScaler.py index fd77786a0..dc5931c07 100644 --- a/src/safeds/data/tabular/transformation/_rangeScaler.py +++ b/src/safeds/data/tabular/transformation/_rangeScaler.py @@ -4,12 +4,13 @@ import pandas as pd -from sklearn import MinMaxScaler +from sklearn.preprocessing import MinMaxScaler from safeds.data.tabular.containers import Table -from safeds.data.tabular.transformation._table_transformer import TableTransformer +from safeds.data.tabular.transformation._table_transformer import TableTransformer, InvertibleTableTransformer +from safeds.exceptions import TransformerNotFittedError, UnknownColumnNameError -class RangeScaler(TableTransformer): +class RangeScaler(InvertibleTableTransformer): """ Normalize Values in a Table @@ -19,4 +20,9 @@ class RangeScaler(TableTransformer): Examples -------- """ - pass + + def __init__(self, minimum:float = 0.0 , maximum:float = 1.0): + self._minimum = minimum + self._maximum = maximum + + From 2fee42f12bcd802c22be3feedf9586e7979cccab Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 19 May 2023 14:18:44 +0200 Subject: [PATCH 04/26] added all methods --- .../tabular/transformation/_range_scaler.py | 171 +++++++++++++++++- 1 file changed, 170 insertions(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index dc5931c07..11e32d6e6 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -3,6 +3,7 @@ from typing import Any import pandas as pd +from _pytest import warnings from sklearn.preprocessing import MinMaxScaler from safeds.data.tabular.containers import Table @@ -12,7 +13,7 @@ class RangeScaler(InvertibleTableTransformer): """ - Normalize Values in a Table + The RangeScaler Parameters 
---------- @@ -26,3 +27,171 @@ def __init__(self, minimum:float = 0.0 , maximum:float = 1.0): self._maximum = maximum + def fit(self, table: Table, column_names: list[str] | None) -> RangeScaler: + """ + Learn a transformation for a set of columns in a table. + + This transformer is not modified. + + Parameters + ---------- + table : Table + The table used to fit the transformer. + column_names : Optional[list[str]] + The list of columns from the table used to fit the transformer. If `None`, all columns are used. + + Returns + ------- + fitted_transformer : TableTransformer + The fitted transformer. + """ + if column_names is None: + column_names = table.column_names + else: + missing_columns = set(column_names) - set(table.column_names) + if len(missing_columns) > 0: + raise UnknownColumnNameError(list(missing_columns)) + + wrapped_transformer = MinMaxScaler() + wrapped_transformer.fit(table._data[column_names]) + + result = RangeScaler() + result._wrapped_transformer = wrapped_transformer + result._column_names = column_names + + return result + + def transform(self, table: Table) -> Table: + """ + Apply the learned transformation to a table. + + The table is not modified. + + Parameters + ---------- + table : Table + The table to which the learned transformation is applied. + + Returns + ------- + transformed_table : Table + The transformed table. + + Raises + ------ + TransformerNotFittedError + If the transformer has not been fitted yet. 
+ """ + # Transformer has not been fitted yet + if self._wrapped_transformer is None or self._column_names is None: + raise TransformerNotFittedError + + # Input table does not contain all columns used to fit the transformer + missing_columns = set(self._column_names) - set(table.column_names) + if len(missing_columns) > 0: + raise UnknownColumnNameError(list(missing_columns)) + + data = table._data.copy() + data.columns = table.column_names + data[self._column_names] = self._wrapped_transformer.transform(data[self._column_names]) + return Table._from_pandas_dataframe(data) + + def inverse_transform(self, transformed_table: Table) -> Table: + """ + Undo the learned transformation. + + The table is not modified. + + Parameters + ---------- + transformed_table : Table + The table to be transformed back to the original version. + + Returns + ------- + table : Table + The original table. + + Raises + ------ + TransformerNotFittedError + If the transformer has not been fitted yet. + """ + # Transformer has not been fitted yet + if self._wrapped_transformer is None or self._column_names is None: + raise TransformerNotFittedError + + data = transformed_table._data.copy() + data.columns = transformed_table.column_names + data[self._column_names] = self._wrapped_transformer.inverse_transform(data[self._column_names]) + return Table._from_pandas_dataframe(data) + + def is_fitted(self) -> bool: + """ + Check if the transformer is fitted. + + Returns + ------- + is_fitted : bool + Whether the transformer is fitted. + """ + return self._wrapped_transformer is not None + + def get_names_of_added_columns(self) -> list[str]: + """ + Get the names of all new columns that have been added by the RangeScaler. + + Returns + ------- + added_columns : list[str] + A list of names of the added columns, ordered as they will appear in the table. + + Raises + ------ + TransformerNotFittedError + If the transformer has not been fitted yet. 
+ """ + warnings.warn("RangeScaler only changes data within columns, but does not add any columns.", stacklevel=1) + if not self.is_fitted(): + raise TransformerNotFittedError + return [] + + # (Must implement abstract method, cannot instantiate class otherwise.) + def get_names_of_changed_columns(self) -> list[str]: + """ + Get the names of all columns that may have been changed by the RangeScaler. + + Returns + ------- + changed_columns : list[str] + The list of (potentially) changed column names, as passed to fit. + + Raises + ------ + TransformerNotFittedError + If the transformer has not been fitted yet. + """ + if self._column_names is None: + raise TransformerNotFittedError + return self._column_names + + def get_names_of_removed_columns(self) -> list[str]: + """ + Get the names of all columns that have been removed by the RangeScaler. + + Returns + ------- + removed_columns : list[str] + A list of names of the removed columns, ordered as they appear in the table the RangeScaler was fitted on. + + Raises + ------ + TransformerNotFittedError + If the transformer has not been fitted yet. 
+ """ + warnings.warn("RangeScaler only changes data within columns, but does not remove any columns.", stacklevel=1) + if not self.is_fitted(): + raise TransformerNotFittedError + return [] + + From f096b7391596c70e97174eb1afef060717f5dd0c Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 19 May 2023 14:27:23 +0200 Subject: [PATCH 05/26] added ValueError --- src/safeds/data/tabular/transformation/_range_scaler.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 11e32d6e6..6fc11488a 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -20,9 +20,15 @@ class RangeScaler(InvertibleTableTransformer): Examples -------- + + Raises + ------ + ValueError + If the given minimum is greater or equal to the given maximum """ def __init__(self, minimum:float = 0.0 , maximum:float = 1.0): + if minimum >= maximum : raise ValueError self._minimum = minimum self._maximum = maximum From 7fdef9d1c9c5a903aa819c37656b96087c26caa5 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 19 May 2023 14:34:46 +0200 Subject: [PATCH 06/26] updated imports and __init__ method fields --- .../tabular/transformation/_range_scaler.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 6fc11488a..0779c5b98 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -1,13 +1,9 @@ from __future__ import annotations - -from typing import Any - -import pandas as pd -from _pytest import warnings +imporfrom _pytest import warnings from sklearn.preprocessing import MinMaxScaler from safeds.data.tabular.containers import Table -from safeds.data.tabular.transformation._table_transformer 
import TableTransformer, InvertibleTableTransformer +from safeds.data.tabular.transformation._table_transformer import InvertibleTableTransformer from safeds.exceptions import TransformerNotFittedError, UnknownColumnNameError @@ -27,12 +23,14 @@ class RangeScaler(InvertibleTableTransformer): If the given minimum is greater or equal to the given maximum """ - def __init__(self, minimum:float = 0.0 , maximum:float = 1.0): - if minimum >= maximum : raise ValueError + def __init__(self, minimum: float = 0.0, maximum: float = 1.0): + self._column_names = None + self._wrapped_transformer = None + if minimum >= maximum: + raise ValueError self._minimum = minimum self._maximum = maximum - def fit(self, table: Table, column_names: list[str] | None) -> RangeScaler: """ Learn a transformation for a set of columns in a table. @@ -199,5 +197,3 @@ def get_names_of_removed_columns(self) -> list[str]: if not self.is_fitted(): raise TransformerNotFittedError return [] - - From 1e3f11c37e275bae8b3ca7c80855109cd098f359 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 19 May 2023 14:36:29 +0200 Subject: [PATCH 07/26] merged main --- src/safeds/data/tabular/transformation/_range_scaler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 0779c5b98..055fc3aa1 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -1,5 +1,5 @@ from __future__ import annotations -imporfrom _pytest import warnings +from _pytest import warnings from sklearn.preprocessing import MinMaxScaler from safeds.data.tabular.containers import Table From d1a38d07950bbdfc1a48798cf9cabe4278a12ae7 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 19 May 2023 14:40:10 +0200 Subject: [PATCH 08/26] added docstring to RangeScaler --- .../data/tabular/transformation/_range_scaler.py | 12 ++++-------- 1 file changed, 4 
insertions(+), 8 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 055fc3aa1..9bac604cf 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -1,7 +1,9 @@ from __future__ import annotations -from _pytest import warnings + +import warnings from sklearn.preprocessing import MinMaxScaler + from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation._table_transformer import InvertibleTableTransformer from safeds.exceptions import TransformerNotFittedError, UnknownColumnNameError @@ -9,13 +11,7 @@ class RangeScaler(InvertibleTableTransformer): """ - The RangeScaler - - Parameters - ---------- - - Examples - -------- + The RangeScaler transforms column values by scaling each value to a given range. Raises ------ From fa0e5a50694e9cbccfd34cb9593631e819da5d38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 19 May 2023 14:49:38 +0200 Subject: [PATCH 09/26] Add message ot ValueError in init --- src/safeds/data/tabular/transformation/_range_scaler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 9bac604cf..860fb3f20 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -23,7 +23,7 @@ def __init__(self, minimum: float = 0.0, maximum: float = 1.0): self._column_names = None self._wrapped_transformer = None if minimum >= maximum: - raise ValueError + raise ValueError('Parameter "maximum" must be higher than parameter "minimum".') self._minimum = minimum self._maximum = maximum From 8593b8532224e34c97139296f2ed5de8ab0b62a0 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 19 May 2023 
14:51:31 +0200 Subject: [PATCH 10/26] Updated Docstrings and imports --- .../data/tabular/transformation/_range_scaler.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 860fb3f20..2ab2e69cf 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -2,7 +2,7 @@ import warnings -from sklearn.preprocessing import MinMaxScaler +from sklearn.preprocessing import MinMaxScaler as sk_MinMaxScaler from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation._table_transformer import InvertibleTableTransformer @@ -13,6 +13,14 @@ class RangeScaler(InvertibleTableTransformer): """ The RangeScaler transforms column values by scaling each value to a given range. + Parameters + ---------- + minimum : float + The minimum of the new range after the transformation + maximum : float + The maximum of the new range after the transformation + column_names : list[str] | None + A List of column Names the RangeScale should be applied to Raises ------ ValueError @@ -20,7 +28,7 @@ class RangeScaler(InvertibleTableTransformer): """ def __init__(self, minimum: float = 0.0, maximum: float = 1.0): - self._column_names = None + self._column_names : list[str] | None = None self._wrapped_transformer = None if minimum >= maximum: raise ValueError('Parameter "maximum" must be higher than parameter "minimum".') @@ -52,7 +60,7 @@ def fit(self, table: Table, column_names: list[str] | None) -> RangeScaler: if len(missing_columns) > 0: raise UnknownColumnNameError(list(missing_columns)) - wrapped_transformer = MinMaxScaler() + wrapped_transformer = sk_MinMaxScaler() wrapped_transformer.fit(table._data[column_names]) result = RangeScaler() From 724bffd16826e99fe92e8dbb714200043dc9bfe9 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 19 May 2023 14:56:10 
+0200 Subject: [PATCH 11/26] added test file --- .../data/tabular/transformation/__init__.py | 2 + .../tabular/transformation/_range_scaler.py | 2 - .../transformation/test_range_scaler.py | 238 ++++++++++++++++++ 3 files changed, 240 insertions(+), 2 deletions(-) create mode 100644 tests/safeds/data/tabular/transformation/test_range_scaler.py diff --git a/src/safeds/data/tabular/transformation/__init__.py b/src/safeds/data/tabular/transformation/__init__.py index 29d4aba23..e8af96cd7 100644 --- a/src/safeds/data/tabular/transformation/__init__.py +++ b/src/safeds/data/tabular/transformation/__init__.py @@ -3,6 +3,7 @@ from ._imputer import Imputer from ._label_encoder import LabelEncoder from ._one_hot_encoder import OneHotEncoder +from ._range_scaler import RangeScaler from ._table_transformer import InvertibleTableTransformer, TableTransformer __all__ = [ @@ -11,4 +12,5 @@ "OneHotEncoder", "InvertibleTableTransformer", "TableTransformer", + "RangeScaler", ] diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 2ab2e69cf..126f8f8fb 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -19,8 +19,6 @@ class RangeScaler(InvertibleTableTransformer): The minimum of the new range after the transformation maximum : float The maximum of the new range after the transformation - column_names : list[str] | None - A List of column Names the RangeScale should be applied to Raises ------ ValueError diff --git a/tests/safeds/data/tabular/transformation/test_range_scaler.py b/tests/safeds/data/tabular/transformation/test_range_scaler.py new file mode 100644 index 000000000..1032f071b --- /dev/null +++ b/tests/safeds/data/tabular/transformation/test_range_scaler.py @@ -0,0 +1,238 @@ +import pytest +from safeds.data.tabular.containers import Table +from safeds.data.tabular.transformation import RangeScaler +from 
safeds.exceptions import TransformerNotFittedError, UnknownColumnNameError + + +class TestFit: + def test_should_raise_if_column_not_found(self) -> None: + table = Table( + { + "col1": ["a", "b", "c"], + }, + ) + + with pytest.raises(UnknownColumnNameError): + LabelEncoder().fit(table, ["col2"]) + + def test_should_not_change_original_transformer(self) -> None: + table = Table( + { + "col1": ["a", "b", "c"], + }, + ) + + transformer = LabelEncoder() + transformer.fit(table, None) + + assert transformer._wrapped_transformer is None + assert transformer._column_names is None + + +class TestTransform: + def test_should_raise_if_column_not_found(self) -> None: + table_to_fit = Table( + { + "col1": ["a", "b", "c"], + }, + ) + + transformer = LabelEncoder().fit(table_to_fit, None) + + table_to_transform = Table( + { + "col2": ["a", "b", "c"], + }, + ) + + with pytest.raises(UnknownColumnNameError): + transformer.transform(table_to_transform) + + def test_should_raise_if_not_fitted(self) -> None: + table = Table( + { + "col1": ["a", "b", "c"], + }, + ) + + transformer = LabelEncoder() + + with pytest.raises(TransformerNotFittedError): + transformer.transform(table) + + +class TestIsFitted: + def test_should_return_false_before_fitting(self) -> None: + transformer = LabelEncoder() + assert not transformer.is_fitted() + + def test_should_return_true_after_fitting(self) -> None: + table = Table( + { + "col1": ["a", "b", "c"], + }, + ) + + transformer = LabelEncoder() + fitted_transformer = transformer.fit(table, None) + assert fitted_transformer.is_fitted() + + +class TestFitAndTransform: + @pytest.mark.parametrize( + ("table", "column_names", "expected"), + [ + ( + Table( + { + "col1": ["a", "b", "b", "c"], + }, + ), + None, + Table( + { + "col1": [0.0, 1.0, 1.0, 2.0], + }, + ), + ), + ( + Table( + { + "col1": ["a", "b", "b", "c"], + "col2": ["a", "b", "b", "c"], + }, + ), + ["col1"], + Table( + { + "col1": [0.0, 1.0, 1.0, 2.0], + "col2": ["a", "b", "b", "c"], + }, + ), + 
), + ], + ) + def test_should_return_transformed_table( + self, + table: Table, + column_names: list[str] | None, + expected: Table, + ) -> None: + assert LabelEncoder().fit_and_transform(table, column_names) == expected + + def test_should_not_change_original_table(self) -> None: + table = Table( + { + "col1": ["a", "b", "c"], + }, + ) + + LabelEncoder().fit_and_transform(table) + + expected = Table( + { + "col1": ["a", "b", "c"], + }, + ) + + assert table == expected + + def test_get_names_of_added_columns(self) -> None: + transformer = LabelEncoder() + with pytest.warns( + UserWarning, + match="LabelEncoder only changes data within columns, but does not add any columns.", + ), pytest.raises(TransformerNotFittedError): + transformer.get_names_of_added_columns() + + table = Table( + { + "a": ["b"], + }, + ) + transformer = transformer.fit(table, None) + with pytest.warns( + UserWarning, + match="LabelEncoder only changes data within columns, but does not add any columns.", + ): + assert transformer.get_names_of_added_columns() == [] + + def test_get_names_of_changed_columns(self) -> None: + transformer = LabelEncoder() + with pytest.raises(TransformerNotFittedError): + transformer.get_names_of_changed_columns() + table = Table( + { + "a": ["b"], + }, + ) + transformer = transformer.fit(table, None) + assert transformer.get_names_of_changed_columns() == ["a"] + + def test_get_names_of_removed_columns(self) -> None: + transformer = LabelEncoder() + with pytest.warns( + UserWarning, + match="LabelEncoder only changes data within columns, but does not remove any columns.", + ), pytest.raises(TransformerNotFittedError): + transformer.get_names_of_removed_columns() + + table = Table( + { + "a": ["b"], + }, + ) + transformer = transformer.fit(table, None) + with pytest.warns( + UserWarning, + match="LabelEncoder only changes data within columns, but does not remove any columns.", + ): + assert transformer.get_names_of_removed_columns() == [] + + +class 
TestInverseTransform: + @pytest.mark.parametrize( + "table", + [ + Table( + { + "col1": ["a", "b", "b", "c"], + }, + ), + ], + ) + def test_should_return_original_table(self, table: Table) -> None: + transformer = LabelEncoder().fit(table, None) + + assert transformer.inverse_transform(transformer.transform(table)) == table + + def test_should_not_change_transformed_table(self) -> None: + table = Table( + { + "col1": ["a", "b", "c"], + }, + ) + + transformer = LabelEncoder().fit(table, None) + transformed_table = transformer.transform(table) + transformer.inverse_transform(transformed_table) + + expected = Table( + { + "col1": [0.0, 1.0, 2.0], + }, + ) + + assert transformed_table == expected + + def test_should_raise_if_not_fitted(self) -> None: + table = Table( + { + "col1": [0.0, 1.0, 1.0, 2.0], + }, + ) + + transformer = LabelEncoder() + + with pytest.raises(TransformerNotFittedError): + transformer.inverse_transform(table) From 14480eced73a58c20f808ec208a74fba99efea71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 19 May 2023 15:04:45 +0200 Subject: [PATCH 12/26] Add test for ValueError in init --- .../transformation/test_range_scaler.py | 42 +++++++++++-------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/tests/safeds/data/tabular/transformation/test_range_scaler.py b/tests/safeds/data/tabular/transformation/test_range_scaler.py index 1032f071b..4baa7d61d 100644 --- a/tests/safeds/data/tabular/transformation/test_range_scaler.py +++ b/tests/safeds/data/tabular/transformation/test_range_scaler.py @@ -4,6 +4,12 @@ from safeds.exceptions import TransformerNotFittedError, UnknownColumnNameError +class TestInit: + def test_should_raise_value_error(self) -> None: + with pytest.raises(ValueError, match='Parameter "maximum" must be higher than parameter "minimum".'): + _ = RangeScaler(minimum=10, maximum=0) + + class TestFit: def test_should_raise_if_column_not_found(self) 
-> None: table = Table( @@ -13,7 +19,7 @@ def test_should_raise_if_column_not_found(self) -> None: ) with pytest.raises(UnknownColumnNameError): - LabelEncoder().fit(table, ["col2"]) + RangeScaler().fit(table, ["col2"]) def test_should_not_change_original_transformer(self) -> None: table = Table( @@ -22,7 +28,7 @@ def test_should_not_change_original_transformer(self) -> None: }, ) - transformer = LabelEncoder() + transformer = RangeScaler() transformer.fit(table, None) assert transformer._wrapped_transformer is None @@ -37,7 +43,7 @@ def test_should_raise_if_column_not_found(self) -> None: }, ) - transformer = LabelEncoder().fit(table_to_fit, None) + transformer = RangeScaler().fit(table_to_fit, None) table_to_transform = Table( { @@ -55,7 +61,7 @@ def test_should_raise_if_not_fitted(self) -> None: }, ) - transformer = LabelEncoder() + transformer = RangeScaler() with pytest.raises(TransformerNotFittedError): transformer.transform(table) @@ -63,7 +69,7 @@ def test_should_raise_if_not_fitted(self) -> None: class TestIsFitted: def test_should_return_false_before_fitting(self) -> None: - transformer = LabelEncoder() + transformer = RangeScaler() assert not transformer.is_fitted() def test_should_return_true_after_fitting(self) -> None: @@ -73,7 +79,7 @@ def test_should_return_true_after_fitting(self) -> None: }, ) - transformer = LabelEncoder() + transformer = RangeScaler() fitted_transformer = transformer.fit(table, None) assert fitted_transformer.is_fitted() @@ -118,7 +124,7 @@ def test_should_return_transformed_table( column_names: list[str] | None, expected: Table, ) -> None: - assert LabelEncoder().fit_and_transform(table, column_names) == expected + assert RangeScaler().fit_and_transform(table, column_names) == expected def test_should_not_change_original_table(self) -> None: table = Table( @@ -127,7 +133,7 @@ def test_should_not_change_original_table(self) -> None: }, ) - LabelEncoder().fit_and_transform(table) + RangeScaler().fit_and_transform(table) expected 
= Table( { @@ -138,10 +144,10 @@ def test_should_not_change_original_table(self) -> None: assert table == expected def test_get_names_of_added_columns(self) -> None: - transformer = LabelEncoder() + transformer = RangeScaler() with pytest.warns( UserWarning, - match="LabelEncoder only changes data within columns, but does not add any columns.", + match="RangeScaler only changes data within columns, but does not add any columns.", ), pytest.raises(TransformerNotFittedError): transformer.get_names_of_added_columns() @@ -153,12 +159,12 @@ def test_get_names_of_added_columns(self) -> None: transformer = transformer.fit(table, None) with pytest.warns( UserWarning, - match="LabelEncoder only changes data within columns, but does not add any columns.", + match="RangeScaler only changes data within columns, but does not add any columns.", ): assert transformer.get_names_of_added_columns() == [] def test_get_names_of_changed_columns(self) -> None: - transformer = LabelEncoder() + transformer = RangeScaler() with pytest.raises(TransformerNotFittedError): transformer.get_names_of_changed_columns() table = Table( @@ -170,10 +176,10 @@ def test_get_names_of_changed_columns(self) -> None: assert transformer.get_names_of_changed_columns() == ["a"] def test_get_names_of_removed_columns(self) -> None: - transformer = LabelEncoder() + transformer = RangeScaler() with pytest.warns( UserWarning, - match="LabelEncoder only changes data within columns, but does not remove any columns.", + match="RangeScaler only changes data within columns, but does not remove any columns.", ), pytest.raises(TransformerNotFittedError): transformer.get_names_of_removed_columns() @@ -185,7 +191,7 @@ def test_get_names_of_removed_columns(self) -> None: transformer = transformer.fit(table, None) with pytest.warns( UserWarning, - match="LabelEncoder only changes data within columns, but does not remove any columns.", + match="RangeScaler only changes data within columns, but does not remove any columns.", ): 
assert transformer.get_names_of_removed_columns() == [] @@ -202,7 +208,7 @@ class TestInverseTransform: ], ) def test_should_return_original_table(self, table: Table) -> None: - transformer = LabelEncoder().fit(table, None) + transformer = RangeScaler().fit(table, None) assert transformer.inverse_transform(transformer.transform(table)) == table @@ -213,7 +219,7 @@ def test_should_not_change_transformed_table(self) -> None: }, ) - transformer = LabelEncoder().fit(table, None) + transformer = RangeScaler().fit(table, None) transformed_table = transformer.transform(table) transformer.inverse_transform(transformed_table) @@ -232,7 +238,7 @@ def test_should_raise_if_not_fitted(self) -> None: }, ) - transformer = LabelEncoder() + transformer = RangeScaler() with pytest.raises(TransformerNotFittedError): transformer.inverse_transform(table) From 0e73126afcbd4b6b4a84e5618bf5be916c9c6fae Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 19 May 2023 13:07:40 +0000 Subject: [PATCH 13/26] style: apply automated linter fixes --- src/safeds/data/tabular/transformation/_range_scaler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 126f8f8fb..dcd824757 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -26,7 +26,7 @@ class RangeScaler(InvertibleTableTransformer): """ def __init__(self, minimum: float = 0.0, maximum: float = 1.0): - self._column_names : list[str] | None = None + self._column_names: list[str] | None = None self._wrapped_transformer = None if minimum >= maximum: raise ValueError('Parameter "maximum" must be higher than parameter "minimum".') From 0873bbb981f8595db52b8576e8b1df8bb88c77cf Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 19 May 2023 15:11:50 +0200 Subject: [PATCH 14/26] changed 
existing tests, copied from labelEncoder to fit rangeScaler --- .../transformation/test_range_scaler.py | 76 +++++++++---------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/tests/safeds/data/tabular/transformation/test_range_scaler.py b/tests/safeds/data/tabular/transformation/test_range_scaler.py index 1032f071b..4ceaa2461 100644 --- a/tests/safeds/data/tabular/transformation/test_range_scaler.py +++ b/tests/safeds/data/tabular/transformation/test_range_scaler.py @@ -8,21 +8,21 @@ class TestFit: def test_should_raise_if_column_not_found(self) -> None: table = Table( { - "col1": ["a", "b", "c"], + "col1": [0.0, 5.0, 10.0], }, ) with pytest.raises(UnknownColumnNameError): - LabelEncoder().fit(table, ["col2"]) + RangeScaler().fit(table, ["col2"]) def test_should_not_change_original_transformer(self) -> None: table = Table( { - "col1": ["a", "b", "c"], + "col1": [0.0, 5.0, 10.0], }, ) - transformer = LabelEncoder() + transformer = RangeScaler() transformer.fit(table, None) assert transformer._wrapped_transformer is None @@ -33,11 +33,11 @@ class TestTransform: def test_should_raise_if_column_not_found(self) -> None: table_to_fit = Table( { - "col1": ["a", "b", "c"], + "col1": [0.0, 5.0, 10.0], }, ) - transformer = LabelEncoder().fit(table_to_fit, None) + transformer = RangeScaler().fit(table_to_fit, None) table_to_transform = Table( { @@ -51,11 +51,11 @@ def test_should_raise_if_column_not_found(self) -> None: def test_should_raise_if_not_fitted(self) -> None: table = Table( { - "col1": ["a", "b", "c"], + "col1": [0.0, 5.0, 10.0], }, ) - transformer = LabelEncoder() + transformer = RangeScaler() with pytest.raises(TransformerNotFittedError): transformer.transform(table) @@ -63,17 +63,17 @@ def test_should_raise_if_not_fitted(self) -> None: class TestIsFitted: def test_should_return_false_before_fitting(self) -> None: - transformer = LabelEncoder() + transformer = RangeScaler() assert not transformer.is_fitted() def 
test_should_return_true_after_fitting(self) -> None: table = Table( { - "col1": ["a", "b", "c"], + "col1": [0.0, 5.0, 10.0], }, ) - transformer = LabelEncoder() + transformer = RangeScaler() fitted_transformer = transformer.fit(table, None) assert fitted_transformer.is_fitted() @@ -85,28 +85,28 @@ class TestFitAndTransform: ( Table( { - "col1": ["a", "b", "b", "c"], + "col1": [0.0, 5.0, 5.0, 10.0], }, ), None, Table( { - "col1": [0.0, 1.0, 1.0, 2.0], + "col1": [0.0, 0.5, 0.5, 1.0], }, ), ), ( Table( { - "col1": ["a", "b", "b", "c"], - "col2": ["a", "b", "b", "c"], + "col1": [0.0, 5.0, 5.0, 10.0], + "col2": [0.0, 5.0, 5.0, 10.0], }, ), ["col1"], Table( { - "col1": [0.0, 1.0, 1.0, 2.0], - "col2": ["a", "b", "b", "c"], + "col1": [0.0, 0.5, 0.5, 1.0], + "col2": [0.0, 5.0, 5.0, 10.0], }, ), ), @@ -118,74 +118,74 @@ def test_should_return_transformed_table( column_names: list[str] | None, expected: Table, ) -> None: - assert LabelEncoder().fit_and_transform(table, column_names) == expected + assert RangeScaler().fit_and_transform(table, column_names) == expected def test_should_not_change_original_table(self) -> None: table = Table( { - "col1": ["a", "b", "c"], + "col1": [0.0, 5.0, 10.0], }, ) - LabelEncoder().fit_and_transform(table) + RangeScaler().fit_and_transform(table) expected = Table( { - "col1": ["a", "b", "c"], + "col1": [0.0, 5.0, 10.0], }, ) assert table == expected def test_get_names_of_added_columns(self) -> None: - transformer = LabelEncoder() + transformer = RangeScaler() with pytest.warns( UserWarning, - match="LabelEncoder only changes data within columns, but does not add any columns.", + match="RangeScaler only changes data within columns, but does not add any columns.", ), pytest.raises(TransformerNotFittedError): transformer.get_names_of_added_columns() table = Table( { - "a": ["b"], + "a": [0.0], }, ) transformer = transformer.fit(table, None) with pytest.warns( UserWarning, - match="LabelEncoder only changes data within columns, but does not add 
any columns.", + match="RangeScaler only changes data within columns, but does not add any columns.", ): assert transformer.get_names_of_added_columns() == [] def test_get_names_of_changed_columns(self) -> None: - transformer = LabelEncoder() + transformer = RangeScaler() with pytest.raises(TransformerNotFittedError): transformer.get_names_of_changed_columns() table = Table( { - "a": ["b"], + "a": [0.0], }, ) transformer = transformer.fit(table, None) assert transformer.get_names_of_changed_columns() == ["a"] def test_get_names_of_removed_columns(self) -> None: - transformer = LabelEncoder() + transformer = RangeScaler() with pytest.warns( UserWarning, - match="LabelEncoder only changes data within columns, but does not remove any columns.", + match="RangeScaler only changes data within columns, but does not remove any columns.", ), pytest.raises(TransformerNotFittedError): transformer.get_names_of_removed_columns() table = Table( { - "a": ["b"], + "a": [0.0], }, ) transformer = transformer.fit(table, None) with pytest.warns( UserWarning, - match="LabelEncoder only changes data within columns, but does not remove any columns.", + match="RangeScaler only changes data within columns, but does not remove any columns.", ): assert transformer.get_names_of_removed_columns() == [] @@ -196,30 +196,30 @@ class TestInverseTransform: [ Table( { - "col1": ["a", "b", "b", "c"], + "col1": [0.0, 5.0, 5.0, 10.0], }, ), ], ) def test_should_return_original_table(self, table: Table) -> None: - transformer = LabelEncoder().fit(table, None) + transformer = RangeScaler().fit(table, None) assert transformer.inverse_transform(transformer.transform(table)) == table def test_should_not_change_transformed_table(self) -> None: table = Table( { - "col1": ["a", "b", "c"], + "col1": [0.0, 5.0, 10.0], }, ) - transformer = LabelEncoder().fit(table, None) + transformer = RangeScaler().fit(table, None) transformed_table = transformer.transform(table) transformer.inverse_transform(transformed_table) 
expected = Table( { - "col1": [0.0, 1.0, 2.0], + "col1": [0.0, 5.0, 10.0], }, ) @@ -228,11 +228,11 @@ def test_should_not_change_transformed_table(self) -> None: def test_should_raise_if_not_fitted(self) -> None: table = Table( { - "col1": [0.0, 1.0, 1.0, 2.0], + "col1": [0.0, 5.0, 5.0, 10.0], }, ) - transformer = LabelEncoder() + transformer = RangeScaler() with pytest.raises(TransformerNotFittedError): transformer.inverse_transform(table) From c93211910e8d5d0d79d1b359bedf310fd1eb0fb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 19 May 2023 15:17:26 +0200 Subject: [PATCH 15/26] Fix test for should not change case --- tests/safeds/data/tabular/transformation/test_range_scaler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/safeds/data/tabular/transformation/test_range_scaler.py b/tests/safeds/data/tabular/transformation/test_range_scaler.py index b0bbeb424..6693b5853 100644 --- a/tests/safeds/data/tabular/transformation/test_range_scaler.py +++ b/tests/safeds/data/tabular/transformation/test_range_scaler.py @@ -215,7 +215,7 @@ def test_should_return_original_table(self, table: Table) -> None: def test_should_not_change_transformed_table(self) -> None: table = Table( { - "col1": [0.0, 5.0, 10.0], + "col1": [0.0, 0.5, 1.0], }, ) @@ -225,7 +225,7 @@ def test_should_not_change_transformed_table(self) -> None: expected = Table( { - "col1": [0.0, 5.0, 10.0], + "col1": [0.0, 0.5, 1.0], }, ) From f2f9d21f588c12d7e56514fbf9e024167a03338e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 19 May 2023 15:40:08 +0200 Subject: [PATCH 16/26] Add template for StandardScaler --- .../data/tabular/transformation/__init__.py | 2 + .../transformation/_standard_scaler.py | 201 ++++++++++++++++++ 2 files changed, 203 insertions(+) create mode 100644 src/safeds/data/tabular/transformation/_standard_scaler.py 
diff --git a/src/safeds/data/tabular/transformation/__init__.py b/src/safeds/data/tabular/transformation/__init__.py index e8af96cd7..edf45242b 100644 --- a/src/safeds/data/tabular/transformation/__init__.py +++ b/src/safeds/data/tabular/transformation/__init__.py @@ -4,6 +4,7 @@ from ._label_encoder import LabelEncoder from ._one_hot_encoder import OneHotEncoder from ._range_scaler import RangeScaler +from ._standard_scaler import StandardScaler from ._table_transformer import InvertibleTableTransformer, TableTransformer __all__ = [ @@ -13,4 +14,5 @@ "InvertibleTableTransformer", "TableTransformer", "RangeScaler", + "StandardScaler", ] diff --git a/src/safeds/data/tabular/transformation/_standard_scaler.py b/src/safeds/data/tabular/transformation/_standard_scaler.py new file mode 100644 index 000000000..a75db8c5a --- /dev/null +++ b/src/safeds/data/tabular/transformation/_standard_scaler.py @@ -0,0 +1,201 @@ +from __future__ import annotations + +import warnings + +from sklearn.preprocessing import StandardScaler as sk_StandardScaler + +from safeds.data.tabular.containers import Table +from safeds.data.tabular.transformation._table_transformer import InvertibleTableTransformer +from safeds.exceptions import TransformerNotFittedError, UnknownColumnNameError + + +class StandardScaler(InvertibleTableTransformer): + """ + The StandardScaler transforms column values by scaling each value to a given range. 
+ + Parameters + ---------- + minimum : float + The minimum of the new range after the transformation + maximum : float + The maximum of the new range after the transformation + Raises + ------ + ValueError + If the given minimum is greater or equal to the given maximum + """ + + def __init__(self, minimum: float = 0.0, maximum: float = 1.0): + self._column_names: list[str] | None = None + self._wrapped_transformer = None + if minimum >= maximum: + raise ValueError('Parameter "maximum" must be higher than parameter "minimum".') + self._minimum = minimum + self._maximum = maximum + + def fit(self, table: Table, column_names: list[str] | None) -> StandardScaler: + """ + Learn a transformation for a set of columns in a table. + + This transformer is not modified. + + Parameters + ---------- + table : Table + The table used to fit the transformer. + column_names : Optional[list[str]] + The list of columns from the table used to fit the transformer. If `None`, all columns are used. + + Returns + ------- + fitted_transformer : TableTransformer + The fitted transformer. + """ + if column_names is None: + column_names = table.column_names + else: + missing_columns = set(column_names) - set(table.column_names) + if len(missing_columns) > 0: + raise UnknownColumnNameError(list(missing_columns)) + + wrapped_transformer = sk_StandardScaler() + wrapped_transformer.fit(table._data[column_names]) + + result = StandardScaler() + result._wrapped_transformer = wrapped_transformer + result._column_names = column_names + + return result + + def transform(self, table: Table) -> Table: + """ + Apply the learned transformation to a table. + + The table is not modified. + + Parameters + ---------- + table : Table + The table to which the learned transformation is applied. + + Returns + ------- + transformed_table : Table + The transformed table. + + Raises + ------ + TransformerNotFittedError + If the transformer has not been fitted yet. 
+ """ + # Transformer has not been fitted yet + if self._wrapped_transformer is None or self._column_names is None: + raise TransformerNotFittedError + + # Input table does not contain all columns used to fit the transformer + missing_columns = set(self._column_names) - set(table.column_names) + if len(missing_columns) > 0: + raise UnknownColumnNameError(list(missing_columns)) + + data = table._data.copy() + data.columns = table.column_names + data[self._column_names] = self._wrapped_transformer.transform(data[self._column_names]) + return Table._from_pandas_dataframe(data) + + def inverse_transform(self, transformed_table: Table) -> Table: + """ + Undo the learned transformation. + + The table is not modified. + + Parameters + ---------- + transformed_table : Table + The table to be transformed back to the original version. + + Returns + ------- + table : Table + The original table. + + Raises + ------ + TransformerNotFittedError + If the transformer has not been fitted yet. + """ + # Transformer has not been fitted yet + if self._wrapped_transformer is None or self._column_names is None: + raise TransformerNotFittedError + + data = transformed_table._data.copy() + data.columns = transformed_table.column_names + data[self._column_names] = self._wrapped_transformer.inverse_transform(data[self._column_names]) + return Table._from_pandas_dataframe(data) + + def is_fitted(self) -> bool: + """ + Check if the transformer is fitted. + + Returns + ------- + is_fitted : bool + Whether the transformer is fitted. + """ + return self._wrapped_transformer is not None + + def get_names_of_added_columns(self) -> list[str]: + """ + Get the names of all new columns that have been added by the StandardScaler. + + Returns + ------- + added_columns : list[str] + A list of names of the added columns, ordered as they will appear in the table. + + Raises + ------ + TransformerNotFittedError + If the transformer has not been fitted yet. 
+ """ + warnings.warn("StandardScaler only changes data within columns, but does not add any columns.", stacklevel=1) + if not self.is_fitted(): + raise TransformerNotFittedError + return [] + + # (Must implement abstract method, cannot instantiate class otherwise.) + def get_names_of_changed_columns(self) -> list[str]: + """ + Get the names of all columns that may have been changed by the StandardScaler. + + Returns + ------- + changed_columns : list[str] + The list of (potentially) changed column names, as passed to fit. + + Raises + ------ + TransformerNotFittedError + If the transformer has not been fitted yet. + """ + if self._column_names is None: + raise TransformerNotFittedError + return self._column_names + + def get_names_of_removed_columns(self) -> list[str]: + """ + Get the names of all columns that have been removed by the StandardScaler. + + Returns + ------- + removed_columns : list[str] + A list of names of the removed columns, ordered as they appear in the table the StandardScaler was fitted on. + + Raises + ------ + TransformerNotFittedError + If the transformer has not been fitted yet. 
+ """ + warnings.warn("StandardScaler only changes data within columns, but does not remove any columns.", stacklevel=1) + if not self.is_fitted(): + raise TransformerNotFittedError + return [] From 26c6a56cbfbccbdf69c4de54e97dddd65f75a90b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 19 May 2023 15:42:03 +0200 Subject: [PATCH 17/26] Fix init method --- .../transformation/_standard_scaler.py | 25 +++---------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_standard_scaler.py b/src/safeds/data/tabular/transformation/_standard_scaler.py index a75db8c5a..03ee3bb8e 100644 --- a/src/safeds/data/tabular/transformation/_standard_scaler.py +++ b/src/safeds/data/tabular/transformation/_standard_scaler.py @@ -10,28 +10,11 @@ class StandardScaler(InvertibleTableTransformer): - """ - The StandardScaler transforms column values by scaling each value to a given range. 
- - Parameters - ---------- - minimum : float - The minimum of the new range after the transformation - maximum : float - The maximum of the new range after the transformation - Raises - ------ - ValueError - If the given minimum is greater or equal to the given maximum - """ - - def __init__(self, minimum: float = 0.0, maximum: float = 1.0): + """The StandardScaler transforms column values by scaling each value to a given range.""" + + def __init__(self): self._column_names: list[str] | None = None - self._wrapped_transformer = None - if minimum >= maximum: - raise ValueError('Parameter "maximum" must be higher than parameter "minimum".') - self._minimum = minimum - self._maximum = maximum + self._wrapped_transformer: sk_StandardScaler | None = None def fit(self, table: Table, column_names: list[str] | None) -> StandardScaler: """ From abba5e4443648adbb34f6e61cf2731ed8715450a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 19 May 2023 15:51:12 +0200 Subject: [PATCH 18/26] Add template for tests --- .../transformation/test_standard_scaler.py | 244 ++++++++++++++++++ 1 file changed, 244 insertions(+) create mode 100644 tests/safeds/data/tabular/transformation/test_standard_scaler.py diff --git a/tests/safeds/data/tabular/transformation/test_standard_scaler.py b/tests/safeds/data/tabular/transformation/test_standard_scaler.py new file mode 100644 index 000000000..28857082e --- /dev/null +++ b/tests/safeds/data/tabular/transformation/test_standard_scaler.py @@ -0,0 +1,244 @@ +import pytest +from safeds.data.tabular.containers import Table +from safeds.data.tabular.transformation import StandardScaler +from safeds.exceptions import TransformerNotFittedError, UnknownColumnNameError + + +class TestInit: + def test_should_raise_value_error(self) -> None: + with pytest.raises(ValueError, match='Parameter "maximum" must be higher than parameter "minimum".'): + _ = StandardScaler(minimum=10, 
maximum=0) + + +class TestFit: + def test_should_raise_if_column_not_found(self) -> None: + table = Table( + { + "col1": [0.0, 5.0, 10.0], + }, + ) + + with pytest.raises(UnknownColumnNameError): + StandardScaler().fit(table, ["col2"]) + + def test_should_not_change_original_transformer(self) -> None: + table = Table( + { + "col1": [0.0, 5.0, 10.0], + }, + ) + + transformer = StandardScaler() + transformer.fit(table, None) + + assert transformer._wrapped_transformer is None + assert transformer._column_names is None + + +class TestTransform: + def test_should_raise_if_column_not_found(self) -> None: + table_to_fit = Table( + { + "col1": [0.0, 5.0, 10.0], + }, + ) + + transformer = StandardScaler().fit(table_to_fit, None) + + table_to_transform = Table( + { + "col2": ["a", "b", "c"], + }, + ) + + with pytest.raises(UnknownColumnNameError): + transformer.transform(table_to_transform) + + def test_should_raise_if_not_fitted(self) -> None: + table = Table( + { + "col1": [0.0, 5.0, 10.0], + }, + ) + + transformer = StandardScaler() + + with pytest.raises(TransformerNotFittedError): + transformer.transform(table) + + +class TestIsFitted: + def test_should_return_false_before_fitting(self) -> None: + transformer = StandardScaler() + assert not transformer.is_fitted() + + def test_should_return_true_after_fitting(self) -> None: + table = Table( + { + "col1": [0.0, 5.0, 10.0], + }, + ) + + transformer = StandardScaler() + fitted_transformer = transformer.fit(table, None) + assert fitted_transformer.is_fitted() + + +class TestFitAndTransform: + @pytest.mark.parametrize( + ("table", "column_names", "expected"), + [ + ( + Table( + { + "col1": [0.0, 5.0, 5.0, 10.0], + }, + ), + None, + Table( + { + "col1": [0.0, 0.5, 0.5, 1.0], + }, + ), + ), + ( + Table( + { + "col1": [0.0, 5.0, 5.0, 10.0], + "col2": [0.0, 5.0, 5.0, 10.0], + }, + ), + ["col1"], + Table( + { + "col1": [0.0, 0.5, 0.5, 1.0], + "col2": [0.0, 5.0, 5.0, 10.0], + }, + ), + ), + ], + ) + def 
test_should_return_transformed_table( + self, + table: Table, + column_names: list[str] | None, + expected: Table, + ) -> None: + assert StandardScaler().fit_and_transform(table, column_names) == expected + + def test_should_not_change_original_table(self) -> None: + table = Table( + { + "col1": [0.0, 5.0, 10.0], + }, + ) + + StandardScaler().fit_and_transform(table) + + expected = Table( + { + "col1": [0.0, 5.0, 10.0], + }, + ) + + assert table == expected + + def test_get_names_of_added_columns(self) -> None: + transformer = StandardScaler() + with pytest.warns( + UserWarning, + match="StandardScaler only changes data within columns, but does not add any columns.", + ), pytest.raises(TransformerNotFittedError): + transformer.get_names_of_added_columns() + + table = Table( + { + "a": [0.0], + }, + ) + transformer = transformer.fit(table, None) + with pytest.warns( + UserWarning, + match="StandardScaler only changes data within columns, but does not add any columns.", + ): + assert transformer.get_names_of_added_columns() == [] + + def test_get_names_of_changed_columns(self) -> None: + transformer = StandardScaler() + with pytest.raises(TransformerNotFittedError): + transformer.get_names_of_changed_columns() + table = Table( + { + "a": [0.0], + }, + ) + transformer = transformer.fit(table, None) + assert transformer.get_names_of_changed_columns() == ["a"] + + def test_get_names_of_removed_columns(self) -> None: + transformer = StandardScaler() + with pytest.warns( + UserWarning, + match="StandardScaler only changes data within columns, but does not remove any columns.", + ), pytest.raises(TransformerNotFittedError): + transformer.get_names_of_removed_columns() + + table = Table( + { + "a": [0.0], + }, + ) + transformer = transformer.fit(table, None) + with pytest.warns( + UserWarning, + match="StandardScaler only changes data within columns, but does not remove any columns.", + ): + assert transformer.get_names_of_removed_columns() == [] + + +class 
TestInverseTransform: + @pytest.mark.parametrize( + "table", + [ + Table( + { + "col1": [0.0, 5.0, 5.0, 10.0], + }, + ), + ], + ) + def test_should_return_original_table(self, table: Table) -> None: + transformer = StandardScaler().fit(table, None) + + assert transformer.inverse_transform(transformer.transform(table)) == table + + def test_should_not_change_transformed_table(self) -> None: + table = Table( + { + "col1": [0.0, 0.5, 1.0], + }, + ) + + transformer = StandardScaler().fit(table, None) + transformed_table = transformer.transform(table) + transformer.inverse_transform(transformed_table) + + expected = Table( + { + "col1": [0.0, 0.5, 1.0], + }, + ) + + assert transformed_table == expected + + def test_should_raise_if_not_fitted(self) -> None: + table = Table( + { + "col1": [0.0, 5.0, 5.0, 10.0], + }, + ) + + transformer = StandardScaler() + + with pytest.raises(TransformerNotFittedError): + transformer.inverse_transform(table) From ad248ac1708ee8ed9f7d74d04dafe4a195e08f43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 19 May 2023 15:52:08 +0200 Subject: [PATCH 19/26] Drop unnecessary constructor test --- .../data/tabular/transformation/test_standard_scaler.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/safeds/data/tabular/transformation/test_standard_scaler.py b/tests/safeds/data/tabular/transformation/test_standard_scaler.py index 28857082e..eb0794c52 100644 --- a/tests/safeds/data/tabular/transformation/test_standard_scaler.py +++ b/tests/safeds/data/tabular/transformation/test_standard_scaler.py @@ -4,12 +4,6 @@ from safeds.exceptions import TransformerNotFittedError, UnknownColumnNameError -class TestInit: - def test_should_raise_value_error(self) -> None: - with pytest.raises(ValueError, match='Parameter "maximum" must be higher than parameter "minimum".'): - _ = StandardScaler(minimum=10, maximum=0) - - class TestFit: def
test_should_raise_if_column_not_found(self) -> None: table = Table( From 8d29399e08eea935311b18843753a2e3b33c4ec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 19 May 2023 16:11:26 +0200 Subject: [PATCH 20/26] Start adding proper testcases --- .../tabular/transformation/test_standard_scaler.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/safeds/data/tabular/transformation/test_standard_scaler.py b/tests/safeds/data/tabular/transformation/test_standard_scaler.py index eb0794c52..c5528c66d 100644 --- a/tests/safeds/data/tabular/transformation/test_standard_scaler.py +++ b/tests/safeds/data/tabular/transformation/test_standard_scaler.py @@ -85,28 +85,30 @@ class TestFitAndTransform: ( Table( { - "col1": [0.0, 5.0, 5.0, 10.0], + "col1": [0.0, 0.0, 1.0, 1.0], + "col2": [0.0, 0.0, 1.0, 1.0], }, ), None, Table( { - "col1": [0.0, 0.5, 0.5, 1.0], + "col1": [-1.0, -1.0, 1.0, 1.0], + "col2": [-1.0, -1.0, 1.0, 1.0], }, ), ), ( Table( { - "col1": [0.0, 5.0, 5.0, 10.0], - "col2": [0.0, 5.0, 5.0, 10.0], + "col1": [2, 2], + "col2": [2, 2] }, ), ["col1"], Table( { - "col1": [0.0, 0.5, 0.5, 1.0], - "col2": [0.0, 5.0, 5.0, 10.0], + "col1": [3, 3], + "col2": [2, 2] }, ), ), From 4f3a5afa2123e3905d7be0794db62cb8addefed4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 19 May 2023 16:29:42 +0200 Subject: [PATCH 21/26] Start refactoring tests --- .../transformation/test_standard_scaler.py | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/tests/safeds/data/tabular/transformation/test_standard_scaler.py b/tests/safeds/data/tabular/transformation/test_standard_scaler.py index c5528c66d..1eb8f3db1 100644 --- a/tests/safeds/data/tabular/transformation/test_standard_scaler.py +++ b/tests/safeds/data/tabular/transformation/test_standard_scaler.py @@ -78,9 +78,9 @@ 
def test_should_return_true_after_fitting(self) -> None: assert fitted_transformer.is_fitted() -class TestFitAndTransform: +class TestFitAndTransformOnMultipleTables: @pytest.mark.parametrize( - ("table", "column_names", "expected"), + ("fit_and_transform_table", "only_transform_table", "column_names", "expected_1", "expected_2"), [ ( Table( @@ -89,39 +89,40 @@ class TestFitAndTransform: "col2": [0.0, 0.0, 1.0, 1.0], }, ), - None, Table( { - "col1": [-1.0, -1.0, 1.0, 1.0], - "col2": [-1.0, -1.0, 1.0, 1.0], + "col1": [2, 2], }, ), - ), - ( + None, Table( { - "col1": [2, 2], - "col2": [2, 2] + "col1": [-1.0, -1.0, 1.0, 1.0], + "col2": [-1.0, -1.0, 1.0, 1.0], }, ), - ["col1"], Table( { "col1": [3, 3], - "col2": [2, 2] }, ), ), ], ) - def test_should_return_transformed_table( + def test_should_return_transformed_tables( self, - table: Table, + fit_and_transform_table: Table, + only_transform_table: Table, column_names: list[str] | None, - expected: Table, + expected_1: Table, + expected_2: Table, ) -> None: - assert StandardScaler().fit_and_transform(table, column_names) == expected + s = StandardScaler().fit(fit_and_transform_table) + assert s.fit_and_transform(fit_and_transform_table, column_names) == expected_1 + assert s.fit_and_transform(only_transform_table, column_names) == expected_2 + +class TestFitAndTransform: def test_should_not_change_original_table(self) -> None: table = Table( { From 719f3d774e33754b914f3b89af953eaba2d4da5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 26 May 2023 10:22:42 +0200 Subject: [PATCH 22/26] Fix tests Also added check_that_tables_are_close method. 
Co-authored-by: sibre28 <86068340+sibre28@users.noreply.github.com> --- tests/helpers/_resources.py | 26 +++++++++++++++++++ .../transformation/test_standard_scaler.py | 16 +++++++----- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/tests/helpers/_resources.py b/tests/helpers/_resources.py index 5394cdb8b..24735aa8d 100644 --- a/tests/helpers/_resources.py +++ b/tests/helpers/_resources.py @@ -1,5 +1,9 @@ from pathlib import Path +from numpy import isclose + +from src.safeds.data.tabular.containers import Table + _resources_root = Path(__file__).parent / ".." / "resources" @@ -18,3 +22,25 @@ def resolve_resource_path(resource_path: str | Path) -> str: The absolute path to the resource. """ return str(_resources_root / resource_path) + + +def check_that_tables_are_close(table1: Table, table2: Table) -> None: + """ + Check that two tables are almost equal. + + Parameters + ---------- + table1: Table + The first table. + table2: Table + The table to compare the first table to. 
+ """ + assert table1.schema == table2.schema + for column_name in table1.column_names: + assert table1.get_column(column_name).type == table2.get_column(column_name).type + assert table1.get_column(column_name).type.is_numeric() + assert table2.get_column(column_name).type.is_numeric() + for i in range(table1.number_of_rows): + entry_1 = table1.get_column(column_name).get_value(i) + entry_2 = table2.get_column(column_name).get_value(i) + assert (isclose(entry_1, entry_2)) diff --git a/tests/safeds/data/tabular/transformation/test_standard_scaler.py b/tests/safeds/data/tabular/transformation/test_standard_scaler.py index 1eb8f3db1..2e20b1553 100644 --- a/tests/safeds/data/tabular/transformation/test_standard_scaler.py +++ b/tests/safeds/data/tabular/transformation/test_standard_scaler.py @@ -3,6 +3,8 @@ from safeds.data.tabular.transformation import StandardScaler from safeds.exceptions import TransformerNotFittedError, UnknownColumnNameError +from tests.helpers._resources import check_that_tables_are_close + class TestFit: def test_should_raise_if_column_not_found(self) -> None: @@ -91,7 +93,8 @@ class TestFitAndTransformOnMultipleTables: ), Table( { - "col1": [2, 2], + "col1": [2], + "col2": [2], }, ), None, @@ -103,7 +106,8 @@ class TestFitAndTransformOnMultipleTables: ), Table( { - "col1": [3, 3], + "col1": [3.0], + "col2": [3.0], }, ), ), @@ -117,9 +121,9 @@ def test_should_return_transformed_tables( expected_1: Table, expected_2: Table, ) -> None: - s = StandardScaler().fit(fit_and_transform_table) + s = StandardScaler().fit(fit_and_transform_table, column_names) assert s.fit_and_transform(fit_and_transform_table, column_names) == expected_1 - assert s.fit_and_transform(only_transform_table, column_names) == expected_2 + assert s.transform(only_transform_table) == expected_2 class TestFitAndTransform: @@ -218,7 +222,7 @@ def test_should_not_change_transformed_table(self) -> None: transformer = StandardScaler().fit(table, None) transformed_table = 
transformer.transform(table) - transformer.inverse_transform(transformed_table) + transformed_table = transformer.inverse_transform(transformed_table) expected = Table( { @@ -226,7 +230,7 @@ def test_should_not_change_transformed_table(self) -> None: }, ) - assert transformed_table == expected + check_that_tables_are_close(transformed_table, expected) def test_should_raise_if_not_fitted(self) -> None: table = Table( From f514b48e58f25a3187072daf2f1a95b8c358cff1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Severin=20Paul=20H=C3=B6fer?= <84280965+zzril@users.noreply.github.com> Date: Fri, 26 May 2023 11:08:52 +0200 Subject: [PATCH 23/26] Remove warnings --- .../transformation/_standard_scaler.py | 4 ---- .../transformation/test_standard_scaler.py | 22 ++++--------------- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_standard_scaler.py b/src/safeds/data/tabular/transformation/_standard_scaler.py index 03ee3bb8e..d84eb1a99 100644 --- a/src/safeds/data/tabular/transformation/_standard_scaler.py +++ b/src/safeds/data/tabular/transformation/_standard_scaler.py @@ -1,7 +1,5 @@ from __future__ import annotations -import warnings - from sklearn.preprocessing import StandardScaler as sk_StandardScaler from safeds.data.tabular.containers import Table @@ -140,7 +138,6 @@ def get_names_of_added_columns(self) -> list[str]: TransformerNotFittedError If the transformer has not been fitted yet. """ - warnings.warn("StandardScaler only changes data within columns, but does not add any columns.", stacklevel=1) if not self.is_fitted(): raise TransformerNotFittedError return [] @@ -178,7 +175,6 @@ def get_names_of_removed_columns(self) -> list[str]: TransformerNotFittedError If the transformer has not been fitted yet. 
""" - warnings.warn("StandardScaler only changes data within columns, but does not remove any columns.", stacklevel=1) if not self.is_fitted(): raise TransformerNotFittedError return [] diff --git a/tests/safeds/data/tabular/transformation/test_standard_scaler.py b/tests/safeds/data/tabular/transformation/test_standard_scaler.py index 2e20b1553..4b333ae7f 100644 --- a/tests/safeds/data/tabular/transformation/test_standard_scaler.py +++ b/tests/safeds/data/tabular/transformation/test_standard_scaler.py @@ -146,10 +146,7 @@ def test_should_not_change_original_table(self) -> None: def test_get_names_of_added_columns(self) -> None: transformer = StandardScaler() - with pytest.warns( - UserWarning, - match="StandardScaler only changes data within columns, but does not add any columns.", - ), pytest.raises(TransformerNotFittedError): + with pytest.raises(TransformerNotFittedError): transformer.get_names_of_added_columns() table = Table( @@ -158,11 +155,7 @@ def test_get_names_of_added_columns(self) -> None: }, ) transformer = transformer.fit(table, None) - with pytest.warns( - UserWarning, - match="StandardScaler only changes data within columns, but does not add any columns.", - ): - assert transformer.get_names_of_added_columns() == [] + assert transformer.get_names_of_added_columns() == [] def test_get_names_of_changed_columns(self) -> None: transformer = StandardScaler() @@ -178,10 +171,7 @@ def test_get_names_of_changed_columns(self) -> None: def test_get_names_of_removed_columns(self) -> None: transformer = StandardScaler() - with pytest.warns( - UserWarning, - match="StandardScaler only changes data within columns, but does not remove any columns.", - ), pytest.raises(TransformerNotFittedError): + with pytest.raises(TransformerNotFittedError): transformer.get_names_of_removed_columns() table = Table( @@ -190,11 +180,7 @@ def test_get_names_of_removed_columns(self) -> None: }, ) transformer = transformer.fit(table, None) - with pytest.warns( - UserWarning, - 
match="StandardScaler only changes data within columns, but does not remove any columns.", - ): - assert transformer.get_names_of_removed_columns() == [] + assert transformer.get_names_of_removed_columns() == [] class TestInverseTransform: From 7f41501d57517bff910c6b589d7d83b538fb9166 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 26 May 2023 13:02:45 +0200 Subject: [PATCH 24/26] changed approximation method --- tests/helpers/_resources.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/helpers/_resources.py b/tests/helpers/_resources.py index 24735aa8d..92c31667f 100644 --- a/tests/helpers/_resources.py +++ b/tests/helpers/_resources.py @@ -1,6 +1,6 @@ from pathlib import Path -from numpy import isclose +from _pytest.python_api import approx from src.safeds.data.tabular.containers import Table @@ -43,4 +43,4 @@ def check_that_tables_are_close(table1: Table, table2: Table) -> None: for i in range(table1.number_of_rows): entry_1 = table1.get_column(column_name).get_value(i) entry_2 = table2.get_column(column_name).get_value(i) - assert (isclose(entry_1, entry_2)) + assert entry_1 == approx(entry_2) From cf6abbafd57057f2a5885f7646434b9c7de71610 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Fri, 26 May 2023 19:47:38 +0200 Subject: [PATCH 25/26] fix: linter issue --- src/safeds/data/tabular/transformation/_standard_scaler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/transformation/_standard_scaler.py b/src/safeds/data/tabular/transformation/_standard_scaler.py index d84eb1a99..8e2466d5f 100644 --- a/src/safeds/data/tabular/transformation/_standard_scaler.py +++ b/src/safeds/data/tabular/transformation/_standard_scaler.py @@ -10,7 +10,7 @@ class StandardScaler(InvertibleTableTransformer): """The StandardScaler transforms column values by scaling each value to a given range.""" - def __init__(self): + def __init__(self) -> None: self._column_names: list[str] | None = None 
self._wrapped_transformer: sk_StandardScaler | None = None From 2a670cb69ab520e7f08ba1772d3302f6545299dd Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Fri, 26 May 2023 19:54:42 +0200 Subject: [PATCH 26/26] test: move new method to another file since it's unrelated to test resources --- tests/helpers/__init__.py | 3 ++- tests/helpers/_assertions.py | 24 +++++++++++++++++ tests/helpers/_resources.py | 26 ------------------- .../transformation/test_standard_scaler.py | 4 +-- 4 files changed, 28 insertions(+), 29 deletions(-) create mode 100644 tests/helpers/_assertions.py diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py index 019d5cbe7..f7b114a0b 100644 --- a/tests/helpers/__init__.py +++ b/tests/helpers/__init__.py @@ -1,3 +1,4 @@ +from ._assertions import assert_that_tables_are_close from ._resources import resolve_resource_path -__all__ = ["resolve_resource_path"] +__all__ = ["assert_that_tables_are_close", "resolve_resource_path"] diff --git a/tests/helpers/_assertions.py b/tests/helpers/_assertions.py new file mode 100644 index 000000000..ecd93c1b4 --- /dev/null +++ b/tests/helpers/_assertions.py @@ -0,0 +1,24 @@ +import pytest +from safeds.data.tabular.containers import Table + + +def assert_that_tables_are_close(table1: Table, table2: Table) -> None: + """ + Assert that two tables are almost equal. + + Parameters + ---------- + table1: Table + The first table. + table2: Table + The table to compare the first table to. 
+ """ + assert table1.schema == table2.schema + for column_name in table1.column_names: + assert table1.get_column(column_name).type == table2.get_column(column_name).type + assert table1.get_column(column_name).type.is_numeric() + assert table2.get_column(column_name).type.is_numeric() + for i in range(table1.number_of_rows): + entry_1 = table1.get_column(column_name).get_value(i) + entry_2 = table2.get_column(column_name).get_value(i) + assert entry_1 == pytest.approx(entry_2) diff --git a/tests/helpers/_resources.py b/tests/helpers/_resources.py index 92c31667f..5394cdb8b 100644 --- a/tests/helpers/_resources.py +++ b/tests/helpers/_resources.py @@ -1,9 +1,5 @@ from pathlib import Path -from _pytest.python_api import approx - -from src.safeds.data.tabular.containers import Table - _resources_root = Path(__file__).parent / ".." / "resources" @@ -22,25 +18,3 @@ def resolve_resource_path(resource_path: str | Path) -> str: The absolute path to the resource. """ return str(_resources_root / resource_path) - - -def check_that_tables_are_close(table1: Table, table2: Table) -> None: - """ - Check that two tables are almost equal. - - Parameters - ---------- - table1: Table - The first table. - table2: Table - The table to compare the first table to. 
- """ - assert table1.schema == table2.schema - for column_name in table1.column_names: - assert table1.get_column(column_name).type == table2.get_column(column_name).type - assert table1.get_column(column_name).type.is_numeric() - assert table2.get_column(column_name).type.is_numeric() - for i in range(table1.number_of_rows): - entry_1 = table1.get_column(column_name).get_value(i) - entry_2 = table2.get_column(column_name).get_value(i) - assert entry_1 == approx(entry_2) diff --git a/tests/safeds/data/tabular/transformation/test_standard_scaler.py b/tests/safeds/data/tabular/transformation/test_standard_scaler.py index 4b333ae7f..58e4d773f 100644 --- a/tests/safeds/data/tabular/transformation/test_standard_scaler.py +++ b/tests/safeds/data/tabular/transformation/test_standard_scaler.py @@ -3,7 +3,7 @@ from safeds.data.tabular.transformation import StandardScaler from safeds.exceptions import TransformerNotFittedError, UnknownColumnNameError -from tests.helpers._resources import check_that_tables_are_close +from tests.helpers import assert_that_tables_are_close class TestFit: @@ -216,7 +216,7 @@ def test_should_not_change_transformed_table(self) -> None: }, ) - check_that_tables_are_close(transformed_table, expected) + assert_that_tables_are_close(transformed_table, expected) def test_should_raise_if_not_fitted(self) -> None: table = Table(