From bc33cc0bd2cdec9d1248647149e5d9da7ab42be7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 19 Apr 2024 16:13:37 +0200 Subject: [PATCH 01/42] feat: added `ImageDataset` feat: added param to return filenames in `ImageList.from_files` refactor: move `image.utils` to `image._utils` --- .../data/image/{utils => _utils}/__init__.py | 0 ...transformation_error_and_warning_checks.py | 0 .../image/containers/_empty_image_list.py | 2 +- src/safeds/data/image/containers/_image.py | 2 +- .../data/image/containers/_image_dataset.py | 117 ++++++++++++++++++ .../data/image/containers/_image_list.py | 33 ++++- .../containers/_multi_size_image_list.py | 2 +- .../containers/_single_size_image_list.py | 2 +- src/safeds/exceptions/__init__.py | 2 + src/safeds/exceptions/_data.py | 7 ++ 10 files changed, 159 insertions(+), 8 deletions(-) rename src/safeds/data/image/{utils => _utils}/__init__.py (100%) rename src/safeds/data/image/{utils => _utils}/_image_transformation_error_and_warning_checks.py (100%) create mode 100644 src/safeds/data/image/containers/_image_dataset.py diff --git a/src/safeds/data/image/utils/__init__.py b/src/safeds/data/image/_utils/__init__.py similarity index 100% rename from src/safeds/data/image/utils/__init__.py rename to src/safeds/data/image/_utils/__init__.py diff --git a/src/safeds/data/image/utils/_image_transformation_error_and_warning_checks.py b/src/safeds/data/image/_utils/_image_transformation_error_and_warning_checks.py similarity index 100% rename from src/safeds/data/image/utils/_image_transformation_error_and_warning_checks.py rename to src/safeds/data/image/_utils/_image_transformation_error_and_warning_checks.py diff --git a/src/safeds/data/image/containers/_empty_image_list.py b/src/safeds/data/image/containers/_empty_image_list.py index 531fa0472..04bc263e9 100644 --- a/src/safeds/data/image/containers/_empty_image_list.py +++ b/src/safeds/data/image/containers/_empty_image_list.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Self from safeds._utils import _structural_hash -from safeds.data.image.utils._image_transformation_error_and_warning_checks import ( +from safeds.data.image._utils._image_transformation_error_and_warning_checks import ( _check_add_noise_errors, _check_adjust_brightness_errors_and_warnings, _check_adjust_color_balance_errors_and_warnings, diff --git a/src/safeds/data/image/containers/_image.py b/src/safeds/data/image/containers/_image.py index 886afa70a..8da742554 100644 --- a/src/safeds/data/image/containers/_image.py +++ b/src/safeds/data/image/containers/_image.py @@ -13,7 +13,7 @@ from safeds._config import _get_device from safeds._utils import _structural_hash -from safeds.data.image.utils._image_transformation_error_and_warning_checks import ( +from safeds.data.image._utils._image_transformation_error_and_warning_checks import ( _check_add_noise_errors, _check_adjust_brightness_errors_and_warnings, _check_adjust_color_balance_errors_and_warnings, diff --git a/src/safeds/data/image/containers/_image_dataset.py b/src/safeds/data/image/containers/_image_dataset.py new file mode 100644 index 000000000..aa9ac6e27 --- /dev/null +++ b/src/safeds/data/image/containers/_image_dataset.py @@ -0,0 +1,117 @@ +from __future__ import annotations + +import copy + +import numpy as np +import torch +from torch import Tensor + +from safeds._config import _get_device +from safeds.data.image.containers import ImageList +from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList +from 
safeds.data.tabular.containers import Table +from safeds.exceptions import NonNumericColumnError, OutputLengthMismatchError, IndexOutOfBoundsError + + +class ImageDataset: + + def __init__(self, input_data: ImageList, output_data: ImageList | Table, batch_size=1, shuffle=False) -> None: + self._shuffle_tensor_indices = torch.LongTensor(list(range(len(input_data)))) + self._shuffle_after_epoch = shuffle + self._batch_size = batch_size + self._next_batch_index = 0 + + if not isinstance(input_data, _SingleSizeImageList): + raise ValueError("The given input ImageList contains images of different sizes.") + else: + self._input = input_data + if (isinstance(output_data, Table) and len(input_data) != output_data.number_of_rows) or (isinstance(output_data, ImageList) and len(input_data) != len(output_data)): + raise OutputLengthMismatchError(f"{len(input_data)} != {output_data.number_of_rows if isinstance(output_data, Table) else len(output_data)}") + if isinstance(output_data, Table): + non_numerical_columns = [] + wrong_interval_columns = [] + for column_name in output_data.column_names: + if not output_data.get_column_type(column_name).is_numeric(): + non_numerical_columns.append(column_name) + elif output_data.get_column(column_name).minimum() < 0 or output_data.get_column(column_name).maximum() > 1: + wrong_interval_columns.append(column_name) + if len(non_numerical_columns) > 0: + raise NonNumericColumnError(f"Columns {non_numerical_columns} are not numerical.") + if len(wrong_interval_columns) > 0: + raise ValueError(f"Columns {wrong_interval_columns} have values outside of the interval [0, 1].") + _output = _TableAsTensor(output_data) + elif isinstance(output_data, _SingleSizeImageList): + _output = output_data.clone()._as_single_size_image_list() + else: + raise ValueError("The given output ImageList contains images of different sizes.") + self._output = _output + + def _get_batch(self, batch_number: int, batch_size: int | None = None) -> tuple[Tensor, Tensor]: + if batch_size is None: + batch_size = self._batch_size + if batch_size * batch_number >= len(self._input): + raise IndexOutOfBoundsError(batch_size * batch_number) + max_index = batch_size * (batch_number + 1) if batch_size * (batch_number + 1) < len(self._input) else len(self._input) + input_tensor = self._input._tensor[self._shuffle_tensor_indices[[self._input._indices_to_tensor_positions[index] for index in range(batch_size * batch_number, max_index)]]].to(torch.float32) / 255 + output_tensor: Tensor + if isinstance(self._output, _SingleSizeImageList): + output_tensor = self._output._tensor[self._shuffle_tensor_indices[[self._output._indices_to_tensor_positions[index] for index in range(batch_size * batch_number, max_index)]]].to(torch.float32) / 255 + else: # _output is instance of _TableAsTensor + output_tensor = self._output._tensor[self._shuffle_tensor_indices[batch_size * batch_number:max_index]] + return input_tensor, output_tensor + + def __iter__(self) -> ImageDataset: + # self._batch_index = 0 + # if self._shuffle_after_epoch: + # self._shuffle_inplace() + # return self + + # def _generator(): + # batch_index = 0 + # + # while batch_index * self._batch_size < len(self._input): + # yield self._get_batch(batch_index) + # + # batch_index += 1 + if self._shuffle_after_epoch: + im_ds = self.shuffle() + else: + im_ds = copy.copy(self) + im_ds._next_batch_index = 0 + return im_ds + + def __next__(self) -> tuple[Tensor, Tensor]: + if self._next_batch_index * self._batch_size >= len(self._input): + raise StopIteration + 
self._next_batch_index += 1 + return self._get_batch(self._next_batch_index - 1) + + def __len__(self) -> int: + return self._input.number_of_images + + def shuffle(self) -> ImageDataset: + im_dataset: ImageDataset = copy.copy(self) + im_dataset._shuffle_tensor_indices = torch.randperm(len(self)) + im_dataset._next_batch_index = 0 + return im_dataset + + # def _shuffle_inplace(self) -> None: + # self._shuffle_tensor_indices = torch.randperm(len(self)) + # + # def _reset_indices_inplace(self) -> None: + # self._shuffle_tensor_indices = torch.LongTensor(list(range(len(self)))) + + +class _TableAsTensor: + + def __init__(self, table: Table) -> None: + self._column_names = table.column_names + + columns_as_tensors = [] + for column_name in table.column_names: + columns_as_tensors.append(torch.Tensor(table.get_column(column_name)._data.values.astype(np.float32)).unsqueeze(dim=0)) + + self._tensor = torch.cat(columns_as_tensors, dim=0).to(_get_device()).T + + if not torch.all(self._tensor.sum(dim=1) == torch.ones(self._tensor.size(dim=0))): + raise ValueError("The given table is not correctly one hot encoded as it contains rows that have a sum not equal to 1.") diff --git a/src/safeds/data/image/containers/_image_list.py b/src/safeds/data/image/containers/_image_list.py index 9b1eef92d..ed599988f 100644 --- a/src/safeds/data/image/containers/_image_list.py +++ b/src/safeds/data/image/containers/_image_list.py @@ -5,7 +5,7 @@ import os from abc import ABCMeta, abstractmethod from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, overload, Literal import torch from PIL.Image import open as pil_image_open @@ -86,7 +86,23 @@ def from_images(images: list[Image]) -> ImageList: return _SingleSizeImageList._create_image_list([image._image_tensor for image in images], indices) @staticmethod - def from_files(path: str | Path | Sequence[str | Path]) -> ImageList: + @overload + def from_files(path: str | Path | Sequence[str | Path]) -> ImageList: ... + + @staticmethod + @overload + def from_files(path: str | Path | Sequence[str | Path], return_filenames: Literal[False]) -> ImageList: ... + + @staticmethod + @overload + def from_files(path: str | Path | Sequence[str | Path], return_filenames: Literal[True]) -> tuple[ImageList, list[str]]: ... + + @staticmethod + @overload + def from_files(path: str | Path | Sequence[str | Path], return_filenames: bool) -> ImageList | tuple[ImageList, list[str]]: ... + + @staticmethod + def from_files(path: str | Path | Sequence[str | Path], return_filenames: bool = False) -> ImageList | tuple[ImageList, list[str]]: """ Create an ImageList from a directory or a list of files. 
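For illustration, the `ImageDataset` container and the new `return_filenames` flag introduced above compose as in the following sketch. The `images/` directory and the one-hot label table are hypothetical, and `ImageDataset` is only re-exported from `safeds.data.image.containers` in patch 02:

    from safeds.data.image.containers import ImageDataset, ImageList
    from safeds.data.tabular.containers import Table

    # load every image in the directory, keeping the filenames in matching order
    images, filenames = ImageList.from_files("images/", return_filenames=True)

    # hypothetical one-hot labels: values must lie in [0, 1] and each row must sum to 1,
    # otherwise ImageDataset raises ValueError / NonNumericColumnError
    labels = Table({"cat": [1, 0], "dog": [0, 1]})

    # pair inputs and outputs; shuffle=True reshuffles the batch order after each epoch
    dataset = ImageDataset(images, labels, batch_size=1, shuffle=True)

    for input_tensor, output_tensor in dataset:
        ...  # image batches arrive as float32 tensors scaled from [0, 255] to [0, 1]
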
@@ -96,6 +112,8 @@ def from_files(path: str | Path | Sequence[str | Path]) -> ImageList: ---------- path: the path to the directory or a list of files + return_filenames: + if True the output will be a tuple which contains a list of the filenames in order of the images Returns ------- @@ -115,6 +133,7 @@ def from_files(path: str | Path | Sequence[str | Path]) -> ImageList: return _EmptyImageList() image_tensors = [] + file_names = [] fixed_size = True path_list: list[str | Path] @@ -128,6 +147,7 @@ def from_files(path: str | Path | Sequence[str | Path]) -> ImageList: path_list += sorted([p / name for name in os.listdir(p)]) else: image_tensors.append(ImageList._pil_to_tensor(pil_image_open(p))) + file_names.append(str(p)) if fixed_size and ( image_tensors[0].size(dim=2) != image_tensors[-1].size(dim=2) or image_tensors[0].size(dim=1) != image_tensors[-1].size(dim=1) @@ -140,9 +160,14 @@ def from_files(path: str | Path | Sequence[str | Path]) -> ImageList: indices = list(range(len(image_tensors))) if fixed_size: - return _SingleSizeImageList._create_image_list(image_tensors, indices) + image_list = _SingleSizeImageList._create_image_list(image_tensors, indices) + else: + image_list = _MultiSizeImageList._create_image_list(image_tensors, indices) + + if return_filenames: + return image_list, file_names else: - return _MultiSizeImageList._create_image_list(image_tensors, indices) + return image_list @abstractmethod def clone(self) -> ImageList: diff --git a/src/safeds/data/image/containers/_multi_size_image_list.py b/src/safeds/data/image/containers/_multi_size_image_list.py index 955086f1c..db6c48a6f 100644 --- a/src/safeds/data/image/containers/_multi_size_image_list.py +++ b/src/safeds/data/image/containers/_multi_size_image_list.py @@ -10,7 +10,7 @@ from safeds._utils import _structural_hash from safeds.data.image.containers import Image, ImageList -from safeds.data.image.utils._image_transformation_error_and_warning_checks import ( +from safeds.data.image._utils._image_transformation_error_and_warning_checks import ( _check_blur_errors_and_warnings, _check_remove_images_with_size_errors, ) diff --git a/src/safeds/data/image/containers/_single_size_image_list.py b/src/safeds/data/image/containers/_single_size_image_list.py index 20ad8d856..ad4490a51 100644 --- a/src/safeds/data/image/containers/_single_size_image_list.py +++ b/src/safeds/data/image/containers/_single_size_image_list.py @@ -15,7 +15,7 @@ from safeds._utils import _structural_hash from safeds.data.image.containers._image import Image from safeds.data.image.containers._image_list import ImageList -from safeds.data.image.utils._image_transformation_error_and_warning_checks import ( +from safeds.data.image._utils._image_transformation_error_and_warning_checks import ( _check_add_noise_errors, _check_adjust_brightness_errors_and_warnings, _check_adjust_color_balance_errors_and_warnings, diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 5f8fa74ee..b98190365 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -12,6 +12,7 @@ IndexOutOfBoundsError, MissingValuesColumnError, NonNumericColumnError, + OutputLengthMismatchError, TransformerNotFittedError, UnknownColumnNameError, ValueNotPresentWhenFittedError, @@ -49,6 +50,7 @@ "IndexOutOfBoundsError", "MissingValuesColumnError", "NonNumericColumnError", + "OutputLengthMismatchError", "TransformerNotFittedError", "UnknownColumnNameError", "ValueNotPresentWhenFittedError", diff --git 
a/src/safeds/exceptions/_data.py b/src/safeds/exceptions/_data.py index 6271239f9..6ee385b32 100644 --- a/src/safeds/exceptions/_data.py +++ b/src/safeds/exceptions/_data.py @@ -127,6 +127,13 @@ def __init__(self, column_info: str): super().__init__(f"The length of at least one column differs: \n{column_info}") +class OutputLengthMismatchError(Exception): + """Exception raised when the lengths of the input and output container does not match.""" + + def __init__(self, output_info: str): + super().__init__(f"The length of the output container differs: \n{output_info}") + + class TransformerNotFittedError(Exception): """Raised when a transformer is used before fitting it.""" From 312feec2502c32318d81fa569827496cb40b6c74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Tue, 23 Apr 2024 16:45:01 +0200 Subject: [PATCH 02/42] feat: added `Convolutional2DLayer`, `FlattenLayer`, `MaxPooling2DLayer` and `AvgPooling2DLayer` feat: added `InputConversionImage` and `OutputConversionImage` feat: added class `ImageSize` and methods `ImageList.sizes` and `Image.size` to get the sizes of the respective images feat: added ability to iterate over `SingleSizeImageList` feat: added option `None` for no activation function in `ForwardLayer` fix: fixed bug #581 in removing the Softmax function from the last layer in `NeuralNetworkClassifier` --- src/safeds/data/image/containers/__init__.py | 3 + .../image/containers/_empty_image_list.py | 5 ++ src/safeds/data/image/containers/_image.py | 13 +++ .../data/image/containers/_image_dataset.py | 81 +++++++++-------- .../data/image/containers/_image_list.py | 15 +++- .../containers/_multi_size_image_list.py | 10 +++ .../containers/_single_size_image_list.py | 42 +++++++++ src/safeds/data/image/typing/__init__.py | 19 ++++ src/safeds/data/image/typing/_image_size.py | 70 +++++++++++++++ src/safeds/ml/nn/__init__.py | 23 ++++- src/safeds/ml/nn/_convolutional2d_layer.py | 78 +++++++++++++++++ src/safeds/ml/nn/_flatten_layer.py | 61 +++++++++++++ src/safeds/ml/nn/_forward_layer.py | 6 +- src/safeds/ml/nn/_input_conversion.py | 9 +- src/safeds/ml/nn/_input_conversion_image.py | 36 ++++++++ src/safeds/ml/nn/_layer.py | 10 ++- src/safeds/ml/nn/_model.py | 24 +++-- src/safeds/ml/nn/_output_conversion_image.py | 44 ++++++++++ src/safeds/ml/nn/_pooling2d_layer.py | 87 +++++++++++++++++++ 19 files changed, 574 insertions(+), 62 deletions(-) create mode 100644 src/safeds/data/image/typing/__init__.py create mode 100644 src/safeds/data/image/typing/_image_size.py create mode 100644 src/safeds/ml/nn/_convolutional2d_layer.py create mode 100644 src/safeds/ml/nn/_flatten_layer.py create mode 100644 src/safeds/ml/nn/_input_conversion_image.py create mode 100644 src/safeds/ml/nn/_output_conversion_image.py create mode 100644 src/safeds/ml/nn/_pooling2d_layer.py diff --git a/src/safeds/data/image/containers/__init__.py b/src/safeds/data/image/containers/__init__.py index f0a8f344c..d294224d0 100644 --- a/src/safeds/data/image/containers/__init__.py +++ b/src/safeds/data/image/containers/__init__.py @@ -7,16 +7,19 @@ if TYPE_CHECKING: from ._image import Image from ._image_list import ImageList + from ._image_dataset import ImageDataset apipkg.initpkg( __name__, { "Image": "._image:Image", "ImageList": "._image_list:ImageList", + "ImageDataset": "._image_dataset:ImageDataset", }, ) __all__ = [ "Image", "ImageList", + "ImageDataset", ] diff --git a/src/safeds/data/image/containers/_empty_image_list.py b/src/safeds/data/image/containers/_empty_image_list.py index 
75876e618..7e358c2d8 100644 --- a/src/safeds/data/image/containers/_empty_image_list.py +++ b/src/safeds/data/image/containers/_empty_image_list.py @@ -17,6 +17,7 @@ _check_resize_errors, _check_sharpen_errors_and_warnings, ) +from safeds.data.image.typing import ImageSize from safeds.exceptions import IndexOutOfBoundsError if TYPE_CHECKING: @@ -91,6 +92,10 @@ def heights(self) -> list[int]: def channel(self) -> int: return NotImplemented + @property + def sizes(self) -> list[ImageSize]: + return [] + @property def number_of_sizes(self) -> int: return 0 diff --git a/src/safeds/data/image/containers/_image.py b/src/safeds/data/image/containers/_image.py index 25b0b50ca..bb58c466e 100644 --- a/src/safeds/data/image/containers/_image.py +++ b/src/safeds/data/image/containers/_image.py @@ -18,6 +18,7 @@ _check_resize_errors, _check_sharpen_errors_and_warnings, ) +from safeds.data.image.typing import ImageSize from safeds.exceptions import IllegalFormatError if TYPE_CHECKING: @@ -261,6 +262,18 @@ def channel(self) -> int: """ return self._image_tensor.size(dim=0) + @property + def size(self) -> ImageSize: + """ + Get the `ImageSize` of the image. + + Returns + ------- + image_size: + The size of the image. + """ + return ImageSize(self.width, self.height, self.channel) + @property def device(self) -> Device: """ diff --git a/src/safeds/data/image/containers/_image_dataset.py b/src/safeds/data/image/containers/_image_dataset.py index aa9ac6e27..e97e11909 100644 --- a/src/safeds/data/image/containers/_image_dataset.py +++ b/src/safeds/data/image/containers/_image_dataset.py @@ -1,21 +1,24 @@ from __future__ import annotations import copy - -import numpy as np -import torch -from torch import Tensor +from typing import TYPE_CHECKING from safeds._config import _get_device from safeds.data.image.containers import ImageList from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList +from safeds.data.image.typing import ImageSize from safeds.data.tabular.containers import Table from safeds.exceptions import NonNumericColumnError, OutputLengthMismatchError, IndexOutOfBoundsError +if TYPE_CHECKING: + from torch import Tensor + class ImageDataset: def __init__(self, input_data: ImageList, output_data: ImageList | Table, batch_size=1, shuffle=False) -> None: + import torch + self._shuffle_tensor_indices = torch.LongTensor(list(range(len(input_data)))) self._shuffle_after_epoch = shuffle self._batch_size = batch_size @@ -24,6 +27,7 @@ def __init__(self, input_data: ImageList, output_data: ImageList | Table, batch_ if not isinstance(input_data, _SingleSizeImageList): raise ValueError("The given input ImageList contains images of different sizes.") else: + self._input_size = ImageSize(input_data.widths[0], input_data.heights[0], input_data.channel) self._input = input_data if (isinstance(output_data, Table) and len(input_data) != output_data.number_of_rows) or (isinstance(output_data, ImageList) and len(input_data) != len(output_data)): raise OutputLengthMismatchError(f"{len(input_data)} != {output_data.number_of_rows if isinstance(output_data, Table) else len(output_data)}") @@ -46,33 +50,7 @@ def __init__(self, input_data: ImageList, output_data: ImageList | Table, batch_ raise ValueError("The given output ImageList contains images of different sizes.") self._output = _output - def _get_batch(self, batch_number: int, batch_size: int | None = None) -> tuple[Tensor, Tensor]: - if batch_size is None: - batch_size = self._batch_size - if batch_size * batch_number >= len(self._input): 
- raise IndexOutOfBoundsError(batch_size * batch_number) - max_index = batch_size * (batch_number + 1) if batch_size * (batch_number + 1) < len(self._input) else len(self._input) - input_tensor = self._input._tensor[self._shuffle_tensor_indices[[self._input._indices_to_tensor_positions[index] for index in range(batch_size * batch_number, max_index)]]].to(torch.float32) / 255 - output_tensor: Tensor - if isinstance(self._output, _SingleSizeImageList): - output_tensor = self._output._tensor[self._shuffle_tensor_indices[[self._output._indices_to_tensor_positions[index] for index in range(batch_size * batch_number, max_index)]]].to(torch.float32) / 255 - else: # _output is instance of _TableAsTensor - output_tensor = self._output._tensor[self._shuffle_tensor_indices[batch_size * batch_number:max_index]] - return input_tensor, output_tensor - def __iter__(self) -> ImageDataset: - # self._batch_index = 0 - # if self._shuffle_after_epoch: - # self._shuffle_inplace() - # return self - - # def _generator(): - # batch_index = 0 - # - # while batch_index * self._batch_size < len(self._input): - # yield self._get_batch(batch_index) - # - # batch_index += 1 if self._shuffle_after_epoch: im_ds = self.shuffle() else: @@ -89,29 +67,48 @@ def __next__(self) -> tuple[Tensor, Tensor]: def __len__(self) -> int: return self._input.number_of_images + @property + def input_size(self) -> ImageSize: + return self._input_size + + def _get_batch(self, batch_number: int, batch_size: int | None = None) -> tuple[Tensor, Tensor]: + import torch + from torch import Tensor + + if batch_size is None: + batch_size = self._batch_size + if batch_size * batch_number >= len(self._input): + raise IndexOutOfBoundsError(batch_size * batch_number) + max_index = batch_size * (batch_number + 1) if batch_size * (batch_number + 1) < len(self._input) else len(self._input) + input_tensor = self._input._tensor[self._shuffle_tensor_indices[[self._input._indices_to_tensor_positions[index] for index in range(batch_size * batch_number, max_index)]]].to(torch.float32) / 255 + output_tensor: Tensor + if isinstance(self._output, _SingleSizeImageList): + output_tensor = self._output._tensor[self._shuffle_tensor_indices[[self._output._indices_to_tensor_positions[index] for index in range(batch_size * batch_number, max_index)]]].to(torch.float32) / 255 + else: # _output is instance of _TableAsTensor + output_tensor = self._output._tensor[self._shuffle_tensor_indices[batch_size * batch_number:max_index]] + return input_tensor, output_tensor + def shuffle(self) -> ImageDataset: + import torch im_dataset: ImageDataset = copy.copy(self) im_dataset._shuffle_tensor_indices = torch.randperm(len(self)) im_dataset._next_batch_index = 0 return im_dataset - # def _shuffle_inplace(self) -> None: - # self._shuffle_tensor_indices = torch.randperm(len(self)) - # - # def _reset_indices_inplace(self) -> None: - # self._shuffle_tensor_indices = torch.LongTensor(list(range(len(self)))) - class _TableAsTensor: def __init__(self, table: Table) -> None: - self._column_names = table.column_names - - columns_as_tensors = [] - for column_name in table.column_names: - columns_as_tensors.append(torch.Tensor(table.get_column(column_name)._data.values.astype(np.float32)).unsqueeze(dim=0)) + import torch - self._tensor = torch.cat(columns_as_tensors, dim=0).to(_get_device()).T + self._tensor = torch.Tensor(table._data.to_numpy(copy=True)).to(_get_device()) if not torch.all(self._tensor.sum(dim=1) == torch.ones(self._tensor.size(dim=0))): raise ValueError("The given table is not 
correctly one hot encoded as it contains rows that have a sum not equal to 1.") + + @staticmethod + def _from_tensor(tensor: Tensor) -> _TableAsTensor: + table_as_tensor = _TableAsTensor.__new__(_TableAsTensor) + table_as_tensor._tensor = tensor + return table_as_tensor + diff --git a/src/safeds/data/image/containers/_image_list.py b/src/safeds/data/image/containers/_image_list.py index c6c08e0f0..5c54bcee8 100644 --- a/src/safeds/data/image/containers/_image_list.py +++ b/src/safeds/data/image/containers/_image_list.py @@ -8,6 +8,7 @@ from typing import TYPE_CHECKING, overload, Literal from safeds.data.image.containers._image import Image +from safeds.data.image.typing import ImageSize if TYPE_CHECKING: from collections.abc import Sequence @@ -120,7 +121,7 @@ def from_files(path: str | Path | Sequence[str | Path], return_filenames: bool = If the directory or one of the files of the path cannot be found """ from PIL.Image import open as pil_image_open - from torchvision.transforms.functional import pil_to_tensor + from torchvision.transforms.v2.functional import pil_to_tensor from safeds.data.image.containers._empty_image_list import _EmptyImageList from safeds.data.image.containers._multi_size_image_list import _MultiSizeImageList @@ -325,6 +326,18 @@ def channel(self) -> int: The channel of all images """ + @property + @abstractmethod + def sizes(self) -> list[ImageSize]: + """ + Return the sizes of all images + + Returns + ------- + sizes: + The sizes of all images + """ + @property @abstractmethod def number_of_sizes(self) -> int: diff --git a/src/safeds/data/image/containers/_multi_size_image_list.py b/src/safeds/data/image/containers/_multi_size_image_list.py index c1433b76a..04ec4b5f4 100644 --- a/src/safeds/data/image/containers/_multi_size_image_list.py +++ b/src/safeds/data/image/containers/_multi_size_image_list.py @@ -11,6 +11,7 @@ _check_blur_errors_and_warnings, _check_remove_images_with_size_errors, ) +from safeds.data.image.typing import ImageSize from safeds.exceptions import ( DuplicateIndexError, IllegalFormatError, @@ -158,6 +159,15 @@ def heights(self) -> list[int]: def channel(self) -> int: return next(iter(self._image_list_dict.values())).channel + @property + def sizes(self) -> list[ImageSize]: + sizes = {} + for image_list in self._image_list_dict.values(): + indices = image_list._as_single_size_image_list()._tensor_positions_to_indices + for i, index in enumerate(indices): + sizes[index] = image_list.sizes[i] + return [sizes[index] for index in sorted(sizes)] + @property def number_of_sizes(self) -> int: return len(self._image_list_dict) diff --git a/src/safeds/data/image/containers/_single_size_image_list.py b/src/safeds/data/image/containers/_single_size_image_list.py index 02930b1a4..970eb5e57 100644 --- a/src/safeds/data/image/containers/_single_size_image_list.py +++ b/src/safeds/data/image/containers/_single_size_image_list.py @@ -20,6 +20,7 @@ _check_resize_errors, _check_sharpen_errors_and_warnings, ) +from safeds.data.image.typing import ImageSize from safeds.exceptions import ( DuplicateIndexError, IllegalFormatError, @@ -49,6 +50,9 @@ class _SingleSizeImageList(ImageList): def __init__(self) -> None: import torch + self._next_batch_index = 0 + self._batch_size = 1 + self._tensor: Tensor = torch.empty(0) self._tensor_positions_to_indices: list[int] = [] # list[tensor_position] = index self._indices_to_tensor_positions: dict[int, int] = {} # {index: tensor_position} @@ -95,6 +99,40 @@ def _create_image_list(images: list[Tensor], indices: list[int]) -> 
ImageList: return image_list + @staticmethod + def _create_from_tensor(images_tensor: Tensor, indices: list[int]) -> _SingleSizeImageList: + if images_tensor.dim() != 4: + raise ValueError(f"Invalid Tensor. This Tensor requires 4 dimensions but has {images_tensor.dim()}") + + image_list = _SingleSizeImageList() + image_list._tensor = images_tensor.detach().clone() + image_list._tensor_positions_to_indices = indices + image_list._indices_to_tensor_positions = image_list._calc_new_indices_to_tensor_positions() + + return image_list + + def __iter__(self) -> _SingleSizeImageList: + im_ds = copy.copy(self) + im_ds._next_batch_index = 0 + return im_ds + + def __next__(self) -> Tensor: + if self._next_batch_index * self._batch_size >= len(self): + raise StopIteration + self._next_batch_index += 1 + return self._get_batch(self._next_batch_index - 1) + + def _get_batch(self, batch_number: int, batch_size: int | None = None) -> Tensor: + import torch + + if batch_size is None: + batch_size = self._batch_size + if batch_size * batch_number >= len(self): + raise IndexOutOfBoundsError(batch_size * batch_number) + max_index = batch_size * (batch_number + 1) if batch_size * (batch_number + 1) < len(self) else len(self) + input_tensor = self._tensor[[self._indices_to_tensor_positions[index] for index in range(batch_size * batch_number, max_index)]].to(torch.float32) / 255 + return input_tensor + def clone(self) -> ImageList: cloned_image_list = self._clone_without_tensor() cloned_image_list._tensor = self._tensor.detach().clone() @@ -183,6 +221,10 @@ def heights(self) -> list[int]: def channel(self) -> int: return self._tensor.size(dim=1) + @property + def sizes(self) -> list[ImageSize]: + return [ImageSize(self._tensor.size(dim=3), self._tensor.size(dim=2), self._tensor.size(dim=1))] * self.number_of_images + @property def number_of_sizes(self) -> int: return 1 diff --git a/src/safeds/data/image/typing/__init__.py b/src/safeds/data/image/typing/__init__.py new file mode 100644 index 000000000..92ab61a47 --- /dev/null +++ b/src/safeds/data/image/typing/__init__.py @@ -0,0 +1,19 @@ +"""Types used to define the attributes of image data.""" + +from typing import TYPE_CHECKING + +import apipkg + +if TYPE_CHECKING: + from ._image_size import ImageSize + +apipkg.initpkg( + __name__, + { + "ImageSize": "._image_size:ImageSize", + }, +) + +__all__ = [ + "ImageSize", +] diff --git a/src/safeds/data/image/typing/_image_size.py b/src/safeds/data/image/typing/_image_size.py new file mode 100644 index 000000000..88bd084ac --- /dev/null +++ b/src/safeds/data/image/typing/_image_size.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import sys +from typing import TYPE_CHECKING + +from safeds._utils import _structural_hash +from safeds.exceptions import OutOfBoundsError, ClosedBound + +if TYPE_CHECKING: + from safeds.data.image.containers import Image + + +class ImageSize: + """ + A container for image size data + + Parameters + ---------- + width: + the width of the image + height: + the height of the image + channel: + the channel of the image + + Raises + ------ + OutOfBoundsError: + if width or height are below 1 + ValueError + if an invalid channel is given + """ + + def __init__(self, width: int, height: int, channel: int, *, _ignore_invalid_channel: bool = False) -> None: + if width < 1 or height < 1: + raise OutOfBoundsError(min(width, height), lower_bound=ClosedBound(1)) + elif not _ignore_invalid_channel and channel not in (1, 3, 4): + raise ValueError(f"Channel {channel} is not a valid channel 
option. Use either 1, 3 or 4") + elif channel < 1: + raise OutOfBoundsError(channel, name="channel", lower_bound=ClosedBound(1)) + self._width = width + self._height = height + self._channel = channel + + @staticmethod + def from_image(image: Image) -> ImageSize: + return ImageSize(image.width, image.height, image.channel) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ImageSize): + return NotImplemented + return self._width == other._width and self._height == other._height and self._channel == other._channel + + def __hash__(self): + return _structural_hash(self._width, self._height, self._channel) + + def __sizeof__(self): + return sys.getsizeof(self._width) + sys.getsizeof(self._height) + sys.getsizeof(self._channel) + + @property + def width(self) -> int: + return self._width + + @property + def height(self) -> int: + return self._height + + @property + def channel(self) -> int: + return self._channel diff --git a/src/safeds/ml/nn/__init__.py b/src/safeds/ml/nn/__init__.py index 6158e640d..1da37d10b 100644 --- a/src/safeds/ml/nn/__init__.py +++ b/src/safeds/ml/nn/__init__.py @@ -5,26 +5,45 @@ import apipkg if TYPE_CHECKING: + from ._pooling2d_layer import AvgPooling2DLayer + from ._convolutional2d_layer import Convolutional2DLayer + from ._flatten_layer import FlattenLayer from ._forward_layer import ForwardLayer + from ._input_conversion_image import InputConversionImage from ._input_conversion_table import InputConversionTable + from ._pooling2d_layer import MaxPooling2DLayer from ._model import NeuralNetworkClassifier, NeuralNetworkRegressor + from ._output_conversion_image import OutputConversionImage from ._output_conversion_table import OutputConversionTable apipkg.initpkg( __name__, { + "AvgPooling2DLayer": "._pooling2d_layer:AvgPooling2DLayer", + "Convolutional2DLayer": "._convolutional2d_layer:Convolutional2DLayer", + "FlattenLayer": "._flatten_layer:FlattenLayer", "ForwardLayer": "._forward_layer:ForwardLayer", + "InputConversionImage": "._input_conversion_image:InputConversionImage", "InputConversionTable": "._input_conversion_table:InputConversionTable", - "OutputConversionTable": "._output_conversion_table:OutputConversionTable", + "MaxPooling2DLayer": "._pooling2d_layer:MaxPooling2DLayer", "NeuralNetworkClassifier": "._model:NeuralNetworkClassifier", "NeuralNetworkRegressor": "._model:NeuralNetworkRegressor", + "OutputConversionImage": "._output_conversion_image:OutputConversionImage", + "OutputConversionTable": "._output_conversion_table:OutputConversionTable", + "Pooling2DLayer": "._pooling2d_layer:Pooling2DLayer", }, ) __all__ = [ + "AvgPooling2DLayer", + "Convolutional2DLayer", + "FlattenLayer", "ForwardLayer", + "InputConversionImage", "InputConversionTable", - "OutputConversionTable", + "MaxPooling2DLayer", "NeuralNetworkClassifier", "NeuralNetworkRegressor", + "OutputConversionImage", + "OutputConversionTable", ] diff --git a/src/safeds/ml/nn/_convolutional2d_layer.py b/src/safeds/ml/nn/_convolutional2d_layer.py new file mode 100644 index 000000000..a824ebf09 --- /dev/null +++ b/src/safeds/ml/nn/_convolutional2d_layer.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +import math +from typing import TYPE_CHECKING + +from safeds.data.image.typing import ImageSize + +if TYPE_CHECKING: + from torch import Tensor, nn + +from safeds.ml.nn._layer import _Layer + + +def _create_internal_model(input_size: int, output_size: int, kernel_size: int, activation_function: str, padding: int, stride: int) -> nn.Module: + from torch import nn + + 
class _InternalLayer(nn.Module): + def __init__(self, input_size: int, output_size: int, kernel_size: int, activation_function: str, padding: int, stride: int): + super().__init__() + self._layer = nn.Conv2d(in_channels=input_size, out_channels=output_size, kernel_size=kernel_size, padding=padding, stride=stride) + match activation_function: + case "sigmoid": + self._fn = nn.Sigmoid() + case "relu": + self._fn = nn.ReLU() + case "softmax": + self._fn = nn.Softmax() + case _: + raise ValueError("Unknown Activation Function: " + activation_function) + + def forward(self, x: Tensor) -> Tensor: + return self._fn(self._layer(x)) + + return _InternalLayer(input_size, output_size, kernel_size, activation_function, padding, stride) + + +class Convolutional2DLayer(_Layer): + def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, padding: int = 0): + """ + Create a Convolutional 2D Layer. + """ + self._output_channel = output_channel + self._kernel_size = kernel_size + self._stride = stride + self._padding = padding + + def _get_internal_layer(self, *, activation_function: str) -> nn.Module: + return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride) + + @property + def input_size(self) -> ImageSize: + """ + Get the input_size of this layer. + + Returns + ------- + result: + The amount of values being passed into this layer. + """ + return self._input_size + + @property + def output_size(self) -> ImageSize: + """ + Get the output_size of this layer. + + Returns + ------- + result: + The Number of Neurons in this layer. + """ + return self._output_size + + def _set_input_size(self, input_size: ImageSize) -> None: + self._input_size = input_size + new_width = math.ceil((input_size.width + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride)) + new_height = math.ceil((input_size.height + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride)) + self._output_size = ImageSize(new_width, new_height, self._output_channel, _ignore_invalid_channel=True) diff --git a/src/safeds/ml/nn/_flatten_layer.py b/src/safeds/ml/nn/_flatten_layer.py new file mode 100644 index 000000000..5a9ca7f2b --- /dev/null +++ b/src/safeds/ml/nn/_flatten_layer.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from safeds.data.image.typing import ImageSize + +if TYPE_CHECKING: + from torch import Tensor, nn + +from safeds.ml.nn._layer import _Layer + + +def _create_internal_model() -> nn.Module: + from torch import nn + + class _InternalLayer(nn.Module): + def __init__(self): + super().__init__() + self._layer = nn.Flatten() + + def forward(self, x: Tensor) -> Tensor: + return self._layer(x) + + return _InternalLayer() + + +class FlattenLayer(_Layer): + def __init__(self): + """Create a Flatten Layer.""" + self._input_size: ImageSize | None = None + self._output_size: ImageSize | None = None + + def _get_internal_layer(self) -> nn.Module: + return _create_internal_model() + + @property + def input_size(self) -> ImageSize: + """ + Get the input_size of this layer. + + Returns + ------- + result : + The amount of values being passed into this layer. + """ + return self._input_size + + @property + def output_size(self) -> int: + """ + Get the output_size of this layer. + + Returns + ------- + result : + The Number of Neurons in this layer. 
+ """ + return self._input_size.width * self._input_size.height * self._input_size.channel if self._input_size is not None else None + + def _set_input_size(self, input_size: ImageSize) -> None: + self._input_size = input_size diff --git a/src/safeds/ml/nn/_forward_layer.py b/src/safeds/ml/nn/_forward_layer.py index e5f745e91..1ef7e69b0 100644 --- a/src/safeds/ml/nn/_forward_layer.py +++ b/src/safeds/ml/nn/_forward_layer.py @@ -23,11 +23,13 @@ def __init__(self, input_size: int, output_size: int, activation_function: str): self._fn = nn.ReLU() case "softmax": self._fn = nn.Softmax() + case "none": + self._fn = None case _: raise ValueError("Unknown Activation Function: " + activation_function) def forward(self, x: Tensor) -> Tensor: - return self._fn(self._layer(x)) + return self._fn(self._layer(x)) if self._fn is not None else self._layer(x) return _InternalLayer(input_size, output_size, activation_function) @@ -57,7 +59,7 @@ def __init__(self, output_size: int, input_size: int | None = None): raise OutOfBoundsError(actual=output_size, name="output_size", lower_bound=ClosedBound(1)) self._output_size = output_size - def _get_internal_layer(self, activation_function: str) -> nn.Module: + def _get_internal_layer(self, *, activation_function: str) -> nn.Module: return _create_internal_model(self._input_size, self._output_size, activation_function) @property diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py index 8e60e8bdb..f71b4b4b7 100644 --- a/src/safeds/ml/nn/_input_conversion.py +++ b/src/safeds/ml/nn/_input_conversion.py @@ -3,10 +3,13 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Generic, TypeVar +from safeds.data.image.typing import ImageSize + if TYPE_CHECKING: from torch.utils.data import DataLoader from safeds.data.tabular.containers import Table, TaggedTable, TimeSeries +from safeds.data.image.containers import ImageDataset, ImageList FT = TypeVar("FT", TaggedTable, TimeSeries) PT = TypeVar("PT", Table, TimeSeries) @@ -17,15 +20,15 @@ class _InputConversion(Generic[FT, PT], ABC): @property @abstractmethod - def _data_size(self) -> int: + def _data_size(self) -> int | ImageSize: pass # pragma: no cover @abstractmethod - def _data_conversion_fit(self, input_data: FT, batch_size: int, num_of_classes: int = 1) -> DataLoader: + def _data_conversion_fit(self, input_data: FT, batch_size: int, num_of_classes: int = 1) -> DataLoader | ImageDataset: pass # pragma: no cover @abstractmethod - def _data_conversion_predict(self, input_data: PT, batch_size: int) -> DataLoader: + def _data_conversion_predict(self, input_data: PT, batch_size: int) -> DataLoader | ImageList: pass # pragma: no cover @abstractmethod diff --git a/src/safeds/ml/nn/_input_conversion_image.py b/src/safeds/ml/nn/_input_conversion_image.py new file mode 100644 index 000000000..62a4347e3 --- /dev/null +++ b/src/safeds/ml/nn/_input_conversion_image.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from safeds.data.image.containers import ImageDataset, ImageList +from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList +from safeds.data.image.typing import ImageSize + +from safeds.ml.nn._input_conversion import _InputConversion + + +class InputConversionImage(_InputConversion[ImageDataset, ImageList]): + """The input conversion for a neural network, defines the input parameters for the neural network.""" + + def __init__(self, image_size: ImageSize) -> None: + """ + Define the input parameters for the neural network in 
the input conversion. + + Parameters + ---------- + """ + self._image_size = image_size + + @property + def _data_size(self) -> ImageSize: + return self._image_size + + def _data_conversion_fit(self, input_data: ImageDataset, batch_size: int, num_of_classes: int = 1) -> ImageDataset: + return input_data + + def _data_conversion_predict(self, input_data: ImageList, batch_size: int) -> ImageList: + return input_data + + def _is_fit_data_valid(self, input_data: ImageDataset) -> bool: + return input_data.input_size == self._image_size + + def _is_predict_data_valid(self, input_data: ImageList) -> bool: + return isinstance(input_data, _SingleSizeImageList) and input_data.sizes[0] == self._image_size diff --git a/src/safeds/ml/nn/_layer.py b/src/safeds/ml/nn/_layer.py index 36f653a50..22a179b08 100644 --- a/src/safeds/ml/nn/_layer.py +++ b/src/safeds/ml/nn/_layer.py @@ -3,6 +3,8 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING +from safeds.data.image.typing import ImageSize + if TYPE_CHECKING: from torch import nn @@ -13,19 +15,19 @@ def __init__(self) -> None: pass # pragma: no cover @abstractmethod - def _get_internal_layer(self, activation_function: str) -> nn.Module: + def _get_internal_layer(self, **kwargs) -> nn.Module: pass # pragma: no cover @property @abstractmethod - def input_size(self) -> int: + def input_size(self) -> int | ImageSize: pass # pragma: no cover @property @abstractmethod - def output_size(self) -> int: + def output_size(self) -> int | ImageSize: pass # pragma: no cover @abstractmethod - def _set_input_size(self, input_size: int) -> None: + def _set_input_size(self, input_size: int | ImageSize) -> None: pass # pragma: no cover diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 0f862e4b6..c698872b5 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -3,6 +3,7 @@ import copy from typing import TYPE_CHECKING, Generic, Self, TypeVar +from safeds.data.image.containers import ImageList, ImageDataset from safeds.data.tabular.containers import Table, TaggedTable, TimeSeries from safeds.exceptions import ( ClosedBound, @@ -11,6 +12,8 @@ ModelNotFittedError, OutOfBoundsError, ) +from safeds.ml.nn import InputConversionImage, FlattenLayer +from safeds.ml.nn._pooling2d_layer import _Pooling2DLayer if TYPE_CHECKING: from collections.abc import Callable @@ -21,9 +24,9 @@ from safeds.ml.nn._layer import _Layer from safeds.ml.nn._output_conversion import _OutputConversion -IFT = TypeVar("IFT", TaggedTable, TimeSeries) # InputFitType -IPT = TypeVar("IPT", Table, TimeSeries) # InputPredictType -OT = TypeVar("OT", TaggedTable, TimeSeries) # OutputType +IFT = TypeVar("IFT", TaggedTable, TimeSeries, ImageDataset) # InputFitType +IPT = TypeVar("IPT", Table, TimeSeries, ImageList) # InputPredictType +OT = TypeVar("OT", TaggedTable, TimeSeries, ImageDataset) # OutputType class NeuralNetworkRegressor(Generic[IFT, IPT, OT]): @@ -34,7 +37,7 @@ def __init__( output_conversion: _OutputConversion[IPT, OT], ): self._input_conversion: _InputConversion[IFT, IPT] = input_conversion - self._model = _create_internal_model(layers, is_for_classification=False) + self._model = _create_internal_model(input_conversion, layers, is_for_classification=False) self._output_conversion: _OutputConversion[IPT, OT] = output_conversion self._input_size = self._model.input_size self._batch_size = 1 @@ -188,7 +191,7 @@ def __init__( output_conversion: _OutputConversion[IPT, OT], ): self._input_conversion: _InputConversion[IFT, IPT] = input_conversion - 
self._model = _create_internal_model(layers, is_for_classification=True) + self._model = _create_internal_model(input_conversion, layers, is_for_classification=True) self._output_conversion: _OutputConversion[IPT, OT] = output_conversion self._input_size = self._model.input_size self._batch_size = 1 @@ -345,7 +348,7 @@ def is_fitted(self) -> bool: return self._is_fitted -def _create_internal_model(layers: list[_Layer], is_for_classification: bool) -> nn.Module: +def _create_internal_model(input_conversion: _InputConversion[IFT, IPT], layers: list[_Layer], is_for_classification: bool) -> nn.Module: from torch import nn class _InternalModel(nn.Module): @@ -359,13 +362,18 @@ def __init__(self, layers: list[_Layer], is_for_classification: bool) -> None: for layer in layers: if previous_output_size is not None: layer._set_input_size(previous_output_size) - internal_layers.append(layer._get_internal_layer(activation_function="relu")) + elif isinstance(input_conversion, InputConversionImage): + layer._set_input_size(input_conversion._data_size) + if isinstance(layer, FlattenLayer) or isinstance(layer, _Pooling2DLayer): + internal_layers.append(layer._get_internal_layer()) + else: + internal_layers.append(layer._get_internal_layer(activation_function="relu")) previous_output_size = layer.output_size if is_for_classification: internal_layers.pop() if layers[-1].output_size > 2: - internal_layers.append(layers[-1]._get_internal_layer(activation_function="softmax")) + internal_layers.append(layers[-1]._get_internal_layer(activation_function="none")) else: internal_layers.append(layers[-1]._get_internal_layer(activation_function="sigmoid")) self._pytorch_layers = nn.Sequential(*internal_layers) diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py new file mode 100644 index 000000000..fcfe2f850 --- /dev/null +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from safeds.data.image.containers import ImageDataset, ImageList +from safeds.data.image.containers._image_dataset import _TableAsTensor +from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList + +if TYPE_CHECKING: + from torch import Tensor, LongTensor + +from safeds.ml.nn._output_conversion import _OutputConversion + + +class OutputConversionImage(_OutputConversion[ImageList, ImageDataset]): + """The output conversion for a neural network, defines the output parameters for the neural network.""" + + def __init__(self, output_is_image: bool) -> None: + """ + Define the output parameters for the neural network in the output conversion. 
+ + Parameters + ---------- + """ + self._output_is_image = output_is_image + + def _data_conversion(self, input_data: ImageList, output_data: Tensor) -> ImageDataset: + from torch import LongTensor + + if not isinstance(input_data, _SingleSizeImageList): + raise ValueError("The given input ImageList contains images of different sizes.") + + if self._output_is_image: + return ImageDataset(input_data, _SingleSizeImageList._create_from_tensor(output_data, list(range(output_data.size(dim=0))))) + else: + im_dataset = ImageDataset.__new__(ImageDataset) + im_dataset._output = _TableAsTensor._from_tensor(output_data) + im_dataset._shuffle_tensor_indices = LongTensor(list(range(len(input_data)))) + im_dataset._shuffle_after_epoch = False + im_dataset._batch_size = 1 + im_dataset._next_batch_index = 0 + im_dataset._input_size = input_data.sizes[0] + im_dataset._input = input_data + return im_dataset diff --git a/src/safeds/ml/nn/_pooling2d_layer.py b/src/safeds/ml/nn/_pooling2d_layer.py new file mode 100644 index 000000000..581b2aa71 --- /dev/null +++ b/src/safeds/ml/nn/_pooling2d_layer.py @@ -0,0 +1,87 @@ +from __future__ import annotations + +import math +from typing import TYPE_CHECKING + +from safeds.data.image.typing import ImageSize + +if TYPE_CHECKING: + from torch import Tensor, nn + +from safeds.ml.nn._layer import _Layer + + +def _create_internal_model(strategy: str, kernel_size: int, padding: int, stride: int) -> nn.Module: + from torch import nn + + class _InternalLayer(nn.Module): + def __init__(self, strategy: str, kernel_size: int, padding: int, stride: int): + super().__init__() + match strategy: + case "max": + self._layer = nn.MaxPool2d(kernel_size=kernel_size, padding=padding, stride=stride) + case "avg": + self._layer = nn.AvgPool2d(kernel_size=kernel_size, padding=padding, stride=stride) + case _: + raise ValueError(f"Unknown pooling strategy: {strategy}") + + def forward(self, x: Tensor) -> Tensor: + return self._layer(x) + + return _InternalLayer(strategy, kernel_size, padding, stride) + + +class _Pooling2DLayer(_Layer): + def __init__(self, strategy: str, kernel_size: int, *, stride: int = -1, padding: int = 0): + """ + Create a Pooling 2D Layer. + """ + self._strategy = strategy + self._kernel_size = kernel_size + self._stride = stride if stride != -1 else kernel_size + self._padding = padding + + def _get_internal_layer(self) -> nn.Module: + return _create_internal_model(self._strategy, self._kernel_size, self._padding, self._stride) + + @property + def input_size(self) -> ImageSize: + """ + Get the input_size of this layer. + + Returns + ------- + result: + The amount of values being passed into this layer. + """ + return self._input_size + + @property + def output_size(self) -> ImageSize: + """ + Get the output_size of this layer. + + Returns + ------- + result: + The Number of Neurons in this layer. 
+ """ + return self._output_size + + def _set_input_size(self, input_size: ImageSize) -> None: + self._input_size = input_size + new_width = math.ceil((input_size.width + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride)) + new_height = math.ceil((input_size.height + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride)) + self._output_size = ImageSize(new_width, new_height, self._input_size.channel, _ignore_invalid_channel=True) + + +class MaxPooling2DLayer(_Pooling2DLayer): + + def __init__(self, kernel_size: int, *, stride: int = -1, padding: int = 0) -> None: + super().__init__("max", kernel_size, stride=stride, padding=padding) + + +class AvgPooling2DLayer(_Pooling2DLayer): + + def __init__(self, kernel_size: int, *, stride: int = -1, padding: int = 0) -> None: + super().__init__("avg", kernel_size, stride=stride, padding=padding) From 7ae5b567cfc089d2bef0009c9b876f46f540a35d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Tue, 23 Apr 2024 16:57:37 +0200 Subject: [PATCH 03/42] test: fixed one test --- tests/safeds/ml/nn/test_forward_layer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/safeds/ml/nn/test_forward_layer.py b/tests/safeds/ml/nn/test_forward_layer.py index 29c2a8a6d..40306cc67 100644 --- a/tests/safeds/ml/nn/test_forward_layer.py +++ b/tests/safeds/ml/nn/test_forward_layer.py @@ -45,7 +45,7 @@ def test_should_raise_if_unknown_activation_function_is_passed(activation_functi ValueError, match=rf"Unknown Activation Function: {activation_function}", ): - ForwardLayer(output_size=1, input_size=1)._get_internal_layer(activation_function) + ForwardLayer(output_size=1, input_size=1)._get_internal_layer(activation_function=activation_function) @pytest.mark.parametrize( From 83a008e8d45bf7a4d9e916828c80a244174aeb93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Tue, 23 Apr 2024 21:49:34 +0200 Subject: [PATCH 04/42] test: added tests for `ImageSize` and `Image.size` refactor: extracted test devices to `helpers.devices` --- tests/helpers/__init__.py | 12 ++ tests/helpers/_devices.py | 19 ++ .../data/image/containers/test_image.py | 195 ++++++++---------- tests/safeds/data/image/typing/__init__.py | 0 .../data/image/typing/test_image_size.py | 151 ++++++++++++++ 5 files changed, 273 insertions(+), 104 deletions(-) create mode 100644 tests/helpers/_devices.py create mode 100644 tests/safeds/data/image/typing/__init__.py create mode 100644 tests/safeds/data/image/typing/test_image_size.py diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py index 7ca043461..aa1935ee3 100644 --- a/tests/helpers/__init__.py +++ b/tests/helpers/__init__.py @@ -3,6 +3,13 @@ assert_that_tagged_tables_are_equal, assert_that_time_series_are_equal, ) +from ._devices import ( + device_cpu, + device_cuda, + get_devices, + get_devices_ids, + skip_if_device_not_available, +) from ._images import ( grayscale_jpg_id, grayscale_jpg_path, @@ -32,10 +39,14 @@ "assert_that_tables_are_close", "assert_that_tagged_tables_are_equal", "assert_that_time_series_are_equal", + "device_cpu", + "device_cuda", "grayscale_jpg_id", "grayscale_jpg_path", "grayscale_png_id", "grayscale_png_path", + "get_devices", + "get_devices_ids", "images_all", "images_all_channel", "images_all_channel_ids", @@ -49,6 +60,7 @@ "resolve_resource_path", "rgba_png_id", "rgba_png_path", + "skip_if_device_not_available", "test_images_folder", "white_square_jpg_id", "white_square_jpg_path", diff --git a/tests/helpers/_devices.py 
b/tests/helpers/_devices.py new file mode 100644 index 000000000..b4405bcc3 --- /dev/null +++ b/tests/helpers/_devices.py @@ -0,0 +1,19 @@ +import pytest +import torch +from torch.types import Device + +device_cpu = torch.device("cpu") +device_cuda = torch.device("cuda") + + +def get_devices() -> list[torch.device]: + return [device_cpu, device_cuda] + + +def get_devices_ids() -> list[str]: + return ["cpu", "cuda"] + + +def skip_if_device_not_available(device: Device) -> None: + if device == device_cuda and not torch.cuda.is_available(): + pytest.skip("This test requires cuda") diff --git a/tests/safeds/data/image/containers/test_image.py b/tests/safeds/data/image/containers/test_image.py index 3555ad993..f2484950c 100644 --- a/tests/safeds/data/image/containers/test_image.py +++ b/tests/safeds/data/image/containers/test_image.py @@ -6,12 +6,14 @@ import pytest import torch from safeds.data.image.containers import Image +from safeds.data.image.typing import ImageSize from safeds.data.tabular.containers import Table from safeds.exceptions import IllegalFormatError, OutOfBoundsError from syrupy import SnapshotAssertion from torch.types import Device from tests.helpers import ( + device_cuda, grayscale_jpg_id, grayscale_jpg_path, grayscale_png_id, @@ -30,25 +32,9 @@ white_square_jpg_id, white_square_jpg_path, white_square_png_id, - white_square_png_path, + white_square_png_path, get_devices, get_devices_ids, skip_if_device_not_available, ) -_device_cuda = torch.device("cuda") -_device_cpu = torch.device("cpu") - - -def _test_devices() -> list[torch.device]: - return [_device_cpu, _device_cuda] - - -def _test_devices_ids() -> list[str]: - return ["cpu", "cuda"] - - -def _skip_if_device_not_available(device: Device) -> None: - if device == _device_cuda and not torch.cuda.is_available(): - pytest.skip("This test requires cuda") - def _assert_width_height_channel(image1: Image, image2: Image) -> None: assert image1.width == image2.width @@ -56,7 +42,7 @@ def _assert_width_height_channel(image1: Image, image2: Image) -> None: assert image1.channel == image2.channel -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestFromFile: @pytest.mark.parametrize( "resource_path", @@ -67,7 +53,7 @@ class TestFromFile: ], ) def test_should_load_from_file(self, resource_path: str | Path, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) assert image != Image(torch.empty(1, 1, 1)) @@ -82,12 +68,12 @@ def test_should_load_from_file(self, resource_path: str | Path, device: Device) ids=["missing_file_jpg", "missing_file_jpg_Path", "missing_file_png", "missing_file_png_Path"], ) def test_should_raise_if_file_not_found(self, resource_path: str | Path, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) with pytest.raises(FileNotFoundError): Image.from_file(resolve_resource_path(resource_path), device) -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestFromBytes: @pytest.mark.parametrize( "resource_path", @@ -95,7 +81,7 @@ class TestFromBytes: ids=[plane_jpg_id, white_square_jpg_id, white_square_png_id, grayscale_jpg_id, grayscale_png_id], ) def test_should_write_and_load_bytes_jpeg(self, resource_path: str | Path, device: 
Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) image_copy = Image.from_bytes(typing.cast(bytes, image._repr_jpeg_()), device) _assert_width_height_channel(image, image_copy) @@ -106,13 +92,13 @@ def test_should_write_and_load_bytes_jpeg(self, resource_path: str | Path, devic ids=images_all_ids(), ) def test_should_write_and_load_bytes_png(self, resource_path: str | Path, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) image_copy = Image.from_bytes(image._repr_png_(), device) assert image == image_copy -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestReprJpeg: @pytest.mark.parametrize( "resource_path", @@ -120,7 +106,7 @@ class TestReprJpeg: ids=[plane_jpg_id, white_square_jpg_id, white_square_png_id, grayscale_jpg_id, grayscale_png_id], ) def test_should_return_bytes(self, resource_path: str | Path, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) assert isinstance(image._repr_jpeg_(), bytes) @@ -133,12 +119,12 @@ def test_should_return_bytes(self, resource_path: str | Path, device: Device) -> ids=[plane_png_id, rgba_png_id], ) def test_should_return_none_if_image_has_alpha_channel(self, resource_path: str | Path, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) assert image._repr_jpeg_() is None -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestReprPng: @pytest.mark.parametrize( "resource_path", @@ -146,12 +132,12 @@ class TestReprPng: ids=images_all_ids(), ) def test_should_return_bytes(self, resource_path: str | Path, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) assert isinstance(image._repr_png_(), bytes) -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestToJpegFile: @pytest.mark.parametrize( "resource_path", @@ -159,7 +145,7 @@ class TestToJpegFile: ids=[plane_jpg_id, white_square_jpg_id, white_square_png_id, grayscale_jpg_id, grayscale_png_id], ) def test_should_save_file(self, resource_path: str | Path, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) with NamedTemporaryFile(suffix=".jpg") as tmp_jpeg_file: tmp_jpeg_file.close() @@ -178,7 +164,7 @@ def test_should_save_file(self, resource_path: str | Path, device: Device) -> No ids=[plane_png_id, rgba_png_id], ) def test_should_raise_if_image_has_alpha_channel(self, resource_path: str | Path, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) with NamedTemporaryFile(suffix=".jpg") as tmp_jpeg_file: tmp_jpeg_file.close() @@ -189,7 +175,7 @@ def 
test_should_raise_if_image_has_alpha_channel(self, resource_path: str | Path image.to_jpeg_file(tmp_file.name) -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestToPngFile: @pytest.mark.parametrize( "resource_path", @@ -197,7 +183,7 @@ class TestToPngFile: ids=images_all_ids(), ) def test_should_save_file(self, resource_path: str | Path, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) with NamedTemporaryFile(suffix=".png") as tmp_png_file: tmp_png_file.close() @@ -208,7 +194,7 @@ def test_should_save_file(self, resource_path: str | Path, device: Device) -> No assert image == image_r -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestProperties: @pytest.mark.parametrize( ("resource_path", "width", "height", "channel"), @@ -274,29 +260,30 @@ def test_should_return_image_properties( channel: int, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) assert image.width == width assert image.height == height assert image.channel == channel + assert image.size == ImageSize(width, height, channel) class TestEQ: - @pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) + @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) @pytest.mark.parametrize( "resource_path", images_all(), ids=images_all_ids(), ) def test_should_be_equal(self, resource_path: str, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) image2 = Image.from_file(resolve_resource_path(resource_path), device) assert image == image2 - @pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) + @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) def test_should_not_be_equal(self, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(plane_png_path), device) image2 = Image.from_file(resolve_resource_path(white_square_png_path), device) assert image != image2 @@ -307,48 +294,48 @@ def test_should_not_be_equal(self, device: Device) -> None: ids=images_all_ids(), ) def test_should_be_equal_different_devices(self, resource_path: str) -> None: - _skip_if_device_not_available(_device_cuda) + skip_if_device_not_available(device_cuda) image = Image.from_file(resolve_resource_path(resource_path), torch.device("cpu")) image2 = Image.from_file(resolve_resource_path(resource_path), torch.device("cuda")) assert image == image2 assert image2 == image def test_should_not_be_equal_different_devices(self) -> None: - _skip_if_device_not_available(_device_cuda) + skip_if_device_not_available(device_cuda) image = Image.from_file(resolve_resource_path(plane_png_path), torch.device("cpu")) image2 = Image.from_file(resolve_resource_path(white_square_png_path), torch.device("cuda")) assert image != image2 assert image2 != image - @pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) + @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) @pytest.mark.parametrize( 
"resource_path", images_all(), ids=images_all_ids(), ) def test_should_raise(self, resource_path: str, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) other = Table() assert (image.__eq__(other)) is NotImplemented class TestHash: - @pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) + @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) @pytest.mark.parametrize( "resource_path", images_all(), ids=images_all_ids(), ) def test_should_hash_be_equal(self, resource_path: str, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) image2 = Image.from_file(resolve_resource_path(resource_path), device) assert hash(image) == hash(image2) - @pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) + @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) def test_should_hash_not_be_equal(self, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(plane_png_path), device) image2 = Image.from_file(resolve_resource_path(white_square_png_path), device) assert hash(image) != hash(image2) @@ -359,19 +346,19 @@ def test_should_hash_not_be_equal(self, device: Device) -> None: ids=images_all_ids(), ) def test_should_hash_be_equal_different_devices(self, resource_path: str) -> None: - _skip_if_device_not_available(_device_cuda) + skip_if_device_not_available(device_cuda) image = Image.from_file(resolve_resource_path(resource_path), torch.device("cpu")) image2 = Image.from_file(resolve_resource_path(resource_path), torch.device("cuda")) assert hash(image) == hash(image2) def test_should_hash_not_be_equal_different_devices(self) -> None: - _skip_if_device_not_available(_device_cuda) + skip_if_device_not_available(device_cuda) image = Image.from_file(resolve_resource_path(plane_png_path), torch.device("cpu")) image2 = Image.from_file(resolve_resource_path(white_square_png_path), torch.device("cuda")) assert hash(image) != hash(image2) -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestChangeChannel: @pytest.mark.parametrize( "resource_path", @@ -386,7 +373,7 @@ def test_should_change_channel( snapshot_png_image: SnapshotAssertion, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) new_image = image.change_channel(channel) assert new_image.channel == channel @@ -399,13 +386,13 @@ def test_should_change_channel( ) @pytest.mark.parametrize("channel", [2], ids=["invalid-channel"]) def test_should_raise(self, resource_path: str, channel: int, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) with pytest.raises(ValueError, match=rf"Channel {channel} is not a valid channel option. 
Use either 1, 3 or 4"): image.change_channel(channel) -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestResize: @pytest.mark.parametrize( "resource_path", @@ -438,7 +425,7 @@ def test_should_return_resized_image( snapshot_png_image: SnapshotAssertion, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) new_image = image.resize(new_width, new_height) assert new_image.width == new_width @@ -458,7 +445,7 @@ def test_should_return_resized_image( ids=["invalid width", "invalid height", "invalid width and height"], ) def test_should_raise(self, resource_path: str, new_width: int, new_height: int, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) with pytest.raises( OutOfBoundsError, @@ -474,13 +461,13 @@ class TestDevices: ids=images_all_ids(), ) def test_should_change_device(self, resource_path: str) -> None: - _skip_if_device_not_available(_device_cuda) + skip_if_device_not_available(device_cuda) image = Image.from_file(resolve_resource_path(resource_path), torch.device("cpu")) new_device = torch.device("cuda", 0) assert image._set_device(new_device).device == new_device -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestConvertToGrayscale: @pytest.mark.parametrize( "resource_path", @@ -493,14 +480,14 @@ def test_convert_to_grayscale( snapshot_png_image: SnapshotAssertion, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) grayscale_image = image.convert_to_grayscale() assert grayscale_image == snapshot_png_image _assert_width_height_channel(image, grayscale_image) -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestCrop: @pytest.mark.parametrize( "resource_path", @@ -513,7 +500,7 @@ def test_should_return_cropped_image( snapshot_png_image: SnapshotAssertion, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) image_cropped = image.crop(0, 0, 100, 100) assert image_cropped == snapshot_png_image @@ -536,7 +523,7 @@ def test_should_raise_invalid_size( new_height: int, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) with pytest.raises( OutOfBoundsError, @@ -555,7 +542,7 @@ def test_should_raise_invalid_size( ids=["invalid x", "invalid y", "invalid x and y"], ) def test_should_raise_invalid_coordinates(self, resource_path: str, new_x: int, new_y: int, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) with pytest.raises( OutOfBoundsError, @@ -580,7 +567,7 @@ def test_should_warn_if_coordinates_outsize_image( new_y: int, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = 
Image.from_file(resolve_resource_path(resource_path), device) image_blank_tensor = torch.zeros((image.channel, 1, 1), device=device) with pytest.warns( @@ -591,7 +578,7 @@ def test_should_warn_if_coordinates_outsize_image( assert torch.all(torch.eq(cropped_image._image_tensor, image_blank_tensor)) -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestFlipVertically: @pytest.mark.parametrize( "resource_path", @@ -604,7 +591,7 @@ def test_should_flip_vertically( snapshot_png_image: SnapshotAssertion, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) image_flip_v = image.flip_vertically() assert image != image_flip_v @@ -617,13 +604,13 @@ def test_should_flip_vertically( ids=images_all_ids(), ) def test_should_be_original(self, resource_path: str, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) image_flip_v_v = image.flip_vertically().flip_vertically() assert image == image_flip_v_v -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestFlipHorizontally: @pytest.mark.parametrize( "resource_path", @@ -636,7 +623,7 @@ def test_should_flip_horizontally( snapshot_png_image: SnapshotAssertion, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) image_flip_h = image.flip_horizontally() assert image != image_flip_h @@ -649,13 +636,13 @@ def test_should_flip_horizontally( ids=images_all_ids(), ) def test_should_be_original(self, resource_path: str, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) image_flip_h_h = image.flip_horizontally().flip_horizontally() assert image == image_flip_h_h -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestBrightness: @pytest.mark.parametrize("factor", [0.5, 10], ids=["small factor", "large factor"]) @pytest.mark.parametrize( @@ -670,7 +657,7 @@ def test_should_adjust_brightness( snapshot_png_image: SnapshotAssertion, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) image_adjusted_brightness = image.adjust_brightness(factor) assert image != image_adjusted_brightness @@ -683,7 +670,7 @@ def test_should_adjust_brightness( ids=images_all_ids(), ) def test_should_not_brighten(self, resource_path: str, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) with pytest.warns( UserWarning, match="Brightness adjustment factor is 1.0, this will not make changes to the image.", @@ -698,13 +685,13 @@ def test_should_not_brighten(self, resource_path: str, device: Device) -> None: ids=images_all_ids(), ) def test_should_raise(self, resource_path: str, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = 
Image.from_file(resolve_resource_path(resource_path), device) with pytest.raises(OutOfBoundsError, match=r"factor \(=-1\) is not inside \[0, \u221e\)."): image.adjust_brightness(-1) -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestAddNoise: @pytest.mark.parametrize( "standard_deviation", @@ -727,7 +714,7 @@ def test_should_add_noise( snapshot_png_image: SnapshotAssertion, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) torch.manual_seed(0) image = Image.from_file(resolve_resource_path(resource_path), device) image_noise = image.add_noise(standard_deviation) @@ -750,7 +737,7 @@ def test_should_raise_standard_deviation( standard_deviation: float, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) with pytest.raises( OutOfBoundsError, @@ -759,7 +746,7 @@ def test_should_raise_standard_deviation( image.add_noise(standard_deviation) -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestAdjustContrast: @pytest.mark.parametrize("factor", [0.75, 5], ids=["small factor", "large factor"]) @pytest.mark.parametrize( @@ -774,7 +761,7 @@ def test_should_adjust_contrast( snapshot_png_image: SnapshotAssertion, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) image_adjusted_contrast = image.adjust_contrast(factor) assert image != image_adjusted_contrast @@ -787,7 +774,7 @@ def test_should_adjust_contrast( ids=images_all_ids(), ) def test_should_not_adjust_contrast(self, resource_path: str, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) with pytest.warns( UserWarning, match="Contrast adjustment factor is 1.0, this will not make changes to the image.", @@ -802,12 +789,12 @@ def test_should_not_adjust_contrast(self, resource_path: str, device: Device) -> ids=images_all_ids(), ) def test_should_raise_negative_contrast(self, resource_path: str, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) with pytest.raises(OutOfBoundsError, match=r"factor \(=-1.0\) is not inside \[0, \u221e\)."): Image.from_file(resolve_resource_path(resource_path), device).adjust_contrast(-1.0) -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestAdjustColor: @pytest.mark.parametrize("factor", [2, 0.5, 0], ids=["add color", "remove color", "gray"]) @pytest.mark.parametrize( @@ -822,7 +809,7 @@ def test_should_adjust_colors( snapshot_png_image: SnapshotAssertion, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) image_adjusted_color_balance = image.adjust_color_balance(factor) assert image != image_adjusted_color_balance @@ -834,7 +821,7 @@ def test_should_adjust_colors( ids=images_all_ids(), ) def test_should_not_adjust_colors_factor_1(self, resource_path: str, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) with 
pytest.warns( UserWarning, match="Color adjustment factor is 1.0, this will not make changes to the image.", @@ -849,7 +836,7 @@ def test_should_not_adjust_colors_factor_1(self, resource_path: str, device: Dev ids=[grayscale_png_id, grayscale_jpg_id], ) def test_should_not_adjust_colors_channel_1(self, resource_path: str, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) with pytest.warns( UserWarning, match="Color adjustment will not have an affect on grayscale images with only one channel", @@ -864,12 +851,12 @@ def test_should_not_adjust_colors_channel_1(self, resource_path: str, device: De ids=images_all_ids(), ) def test_should_raise_negative_color_adjust(self, resource_path: str, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) with pytest.raises(OutOfBoundsError, match=r"factor \(=-1.0\) is not inside \[0, \u221e\)."): Image.from_file(resolve_resource_path(resource_path), device).adjust_color_balance(-1.0) -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestBlur: @pytest.mark.parametrize( "resource_path", @@ -882,7 +869,7 @@ def test_should_return_blurred_image( snapshot_png_image: SnapshotAssertion, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device=device) image_blurred = image.blur(2) assert image_blurred == snapshot_png_image @@ -894,7 +881,7 @@ def test_should_return_blurred_image( ids=images_asymmetric_ids(), ) def test_should_not_blur_radius_0(self, resource_path: str, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) with pytest.warns( UserWarning, match="Blur radius is 0, this will not make changes to the image.", @@ -909,7 +896,7 @@ def test_should_not_blur_radius_0(self, resource_path: str, device: Device) -> N ids=images_asymmetric_ids(), ) def test_should_raise_blur_radius_out_of_bounds(self, resource_path: str, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) with pytest.raises( OutOfBoundsError, @@ -923,7 +910,7 @@ def test_should_raise_blur_radius_out_of_bounds(self, resource_path: str, device image.blur(min(image.width, image.height)) -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestSharpen: @pytest.mark.parametrize("factor", [0, 0.5, 10], ids=["zero factor", "small factor", "large factor"]) @pytest.mark.parametrize( @@ -938,7 +925,7 @@ def test_should_sharpen( snapshot_png_image: SnapshotAssertion, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) image_sharpened = image.sharpen(factor) assert image != image_sharpened @@ -951,7 +938,7 @@ def test_should_sharpen( ids=images_all_ids(), ) def test_should_raise_negative_sharpen(self, resource_path: str, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) with pytest.raises(OutOfBoundsError, match=r"factor \(=-1.0\) is not inside \[0, \u221e\)."): Image.from_file(resolve_resource_path(resource_path), device).sharpen(-1.0) 
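The `match` arguments in these tests are regular expressions, which is why the interval notation is escaped and the infinity sign is written as \u221e; a small standalone check of the pattern style:

import re

message = "factor (=-1.0) is not inside [0, \u221e)."
pattern = r"factor \(=-1.0\) is not inside \[0, \u221e\)."
assert re.search(pattern, message) is not None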
@@ -961,14 +948,14 @@ def test_should_raise_negative_sharpen(self, resource_path: str, device: Device) ids=images_all_ids(), ) def test_should_not_sharpen(self, resource_path: str, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) with pytest.warns(UserWarning, match="Sharpen factor is 1.0, this will not make changes to the image."): image = Image.from_file(resolve_resource_path(resource_path), device) image_sharpened = image.sharpen(1) assert image == image_sharpened -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestInvertColors: @pytest.mark.parametrize( "resource_path", @@ -981,14 +968,14 @@ def test_should_invert_colors( snapshot_png_image: SnapshotAssertion, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) image_inverted_colors = image.invert_colors() assert image_inverted_colors == snapshot_png_image _assert_width_height_channel(image, image_inverted_colors) -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestRotate: @pytest.mark.parametrize( "resource_path", @@ -1001,7 +988,7 @@ def test_should_return_clockwise_rotated_image( snapshot_png_image: SnapshotAssertion, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) image_right_rotated = image.rotate_right() assert image_right_rotated == snapshot_png_image @@ -1018,7 +1005,7 @@ def test_should_return_counter_clockwise_rotated_image( snapshot_png_image: SnapshotAssertion, device: Device, ) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) image_left_rotated = image.rotate_left() assert image_left_rotated == snapshot_png_image @@ -1030,7 +1017,7 @@ def test_should_return_counter_clockwise_rotated_image( ids=images_all_ids(), ) def test_should_return_flipped_image(self, resource_path: str, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) image_left_rotated = image.rotate_left().rotate_left() image_right_rotated = image.rotate_right().rotate_right() @@ -1046,7 +1033,7 @@ def test_should_return_flipped_image(self, resource_path: str, device: Device) - ids=images_all_ids(), ) def test_should_be_original(self, resource_path: str, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) image_left_right_rotated = image.rotate_left().rotate_right() image_right_left_rotated = image.rotate_right().rotate_left() @@ -1058,7 +1045,7 @@ def test_should_be_original(self, resource_path: str, device: Device) -> None: assert image == image_left_r_r_r_r -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestFindEdges: @pytest.mark.parametrize( "resource_path", @@ -1071,14 +1058,14 @@ def test_should_return_edges_of_image( snapshot_png_image: SnapshotAssertion, device: Device, ) -> None: - 
_skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device=device) image_edges = image.find_edges() assert image_edges == snapshot_png_image _assert_width_height_channel(image, image_edges) -@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids()) +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestSizeof: @pytest.mark.parametrize( "resource_path", @@ -1086,6 +1073,6 @@ class TestSizeof: ids=images_all_ids(), ) def test_should_size_be_greater_than_normal_object(self, resource_path: str | Path, device: Device) -> None: - _skip_if_device_not_available(device) + skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) assert sys.getsizeof(image) >= image.width * image.height * image.channel diff --git a/tests/safeds/data/image/typing/__init__.py b/tests/safeds/data/image/typing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/safeds/data/image/typing/test_image_size.py b/tests/safeds/data/image/typing/test_image_size.py new file mode 100644 index 000000000..273f21176 --- /dev/null +++ b/tests/safeds/data/image/typing/test_image_size.py @@ -0,0 +1,151 @@ +import sys +from typing import Any + +import pytest +from torch.types import Device + +from safeds.data.image.containers import Image +from safeds.data.image.typing import ImageSize +from safeds.exceptions import OutOfBoundsError +from tests.helpers import resolve_resource_path, images_all, images_all_ids, get_devices, get_devices_ids, \ + skip_if_device_not_available, plane_png_path + + +class TestFromImage: + + @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) + @pytest.mark.parametrize( + "resource_path", + images_all(), + ids=images_all_ids() + ) + def test_should_create(self, resource_path: str, device: Device) -> None: + skip_if_device_not_available(device) + image = Image.from_file(resolve_resource_path(resource_path), device) + expected_image_size = ImageSize(image.width, image.height, image.channel) + assert ImageSize.from_image(image) == expected_image_size + + +class TestEq: + + @pytest.mark.parametrize( + ("image_size", "width", "height", "channel"), + [ + ( + ImageSize(1, 2, 3), 1, 2, 3 + ) + ] + ) + def test_should_be_equal(self, image_size: ImageSize, width: int, height: int, channel: int) -> None: + assert image_size == ImageSize(width, height, channel) + + @pytest.mark.parametrize( + ("image_size", "width", "height", "channel"), + [ + ( + ImageSize(1, 2, 3), 3, 2, 1 + ) + ] + ) + def test_should_not_be_equal(self, image_size: ImageSize, width: int, height: int, channel: int) -> None: + assert image_size != ImageSize(width, height, channel) + + @pytest.mark.parametrize( + ("image_size", "other"), + [ + (ImageSize(1, 2, 3), None), + (ImageSize(1, 2, 3), Image.from_file(resolve_resource_path(plane_png_path))), + ], + ids=["None", "Image"] + ) + def test_should_be_not_implemented(self, image_size: ImageSize, other: Any) -> None: + assert image_size.__eq__(other) is NotImplemented + + +class TestHash: + + @pytest.mark.parametrize( + "resource_path", + images_all(), + ids=images_all_ids(), + ) + def test_hash_should_be_equal(self, resource_path: str) -> None: + image = Image.from_file(resolve_resource_path(resource_path)) + image2 = Image.from_file(resolve_resource_path(resource_path)) + assert hash(ImageSize.from_image(image)) == hash(ImageSize.from_image(image2)) + + def 
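A hedged sketch tying `Image.size` to `ImageSize.from_image`, as the new tests assert; the file path is illustrative:

from safeds.data.image.containers import Image
from safeds.data.image.typing import ImageSize

image = Image.from_file("plane.png")  # illustrative path
assert image.size == ImageSize(image.width, image.height, image.channel)
assert ImageSize.from_image(image) == image.size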
test_hash_should_not_be_equal(self) -> None: + assert hash(ImageSize(1, 2, 3)) != hash(ImageSize(3, 2, 1)) + + +class TestSizeOf: + + @pytest.mark.parametrize( + "image_size", + [ImageSize(1, 2, 3)] + ) + def test_should_size_be_greater_than_normal_object(self, image_size: ImageSize) -> None: + assert sys.getsizeof(image_size) >= sys.getsizeof(0) * 3 + + +class TestProperties: + + @pytest.mark.parametrize( + "width", + list(range(1, 5)) + ) + @pytest.mark.parametrize( + "height", + list(range(1, 5)) + ) + @pytest.mark.parametrize( + "channel", + [1, 3, 4] + ) + def test_width_height_channel(self, width: int, height: int, channel: int): + image_size = ImageSize(width, height, channel) + assert image_size.width == width + assert image_size.height == height + assert image_size.channel == channel + + @pytest.mark.parametrize( + "channel", + [2, 5, 6] + ) + def test_should_ignore_invalid_channel(self, channel: int) -> None: + assert ImageSize(1, 1, channel, _ignore_invalid_channel=True).channel == channel + + +class TestErrors: + + @pytest.mark.parametrize( + "width", + [-1, 0] + ) + def test_should_raise_invalid_width(self, width: int) -> None: + with pytest.raises(OutOfBoundsError, match=rf"{width} is not inside \[1, \u221e\)."): + ImageSize(width, 1, 1) + + @pytest.mark.parametrize( + "height", + [-1, 0] + ) + def test_should_raise_invalid_height(self, height: int) -> None: + with pytest.raises(OutOfBoundsError, match=rf"{height} is not inside \[1, \u221e\)."): + ImageSize(1, height, 1) + + @pytest.mark.parametrize( + "channel", + [-1, 0, 2, 5] + ) + def test_should_raise_invalid_channel(self, channel: int) -> None: + with pytest.raises(ValueError, match=rf"Channel {channel} is not a valid channel option. Use either 1, 3 or 4"): + ImageSize(1, 1, channel) + + @pytest.mark.parametrize( + "channel", + [-1, 0] + ) + def test_should_raise_negative_channel_ignore_invalid_channel(self, channel: int) -> None: + with pytest.raises(OutOfBoundsError, match=rf"channel \(={channel}\) is not inside \[1, \u221e\)."): + ImageSize(1, 1, channel, _ignore_invalid_channel=True) From cf7bfa4169ebb6a6677314507eccc2a385ced763 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Wed, 24 Apr 2024 15:07:34 +0200 Subject: [PATCH 05/42] test: added tests for `ImageList.sizes` test: added test for activation function in `ForwardLayer` test: added test for ImageToTable workflow of a cnn --- .../data/image/containers/test_image_list.py | 67 +++++++++++++++++++ tests/safeds/ml/nn/test_cnn_workflow.py | 46 +++++++++++++ tests/safeds/ml/nn/test_forward_layer.py | 17 +++++ 3 files changed, 130 insertions(+) create mode 100644 tests/safeds/ml/nn/test_cnn_workflow.py diff --git a/tests/safeds/data/image/containers/test_image_list.py b/tests/safeds/data/image/containers/test_image_list.py index 0761a65ea..6eec2a6a3 100644 --- a/tests/safeds/data/image/containers/test_image_list.py +++ b/tests/safeds/data/image/containers/test_image_list.py @@ -1,3 +1,4 @@ +import math import random import sys import tempfile @@ -5,6 +6,8 @@ import pytest import torch +from torch import Tensor + from safeds._config import _get_device from safeds.data.image.containers import Image, ImageList from safeds.data.image.containers._empty_image_list import _EmptyImageList @@ -151,6 +154,9 @@ def test_from_files(self, resource_path1: str, resource_path2: str, resource_pat # Test channel assert image_list.channel == expected_channel + # Test sizes + assert image_list.sizes == [image1.size, image2.size, image3.size] + # Test 
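A short usage sketch of the `ImageSize` channel validation exercised above; `_ignore_invalid_channel` is internal API, shown only to mirror the tests:

from safeds.data.image.typing import ImageSize

size = ImageSize(640, 480, 3)
assert (size.width, size.height, size.channel) == (640, 480, 3)

# Channels other than 1, 3 or 4 raise a ValueError unless explicitly tolerated.
assert ImageSize(1, 1, 2, _ignore_invalid_channel=True).channel == 2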
number_of_sizes assert image_list.number_of_sizes == len({(image.width, image.height) for image in [image1, image2, image3]}) @@ -448,7 +454,10 @@ class TestFromFiles: def test_from_files_creation(self, resource_path: str | Path, snapshot_png_image_list: SnapshotAssertion) -> None: torch.set_default_device(torch.device("cpu")) image_list = ImageList.from_files(resolve_resource_path(resource_path)) + image_list_returned_filenames, filenames = ImageList.from_files(resolve_resource_path(resource_path), return_filenames=True) assert image_list == snapshot_png_image_list + assert image_list == image_list_returned_filenames + assert len(image_list) == len(filenames) @pytest.mark.parametrize( "resource_path", @@ -600,6 +609,7 @@ def test_should_save_images_in_directories_for_different_sizes(self, resource_pa assert set(image_list.widths) == set(image_list_loaded.widths) assert set(image_list.heights) == set(image_list_loaded.heights) assert image_list.channel == image_list_loaded.channel + assert set(image_list.sizes) == set(image_list_loaded.sizes) for tmp_dir in tmp_dirs: tmp_dir.cleanup() @@ -694,6 +704,7 @@ def test_should_save_images_in_directories_for_different_sizes(self, resource_pa assert set(image_list.widths) == set(image_list_loaded.widths) assert set(image_list.heights) == set(image_list_loaded.heights) assert image_list.channel == image_list_loaded.channel + assert set(image_list.sizes) == set(image_list_loaded.sizes) for tmp_dir in tmp_dirs: tmp_dir.cleanup() @@ -1203,6 +1214,59 @@ def test_should_not_adjust( assert image_list_original == image_list_clone +class TestSingleSizeImageList: + + @pytest.mark.parametrize( + "tensor", + [ + torch.ones(4, 3, 1, 1) + ] + ) + def test_create_from_tensor(self, tensor: Tensor) -> None: + image_list = _SingleSizeImageList._create_from_tensor(tensor, list(range(tensor.size(0)))) + assert image_list._tensor_positions_to_indices == list(range(tensor.size(0))) + assert len(image_list) == tensor.size(0) + assert image_list.widths[0] == tensor.size(3) + assert image_list.heights[0] == tensor.size(2) + assert image_list.channel == tensor.size(1) + + @pytest.mark.parametrize( + "tensor", + [ + torch.ones(4, 3, 1, 1, 1), + torch.ones(4, 3, 1) + ], + ids=["5-dim", "3-dim"] + ) + def test_should_raise_from_invalid_tensor(self, tensor: Tensor) -> None: + with pytest.raises(ValueError, match=rf"Invalid Tensor. 
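A hedged sketch of the `return_filenames` flag used above; the directory path is illustrative:

from safeds.data.image.containers import ImageList

image_list, filenames = ImageList.from_files("test_images", return_filenames=True)
assert len(image_list) == len(filenames)  # one source path per loaded image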
This Tensor requires 4 dimensions but has {tensor.dim()}"): + _SingleSizeImageList._create_from_tensor(tensor, list(range(tensor.size(0)))) + + @pytest.mark.parametrize( + "tensor", + [ + torch.randn(16, 4, 4, 4) + ] + ) + def test_get_batch_and_iterate(self, tensor: Tensor) -> None: + image_list = _SingleSizeImageList._create_from_tensor(tensor, list(range(tensor.size(0)))) + batch_size = math.ceil(tensor.size(0) / 1.999) + assert image_list._get_batch(0, batch_size).size(0) == batch_size + assert torch.all(torch.eq(image_list._get_batch(0, 1), image_list._get_batch(0))) + assert torch.all(torch.eq(image_list._get_batch(0, batch_size), tensor[:batch_size].to(torch.float32) / 255)) + assert torch.all(torch.eq(image_list._get_batch(1, batch_size), tensor[batch_size:].to(torch.float32) / 255)) + iterate_image_list = iter(image_list) + assert iterate_image_list == image_list + assert iterate_image_list is not image_list + iterate_image_list._batch_size = batch_size + assert torch.all(torch.eq(image_list._get_batch(0, batch_size), next(iterate_image_list))) + assert torch.all(torch.eq(image_list._get_batch(1, batch_size), next(iterate_image_list))) + with pytest.raises(IndexOutOfBoundsError, match=rf"There is no element at index '{batch_size * 2}'."): + image_list._get_batch(2, batch_size) + with pytest.raises(StopIteration): + next(iterate_image_list) + + class TestEmptyImageList: def test_warn_empty_image_list(self) -> None: @@ -1264,6 +1328,9 @@ def test_heights(self) -> None: def test_channel(self) -> None: assert _EmptyImageList().channel is NotImplemented + def test_sizes(self) -> None: + assert _EmptyImageList().sizes == [] + def test_number_of_sizes(self) -> None: assert _EmptyImageList().number_of_sizes == 0 diff --git a/tests/safeds/ml/nn/test_cnn_workflow.py b/tests/safeds/ml/nn/test_cnn_workflow.py new file mode 100644 index 000000000..129a5d1c7 --- /dev/null +++ b/tests/safeds/ml/nn/test_cnn_workflow.py @@ -0,0 +1,46 @@ +import re + +import pytest +import torch + +from safeds._config import _get_device +from safeds.data.image.containers import ImageList, ImageDataset +from safeds.data.tabular.containers import Table +from safeds.data.tabular.transformation import OneHotEncoder +from safeds.ml.nn import NeuralNetworkClassifier, InputConversionImage, Convolutional2DLayer, MaxPooling2DLayer, \ + FlattenLayer, ForwardLayer, OutputConversionImage +from tests.helpers import resolve_resource_path, images_all + + +class TestImageToTable: + + @pytest.mark.parametrize( + ("seed", "layer_3_bias"), + [ + (1234, [0.5809096097946167, -0.32418742775917053, 0.026058292016386986, 0.5801554918289185]), + (4711, [-0.8114155530929565, -0.9443624019622803, 0.8557258248329163, -0.848240852355957]), + ], + ids=["seed-1234", "seed-4711"] + ) + def test_should_train_model(self, seed: int, layer_3_bias: list[float]): + torch.manual_seed(seed) + torch.set_default_device(_get_device()) + + image_list, filenames = ImageList.from_files(resolve_resource_path(images_all()), return_filenames=True) + image_list = image_list.resize(20, 20) + image_classes = Table({"class": [re.search(r"(.*)\\(.*)\.", filename).group(2) for filename in filenames]}) + one_hot_encoder = OneHotEncoder().fit(image_classes, ["class"]) + image_classes_one_hot_encoded = one_hot_encoder.transform(image_classes) + image_dataset = ImageDataset(image_list, image_classes_one_hot_encoded) + + layers = [ + Convolutional2DLayer(1, 2), + MaxPooling2DLayer(10), + FlattenLayer(), + ForwardLayer(len(one_hot_encoder.get_names_of_added_columns())) + ] + 
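A standalone sketch of the `_get_batch` contract the tests above rely on (internal API, subject to change): batches come out as float32, scaled from [0, 255] down to [0, 1]:

import torch

from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList

tensor = torch.randint(0, 256, (4, 3, 8, 8), dtype=torch.uint8)
image_list = _SingleSizeImageList._create_from_tensor(tensor, list(range(4)))
batch = image_list._get_batch(0, 2)
assert batch.shape == (2, 3, 8, 8) and batch.dtype == torch.float32
assert torch.all(torch.eq(batch, tensor[:2].to(torch.float32) / 255))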
nn_original = NeuralNetworkClassifier(InputConversionImage(image_dataset.input_size), layers, + OutputConversionImage(False)) + nn = nn_original.fit(image_dataset, epoch_size=2) + assert str(nn_original._model.state_dict().values()) != str(nn._model.state_dict().values()) + assert nn._model.state_dict()["_pytorch_layers.3._layer.bias"].tolist() == layer_3_bias diff --git a/tests/safeds/ml/nn/test_forward_layer.py b/tests/safeds/ml/nn/test_forward_layer.py index 40306cc67..63caadfbe 100644 --- a/tests/safeds/ml/nn/test_forward_layer.py +++ b/tests/safeds/ml/nn/test_forward_layer.py @@ -2,6 +2,8 @@ from typing import Any import pytest +from torch import nn + from safeds.exceptions import OutOfBoundsError from safeds.ml.nn import ForwardLayer @@ -33,6 +35,21 @@ def test_should_raise_if_input_size_doesnt_match(input_size: int) -> None: assert ForwardLayer(output_size=1, input_size=input_size).input_size == input_size +@pytest.mark.parametrize( + ("activation_function", "expected_activation_function"), + [ + ("sigmoid", nn.Sigmoid), + ("relu", nn.ReLU), + ("softmax", nn.Softmax), + ("none", None), + ], + ids=["sigmoid", "relu", "softmax", "none"], +) +def test_should_accept_activation_function(activation_function: str, expected_activation_function: type | None) -> None: + forward_layer = ForwardLayer(output_size=1, input_size=1)._get_internal_layer(activation_function=activation_function) + assert forward_layer._fn is None if expected_activation_function is None else isinstance(forward_layer._fn, expected_activation_function) + + @pytest.mark.parametrize( "activation_function", [ From a40ed0e05ea0e7959d5e3fce3b6cedc62202007c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Wed, 24 Apr 2024 18:32:19 +0200 Subject: [PATCH 06/42] feat: changed `ImageDataset` to have generic output type feat: added `OutputConversionImageToTable` and `OutputConversionImageToImage` feat: changed `OutputConversionImage` to be intern and abstract with generic `ImageDataset` --- .../data/image/containers/_image_dataset.py | 27 ++++++++-- src/safeds/ml/nn/__init__.py | 8 +-- src/safeds/ml/nn/_output_conversion_image.py | 53 +++++++++++-------- tests/safeds/ml/nn/test_cnn_workflow.py | 4 +- 4 files changed, 60 insertions(+), 32 deletions(-) diff --git a/src/safeds/data/image/containers/_image_dataset.py b/src/safeds/data/image/containers/_image_dataset.py index e97e11909..2f2411d51 100644 --- a/src/safeds/data/image/containers/_image_dataset.py +++ b/src/safeds/data/image/containers/_image_dataset.py @@ -1,7 +1,7 @@ from __future__ import annotations import copy -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, TypeVar, Generic from safeds._config import _get_device from safeds.data.image.containers import ImageList @@ -13,10 +13,12 @@ if TYPE_CHECKING: from torch import Tensor +T = TypeVar("T", Table, ImageList) -class ImageDataset: - def __init__(self, input_data: ImageList, output_data: ImageList | Table, batch_size=1, shuffle=False) -> None: +class ImageDataset(Generic[T]): + + def __init__(self, input_data: ImageList, output_data: T, batch_size=1, shuffle=False) -> None: import torch self._shuffle_tensor_indices = torch.LongTensor(list(range(len(input_data)))) @@ -71,6 +73,16 @@ def __len__(self) -> int: def input_size(self) -> ImageSize: return self._input_size + def get_input(self) -> ImageList: + return self._input + + def get_output(self) -> T: + output = self._output + if isinstance(output, _TableAsTensor): + return output._to_table() + else: + return output + def 
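A hedged wiring sketch mirroring the workflow test above; `image_dataset` is assumed to be an `ImageDataset` built from images and a one-hot-encoded class table, and the class count of 4 is illustrative:

from safeds.ml.nn import (
    Convolutional2DLayer,
    FlattenLayer,
    ForwardLayer,
    InputConversionImage,
    MaxPooling2DLayer,
    NeuralNetworkClassifier,
    OutputConversionImageToTable,
)

layers = [
    Convolutional2DLayer(1, 2),
    MaxPooling2DLayer(10),
    FlattenLayer(),
    ForwardLayer(4),  # one neuron per one-hot class (assumed: 4 classes)
]
classifier = NeuralNetworkClassifier(
    InputConversionImage(image_dataset.input_size),  # image_dataset assumed above
    layers,
    OutputConversionImageToTable(),
)
fitted = classifier.fit(image_dataset, epoch_size=2)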
_get_batch(self, batch_number: int, batch_size: int | None = None) -> tuple[Tensor, Tensor]: import torch from torch import Tensor @@ -88,9 +100,9 @@ def _get_batch(self, batch_number: int, batch_size: int | None = None) -> tuple[ output_tensor = self._output._tensor[self._shuffle_tensor_indices[batch_size * batch_number:max_index]] return input_tensor, output_tensor - def shuffle(self) -> ImageDataset: + def shuffle(self) -> ImageDataset[T]: import torch - im_dataset: ImageDataset = copy.copy(self) + im_dataset: ImageDataset[T] = copy.copy(self) im_dataset._shuffle_tensor_indices = torch.randperm(len(self)) im_dataset._next_batch_index = 0 return im_dataset @@ -101,6 +113,7 @@ class _TableAsTensor: def __init__(self, table: Table) -> None: import torch + self._column_names = table.column_names self._tensor = torch.Tensor(table._data.to_numpy(copy=True)).to(_get_device()) if not torch.all(self._tensor.sum(dim=1) == torch.ones(self._tensor.size(dim=0))): @@ -112,3 +125,7 @@ def _from_tensor(tensor: Tensor) -> _TableAsTensor: table_as_tensor._tensor = tensor return table_as_tensor + def _to_table(self) -> Table: + table = Table(dict(zip(self._column_names, self._tensor.T.tolist()))) + return table + diff --git a/src/safeds/ml/nn/__init__.py b/src/safeds/ml/nn/__init__.py index 1da37d10b..05b0a9e52 100644 --- a/src/safeds/ml/nn/__init__.py +++ b/src/safeds/ml/nn/__init__.py @@ -13,7 +13,7 @@ from ._input_conversion_table import InputConversionTable from ._pooling2d_layer import MaxPooling2DLayer from ._model import NeuralNetworkClassifier, NeuralNetworkRegressor - from ._output_conversion_image import OutputConversionImage + from ._output_conversion_image import OutputConversionImageToImage, OutputConversionImageToTable from ._output_conversion_table import OutputConversionTable apipkg.initpkg( @@ -28,7 +28,8 @@ "MaxPooling2DLayer": "._pooling2d_layer:MaxPooling2DLayer", "NeuralNetworkClassifier": "._model:NeuralNetworkClassifier", "NeuralNetworkRegressor": "._model:NeuralNetworkRegressor", - "OutputConversionImage": "._output_conversion_image:OutputConversionImage", + "OutputConversionImageToImage": "._output_conversion_image:OutputConversionImageToImage", + "OutputConversionImageToTable": "._output_conversion_image:OutputConversionImageToTable", "OutputConversionTable": "._output_conversion_table:OutputConversionTable", "Pooling2DLayer": "._pooling2d_layer:Pooling2DLayer", }, @@ -44,6 +45,7 @@ "MaxPooling2DLayer", "NeuralNetworkClassifier", "NeuralNetworkRegressor", - "OutputConversionImage", + "OutputConversionImageToImage", + "OutputConversionImageToTable", "OutputConversionTable", ] diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index fcfe2f850..dac23c6ad 100644 --- a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -1,44 +1,53 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, TypeVar from safeds.data.image.containers import ImageDataset, ImageList from safeds.data.image.containers._image_dataset import _TableAsTensor from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList +from safeds.data.tabular.containers import Table if TYPE_CHECKING: from torch import Tensor, LongTensor from safeds.ml.nn._output_conversion import _OutputConversion +T = TypeVar("T", Table, ImageList) -class OutputConversionImage(_OutputConversion[ImageList, ImageDataset]): + +class 
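The one-hot sanity check that `_TableAsTensor` performs, isolated as a plain tensor check: every row of the target tensor must sum to exactly 1.

import torch

one_hot = torch.tensor([[1.0, 0.0], [0.0, 1.0], [0.0, 1.0]])
assert torch.all(one_hot.sum(dim=1) == torch.ones(one_hot.size(0)))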
_OutputConversionImage(_OutputConversion[ImageList, ImageDataset[T]], ABC): """The output conversion for a neural network, defines the output parameters for the neural network.""" - def __init__(self, output_is_image: bool) -> None: - """ - Define the output parameters for the neural network in the output conversion. + @abstractmethod + def _data_conversion(self, input_data: ImageList, output_data: Tensor) -> ImageDataset[T]: + pass + - Parameters - ---------- - """ - self._output_is_image = output_is_image +class OutputConversionImageToTable(_OutputConversionImage[Table]): - def _data_conversion(self, input_data: ImageList, output_data: Tensor) -> ImageDataset: + def _data_conversion(self, input_data: ImageList, output_data: Tensor) -> ImageDataset[Table]: from torch import LongTensor if not isinstance(input_data, _SingleSizeImageList): raise ValueError("The given input ImageList contains images of different sizes.") - if self._output_is_image: - return ImageDataset(input_data, _SingleSizeImageList._create_from_tensor(output_data, list(range(output_data.size(dim=0))))) - else: - im_dataset = ImageDataset.__new__(ImageDataset) - im_dataset._output = _TableAsTensor._from_tensor(output_data) - im_dataset._shuffle_tensor_indices = LongTensor(list(range(len(input_data)))) - im_dataset._shuffle_after_epoch = False - im_dataset._batch_size = 1 - im_dataset._next_batch_index = 0 - im_dataset._input_size = input_data.sizes[0] - im_dataset._input = input_data - return im_dataset + im_dataset = ImageDataset.__new__(ImageDataset) + im_dataset._output = _TableAsTensor._from_tensor(output_data) + im_dataset._shuffle_tensor_indices = LongTensor(list(range(len(input_data)))) + im_dataset._shuffle_after_epoch = False + im_dataset._batch_size = 1 + im_dataset._next_batch_index = 0 + im_dataset._input_size = input_data.sizes[0] + im_dataset._input = input_data + return im_dataset + + +class OutputConversionImageToImage(_OutputConversionImage[ImageList]): + + def _data_conversion(self, input_data: ImageList, output_data: Tensor) -> ImageDataset[ImageList]: + if not isinstance(input_data, _SingleSizeImageList): + raise ValueError("The given input ImageList contains images of different sizes.") + + return ImageDataset(input_data, _SingleSizeImageList._create_from_tensor(output_data, list( + range(output_data.size(dim=0))))) diff --git a/tests/safeds/ml/nn/test_cnn_workflow.py b/tests/safeds/ml/nn/test_cnn_workflow.py index 129a5d1c7..7fc9e10b1 100644 --- a/tests/safeds/ml/nn/test_cnn_workflow.py +++ b/tests/safeds/ml/nn/test_cnn_workflow.py @@ -8,7 +8,7 @@ from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation import OneHotEncoder from safeds.ml.nn import NeuralNetworkClassifier, InputConversionImage, Convolutional2DLayer, MaxPooling2DLayer, \ - FlattenLayer, ForwardLayer, OutputConversionImage + FlattenLayer, ForwardLayer, OutputConversionImageToTable from tests.helpers import resolve_resource_path, images_all @@ -40,7 +40,7 @@ def test_should_train_model(self, seed: int, layer_3_bias: list[float]): ForwardLayer(len(one_hot_encoder.get_names_of_added_columns())) ] nn_original = NeuralNetworkClassifier(InputConversionImage(image_dataset.input_size), layers, - OutputConversionImage(False)) + OutputConversionImageToTable()) nn = nn_original.fit(image_dataset, epoch_size=2) assert str(nn_original._model.state_dict().values()) != str(nn._model.state_dict().values()) assert nn._model.state_dict()["_pytorch_layers.3._layer.bias"].tolist() == layer_3_bias From 
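A hedged note on the new generics: the `T` parameter lets callers state the output kind statically, for example:

from safeds.data.image.containers import ImageDataset
from safeds.data.tabular.containers import Table

def evaluate(dataset: ImageDataset[Table]) -> Table:
    # get_output() decodes the internal tensor back into a Table.
    return dataset.get_output()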
9137eac0531a1bf13d61fe353698cb60deb2a9fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Wed, 24 Apr 2024 18:35:59 +0200 Subject: [PATCH 07/42] test: corrected test in `ImageList` --- tests/safeds/data/image/containers/test_image_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/safeds/data/image/containers/test_image_list.py b/tests/safeds/data/image/containers/test_image_list.py index 6eec2a6a3..e74c7d89b 100644 --- a/tests/safeds/data/image/containers/test_image_list.py +++ b/tests/safeds/data/image/containers/test_image_list.py @@ -155,7 +155,7 @@ def test_from_files(self, resource_path1: str, resource_path2: str, resource_pat assert image_list.channel == expected_channel # Test sizes - assert image_list.sizes == [image1.size, image2.size, image3.size] + assert image_list.sizes == [image1_with_expected_channel.size, image2_with_expected_channel.size, image3_with_expected_channel.size] # Test number_of_sizes assert image_list.number_of_sizes == len({(image.width, image.height) for image in [image1, image2, image3]}) From ac452e4da797cca4efebf26ee76f7a644b56f1e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Wed, 24 Apr 2024 18:49:44 +0200 Subject: [PATCH 08/42] test: corrected cnn workflow test to be os independent --- tests/safeds/ml/nn/test_cnn_workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/safeds/ml/nn/test_cnn_workflow.py b/tests/safeds/ml/nn/test_cnn_workflow.py index 7fc9e10b1..c51b36e6e 100644 --- a/tests/safeds/ml/nn/test_cnn_workflow.py +++ b/tests/safeds/ml/nn/test_cnn_workflow.py @@ -28,7 +28,7 @@ def test_should_train_model(self, seed: int, layer_3_bias: list[float]): image_list, filenames = ImageList.from_files(resolve_resource_path(images_all()), return_filenames=True) image_list = image_list.resize(20, 20) - image_classes = Table({"class": [re.search(r"(.*)\\(.*)\.", filename).group(2) for filename in filenames]}) + image_classes = Table({"class": [re.search(r"(.*)[\\/](.*)\.", filename).group(2) for filename in filenames]}) one_hot_encoder = OneHotEncoder().fit(image_classes, ["class"]) image_classes_one_hot_encoded = one_hot_encoder.transform(image_classes) image_dataset = ImageDataset(image_list, image_classes_one_hot_encoded) From c3bcc207cb64e6b046285338acdc7cc5217f0a47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Wed, 24 Apr 2024 23:53:58 +0200 Subject: [PATCH 09/42] feat: added `ConvolutionalTranspose2DLayer` feat: added `OutputConversionImageToColumn` feat: added `ImageDataset` with `Column` as output feat: added kwargs to abstractmethod `_OutputConversion._data_conversion` test: added test for cnn workflow to `Image` and `Column` output types --- .../data/image/containers/_image_dataset.py | 59 ++++++++-- .../containers/_single_size_image_list.py | 4 +- src/safeds/ml/nn/__init__.py | 4 +- src/safeds/ml/nn/_convolutional2d_layer.py | 30 ++++- src/safeds/ml/nn/_input_conversion_image.py | 19 ++++ src/safeds/ml/nn/_model.py | 11 +- src/safeds/ml/nn/_output_conversion.py | 2 +- src/safeds/ml/nn/_output_conversion_image.py | 50 +++++++-- src/safeds/ml/nn/_output_conversion_table.py | 2 +- .../data/image/containers/test_image_list.py | 54 +++++++-- ...train_and_predict_model[seed-1234-cpu].png | Bin 0 -> 1543 bytes ...rain_and_predict_model[seed-1234-cuda].png | Bin 0 -> 2665 bytes ...train_and_predict_model[seed-4711-cpu].png | Bin 0 -> 1592 bytes ...rain_and_predict_model[seed-4711-cuda].png | Bin 0 -> 2246 bytes 
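A quick standalone check of why the PATCH 08 regex is OS-independent: the character class [\\/] matches both path separators, so the class name is extracted on Windows and POSIX alike:

import re

for filename in (r"images\plane.png", "images/plane.png"):
    match = re.search(r"(.*)[\\/](.*)\.", filename)
    assert match is not None
    assert match.group(2) == "plane"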
tests/safeds/ml/nn/test_cnn_workflow.py | 106 ++++++++++++++++-- 15 files changed, 295 insertions(+), 46 deletions(-) create mode 100644 tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImage.test_should_train_and_predict_model[seed-1234-cpu].png create mode 100644 tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImage.test_should_train_and_predict_model[seed-1234-cuda].png create mode 100644 tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImage.test_should_train_and_predict_model[seed-4711-cpu].png create mode 100644 tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImage.test_should_train_and_predict_model[seed-4711-cuda].png diff --git a/src/safeds/data/image/containers/_image_dataset.py b/src/safeds/data/image/containers/_image_dataset.py index 2f2411d51..5288368ac 100644 --- a/src/safeds/data/image/containers/_image_dataset.py +++ b/src/safeds/data/image/containers/_image_dataset.py @@ -3,17 +3,18 @@ import copy from typing import TYPE_CHECKING, TypeVar, Generic -from safeds._config import _get_device from safeds.data.image.containers import ImageList from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from safeds.data.image.typing import ImageSize -from safeds.data.tabular.containers import Table -from safeds.exceptions import NonNumericColumnError, OutputLengthMismatchError, IndexOutOfBoundsError +from safeds.data.tabular.containers import Table, Column +from safeds.data.tabular.transformation import OneHotEncoder +from safeds.exceptions import NonNumericColumnError, OutputLengthMismatchError, IndexOutOfBoundsError, \ + TransformerNotFittedError if TYPE_CHECKING: from torch import Tensor -T = TypeVar("T", Table, ImageList) +T = TypeVar("T", Column, Table, ImageList) class ImageDataset(Generic[T]): @@ -31,7 +32,7 @@ def __init__(self, input_data: ImageList, output_data: T, batch_size=1, shuffle= else: self._input_size = ImageSize(input_data.widths[0], input_data.heights[0], input_data.channel) self._input = input_data - if (isinstance(output_data, Table) and len(input_data) != output_data.number_of_rows) or (isinstance(output_data, ImageList) and len(input_data) != len(output_data)): + if ((isinstance(output_data, Table) or isinstance(output_data, Column)) and len(input_data) != output_data.number_of_rows) or (isinstance(output_data, ImageList) and len(input_data) != len(output_data)): raise OutputLengthMismatchError(f"{len(input_data)} != {output_data.number_of_rows if isinstance(output_data, Table) else len(output_data)}") if isinstance(output_data, Table): non_numerical_columns = [] @@ -46,8 +47,13 @@ def __init__(self, input_data: ImageList, output_data: T, batch_size=1, shuffle= if len(wrong_interval_columns) > 0: raise ValueError(f"Columns {wrong_interval_columns} have values outside of the interval [0, 1].") _output = _TableAsTensor(output_data) + self._output_size = output_data.number_of_columns + elif isinstance(output_data, Column): + _output = _ColumnAsTensor(output_data) + self._output_size = len(_output._one_hot_encoder.get_names_of_added_columns()) elif isinstance(output_data, _SingleSizeImageList): _output = output_data.clone()._as_single_size_image_list() + self._output_size = ImageSize(output_data.widths[0], output_data.heights[0], output_data.channel) else: raise ValueError("The given output ImageList contains images of different sizes.") self._output = _output @@ -73,6 +79,10 @@ def __len__(self) -> int: def input_size(self) -> ImageSize: return self._input_size + @property + def 
output_size(self) -> ImageSize | int:
+        return self._output_size
+
     def get_input(self) -> ImageList:
         return self._input
 
@@ -80,6 +90,8 @@ def get_output(self) -> T:
         output = self._output
         if isinstance(output, _TableAsTensor):
             return output._to_table()
+        elif isinstance(output, _ColumnAsTensor):
+            return output._to_column()
         else:
             return output
 
@@ -114,18 +126,51 @@ def __init__(self, table: Table) -> None:
         import torch
 
         self._column_names = table.column_names
-        self._tensor = torch.Tensor(table._data.to_numpy(copy=True)).to(_get_device())
+        self._tensor = torch.Tensor(table._data.to_numpy(copy=True)).to(torch.get_default_device())
 
         if not torch.all(self._tensor.sum(dim=1) == torch.ones(self._tensor.size(dim=0))):
             raise ValueError("The given table is not correctly one hot encoded as it contains rows that have a sum not equal to 1.")
 
     @staticmethod
-    def _from_tensor(tensor: Tensor) -> _TableAsTensor:
+    def _from_tensor(tensor: Tensor, column_names: list[str]) -> _TableAsTensor:
+        if tensor.dim() != 2:
+            raise ValueError(f"Tensor has an invalid number of dimensions. Needed 2 dimensions but got {tensor.dim()}.")
+        if tensor.size(dim=1) != len(column_names):
+            raise ValueError(f"Tensor and column_names have different numbers of classes ({tensor.size(dim=1)} != {len(column_names)}).")
         table_as_tensor = _TableAsTensor.__new__(_TableAsTensor)
         table_as_tensor._tensor = tensor
+        table_as_tensor._column_names = column_names
         return table_as_tensor
 
     def _to_table(self) -> Table:
         table = Table(dict(zip(self._column_names, self._tensor.T.tolist())))
         return table
+
+
+class _ColumnAsTensor:
+
+    def __init__(self, column: Column) -> None:
+        import torch
+
+        self._column_name = column.name
+        column_as_table = Table.from_columns([column])
+        self._one_hot_encoder = OneHotEncoder().fit(column_as_table, [self._column_name])
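+        # the fitted encoder maps each distinct value of the column to its own 0/1 column,
+        # so transforming the one-column table below yields exactly one one-hot row per entry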
+        self._tensor = torch.Tensor(self._one_hot_encoder.transform(column_as_table)._data.to_numpy(copy=True)).to(torch.get_default_device())
+
+    @staticmethod
+    def _from_tensor(tensor: Tensor, column_name: str, one_hot_encoder: OneHotEncoder) -> _ColumnAsTensor:
+        if tensor.dim() != 2:
+            raise ValueError(f"Tensor has an invalid number of dimensions. Needed 2 dimensions but got {tensor.dim()}.")
+        if not one_hot_encoder.is_fitted():
+            raise TransformerNotFittedError()
+        if tensor.size(dim=1) != len(one_hot_encoder.get_names_of_added_columns()):
+            raise ValueError(f"Tensor and one_hot_encoder have different numbers of classes ({tensor.size(dim=1)} != {len(one_hot_encoder.get_names_of_added_columns())}).")
+        column_as_tensor = _ColumnAsTensor.__new__(_ColumnAsTensor)
+        column_as_tensor._tensor = tensor
+        column_as_tensor._column_name = column_name
+        column_as_tensor._one_hot_encoder = one_hot_encoder
+        return column_as_tensor
+
+    def _to_column(self) -> Column:
+        table = Table(dict(zip(self._one_hot_encoder.get_names_of_added_columns(), self._tensor.T.tolist())))
+        return self._one_hot_encoder.inverse_transform(table).get_column(self._column_name)
diff --git a/src/safeds/data/image/containers/_single_size_image_list.py b/src/safeds/data/image/containers/_single_size_image_list.py
index 802489b23..fea53d81a 100644
--- a/src/safeds/data/image/containers/_single_size_image_list.py
+++ b/src/safeds/data/image/containers/_single_size_image_list.py
@@ -101,8 +101,10 @@ def _create_image_list(images: list[Tensor], indices: list[int]) -> ImageList:
 
     @staticmethod
     def _create_from_tensor(images_tensor: Tensor, indices: list[int]) -> _SingleSizeImageList:
+        if images_tensor.dim() == 3:
+            images_tensor = images_tensor.unsqueeze(dim=1)
         if images_tensor.dim() != 4:
-            raise ValueError(f"Invalid Tensor. This Tensor requires 4 dimensions but has {images_tensor.dim()}")
+            raise ValueError(f"Invalid Tensor. This Tensor requires 3 or 4 dimensions but has {images_tensor.dim()}")
 
         image_list = _SingleSizeImageList()
         image_list._tensor = images_tensor.detach().clone()
diff --git a/src/safeds/ml/nn/__init__.py b/src/safeds/ml/nn/__init__.py
index 05b0a9e52..e12d78681 100644
--- a/src/safeds/ml/nn/__init__.py
+++ b/src/safeds/ml/nn/__init__.py
@@ -6,7 +6,7 @@
 if TYPE_CHECKING:
     from ._pooling2d_layer import AvgPooling2DLayer
-    from ._convolutional2d_layer import Convolutional2DLayer
+    from ._convolutional2d_layer import Convolutional2DLayer, ConvolutionalTranspose2DLayer
     from ._flatten_layer import FlattenLayer
     from ._forward_layer import ForwardLayer
     from ._input_conversion_image import InputConversionImage
@@ -21,6 +21,7 @@
     {
         "AvgPooling2DLayer": "._pooling2d_layer:AvgPooling2DLayer",
         "Convolutional2DLayer": "._convolutional2d_layer:Convolutional2DLayer",
+        "ConvolutionalTranspose2DLayer": "._convolutional2d_layer:ConvolutionalTranspose2DLayer",
         "FlattenLayer": "._flatten_layer:FlattenLayer",
         "ForwardLayer": "._forward_layer:ForwardLayer",
         "InputConversionImage": "._input_conversion_image:InputConversionImage",
@@ -38,6 +39,7 @@
 __all__ = [
     "AvgPooling2DLayer",
     "Convolutional2DLayer",
+    "ConvolutionalTranspose2DLayer",
     "FlattenLayer",
     "ForwardLayer",
     "InputConversionImage",
diff --git a/src/safeds/ml/nn/_convolutional2d_layer.py b/src/safeds/ml/nn/_convolutional2d_layer.py
index a824ebf09..ef2f66cff 100644
--- a/src/safeds/ml/nn/_convolutional2d_layer.py
+++ b/src/safeds/ml/nn/_convolutional2d_layer.py
@@ -11,13 +11,16 @@ from safeds.ml.nn._layer import _Layer
 
-def _create_internal_model(input_size: int, output_size: int, kernel_size: int, activation_function: str, padding: int, stride: int) -> nn.Module:
+def _create_internal_model(input_size: int, output_size: int, kernel_size: int, activation_function: str, padding: int, stride: int, transpose: bool, output_padding: int = 0) -> nn.Module:
     from torch import nn
 
     class _InternalLayer(nn.Module):
-        def __init__(self, input_size: int, output_size: int, kernel_size: int, activation_function: str, padding: int, stride: int):
+        def __init__(self, input_size: int, output_size: int, kernel_size: int, activation_function: str, padding: int, stride: int, transpose: bool, output_padding: int):
             super().__init__()
-            self._layer = nn.Conv2d(in_channels=input_size, out_channels=output_size, kernel_size=kernel_size, padding=padding, stride=stride)
+            if transpose:
+                self._layer = nn.ConvTranspose2d(in_channels=input_size, out_channels=output_size, kernel_size=kernel_size, padding=padding, stride=stride, output_padding=output_padding)
+            else:
+                self._layer = nn.Conv2d(in_channels=input_size, out_channels=output_size, kernel_size=kernel_size, padding=padding, stride=stride)
             match activation_function:
                 case "sigmoid":
                     self._fn = nn.Sigmoid()
@@ -31,7 +34,7 @@ def __init__(self, input_size: int, output_size: int, kernel_size: int, activati
         def forward(self, x: Tensor) -> Tensor:
             return self._fn(self._layer(x))
 
-    return _InternalLayer(input_size, output_size, kernel_size, activation_function, padding, stride)
+    return _InternalLayer(input_size, output_size, kernel_size, activation_function, padding, stride, transpose, output_padding)
 
 
 class Convolutional2DLayer(_Layer):
@@ -45,7 +48,7 @@ def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, pa
         self._padding = padding
 
     def _get_internal_layer(self, *, activation_function: str) -> nn.Module:
-        return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride)
+        return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride, False)
 
     @property
     def input_size(self) -> ImageSize:
@@ -76,3 +79,20 @@ def _set_input_size(self, input_size: ImageSize) -> None:
         new_width = math.ceil((input_size.width + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride))
         new_height = math.ceil((input_size.height + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride))
         self._output_size = ImageSize(new_width, new_height, self._output_channel, _ignore_invalid_channel=True)
+
+
+class ConvolutionalTranspose2DLayer(Convolutional2DLayer):
+
+    def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, padding: int = 0, output_padding: int = 0):
+        super().__init__(output_channel, kernel_size, stride=stride, padding=padding)
+        self._output_padding = output_padding
+
+    def _get_internal_layer(self, *, activation_function: str) -> nn.Module:
+        return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride, True, self._output_padding)
+
+    def _set_input_size(self, input_size: ImageSize) -> None:
+        self._input_size = input_size
+        new_width = (input_size.width - 1) * self._stride - 2 * self._padding + self._kernel_size + self._output_padding
+        new_height = (input_size.height - 1) * self._stride - 2 * self._padding + self._kernel_size + self._output_padding
+        self._output_size = ImageSize(new_width, new_height, self._output_channel, _ignore_invalid_channel=True)
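A note on the size arithmetic above, since the two layers are meant to invert each other: `Convolutional2DLayer._set_input_size` shrinks the spatial size via ceil((size + 2 * padding - kernel_size + 1) / stride), while `ConvolutionalTranspose2DLayer._set_input_size` grows it via (size - 1) * stride - 2 * padding + kernel_size + output_padding. A quick standalone sanity check (a sketch, not part of the patch; the helpers merely mirror the two formulas from the diff):

    import math

    def conv2d_output_size(size: int, kernel_size: int, stride: int = 1, padding: int = 0) -> int:
        # mirrors Convolutional2DLayer._set_input_size
        return math.ceil((size + padding * 2 - kernel_size + 1) / (1.0 * stride))

    def conv_transpose2d_output_size(size: int, kernel_size: int, stride: int = 1, padding: int = 0, output_padding: int = 0) -> int:
        # mirrors ConvolutionalTranspose2DLayer._set_input_size
        return (size - 1) * stride - 2 * padding + kernel_size + output_padding

    # a 20x20 image through Convolutional2DLayer(_, kernel_size=2) shrinks to 19x19,
    # and ConvolutionalTranspose2DLayer(_, kernel_size=2) grows it back to 20x20
    assert conv2d_output_size(20, kernel_size=2) == 19
    assert conv_transpose2d_output_size(19, kernel_size=2) == 20

With stride 1 and no padding the transposed formula exactly undoes the convolution, which is why the image-to-image test later in this patch can stack two convolutions followed by two transposed convolutions and still predict 20x20 images.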
diff --git a/src/safeds/ml/nn/_input_conversion_image.py b/src/safeds/ml/nn/_input_conversion_image.py
index 62a4347e3..291c44893 100644
--- a/src/safeds/ml/nn/_input_conversion_image.py
+++ b/src/safeds/ml/nn/_input_conversion_image.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
 
 from safeds.data.image.containers import ImageDataset,
ImageList +from safeds.data.image.containers._image_dataset import _ColumnAsTensor, _TableAsTensor from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from safeds.data.image.typing import ImageSize +from safeds.data.tabular.transformation import OneHotEncoder from safeds.ml.nn._input_conversion import _InputConversion @@ -18,6 +20,9 @@ def __init__(self, image_size: ImageSize) -> None: ---------- """ self._image_size = image_size + self._one_hot_encoder: OneHotEncoder | None = None + self._column_name: str | None = None + self._column_names: list[str] | None = None @property def _data_size(self) -> ImageSize: @@ -30,6 +35,20 @@ def _data_conversion_predict(self, input_data: ImageList, batch_size: int) -> Im return input_data def _is_fit_data_valid(self, input_data: ImageDataset) -> bool: + if isinstance(input_data._output, _ColumnAsTensor): + if self._one_hot_encoder is None: + self._one_hot_encoder = input_data._output._one_hot_encoder + elif self._one_hot_encoder != input_data._output._one_hot_encoder: + return False + if self._column_name is None: + self._column_name = input_data._output._column_name + elif self._column_name != input_data._output._column_name: + return False + if isinstance(input_data._output, _TableAsTensor): + if self._column_names is None: + self._column_names = input_data._output._column_names + elif self._column_names != input_data._output._column_names: + return False return input_data.input_size == self._image_size def _is_predict_data_valid(self, input_data: ImageList) -> bool: diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index c698872b5..f03fcdf0f 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -12,7 +12,8 @@ ModelNotFittedError, OutOfBoundsError, ) -from safeds.ml.nn import InputConversionImage, FlattenLayer +from safeds.ml.nn import InputConversionImage, FlattenLayer, OutputConversionImageToTable +from safeds.ml.nn._output_conversion_image import OutputConversionImageToColumn from safeds.ml.nn._pooling2d_layer import _Pooling2DLayer if TYPE_CHECKING: @@ -168,6 +169,10 @@ def predict(self, test_data: IPT) -> OT: for x in dataloader: elem = self._model(x) predictions.append(elem.squeeze(dim=1)) + if isinstance(self._output_conversion, OutputConversionImageToTable) and isinstance(self._input_conversion, InputConversionImage): + return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0), column_names=self._input_conversion._column_names) + if isinstance(self._output_conversion, OutputConversionImageToColumn) and isinstance(self._input_conversion, InputConversionImage): + return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0), column_name=self._input_conversion._column_name, one_hot_encoder=self._input_conversion._one_hot_encoder) return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0)) @property @@ -333,6 +338,10 @@ def predict(self, test_data: IPT) -> OT: predictions.append(torch.argmax(elem, dim=1)) else: predictions.append(elem.squeeze(dim=1).round()) + if isinstance(self._output_conversion, OutputConversionImageToTable) and isinstance(self._input_conversion, InputConversionImage): + return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0), column_names=self._input_conversion._column_names) + if isinstance(self._output_conversion, OutputConversionImageToColumn) and isinstance(self._input_conversion, InputConversionImage): + return 
self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0), column_name=self._input_conversion._column_name, one_hot_encoder=self._input_conversion._one_hot_encoder)
         return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0))
 
     @property
diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py
index 17034b652..55587fe05 100644
--- a/src/safeds/ml/nn/_output_conversion.py
+++ b/src/safeds/ml/nn/_output_conversion.py
@@ -16,5 +16,5 @@ class _OutputConversion(Generic[IT, OT], ABC):
     """The output conversion for a neural network, defines the output parameters for the neural network."""
 
     @abstractmethod
-    def _data_conversion(self, input_data: IT, output_data: Tensor) -> OT:
+    def _data_conversion(self, input_data: IT, output_data: Tensor, **kwargs) -> OT:
         pass  # pragma: no cover
diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py
index dac23c6ad..1d78506fd 100644
--- a/src/safeds/ml/nn/_output_conversion_image.py
+++ b/src/safeds/ml/nn/_output_conversion_image.py
@@ -4,37 +4,65 @@
 from typing import TYPE_CHECKING, TypeVar
 
 from safeds.data.image.containers import ImageDataset, ImageList
-from safeds.data.image.containers._image_dataset import _TableAsTensor
+from safeds.data.image.containers._image_dataset import _TableAsTensor, _ColumnAsTensor
 from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList
-from safeds.data.tabular.containers import Table
+from safeds.data.tabular.containers import Table, Column
+from safeds.data.tabular.transformation import OneHotEncoder
 
 if TYPE_CHECKING:
     from torch import Tensor, LongTensor
 
 from safeds.ml.nn._output_conversion import _OutputConversion
 
-T = TypeVar("T", Table, ImageList)
+T = TypeVar("T", Column, Table, ImageList)
 
 
 class _OutputConversionImage(_OutputConversion[ImageList, ImageDataset[T]], ABC):
     """The output conversion for a neural network, defines the output parameters for the neural network."""
 
     @abstractmethod
-    def _data_conversion(self, input_data: ImageList, output_data: Tensor) -> ImageDataset[T]:
+    def _data_conversion(self, **kwargs) -> ImageDataset[T]:
         pass
 
 
+class OutputConversionImageToColumn(_OutputConversionImage[Column]):
+
+    def _data_conversion(self, input_data: ImageList, output_data: Tensor, *, column_name: str, one_hot_encoder: OneHotEncoder) -> ImageDataset[Column]:
+        import torch
+
+        if not isinstance(input_data, _SingleSizeImageList):
+            raise ValueError("The given input ImageList contains images of different sizes.")
+
+        output = torch.zeros(len(input_data), len(one_hot_encoder.get_names_of_added_columns()))
+        output[torch.arange(len(input_data)), output_data] = 1
+
+        im_dataset: ImageDataset[Column] = ImageDataset[Column].__new__(ImageDataset)
+        im_dataset._output = _ColumnAsTensor._from_tensor(output, column_name, one_hot_encoder)
+        im_dataset._shuffle_tensor_indices = torch.LongTensor(list(range(len(input_data))))
+        im_dataset._shuffle_after_epoch = False
+        im_dataset._batch_size = 1
+        im_dataset._next_batch_index = 0
+        im_dataset._input_size = input_data.sizes[0]
+        im_dataset._input = input_data
+        return im_dataset
+
+
 class OutputConversionImageToTable(_OutputConversionImage[Table]):
 
-    def _data_conversion(self, input_data: ImageList, output_data: Tensor) -> ImageDataset[Table]:
-        from torch import LongTensor
+    def _data_conversion(self, input_data: ImageList, output_data: Tensor, *, column_names: list[str]) -> ImageDataset[Table]:
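+        # output_data holds one predicted class index per image; scattering 1s into a
+        # zero matrix below rebuilds the one-hot rows that _TableAsTensor._from_tensor expects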
import torch if not isinstance(input_data, _SingleSizeImageList): raise ValueError("The given input ImageList contains images of different sizes.") - im_dataset = ImageDataset.__new__(ImageDataset) - im_dataset._output = _TableAsTensor._from_tensor(output_data) - im_dataset._shuffle_tensor_indices = LongTensor(list(range(len(input_data)))) + output = torch.zeros(len(input_data), len(column_names)) + output[torch.arange(len(input_data)), output_data] = 1 + + im_dataset: ImageDataset[Table] = ImageDataset[Table].__new__(ImageDataset) + im_dataset._output = _TableAsTensor._from_tensor(output, column_names) + im_dataset._shuffle_tensor_indices = torch.LongTensor(list(range(len(input_data)))) im_dataset._shuffle_after_epoch = False im_dataset._batch_size = 1 im_dataset._next_batch_index = 0 @@ -46,8 +74,10 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor) -> ImageD class OutputConversionImageToImage(_OutputConversionImage[ImageList]): def _data_conversion(self, input_data: ImageList, output_data: Tensor) -> ImageDataset[ImageList]: + import torch + if not isinstance(input_data, _SingleSizeImageList): raise ValueError("The given input ImageList contains images of different sizes.") - return ImageDataset(input_data, _SingleSizeImageList._create_from_tensor(output_data, list( + return ImageDataset[ImageList](input_data, _SingleSizeImageList._create_from_tensor((output_data * 255).to(torch.uint8), list( range(output_data.size(dim=0))))) diff --git a/src/safeds/ml/nn/_output_conversion_table.py b/src/safeds/ml/nn/_output_conversion_table.py index 1b56988e1..a23580276 100644 --- a/src/safeds/ml/nn/_output_conversion_table.py +++ b/src/safeds/ml/nn/_output_conversion_table.py @@ -23,7 +23,7 @@ def __init__(self, prediction_name: str = "prediction") -> None: """ self._prediction_name = prediction_name - def _data_conversion(self, input_data: Table, output_data: Tensor) -> TaggedTable: + def _data_conversion(self, input_data: Table, output_data: Tensor, **kwargs) -> TaggedTable: return input_data.add_column(Column(self._prediction_name, output_data.tolist())).tag_columns( self._prediction_name, ) diff --git a/tests/safeds/data/image/containers/test_image_list.py b/tests/safeds/data/image/containers/test_image_list.py index e74c7d89b..709d34e1f 100644 --- a/tests/safeds/data/image/containers/test_image_list.py +++ b/tests/safeds/data/image/containers/test_image_list.py @@ -1219,10 +1219,25 @@ class TestSingleSizeImageList: @pytest.mark.parametrize( "tensor", [ - torch.ones(4, 3, 1, 1) + torch.ones(4, 1, 1), ] ) - def test_create_from_tensor(self, tensor: Tensor) -> None: + def test_create_from_tensor_3_dim(self, tensor: Tensor) -> None: + expected_tensor = tensor.unsqueeze(dim=1) + image_list = _SingleSizeImageList._create_from_tensor(tensor, list(range(tensor.size(0)))) + assert image_list._tensor_positions_to_indices == list(range(tensor.size(0))) + assert len(image_list) == expected_tensor.size(0) + assert image_list.widths[0] == expected_tensor.size(3) + assert image_list.heights[0] == expected_tensor.size(2) + assert image_list.channel == expected_tensor.size(1) + + @pytest.mark.parametrize( + "tensor", + [ + torch.ones(4, 3, 1, 1), + ] + ) + def test_create_from_tensor_4_dim(self, tensor: Tensor) -> None: image_list = _SingleSizeImageList._create_from_tensor(tensor, list(range(tensor.size(0)))) assert image_list._tensor_positions_to_indices == list(range(tensor.size(0))) assert len(image_list) == tensor.size(0) @@ -1234,21 +1249,46 @@ def test_create_from_tensor(self, 
tensor: Tensor) -> None:
         "tensor",
         [
             torch.ones(4, 3, 1, 1, 1),
-            torch.ones(4, 3, 1)
+            torch.ones(4, 3)
         ],
-        ids=["5-dim", "3-dim"]
+        ids=["5-dim", "2-dim"]
     )
     def test_should_raise_from_invalid_tensor(self, tensor: Tensor) -> None:
-        with pytest.raises(ValueError, match=rf"Invalid Tensor. This Tensor requires 4 dimensions but has {tensor.dim()}"):
+        with pytest.raises(ValueError, match=rf"Invalid Tensor. This Tensor requires 3 or 4 dimensions but has {tensor.dim()}"):
             _SingleSizeImageList._create_from_tensor(tensor, list(range(tensor.size(0))))
 
     @pytest.mark.parametrize(
         "tensor",
         [
-            torch.randn(16, 4, 4, 4)
+            torch.randn(16, 4, 4),
+        ]
+    )
+    def test_get_batch_and_iterate_3_dim(self, tensor: Tensor) -> None:
+        expected_tensor = tensor.unsqueeze(dim=1)
+        image_list = _SingleSizeImageList._create_from_tensor(tensor, list(range(tensor.size(0))))
+        batch_size = math.ceil(expected_tensor.size(0) / 1.999)
+        assert image_list._get_batch(0, batch_size).size(0) == batch_size
+        assert torch.all(torch.eq(image_list._get_batch(0, 1), image_list._get_batch(0)))
+        assert torch.all(torch.eq(image_list._get_batch(0, batch_size), expected_tensor[:batch_size].to(torch.float32) / 255))
+        assert torch.all(torch.eq(image_list._get_batch(1, batch_size), expected_tensor[batch_size:].to(torch.float32) / 255))
+        iterate_image_list = iter(image_list)
+        assert iterate_image_list == image_list
+        assert iterate_image_list is not image_list
+        iterate_image_list._batch_size = batch_size
+        assert torch.all(torch.eq(image_list._get_batch(0, batch_size), next(iterate_image_list)))
+        assert torch.all(torch.eq(image_list._get_batch(1, batch_size), next(iterate_image_list)))
+        with pytest.raises(IndexOutOfBoundsError, match=rf"There is no element at index '{batch_size * 2}'."):
+            image_list._get_batch(2, batch_size)
+        with pytest.raises(StopIteration):
+            next(iterate_image_list)
+
+    @pytest.mark.parametrize(
+        "tensor",
+        [
+            torch.randn(16, 4, 4, 4),
+        ]
     )
-    def test_get_batch_and_iterate(self, tensor: Tensor) -> None:
+    def test_get_batch_and_iterate_4_dim(self, tensor: Tensor) -> None:
         image_list = _SingleSizeImageList._create_from_tensor(tensor, list(range(tensor.size(0))))
         batch_size = math.ceil(tensor.size(0) / 1.999)
         assert image_list._get_batch(0, batch_size).size(0) == batch_size
diff --git a/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImage.test_should_train_and_predict_model[seed-1234-cpu].png b/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImage.test_should_train_and_predict_model[seed-1234-cpu].png
new file mode 100644
index 0000000000000000000000000000000000000000..c931271a1c826ba1ee7cf9b86d1c6de81a8b4624
GIT binary patch
[binary PNG snapshot data omitted]

[The three remaining snapshot files -- ...[seed-1234-cuda].png, ...[seed-4711-cpu].png and ...[seed-4711-cuda].png -- are added the same way; their GIT binary patches are likewise omitted.]

diff --git a/tests/safeds/ml/nn/test_cnn_workflow.py b/tests/safeds/ml/nn/test_cnn_workflow.py
index c51b36e6e..fddc24d08 100644
--- a/tests/safeds/ml/nn/test_cnn_workflow.py
+++ b/tests/safeds/ml/nn/test_cnn_workflow.py
@@ -2,29 +2,34 @@
 import pytest
 import torch
+from syrupy import SnapshotAssertion
+from torch.types import Device
 
-from safeds._config import _get_device
 from safeds.data.image.containers import ImageList, ImageDataset
-from safeds.data.tabular.containers import Table
+from safeds.data.tabular.containers import Table, Column
 from safeds.data.tabular.transformation import OneHotEncoder
 from safeds.ml.nn import NeuralNetworkClassifier, InputConversionImage, Convolutional2DLayer, MaxPooling2DLayer, \
-    FlattenLayer, ForwardLayer, OutputConversionImageToTable
-from tests.helpers import resolve_resource_path, images_all
+    FlattenLayer, ForwardLayer, OutputConversionImageToTable, ConvolutionalTranspose2DLayer, NeuralNetworkRegressor
+from safeds.ml.nn._output_conversion_image import OutputConversionImageToColumn, OutputConversionImageToImage
+from tests.helpers import resolve_resource_path, images_all, device_cuda, device_cpu, skip_if_device_not_available
 
 
 class TestImageToTable:
 
     @pytest.mark.parametrize(
-        ("seed", "layer_3_bias"),
+        ("seed", "device", "layer_3_bias", "prediction_label"),
         [
-            (1234, [0.5809096097946167, -0.32418742775917053, 0.026058292016386986, 0.5801554918289185]),
-
(4711, [-0.8114155530929565, -0.9443624019622803, 0.8557258248329163, -0.848240852355957]), + (1234, device_cuda, [0.5809096097946167, -0.32418742775917053, 0.026058292016386986, 0.5801554918289185], ["grayscale"] * 7), + (4711, device_cuda, [-0.8114155530929565, -0.9443624019622803, 0.8557258248329163, -0.848240852355957], ["white_square"] * 7), + (1234, device_cpu, [-0.6926110982894897, 0.33004942536354065, -0.32962560653686523, 0.5768553614616394], ["grayscale"] * 7), + (4711, device_cpu, [-0.9051575660705566, -0.8625037670135498, 0.24682046473026276, -0.2612163722515106], ["white_square"] * 7), ], - ids=["seed-1234", "seed-4711"] + ids=["seed-1234-cuda", "seed-4711-cuda", "seed-1234-cpu", "seed-4711-cpu"] ) - def test_should_train_model(self, seed: int, layer_3_bias: list[float]): + def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[float], prediction_label: list[str], device: Device): + skip_if_device_not_available(device) + torch.set_default_device(device) torch.manual_seed(seed) - torch.set_default_device(_get_device()) image_list, filenames = ImageList.from_files(resolve_resource_path(images_all()), return_filenames=True) image_list = image_list.resize(20, 20) @@ -32,15 +37,92 @@ def test_should_train_model(self, seed: int, layer_3_bias: list[float]): one_hot_encoder = OneHotEncoder().fit(image_classes, ["class"]) image_classes_one_hot_encoded = one_hot_encoder.transform(image_classes) image_dataset = ImageDataset(image_list, image_classes_one_hot_encoded) - layers = [ Convolutional2DLayer(1, 2), MaxPooling2DLayer(10), FlattenLayer(), - ForwardLayer(len(one_hot_encoder.get_names_of_added_columns())) + ForwardLayer(image_dataset.output_size) ] nn_original = NeuralNetworkClassifier(InputConversionImage(image_dataset.input_size), layers, OutputConversionImageToTable()) nn = nn_original.fit(image_dataset, epoch_size=2) assert str(nn_original._model.state_dict().values()) != str(nn._model.state_dict().values()) assert nn._model.state_dict()["_pytorch_layers.3._layer.bias"].tolist() == layer_3_bias + prediction = nn.predict(image_dataset.get_input()) + assert one_hot_encoder.inverse_transform(prediction.get_output()) == Table({"class": prediction_label}) + + +class TestImageToColumn: + + @pytest.mark.parametrize( + ("seed", "device", "layer_3_bias", "prediction_label"), + [ + (1234, device_cuda, [0.5809096097946167, -0.32418742775917053, 0.026058292016386986, 0.5801554918289185], ["grayscale"] * 7), + (4711, device_cuda, [-0.8114155530929565, -0.9443624019622803, 0.8557258248329163, -0.848240852355957], ["white_square"] * 7), + (1234, device_cpu, [-0.6926110982894897, 0.33004942536354065, -0.32962560653686523, 0.5768553614616394], ["grayscale"] * 7), + (4711, device_cpu, [-0.9051575660705566, -0.8625037670135498, 0.24682046473026276, -0.2612163722515106], ["white_square"] * 7), + ], + ids=["seed-1234-cuda", "seed-4711-cuda", "seed-1234-cpu", "seed-4711-cpu"] + ) + def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[float], prediction_label: list[str], device: Device): + skip_if_device_not_available(device) + torch.set_default_device(device) + torch.manual_seed(seed) + + image_list, filenames = ImageList.from_files(resolve_resource_path(images_all()), return_filenames=True) + image_list = image_list.resize(20, 20) + image_classes = Column("class", [re.search(r"(.*)\\(.*)\.", filename).group(2) for filename in filenames]) + image_dataset = ImageDataset(image_list, image_classes) + print(image_dataset._output._tensor) + 
print(image_dataset._output._tensor.size()) + + layers = [ + Convolutional2DLayer(1, 2), + MaxPooling2DLayer(10), + FlattenLayer(), + ForwardLayer(image_dataset.output_size) + ] + nn_original = NeuralNetworkClassifier(InputConversionImage(image_dataset.input_size), layers, + OutputConversionImageToColumn()) + nn = nn_original.fit(image_dataset, epoch_size=2) + assert str(nn_original._model.state_dict().values()) != str(nn._model.state_dict().values()) + assert nn._model.state_dict()["_pytorch_layers.3._layer.bias"].tolist() == layer_3_bias + prediction = nn.predict(image_dataset.get_input()) + assert prediction.get_output() == Column("class", prediction_label) + + +class TestImageToImage: + + @pytest.mark.parametrize( + ("seed", "device", "layer_3_bias"), + [ + (1234, device_cuda, [0.13570494949817657, 0.02420804090797901, -0.1311846673488617, 0.22676928341388702]), + (4711, device_cuda, [0.11234158277511597, 0.13972002267837524, -0.07925988733768463, 0.07342307269573212]), + (1234, device_cpu, [-0.1637762188911438, 0.02012808807194233, -0.22295698523521423, 0.1689515858888626]), + (4711, device_cpu, [-0.030541712418198586, -0.15364733338356018, 0.1741572618484497, 0.015837203711271286]), + ], + ids=["seed-1234-cuda", "seed-4711-cuda", "seed-1234-cpu", "seed-4711-cpu"] + ) + def test_should_train_and_predict_model(self, seed: int, snapshot_png_image_list: SnapshotAssertion, layer_3_bias: list[float], device: Device): + skip_if_device_not_available(device) + torch.set_default_device(device) + torch.manual_seed(seed) + + image_list = ImageList.from_files(resolve_resource_path(images_all())) + image_list = image_list.resize(20, 20) + image_list_grayscale = image_list.convert_to_grayscale() + image_dataset = ImageDataset(image_list, image_list_grayscale) + + layers = [ + Convolutional2DLayer(6, 2), + Convolutional2DLayer(12, 2), + ConvolutionalTranspose2DLayer(6, 2), + ConvolutionalTranspose2DLayer(4, 2), + ] + nn_original = NeuralNetworkRegressor(InputConversionImage(image_dataset.input_size), layers, + OutputConversionImageToImage()) + nn = nn_original.fit(image_dataset, epoch_size=20) + assert str(nn_original._model.state_dict().values()) != str(nn._model.state_dict().values()) + assert nn._model.state_dict()["_pytorch_layers.3._layer.bias"].tolist() == layer_3_bias + prediction = nn.predict(image_dataset.get_input()) + assert prediction.get_output() == snapshot_png_image_list From 8312d9931511eee01287d964da799f4ea4815456 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Wed, 24 Apr 2024 23:57:58 +0200 Subject: [PATCH 10/42] test: made `TestImageToColumn.test_should_train_and_predict_model` os independent --- tests/safeds/ml/nn/test_cnn_workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/safeds/ml/nn/test_cnn_workflow.py b/tests/safeds/ml/nn/test_cnn_workflow.py index fddc24d08..d9a5313f4 100644 --- a/tests/safeds/ml/nn/test_cnn_workflow.py +++ b/tests/safeds/ml/nn/test_cnn_workflow.py @@ -71,7 +71,7 @@ def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[floa image_list, filenames = ImageList.from_files(resolve_resource_path(images_all()), return_filenames=True) image_list = image_list.resize(20, 20) - image_classes = Column("class", [re.search(r"(.*)\\(.*)\.", filename).group(2) for filename in filenames]) + image_classes = Column("class", [re.search(r"(.*)[\\/](.*)\.", filename).group(2) for filename in filenames]) image_dataset = ImageDataset(image_list, image_classes) print(image_dataset._output._tensor) 
print(image_dataset._output._tensor.size()) From 031677a2cfa433ba023e3542c4be615156e1ae13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Mon, 29 Apr 2024 01:00:32 +0200 Subject: [PATCH 11/42] feat: added `Image.__array__` to convert a `Image` to a `numpy.ndarray` --- src/safeds/data/image/containers/_image.py | 15 +++++++++++++++ .../safeds/data/image/containers/test_image.py | 17 +++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/src/safeds/data/image/containers/_image.py b/src/safeds/data/image/containers/_image.py index bb37faf87..c75198887 100644 --- a/src/safeds/data/image/containers/_image.py +++ b/src/safeds/data/image/containers/_image.py @@ -25,6 +25,8 @@ from torch import Tensor from torch.types import Device + from numpy import ndarray + class Image: """ @@ -165,6 +167,19 @@ def __sizeof__(self) -> int: """ return sys.getsizeof(self._image_tensor) + self._image_tensor.element_size() * self._image_tensor.nelement() + def __array__(self, dtype=None) -> ndarray: + """ + Return the image as a numpy array. + + Returns + ------- + numpy_array: + The image as numpy array. + """ + from numpy import uint8 + + return self._image_tensor.permute(1, 2, 0).detach().cpu().numpy().astype(uint8 if dtype is None else dtype) + def _repr_jpeg_(self) -> bytes | None: """ Return a JPEG image as bytes. diff --git a/tests/safeds/data/image/containers/test_image.py b/tests/safeds/data/image/containers/test_image.py index f2484950c..ea4f251a4 100644 --- a/tests/safeds/data/image/containers/test_image.py +++ b/tests/safeds/data/image/containers/test_image.py @@ -3,6 +3,8 @@ from pathlib import Path from tempfile import NamedTemporaryFile +import PIL.Image +import numpy as np import pytest import torch from safeds.data.image.containers import Image @@ -98,6 +100,21 @@ def test_should_write_and_load_bytes_png(self, resource_path: str | Path, device assert image == image_copy +@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) +class TestToNumpyArray: + + @pytest.mark.parametrize( + "resource_path", + images_all(), + ids=images_all_ids(), + ) + def test_should_return_numpy_array(self, resource_path: str | Path, device: Device) -> None: + skip_if_device_not_available(device) + image_safeds = Image.from_file(resolve_resource_path(resource_path), device) + image_np = np.array(PIL.Image.open(resolve_resource_path(resource_path))) + assert np.all(np.array(image_safeds).squeeze() == image_np) + + @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) class TestReprJpeg: @pytest.mark.parametrize( From c567b5571447fb99977d1b745ed388850b350954 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Mon, 29 Apr 2024 02:35:49 +0200 Subject: [PATCH 12/42] feat: added checks and errors for invalid CNNs refactor: moved `ImageDataset` to `labeled.containers.ImageDataset` --- src/safeds/data/image/containers/__init__.py | 3 - src/safeds/data/labeled/__init__.py | 1 + .../data/labeled/containers/__init__.py | 19 ++++ .../containers/_image_dataset.py | 0 src/safeds/exceptions/__init__.py | 3 + src/safeds/exceptions/_ml.py | 7 ++ src/safeds/ml/nn/_input_conversion.py | 3 +- src/safeds/ml/nn/_input_conversion_image.py | 5 +- src/safeds/ml/nn/_model.py | 101 ++++++++++++++++-- src/safeds/ml/nn/_output_conversion.py | 7 +- src/safeds/ml/nn/_output_conversion_image.py | 5 +- tests/safeds/ml/nn/test_cnn_workflow.py | 9 +- tests/safeds/ml/nn/test_model.py | 78 +++++++++++++- 13 files changed, 218 insertions(+), 23 deletions(-) create mode 
100644 src/safeds/data/labeled/__init__.py
 create mode 100644 src/safeds/data/labeled/containers/__init__.py
 rename src/safeds/data/{image => labeled}/containers/_image_dataset.py (100%)
diff --git a/src/safeds/data/image/containers/__init__.py b/src/safeds/data/image/containers/__init__.py
index d294224d0..f0a8f344c 100644
--- a/src/safeds/data/image/containers/__init__.py
+++ b/src/safeds/data/image/containers/__init__.py
@@ -7,19 +7,16 @@
 if TYPE_CHECKING:
     from ._image import Image
     from ._image_list import ImageList
-    from ._image_dataset import ImageDataset
 
 apipkg.initpkg(
     __name__,
     {
         "Image": "._image:Image",
         "ImageList": "._image_list:ImageList",
-        "ImageDataset": "._image_dataset:ImageDataset",
     },
 )
 
 __all__ = [
     "Image",
     "ImageList",
-    "ImageDataset",
 ]
diff --git a/src/safeds/data/labeled/__init__.py b/src/safeds/data/labeled/__init__.py
new file mode 100644
index 000000000..7ca424f89
--- /dev/null
+++ b/src/safeds/data/labeled/__init__.py
@@ -0,0 +1 @@
+"""Work with labeled data."""
diff --git a/src/safeds/data/labeled/containers/__init__.py b/src/safeds/data/labeled/containers/__init__.py
new file mode 100644
index 000000000..e3e8bdf51
--- /dev/null
+++ b/src/safeds/data/labeled/containers/__init__.py
@@ -0,0 +1,19 @@
+"""Classes that can store labeled data."""
+
+from typing import TYPE_CHECKING
+
+import apipkg
+
+if TYPE_CHECKING:
+    from ._image_dataset import ImageDataset
+
+apipkg.initpkg(
+    __name__,
+    {
+        "ImageDataset": "._image_dataset:ImageDataset",
+    },
+)
+
+__all__ = [
+    "ImageDataset",
+]
diff --git a/src/safeds/data/image/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py
similarity index 100%
rename from src/safeds/data/image/containers/_image_dataset.py
rename to src/safeds/data/labeled/containers/_image_dataset.py
diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py
index 578e5c8e5..6de1c3c9c 100644
--- a/src/safeds/exceptions/__init__.py
+++ b/src/safeds/exceptions/__init__.py
@@ -35,6 +35,7 @@
     DatasetMissesFeaturesError,
     FeatureDataMismatchError,
     InputSizeError,
+    InvalidModelStructureError,
     LearningError,
     ModelNotFittedError,
     NonTimeSeriesError,
@@ -70,6 +71,7 @@
         "DatasetMissesFeaturesError": "._ml:DatasetMissesFeaturesError",
         "FeatureDataMismatchError": "._ml:FeatureDataMismatchError",
         "InputSizeError": "._ml:InputSizeError",
+        "InvalidModelStructureError": "._ml:InvalidModelStructureError",
         "LearningError": "._ml:LearningError",
         "ModelNotFittedError": "._ml:ModelNotFittedError",
         "NonTimeSeriesError": "._ml:NonTimeSeriesError",
@@ -108,6 +110,7 @@
     "DatasetMissesFeaturesError",
     "FeatureDataMismatchError",
     "InputSizeError",
+    "InvalidModelStructureError",
     "LearningError",
     "ModelNotFittedError",
     "NonTimeSeriesError",
diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py
index 68063cff0..28cad77a8 100644
--- a/src/safeds/exceptions/_ml.py
+++ b/src/safeds/exceptions/_ml.py
@@ -54,6 +54,13 @@ def __init__(self) -> None:
         super().__init__("The model has not been fitted yet.")
 
 
+class InvalidModelStructureError(Exception):
+    """Raised when the structure of the model is invalid."""
+
+    def __init__(self, reason: str) -> None:
+        super().__init__(f"The model structure is invalid: {reason}")
+
+
 class PredictionError(Exception):
     """
     Raised when an error occurred while predicting a target vector using a model.
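Taken together, the relocated `ImageDataset` and the new error type compose as follows. This is a minimal end-to-end sketch mirroring the CNN tests in this series; the file path and label values are illustrative, everything else is the API added by these patches (note that `OutputConversionImageToColumn` is imported from the private module, as in the tests):

    from safeds.data.image.containers import ImageList
    from safeds.data.labeled.containers import ImageDataset
    from safeds.data.tabular.containers import Column
    from safeds.ml.nn import (
        Convolutional2DLayer,
        FlattenLayer,
        ForwardLayer,
        InputConversionImage,
        MaxPooling2DLayer,
        NeuralNetworkClassifier,
    )
    from safeds.ml.nn._output_conversion_image import OutputConversionImageToColumn

    # images of a single size, labeled by a plain Column;
    # ImageDataset one-hot encodes the labels internally via _ColumnAsTensor
    images = ImageList.from_files("path/to/images").resize(20, 20)
    labels = Column("class", [f"label_{i % 2}" for i in range(len(images))])
    dataset = ImageDataset(images, labels)

    # 2-dimensional layers first, then FlattenLayer, then 1-dimensional layers;
    # any other ordering now raises InvalidModelStructureError at construction time
    classifier = NeuralNetworkClassifier(
        InputConversionImage(dataset.input_size),
        [Convolutional2DLayer(1, 2), MaxPooling2DLayer(10), FlattenLayer(), ForwardLayer(dataset.output_size)],
        OutputConversionImageToColumn(),
    )
    fitted = classifier.fit(dataset, epoch_size=2)
    prediction = fitted.predict(dataset.get_input())  # ImageDataset[Column]
    decoded = prediction.get_output()                 # Column of decoded labels

Because the structure checks run in `__init__`, an invalid combination such as a `ForwardLayer` placed before the `FlattenLayer` now fails immediately instead of during `fit`.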
diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py index f71b4b4b7..4807d38a0 100644 --- a/src/safeds/ml/nn/_input_conversion.py +++ b/src/safeds/ml/nn/_input_conversion.py @@ -9,7 +9,8 @@ from torch.utils.data import DataLoader from safeds.data.tabular.containers import Table, TaggedTable, TimeSeries -from safeds.data.image.containers import ImageDataset, ImageList +from safeds.data.image.containers import ImageList +from safeds.data.labeled.containers import ImageDataset FT = TypeVar("FT", TaggedTable, TimeSeries) PT = TypeVar("PT", Table, TimeSeries) diff --git a/src/safeds/ml/nn/_input_conversion_image.py b/src/safeds/ml/nn/_input_conversion_image.py index 291c44893..9a1d2789e 100644 --- a/src/safeds/ml/nn/_input_conversion_image.py +++ b/src/safeds/ml/nn/_input_conversion_image.py @@ -1,7 +1,8 @@ from __future__ import annotations -from safeds.data.image.containers import ImageDataset, ImageList -from safeds.data.image.containers._image_dataset import _ColumnAsTensor, _TableAsTensor +from safeds.data.image.containers import ImageList +from safeds.data.labeled.containers import ImageDataset +from safeds.data.labeled.containers._image_dataset import _ColumnAsTensor, _TableAsTensor from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from safeds.data.image.typing import ImageSize from safeds.data.tabular.transformation import OneHotEncoder diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index f03fcdf0f..32cca4a39 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -3,17 +3,20 @@ import copy from typing import TYPE_CHECKING, Generic, Self, TypeVar -from safeds.data.image.containers import ImageList, ImageDataset +from safeds.data.image.containers import ImageList +from safeds.data.labeled.containers import ImageDataset from safeds.data.tabular.containers import Table, TaggedTable, TimeSeries from safeds.exceptions import ( ClosedBound, FeatureDataMismatchError, + InvalidModelStructureError, InputSizeError, ModelNotFittedError, OutOfBoundsError, ) -from safeds.ml.nn import InputConversionImage, FlattenLayer, OutputConversionImageToTable -from safeds.ml.nn._output_conversion_image import OutputConversionImageToColumn +from safeds.ml.nn import InputConversionImage, FlattenLayer, OutputConversionImageToTable, Convolutional2DLayer, \ + ForwardLayer, OutputConversionImageToImage +from safeds.ml.nn._output_conversion_image import OutputConversionImageToColumn, _OutputConversionImage from safeds.ml.nn._pooling2d_layer import _Pooling2DLayer if TYPE_CHECKING: @@ -31,12 +34,56 @@ class NeuralNetworkRegressor(Generic[IFT, IPT, OT]): + """ + A NeuralNetworkRegressor is a neural network that is used for regression tasks. 
+ + Parameters + ---------- + input_conversion: + to convert the input data for the neural network + layers: + a list of layers for the neural network to learn + output_conversion: + to convert the output data of the neural network back + + Raises + ------ + InvalidModelStructureError + if the defined model structure is invalid + """ + def __init__( self, input_conversion: _InputConversion[IFT, IPT], layers: list[_Layer], output_conversion: _OutputConversion[IPT, OT], ): + if len(layers) == 0: + raise InvalidModelStructureError("You need to provide at least one layer to a neural network.") + if isinstance(input_conversion, InputConversionImage): + if not isinstance(output_conversion, _OutputConversionImage): + raise InvalidModelStructureError("The defined model uses an input conversion for images but no output conversion for images.") + elif isinstance(output_conversion, OutputConversionImageToTable) or isinstance(output_conversion, OutputConversionImageToColumn): + raise InvalidModelStructureError("A NeuralNetworkRegressor cannot be used with images as input and 1-dimensional data as output.") + data_dimensions = 2 + for layer in layers: + if data_dimensions == 2 and (isinstance(layer, Convolutional2DLayer) or isinstance(layer, _Pooling2DLayer)): + continue + elif data_dimensions == 2 and isinstance(layer, FlattenLayer): + data_dimensions = 1 + elif data_dimensions == 1 and isinstance(layer, ForwardLayer): + continue + else: + raise InvalidModelStructureError("The 2-dimensional data has to be flattened before using a 1-dimensional layer." if data_dimensions == 2 else "You cannot use a 2-dimensional layer with 1-dimensional data.") + if data_dimensions == 1 and isinstance(output_conversion, OutputConversionImageToImage): + raise InvalidModelStructureError("The output data would be 1-dimensional but the provided output conversion uses 2-dimensional data.") + elif isinstance(output_conversion, _OutputConversionImage): + raise InvalidModelStructureError("The defined model uses an output conversion for images but no input conversion for images.") + else: + for layer in layers: + if isinstance(layer, Convolutional2DLayer) or isinstance(layer, _Pooling2DLayer) or isinstance(layer, FlattenLayer): + raise InvalidModelStructureError("You cannot use a 2-dimensional layer with 1-dimensional data.") + self._input_conversion: _InputConversion[IFT, IPT] = input_conversion self._model = _create_internal_model(input_conversion, layers, is_for_classification=False) self._output_conversion: _OutputConversion[IPT, OT] = output_conversion @@ -169,10 +216,6 @@ def predict(self, test_data: IPT) -> OT: for x in dataloader: elem = self._model(x) predictions.append(elem.squeeze(dim=1)) - if isinstance(self._output_conversion, OutputConversionImageToTable) and isinstance(self._input_conversion, InputConversionImage): - return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0), column_names=self._input_conversion._column_names) - if isinstance(self._output_conversion, OutputConversionImageToColumn) and isinstance(self._input_conversion, InputConversionImage): - return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0), column_name=self._input_conversion._column_name, one_hot_encoder=self._input_conversion._one_hot_encoder) return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0)) @property @@ -189,12 +232,56 @@ def is_fitted(self) -> bool: class NeuralNetworkClassifier(Generic[IFT, IPT, OT]): + """ + A 
NeuralNetworkClassifier is a neural network that is used for classification tasks. + + Parameters + ---------- + input_conversion: + to convert the input data for the neural network + layers: + a list of layers for the neural network to learn + output_conversion: + to convert the output data of the neural network back + + Raises + ------ + InvalidModelStructureError + if the defined model structure is invalid + """ + def __init__( self, input_conversion: _InputConversion[IFT, IPT], layers: list[_Layer], output_conversion: _OutputConversion[IPT, OT], ): + if len(layers) == 0: + raise InvalidModelStructureError("You need to provide at least one layer to a neural network.") + if isinstance(output_conversion, OutputConversionImageToImage): + raise InvalidModelStructureError("A NeuralNetworkClassifier cannot be used with images as output.") + elif isinstance(input_conversion, InputConversionImage): + if not isinstance(output_conversion, _OutputConversionImage): + raise InvalidModelStructureError("The defined model uses an input conversion for images but no output conversion for images.") + data_dimensions = 2 + for layer in layers: + if data_dimensions == 2 and (isinstance(layer, Convolutional2DLayer) or isinstance(layer, _Pooling2DLayer)): + continue + elif data_dimensions == 2 and isinstance(layer, FlattenLayer): + data_dimensions = 1 + elif data_dimensions == 1 and isinstance(layer, ForwardLayer): + continue + else: + raise InvalidModelStructureError("The 2-dimensional data has to be flattened before using a 1-dimensional layer." if data_dimensions == 2 else "You cannot use a 2-dimensional layer with 1-dimensional data.") + if data_dimensions == 2 and (isinstance(output_conversion, OutputConversionImageToTable) or isinstance(output_conversion, OutputConversionImageToColumn)): + raise InvalidModelStructureError("The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data.") + elif isinstance(output_conversion, _OutputConversionImage): + raise InvalidModelStructureError("The defined model uses an output conversion for images but no input conversion for images.") + else: + for layer in layers: + if isinstance(layer, Convolutional2DLayer) or isinstance(layer, _Pooling2DLayer) or isinstance(layer, FlattenLayer): + raise InvalidModelStructureError("You cannot use a 2-dimensional layer with 1-dimensional data.") + self._input_conversion: _InputConversion[IFT, IPT] = input_conversion self._model = _create_internal_model(input_conversion, layers, is_for_classification=True) self._output_conversion: _OutputConversion[IPT, OT] = output_conversion diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py index 55587fe05..2575b5faa 100644 --- a/src/safeds/ml/nn/_output_conversion.py +++ b/src/safeds/ml/nn/_output_conversion.py @@ -3,13 +3,16 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Generic, TypeVar +from safeds.data.image.containers import ImageList +from safeds.data.labeled.containers import ImageDataset + if TYPE_CHECKING: from torch import Tensor from safeds.data.tabular.containers import Table, TaggedTable, TimeSeries -IT = TypeVar("IT", Table, TimeSeries) -OT = TypeVar("OT", TaggedTable, TimeSeries) +IT = TypeVar("IT", Table, TimeSeries, ImageList) +OT = TypeVar("OT", TaggedTable, TimeSeries, ImageDataset) class _OutputConversion(Generic[IT, OT], ABC): diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index 1d78506fd..130354e37 100644 --- 
a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -3,8 +3,9 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING, TypeVar -from safeds.data.image.containers import ImageDataset, ImageList -from safeds.data.image.containers._image_dataset import _TableAsTensor, _ColumnAsTensor +from safeds.data.image.containers import ImageList +from safeds.data.labeled.containers import ImageDataset +from safeds.data.labeled.containers._image_dataset import _TableAsTensor, _ColumnAsTensor from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from safeds.data.tabular.containers import Table, Column from safeds.data.tabular.transformation import OneHotEncoder diff --git a/tests/safeds/ml/nn/test_cnn_workflow.py b/tests/safeds/ml/nn/test_cnn_workflow.py index d9a5313f4..675a00f21 100644 --- a/tests/safeds/ml/nn/test_cnn_workflow.py +++ b/tests/safeds/ml/nn/test_cnn_workflow.py @@ -5,7 +5,8 @@ from syrupy import SnapshotAssertion from torch.types import Device -from safeds.data.image.containers import ImageList, ImageDataset +from safeds.data.image.containers import ImageList +from safeds.data.labeled.containers import ImageDataset from safeds.data.tabular.containers import Table, Column from safeds.data.tabular.transformation import OneHotEncoder from safeds.ml.nn import NeuralNetworkClassifier, InputConversionImage, Convolutional2DLayer, MaxPooling2DLayer, \ @@ -14,7 +15,7 @@ from tests.helpers import resolve_resource_path, images_all, device_cuda, device_cpu, skip_if_device_not_available -class TestImageToTable: +class TestImageToTableClassifier: @pytest.mark.parametrize( ("seed", "device", "layer_3_bias", "prediction_label"), @@ -52,7 +53,7 @@ def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[floa assert one_hot_encoder.inverse_transform(prediction.get_output()) == Table({"class": prediction_label}) -class TestImageToColumn: +class TestImageToColumnClassifier: @pytest.mark.parametrize( ("seed", "device", "layer_3_bias", "prediction_label"), @@ -91,7 +92,7 @@ def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[floa assert prediction.get_output() == Column("class", prediction_label) -class TestImageToImage: +class TestImageToImageRegressor: @pytest.mark.parametrize( ("seed", "device", "layer_3_bias"), diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index 2c43739a8..ea8ac5ca2 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -1,13 +1,21 @@ import pytest + +from safeds.data.image.typing import ImageSize from safeds.data.tabular.containers import Table, TaggedTable -from safeds.exceptions import FeatureDataMismatchError, InputSizeError, ModelNotFittedError, OutOfBoundsError +from safeds.exceptions import FeatureDataMismatchError, InputSizeError, ModelNotFittedError, OutOfBoundsError, \ + InvalidModelStructureError from safeds.ml.nn import ( ForwardLayer, InputConversionTable, NeuralNetworkClassifier, NeuralNetworkRegressor, - OutputConversionTable, + OutputConversionTable, OutputConversionImageToTable, OutputConversionImageToImage, Convolutional2DLayer, + ConvolutionalTranspose2DLayer, MaxPooling2DLayer, AvgPooling2DLayer, FlattenLayer, InputConversionImage, ) +from safeds.ml.nn._input_conversion import _InputConversion +from safeds.ml.nn._layer import _Layer +from safeds.ml.nn._output_conversion import _OutputConversion +from safeds.ml.nn._output_conversion_image import 
OutputConversionImageToColumn class TestClassificationModel: @@ -228,6 +236,46 @@ def callback_was_called(self) -> bool: assert obj.callback_was_called() is True + @pytest.mark.parametrize( + ("input_conversion", "layers", "output_conversion", "error_msg"), + [ + (InputConversionTable([], ""), [FlattenLayer()], OutputConversionImageToTable(), r"The defined model uses an output conversion for images but no input conversion for images."), + (InputConversionTable([], ""), [FlattenLayer()], OutputConversionImageToColumn(), r"The defined model uses an output conversion for images but no input conversion for images."), + (InputConversionTable([], ""), [FlattenLayer()], OutputConversionImageToImage(), r"A NeuralNetworkClassifier cannot be used with images as output."), + (InputConversionTable([], ""), [Convolutional2DLayer(1, 1)], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionTable([], ""), [ConvolutionalTranspose2DLayer(1, 1)], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionTable([], ""), [MaxPooling2DLayer(1)], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionTable([], ""), [AvgPooling2DLayer(1)], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionTable([], ""), [FlattenLayer()], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer()], OutputConversionTable(), r"The defined model uses an input conversion for images but no output conversion for images."), + (InputConversionImage(ImageSize(1, 1, 1)), [Convolutional2DLayer(1, 1)], OutputConversionImageToTable(), r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [Convolutional2DLayer(1, 1)], OutputConversionImageToColumn(), r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [ConvolutionalTranspose2DLayer(1, 1)], OutputConversionImageToTable(), r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [ConvolutionalTranspose2DLayer(1, 1)], OutputConversionImageToColumn(), r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [MaxPooling2DLayer(1)], OutputConversionImageToTable(), r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [MaxPooling2DLayer(1)], OutputConversionImageToColumn(), r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [AvgPooling2DLayer(1)], OutputConversionImageToTable(), r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [AvgPooling2DLayer(1)], OutputConversionImageToColumn(), r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), Convolutional2DLayer(1, 1)], OutputConversionImageToTable(), r"You cannot use a 
2-dimensional layer with 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), Convolutional2DLayer(1, 1)], OutputConversionImageToColumn(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), ConvolutionalTranspose2DLayer(1, 1)], OutputConversionImageToTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), ConvolutionalTranspose2DLayer(1, 1)], OutputConversionImageToColumn(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), MaxPooling2DLayer(1)], OutputConversionImageToTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), MaxPooling2DLayer(1)], OutputConversionImageToColumn(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), AvgPooling2DLayer(1)], OutputConversionImageToTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), AvgPooling2DLayer(1)], OutputConversionImageToColumn(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), FlattenLayer()], OutputConversionImageToTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), FlattenLayer()], OutputConversionImageToColumn(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [ForwardLayer(1)], OutputConversionImageToTable(), r"The 2-dimensional data has to be flattened before using a 1-dimensional layer."), + (InputConversionImage(ImageSize(1, 1, 1)), [ForwardLayer(1)], OutputConversionImageToColumn(), r"The 2-dimensional data has to be flattened before using a 1-dimensional layer."), + (InputConversionImage(ImageSize(1, 1, 1)), [], OutputConversionImageToTable(), r"You need to provide at least one layer to a neural network."), + (InputConversionImage(ImageSize(1, 1, 1)), [], OutputConversionImageToColumn(), r"You need to provide at least one layer to a neural network."), + ] + ) + def test_should_raise_if_model_has_invalid_structure(self, input_conversion: _InputConversion, layers: list[_Layer], output_conversion: _OutputConversion, error_msg: str) -> None: + with pytest.raises(InvalidModelStructureError, match=error_msg): + NeuralNetworkClassifier(input_conversion, layers, output_conversion) + class TestRegressionModel: @pytest.mark.parametrize( @@ -420,3 +468,29 @@ def callback_was_called(self) -> bool: model.fit(Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), callback_on_epoch_completion=obj.cb) assert obj.callback_was_called() is True + + @pytest.mark.parametrize( + ("input_conversion", "layers", "output_conversion", "error_msg"), + [ + (InputConversionTable([], ""), [FlattenLayer()], OutputConversionImageToImage(), r"The defined model uses an output conversion for images but no input conversion for images."), + (InputConversionTable([], ""), [Convolutional2DLayer(1, 1)], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionTable([], ""), [ConvolutionalTranspose2DLayer(1, 1)], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 
1-dimensional data."), + (InputConversionTable([], ""), [MaxPooling2DLayer(1)], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionTable([], ""), [AvgPooling2DLayer(1)], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionTable([], ""), [FlattenLayer()], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer()], OutputConversionTable(), r"The defined model uses an input conversion for images but no output conversion for images."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer()], OutputConversionImageToImage(), r"The output data would be 1-dimensional but the provided output conversion uses 2-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), Convolutional2DLayer(1, 1)], OutputConversionImageToImage(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), ConvolutionalTranspose2DLayer(1, 1)], OutputConversionImageToImage(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), MaxPooling2DLayer(1)], OutputConversionImageToImage(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), AvgPooling2DLayer(1)], OutputConversionImageToImage(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), FlattenLayer()], OutputConversionImageToImage(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [ForwardLayer(1)], OutputConversionImageToImage(), r"The 2-dimensional data has to be flattened before using a 1-dimensional layer."), + (InputConversionImage(ImageSize(1, 1, 1)), [], OutputConversionImageToImage(), r"You need to provide at least one layer to a neural network."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer()], OutputConversionImageToTable(), r"A NeuralNetworkRegressor cannot be used with images as input and 1-dimensional data as output."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer()], OutputConversionImageToColumn(), r"A NeuralNetworkRegressor cannot be used with images as input and 1-dimensional data as output."), + ] + ) + def test_should_raise_if_model_has_invalid_structure(self, input_conversion: _InputConversion, layers: list[_Layer], output_conversion: _OutputConversion, error_msg: str) -> None: + with pytest.raises(InvalidModelStructureError, match=error_msg): + NeuralNetworkRegressor(input_conversion, layers, output_conversion) From 137d65815aee87fb37e5c4761167d3a5ba5acf74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Mon, 29 Apr 2024 17:38:53 +0200 Subject: [PATCH 13/42] feat: added equals check to `OneHotEncoder` test: added tests for `ImageDataset`, `InputConversionImage` and all `OutputConversionImage` variants --- .../data/labeled/containers/_image_dataset.py | 16 ++- .../transformation/_one_hot_encoder.py | 8 ++ src/safeds/ml/nn/_input_conversion_image.py | 24 ++-- src/safeds/ml/nn/_output_conversion_image.py | 2 +- tests/safeds/data/labeled/__init__.py | 0 .../data/labeled/containers/__init__.py | 0 .../labeled/containers/test_image_dataset.py | 119 ++++++++++++++++++ 
...rain_and_predict_model[seed-1234-cpu].png} | Bin ...ain_and_predict_model[seed-1234-cuda].png} | Bin ...rain_and_predict_model[seed-4711-cpu].png} | Bin ...ain_and_predict_model[seed-4711-cuda].png} | Bin .../ml/nn/test_input_conversion_image.py | 37 ++++++ .../ml/nn/test_output_conversion_image.py | 22 ++++ 13 files changed, 212 insertions(+), 16 deletions(-) create mode 100644 tests/safeds/data/labeled/__init__.py create mode 100644 tests/safeds/data/labeled/containers/__init__.py create mode 100644 tests/safeds/data/labeled/containers/test_image_dataset.py rename tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/{TestImageToImage.test_should_train_and_predict_model[seed-1234-cpu].png => TestImageToImageRegressor.test_should_train_and_predict_model[seed-1234-cpu].png} (100%) rename tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/{TestImageToImage.test_should_train_and_predict_model[seed-1234-cuda].png => TestImageToImageRegressor.test_should_train_and_predict_model[seed-1234-cuda].png} (100%) rename tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/{TestImageToImage.test_should_train_and_predict_model[seed-4711-cpu].png => TestImageToImageRegressor.test_should_train_and_predict_model[seed-4711-cpu].png} (100%) rename tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/{TestImageToImage.test_should_train_and_predict_model[seed-4711-cuda].png => TestImageToImageRegressor.test_should_train_and_predict_model[seed-4711-cuda].png} (100%) create mode 100644 tests/safeds/ml/nn/test_input_conversion_image.py create mode 100644 tests/safeds/ml/nn/test_output_conversion_image.py diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py index 5288368ac..6fb1ecc82 100644 --- a/src/safeds/data/labeled/containers/_image_dataset.py +++ b/src/safeds/data/labeled/containers/_image_dataset.py @@ -4,12 +4,14 @@ from typing import TYPE_CHECKING, TypeVar, Generic from safeds.data.image.containers import ImageList +from safeds.data.image.containers._empty_image_list import _EmptyImageList +from safeds.data.image.containers._multi_size_image_list import _MultiSizeImageList from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from safeds.data.image.typing import ImageSize from safeds.data.tabular.containers import Table, Column from safeds.data.tabular.transformation import OneHotEncoder from safeds.exceptions import NonNumericColumnError, OutputLengthMismatchError, IndexOutOfBoundsError, \ - TransformerNotFittedError + TransformerNotFittedError, OutOfBoundsError, ClosedBound if TYPE_CHECKING: from torch import Tensor @@ -27,8 +29,10 @@ def __init__(self, input_data: ImageList, output_data: T, batch_size=1, shuffle= self._batch_size = batch_size self._next_batch_index = 0 - if not isinstance(input_data, _SingleSizeImageList): + if isinstance(input_data, _MultiSizeImageList): raise ValueError("The given input ImageList contains images of different sizes.") + elif isinstance(input_data, _EmptyImageList): + raise ValueError("The given input ImageList contains no images.") else: self._input_size = ImageSize(input_data.widths[0], input_data.heights[0], input_data.channel) self._input = input_data @@ -101,7 +105,9 @@ def _get_batch(self, batch_number: int, batch_size: int | None = None) -> tuple[ if batch_size is None: batch_size = self._batch_size - if batch_size * batch_number >= len(self._input): + if batch_size < 1: + raise OutOfBoundsError(batch_size, name="batch_size", lower_bound=ClosedBound(1)) + if 
batch_number < 0 or batch_size * batch_number >= len(self._input): raise IndexOutOfBoundsError(batch_size * batch_number) max_index = batch_size * (batch_number + 1) if batch_size * (batch_number + 1) < len(self._input) else len(self._input) input_tensor = self._input._tensor[self._shuffle_tensor_indices[[self._input._indices_to_tensor_positions[index] for index in range(batch_size * batch_number, max_index)]]].to(torch.float32) / 255 @@ -136,7 +142,7 @@ def _from_tensor(tensor: Tensor, column_names: list[str]) -> _TableAsTensor: if tensor.dim() != 2: raise ValueError(f"Tensor has an invalid amount of dimensions. Needed 2 dimensions but got {tensor.dim()}.") if tensor.size(dim=1) != len(column_names): - raise ValueError(f"Tensor and column_names have different amounts of classes ({tensor.size(dim=1)}!={column_names}.") + raise ValueError(f"Tensor and column_names have different amounts of classes ({tensor.size(dim=1)}!={len(column_names)}).") table_as_tensor = _TableAsTensor.__new__(_TableAsTensor) table_as_tensor._tensor = tensor table_as_tensor._column_names = column_names @@ -164,7 +170,7 @@ def _from_tensor(tensor: Tensor, column_name: str, one_hot_encoder: OneHotEncode if not one_hot_encoder.is_fitted(): raise TransformerNotFittedError() if tensor.size(dim=1) != len(one_hot_encoder.get_names_of_added_columns()): - raise ValueError(f"Tensor and one_hot_encoder have different amounts of classes ({tensor.size(dim=1)}!={one_hot_encoder.get_names_of_added_columns()}.") + raise ValueError(f"Tensor and one_hot_encoder have different amounts of classes ({tensor.size(dim=1)}!={len(one_hot_encoder.get_names_of_added_columns())}).") table_as_tensor = _ColumnAsTensor.__new__(_ColumnAsTensor) table_as_tensor._tensor = tensor table_as_tensor._column_name = column_name diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py index 503d0817a..fcff78081 100644 --- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py +++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py @@ -65,6 +65,14 @@ def __init__(self) -> None: # Maps nan values (str of old column) to corresponding new column name self._value_to_column_nans: dict[str, str] | None = None + def __hash__(self) -> int: + return super().__hash__() + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, OneHotEncoder): + return NotImplemented + return self._column_names == other._column_names and self._value_to_column == other._value_to_column and self._value_to_column_nans == other._value_to_column_nans + # noinspection PyProtectedMember def fit(self, table: Table, column_names: list[str] | None) -> OneHotEncoder: """ diff --git a/src/safeds/ml/nn/_input_conversion_image.py b/src/safeds/ml/nn/_input_conversion_image.py index 9a1d2789e..911f42cbc 100644 --- a/src/safeds/ml/nn/_input_conversion_image.py +++ b/src/safeds/ml/nn/_input_conversion_image.py @@ -20,14 +20,16 @@ def __init__(self, image_size: ImageSize) -> None: Parameters ---------- """ - self._image_size = image_size + self._input_size = image_size + self._output_size = None self._one_hot_encoder: OneHotEncoder | None = None self._column_name: str | None = None self._column_names: list[str] | None = None + self._output_type = None @property def _data_size(self) -> ImageSize: - return self._image_size + return self._input_size def _data_conversion_fit(self, input_data: ImageDataset, batch_size: int, num_of_classes: int = 1) -> ImageDataset: return input_data @@ -36,21 +38,23 @@ def 
_data_conversion_predict(self, input_data: ImageList, batch_size: int) -> Im return input_data def _is_fit_data_valid(self, input_data: ImageDataset) -> bool: + if self._output_type is None: + self._output_type = type(input_data._output) + self._output_size = input_data.output_size + elif not isinstance(input_data._output, self._output_type): + return False if isinstance(input_data._output, _ColumnAsTensor): - if self._one_hot_encoder is None: + if self._column_name is None and self._one_hot_encoder is None: self._one_hot_encoder = input_data._output._one_hot_encoder - elif self._one_hot_encoder != input_data._output._one_hot_encoder: - return False - if self._column_name is None: self._column_name = input_data._output._column_name - elif self._column_name != input_data._output._column_name: + elif self._column_name != input_data._output._column_name or self._one_hot_encoder != input_data._output._one_hot_encoder: return False - if isinstance(input_data._output, _TableAsTensor): + elif isinstance(input_data._output, _TableAsTensor): if self._column_names is None: self._column_names = input_data._output._column_names elif self._column_names != input_data._output._column_names: return False - return input_data.input_size == self._image_size + return input_data.input_size == self._input_size and input_data.output_size == self._output_size def _is_predict_data_valid(self, input_data: ImageList) -> bool: - return isinstance(input_data, _SingleSizeImageList) and input_data.sizes[0] == self._image_size + return isinstance(input_data, _SingleSizeImageList) and input_data.sizes[0] == self._input_size diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index 130354e37..879b23661 100644 --- a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -23,7 +23,7 @@ class _OutputConversionImage(_OutputConversion[ImageList, ImageDataset[T]], ABC) @abstractmethod def _data_conversion(self, **kwargs) -> ImageDataset[T]: - pass + pass # pragma: no cover class OutputConversionImageToColumn(_OutputConversionImage[Column]): diff --git a/tests/safeds/data/labeled/__init__.py b/tests/safeds/data/labeled/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/safeds/data/labeled/containers/__init__.py b/tests/safeds/data/labeled/containers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/safeds/data/labeled/containers/test_image_dataset.py b/tests/safeds/data/labeled/containers/test_image_dataset.py new file mode 100644 index 000000000..5af711fc4 --- /dev/null +++ b/tests/safeds/data/labeled/containers/test_image_dataset.py @@ -0,0 +1,119 @@ +import math +from typing import Type + +import pytest +import torch +from torch import Tensor + +from safeds.data.image.containers import ImageList +from safeds.data.image.containers._empty_image_list import _EmptyImageList +from safeds.data.image.containers._multi_size_image_list import _MultiSizeImageList +from safeds.data.labeled.containers import ImageDataset +from safeds.data.labeled.containers._image_dataset import _TableAsTensor, _ColumnAsTensor +from safeds.data.tabular.containers import Column, Table +from safeds.data.tabular.transformation import OneHotEncoder +from safeds.exceptions import OutputLengthMismatchError, NonNumericColumnError, IndexOutOfBoundsError, OutOfBoundsError, \ + TransformerNotFittedError +from tests.helpers import resolve_resource_path, plane_png_path, white_square_png_path, images_all + + +class 
TestImageDatasetInit: + + @pytest.mark.parametrize( + ("input_data", "output_data", "error", "error_msg"), + [ + (_MultiSizeImageList(), Table(), ValueError, r"The given input ImageList contains images of different sizes."), + (_EmptyImageList(), Table(), ValueError, r"The given input ImageList contains no images."), + (ImageList.from_files(resolve_resource_path([plane_png_path, plane_png_path])), ImageList.from_files(resolve_resource_path([plane_png_path, white_square_png_path])), ValueError, r"The given output ImageList contains images of different sizes."), + (ImageList.from_files(resolve_resource_path(plane_png_path)), _EmptyImageList(), OutputLengthMismatchError, r"The length of the output container differs"), + (ImageList.from_files(resolve_resource_path(plane_png_path)), Table(), OutputLengthMismatchError, r"The length of the output container differs"), + (ImageList.from_files(resolve_resource_path(plane_png_path)), Column("column", [1, 2]), OutputLengthMismatchError, r"The length of the output container differs"), + (ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path([plane_png_path, plane_png_path])), OutputLengthMismatchError, r"The length of the output container differs"), + (ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"a": ["1"]}), NonNumericColumnError, r"Tried to do a numerical operation on one or multiple non-numerical columns: \nColumns \['a'\] are not numerical."), + (ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"a": [2]}), ValueError, r"Columns \['a'\] have values outside of the interval \[0, 1\]."), + (ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"a": [-1]}), ValueError, r"Columns \['a'\] have values outside of the interval \[0, 1\]."), + ] + ) + def test_should_raise_with_invalid_data(self, input_data: ImageList, output_data: Column | Table | ImageList, error: Type[Exception], error_msg: str) -> None: + with pytest.raises(error, match=error_msg): + ImageDataset(input_data, output_data) + + +class TestLength: + + def test_should_return_length(self) -> None: + image_dataset = ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1])) + assert len(image_dataset) == 1 + + +class TestShuffle: + + def test_should_be_different_order(self) -> None: + torch.manual_seed(1234) + image_list = ImageList.from_files(resolve_resource_path(images_all())).resize(10, 10) + image_dataset = ImageDataset(image_list, Column("images", images_all())) + image_dataset_shuffled = image_dataset.shuffle() + batch = image_dataset._get_batch(0, len(image_dataset)) + batch_shuffled = image_dataset_shuffled._get_batch(0, len(image_dataset)) + assert not torch.all(torch.eq(batch[0], batch_shuffled[0])) + assert not torch.all(torch.eq(batch[1], batch_shuffled[1])) + + +class TestBatch: + + @pytest.mark.parametrize( + ("batch_number", "batch_size"), + [ + (-1, len(images_all())), + (1, len(images_all())), + (2, math.ceil(len(images_all()) / 2)), + (3, math.ceil(len(images_all()) / 3)), + (4, math.ceil(len(images_all()) / 4)), + ] + ) + def test_should_raise_index_out_of_bounds_error(self, batch_number: int, batch_size: int) -> None: + image_list = ImageList.from_files(resolve_resource_path(images_all())).resize(10, 10) + image_dataset = ImageDataset(image_list, Column("images", images_all())) + with pytest.raises(IndexOutOfBoundsError): + image_dataset._get_batch(batch_number, batch_size) + + def test_should_raise_out_of_bounds_error(self) -> None: 
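# The parametrized cases above all request a batch whose first index falls at
# or beyond the end of the dataset (or use a negative batch number). A worked
# check of that arithmetic, assuming images_all() yields n = 7 test files (the
# workflow tests above predict 7 labels):
import math

n = 7  # assumed number of test images; illustrative only
for batch_number, batch_size in [(-1, n), (1, n), (2, math.ceil(n / 2)), (3, math.ceil(n / 3)), (4, math.ceil(n / 4))]:
    start = batch_size * batch_number
    assert batch_number < 0 or start >= n  # every case must raise IndexOutOfBoundsError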
+ image_list = ImageList.from_files(resolve_resource_path(images_all())).resize(10, 10) + image_dataset = ImageDataset(image_list, Column("images", images_all())) + with pytest.raises(OutOfBoundsError): + image_dataset._get_batch(0, -1) + + +class TestTableAsTensor: + + def test_should_raise_if_not_one_hot_encoded(self) -> None: + with pytest.raises(ValueError, match=r"The given table is not correctly one hot encoded as it contains rows that have a sum not equal to 1."): + _TableAsTensor(Table({"a": [0.2, 0.2, 0.2, 0.3, 0.2]})) + + @pytest.mark.parametrize( + ("tensor", "error_msg"), + [ + (torch.randn(10), r"Tensor has an invalid amount of dimensions. Needed 2 dimensions but got 1."), + (torch.randn(10, 10, 10), r"Tensor has an invalid amount of dimensions. Needed 2 dimensions but got 3."), + (torch.randn(10, 10), r"Tensor and column_names have different amounts of classes \(10!=2\)."), + ] + ) + def test_should_raise_from_tensor(self, tensor: Tensor, error_msg: str) -> None: + with pytest.raises(ValueError, match=error_msg): + _TableAsTensor._from_tensor(tensor, ["a", "b"]) + + +class TestColumnAsTensor: + + @pytest.mark.parametrize( + ("tensor", "one_hot_encoder", "error", "error_msg"), + [ + (torch.randn(10), OneHotEncoder(), ValueError, r"Tensor has an invalid amount of dimensions. Needed 2 dimensions but got 1."), + (torch.randn(10, 10, 10), OneHotEncoder(), ValueError, r"Tensor has an invalid amount of dimensions. Needed 2 dimensions but got 3."), + (torch.randn(10, 10), OneHotEncoder(), TransformerNotFittedError, r""), + (torch.randn(10, 10), OneHotEncoder().fit(Table({"b": ["a", "b", "c"]}), None), ValueError, r"Tensor and one_hot_encoder have different amounts of classes \(10!=3\)."), + ] + ) + def test_should_raise_from_tensor(self, tensor: Tensor, one_hot_encoder: OneHotEncoder, error: Type[Exception], error_msg: str) -> None: + with pytest.raises(error, match=error_msg): + _ColumnAsTensor._from_tensor(tensor, "a", one_hot_encoder) diff --git a/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImage.test_should_train_and_predict_model[seed-1234-cpu].png b/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-1234-cpu].png similarity index 100% rename from tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImage.test_should_train_and_predict_model[seed-1234-cpu].png rename to tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-1234-cpu].png diff --git a/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImage.test_should_train_and_predict_model[seed-1234-cuda].png b/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-1234-cuda].png similarity index 100% rename from tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImage.test_should_train_and_predict_model[seed-1234-cuda].png rename to tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-1234-cuda].png diff --git a/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImage.test_should_train_and_predict_model[seed-4711-cpu].png b/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-4711-cpu].png similarity index 100% rename from tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImage.test_should_train_and_predict_model[seed-4711-cpu].png rename 
to tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-4711-cpu].png diff --git a/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImage.test_should_train_and_predict_model[seed-4711-cuda].png b/tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-4711-cuda].png similarity index 100% rename from tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImage.test_should_train_and_predict_model[seed-4711-cuda].png rename to tests/safeds/ml/nn/__snapshots__/test_cnn_workflow/TestImageToImageRegressor.test_should_train_and_predict_model[seed-4711-cuda].png diff --git a/tests/safeds/ml/nn/test_input_conversion_image.py b/tests/safeds/ml/nn/test_input_conversion_image.py new file mode 100644 index 000000000..e34d45d73 --- /dev/null +++ b/tests/safeds/ml/nn/test_input_conversion_image.py @@ -0,0 +1,37 @@ +import pytest + +from safeds.data.image.containers import ImageList +from safeds.data.labeled.containers import ImageDataset +from safeds.data.tabular.containers import Column, Table +from safeds.ml.nn import InputConversionImage +from tests.helpers import resolve_resource_path, images_all + +_test_image_list = ImageList.from_files(resolve_resource_path(images_all())).resize(10, 10) + + +class TestIsFitDataValid: + + @pytest.mark.parametrize( + ("image_dataset_valid", "image_dataset_invalid"), + [ + (ImageDataset(_test_image_list, Column("images", images_all())), ImageDataset(_test_image_list, _test_image_list)), + (ImageDataset(_test_image_list, Column("images", images_all())), ImageDataset(_test_image_list, _test_image_list)), + (ImageDataset(_test_image_list, Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]})), ImageDataset(_test_image_list, _test_image_list)), + (ImageDataset(_test_image_list, Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]})), ImageDataset(_test_image_list, _test_image_list)), + (ImageDataset(_test_image_list, _test_image_list), ImageDataset(_test_image_list, Column("images", images_all()))), + (ImageDataset(_test_image_list, _test_image_list), ImageDataset(_test_image_list, Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]}))), + (ImageDataset(_test_image_list, Column("images", images_all())), ImageDataset(_test_image_list.resize(20, 20), Column("images", images_all()))), + (ImageDataset(_test_image_list, Column("images", images_all())), ImageDataset(_test_image_list, Column("ims", images_all()))), + (ImageDataset(_test_image_list, Column("images", images_all())), ImageDataset(_test_image_list, Column("images", [s + "10" for s in images_all()]))), + (ImageDataset(_test_image_list, Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]})), ImageDataset(_test_image_list.resize(20, 20), Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]}))), + (ImageDataset(_test_image_list, Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]})), ImageDataset(_test_image_list, Table({"b": [0, 0, 1, 1, 0, 1, 0], "c": [1, 1, 0, 0, 1, 0, 1]}))), + (ImageDataset(_test_image_list, _test_image_list), ImageDataset(_test_image_list.resize(20, 20), _test_image_list)), + (ImageDataset(_test_image_list, _test_image_list), ImageDataset(_test_image_list, _test_image_list.resize(20, 20))), + ] + ) + def test_should_return_false_if_fit_data_is_invalid(self, image_dataset_valid: ImageDataset, image_dataset_invalid: ImageDataset): + input_conversion = 
InputConversionImage(image_dataset_valid.input_size) + assert input_conversion._is_fit_data_valid(image_dataset_valid) + assert input_conversion._is_fit_data_valid(image_dataset_valid) + assert not input_conversion._is_fit_data_valid(image_dataset_invalid) + diff --git a/tests/safeds/ml/nn/test_output_conversion_image.py b/tests/safeds/ml/nn/test_output_conversion_image.py new file mode 100644 index 000000000..70d105f25 --- /dev/null +++ b/tests/safeds/ml/nn/test_output_conversion_image.py @@ -0,0 +1,22 @@ +import pytest +import torch + +from safeds.data.image.containers._multi_size_image_list import _MultiSizeImageList +from safeds.data.tabular.transformation import OneHotEncoder +from safeds.ml.nn import OutputConversionImageToTable, OutputConversionImageToImage +from safeds.ml.nn._output_conversion_image import _OutputConversionImage, OutputConversionImageToColumn + + +class TestDataConversionToColumn: + + @pytest.mark.parametrize( + ("output_conversion", "kwargs"), + [ + (OutputConversionImageToColumn(), {"column_name": "a", "one_hot_encoder": OneHotEncoder()}), + (OutputConversionImageToTable(), {"column_names": ["a"]}), + (OutputConversionImageToImage(), {}), + ] + ) + def test_should_raise_if_input_data_is_multi_size(self, output_conversion: _OutputConversionImage, kwargs: dict): + with pytest.raises(ValueError, match=r"The given input ImageList contains images of different sizes."): + output_conversion._data_conversion(input_data=_MultiSizeImageList(), output_data=torch.empty(1), **kwargs) From 732b1ce2a2c2323f9e628be00c5fa85417c4ab95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Mon, 29 Apr 2024 20:27:08 +0200 Subject: [PATCH 14/42] test: added tests for `Convolutional2DLayer`, `ConvolutionalTranspose2DLayer`, `FlattenLayer`, `MaxPooling2DLayer` and `AvgPooling2DLayer` --- src/safeds/ml/nn/_convolutional2d_layer.py | 10 +++--- src/safeds/ml/nn/_pooling2d_layer.py | 10 +++--- tests/safeds/ml/nn/test_cnn_workflow.py | 13 ++++--- .../ml/nn/test_convolutional2d_layer.py | 35 +++++++++++++++++++ tests/safeds/ml/nn/test_flatten_layer.py | 15 ++++++++ tests/safeds/ml/nn/test_pooling2d_layer.py | 25 +++++++++++++ 6 files changed, 90 insertions(+), 18 deletions(-) create mode 100644 tests/safeds/ml/nn/test_convolutional2d_layer.py create mode 100644 tests/safeds/ml/nn/test_flatten_layer.py create mode 100644 tests/safeds/ml/nn/test_pooling2d_layer.py diff --git a/src/safeds/ml/nn/_convolutional2d_layer.py b/src/safeds/ml/nn/_convolutional2d_layer.py index ef2f66cff..d8887e12b 100644 --- a/src/safeds/ml/nn/_convolutional2d_layer.py +++ b/src/safeds/ml/nn/_convolutional2d_layer.py @@ -1,7 +1,7 @@ from __future__ import annotations import math -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal from safeds.data.image.typing import ImageSize @@ -11,11 +11,11 @@ from safeds.ml.nn._layer import _Layer -def _create_internal_model(input_size: int, output_size: int, kernel_size: int, activation_function: str, padding: int, stride: int, transpose: bool, output_padding: int = 0) -> nn.Module: +def _create_internal_model(input_size: int, output_size: int, kernel_size: int, activation_function: Literal["sigmoid", "relu", "softmax"], padding: int, stride: int, transpose: bool, output_padding: int = 0) -> nn.Module: from torch import nn class _InternalLayer(nn.Module): - def __init__(self, input_size: int, output_size: int, kernel_size: int, activation_function: str, padding: int, stride: int, transpose: bool, output_padding: int): + def __init__(self, 
input_size: int, output_size: int, kernel_size: int, activation_function: Literal["sigmoid", "relu", "softmax"], padding: int, stride: int, transpose: bool, output_padding: int): super().__init__() if transpose: self._layer = nn.ConvTranspose2d(in_channels=input_size, out_channels=output_size, kernel_size=kernel_size, padding=padding, stride=stride, output_padding=output_padding) @@ -47,7 +47,7 @@ def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, pa self._stride = stride self._padding = padding - def _get_internal_layer(self, *, activation_function: str) -> nn.Module: + def _get_internal_layer(self, *, activation_function: Literal["sigmoid", "relu", "softmax"]) -> nn.Module: return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride, False) @property @@ -87,7 +87,7 @@ def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, pa super().__init__(output_channel, kernel_size, stride=stride, padding=padding) self._output_padding = output_padding - def _get_internal_layer(self, *, activation_function: str) -> nn.Module: + def _get_internal_layer(self, *, activation_function: Literal["sigmoid", "relu", "softmax"]) -> nn.Module: return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride, True, self._output_padding) def _set_input_size(self, input_size: ImageSize) -> None: diff --git a/src/safeds/ml/nn/_pooling2d_layer.py b/src/safeds/ml/nn/_pooling2d_layer.py index 581b2aa71..6eb81aa03 100644 --- a/src/safeds/ml/nn/_pooling2d_layer.py +++ b/src/safeds/ml/nn/_pooling2d_layer.py @@ -1,7 +1,7 @@ from __future__ import annotations import math -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal from safeds.data.image.typing import ImageSize @@ -11,19 +11,17 @@ from safeds.ml.nn._layer import _Layer -def _create_internal_model(strategy: str, kernel_size: int, padding: int, stride: int) -> nn.Module: +def _create_internal_model(strategy: Literal["max", "avg"], kernel_size: int, padding: int, stride: int) -> nn.Module: from torch import nn class _InternalLayer(nn.Module): - def __init__(self, strategy: str, kernel_size: int, padding: int, stride: int): + def __init__(self, strategy: Literal["max", "avg"], kernel_size: int, padding: int, stride: int): super().__init__() match strategy: case "max": self._layer = nn.MaxPool2d(kernel_size=kernel_size, padding=padding, stride=stride) case "avg": self._layer = nn.AvgPool2d(kernel_size=kernel_size, padding=padding, stride=stride) - case _: - raise ValueError(f"Unknown pooling strategy: {strategy}") def forward(self, x: Tensor) -> Tensor: return self._layer(x) @@ -32,7 +30,7 @@ def forward(self, x: Tensor) -> Tensor: class _Pooling2DLayer(_Layer): - def __init__(self, strategy: str, kernel_size: int, *, stride: int = -1, padding: int = 0): + def __init__(self, strategy: Literal["max", "avg"], kernel_size: int, *, stride: int = -1, padding: int = 0): """ Create a Pooling 2D Layer. 
""" diff --git a/tests/safeds/ml/nn/test_cnn_workflow.py b/tests/safeds/ml/nn/test_cnn_workflow.py index 675a00f21..2815cc800 100644 --- a/tests/safeds/ml/nn/test_cnn_workflow.py +++ b/tests/safeds/ml/nn/test_cnn_workflow.py @@ -10,7 +10,8 @@ from safeds.data.tabular.containers import Table, Column from safeds.data.tabular.transformation import OneHotEncoder from safeds.ml.nn import NeuralNetworkClassifier, InputConversionImage, Convolutional2DLayer, MaxPooling2DLayer, \ - FlattenLayer, ForwardLayer, OutputConversionImageToTable, ConvolutionalTranspose2DLayer, NeuralNetworkRegressor + FlattenLayer, ForwardLayer, OutputConversionImageToTable, ConvolutionalTranspose2DLayer, NeuralNetworkRegressor, \ + AvgPooling2DLayer from safeds.ml.nn._output_conversion_image import OutputConversionImageToColumn, OutputConversionImageToImage from tests.helpers import resolve_resource_path, images_all, device_cuda, device_cpu, skip_if_device_not_available @@ -58,9 +59,9 @@ class TestImageToColumnClassifier: @pytest.mark.parametrize( ("seed", "device", "layer_3_bias", "prediction_label"), [ - (1234, device_cuda, [0.5809096097946167, -0.32418742775917053, 0.026058292016386986, 0.5801554918289185], ["grayscale"] * 7), - (4711, device_cuda, [-0.8114155530929565, -0.9443624019622803, 0.8557258248329163, -0.848240852355957], ["white_square"] * 7), - (1234, device_cpu, [-0.6926110982894897, 0.33004942536354065, -0.32962560653686523, 0.5768553614616394], ["grayscale"] * 7), + (1234, device_cuda, [0.5805488228797913, -0.32433584332466125, 0.026305729523301125, 0.5804171562194824], ["grayscale"] * 7), + (4711, device_cuda, [-0.8114063143730164, -0.9443492889404297, 0.8557132482528687, -0.8482506275177002], ["white_square"] * 7), + (1234, device_cpu, [-0.6926037669181824, 0.33001941442489624, -0.32963910698890686, 0.5768917202949524], ["grayscale"] * 7), (4711, device_cpu, [-0.9051575660705566, -0.8625037670135498, 0.24682046473026276, -0.2612163722515106], ["white_square"] * 7), ], ids=["seed-1234-cuda", "seed-4711-cuda", "seed-1234-cpu", "seed-4711-cpu"] @@ -74,12 +75,10 @@ def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[floa image_list = image_list.resize(20, 20) image_classes = Column("class", [re.search(r"(.*)[\\/](.*)\.", filename).group(2) for filename in filenames]) image_dataset = ImageDataset(image_list, image_classes) - print(image_dataset._output._tensor) - print(image_dataset._output._tensor.size()) layers = [ Convolutional2DLayer(1, 2), - MaxPooling2DLayer(10), + AvgPooling2DLayer(10), FlattenLayer(), ForwardLayer(image_dataset.output_size) ] diff --git a/tests/safeds/ml/nn/test_convolutional2d_layer.py b/tests/safeds/ml/nn/test_convolutional2d_layer.py new file mode 100644 index 000000000..69c880b06 --- /dev/null +++ b/tests/safeds/ml/nn/test_convolutional2d_layer.py @@ -0,0 +1,35 @@ +from typing import Literal, Type + +import pytest +from torch import nn + +from safeds.data.image.typing import ImageSize +from safeds.ml.nn import Convolutional2DLayer, ConvolutionalTranspose2DLayer + + +class TestConvolutional2DLayer: + + @pytest.mark.parametrize( + ("activation_function", "activation_layer"), + [ + ("sigmoid", nn.Sigmoid), + ("relu", nn.ReLU), + ("softmax", nn.Softmax) + ], + ) + @pytest.mark.parametrize( + ("conv_type", "torch_layer", "output_channel", "kernel_size", "stride", "padding", "out_channel", "out_width", "out_height"), + [ + (Convolutional2DLayer, nn.Conv2d, 30, 2, 2, 2, 30, 7, 12), + (ConvolutionalTranspose2DLayer, nn.ConvTranspose2d, 30, 2, 2, 2, 30, 16, 36), + 
], + ) + def test_should_create_convolutional_layer(self, activation_function: Literal["sigmoid", "relu", "softmax"], activation_layer: Type[nn.Module], conv_type: Type[Convolutional2DLayer], torch_layer: Type[nn.Module], output_channel: int, kernel_size: int, stride: int, padding: int, out_channel: int, out_width: int, out_height: int) -> None: + layer = conv_type(output_channel, kernel_size, stride=stride, padding=padding) + input_size = ImageSize(10, 20, 30, _ignore_invalid_channel=True) + layer._set_input_size(input_size) + assert layer.input_size == input_size + assert layer.output_size == ImageSize(out_width, out_height, out_channel, _ignore_invalid_channel=True) + modules = list(next(layer._get_internal_layer(activation_function=activation_function).modules()).children()) + assert isinstance(modules[0], torch_layer) + assert isinstance(modules[1], activation_layer) diff --git a/tests/safeds/ml/nn/test_flatten_layer.py b/tests/safeds/ml/nn/test_flatten_layer.py new file mode 100644 index 000000000..11cf5ef39 --- /dev/null +++ b/tests/safeds/ml/nn/test_flatten_layer.py @@ -0,0 +1,15 @@ +from torch import nn + +from safeds.data.image.typing import ImageSize +from safeds.ml.nn import FlattenLayer + + +class TestFlattenLayer: + + def test_should_create_flatten_layer(self) -> None: + layer = FlattenLayer() + input_size = ImageSize(10, 20, 30, _ignore_invalid_channel=True) + layer._set_input_size(input_size) + assert layer.input_size == input_size + assert layer.output_size == input_size.width * input_size.height * input_size.channel + assert isinstance(next(next(layer._get_internal_layer().modules()).children()), nn.Flatten) diff --git a/tests/safeds/ml/nn/test_pooling2d_layer.py b/tests/safeds/ml/nn/test_pooling2d_layer.py new file mode 100644 index 000000000..08615799c --- /dev/null +++ b/tests/safeds/ml/nn/test_pooling2d_layer.py @@ -0,0 +1,25 @@ +from typing import Literal, Type + +import pytest +from torch import nn + +from safeds.data.image.typing import ImageSize +from safeds.ml.nn._pooling2d_layer import _Pooling2DLayer + + +class TestPooling2DLayer: + + @pytest.mark.parametrize( + ("strategy", "torch_layer"), + [ + ("max", nn.MaxPool2d), + ("avg", nn.AvgPool2d), + ], + ) + def test_should_create_pooling_layer(self, strategy: Literal["max", "avg"], torch_layer: Type[nn.Module]) -> None: + layer = _Pooling2DLayer(strategy, 2, stride=2, padding=2) + input_size = ImageSize(10, 20, 30, _ignore_invalid_channel=True) + layer._set_input_size(input_size) + assert layer.input_size == input_size + assert layer.output_size == ImageSize(7, 12, 30, _ignore_invalid_channel=True) + assert isinstance(next(next(layer._get_internal_layer().modules()).children()), torch_layer) From cf497a4b04add792a04c07bd34767b87ef14c684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Mon, 29 Apr 2024 23:51:58 +0200 Subject: [PATCH 15/42] test: added tests for `OneHotEncoder.__eq__` test: added additional tests for codecov --- src/safeds/ml/nn/_convolutional2d_layer.py | 2 -- .../transformation/test_one_hot_encoder.py | 21 +++++++++++++++++++ tests/safeds/ml/nn/test_cnn_workflow.py | 10 ++++----- tests/safeds/ml/nn/test_model.py | 1 + 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/safeds/ml/nn/_convolutional2d_layer.py b/src/safeds/ml/nn/_convolutional2d_layer.py index d8887e12b..6f8382bb9 100644 --- a/src/safeds/ml/nn/_convolutional2d_layer.py +++ b/src/safeds/ml/nn/_convolutional2d_layer.py @@ -28,8 +28,6 @@ def __init__(self, input_size: int, output_size: int, kernel_size: int, 
activati self._fn = nn.ReLU() case "softmax": self._fn = nn.Softmax() - case _: - raise ValueError("Unknown Activation Function: " + activation_function) def forward(self, x: Tensor) -> Tensor: return self._fn(self._layer(x)) diff --git a/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py b/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py index 810f9bd24..19eeaf0ab 100644 --- a/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py +++ b/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py @@ -11,6 +11,27 @@ ) +class TestEq: + + def test_should_be_not_implemented(self): + assert OneHotEncoder().__eq__(Table()) is NotImplemented + + def test_should_be_equal(self): + table1 = Table({"a": ["a", "b", "c"], "b": ["a", "b", "c"]}) + table2 = Table({"b": ["a", "b", "c"], "a": ["a", "b", "c"]}) + assert OneHotEncoder().fit(table1, None) == OneHotEncoder().fit(table2, None) + + @pytest.mark.parametrize( + ("table1", "table2"), + [ + (Table({"a": ["a", "b", "c"], "b": ["a", "b", "c"]}), Table({"a": ["a", "b", "c"], "aa": ["a", "b", "c"]})), + (Table({"a": ["a", "b", "c"], "b": ["a", "b", "c"]}), Table({"a": ["a", "b", "c"], "b": ["a", "b", "d"]})), + ] + ) + def test_should_be_not_equal(self, table1: Table, table2: Table): + assert OneHotEncoder().fit(table1, None) != OneHotEncoder().fit(table2, None) + + class TestFit: def test_should_raise_if_column_not_found(self) -> None: table = Table( diff --git a/tests/safeds/ml/nn/test_cnn_workflow.py b/tests/safeds/ml/nn/test_cnn_workflow.py index 2815cc800..82be98d1a 100644 --- a/tests/safeds/ml/nn/test_cnn_workflow.py +++ b/tests/safeds/ml/nn/test_cnn_workflow.py @@ -59,10 +59,10 @@ class TestImageToColumnClassifier: @pytest.mark.parametrize( ("seed", "device", "layer_3_bias", "prediction_label"), [ - (1234, device_cuda, [0.5805488228797913, -0.32433584332466125, 0.026305729523301125, 0.5804171562194824], ["grayscale"] * 7), - (4711, device_cuda, [-0.8114063143730164, -0.9443492889404297, 0.8557132482528687, -0.8482506275177002], ["white_square"] * 7), - (1234, device_cpu, [-0.6926037669181824, 0.33001941442489624, -0.32963910698890686, 0.5768917202949524], ["grayscale"] * 7), - (4711, device_cpu, [-0.9051575660705566, -0.8625037670135498, 0.24682046473026276, -0.2612163722515106], ["white_square"] * 7), + (1234, device_cuda, [0.5805736780166626, -0.32432740926742554, 0.02629312314093113, 0.5803964138031006], ["grayscale"] * 7), + (4711, device_cuda, [-0.8114045262336731, -0.9443488717079163, 0.8557113409042358, -0.8482510447502136], ["white_square"] * 7), + (1234, device_cpu, [-0.69260174036026, 0.33002084493637085, -0.32964015007019043, 0.5768893957138062], ["grayscale"] * 7), + (4711, device_cpu, [-0.9051562547683716, -0.8625034093856812, 0.24682027101516724, -0.26121777296066284], ["white_square"] * 7), ], ids=["seed-1234-cuda", "seed-4711-cuda", "seed-1234-cpu", "seed-4711-cpu"] ) @@ -74,7 +74,7 @@ def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[floa image_list, filenames = ImageList.from_files(resolve_resource_path(images_all()), return_filenames=True) image_list = image_list.resize(20, 20) image_classes = Column("class", [re.search(r"(.*)[\\/](.*)\.", filename).group(2) for filename in filenames]) - image_dataset = ImageDataset(image_list, image_classes) + image_dataset = ImageDataset(image_list, image_classes, shuffle=True) layers = [ Convolutional2DLayer(1, 2), diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index ea8ac5ca2..b5641d82f 
100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -480,6 +480,7 @@ def callback_was_called(self) -> bool: (InputConversionTable([], ""), [FlattenLayer()], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer()], OutputConversionTable(), r"The defined model uses an input conversion for images but no output conversion for images."), (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer()], OutputConversionImageToImage(), r"The output data would be 1-dimensional but the provided output conversion uses 2-dimensional data."), + (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), ForwardLayer(1)], OutputConversionImageToImage(), r"The output data would be 1-dimensional but the provided output conversion uses 2-dimensional data."), (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), Convolutional2DLayer(1, 1)], OutputConversionImageToImage(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), ConvolutionalTranspose2DLayer(1, 1)], OutputConversionImageToImage(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), MaxPooling2DLayer(1)], OutputConversionImageToImage(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), From 4d07c6ad465bc6ff9769e1087a77c2ee44721fb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Tue, 30 Apr 2024 00:37:19 +0200 Subject: [PATCH 16/42] refactor: ruff linter docs: added docstrings --- .../image/containers/_empty_image_list.py | 2 +- .../data/image/containers/_image_list.py | 4 +- .../containers/_multi_size_image_list.py | 2 +- .../containers/_single_size_image_list.py | 3 +- src/safeds/data/image/typing/_image_size.py | 26 ++++++- .../data/labeled/containers/_image_dataset.py | 67 +++++++++++++++++-- src/safeds/ml/nn/_convolutional2d_layer.py | 54 ++++++++++++--- src/safeds/ml/nn/_flatten_layer.py | 8 ++- src/safeds/ml/nn/_input_conversion.py | 8 +-- src/safeds/ml/nn/_input_conversion_image.py | 14 ++-- src/safeds/ml/nn/_layer.py | 4 +- src/safeds/ml/nn/_output_conversion_image.py | 11 ++- src/safeds/ml/nn/_output_conversion_table.py | 2 +- src/safeds/ml/nn/_pooling2d_layer.py | 45 ++++++++++++- .../ml/nn/test_input_conversion_image.py | 2 - 15 files changed, 204 insertions(+), 48 deletions(-) diff --git a/src/safeds/data/image/containers/_empty_image_list.py b/src/safeds/data/image/containers/_empty_image_list.py index 7e358c2d8..dff09549f 100644 --- a/src/safeds/data/image/containers/_empty_image_list.py +++ b/src/safeds/data/image/containers/_empty_image_list.py @@ -17,7 +17,6 @@ _check_resize_errors, _check_sharpen_errors_and_warnings, ) -from safeds.data.image.typing import ImageSize from safeds.exceptions import IndexOutOfBoundsError if TYPE_CHECKING: @@ -26,6 +25,7 @@ from torch import Tensor from safeds.data.image.containers import Image + from safeds.data.image.typing import ImageSize class _EmptyImageList(ImageList): diff --git a/src/safeds/data/image/containers/_image_list.py b/src/safeds/data/image/containers/_image_list.py index 5c54bcee8..2fdded2fd 100644 --- a/src/safeds/data/image/containers/_image_list.py +++ b/src/safeds/data/image/containers/_image_list.py @@ -8,7 +8,6 @@ from typing import TYPE_CHECKING, overload, Literal from safeds.data.image.containers._image import Image -from safeds.data.image.typing import 
ImageSize if TYPE_CHECKING: from collections.abc import Sequence @@ -17,6 +16,7 @@ from safeds.data.image.containers._multi_size_image_list import _MultiSizeImageList from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList + from safeds.data.image.typing import ImageSize class ImageList(metaclass=ABCMeta): @@ -330,7 +330,7 @@ def channel(self) -> int: @abstractmethod def sizes(self) -> list[ImageSize]: """ - Return the sizes of all images + Return the sizes of all images. Returns ------- diff --git a/src/safeds/data/image/containers/_multi_size_image_list.py b/src/safeds/data/image/containers/_multi_size_image_list.py index 04ec4b5f4..f1bda860e 100644 --- a/src/safeds/data/image/containers/_multi_size_image_list.py +++ b/src/safeds/data/image/containers/_multi_size_image_list.py @@ -11,7 +11,6 @@ _check_blur_errors_and_warnings, _check_remove_images_with_size_errors, ) -from safeds.data.image.typing import ImageSize from safeds.exceptions import ( DuplicateIndexError, IllegalFormatError, @@ -24,6 +23,7 @@ from torch import Tensor from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList + from safeds.data.image.typing import ImageSize class _MultiSizeImageList(ImageList): diff --git a/src/safeds/data/image/containers/_single_size_image_list.py b/src/safeds/data/image/containers/_single_size_image_list.py index fea53d81a..54b5b29fc 100644 --- a/src/safeds/data/image/containers/_single_size_image_list.py +++ b/src/safeds/data/image/containers/_single_size_image_list.py @@ -132,8 +132,7 @@ def _get_batch(self, batch_number: int, batch_size: int | None = None) -> Tensor if batch_size * batch_number >= len(self): raise IndexOutOfBoundsError(batch_size * batch_number) max_index = batch_size * (batch_number + 1) if batch_size * (batch_number + 1) < len(self) else len(self) - input_tensor = self._tensor[[self._indices_to_tensor_positions[index] for index in range(batch_size * batch_number, max_index)]].to(torch.float32) / 255 - return input_tensor + return self._tensor[[self._indices_to_tensor_positions[index] for index in range(batch_size * batch_number, max_index)]].to(torch.float32) / 255 def clone(self) -> ImageList: cloned_image_list = self._clone_without_tensor() diff --git a/src/safeds/data/image/typing/_image_size.py b/src/safeds/data/image/typing/_image_size.py index 88bd084ac..d2ab52f71 100644 --- a/src/safeds/data/image/typing/_image_size.py +++ b/src/safeds/data/image/typing/_image_size.py @@ -12,7 +12,7 @@ class ImageSize: """ - A container for image size data + A container for image size data. Parameters ---------- @@ -59,12 +59,36 @@ def __sizeof__(self): @property def width(self) -> int: + """ + Get the width of this `ImageSize` in pixels. + + Returns + ------- + width: + The width of this `ImageSize`. + """ return self._width @property def height(self) -> int: + """ + Get the height of this `ImageSize` in pixels. + + Returns + ------- + height: + The height of this `ImageSize`. + """ return self._height @property def channel(self) -> int: + """ + Get the number of channels of this `ImageSize`. + + Returns + ------- + channel: + The number of channels of this `ImageSize`. 
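# The accessors documented above are plain read-only properties. A short usage
# sketch, assuming the ImageSize(width, height, channel) constructor that the
# tests in this series use:
from safeds.data.image.typing import ImageSize

size = ImageSize(640, 480, 3)  # 640x480 pixels with 3 channels (RGB)
assert (size.width, size.height, size.channel) == (640, 480, 3)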
+ """ return self._channel diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py index 6fb1ecc82..1470959e7 100644 --- a/src/safeds/data/labeled/containers/_image_dataset.py +++ b/src/safeds/data/labeled/containers/_image_dataset.py @@ -20,6 +20,20 @@ class ImageDataset(Generic[T]): + """ + A Dataset for ImageLists as input and ImageLists, Tables or Columns as output. + + Parameters + ---------- + input_data: + the input ImageList + output_data: + the output data + batch_size: + the batch size used for training + shuffle: + weather the data should be shuffled after each epoch of training + """ def __init__(self, input_data: ImageList, output_data: T, batch_size=1, shuffle=False) -> None: import torch @@ -30,9 +44,9 @@ def __init__(self, input_data: ImageList, output_data: T, batch_size=1, shuffle= self._next_batch_index = 0 if isinstance(input_data, _MultiSizeImageList): - raise ValueError("The given input ImageList contains images of different sizes.") + raise ValueError("The given input ImageList contains images of different sizes.") # noqa: TRY004 elif isinstance(input_data, _EmptyImageList): - raise ValueError("The given input ImageList contains no images.") + raise ValueError("The given input ImageList contains no images.") # noqa: TRY004 else: self._input_size = ImageSize(input_data.widths[0], input_data.heights[0], input_data.channel) self._input = input_data @@ -59,7 +73,7 @@ def __init__(self, input_data: ImageList, output_data: T, batch_size=1, shuffle= _output = output_data.clone()._as_single_size_image_list() self._output_size = ImageSize(output_data.widths[0], output_data.heights[0], output_data.channel) else: - raise ValueError("The given output ImageList contains images of different sizes.") + raise ValueError("The given output ImageList contains images of different sizes.") # noqa: TRY004 self._output = _output def __iter__(self) -> ImageDataset: @@ -81,16 +95,48 @@ def __len__(self) -> int: @property def input_size(self) -> ImageSize: + """ + Get the input `ImageSize` of this dataset. + + Returns + ------- + input_size: + the input `ImageSize` + """ return self._input_size @property def output_size(self) -> ImageSize | int: + """ + Get the output size of this dataset. + + Returns + ------- + output_size: + the output size + """ return self._output_size def get_input(self) -> ImageList: + """ + Get the input data of this dataset. + + Returns + ------- + input: + the input data of this dataset + """ return self._input def get_output(self) -> T: + """ + Get the output data of this dataset. + + Returns + ------- + output: + the output data of this dataset + """ output = self._output if isinstance(output, _TableAsTensor): return output._to_table() @@ -119,6 +165,16 @@ def _get_batch(self, batch_number: int, batch_size: int | None = None) -> tuple[ return input_tensor, output_tensor def shuffle(self) -> ImageDataset[T]: + """ + Return a new `ImageDataset` with shuffled data. + + The original dataset list is not modified. 
+ + Returns + ------- + image_dataset: + the shuffled `ImageDataset` + """ import torch im_dataset: ImageDataset[T] = copy.copy(self) im_dataset._shuffle_tensor_indices = torch.randperm(len(self)) @@ -149,8 +205,7 @@ def _from_tensor(tensor: Tensor, column_names: list[str]) -> _TableAsTensor: return table_as_tensor def _to_table(self) -> Table: - table = Table(dict(zip(self._column_names, self._tensor.T.tolist()))) - return table + return Table(dict(zip(self._column_names, self._tensor.T.tolist()))) class _ColumnAsTensor: @@ -168,7 +223,7 @@ def _from_tensor(tensor: Tensor, column_name: str, one_hot_encoder: OneHotEncode if tensor.dim() != 2: raise ValueError(f"Tensor has an invalid amount of dimensions. Needed 2 dimensions but got {tensor.dim()}.") if not one_hot_encoder.is_fitted(): - raise TransformerNotFittedError() + raise TransformerNotFittedError if tensor.size(dim=1) != len(one_hot_encoder.get_names_of_added_columns()): raise ValueError(f"Tensor and one_hot_encoder have different amounts of classes ({tensor.size(dim=1)}!={len(one_hot_encoder.get_names_of_added_columns())}).") table_as_tensor = _ColumnAsTensor.__new__(_ColumnAsTensor) diff --git a/src/safeds/ml/nn/_convolutional2d_layer.py b/src/safeds/ml/nn/_convolutional2d_layer.py index 6f8382bb9..377fa39cb 100644 --- a/src/safeds/ml/nn/_convolutional2d_layer.py +++ b/src/safeds/ml/nn/_convolutional2d_layer.py @@ -39,14 +39,27 @@ class Convolutional2DLayer(_Layer): def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, padding: int = 0): """ Create a Convolutional 2D Layer. + + Parameters + ---------- + output_channel: + the amount of output channels + kernel_size: + the size of the kernel + stride: + the stride of the convolution + padding: + the padding of the convolution """ self._output_channel = output_channel self._kernel_size = kernel_size self._stride = stride self._padding = padding + self._output_size = None + self._input_size = None def _get_internal_layer(self, *, activation_function: Literal["sigmoid", "relu", "softmax"]) -> nn.Module: - return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride, False) + return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride, transpose=False) @property def input_size(self) -> ImageSize: @@ -70,27 +83,46 @@ def output_size(self) -> ImageSize: result: The Number of Neurons in this layer. 
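The `output_size` added above follows the usual convolution arithmetic; a self-contained sketch of the same formula (the helper name is illustrative), checked against the 7x12 expectations that appear in the layer tests later in this series, where the input is assumed to be 10x20:

    import math

    def conv_output_dim(input_dim: int, kernel_size: int, stride: int, padding: int) -> int:
        # ceil((n + 2 * p - k + 1) / s), as in Convolutional2DLayer.output_size
        return math.ceil((input_dim + padding * 2 - kernel_size + 1) / (1.0 * stride))

    assert conv_output_dim(10, 2, 2, 2) == 7
    assert conv_output_dim(20, 2, 2, 2) == 12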
""" + if self._output_size is None and self._output_size is not None: + new_width = math.ceil((self._input_size.width + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride)) + new_height = math.ceil((self._input_size.height + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride)) + self._output_size = ImageSize(new_width, new_height, self._output_channel, _ignore_invalid_channel=True) return self._output_size def _set_input_size(self, input_size: ImageSize) -> None: self._input_size = input_size - new_width = math.ceil((input_size.width + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride)) - new_height = math.ceil((input_size.height + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride)) - self._output_size = ImageSize(new_width, new_height, self._output_channel, _ignore_invalid_channel=True) + self._output_size = None class ConvolutionalTranspose2DLayer(Convolutional2DLayer): def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, padding: int = 0, output_padding: int = 0): + """ + Create a Convolutional Transpose 2D Layer. + + Parameters + ---------- + output_channel: + the amount of output channels + kernel_size: + the size of the kernel + stride: + the stride of the transposed convolution + padding: + the padding of the transposed convolution + output_padding: + the output padding of the transposed convolution + """ super().__init__(output_channel, kernel_size, stride=stride, padding=padding) self._output_padding = output_padding def _get_internal_layer(self, *, activation_function: Literal["sigmoid", "relu", "softmax"]) -> nn.Module: - return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride, True, self._output_padding) - - def _set_input_size(self, input_size: ImageSize) -> None: - self._input_size = input_size - new_width = (input_size.width - 1) * self._stride - 2 * self._padding + self._kernel_size + self._output_padding - new_height = (input_size.height - 1) * self._stride - 2 * self._padding + self._kernel_size + self._output_padding - self._output_size = ImageSize(new_width, new_height, self._output_channel, _ignore_invalid_channel=True) + return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride, transpose=True, output_padding=self._output_padding) + @property + def output_size(self) -> ImageSize: + if self._output_size is None and self._output_size is not None: + new_width = (self.input_size.width - 1) * self._stride - 2 * self._padding + self._kernel_size + self._output_padding + new_height = (self.input_size.height - 1) * self._stride - 2 * self._padding + self._kernel_size + self._output_padding + self._output_size = ImageSize(new_width, new_height, self._output_channel, _ignore_invalid_channel=True) + return self._output_size diff --git a/src/safeds/ml/nn/_flatten_layer.py b/src/safeds/ml/nn/_flatten_layer.py index 5a9ca7f2b..fab851d29 100644 --- a/src/safeds/ml/nn/_flatten_layer.py +++ b/src/safeds/ml/nn/_flatten_layer.py @@ -2,11 +2,12 @@ from typing import TYPE_CHECKING -from safeds.data.image.typing import ImageSize if TYPE_CHECKING: from torch import Tensor, nn + from safeds.data.image.typing import ImageSize + from safeds.ml.nn._layer import _Layer @@ -55,7 +56,10 @@ def output_size(self) -> int: result : The Number of Neurons in this layer. 
""" - return self._input_size.width * self._input_size.height * self._input_size.channel if self._input_size is not None else None + if self._output_size is None and self._input_size is not None: + self._output_size = self._input_size.width * self._input_size.height * self._input_size.channel + return self._output_size def _set_input_size(self, input_size: ImageSize) -> None: self._input_size = input_size + self._output_size = None diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py index 4807d38a0..da1fb6afb 100644 --- a/src/safeds/ml/nn/_input_conversion.py +++ b/src/safeds/ml/nn/_input_conversion.py @@ -3,17 +3,17 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Generic, TypeVar -from safeds.data.image.typing import ImageSize - if TYPE_CHECKING: from torch.utils.data import DataLoader + from safeds.data.image.typing import ImageSize + from safeds.data.tabular.containers import Table, TaggedTable, TimeSeries from safeds.data.image.containers import ImageList from safeds.data.labeled.containers import ImageDataset -FT = TypeVar("FT", TaggedTable, TimeSeries) -PT = TypeVar("PT", Table, TimeSeries) +FT = TypeVar("FT", TaggedTable, TimeSeries, ImageDataset) +PT = TypeVar("PT", Table, TimeSeries, ImageList) class _InputConversion(Generic[FT, PT], ABC): diff --git a/src/safeds/ml/nn/_input_conversion_image.py b/src/safeds/ml/nn/_input_conversion_image.py index 911f42cbc..82f9f48d8 100644 --- a/src/safeds/ml/nn/_input_conversion_image.py +++ b/src/safeds/ml/nn/_input_conversion_image.py @@ -1,11 +1,15 @@ from __future__ import annotations +from typing import TYPE_CHECKING + from safeds.data.image.containers import ImageList from safeds.data.labeled.containers import ImageDataset from safeds.data.labeled.containers._image_dataset import _ColumnAsTensor, _TableAsTensor from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList -from safeds.data.image.typing import ImageSize -from safeds.data.tabular.transformation import OneHotEncoder + +if TYPE_CHECKING: + from safeds.data.image.typing import ImageSize + from safeds.data.tabular.transformation import OneHotEncoder from safeds.ml.nn._input_conversion import _InputConversion @@ -19,6 +23,8 @@ def __init__(self, image_size: ImageSize) -> None: Parameters ---------- + image_size: + the size of the input images """ self._input_size = image_size self._output_size = None @@ -31,10 +37,10 @@ def __init__(self, image_size: ImageSize) -> None: def _data_size(self) -> ImageSize: return self._input_size - def _data_conversion_fit(self, input_data: ImageDataset, batch_size: int, num_of_classes: int = 1) -> ImageDataset: + def _data_conversion_fit(self, input_data: ImageDataset, batch_size: int, num_of_classes: int = 1) -> ImageDataset: # noqa: ARG002 return input_data - def _data_conversion_predict(self, input_data: ImageList, batch_size: int) -> ImageList: + def _data_conversion_predict(self, input_data: ImageList, batch_size: int) -> ImageList: # noqa: ARG002 return input_data def _is_fit_data_valid(self, input_data: ImageDataset) -> bool: diff --git a/src/safeds/ml/nn/_layer.py b/src/safeds/ml/nn/_layer.py index 22a179b08..58ed76955 100644 --- a/src/safeds/ml/nn/_layer.py +++ b/src/safeds/ml/nn/_layer.py @@ -3,11 +3,11 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING -from safeds.data.image.typing import ImageSize - if TYPE_CHECKING: from torch import nn + from safeds.data.image.typing import ImageSize + class _Layer(ABC): @abstractmethod diff 
--git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index 879b23661..69c149a67 100644 --- a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -8,11 +8,12 @@ from safeds.data.labeled.containers._image_dataset import _TableAsTensor, _ColumnAsTensor from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from safeds.data.tabular.containers import Table, Column -from safeds.data.tabular.transformation import OneHotEncoder if TYPE_CHECKING: from torch import Tensor, LongTensor + from safeds.data.tabular.transformation import OneHotEncoder + from safeds.ml.nn._output_conversion import _OutputConversion T = TypeVar("T", Column, Table, ImageList) @@ -32,9 +33,7 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, *, column import torch if not isinstance(input_data, _SingleSizeImageList): - raise ValueError("The given input ImageList contains images of different sizes.") - - print(output_data) + raise ValueError("The given input ImageList contains images of different sizes.") # noqa: TRY004 output = torch.zeros(len(input_data), len(one_hot_encoder.get_names_of_added_columns())) output[torch.arange(len(input_data)), output_data] = 1 @@ -56,7 +55,7 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, *, column import torch if not isinstance(input_data, _SingleSizeImageList): - raise ValueError("The given input ImageList contains images of different sizes.") + raise ValueError("The given input ImageList contains images of different sizes.") # noqa: TRY004 output = torch.zeros(len(input_data), len(column_names)) output[torch.arange(len(input_data)), output_data] = 1 @@ -78,7 +77,7 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor) -> ImageD import torch if not isinstance(input_data, _SingleSizeImageList): - raise ValueError("The given input ImageList contains images of different sizes.") + raise ValueError("The given input ImageList contains images of different sizes.") # noqa: TRY004 return ImageDataset[ImageList](input_data, _SingleSizeImageList._create_from_tensor((output_data * 255).to(torch.uint8), list( range(output_data.size(dim=0))))) diff --git a/src/safeds/ml/nn/_output_conversion_table.py b/src/safeds/ml/nn/_output_conversion_table.py index a23580276..d7b3a5622 100644 --- a/src/safeds/ml/nn/_output_conversion_table.py +++ b/src/safeds/ml/nn/_output_conversion_table.py @@ -23,7 +23,7 @@ def __init__(self, prediction_name: str = "prediction") -> None: """ self._prediction_name = prediction_name - def _data_conversion(self, input_data: Table, output_data: Tensor, **kwargs) -> TaggedTable: + def _data_conversion(self, input_data: Table, output_data: Tensor, **kwargs) -> TaggedTable: # noqa: ARG002 return input_data.add_column(Column(self._prediction_name, output_data.tolist())).tag_columns( self._prediction_name, ) diff --git a/src/safeds/ml/nn/_pooling2d_layer.py b/src/safeds/ml/nn/_pooling2d_layer.py index 6eb81aa03..305d41312 100644 --- a/src/safeds/ml/nn/_pooling2d_layer.py +++ b/src/safeds/ml/nn/_pooling2d_layer.py @@ -33,11 +33,24 @@ class _Pooling2DLayer(_Layer): def __init__(self, strategy: Literal["max", "avg"], kernel_size: int, *, stride: int = -1, padding: int = 0): """ Create a Pooling 2D Layer. 
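The image-to-table and image-to-column conversions above both build one-hot rows from predicted class indices with the same scatter idiom; isolated, it looks like this:

    import torch

    output_data = torch.tensor([2, 0, 1])                    # class index per image
    output = torch.zeros(len(output_data), 3)
    output[torch.arange(len(output_data)), output_data] = 1  # one 1 per row
    # -> [[0, 0, 1], [1, 0, 0], [0, 1, 0]]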
+
+        Parameters
+        ----------
+        strategy:
+            the strategy of the pooling
+        kernel_size:
+            the size of the kernel
+        stride:
+            the stride of the pooling
+        padding:
+            the padding of the pooling
         """
         self._strategy = strategy
         self._kernel_size = kernel_size
         self._stride = stride if stride != -1 else kernel_size
         self._padding = padding
+        self._output_size = None
+        self._input_size = None

     def _get_internal_layer(self) -> nn.Module:
         return _create_internal_model(self._strategy, self._kernel_size, self._padding, self._stride)
@@ -64,22 +77,48 @@ def output_size(self) -> ImageSize:
         result:
             The Number of Neurons in this layer.
         """
+        if self._output_size is None and self._input_size is not None:
+            new_width = math.ceil((self.input_size.width + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride))
+            new_height = math.ceil((self.input_size.height + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride))
+            self._output_size = ImageSize(new_width, new_height, self._input_size.channel, _ignore_invalid_channel=True)
         return self._output_size

     def _set_input_size(self, input_size: ImageSize) -> None:
         self._input_size = input_size
-        new_width = math.ceil((input_size.width + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride))
-        new_height = math.ceil((input_size.height + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride))
-        self._output_size = ImageSize(new_width, new_height, self._input_size.channel, _ignore_invalid_channel=True)
+        self._output_size = None


 class MaxPooling2DLayer(_Pooling2DLayer):

     def __init__(self, kernel_size: int, *, stride: int = -1, padding: int = 0) -> None:
+        """
+        Create a maximum Pooling 2D Layer.
+
+        Parameters
+        ----------
+        kernel_size:
+            the size of the kernel
+        stride:
+            the stride of the pooling
+        padding:
+            the padding of the pooling
+        """
         super().__init__("max", kernel_size, stride=stride, padding=padding)


 class AvgPooling2DLayer(_Pooling2DLayer):

     def __init__(self, kernel_size: int, *, stride: int = -1, padding: int = 0) -> None:
+        """
+        Create an average Pooling 2D Layer.
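The pooling change above introduces the invalidate-and-recompute pattern that the convolution and flatten layers adopt in this series: `_set_input_size` clears the cached output size, and the property recomputes it on demand. A minimal standalone sketch of the pattern (names and the stand-in computation are illustrative):

    class _LazyOutputSize:
        def __init__(self) -> None:
            self._input_size: int | None = None
            self._output_size: int | None = None

        def _set_input_size(self, input_size: int) -> None:
            self._input_size = input_size
            self._output_size = None  # invalidate the cached value

        @property
        def output_size(self) -> int | None:
            if self._output_size is None and self._input_size is not None:
                self._output_size = self._input_size * 2  # stand-in computation
            return self._output_size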
+ + Parameters + ---------- + kernel_size: + the size of the kernel + stride: + the stride of the pooling + padding: + the padding of the pooling + """ super().__init__("avg", kernel_size, stride=stride, padding=padding) diff --git a/tests/safeds/ml/nn/test_input_conversion_image.py b/tests/safeds/ml/nn/test_input_conversion_image.py index e34d45d73..00b41fd01 100644 --- a/tests/safeds/ml/nn/test_input_conversion_image.py +++ b/tests/safeds/ml/nn/test_input_conversion_image.py @@ -15,8 +15,6 @@ class TestIsFitDataValid: ("image_dataset_valid", "image_dataset_invalid"), [ (ImageDataset(_test_image_list, Column("images", images_all())), ImageDataset(_test_image_list, _test_image_list)), - (ImageDataset(_test_image_list, Column("images", images_all())), ImageDataset(_test_image_list, _test_image_list)), - (ImageDataset(_test_image_list, Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]})), ImageDataset(_test_image_list, _test_image_list)), (ImageDataset(_test_image_list, Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]})), ImageDataset(_test_image_list, _test_image_list)), (ImageDataset(_test_image_list, _test_image_list), ImageDataset(_test_image_list, Column("images", images_all()))), (ImageDataset(_test_image_list, _test_image_list), ImageDataset(_test_image_list, Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]}))), From 99e0d546ed473c21d711ce0168b205a99f83e136 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Tue, 30 Apr 2024 02:19:19 +0200 Subject: [PATCH 17/42] refactor: mypy linter --- src/safeds/data/image/containers/_image.py | 6 +-- src/safeds/data/image/typing/_image_size.py | 20 +++++++++- .../data/labeled/containers/_image_dataset.py | 20 +++++----- src/safeds/exceptions/_ml.py | 5 ++- src/safeds/ml/nn/_convolutional2d_layer.py | 38 +++++++++++++++---- src/safeds/ml/nn/_flatten_layer.py | 31 +++++++++++---- src/safeds/ml/nn/_forward_layer.py | 10 +++-- src/safeds/ml/nn/_input_conversion.py | 4 +- src/safeds/ml/nn/_input_conversion_image.py | 10 ++--- src/safeds/ml/nn/_layer.py | 4 +- src/safeds/ml/nn/_model.py | 17 ++++++--- src/safeds/ml/nn/_output_conversion.py | 4 +- src/safeds/ml/nn/_output_conversion_image.py | 10 ++--- src/safeds/ml/nn/_output_conversion_table.py | 4 +- src/safeds/ml/nn/_pooling2d_layer.py | 28 +++++++++++--- .../data/image/typing/test_image_size.py | 12 +++++- .../labeled/containers/test_image_dataset.py | 7 +++- .../transformation/test_one_hot_encoder.py | 6 +-- tests/safeds/ml/nn/test_cnn_workflow.py | 27 +++++++++---- .../ml/nn/test_convolutional2d_layer.py | 36 ++++++++++++++++++ tests/safeds/ml/nn/test_flatten_layer.py | 15 +++++++- tests/safeds/ml/nn/test_forward_layer.py | 7 ++++ .../ml/nn/test_input_conversion_image.py | 2 +- .../ml/nn/test_output_conversion_image.py | 2 +- tests/safeds/ml/nn/test_pooling2d_layer.py | 28 ++++++++++++++ 25 files changed, 274 insertions(+), 79 deletions(-) diff --git a/src/safeds/data/image/containers/_image.py b/src/safeds/data/image/containers/_image.py index c75198887..82990bc45 100644 --- a/src/safeds/data/image/containers/_image.py +++ b/src/safeds/data/image/containers/_image.py @@ -25,7 +25,7 @@ from torch import Tensor from torch.types import Device - from numpy import ndarray + from numpy import ndarray, dtype class Image: @@ -167,7 +167,7 @@ def __sizeof__(self) -> int: """ return sys.getsizeof(self._image_tensor) + self._image_tensor.element_size() * self._image_tensor.nelement() - def __array__(self, dtype=None) -> ndarray: + def __array__(self, 
numpy_dtype: str | dtype | None = None) -> ndarray:
         """
         Return the image as a numpy array.

@@ -178,7 +178,7 @@
         """
         from numpy import uint8

-        return self._image_tensor.permute(1, 2, 0).detach().cpu().numpy().astype(uint8 if dtype is None else dtype)
+        return self._image_tensor.permute(1, 2, 0).detach().cpu().numpy().astype(uint8 if numpy_dtype is None else numpy_dtype)

     def _repr_jpeg_(self) -> bytes | None:
         """
diff --git a/src/safeds/data/image/typing/_image_size.py b/src/safeds/data/image/typing/_image_size.py
index d2ab52f71..049be09aa 100644
--- a/src/safeds/data/image/typing/_image_size.py
+++ b/src/safeds/data/image/typing/_image_size.py
@@ -44,6 +44,19 @@ def __init__(self, width: int, height: int, channel: int, *, _ignore_invalid_cha

     @staticmethod
     def from_image(image: Image) -> ImageSize:
+        """
+        Create an `ImageSize` of a given image.
+
+        Parameters
+        ----------
+        image:
+            the given image for the `ImageSize`
+
+        Returns
+        -------
+        image_size:
+            the calculated `ImageSize`
+        """
         return ImageSize(image.width, image.height, image.channel)

     def __eq__(self, other: object) -> bool:
@@ -51,12 +64,15 @@ def __eq__(self, other: object) -> bool:
             return NotImplemented
         return self._width == other._width and self._height == other._height and self._channel == other._channel

-    def __hash__(self):
+    def __hash__(self) -> int:
         return _structural_hash(self._width, self._height, self._channel)

-    def __sizeof__(self):
+    def __sizeof__(self) -> int:
         return sys.getsizeof(self._width) + sys.getsizeof(self._height) + sys.getsizeof(self._channel)

+    def __str__(self):
+        return f"{self._width}x{self._height}x{self._channel} (WxHxC)"
+
     @property
     def width(self) -> int:
         """
diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py
index 1470959e7..ce831443c 100644
--- a/src/safeds/data/labeled/containers/_image_dataset.py
+++ b/src/safeds/data/labeled/containers/_image_dataset.py
@@ -35,7 +35,7 @@ class ImageDataset(Generic[T]):
         whether the data should be shuffled after each epoch of training
     """

-    def __init__(self, input_data: ImageList, output_data: T, batch_size=1, shuffle=False) -> None:
+    def __init__(self, input_data: ImageList, output_data: T, batch_size: int = 1, shuffle: bool = False) -> None:
         import torch

         self._shuffle_tensor_indices = torch.LongTensor(list(range(len(input_data))))
@@ -49,7 +49,7 @@ def __init__(self, input_data: ImageList, output_data: T, batch_size=1, shuffle=
             raise ValueError("The given input ImageList contains no images.")  # noqa: TRY004
         else:
             self._input_size = ImageSize(input_data.widths[0], input_data.heights[0], input_data.channel)
-            self._input = input_data
+            self._input: _SingleSizeImageList = input_data._as_single_size_image_list()
         if ((isinstance(output_data, Table) or isinstance(output_data, Column)) and len(input_data) != output_data.number_of_rows) or (isinstance(output_data, ImageList) and len(input_data) != len(output_data)):
             raise OutputLengthMismatchError(f"{len(input_data)} != {output_data.number_of_rows if isinstance(output_data, Table) else len(output_data)}")
         if isinstance(output_data, Table):
@@ -64,17 +64,19 @@ def __init__(self, input_data: ImageList, output_data: T, batch_size=1, shuffle=
                 raise NonNumericColumnError(f"Columns {non_numerical_columns} are not numerical.")
             if len(wrong_interval_columns) > 0:
                 raise ValueError(f"Columns {wrong_interval_columns} have values outside of the interval [0, 1].")
-            _output = _TableAsTensor(output_data)
-            
self._output_size = output_data.number_of_columns + _output: _TableAsTensor | _ColumnAsTensor | _SingleSizeImageList = _TableAsTensor(output_data) + _output_size: int | ImageSize = output_data.number_of_columns elif isinstance(output_data, Column): - _output = _ColumnAsTensor(output_data) - self._output_size = len(_output._one_hot_encoder.get_names_of_added_columns()) + _column_as_tensor = _ColumnAsTensor(output_data) + _output_size = len(_column_as_tensor._one_hot_encoder.get_names_of_added_columns()) + _output = _column_as_tensor elif isinstance(output_data, _SingleSizeImageList): _output = output_data.clone()._as_single_size_image_list() - self._output_size = ImageSize(output_data.widths[0], output_data.heights[0], output_data.channel) + _output_size = ImageSize(output_data.widths[0], output_data.heights[0], output_data.channel) else: raise ValueError("The given output ImageList contains images of different sizes.") # noqa: TRY004 self._output = _output + self._output_size = _output_size def __iter__(self) -> ImageDataset: if self._shuffle_after_epoch: @@ -138,9 +140,9 @@ def get_output(self) -> T: the output data of this dataset """ output = self._output - if isinstance(output, _TableAsTensor): + if self.__orig_class__.__args__[0] == _TableAsTensor: return output._to_table() - elif isinstance(output, _ColumnAsTensor): + elif self.__orig_class__.__args__[0] == _ColumnAsTensor: return output._to_column() else: return output diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py index 28cad77a8..151ab7d4d 100644 --- a/src/safeds/exceptions/_ml.py +++ b/src/safeds/exceptions/_ml.py @@ -1,3 +1,6 @@ +from safeds.data.image.typing import ImageSize + + class DatasetContainsTargetError(ValueError): """ Raised when a dataset contains the target column already. @@ -87,7 +90,7 @@ def __init__(self) -> None: class InputSizeError(Exception): """Raised when the amount of features being passed to a network does not match with its input size.""" - def __init__(self, table_size: int, input_layer_size: int) -> None: + def __init__(self, table_size: int, input_layer_size: int | ImageSize) -> None: super().__init__( f"The amount of columns being passed to the network({table_size}) does not match with its input size({input_layer_size}). Consider changing the number of neurons in the first layer or reformatting the table.", ) diff --git a/src/safeds/ml/nn/_convolutional2d_layer.py b/src/safeds/ml/nn/_convolutional2d_layer.py index 377fa39cb..2127540a3 100644 --- a/src/safeds/ml/nn/_convolutional2d_layer.py +++ b/src/safeds/ml/nn/_convolutional2d_layer.py @@ -1,7 +1,7 @@ from __future__ import annotations import math -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING, Literal, Unpack, Any from safeds.data.image.typing import ImageSize @@ -55,10 +55,12 @@ def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, pa self._kernel_size = kernel_size self._stride = stride self._padding = padding - self._output_size = None - self._input_size = None + self._input_size: ImageSize | None = None + self._output_size: ImageSize | None = None - def _get_internal_layer(self, *, activation_function: Literal["sigmoid", "relu", "softmax"]) -> nn.Module: + def _get_internal_layer(self, activation_function: Literal["sigmoid", "relu", "softmax"], **kwargs: Unpack[dict[str, Any]]) -> nn.Module: # noqa: ARG002 + if self._input_size is None: + raise ValueError("The input_size is not yet set. 
The internal layer can only be created when the input_size is set.") return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride, transpose=False) @property @@ -70,7 +72,14 @@ def input_size(self) -> ImageSize: ------- result: The amount of values being passed into this layer. + + Raises + ------ + ValueError + If the input_size is not yet set """ + if self._input_size is None: + raise ValueError("The input_size is not yet set.") return self._input_size @property @@ -82,14 +91,23 @@ def output_size(self) -> ImageSize: ------- result: The Number of Neurons in this layer. + + Raises + ------ + ValueError + If the input_size is not yet set """ - if self._output_size is None and self._output_size is not None: + if self._input_size is None: + raise ValueError("The input_size is not yet set. The layer cannot compute the output_size if the input_size is not set.") + if self._output_size is None: new_width = math.ceil((self._input_size.width + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride)) new_height = math.ceil((self._input_size.height + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride)) self._output_size = ImageSize(new_width, new_height, self._output_channel, _ignore_invalid_channel=True) return self._output_size - def _set_input_size(self, input_size: ImageSize) -> None: + def _set_input_size(self, input_size: int | ImageSize) -> None: + if isinstance(input_size, int): + raise ValueError("The input_size of a convolution layer has to be of type ImageSize.") self._input_size = input_size self._output_size = None @@ -116,12 +134,16 @@ def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, pa super().__init__(output_channel, kernel_size, stride=stride, padding=padding) self._output_padding = output_padding - def _get_internal_layer(self, *, activation_function: Literal["sigmoid", "relu", "softmax"]) -> nn.Module: + def _get_internal_layer(self, activation_function: Literal["sigmoid", "relu", "softmax"], **kwargs: Unpack[dict[str, Any]]) -> nn.Module: # noqa: ARG002 + if self._input_size is None: + raise ValueError("The input_size is not yet set. The internal layer can only be created when the input_size is set.") return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride, transpose=True, output_padding=self._output_padding) @property def output_size(self) -> ImageSize: - if self._output_size is None and self._output_size is not None: + if self._input_size is None: + raise ValueError("The input_size is not yet set. 
The layer cannot compute the output_size if the input_size is not set.") + if self._output_size is None: new_width = (self.input_size.width - 1) * self._stride - 2 * self._padding + self._kernel_size + self._output_padding new_height = (self.input_size.height - 1) * self._stride - 2 * self._padding + self._kernel_size + self._output_padding self._output_size = ImageSize(new_width, new_height, self._output_channel, _ignore_invalid_channel=True) diff --git a/src/safeds/ml/nn/_flatten_layer.py b/src/safeds/ml/nn/_flatten_layer.py index fab851d29..89cf3c2d0 100644 --- a/src/safeds/ml/nn/_flatten_layer.py +++ b/src/safeds/ml/nn/_flatten_layer.py @@ -1,7 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING - +from typing import TYPE_CHECKING, Unpack, Any if TYPE_CHECKING: from torch import Tensor, nn @@ -15,7 +14,7 @@ def _create_internal_model() -> nn.Module: from torch import nn class _InternalLayer(nn.Module): - def __init__(self): + def __init__(self) -> None: super().__init__() self._layer = nn.Flatten() @@ -26,12 +25,12 @@ def forward(self, x: Tensor) -> Tensor: class FlattenLayer(_Layer): - def __init__(self): + def __init__(self) -> None: """Create a Flatten Layer.""" self._input_size: ImageSize | None = None - self._output_size: ImageSize | None = None + self._output_size: int | None = None - def _get_internal_layer(self) -> nn.Module: + def _get_internal_layer(self, **kwargs: Unpack[dict[str, Any]]) -> nn.Module: # noqa: ARG002 return _create_internal_model() @property @@ -43,7 +42,14 @@ def input_size(self) -> ImageSize: ------- result : The amount of values being passed into this layer. + + Raises + ------ + ValueError + If the input_size is not yet set """ + if self._input_size is None: + raise ValueError("The input_size is not yet set.") return self._input_size @property @@ -55,11 +61,20 @@ def output_size(self) -> int: ------- result : The Number of Neurons in this layer. + + Raises + ------ + ValueError + If the input_size is not yet set """ - if self._output_size is None and self._input_size is not None: + if self._input_size is None: + raise ValueError("The input_size is not yet set. 
The layer cannot compute the output_size if the input_size is not set.") + if self._output_size is None: self._output_size = self._input_size.width * self._input_size.height * self._input_size.channel return self._output_size - def _set_input_size(self, input_size: ImageSize) -> None: + def _set_input_size(self, input_size: int | ImageSize) -> None: + if isinstance(input_size, int): + raise ValueError("The input_size of a flatten layer has to be of type ImageSize.") self._input_size = input_size self._output_size = None diff --git a/src/safeds/ml/nn/_forward_layer.py b/src/safeds/ml/nn/_forward_layer.py index 632ac2dc0..6d8ead829 100644 --- a/src/safeds/ml/nn/_forward_layer.py +++ b/src/safeds/ml/nn/_forward_layer.py @@ -1,6 +1,8 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Unpack, Any + +from safeds.data.image.typing import ImageSize if TYPE_CHECKING: from torch import Tensor, nn @@ -60,7 +62,7 @@ def __init__(self, output_size: int, input_size: int | None = None): raise OutOfBoundsError(actual=output_size, name="output_size", lower_bound=ClosedBound(1)) self._output_size = output_size - def _get_internal_layer(self, *, activation_function: str) -> nn.Module: + def _get_internal_layer(self, activation_function: str, **kwargs: Unpack[dict[str, Any]]) -> nn.Module: # noqa: ARG002 return _create_internal_model(self._input_size, self._output_size, activation_function) @property @@ -87,7 +89,9 @@ def output_size(self) -> int: """ return self._output_size - def _set_input_size(self, input_size: int) -> None: + def _set_input_size(self, input_size: int | ImageSize) -> None: + if isinstance(input_size, ImageSize): + raise ValueError("The input_size of a forward layer has to be of type int.") if input_size < 1: raise OutOfBoundsError(actual=input_size, name="input_size", lower_bound=ClosedBound(1)) self._input_size = input_size diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py index da1fb6afb..a01f6c5b3 100644 --- a/src/safeds/ml/nn/_input_conversion.py +++ b/src/safeds/ml/nn/_input_conversion.py @@ -3,6 +3,8 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Generic, TypeVar +from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList + if TYPE_CHECKING: from torch.utils.data import DataLoader @@ -29,7 +31,7 @@ def _data_conversion_fit(self, input_data: FT, batch_size: int, num_of_classes: pass # pragma: no cover @abstractmethod - def _data_conversion_predict(self, input_data: PT, batch_size: int) -> DataLoader | ImageList: + def _data_conversion_predict(self, input_data: PT, batch_size: int) -> DataLoader | _SingleSizeImageList: pass # pragma: no cover @abstractmethod diff --git a/src/safeds/ml/nn/_input_conversion_image.py b/src/safeds/ml/nn/_input_conversion_image.py index 82f9f48d8..96c59eab9 100644 --- a/src/safeds/ml/nn/_input_conversion_image.py +++ b/src/safeds/ml/nn/_input_conversion_image.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Type from safeds.data.image.containers import ImageList from safeds.data.labeled.containers import ImageDataset @@ -27,11 +27,11 @@ def __init__(self, image_size: ImageSize) -> None: the size of the input images """ self._input_size = image_size - self._output_size = None + self._output_size: ImageSize | int | None = None self._one_hot_encoder: OneHotEncoder | None = None self._column_name: str | None = None self._column_names: 
list[str] | None = None - self._output_type = None + self._output_type: Type | None = None @property def _data_size(self) -> ImageSize: @@ -40,8 +40,8 @@ def _data_size(self) -> ImageSize: def _data_conversion_fit(self, input_data: ImageDataset, batch_size: int, num_of_classes: int = 1) -> ImageDataset: # noqa: ARG002 return input_data - def _data_conversion_predict(self, input_data: ImageList, batch_size: int) -> ImageList: # noqa: ARG002 - return input_data + def _data_conversion_predict(self, input_data: ImageList, batch_size: int) -> _SingleSizeImageList: # noqa: ARG002 + return input_data._as_single_size_image_list() def _is_fit_data_valid(self, input_data: ImageDataset) -> bool: if self._output_type is None: diff --git a/src/safeds/ml/nn/_layer.py b/src/safeds/ml/nn/_layer.py index 58ed76955..567663fd1 100644 --- a/src/safeds/ml/nn/_layer.py +++ b/src/safeds/ml/nn/_layer.py @@ -1,7 +1,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Unpack, Any if TYPE_CHECKING: from torch import nn @@ -15,7 +15,7 @@ def __init__(self) -> None: pass # pragma: no cover @abstractmethod - def _get_internal_layer(self, **kwargs) -> nn.Module: + def _get_internal_layer(self, **kwargs: Unpack[dict[str, Any]]) -> nn.Module: pass # pragma: no cover @property diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 32cca4a39..41943f8ed 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -28,6 +28,10 @@ from safeds.ml.nn._layer import _Layer from safeds.ml.nn._output_conversion import _OutputConversion + from safeds.data.tabular.transformation import OneHotEncoder + + from safeds.data.image.typing import ImageSize + IFT = TypeVar("IFT", TaggedTable, TimeSeries, ImageDataset) # InputFitType IPT = TypeVar("IPT", Table, TimeSeries, ImageList) # InputPredictType OT = TypeVar("OT", TaggedTable, TimeSeries, ImageDataset) # OutputType @@ -288,7 +292,7 @@ def __init__( self._input_size = self._model.input_size self._batch_size = 1 self._is_fitted = False - self._num_of_classes = layers[-1].output_size + self._num_of_classes = int(layers[-1].output_size) self._total_number_of_batches_done = 0 self._total_number_of_epochs_done = 0 @@ -426,9 +430,12 @@ def predict(self, test_data: IPT) -> OT: else: predictions.append(elem.squeeze(dim=1).round()) if isinstance(self._output_conversion, OutputConversionImageToTable) and isinstance(self._input_conversion, InputConversionImage): - return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0), column_names=self._input_conversion._column_names) + _column_names: list[str] = self._input_conversion._column_names + return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0), column_names=_column_names) if isinstance(self._output_conversion, OutputConversionImageToColumn) and isinstance(self._input_conversion, InputConversionImage): - return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0), column_name=self._input_conversion._column_name, one_hot_encoder=self._input_conversion._one_hot_encoder) + _column_name: str = self._input_conversion._column_name + _one_hot_encoder: OneHotEncoder = self._input_conversion._one_hot_encoder + return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0), column_name=_column_name, one_hot_encoder=_one_hot_encoder) return self._output_conversion._data_conversion(test_data, torch.cat(predictions, 
dim=0)) @property @@ -468,14 +475,14 @@ def __init__(self, layers: list[_Layer], is_for_classification: bool) -> None: if is_for_classification: internal_layers.pop() - if layers[-1].output_size > 2: + if int(layers[-1].output_size) > 2: internal_layers.append(layers[-1]._get_internal_layer(activation_function="none")) else: internal_layers.append(layers[-1]._get_internal_layer(activation_function="sigmoid")) self._pytorch_layers = nn.Sequential(*internal_layers) @property - def input_size(self) -> int: + def input_size(self) -> int | ImageSize: return self._layer_list[0].input_size def forward(self, x: Tensor) -> Tensor: diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py index 2575b5faa..032fb610f 100644 --- a/src/safeds/ml/nn/_output_conversion.py +++ b/src/safeds/ml/nn/_output_conversion.py @@ -1,7 +1,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Generic, TypeVar +from typing import TYPE_CHECKING, Generic, TypeVar, Unpack, Any from safeds.data.image.containers import ImageList from safeds.data.labeled.containers import ImageDataset @@ -19,5 +19,5 @@ class _OutputConversion(Generic[IT, OT], ABC): """The output conversion for a neural network, defines the output parameters for the neural network.""" @abstractmethod - def _data_conversion(self, input_data: IT, output_data: Tensor, **kwargs) -> OT: + def _data_conversion(self, input_data: IT, output_data: Tensor, **kwargs: Unpack[dict[str, Any]]) -> OT: pass # pragma: no cover diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index 69c149a67..5604e7398 100644 --- a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -1,7 +1,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, TypeVar +from typing import TYPE_CHECKING, TypeVar, Unpack, Any from safeds.data.image.containers import ImageList from safeds.data.labeled.containers import ImageDataset @@ -23,13 +23,13 @@ class _OutputConversionImage(_OutputConversion[ImageList, ImageDataset[T]], ABC) """The output conversion for a neural network, defines the output parameters for the neural network.""" @abstractmethod - def _data_conversion(self, **kwargs) -> ImageDataset[T]: + def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Unpack[dict[str, Any]]) -> ImageDataset[T]: pass # pragma: no cover class OutputConversionImageToColumn(_OutputConversionImage[Column]): - def _data_conversion(self, input_data: ImageList, output_data: Tensor, *, column_name: str, one_hot_encoder: OneHotEncoder) -> ImageDataset[Column]: + def _data_conversion(self, input_data: ImageList, output_data: Tensor, column_name: str, one_hot_encoder: OneHotEncoder, **kwargs: Unpack[dict[str, Any]]) -> ImageDataset[Column]: import torch if not isinstance(input_data, _SingleSizeImageList): @@ -51,7 +51,7 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, *, column class OutputConversionImageToTable(_OutputConversionImage[Table]): - def _data_conversion(self, input_data: ImageList, output_data: Tensor, *, column_names: list[str]) -> ImageDataset[Table]: + def _data_conversion(self, input_data: ImageList, output_data: Tensor, column_names: list[str], **kwargs: Unpack[dict[str, Any]]) -> ImageDataset[Table]: import torch if not isinstance(input_data, _SingleSizeImageList): @@ -73,7 +73,7 @@ def _data_conversion(self, 
input_data: ImageList, output_data: Tensor, *, column class OutputConversionImageToImage(_OutputConversionImage[ImageList]): - def _data_conversion(self, input_data: ImageList, output_data: Tensor) -> ImageDataset[ImageList]: + def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Unpack[dict[str, Any]]) -> ImageDataset[ImageList]: import torch if not isinstance(input_data, _SingleSizeImageList): diff --git a/src/safeds/ml/nn/_output_conversion_table.py b/src/safeds/ml/nn/_output_conversion_table.py index d7b3a5622..9d3e0a5f0 100644 --- a/src/safeds/ml/nn/_output_conversion_table.py +++ b/src/safeds/ml/nn/_output_conversion_table.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Unpack, Any if TYPE_CHECKING: from torch import Tensor @@ -23,7 +23,7 @@ def __init__(self, prediction_name: str = "prediction") -> None: """ self._prediction_name = prediction_name - def _data_conversion(self, input_data: Table, output_data: Tensor, **kwargs) -> TaggedTable: # noqa: ARG002 + def _data_conversion(self, input_data: Table, output_data: Tensor, **kwargs: Unpack[dict[str, Any]]) -> TaggedTable: # noqa: ARG002 return input_data.add_column(Column(self._prediction_name, output_data.tolist())).tag_columns( self._prediction_name, ) diff --git a/src/safeds/ml/nn/_pooling2d_layer.py b/src/safeds/ml/nn/_pooling2d_layer.py index 305d41312..edd233dfa 100644 --- a/src/safeds/ml/nn/_pooling2d_layer.py +++ b/src/safeds/ml/nn/_pooling2d_layer.py @@ -1,7 +1,7 @@ from __future__ import annotations import math -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING, Literal, Unpack, Any from safeds.data.image.typing import ImageSize @@ -49,10 +49,10 @@ def __init__(self, strategy: Literal["max", "avg"], kernel_size: int, *, stride: self._kernel_size = kernel_size self._stride = stride if stride != -1 else kernel_size self._padding = padding - self._output_size = None - self._input_size = None + self._input_size: ImageSize | None = None + self._output_size: ImageSize | None = None - def _get_internal_layer(self) -> nn.Module: + def _get_internal_layer(self, **kwargs: Unpack[dict[str, Any]]) -> nn.Module: # noqa: ARG002 return _create_internal_model(self._strategy, self._kernel_size, self._padding, self._stride) @property @@ -64,7 +64,14 @@ def input_size(self) -> ImageSize: ------- result: The amount of values being passed into this layer. + + Raises + ------ + ValueError + If the input_size is not yet set """ + if self._input_size is None: + raise ValueError("The input_size is not yet set.") return self._input_size @property @@ -76,14 +83,23 @@ def output_size(self) -> ImageSize: ------- result: The Number of Neurons in this layer. + + Raises + ------ + ValueError + If the input_size is not yet set """ - if self._output_size is None and self._input_size is not None: + if self._input_size is None: + raise ValueError("The input_size is not yet set. 
The layer cannot compute the output_size if the input_size is not set.") + if self._output_size is None: new_width = math.ceil((self.input_size.width + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride)) new_height = math.ceil((self.input_size.height + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride)) self._output_size = ImageSize(new_width, new_height, self._input_size.channel, _ignore_invalid_channel=True) return self._output_size - def _set_input_size(self, input_size: ImageSize) -> None: + def _set_input_size(self, input_size: int | ImageSize) -> None: + if isinstance(input_size, int): + raise ValueError("The input_size of a pooling layer has to be of type ImageSize.") self._input_size = input_size self._output_size = None diff --git a/tests/safeds/data/image/typing/test_image_size.py b/tests/safeds/data/image/typing/test_image_size.py index 273f21176..c980ea3e6 100644 --- a/tests/safeds/data/image/typing/test_image_size.py +++ b/tests/safeds/data/image/typing/test_image_size.py @@ -88,6 +88,16 @@ def test_should_size_be_greater_than_normal_object(self, image_size: ImageSize) assert sys.getsizeof(image_size) >= sys.getsizeof(0) * 3 +class TestStr: + + @pytest.mark.parametrize( + "image_size", + [ImageSize(1, 2, 3)] + ) + def test_should_size_be_greater_than_normal_object(self, image_size: ImageSize) -> None: + assert str(image_size) == f"{image_size.width}x{image_size.height}x{image_size.channel} (WxHxC)" + + class TestProperties: @pytest.mark.parametrize( @@ -102,7 +112,7 @@ class TestProperties: "channel", [1, 3, 4] ) - def test_width_height_channel(self, width: int, height: int, channel: int): + def test_width_height_channel(self, width: int, height: int, channel: int) -> None: image_size = ImageSize(width, height, channel) assert image_size.width == width assert image_size.height == height diff --git a/tests/safeds/data/labeled/containers/test_image_dataset.py b/tests/safeds/data/labeled/containers/test_image_dataset.py index 5af711fc4..b3a1d6d2f 100644 --- a/tests/safeds/data/labeled/containers/test_image_dataset.py +++ b/tests/safeds/data/labeled/containers/test_image_dataset.py @@ -1,5 +1,5 @@ import math -from typing import Type +from typing import Type, TypeVar import pytest import torch @@ -17,6 +17,9 @@ from tests.helpers import resolve_resource_path, plane_png_path, white_square_png_path, images_all +T = TypeVar("T", Column, Table, ImageList) + + class TestImageDatasetInit: @pytest.mark.parametrize( @@ -34,7 +37,7 @@ class TestImageDatasetInit: (ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"a": [-1]}), ValueError, r"Columns \['a'\] have values outside of the interval \[0, 1\]."), ] ) - def test_should_raise_with_invalid_data(self, input_data: ImageList, output_data: Column | Table | ImageList, error: Type[Exception], error_msg: str) -> None: + def test_should_raise_with_invalid_data(self, input_data: ImageList, output_data: T, error: Type[Exception], error_msg: str) -> None: with pytest.raises(error, match=error_msg): ImageDataset(input_data, output_data) diff --git a/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py b/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py index 19eeaf0ab..872c5fc10 100644 --- a/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py +++ b/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py @@ -13,10 +13,10 @@ class TestEq: - def test_should_be_not_implemented(self): + def test_should_be_not_implemented(self) -> None: assert 
OneHotEncoder().__eq__(Table()) is NotImplemented - def test_should_be_equal(self): + def test_should_be_equal(self) -> None: table1 = Table({"a": ["a", "b", "c"], "b": ["a", "b", "c"]}) table2 = Table({"b": ["a", "b", "c"], "a": ["a", "b", "c"]}) assert OneHotEncoder().fit(table1, None) == OneHotEncoder().fit(table2, None) @@ -28,7 +28,7 @@ def test_should_be_equal(self): (Table({"a": ["a", "b", "c"], "b": ["a", "b", "c"]}), Table({"a": ["a", "b", "c"], "b": ["a", "b", "d"]})), ] ) - def test_should_be_not_equal(self, table1: Table, table2: Table): + def test_should_be_not_equal(self, table1: Table, table2: Table) -> None: assert OneHotEncoder().fit(table1, None) != OneHotEncoder().fit(table2, None) diff --git a/tests/safeds/ml/nn/test_cnn_workflow.py b/tests/safeds/ml/nn/test_cnn_workflow.py index 82be98d1a..425bfba70 100644 --- a/tests/safeds/ml/nn/test_cnn_workflow.py +++ b/tests/safeds/ml/nn/test_cnn_workflow.py @@ -12,6 +12,7 @@ from safeds.ml.nn import NeuralNetworkClassifier, InputConversionImage, Convolutional2DLayer, MaxPooling2DLayer, \ FlattenLayer, ForwardLayer, OutputConversionImageToTable, ConvolutionalTranspose2DLayer, NeuralNetworkRegressor, \ AvgPooling2DLayer +from safeds.ml.nn._layer import _Layer from safeds.ml.nn._output_conversion_image import OutputConversionImageToColumn, OutputConversionImageToImage from tests.helpers import resolve_resource_path, images_all, device_cuda, device_cpu, skip_if_device_not_available @@ -28,14 +29,19 @@ class TestImageToTableClassifier: ], ids=["seed-1234-cuda", "seed-4711-cuda", "seed-1234-cpu", "seed-4711-cpu"] ) - def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[float], prediction_label: list[str], device: Device): + def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[float], prediction_label: list[str], device: Device) -> None: skip_if_device_not_available(device) torch.set_default_device(device) torch.manual_seed(seed) image_list, filenames = ImageList.from_files(resolve_resource_path(images_all()), return_filenames=True) image_list = image_list.resize(20, 20) - image_classes = Table({"class": [re.search(r"(.*)[\\/](.*)\.", filename).group(2) for filename in filenames]}) + classes = [] + for filename in filenames: + groups = re.search(r"(.*)[\\/](.*)\.", filename) + if groups is not None: + classes.append(groups.group(2)) + image_classes = Table({"class": classes}) one_hot_encoder = OneHotEncoder().fit(image_classes, ["class"]) image_classes_one_hot_encoded = one_hot_encoder.transform(image_classes) image_dataset = ImageDataset(image_list, image_classes_one_hot_encoded) @@ -43,7 +49,7 @@ def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[floa Convolutional2DLayer(1, 2), MaxPooling2DLayer(10), FlattenLayer(), - ForwardLayer(image_dataset.output_size) + ForwardLayer(int(image_dataset.output_size)) ] nn_original = NeuralNetworkClassifier(InputConversionImage(image_dataset.input_size), layers, OutputConversionImageToTable()) @@ -66,21 +72,26 @@ class TestImageToColumnClassifier: ], ids=["seed-1234-cuda", "seed-4711-cuda", "seed-1234-cpu", "seed-4711-cpu"] ) - def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[float], prediction_label: list[str], device: Device): + def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[float], prediction_label: list[str], device: Device) -> None: skip_if_device_not_available(device) torch.set_default_device(device) torch.manual_seed(seed) image_list, filenames = 
ImageList.from_files(resolve_resource_path(images_all()), return_filenames=True) image_list = image_list.resize(20, 20) - image_classes = Column("class", [re.search(r"(.*)[\\/](.*)\.", filename).group(2) for filename in filenames]) + classes = [] + for filename in filenames: + groups = re.search(r"(.*)[\\/](.*)\.", filename) + if groups is not None: + classes.append(groups.group(2)) + image_classes = Column("class", classes) image_dataset = ImageDataset(image_list, image_classes, shuffle=True) layers = [ Convolutional2DLayer(1, 2), AvgPooling2DLayer(10), FlattenLayer(), - ForwardLayer(image_dataset.output_size) + ForwardLayer(int(image_dataset.output_size)) ] nn_original = NeuralNetworkClassifier(InputConversionImage(image_dataset.input_size), layers, OutputConversionImageToColumn()) @@ -103,7 +114,7 @@ class TestImageToImageRegressor: ], ids=["seed-1234-cuda", "seed-4711-cuda", "seed-1234-cpu", "seed-4711-cpu"] ) - def test_should_train_and_predict_model(self, seed: int, snapshot_png_image_list: SnapshotAssertion, layer_3_bias: list[float], device: Device): + def test_should_train_and_predict_model(self, seed: int, snapshot_png_image_list: SnapshotAssertion, layer_3_bias: list[float], device: Device) -> None: skip_if_device_not_available(device) torch.set_default_device(device) torch.manual_seed(seed) @@ -113,7 +124,7 @@ def test_should_train_and_predict_model(self, seed: int, snapshot_png_image_list image_list_grayscale = image_list.convert_to_grayscale() image_dataset = ImageDataset(image_list, image_list_grayscale) - layers = [ + layers: list[_Layer] = [ Convolutional2DLayer(6, 2), Convolutional2DLayer(12, 2), ConvolutionalTranspose2DLayer(6, 2), diff --git a/tests/safeds/ml/nn/test_convolutional2d_layer.py b/tests/safeds/ml/nn/test_convolutional2d_layer.py index 69c880b06..f60cdc23d 100644 --- a/tests/safeds/ml/nn/test_convolutional2d_layer.py +++ b/tests/safeds/ml/nn/test_convolutional2d_layer.py @@ -33,3 +33,39 @@ def test_should_create_pooling_layer(self, activation_function: Literal["sigmoid modules = list(next(layer._get_internal_layer(activation_function=activation_function).modules()).children()) assert isinstance(modules[0], torch_layer) assert isinstance(modules[1], activation_layer) + + @pytest.mark.parametrize( + "activation_function", + [ + "sigmoid", + "relu", + "softmax", + ], + ) + @pytest.mark.parametrize( + ("conv_type", "torch_layer", "output_channel", "kernel_size", "stride", "padding", "out_channel", "out_width", "out_height"), + [ + (Convolutional2DLayer, nn.Conv2d, 30, 2, 2, 2, 30, 7, 12), + (ConvolutionalTranspose2DLayer, nn.ConvTranspose2d, 30, 2, 2, 2, 30, 16, 36), + ], + ) + def test_should_raise_if_input_size_not_set(self, activation_function: Literal["sigmoid", "relu", "softmax"], conv_type: Type[Convolutional2DLayer], torch_layer: Type[nn.Module], output_channel: int, kernel_size: int, stride: int, padding: int, out_channel: int, out_width: int, out_height: int) -> None: + layer = conv_type(output_channel, kernel_size, stride=stride, padding=padding) + with pytest.raises(ValueError, match=r"The input_size is not yet set."): + layer.input_size + with pytest.raises(ValueError, match=r"The input_size is not yet set. The layer cannot compute the output_size if the input_size is not set."): + layer.output_size + with pytest.raises(ValueError, match=r"The input_size is not yet set. 
The internal layer can only be created when the input_size is set."): + layer._get_internal_layer(activation_function) + + @pytest.mark.parametrize( + ("conv_type", "torch_layer", "output_channel", "kernel_size", "stride", "padding", "out_channel", "out_width", "out_height"), + [ + (Convolutional2DLayer, nn.Conv2d, 30, 2, 2, 2, 30, 7, 12), + (ConvolutionalTranspose2DLayer, nn.ConvTranspose2d, 30, 2, 2, 2, 30, 16, 36), + ], + ) + def test_should_raise_if_input_size_is_set_with_int(self, conv_type: Type[Convolutional2DLayer], torch_layer: Type[nn.Module], output_channel: int, kernel_size: int, stride: int, padding: int, out_channel: int, out_width: int, out_height: int) -> None: + layer = conv_type(output_channel, kernel_size, stride=stride, padding=padding) + with pytest.raises(ValueError, match=r"The input_size of a convolution layer has to be of type ImageSize."): + layer._set_input_size(1) diff --git a/tests/safeds/ml/nn/test_flatten_layer.py b/tests/safeds/ml/nn/test_flatten_layer.py index 11cf5ef39..b9334f9f5 100644 --- a/tests/safeds/ml/nn/test_flatten_layer.py +++ b/tests/safeds/ml/nn/test_flatten_layer.py @@ -1,3 +1,4 @@ +import pytest from torch import nn from safeds.data.image.typing import ImageSize @@ -6,10 +7,22 @@ class TestFlattenLayer: - def test_should_create_flatten_layer(self): + def test_should_create_flatten_layer(self) -> None: layer = FlattenLayer() input_size = ImageSize(10, 20, 30, _ignore_invalid_channel=True) layer._set_input_size(input_size) assert layer.input_size == input_size assert layer.output_size == input_size.width * input_size.height * input_size.channel assert isinstance(next(next(layer._get_internal_layer().modules()).children()), nn.Flatten) + + def test_should_raise_if_input_size_not_set(self) -> None: + layer = FlattenLayer() + with pytest.raises(ValueError, match=r"The input_size is not yet set."): + layer.input_size + with pytest.raises(ValueError, match=r"The input_size is not yet set. 
The layer cannot compute the output_size if the input_size is not set."): + layer.output_size + + def test_should_raise_if_input_size_is_set_with_int(self) -> None: + layer = FlattenLayer() + with pytest.raises(ValueError, match=r"The input_size of a flatten layer has to be of type ImageSize."): + layer._set_input_size(1) diff --git a/tests/safeds/ml/nn/test_forward_layer.py b/tests/safeds/ml/nn/test_forward_layer.py index 63caadfbe..9f3cab493 100644 --- a/tests/safeds/ml/nn/test_forward_layer.py +++ b/tests/safeds/ml/nn/test_forward_layer.py @@ -4,6 +4,7 @@ import pytest from torch import nn +from safeds.data.image.typing import ImageSize from safeds.exceptions import OutOfBoundsError from safeds.ml.nn import ForwardLayer @@ -92,6 +93,12 @@ def test_should_raise_if_output_size_doesnt_match(output_size: int) -> None: assert ForwardLayer(output_size=output_size, input_size=1).output_size == output_size +def test_should_raise_if_input_size_is_set_with_image_size() -> None: + layer = ForwardLayer(1) + with pytest.raises(ValueError, match=r"The input_size of a forward layer has to be of type int."): + layer._set_input_size(ImageSize(1, 2, 3)) + + @pytest.mark.parametrize( ("layer1", "layer2", "equal"), [ diff --git a/tests/safeds/ml/nn/test_input_conversion_image.py b/tests/safeds/ml/nn/test_input_conversion_image.py index 00b41fd01..33ca77e70 100644 --- a/tests/safeds/ml/nn/test_input_conversion_image.py +++ b/tests/safeds/ml/nn/test_input_conversion_image.py @@ -27,7 +27,7 @@ class TestIsFitDataValid: (ImageDataset(_test_image_list, _test_image_list), ImageDataset(_test_image_list, _test_image_list.resize(20, 20))), ] ) - def test_should_return_false_if_fit_data_is_invalid(self, image_dataset_valid: ImageDataset, image_dataset_invalid: ImageDataset): + def test_should_return_false_if_fit_data_is_invalid(self, image_dataset_valid: ImageDataset, image_dataset_invalid: ImageDataset) -> None: input_conversion = InputConversionImage(image_dataset_valid.input_size) assert input_conversion._is_fit_data_valid(image_dataset_valid) assert input_conversion._is_fit_data_valid(image_dataset_valid) diff --git a/tests/safeds/ml/nn/test_output_conversion_image.py b/tests/safeds/ml/nn/test_output_conversion_image.py index 70d105f25..e88e1ce92 100644 --- a/tests/safeds/ml/nn/test_output_conversion_image.py +++ b/tests/safeds/ml/nn/test_output_conversion_image.py @@ -17,6 +17,6 @@ class TestDataConversionToColumn: (OutputConversionImageToImage(), {}), ] ) - def test_should_raise_if_input_data_is_multi_size(self, output_conversion: _OutputConversionImage, kwargs: dict): + def test_should_raise_if_input_data_is_multi_size(self, output_conversion: _OutputConversionImage, kwargs: dict) -> None: with pytest.raises(ValueError, match=r"The given input ImageList contains images of different sizes."): output_conversion._data_conversion(input_data=_MultiSizeImageList(), output_data=torch.empty(1), **kwargs) diff --git a/tests/safeds/ml/nn/test_pooling2d_layer.py b/tests/safeds/ml/nn/test_pooling2d_layer.py index 08615799c..eed4a0ba2 100644 --- a/tests/safeds/ml/nn/test_pooling2d_layer.py +++ b/tests/safeds/ml/nn/test_pooling2d_layer.py @@ -19,7 +19,35 @@ class TestPooling2DLayer: def test_should_create_pooling_layer(self, strategy: Literal["max", "avg"], torch_layer: Type[nn.Module]) -> None: layer = _Pooling2DLayer(strategy, 2, stride=2, padding=2) input_size = ImageSize(10, 20, 30, _ignore_invalid_channel=True) + with pytest.raises(ValueError, match=r"The input_size of a pooling layer has to be of type ImageSize."): + 
layer._set_input_size(1)
         layer._set_input_size(input_size)
         assert layer.input_size == input_size
         assert layer.output_size == ImageSize(7, 12, 30, _ignore_invalid_channel=True)
         assert isinstance(next(next(layer._get_internal_layer().modules()).children()), torch_layer)
+
+    @pytest.mark.parametrize(
+        "strategy",
+        [
+            "max",
+            "avg",
+        ],
+    )
+    def test_should_raise_if_input_size_not_set(self, strategy: Literal["max", "avg"]):
+        layer = _Pooling2DLayer(strategy, 2, stride=2, padding=2)
+        with pytest.raises(ValueError, match=r"The input_size is not yet set."):
+            layer.input_size
+        with pytest.raises(ValueError, match=r"The input_size is not yet set. The layer cannot compute the output_size if the input_size is not set."):
+            layer.output_size
+
+    @pytest.mark.parametrize(
+        "strategy",
+        [
+            "max",
+            "avg",
+        ],
+    )
+    def test_should_raise_if_input_size_is_set_with_int(self, strategy: Literal["max", "avg"]):
+        layer = _Pooling2DLayer(strategy, 2, stride=2, padding=2)
+        with pytest.raises(ValueError, match=r"The input_size of a pooling layer has to be of type ImageSize."):
+            layer._set_input_size(1)

From a43ecda1a66babe294e077959d24adcb63720c5a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexander=20Gr=C3=A9us?=
Date: Wed, 1 May 2024 22:22:19 +0200
Subject: [PATCH 18/42] refactor: finish merge

---
 src/safeds/data/labeled/containers/_image_dataset.py | 6 +++---
 tests/safeds/ml/nn/test_model.py                     | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py
index ce831443c..5e8f15001 100644
--- a/src/safeds/data/labeled/containers/_image_dataset.py
+++ b/src/safeds/data/labeled/containers/_image_dataset.py
@@ -140,9 +140,9 @@ def get_output(self) -> T:
             the output data of this dataset
         """
         output = self._output
-        if self.__orig_class__.__args__[0] == _TableAsTensor:
+        if isinstance(output, _TableAsTensor):
             return output._to_table()
-        elif self.__orig_class__.__args__[0] == _ColumnAsTensor:
+        elif isinstance(output, _ColumnAsTensor):
             return output._to_column()
         else:
             return output
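
NOTE (illustration, not part of the patch): the get_output hunk above swaps `__orig_class__` introspection for plain `isinstance` checks on the stored output. `__orig_class__` is only set on instances created through an explicit subscript such as `ImageDataset[Table](...)`, so a call that lets the type argument be inferred never gets the attribute. A minimal standalone sketch with a hypothetical `Box` class:

    from typing import Generic, TypeVar

    T = TypeVar("T")

    class Box(Generic[T]):
        def __init__(self, value: T) -> None:
            self.value = value

    explicit = Box[int](1)
    print(explicit.__orig_class__)              # Box[int] -- set by the subscripted call
    inferred = Box(1)                           # type argument inferred: attribute never set
    print(hasattr(inferred, "__orig_class__"))  # False -> AttributeError on access
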
@@ -224,7 +224,7 @@ def __init__(self, column: Column) -> None:
     def _from_tensor(tensor: Tensor, column_name: str, one_hot_encoder: OneHotEncoder) -> _ColumnAsTensor:
         if tensor.dim() != 2:
             raise ValueError(f"Tensor has an invalid amount of dimensions. Needed 2 dimensions but got {tensor.dim()}.")
-        if not one_hot_encoder.is_fitted():
+        if not one_hot_encoder.is_fitted:
             raise TransformerNotFittedError
         if tensor.size(dim=1) != len(one_hot_encoder.get_names_of_added_columns()):
             raise ValueError(f"Tensor and one_hot_encoder have different amounts of classes ({tensor.size(dim=1)}!={len(one_hot_encoder.get_names_of_added_columns())}).")
diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py
index df67df0ca..5f4008846 100644
--- a/tests/safeds/ml/nn/test_model.py
+++ b/tests/safeds/ml/nn/test_model.py
@@ -2,7 +2,7 @@
 
 from safeds.data.image.typing import ImageSize
 from safeds.data.labeled.containers import TabularDataset
-from safeds.data.tabular.containers import Table, TaggedTable
+from safeds.data.tabular.containers import Table
 from safeds.exceptions import FeatureDataMismatchError, InputSizeError, ModelNotFittedError, OutOfBoundsError, \
     InvalidModelStructureError
 from safeds.ml.nn import (

From e4e5239d140e2c01b9ffab1bb7f2c8de62299745 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexander=20Gr=C3=A9us?=
Date: Wed, 1 May 2024 23:47:39 +0200
Subject: [PATCH 19/42] refactor: linter

---
 src/safeds/data/image/typing/_image_size.py      |  2 +-
 .../data/labeled/containers/_image_dataset.py    | 21 ++++----
 src/safeds/exceptions/_ml.py                     |  4 +-
 src/safeds/ml/nn/__init__.py                     |  4 +-
 src/safeds/ml/nn/_convolutional2d_layer.py       | 20 ++++---
 src/safeds/ml/nn/_flatten_layer.py               |  6 +--
 src/safeds/ml/nn/_forward_layer.py               | 10 ++--
 src/safeds/ml/nn/_input_conversion.py            |  9 ++--
 src/safeds/ml/nn/_input_conversion_image.py      |  9 +++-
 src/safeds/ml/nn/_input_conversion_table.py      |  5 +-
 src/safeds/ml/nn/_layer.py                       |  4 +-
 src/safeds/ml/nn/_model.py                       | 22 +++-----
 src/safeds/ml/nn/_output_conversion.py           |  4 +-
 src/safeds/ml/nn/_output_conversion_image.py     | 27 +++++++---
 src/safeds/ml/nn/_output_conversion_table.py     |  2 +-
 src/safeds/ml/nn/_pooling2d_layer.py             |  6 +--
 tests/safeds/ml/nn/test_cnn_workflow.py          | 11 ++--
 .../ml/nn/test_convolutional2d_layer.py          | 52 ++++++++++++++-----
 tests/safeds/ml/nn/test_flatten_layer.py         |  6 +--
 tests/safeds/ml/nn/test_forward_layer.py         |  8 ++-
 .../ml/nn/test_output_conversion_image.py        | 25 +++++++--
 tests/safeds/ml/nn/test_pooling2d_layer.py       | 12 ++---
 22 files changed, 178 insertions(+), 91 deletions(-)

diff --git a/src/safeds/data/image/typing/_image_size.py b/src/safeds/data/image/typing/_image_size.py
index 049be09aa..fe0faee54 100644
--- a/src/safeds/data/image/typing/_image_size.py
+++ b/src/safeds/data/image/typing/_image_size.py
@@ -70,7 +70,7 @@ def __hash__(self) -> int:
     def __sizeof__(self) -> int:
         return sys.getsizeof(self._width) + sys.getsizeof(self._height) + sys.getsizeof(self._channel)
 
-    def __str__(self):
+    def __str__(self) -> str:
         return f"{self._width}x{self._height}x{self._channel} (WxHxC)"
 
     @property
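
NOTE (illustration, not part of the patch): the _image_dataset.py hunk below annotates `_shuffle_tensor_indices` as a `torch.LongTensor`. The dataset shuffles by permuting this index tensor rather than reordering the image tensor itself; a rough standalone sketch of that gather-by-index idea:

    import torch

    data = torch.arange(10).reshape(5, 2)  # five rows standing in for five images
    indices = torch.randperm(5)            # a shuffled view of the index space
    shuffled = data[indices]               # gather via the indices; data stays in place
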
diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py
index 5e8f15001..a7690d0df 100644
--- a/src/safeds/data/labeled/containers/_image_dataset.py
+++ b/src/safeds/data/labeled/containers/_image_dataset.py
@@ -38,20 +38,21 @@ class ImageDataset(Generic[T]):
     def __init__(self, input_data: ImageList, output_data: T, batch_size: int = 1, shuffle: bool = False) -> None:
         import torch
 
-        self._shuffle_tensor_indices = torch.LongTensor(list(range(len(input_data))))
-        self._shuffle_after_epoch = shuffle
-        self._batch_size = batch_size
-        self._next_batch_index = 0
+        self._shuffle_tensor_indices: torch.LongTensor = torch.LongTensor(list(range(len(input_data))))
+        self._shuffle_after_epoch: bool = shuffle
+        self._batch_size: int = batch_size
+        self._next_batch_index: int = 0
 
         if isinstance(input_data, _MultiSizeImageList):
             raise ValueError("The given input ImageList contains images of different sizes.")  # noqa: TRY004
         elif isinstance(input_data, _EmptyImageList):
             raise ValueError("The given input ImageList contains no images.")  # noqa: TRY004
         else:
-            self._input_size = ImageSize(input_data.widths[0], input_data.heights[0], input_data.channel)
+            self._input_size: ImageSize = ImageSize(input_data.widths[0], input_data.heights[0], input_data.channel)
             self._input: _SingleSizeImageList = input_data._as_single_size_image_list()
         if ((isinstance(output_data, Table) or isinstance(output_data, Column)) and len(input_data) != output_data.number_of_rows) or (isinstance(output_data, ImageList) and len(input_data) != len(output_data)):
-            raise OutputLengthMismatchError(f"{len(input_data)} != {output_data.number_of_rows if isinstance(output_data, Table) else len(output_data)}")
+            output_len = output_data.number_of_rows if isinstance(output_data, Table) else len(output_data)
+            raise OutputLengthMismatchError(f"{len(input_data)} != {output_len}")
         if isinstance(output_data, Table):
             non_numerical_columns = []
             wrong_interval_columns = []
             for column_name in output_data.column_names:
@@ -140,12 +141,14 @@ def get_output(self) -> T:
             the output data of this dataset
         """
         output = self._output
+        safeds_output: T
         if isinstance(output, _TableAsTensor):
-            return output._to_table()
+            safeds_output = output._to_table()
         elif isinstance(output, _ColumnAsTensor):
-            return output._to_column()
+            safeds_output = output._to_column()
         else:
-            return output
+            safeds_output = output
+        return safeds_output
 
     def _get_batch(self, batch_number: int, batch_size: int | None = None) -> tuple[Tensor, Tensor]:
         import torch
diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py
index 6ba38cbd3..64214effe 100644
--- a/src/safeds/exceptions/_ml.py
+++ b/src/safeds/exceptions/_ml.py
@@ -90,9 +90,9 @@ def __init__(self) -> None:
 class InputSizeError(Exception):
     """Raised when the amount of features being passed to a network does not match with its input size."""
 
-    def __init__(self, table_size: int, input_layer_size: int | ImageSize) -> None:
+    def __init__(self, data_size: int | ImageSize, input_layer_size: int | ImageSize) -> None:
         super().__init__(
-            f"The amount of columns being passed to the network({table_size}) does not match with its input size({input_layer_size}). Consider changing the number of neurons in the first layer or reformatting the table.",
+            f"The data size being passed to the network({data_size}) does not match with its input size({input_layer_size}). 
Consider changing the data size of the model or reformatting the data.", ) diff --git a/src/safeds/ml/nn/__init__.py b/src/safeds/ml/nn/__init__.py index e12d78681..769d7b350 100644 --- a/src/safeds/ml/nn/__init__.py +++ b/src/safeds/ml/nn/__init__.py @@ -13,7 +13,7 @@ from ._input_conversion_table import InputConversionTable from ._pooling2d_layer import MaxPooling2DLayer from ._model import NeuralNetworkClassifier, NeuralNetworkRegressor - from ._output_conversion_image import OutputConversionImageToImage, OutputConversionImageToTable + from ._output_conversion_image import OutputConversionImageToImage, OutputConversionImageToTable, OutputConversionImageToColumn from ._output_conversion_table import OutputConversionTable apipkg.initpkg( @@ -29,6 +29,7 @@ "MaxPooling2DLayer": "._pooling2d_layer:MaxPooling2DLayer", "NeuralNetworkClassifier": "._model:NeuralNetworkClassifier", "NeuralNetworkRegressor": "._model:NeuralNetworkRegressor", + "OutputConversionImageToColumn": "._output_conversion_image:OutputConversionImageToColumn", "OutputConversionImageToImage": "._output_conversion_image:OutputConversionImageToImage", "OutputConversionImageToTable": "._output_conversion_image:OutputConversionImageToTable", "OutputConversionTable": "._output_conversion_table:OutputConversionTable", @@ -47,6 +48,7 @@ "MaxPooling2DLayer", "NeuralNetworkClassifier", "NeuralNetworkRegressor", + "OutputConversionImageToColumn", "OutputConversionImageToImage", "OutputConversionImageToTable", "OutputConversionTable", diff --git a/src/safeds/ml/nn/_convolutional2d_layer.py b/src/safeds/ml/nn/_convolutional2d_layer.py index 2127540a3..4ab5b4a05 100644 --- a/src/safeds/ml/nn/_convolutional2d_layer.py +++ b/src/safeds/ml/nn/_convolutional2d_layer.py @@ -1,7 +1,7 @@ from __future__ import annotations import math -from typing import TYPE_CHECKING, Literal, Unpack, Any +from typing import TYPE_CHECKING, Literal, Unpack, Any, TypedDict from safeds.data.image.typing import ImageSize @@ -58,10 +58,14 @@ def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, pa self._input_size: ImageSize | None = None self._output_size: ImageSize | None = None - def _get_internal_layer(self, activation_function: Literal["sigmoid", "relu", "softmax"], **kwargs: Unpack[dict[str, Any]]) -> nn.Module: # noqa: ARG002 + def _get_internal_layer(self, **kwargs: Unpack[TypedDict[str, Any]]) -> nn.Module: # noqa: ARG002 if self._input_size is None: raise ValueError("The input_size is not yet set. The internal layer can only be created when the input_size is set.") - return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride, transpose=False) + if "activation_function" not in kwargs: + raise ValueError("The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs.") + if kwargs.get("activation_function") not in ["sigmoid", "relu", "softmax"]: + raise ValueError(f"The activation_function '{kwargs.get('activation_function')}' is not supported. 
Please choose one of the following: ['sigmoid', 'relu', 'softmax'].") + return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, kwargs.get("activation_function"), self._padding, self._stride, transpose=False) @property def input_size(self) -> ImageSize: @@ -107,7 +111,7 @@ def output_size(self) -> ImageSize: def _set_input_size(self, input_size: int | ImageSize) -> None: if isinstance(input_size, int): - raise ValueError("The input_size of a convolution layer has to be of type ImageSize.") + raise TypeError("The input_size of a convolution layer has to be of type ImageSize.") self._input_size = input_size self._output_size = None @@ -134,10 +138,14 @@ def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, pa super().__init__(output_channel, kernel_size, stride=stride, padding=padding) self._output_padding = output_padding - def _get_internal_layer(self, activation_function: Literal["sigmoid", "relu", "softmax"], **kwargs: Unpack[dict[str, Any]]) -> nn.Module: # noqa: ARG002 + def _get_internal_layer(self, **kwargs: Unpack[dict[str, Any]]) -> nn.Module: # noqa: ARG002 if self._input_size is None: raise ValueError("The input_size is not yet set. The internal layer can only be created when the input_size is set.") - return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride, transpose=True, output_padding=self._output_padding) + if "activation_function" not in kwargs: + raise ValueError("The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs.") + if kwargs.get("activation_function") not in ["sigmoid", "relu", "softmax"]: + raise ValueError(f"The activation_function '{kwargs.get('activation_function')}' is not supported. 
Please choose one of the following: ['sigmoid', 'relu', 'softmax'].") + return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, kwargs.get("activation_function"), self._padding, self._stride, transpose=True, output_padding=self._output_padding) @property def output_size(self) -> ImageSize: diff --git a/src/safeds/ml/nn/_flatten_layer.py b/src/safeds/ml/nn/_flatten_layer.py index 89cf3c2d0..463f383cd 100644 --- a/src/safeds/ml/nn/_flatten_layer.py +++ b/src/safeds/ml/nn/_flatten_layer.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Unpack, Any +from typing import TYPE_CHECKING, Unpack, Any, TypedDict if TYPE_CHECKING: from torch import Tensor, nn @@ -30,7 +30,7 @@ def __init__(self) -> None: self._input_size: ImageSize | None = None self._output_size: int | None = None - def _get_internal_layer(self, **kwargs: Unpack[dict[str, Any]]) -> nn.Module: # noqa: ARG002 + def _get_internal_layer(self, **kwargs: Unpack[TypedDict[str, Any]]) -> nn.Module: # noqa: ARG002 return _create_internal_model() @property @@ -75,6 +75,6 @@ def output_size(self) -> int: def _set_input_size(self, input_size: int | ImageSize) -> None: if isinstance(input_size, int): - raise ValueError("The input_size of a flatten layer has to be of type ImageSize.") + raise TypeError("The input_size of a flatten layer has to be of type ImageSize.") self._input_size = input_size self._output_size = None diff --git a/src/safeds/ml/nn/_forward_layer.py b/src/safeds/ml/nn/_forward_layer.py index 9be77d7d7..2577713a3 100644 --- a/src/safeds/ml/nn/_forward_layer.py +++ b/src/safeds/ml/nn/_forward_layer.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Unpack, Any +from typing import TYPE_CHECKING, Unpack, Any, TypedDict from safeds.data.image.typing import ImageSize @@ -62,8 +62,10 @@ def __init__(self, output_size: int, input_size: int | None = None): raise OutOfBoundsError(actual=output_size, name="output_size", lower_bound=ClosedBound(1)) self._output_size = output_size - def _get_internal_layer(self, activation_function: str, **kwargs: Unpack[dict[str, Any]]) -> nn.Module: # noqa: ARG002 - return _create_internal_model(self._input_size, self._output_size, activation_function) + def _get_internal_layer(self, **kwargs: Unpack[TypedDict[str, Any]]) -> nn.Module: # noqa: ARG002 + if "activation_function" not in kwargs: + raise ValueError("The activation_function is not set. 
The internal layer can only be created when the activation_function is provided in the kwargs.") + return _create_internal_model(self._input_size, self._output_size, kwargs.get("activation_function")) @property def input_size(self) -> int: @@ -91,7 +93,7 @@ def output_size(self) -> int: def _set_input_size(self, input_size: int | ImageSize) -> None: if isinstance(input_size, ImageSize): - raise ValueError("The input_size of a forward layer has to be of type int.") + raise TypeError("The input_size of a forward layer has to be of type int.") if input_size < 1: raise OutOfBoundsError(actual=input_size, name="input_size", lower_bound=ClosedBound(1)) self._input_size = input_size diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py index 93583577f..b84000b9b 100644 --- a/src/safeds/ml/nn/_input_conversion.py +++ b/src/safeds/ml/nn/_input_conversion.py @@ -1,14 +1,13 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Generic, TypeVar - -from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList +from typing import TYPE_CHECKING, Generic, TypeVar, TypedDict, Any if TYPE_CHECKING: from torch.utils.data import DataLoader from safeds.data.image.typing import ImageSize + from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from safeds.data.tabular.containers import Table, TimeSeries from safeds.data.image.containers import ImageList @@ -41,3 +40,7 @@ def _is_fit_data_valid(self, input_data: FT) -> bool: @abstractmethod def _is_predict_data_valid(self, input_data: PT) -> bool: pass # pragma: no cover + + @abstractmethod + def _get_output_configuration(self) -> TypedDict[str, Any]: + pass # pragma: no cover diff --git a/src/safeds/ml/nn/_input_conversion_image.py b/src/safeds/ml/nn/_input_conversion_image.py index 96c59eab9..73e5a524c 100644 --- a/src/safeds/ml/nn/_input_conversion_image.py +++ b/src/safeds/ml/nn/_input_conversion_image.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Type +from typing import TYPE_CHECKING, Type, TypedDict, Any from safeds.data.image.containers import ImageList from safeds.data.labeled.containers import ImageDataset @@ -37,7 +37,9 @@ def __init__(self, image_size: ImageSize) -> None: def _data_size(self) -> ImageSize: return self._input_size - def _data_conversion_fit(self, input_data: ImageDataset, batch_size: int, num_of_classes: int = 1) -> ImageDataset: # noqa: ARG002 + def _data_conversion_fit( + self, input_data: ImageDataset, batch_size: int, num_of_classes: int = 1 # noqa: ARG002 + ) -> ImageDataset: return input_data def _data_conversion_predict(self, input_data: ImageList, batch_size: int) -> _SingleSizeImageList: # noqa: ARG002 @@ -64,3 +66,6 @@ def _is_fit_data_valid(self, input_data: ImageDataset) -> bool: def _is_predict_data_valid(self, input_data: ImageList) -> bool: return isinstance(input_data, _SingleSizeImageList) and input_data.sizes[0] == self._input_size + + def _get_output_configuration(self) -> TypedDict[str, Any]: + return {"column_names": self._column_names, "column_name": self._column_name, "one_hot_encoder": self._one_hot_encoder} diff --git a/src/safeds/ml/nn/_input_conversion_table.py b/src/safeds/ml/nn/_input_conversion_table.py index ccf729102..f6fdf16dc 100644 --- a/src/safeds/ml/nn/_input_conversion_table.py +++ b/src/safeds/ml/nn/_input_conversion_table.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING 
+from typing import TYPE_CHECKING, TypedDict, Any if TYPE_CHECKING: from torch.utils.data import DataLoader @@ -45,3 +45,6 @@ def _is_fit_data_valid(self, input_data: TabularDataset) -> bool: def _is_predict_data_valid(self, input_data: Table) -> bool: return (sorted(input_data.column_names)).__eq__(sorted(self._feature_names)) + + def _get_output_configuration(self) -> TypedDict[str, Any]: + return {} diff --git a/src/safeds/ml/nn/_layer.py b/src/safeds/ml/nn/_layer.py index 567663fd1..901b8db1f 100644 --- a/src/safeds/ml/nn/_layer.py +++ b/src/safeds/ml/nn/_layer.py @@ -1,7 +1,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Unpack, Any +from typing import TYPE_CHECKING, Unpack, Any, TypedDict if TYPE_CHECKING: from torch import nn @@ -15,7 +15,7 @@ def __init__(self) -> None: pass # pragma: no cover @abstractmethod - def _get_internal_layer(self, **kwargs: Unpack[dict[str, Any]]) -> nn.Module: + def _get_internal_layer(self, **kwargs: Unpack[TypedDict[str, Any]]) -> nn.Module: pass # pragma: no cover @property diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 4d6bc9cbd..6c806b79e 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -15,8 +15,8 @@ OutOfBoundsError, ) from safeds.ml.nn import InputConversionImage, FlattenLayer, OutputConversionImageToTable, Convolutional2DLayer, \ - ForwardLayer, OutputConversionImageToImage -from safeds.ml.nn._output_conversion_image import OutputConversionImageToColumn, _OutputConversionImage + ForwardLayer, OutputConversionImageToImage, OutputConversionImageToColumn +from safeds.ml.nn._output_conversion_image import _OutputConversionImage from safeds.ml.nn._pooling2d_layer import _Pooling2DLayer if TYPE_CHECKING: @@ -28,8 +28,6 @@ from safeds.ml.nn._layer import _Layer from safeds.ml.nn._output_conversion import _OutputConversion - from safeds.data.tabular.transformation import OneHotEncoder - from safeds.data.image.typing import ImageSize IFT = TypeVar("IFT", TabularDataset, TimeSeries, ImageDataset) # InputFitType @@ -220,7 +218,7 @@ def predict(self, test_data: IPT) -> OT: for x in dataloader: elem = self._model(x) predictions.append(elem.squeeze(dim=1)) - return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0)) + return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0), **self._input_conversion._get_output_configuration()) @property def is_fitted(self) -> bool: @@ -285,7 +283,7 @@ def __init__( self._input_size = self._model.input_size self._batch_size = 1 self._is_fitted = False - self._num_of_classes = int(layers[-1].output_size) + self._num_of_classes = layers[-1].output_size if isinstance(layers[-1].output_size, int) else -1 # Is always int but linter doesn´t know self._total_number_of_batches_done = 0 self._total_number_of_epochs_done = 0 @@ -422,14 +420,8 @@ def predict(self, test_data: IPT) -> OT: predictions.append(torch.argmax(elem, dim=1)) else: predictions.append(elem.squeeze(dim=1).round()) - if isinstance(self._output_conversion, OutputConversionImageToTable) and isinstance(self._input_conversion, InputConversionImage): - _column_names: list[str] = self._input_conversion._column_names - return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0), column_names=_column_names) - if isinstance(self._output_conversion, OutputConversionImageToColumn) and isinstance(self._input_conversion, InputConversionImage): - _column_name: str = 
self._input_conversion._column_name
-            _one_hot_encoder: OneHotEncoder = self._input_conversion._one_hot_encoder
-            return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0), column_name=_column_name, one_hot_encoder=_one_hot_encoder)
-        return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0))
+        print(self._input_conversion._get_output_configuration())
+        return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0), **self._input_conversion._get_output_configuration())
 
     @property
     def is_fitted(self) -> bool:
@@ -461,7 +453,7 @@ def __init__(self, layers: list[_Layer], is_for_classification: bool) -> None:
 
         if is_for_classification:
             internal_layers.pop()
-            if int(layers[-1].output_size) > 2:
+            if isinstance(layers[-1].output_size, int) and layers[-1].output_size > 2:
                 internal_layers.append(layers[-1]._get_internal_layer(activation_function="none"))
             else:
                 internal_layers.append(layers[-1]._get_internal_layer(activation_function="sigmoid"))
diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py
index b41c85d08..4c4b838ea 100644
--- a/src/safeds/ml/nn/_output_conversion.py
+++ b/src/safeds/ml/nn/_output_conversion.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Generic, TypeVar, Unpack, Any
+from typing import TYPE_CHECKING, Generic, TypeVar, Unpack, Any, TypedDict
 
 from safeds.data.image.containers import ImageList
 from safeds.data.labeled.containers import ImageDataset, TabularDataset
@@ -19,5 +19,5 @@ class _OutputConversion(Generic[IT, OT], ABC):
     """The output conversion for a neural network, defines the output parameters for the neural network."""
 
     @abstractmethod
-    def _data_conversion(self, input_data: IT, output_data: Tensor, **kwargs: Unpack[dict[str, Any]]) -> OT:
+    def _data_conversion(self, input_data: IT, output_data: Tensor, **kwargs: Unpack[TypedDict[str, Any]]) -> OT:
         pass  # pragma: no cover
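
NOTE (illustration, not part of the patch): the _model.py hunk above is the heart of this refactor. Instead of `predict` special-casing each output conversion with `isinstance` checks, every input conversion now bundles whatever its paired output conversion needs into a plain dict via `_get_output_configuration()`, and `predict` forwards it blindly as `**kwargs`. A minimal sketch of the hand-off pattern (the class and method names below are illustrative stand-ins, not the Safe-DS API):

    from typing import Any

    class DemoInputConversion:
        def _get_output_configuration(self) -> dict[str, Any]:
            # Everything the paired output conversion might need.
            return {"column_name": "class", "one_hot_encoder": None}

    class DemoOutputConversion:
        def _data_conversion(self, output_data: list[int], **kwargs: Any) -> dict[str, Any]:
            # Each conversion validates and picks only the kwargs it uses.
            if "column_name" not in kwargs:
                raise ValueError("The column_name is not set.")
            return {"column": kwargs["column_name"], "values": output_data}

    config = DemoInputConversion()._get_output_configuration()
    result = DemoOutputConversion()._data_conversion([0, 1, 1], **config)
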
diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py
index 5604e7398..a8a929607 100644
--- a/src/safeds/ml/nn/_output_conversion_image.py
+++ b/src/safeds/ml/nn/_output_conversion_image.py
@@ -1,21 +1,21 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, TypeVar, Unpack, Any
+from typing import TYPE_CHECKING, Any, TypedDict, Unpack, TypeVar
 
 from safeds.data.image.containers import ImageList
+from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList
 from safeds.data.labeled.containers import ImageDataset
 from safeds.data.labeled.containers._image_dataset import _TableAsTensor, _ColumnAsTensor
-from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList
 from safeds.data.tabular.containers import Table, Column
 
 if TYPE_CHECKING:
     from torch import Tensor, LongTensor
 
-    from safeds.data.tabular.transformation import OneHotEncoder
-
 from safeds.ml.nn._output_conversion import _OutputConversion
 
+from safeds.data.tabular.transformation import OneHotEncoder
+
 T = TypeVar("T", Column, Table, ImageList)
 
 
@@ -23,17 +23,23 @@ class _OutputConversionImage(_OutputConversion[ImageList, ImageDataset[T]], ABC)
     """The output conversion for a neural network, defines the output parameters for the neural network."""
 
     @abstractmethod
-    def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Unpack[dict[str, Any]]) -> ImageDataset[T]:
+    def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Unpack[TypedDict[str, Any]]) -> ImageDataset[T]:
         pass  # pragma: no cover
 
 
 class OutputConversionImageToColumn(_OutputConversionImage[Column]):
 
-    def _data_conversion(self, input_data: ImageList, output_data: Tensor, column_name: str, one_hot_encoder: OneHotEncoder, **kwargs: Unpack[dict[str, Any]]) -> ImageDataset[Column]:
+    def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Unpack[TypedDict[str, Any]]) -> ImageDataset[Column]:
         import torch
 
         if not isinstance(input_data, _SingleSizeImageList):
             raise ValueError("The given input ImageList contains images of different sizes.")  # noqa: TRY004
+        if "column_name" not in kwargs or not isinstance(kwargs.get("column_name"), str):
+            raise ValueError("The column_name is not set. The data can only be converted if the column_name is provided as `str` in the kwargs.")
+        if "one_hot_encoder" not in kwargs or not isinstance(kwargs.get("one_hot_encoder"), OneHotEncoder):
+            raise ValueError("The one_hot_encoder is not set. The data can only be converted if the one_hot_encoder is provided as `OneHotEncoder` in the kwargs.")
+        one_hot_encoder: OneHotEncoder = kwargs.get("one_hot_encoder")
+        column_name: str = kwargs.get("column_name")
 
         output = torch.zeros(len(input_data), len(one_hot_encoder.get_names_of_added_columns()))
         output[torch.arange(len(input_data)), output_data] = 1
@@ -51,11 +57,14 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, column_na
 
 class OutputConversionImageToTable(_OutputConversionImage[Table]):
 
-    def _data_conversion(self, input_data: ImageList, output_data: Tensor, column_names: list[str], **kwargs: Unpack[dict[str, Any]]) -> ImageDataset[Table]:
+    def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Unpack[TypedDict[str, Any]]) -> ImageDataset[Table]:
         import torch
 
         if not isinstance(input_data, _SingleSizeImageList):
             raise ValueError("The given input ImageList contains images of different sizes.")  # noqa: TRY004
+        if "column_names" not in kwargs or not isinstance(kwargs.get("column_names"), list) and all([isinstance(element, str) for element in kwargs.get("column_names")]):
+            raise ValueError("The column_names are not set. 
The data can only be converted if the column_names are provided as `list[str]` in the kwargs.") + column_names: list[str] = kwargs.get("column_names") output = torch.zeros(len(input_data), len(column_names)) output[torch.arange(len(input_data)), output_data] = 1 @@ -73,7 +82,9 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, column_na class OutputConversionImageToImage(_OutputConversionImage[ImageList]): - def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Unpack[dict[str, Any]]) -> ImageDataset[ImageList]: + def _data_conversion( + self, input_data: ImageList, output_data: Tensor, **kwargs: Unpack[TypedDict[str, Any]] # noqa: ARG002 + ) -> ImageDataset[ImageList]: import torch if not isinstance(input_data, _SingleSizeImageList): diff --git a/src/safeds/ml/nn/_output_conversion_table.py b/src/safeds/ml/nn/_output_conversion_table.py index eef3c0c7c..dcbe56d97 100644 --- a/src/safeds/ml/nn/_output_conversion_table.py +++ b/src/safeds/ml/nn/_output_conversion_table.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Unpack, Any +from typing import TYPE_CHECKING, Unpack, Any, TypedDict if TYPE_CHECKING: from torch import Tensor diff --git a/src/safeds/ml/nn/_pooling2d_layer.py b/src/safeds/ml/nn/_pooling2d_layer.py index edd233dfa..c5467204b 100644 --- a/src/safeds/ml/nn/_pooling2d_layer.py +++ b/src/safeds/ml/nn/_pooling2d_layer.py @@ -1,7 +1,7 @@ from __future__ import annotations import math -from typing import TYPE_CHECKING, Literal, Unpack, Any +from typing import TYPE_CHECKING, Literal, Unpack, Any, TypedDict from safeds.data.image.typing import ImageSize @@ -52,7 +52,7 @@ def __init__(self, strategy: Literal["max", "avg"], kernel_size: int, *, stride: self._input_size: ImageSize | None = None self._output_size: ImageSize | None = None - def _get_internal_layer(self, **kwargs: Unpack[dict[str, Any]]) -> nn.Module: # noqa: ARG002 + def _get_internal_layer(self, **kwargs: Unpack[TypedDict[str, Any]]) -> nn.Module: # noqa: ARG002 return _create_internal_model(self._strategy, self._kernel_size, self._padding, self._stride) @property @@ -99,7 +99,7 @@ def output_size(self) -> ImageSize: def _set_input_size(self, input_size: int | ImageSize) -> None: if isinstance(input_size, int): - raise ValueError("The input_size of a pooling layer has to be of type ImageSize.") + raise TypeError("The input_size of a pooling layer has to be of type ImageSize.") self._input_size = input_size self._output_size = None diff --git a/tests/safeds/ml/nn/test_cnn_workflow.py b/tests/safeds/ml/nn/test_cnn_workflow.py index 425bfba70..bed33aa3d 100644 --- a/tests/safeds/ml/nn/test_cnn_workflow.py +++ b/tests/safeds/ml/nn/test_cnn_workflow.py @@ -1,4 +1,5 @@ import re +from typing import TYPE_CHECKING import pytest import torch @@ -12,10 +13,12 @@ from safeds.ml.nn import NeuralNetworkClassifier, InputConversionImage, Convolutional2DLayer, MaxPooling2DLayer, \ FlattenLayer, ForwardLayer, OutputConversionImageToTable, ConvolutionalTranspose2DLayer, NeuralNetworkRegressor, \ AvgPooling2DLayer -from safeds.ml.nn._layer import _Layer from safeds.ml.nn._output_conversion_image import OutputConversionImageToColumn, OutputConversionImageToImage from tests.helpers import resolve_resource_path, images_all, device_cuda, device_cpu, skip_if_device_not_available +if TYPE_CHECKING: + from safeds.ml.nn._layer import _Layer + class TestImageToTableClassifier: @@ -45,11 +48,12 @@ def test_should_train_and_predict_model(self, seed: int, 
layer_3_bias: list[floa one_hot_encoder = OneHotEncoder().fit(image_classes, ["class"]) image_classes_one_hot_encoded = one_hot_encoder.transform(image_classes) image_dataset = ImageDataset(image_list, image_classes_one_hot_encoded) + num_of_classes: int = image_dataset.output_size if isinstance(image_dataset.output_size, int) else 0 layers = [ Convolutional2DLayer(1, 2), MaxPooling2DLayer(10), FlattenLayer(), - ForwardLayer(int(image_dataset.output_size)) + ForwardLayer(num_of_classes) ] nn_original = NeuralNetworkClassifier(InputConversionImage(image_dataset.input_size), layers, OutputConversionImageToTable()) @@ -86,12 +90,13 @@ def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[floa classes.append(groups.group(2)) image_classes = Column("class", classes) image_dataset = ImageDataset(image_list, image_classes, shuffle=True) + num_of_classes: int = image_dataset.output_size if isinstance(image_dataset.output_size, int) else 0 layers = [ Convolutional2DLayer(1, 2), AvgPooling2DLayer(10), FlattenLayer(), - ForwardLayer(int(image_dataset.output_size)) + ForwardLayer(num_of_classes) ] nn_original = NeuralNetworkClassifier(InputConversionImage(image_dataset.input_size), layers, OutputConversionImageToColumn()) diff --git a/tests/safeds/ml/nn/test_convolutional2d_layer.py b/tests/safeds/ml/nn/test_convolutional2d_layer.py index f60cdc23d..b9ff48291 100644 --- a/tests/safeds/ml/nn/test_convolutional2d_layer.py +++ b/tests/safeds/ml/nn/test_convolutional2d_layer.py @@ -43,29 +43,57 @@ def test_should_create_pooling_layer(self, activation_function: Literal["sigmoid ], ) @pytest.mark.parametrize( - ("conv_type", "torch_layer", "output_channel", "kernel_size", "stride", "padding", "out_channel", "out_width", "out_height"), + ("conv_type", "output_channel", "kernel_size", "stride", "padding"), [ - (Convolutional2DLayer, nn.Conv2d, 30, 2, 2, 2, 30, 7, 12), - (ConvolutionalTranspose2DLayer, nn.ConvTranspose2d, 30, 2, 2, 2, 30, 16, 36), + (Convolutional2DLayer, 30, 2, 2, 2), + (ConvolutionalTranspose2DLayer, 30, 2, 2, 2), ], ) - def test_should_raise_if_input_size_not_set(self, activation_function: Literal["sigmoid", "relu", "softmax"], conv_type: Type[Convolutional2DLayer], torch_layer: Type[nn.Module], output_channel: int, kernel_size: int, stride: int, padding: int, out_channel: int, out_width: int, out_height: int) -> None: + def test_should_raise_if_input_size_not_set(self, activation_function: Literal["sigmoid", "relu", "softmax"], conv_type: Type[Convolutional2DLayer], output_channel: int, kernel_size: int, stride: int, padding: int) -> None: layer = conv_type(output_channel, kernel_size, stride=stride, padding=padding) with pytest.raises(ValueError, match=r"The input_size is not yet set."): - layer.input_size + layer.input_size # noqa B018 with pytest.raises(ValueError, match=r"The input_size is not yet set. The layer cannot compute the output_size if the input_size is not set."): - layer.output_size + layer.output_size # noqa B018 with pytest.raises(ValueError, match=r"The input_size is not yet set. 
The internal layer can only be created when the input_size is set."): - layer._get_internal_layer(activation_function) + layer._get_internal_layer(activation_function=activation_function) @pytest.mark.parametrize( - ("conv_type", "torch_layer", "output_channel", "kernel_size", "stride", "padding", "out_channel", "out_width", "out_height"), + ("conv_type", "output_channel", "kernel_size", "stride", "padding"), [ - (Convolutional2DLayer, nn.Conv2d, 30, 2, 2, 2, 30, 7, 12), - (ConvolutionalTranspose2DLayer, nn.ConvTranspose2d, 30, 2, 2, 2, 30, 16, 36), + (Convolutional2DLayer, 30, 2, 2, 2), + (ConvolutionalTranspose2DLayer, 30, 2, 2, 2), + ], + ) + def test_should_raise_if_activation_function_not_set(self, conv_type: Type[Convolutional2DLayer], output_channel: int, kernel_size: int, stride: int, padding: int) -> None: + layer = conv_type(output_channel, kernel_size, stride=stride, padding=padding) + input_size = ImageSize(10, 20, 30, _ignore_invalid_channel=True) + layer._set_input_size(input_size) + with pytest.raises(ValueError, match=r"The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs."): + layer._get_internal_layer() + + @pytest.mark.parametrize( + ("conv_type", "output_channel", "kernel_size", "stride", "padding"), + [ + (Convolutional2DLayer, 30, 2, 2, 2), + (ConvolutionalTranspose2DLayer, 30, 2, 2, 2), + ], + ) + def test_should_raise_if_unsupported_activation_function_is_set(self, conv_type: Type[Convolutional2DLayer], output_channel: int, kernel_size: int, stride: int, padding: int) -> None: + layer = conv_type(output_channel, kernel_size, stride=stride, padding=padding) + input_size = ImageSize(10, 20, 30, _ignore_invalid_channel=True) + layer._set_input_size(input_size) + with pytest.raises(ValueError, match=r"The activation_function 'unknown' is not supported. Please choose one of the following: \['sigmoid', 'relu', 'softmax'\]."): + layer._get_internal_layer(activation_function="unknown") + + @pytest.mark.parametrize( + ("conv_type", "output_channel", "kernel_size", "stride", "padding"), + [ + (Convolutional2DLayer, 30, 2, 2, 2), + (ConvolutionalTranspose2DLayer, 30, 2, 2, 2), ], ) - def test_should_raise_if_input_size_is_set_with_int(self, conv_type: Type[Convolutional2DLayer], torch_layer: Type[nn.Module], output_channel: int, kernel_size: int, stride: int, padding: int, out_channel: int, out_width: int, out_height: int) -> None: + def test_should_raise_if_input_size_is_set_with_int(self, conv_type: Type[Convolutional2DLayer], output_channel: int, kernel_size: int, stride: int, padding: int) -> None: layer = conv_type(output_channel, kernel_size, stride=stride, padding=padding) - with pytest.raises(ValueError, match=r"The input_size of a convolution layer has to be of type ImageSize."): + with pytest.raises(TypeError, match=r"The input_size of a convolution layer has to be of type ImageSize."): layer._set_input_size(1) diff --git a/tests/safeds/ml/nn/test_flatten_layer.py b/tests/safeds/ml/nn/test_flatten_layer.py index b9334f9f5..3dda93df7 100644 --- a/tests/safeds/ml/nn/test_flatten_layer.py +++ b/tests/safeds/ml/nn/test_flatten_layer.py @@ -18,11 +18,11 @@ def test_should_create_flatten_layer(self) -> None: def test_should_raise_if_input_size_not_set(self) -> None: layer = FlattenLayer() with pytest.raises(ValueError, match=r"The input_size is not yet set."): - layer.input_size + layer.input_size # noqa B018 with pytest.raises(ValueError, match=r"The input_size is not yet set. 
The layer cannot compute the output_size if the input_size is not set."): - layer.output_size + layer.output_size # noqa B018 def test_should_raise_if_input_size_is_set_with_int(self) -> None: layer = FlattenLayer() - with pytest.raises(ValueError, match=r"The input_size of a flatten layer has to be of type ImageSize."): + with pytest.raises(TypeError, match=r"The input_size of a flatten layer has to be of type ImageSize."): layer._set_input_size(1) diff --git a/tests/safeds/ml/nn/test_forward_layer.py b/tests/safeds/ml/nn/test_forward_layer.py index 9f3cab493..9fd8c2ece 100644 --- a/tests/safeds/ml/nn/test_forward_layer.py +++ b/tests/safeds/ml/nn/test_forward_layer.py @@ -95,10 +95,16 @@ def test_should_raise_if_output_size_doesnt_match(output_size: int) -> None: def test_should_raise_if_input_size_is_set_with_image_size() -> None: layer = ForwardLayer(1) - with pytest.raises(ValueError, match=r"The input_size of a forward layer has to be of type int."): + with pytest.raises(TypeError, match=r"The input_size of a forward layer has to be of type int."): layer._set_input_size(ImageSize(1, 2, 3)) +def test_should_raise_if_activation_function_not_set() -> None: + layer = ForwardLayer(1) + with pytest.raises(ValueError, match=r"The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs."): + layer._get_internal_layer() + + @pytest.mark.parametrize( ("layer1", "layer2", "equal"), [ diff --git a/tests/safeds/ml/nn/test_output_conversion_image.py b/tests/safeds/ml/nn/test_output_conversion_image.py index e88e1ce92..a6f297e28 100644 --- a/tests/safeds/ml/nn/test_output_conversion_image.py +++ b/tests/safeds/ml/nn/test_output_conversion_image.py @@ -2,12 +2,13 @@ import torch from safeds.data.image.containers._multi_size_image_list import _MultiSizeImageList +from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from safeds.data.tabular.transformation import OneHotEncoder -from safeds.ml.nn import OutputConversionImageToTable, OutputConversionImageToImage -from safeds.ml.nn._output_conversion_image import _OutputConversionImage, OutputConversionImageToColumn +from safeds.ml.nn import OutputConversionImageToTable, OutputConversionImageToImage, OutputConversionImageToColumn +from safeds.ml.nn._output_conversion_image import _OutputConversionImage -class TestDataConversionToColumn: +class TestDataConversionImage: @pytest.mark.parametrize( ("output_conversion", "kwargs"), @@ -20,3 +21,21 @@ class TestDataConversionToColumn: def test_should_raise_if_input_data_is_multi_size(self, output_conversion: _OutputConversionImage, kwargs: dict) -> None: with pytest.raises(ValueError, match=r"The given input ImageList contains images of different sizes."): output_conversion._data_conversion(input_data=_MultiSizeImageList(), output_data=torch.empty(1), **kwargs) + + +class TestOutputConversionImageToColumn: + + def test_should_raise_if_column_name_not_set(self) -> None: + with pytest.raises(ValueError, match=r"The column_name is not set. The data can only be converted if the column_name is provided as `str` in the kwargs."): + OutputConversionImageToColumn()._data_conversion(input_data=_SingleSizeImageList(), output_data=torch.empty(1), one_hot_encoder=OneHotEncoder()) + + def test_should_raise_if_one_hot_encoder_not_set(self) -> None: + with pytest.raises(ValueError, match=r"The one_hot_encoder is not set. 
The data can only be converted if the one_hot_encoder is provided as `OneHotEncoder` in the kwargs."): + OutputConversionImageToColumn()._data_conversion(input_data=_SingleSizeImageList(), output_data=torch.empty(1), column_name="column_name") + + +class TestOutputConversionImageToTable: + + def test_should_raise_if_column_names_not_set(self) -> None: + with pytest.raises(ValueError, match=r"The column_names are not set. The data can only be converted if the column_names are provided as `list\[str\]` in the kwargs."): + OutputConversionImageToTable()._data_conversion(input_data=_SingleSizeImageList(), output_data=torch.empty(1)) diff --git a/tests/safeds/ml/nn/test_pooling2d_layer.py b/tests/safeds/ml/nn/test_pooling2d_layer.py index eed4a0ba2..e75ce8e41 100644 --- a/tests/safeds/ml/nn/test_pooling2d_layer.py +++ b/tests/safeds/ml/nn/test_pooling2d_layer.py @@ -19,7 +19,7 @@ class TestPooling2DLayer: def test_should_create_pooling_layer(self, strategy: Literal["max", "avg"], torch_layer: Type[nn.Module]) -> None: layer = _Pooling2DLayer(strategy, 2, stride=2, padding=2) input_size = ImageSize(10, 20, 30, _ignore_invalid_channel=True) - with pytest.raises(ValueError, match=r"The input_size of a pooling layer has to be of type ImageSize."): + with pytest.raises(TypeError, match=r"The input_size of a pooling layer has to be of type ImageSize."): layer._set_input_size(1) layer._set_input_size(input_size) assert layer.input_size == input_size @@ -33,12 +33,12 @@ def test_should_create_pooling_layer(self, strategy: Literal["max", "avg"], torc "avg", ], ) - def test_should_raise_if_input_size_not_set(self, strategy: Literal["max", "avg"]): + def test_should_raise_if_input_size_not_set(self, strategy: Literal["max", "avg"]) -> None: layer = _Pooling2DLayer(strategy, 2, stride=2, padding=2) with pytest.raises(ValueError, match=r"The input_size is not yet set."): - layer.input_size + layer.input_size # noqa B018 with pytest.raises(ValueError, match=r"The input_size is not yet set. 
The layer cannot compute the output_size if the input_size is not set."): - layer.output_size + layer.output_size # noqa B018 @pytest.mark.parametrize( "strategy", @@ -47,7 +47,7 @@ def test_should_raise_if_input_size_not_set(self, strategy: Literal["max", "avg" "avg", ], ) - def test_should_raise_if_input_size_is_set_with_int(self, strategy: Literal["max", "avg"]): + def test_should_raise_if_input_size_is_set_with_int(self, strategy: Literal["max", "avg"]) -> None: layer = _Pooling2DLayer(strategy, 2, stride=2, padding=2) - with pytest.raises(ValueError, match=r"The input_size of a pooling layer has to be of type ImageSize."): + with pytest.raises(TypeError, match=r"The input_size of a pooling layer has to be of type ImageSize."): layer._set_input_size(1) From d87bc92ced8045856d21147265b7727069518bf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Thu, 2 May 2024 00:08:29 +0200 Subject: [PATCH 20/42] refactor: linter --- .../data/labeled/containers/_image_dataset.py | 13 +++++++------ src/safeds/ml/nn/_convolutional2d_layer.py | 12 ++++++++---- src/safeds/ml/nn/_flatten_layer.py | 2 +- src/safeds/ml/nn/_forward_layer.py | 8 +++++--- src/safeds/ml/nn/_input_conversion.py | 4 ++-- src/safeds/ml/nn/_input_conversion_image.py | 6 +++--- src/safeds/ml/nn/_input_conversion_table.py | 4 ++-- src/safeds/ml/nn/_layer.py | 4 ++-- src/safeds/ml/nn/_model.py | 3 +-- src/safeds/ml/nn/_output_conversion.py | 4 ++-- src/safeds/ml/nn/_output_conversion_image.py | 12 ++++++------ src/safeds/ml/nn/_output_conversion_table.py | 4 ++-- src/safeds/ml/nn/_pooling2d_layer.py | 2 +- tests/safeds/ml/nn/test_convolutional2d_layer.py | 4 ++-- tests/safeds/ml/nn/test_flatten_layer.py | 4 ++-- tests/safeds/ml/nn/test_pooling2d_layer.py | 4 ++-- 16 files changed, 48 insertions(+), 42 deletions(-) diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py index a7690d0df..d0cd5471c 100644 --- a/src/safeds/data/labeled/containers/_image_dataset.py +++ b/src/safeds/data/labeled/containers/_image_dataset.py @@ -51,7 +51,10 @@ def __init__(self, input_data: ImageList, output_data: T, batch_size: int = 1, s self._input_size: ImageSize = ImageSize(input_data.widths[0], input_data.heights[0], input_data.channel) self._input: _SingleSizeImageList = input_data._as_single_size_image_list() if ((isinstance(output_data, Table) or isinstance(output_data, Column)) and len(input_data) != output_data.number_of_rows) or (isinstance(output_data, ImageList) and len(input_data) != len(output_data)): - output_len = output_data.number_of_rows if isinstance(output_data, Table) else len(output_data) + if isinstance(output_data, Table): + output_len = output_data.number_of_rows + else: + output_len = len(output_data) raise OutputLengthMismatchError(f"{len(input_data)} != {output_len}") if isinstance(output_data, Table): non_numerical_columns = [] @@ -141,14 +144,12 @@ def get_output(self) -> T: the output data of this dataset """ output = self._output - safeds_output: T if isinstance(output, _TableAsTensor): - safeds_output = output._to_table() + return output._to_table() # type: ignore elif isinstance(output, _ColumnAsTensor): - safeds_output = output._to_column() + return output._to_column() # type: ignore else: - safeds_output = output - return safeds_output + return output # type: ignore def _get_batch(self, batch_number: int, batch_size: int | None = None) -> tuple[Tensor, Tensor]: import torch diff --git a/src/safeds/ml/nn/_convolutional2d_layer.py 
b/src/safeds/ml/nn/_convolutional2d_layer.py index 4ab5b4a05..b74230523 100644 --- a/src/safeds/ml/nn/_convolutional2d_layer.py +++ b/src/safeds/ml/nn/_convolutional2d_layer.py @@ -58,14 +58,16 @@ def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, pa self._input_size: ImageSize | None = None self._output_size: ImageSize | None = None - def _get_internal_layer(self, **kwargs: Unpack[TypedDict[str, Any]]) -> nn.Module: # noqa: ARG002 + def _get_internal_layer(self, **kwargs: Any) -> nn.Module: # noqa: ARG002 if self._input_size is None: raise ValueError("The input_size is not yet set. The internal layer can only be created when the input_size is set.") if "activation_function" not in kwargs: raise ValueError("The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs.") if kwargs.get("activation_function") not in ["sigmoid", "relu", "softmax"]: raise ValueError(f"The activation_function '{kwargs.get('activation_function')}' is not supported. Please choose one of the following: ['sigmoid', 'relu', 'softmax'].") - return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, kwargs.get("activation_function"), self._padding, self._stride, transpose=False) + else: + activation_function: Literal["sigmoid", "relu", "softmax"] = kwargs.get("activation_function") + return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride, transpose=False) @property def input_size(self) -> ImageSize: @@ -138,14 +140,16 @@ def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, pa super().__init__(output_channel, kernel_size, stride=stride, padding=padding) self._output_padding = output_padding - def _get_internal_layer(self, **kwargs: Unpack[dict[str, Any]]) -> nn.Module: # noqa: ARG002 + def _get_internal_layer(self, **kwargs: Any) -> nn.Module: # noqa: ARG002 if self._input_size is None: raise ValueError("The input_size is not yet set. The internal layer can only be created when the input_size is set.") if "activation_function" not in kwargs: raise ValueError("The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs.") if kwargs.get("activation_function") not in ["sigmoid", "relu", "softmax"]: raise ValueError(f"The activation_function '{kwargs.get('activation_function')}' is not supported. 
Please choose one of the following: ['sigmoid', 'relu', 'softmax'].") - return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, kwargs.get("activation_function"), self._padding, self._stride, transpose=True, output_padding=self._output_padding) + else: + activation_function: Literal["sigmoid", "relu", "softmax"] = kwargs.get("activation_function") + return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride, transpose=True, output_padding=self._output_padding) @property def output_size(self) -> ImageSize: diff --git a/src/safeds/ml/nn/_flatten_layer.py b/src/safeds/ml/nn/_flatten_layer.py index 463f383cd..2734c2a4f 100644 --- a/src/safeds/ml/nn/_flatten_layer.py +++ b/src/safeds/ml/nn/_flatten_layer.py @@ -30,7 +30,7 @@ def __init__(self) -> None: self._input_size: ImageSize | None = None self._output_size: int | None = None - def _get_internal_layer(self, **kwargs: Unpack[TypedDict[str, Any]]) -> nn.Module: # noqa: ARG002 + def _get_internal_layer(self, **kwargs: Any) -> nn.Module: # noqa: ARG002 return _create_internal_model() @property diff --git a/src/safeds/ml/nn/_forward_layer.py b/src/safeds/ml/nn/_forward_layer.py index 2577713a3..f66767373 100644 --- a/src/safeds/ml/nn/_forward_layer.py +++ b/src/safeds/ml/nn/_forward_layer.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Unpack, Any, TypedDict +from typing import TYPE_CHECKING, Any from safeds.data.image.typing import ImageSize @@ -62,10 +62,12 @@ def __init__(self, output_size: int, input_size: int | None = None): raise OutOfBoundsError(actual=output_size, name="output_size", lower_bound=ClosedBound(1)) self._output_size = output_size - def _get_internal_layer(self, **kwargs: Unpack[TypedDict[str, Any]]) -> nn.Module: # noqa: ARG002 + def _get_internal_layer(self, **kwargs: Any) -> nn.Module: # noqa: ARG002 if "activation_function" not in kwargs: raise ValueError("The activation_function is not set. 
The internal layer can only be created when the activation_function is provided in the kwargs.") - return _create_internal_model(self._input_size, self._output_size, kwargs.get("activation_function")) + else: + activation_function: str = kwargs.get("activation_function") + return _create_internal_model(self._input_size, self._output_size, activation_function) @property def input_size(self) -> int: diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py index b84000b9b..e0b093026 100644 --- a/src/safeds/ml/nn/_input_conversion.py +++ b/src/safeds/ml/nn/_input_conversion.py @@ -1,7 +1,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Generic, TypeVar, TypedDict, Any +from typing import TYPE_CHECKING, Generic, TypeVar, Any if TYPE_CHECKING: from torch.utils.data import DataLoader @@ -42,5 +42,5 @@ def _is_predict_data_valid(self, input_data: PT) -> bool: pass # pragma: no cover @abstractmethod - def _get_output_configuration(self) -> TypedDict[str, Any]: + def _get_output_configuration(self) -> dict[str, Any]: pass # pragma: no cover diff --git a/src/safeds/ml/nn/_input_conversion_image.py b/src/safeds/ml/nn/_input_conversion_image.py index 73e5a524c..c8b76eb14 100644 --- a/src/safeds/ml/nn/_input_conversion_image.py +++ b/src/safeds/ml/nn/_input_conversion_image.py @@ -1,11 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Type, TypedDict, Any +from typing import TYPE_CHECKING, Type, Any from safeds.data.image.containers import ImageList +from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from safeds.data.labeled.containers import ImageDataset from safeds.data.labeled.containers._image_dataset import _ColumnAsTensor, _TableAsTensor -from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList if TYPE_CHECKING: from safeds.data.image.typing import ImageSize @@ -67,5 +67,5 @@ def _is_fit_data_valid(self, input_data: ImageDataset) -> bool: def _is_predict_data_valid(self, input_data: ImageList) -> bool: return isinstance(input_data, _SingleSizeImageList) and input_data.sizes[0] == self._input_size - def _get_output_configuration(self) -> TypedDict[str, Any]: + def _get_output_configuration(self) -> dict[str, Any]: return {"column_names": self._column_names, "column_name": self._column_name, "one_hot_encoder": self._one_hot_encoder} diff --git a/src/safeds/ml/nn/_input_conversion_table.py b/src/safeds/ml/nn/_input_conversion_table.py index f6fdf16dc..31decf105 100644 --- a/src/safeds/ml/nn/_input_conversion_table.py +++ b/src/safeds/ml/nn/_input_conversion_table.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, TypedDict, Any +from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from torch.utils.data import DataLoader @@ -46,5 +46,5 @@ def _is_fit_data_valid(self, input_data: TabularDataset) -> bool: def _is_predict_data_valid(self, input_data: Table) -> bool: return (sorted(input_data.column_names)).__eq__(sorted(self._feature_names)) - def _get_output_configuration(self) -> TypedDict[str, Any]: + def _get_output_configuration(self) -> dict[str, Any]: return {} diff --git a/src/safeds/ml/nn/_layer.py b/src/safeds/ml/nn/_layer.py index 901b8db1f..364369f90 100644 --- a/src/safeds/ml/nn/_layer.py +++ b/src/safeds/ml/nn/_layer.py @@ -1,7 +1,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Unpack, Any, TypedDict 
+from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from torch import nn @@ -15,7 +15,7 @@ def __init__(self) -> None: pass # pragma: no cover @abstractmethod - def _get_internal_layer(self, **kwargs: Unpack[TypedDict[str, Any]]) -> nn.Module: + def _get_internal_layer(self, **kwargs: Any) -> nn.Module: pass # pragma: no cover @property diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 6c806b79e..b7757f6ef 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -283,7 +283,7 @@ def __init__( self._input_size = self._model.input_size self._batch_size = 1 self._is_fitted = False - self._num_of_classes = layers[-1].output_size if isinstance(layers[-1].output_size, int) else -1 # Is always int but linter doesn´t know + self._num_of_classes = layers[-1].output_size if isinstance(layers[-1].output_size, int) else -1 # Is always int but linter doesn't know self._total_number_of_batches_done = 0 self._total_number_of_epochs_done = 0 @@ -420,7 +420,6 @@ def predict(self, test_data: IPT) -> OT: predictions.append(torch.argmax(elem, dim=1)) else: predictions.append(elem.squeeze(dim=1).round()) - print(self._input_conversion._get_output_configuration()) return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0), **self._input_conversion._get_output_configuration()) @property diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py index 4c4b838ea..ce0845408 100644 --- a/src/safeds/ml/nn/_output_conversion.py +++ b/src/safeds/ml/nn/_output_conversion.py @@ -1,7 +1,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Generic, TypeVar, Unpack, Any, TypedDict +from typing import TYPE_CHECKING, Generic, TypeVar, Any from safeds.data.image.containers import ImageList from safeds.data.labeled.containers import ImageDataset, TabularDataset @@ -19,5 +19,5 @@ class _OutputConversion(Generic[IT, OT], ABC): """The output conversion for a neural network, defines the output parameters for the neural network.""" @abstractmethod - def _data_conversion(self, input_data: IT, output_data: Tensor, **kwargs: Unpack[TypedDict[str, Any]]) -> OT: + def _data_conversion(self, input_data: IT, output_data: Tensor, **kwargs: Any) -> OT: pass # pragma: no cover diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index a8a929607..3249d780b 100644 --- a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -1,7 +1,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any, TypedDict, Unpack, TypeVar +from typing import TYPE_CHECKING, Any, TypeVar from safeds.data.image.containers import ImageList from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList @@ -23,13 +23,13 @@ class _OutputConversionImage(_OutputConversion[ImageList, ImageDataset[T]], ABC) """The output conversion for a neural network, defines the output parameters for the neural network.""" @abstractmethod - def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Unpack[TypedDict[str, Any]]) -> ImageDataset[T]: + def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> ImageDataset[T]: pass # pragma: no cover class OutputConversionImageToColumn(_OutputConversionImage[Column]): - def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: 
Unpack[TypedDict[str, Any]]) -> ImageDataset[Column]: + def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> ImageDataset[Column]: import torch if not isinstance(input_data, _SingleSizeImageList): @@ -57,12 +57,12 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: class OutputConversionImageToTable(_OutputConversionImage[Table]): - def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Unpack[TypedDict[str, Any]]) -> ImageDataset[Table]: + def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> ImageDataset[Table]: import torch if not isinstance(input_data, _SingleSizeImageList): raise ValueError("The given input ImageList contains images of different sizes.") # noqa: TRY004 - if "column_names" not in kwargs or not isinstance(kwargs.get("column_names"), list) and all([isinstance(element, str) for element in kwargs.get("column_names")]): + if "column_names" not in kwargs or not isinstance(kwargs.get("column_names"), list) and all(isinstance(element, str) for element in kwargs.get("column_names")): raise ValueError("The column_names are not set. The data can only be converted if the column_names are provided as `list[str]` in the kwargs.") column_names: list[str] = kwargs.get("column_names") @@ -83,7 +83,7 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: class OutputConversionImageToImage(_OutputConversionImage[ImageList]): def _data_conversion( - self, input_data: ImageList, output_data: Tensor, **kwargs: Unpack[TypedDict[str, Any]] # noqa: ARG002 + self, input_data: ImageList, output_data: Tensor, **kwargs: Any # noqa: ARG002 ) -> ImageDataset[ImageList]: import torch diff --git a/src/safeds/ml/nn/_output_conversion_table.py b/src/safeds/ml/nn/_output_conversion_table.py index dcbe56d97..deb6dfff5 100644 --- a/src/safeds/ml/nn/_output_conversion_table.py +++ b/src/safeds/ml/nn/_output_conversion_table.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Unpack, Any, TypedDict +from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from torch import Tensor @@ -24,7 +24,7 @@ def __init__(self, prediction_name: str = "prediction") -> None: """ self._prediction_name = prediction_name - def _data_conversion(self, input_data: Table, output_data: Tensor, **kwargs: Unpack[dict[str, Any]]) -> TabularDataset: + def _data_conversion(self, input_data: Table, output_data: Tensor, **kwargs: Any) -> TabularDataset: # noqa: ARG002 return input_data.add_column(Column(self._prediction_name, output_data.tolist())).to_tabular_dataset( self._prediction_name, ) diff --git a/src/safeds/ml/nn/_pooling2d_layer.py b/src/safeds/ml/nn/_pooling2d_layer.py index c5467204b..146919139 100644 --- a/src/safeds/ml/nn/_pooling2d_layer.py +++ b/src/safeds/ml/nn/_pooling2d_layer.py @@ -52,7 +52,7 @@ def __init__(self, strategy: Literal["max", "avg"], kernel_size: int, *, stride: self._input_size: ImageSize | None = None self._output_size: ImageSize | None = None - def _get_internal_layer(self, **kwargs: Unpack[TypedDict[str, Any]]) -> nn.Module: # noqa: ARG002 + def _get_internal_layer(self, **kwargs: Any) -> nn.Module: # noqa: ARG002 return _create_internal_model(self._strategy, self._kernel_size, self._padding, self._stride) @property diff --git a/tests/safeds/ml/nn/test_convolutional2d_layer.py b/tests/safeds/ml/nn/test_convolutional2d_layer.py index b9ff48291..c76625bd6 100644 --- 
a/tests/safeds/ml/nn/test_convolutional2d_layer.py +++ b/tests/safeds/ml/nn/test_convolutional2d_layer.py @@ -52,9 +52,9 @@ def test_should_create_pooling_layer(self, activation_function: Literal["sigmoid def test_should_raise_if_input_size_not_set(self, activation_function: Literal["sigmoid", "relu", "softmax"], conv_type: Type[Convolutional2DLayer], output_channel: int, kernel_size: int, stride: int, padding: int) -> None: layer = conv_type(output_channel, kernel_size, stride=stride, padding=padding) with pytest.raises(ValueError, match=r"The input_size is not yet set."): - layer.input_size # noqa B018 + layer.input_size # noqa: B018 with pytest.raises(ValueError, match=r"The input_size is not yet set. The layer cannot compute the output_size if the input_size is not set."): - layer.output_size # noqa B018 + layer.output_size # noqa: B018 with pytest.raises(ValueError, match=r"The input_size is not yet set. The internal layer can only be created when the input_size is set."): layer._get_internal_layer(activation_function=activation_function) diff --git a/tests/safeds/ml/nn/test_flatten_layer.py b/tests/safeds/ml/nn/test_flatten_layer.py index 3dda93df7..f95476341 100644 --- a/tests/safeds/ml/nn/test_flatten_layer.py +++ b/tests/safeds/ml/nn/test_flatten_layer.py @@ -18,9 +18,9 @@ def test_should_create_flatten_layer(self) -> None: def test_should_raise_if_input_size_not_set(self) -> None: layer = FlattenLayer() with pytest.raises(ValueError, match=r"The input_size is not yet set."): - layer.input_size # noqa B018 + layer.input_size # noqa: B018 with pytest.raises(ValueError, match=r"The input_size is not yet set. The layer cannot compute the output_size if the input_size is not set."): - layer.output_size # noqa B018 + layer.output_size # noqa: B018 def test_should_raise_if_input_size_is_set_with_int(self) -> None: layer = FlattenLayer() diff --git a/tests/safeds/ml/nn/test_pooling2d_layer.py b/tests/safeds/ml/nn/test_pooling2d_layer.py index e75ce8e41..1af4d279d 100644 --- a/tests/safeds/ml/nn/test_pooling2d_layer.py +++ b/tests/safeds/ml/nn/test_pooling2d_layer.py @@ -36,9 +36,9 @@ def test_should_create_pooling_layer(self, strategy: Literal["max", "avg"], torc def test_should_raise_if_input_size_not_set(self, strategy: Literal["max", "avg"]) -> None: layer = _Pooling2DLayer(strategy, 2, stride=2, padding=2) with pytest.raises(ValueError, match=r"The input_size is not yet set."): - layer.input_size # noqa B018 + layer.input_size # noqa: B018 with pytest.raises(ValueError, match=r"The input_size is not yet set. 
The layer cannot compute the output_size if the input_size is not set."): - layer.output_size # noqa B018 + layer.output_size # noqa: B018 @pytest.mark.parametrize( "strategy", From 5bdce23a874f7fec5efa534c419188bac29b4ee7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Thu, 2 May 2024 00:16:15 +0200 Subject: [PATCH 21/42] refactor: linter --- .../data/labeled/containers/_image_dataset.py | 6 +++--- src/safeds/exceptions/_ml.py | 14 -------------- src/safeds/ml/nn/_convolutional2d_layer.py | 4 ++-- src/safeds/ml/nn/_forward_layer.py | 2 +- src/safeds/ml/nn/_output_conversion.py | 4 ++-- src/safeds/ml/nn/_output_conversion_image.py | 8 ++++---- 6 files changed, 12 insertions(+), 26 deletions(-) diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py index d0cd5471c..fa22d69ee 100644 --- a/src/safeds/data/labeled/containers/_image_dataset.py +++ b/src/safeds/data/labeled/containers/_image_dataset.py @@ -145,11 +145,11 @@ def get_output(self) -> T: """ output = self._output if isinstance(output, _TableAsTensor): - return output._to_table() # type: ignore + return output._to_table() # type: ignore[assignment] elif isinstance(output, _ColumnAsTensor): - return output._to_column() # type: ignore + return output._to_column() # type: ignore[assignment] else: - return output # type: ignore + return output # type: ignore[assignment] def _get_batch(self, batch_number: int, batch_size: int | None = None) -> tuple[Tensor, Tensor]: import torch diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py index 64214effe..d87960f94 100644 --- a/src/safeds/exceptions/_ml.py +++ b/src/safeds/exceptions/_ml.py @@ -1,20 +1,6 @@ from safeds.data.image.typing import ImageSize -class DatasetContainsTargetError(ValueError): - """ - Raised when a dataset contains the target column already. - - Parameters - ---------- - target_name: str - The name of the target column. - """ - - def __init__(self, target_name: str): - super().__init__(f"Dataset already contains the target column '{target_name}'.") - - class DatasetMissesFeaturesError(ValueError): """ Raised when a dataset misses feature columns. diff --git a/src/safeds/ml/nn/_convolutional2d_layer.py b/src/safeds/ml/nn/_convolutional2d_layer.py index b74230523..e09162aaf 100644 --- a/src/safeds/ml/nn/_convolutional2d_layer.py +++ b/src/safeds/ml/nn/_convolutional2d_layer.py @@ -66,7 +66,7 @@ def _get_internal_layer(self, **kwargs: Any) -> nn.Module: # noqa: ARG002 if kwargs.get("activation_function") not in ["sigmoid", "relu", "softmax"]: raise ValueError(f"The activation_function '{kwargs.get('activation_function')}' is not supported. Please choose one of the following: ['sigmoid', 'relu', 'softmax'].") else: - activation_function: Literal["sigmoid", "relu", "softmax"] = kwargs.get("activation_function") + activation_function: Literal["sigmoid", "relu", "softmax"] = kwargs["activation_function"] return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride, transpose=False) @property @@ -148,7 +148,7 @@ def _get_internal_layer(self, **kwargs: Any) -> nn.Module: # noqa: ARG002 if kwargs.get("activation_function") not in ["sigmoid", "relu", "softmax"]: raise ValueError(f"The activation_function '{kwargs.get('activation_function')}' is not supported. 
Please choose one of the following: ['sigmoid', 'relu', 'softmax'].") else: - activation_function: Literal["sigmoid", "relu", "softmax"] = kwargs.get("activation_function") + activation_function: Literal["sigmoid", "relu", "softmax"] = kwargs["activation_function"] return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride, transpose=True, output_padding=self._output_padding) @property diff --git a/src/safeds/ml/nn/_forward_layer.py b/src/safeds/ml/nn/_forward_layer.py index f66767373..e8b212a84 100644 --- a/src/safeds/ml/nn/_forward_layer.py +++ b/src/safeds/ml/nn/_forward_layer.py @@ -66,7 +66,7 @@ def _get_internal_layer(self, **kwargs: Any) -> nn.Module: # noqa: ARG002 if "activation_function" not in kwargs: raise ValueError("The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs.") else: - activation_function: str = kwargs.get("activation_function") + activation_function: str = kwargs["activation_function"] return _create_internal_model(self._input_size, self._output_size, activation_function) @property diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py index ce0845408..071eac408 100644 --- a/src/safeds/ml/nn/_output_conversion.py +++ b/src/safeds/ml/nn/_output_conversion.py @@ -9,10 +9,10 @@ if TYPE_CHECKING: from torch import Tensor -from safeds.data.tabular.containers import Table, TimeSeries +from safeds.data.tabular.containers import Table, TimeSeries, Column IT = TypeVar("IT", Table, TimeSeries, ImageList) -OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset) +OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset[Column], ImageDataset[Table], ImageDataset[ImageList]) class _OutputConversion(Generic[IT, OT], ABC): diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index 3249d780b..71968085d 100644 --- a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -38,8 +38,8 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: raise ValueError("The column_name is not set. The data can only be converted if the column_name is provided as `str` in the kwargs.") if "one_hot_encoder" not in kwargs or not isinstance(kwargs.get("one_hot_encoder"), OneHotEncoder): raise ValueError("The one_hot_encoder is not set. 
The data can only be converted if the one_hot_encoder is provided as `OneHotEncoder` in the kwargs.") - one_hot_encoder: OneHotEncoder = kwargs.get("one_hot_encoder") - column_name: str = kwargs.get("column_name") + one_hot_encoder: OneHotEncoder = kwargs["one_hot_encoder"] + column_name: str = kwargs["column_name"] output = torch.zeros(len(input_data), len(one_hot_encoder.get_names_of_added_columns())) output[torch.arange(len(input_data)), output_data] = 1 @@ -62,9 +62,9 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: if not isinstance(input_data, _SingleSizeImageList): raise ValueError("The given input ImageList contains images of different sizes.") # noqa: TRY004 - if "column_names" not in kwargs or not isinstance(kwargs.get("column_names"), list) and all(isinstance(element, str) for element in kwargs.get("column_names")): + if "column_names" not in kwargs or not isinstance(kwargs.get("column_names"), list) and all(isinstance(element, str) for element in kwargs["column_names"]): raise ValueError("The column_names are not set. The data can only be converted if the column_names are provided as `list[str]` in the kwargs.") - column_names: list[str] = kwargs.get("column_names") + column_names: list[str] = kwargs["column_names"] output = torch.zeros(len(input_data), len(column_names)) output[torch.arange(len(input_data)), output_data] = 1 From 032f58f1682c772bea99e611273fa81362ecd4c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Thu, 2 May 2024 00:23:51 +0200 Subject: [PATCH 22/42] refactor: mypy linter --- src/safeds/data/labeled/containers/_image_dataset.py | 6 +++--- src/safeds/ml/nn/_model.py | 4 ++-- src/safeds/ml/nn/_output_conversion_image.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py index fa22d69ee..50cab8741 100644 --- a/src/safeds/data/labeled/containers/_image_dataset.py +++ b/src/safeds/data/labeled/containers/_image_dataset.py @@ -145,11 +145,11 @@ def get_output(self) -> T: """ output = self._output if isinstance(output, _TableAsTensor): - return output._to_table() # type: ignore[assignment] + return output._to_table() # mypy: ignore[assignment] ignore[return-value] elif isinstance(output, _ColumnAsTensor): - return output._to_column() # type: ignore[assignment] + return output._to_column() # mypy: ignore[assignment] ignore[return-value] else: - return output # type: ignore[assignment] + return output # mypy: ignore[assignment] ignore[return-value] def _get_batch(self, batch_number: int, batch_size: int | None = None) -> tuple[Tensor, Tensor]: import torch diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index b7757f6ef..1e311d71b 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -5,7 +5,7 @@ from safeds.data.image.containers import ImageList from safeds.data.labeled.containers import ImageDataset, TabularDataset -from safeds.data.tabular.containers import Table, TimeSeries +from safeds.data.tabular.containers import Table, TimeSeries, Column from safeds.exceptions import ( ClosedBound, FeatureDataMismatchError, @@ -32,7 +32,7 @@ IFT = TypeVar("IFT", TabularDataset, TimeSeries, ImageDataset) # InputFitType IPT = TypeVar("IPT", Table, TimeSeries, ImageList) # InputPredictType -OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset) # OutputType +OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset[Column], ImageDataset[Table], 
ImageDataset[ImageList]) # OutputType class NeuralNetworkRegressor(Generic[IFT, IPT, OT]): diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index 71968085d..6ce808be5 100644 --- a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -16,10 +16,10 @@ from safeds.data.tabular.transformation import OneHotEncoder -T = TypeVar("T", Column, Table, ImageList) +T = TypeVar("T", ImageDataset[Column], ImageDataset[Table], ImageDataset[ImageList]) -class _OutputConversionImage(_OutputConversion[ImageList, ImageDataset[T]], ABC): +class _OutputConversionImage(_OutputConversion[ImageList, T], ABC): """The output conversion for a neural network, defines the output parameters for the neural network.""" @abstractmethod From 4b30a5873c11e1ffc5cc06af5eb843cccc0d0b01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Thu, 2 May 2024 00:31:36 +0200 Subject: [PATCH 23/42] refactor: mypy linter --- src/safeds/data/labeled/containers/_image_dataset.py | 6 +++--- src/safeds/ml/nn/_model.py | 2 +- src/safeds/ml/nn/_output_conversion.py | 2 +- src/safeds/ml/nn/_output_conversion_image.py | 12 ++++++------ 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py index 50cab8741..e63487007 100644 --- a/src/safeds/data/labeled/containers/_image_dataset.py +++ b/src/safeds/data/labeled/containers/_image_dataset.py @@ -145,11 +145,11 @@ def get_output(self) -> T: """ output = self._output if isinstance(output, _TableAsTensor): - return output._to_table() # mypy: ignore[assignment] ignore[return-value] + return output._to_table() # mypy: ignore[assignment, return-value] elif isinstance(output, _ColumnAsTensor): - return output._to_column() # mypy: ignore[assignment] ignore[return-value] + return output._to_column() # mypy: ignore[assignment, return-value] else: - return output # mypy: ignore[assignment] ignore[return-value] + return output # mypy: ignore[assignment, return-value] def _get_batch(self, batch_number: int, batch_size: int | None = None) -> tuple[Tensor, Tensor]: import torch diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 1e311d71b..5fada61be 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -32,7 +32,7 @@ IFT = TypeVar("IFT", TabularDataset, TimeSeries, ImageDataset) # InputFitType IPT = TypeVar("IPT", Table, TimeSeries, ImageList) # InputPredictType -OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset[Column], ImageDataset[Table], ImageDataset[ImageList]) # OutputType +OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset) # OutputType class NeuralNetworkRegressor(Generic[IFT, IPT, OT]): diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py index 071eac408..f1c0070b8 100644 --- a/src/safeds/ml/nn/_output_conversion.py +++ b/src/safeds/ml/nn/_output_conversion.py @@ -12,7 +12,7 @@ from safeds.data.tabular.containers import Table, TimeSeries, Column IT = TypeVar("IT", Table, TimeSeries, ImageList) -OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset[Column], ImageDataset[Table], ImageDataset[ImageList]) +OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset) class _OutputConversion(Generic[IT, OT], ABC): diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index 6ce808be5..cc4b9d7e0 100644 --- 
a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -1,7 +1,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any, TypeVar +from typing import TYPE_CHECKING, Any, TypeVar, Generic from safeds.data.image.containers import ImageList from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList @@ -19,15 +19,15 @@ T = TypeVar("T", ImageDataset[Column], ImageDataset[Table], ImageDataset[ImageList]) -class _OutputConversionImage(_OutputConversion[ImageList, T], ABC): +class _OutputConversionImage(_OutputConversion[ImageList, Generic[T]], ABC): """The output conversion for a neural network, defines the output parameters for the neural network.""" @abstractmethod - def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> ImageDataset[T]: + def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> T: pass # pragma: no cover -class OutputConversionImageToColumn(_OutputConversionImage[Column]): +class OutputConversionImageToColumn(_OutputConversionImage[ImageDataset[Column]]): def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> ImageDataset[Column]: import torch @@ -55,7 +55,7 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: return im_dataset -class OutputConversionImageToTable(_OutputConversionImage[Table]): +class OutputConversionImageToTable(_OutputConversionImage[ImageDataset[Table]]): def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> ImageDataset[Table]: import torch @@ -80,7 +80,7 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: return im_dataset -class OutputConversionImageToImage(_OutputConversionImage[ImageList]): +class OutputConversionImageToImage(_OutputConversionImage[ImageDataset[ImageList]]): def _data_conversion( self, input_data: ImageList, output_data: Tensor, **kwargs: Any # noqa: ARG002 From 33e8db88da39e916bb059174dd047737355d83f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Thu, 2 May 2024 00:40:36 +0200 Subject: [PATCH 24/42] refactor: mypy linter --- src/safeds/data/labeled/containers/_image_dataset.py | 6 +++--- src/safeds/ml/nn/_output_conversion.py | 2 +- src/safeds/ml/nn/_output_conversion_image.py | 2 +- tests/safeds/ml/nn/test_cnn_workflow.py | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py index e63487007..21ff623c6 100644 --- a/src/safeds/data/labeled/containers/_image_dataset.py +++ b/src/safeds/data/labeled/containers/_image_dataset.py @@ -145,11 +145,11 @@ def get_output(self) -> T: """ output = self._output if isinstance(output, _TableAsTensor): - return output._to_table() # mypy: ignore[assignment, return-value] + return output._to_table() # mypy: ignore[return-value] elif isinstance(output, _ColumnAsTensor): - return output._to_column() # mypy: ignore[assignment, return-value] + return output._to_column() # mypy: ignore[return-value] else: - return output # mypy: ignore[assignment, return-value] + return output # mypy: ignore[return-value] def _get_batch(self, batch_number: int, batch_size: int | None = None) -> tuple[Tensor, Tensor]: import torch diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py index f1c0070b8..ce0845408 
100644 --- a/src/safeds/ml/nn/_output_conversion.py +++ b/src/safeds/ml/nn/_output_conversion.py @@ -9,7 +9,7 @@ if TYPE_CHECKING: from torch import Tensor -from safeds.data.tabular.containers import Table, TimeSeries, Column +from safeds.data.tabular.containers import Table, TimeSeries IT = TypeVar("IT", Table, TimeSeries, ImageList) OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset) diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index cc4b9d7e0..6f5702401 100644 --- a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -19,7 +19,7 @@ T = TypeVar("T", ImageDataset[Column], ImageDataset[Table], ImageDataset[ImageList]) -class _OutputConversionImage(_OutputConversion[ImageList, Generic[T]], ABC): +class _OutputConversionImage(_OutputConversion[ImageList, T], ABC): """The output conversion for a neural network, defines the output parameters for the neural network.""" @abstractmethod diff --git a/tests/safeds/ml/nn/test_cnn_workflow.py b/tests/safeds/ml/nn/test_cnn_workflow.py index bed33aa3d..d3c905890 100644 --- a/tests/safeds/ml/nn/test_cnn_workflow.py +++ b/tests/safeds/ml/nn/test_cnn_workflow.py @@ -60,7 +60,7 @@ def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[floa nn = nn_original.fit(image_dataset, epoch_size=2) assert str(nn_original._model.state_dict().values()) != str(nn._model.state_dict().values()) assert nn._model.state_dict()["_pytorch_layers.3._layer.bias"].tolist() == layer_3_bias - prediction = nn.predict(image_dataset.get_input()) + prediction: ImageDataset = nn.predict(image_dataset.get_input()) assert one_hot_encoder.inverse_transform(prediction.get_output()) == Table({"class": prediction_label}) @@ -103,7 +103,7 @@ def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[floa nn = nn_original.fit(image_dataset, epoch_size=2) assert str(nn_original._model.state_dict().values()) != str(nn._model.state_dict().values()) assert nn._model.state_dict()["_pytorch_layers.3._layer.bias"].tolist() == layer_3_bias - prediction = nn.predict(image_dataset.get_input()) + prediction: ImageDataset = nn.predict(image_dataset.get_input()) assert prediction.get_output() == Column("class", prediction_label) @@ -140,5 +140,5 @@ def test_should_train_and_predict_model(self, seed: int, snapshot_png_image_list nn = nn_original.fit(image_dataset, epoch_size=20) assert str(nn_original._model.state_dict().values()) != str(nn._model.state_dict().values()) assert nn._model.state_dict()["_pytorch_layers.3._layer.bias"].tolist() == layer_3_bias - prediction = nn.predict(image_dataset.get_input()) + prediction: ImageDataset = nn.predict(image_dataset.get_input()) assert prediction.get_output() == snapshot_png_image_list From 19f1f24138a7177d428b3f7981156b48cfdf47c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Thu, 2 May 2024 00:53:18 +0200 Subject: [PATCH 25/42] refactor: mypy linter --- .../data/labeled/containers/_image_dataset.py | 6 +++--- src/safeds/ml/nn/_output_conversion_image.py | 14 +++++--------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py index 21ff623c6..ee5e5e4fe 100644 --- a/src/safeds/data/labeled/containers/_image_dataset.py +++ b/src/safeds/data/labeled/containers/_image_dataset.py @@ -145,11 +145,11 @@ def get_output(self) -> T: """ output = self._output if isinstance(output, 
_TableAsTensor): - return output._to_table() # mypy: ignore[return-value] + return output._to_table() # type: ignore[return-value] elif isinstance(output, _ColumnAsTensor): - return output._to_column() # mypy: ignore[return-value] + return output._to_column() # type: ignore[return-value] else: - return output # mypy: ignore[return-value] + return output # type: ignore[return-value] def _get_batch(self, batch_number: int, batch_size: int | None = None) -> tuple[Tensor, Tensor]: import torch diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index 6f5702401..a5760c559 100644 --- a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -19,15 +19,11 @@ T = TypeVar("T", ImageDataset[Column], ImageDataset[Table], ImageDataset[ImageList]) -class _OutputConversionImage(_OutputConversion[ImageList, T], ABC): - """The output conversion for a neural network, defines the output parameters for the neural network.""" +class _OutputConversionImage: + pass # pragma: no cover - @abstractmethod - def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> T: - pass # pragma: no cover - -class OutputConversionImageToColumn(_OutputConversionImage[ImageDataset[Column]]): +class OutputConversionImageToColumn(_OutputConversion[ImageList, ImageDataset[Column]], _OutputConversionImage): def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> ImageDataset[Column]: import torch @@ -55,7 +51,7 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: return im_dataset -class OutputConversionImageToTable(_OutputConversionImage[ImageDataset[Table]]): +class OutputConversionImageToTable(_OutputConversion[ImageList, ImageDataset[Table]], _OutputConversionImage): def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> ImageDataset[Table]: import torch @@ -80,7 +76,7 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: return im_dataset -class OutputConversionImageToImage(_OutputConversionImage[ImageDataset[ImageList]]): +class OutputConversionImageToImage(_OutputConversion[ImageList, ImageDataset[ImageList]], _OutputConversionImage): def _data_conversion( self, input_data: ImageList, output_data: Tensor, **kwargs: Any # noqa: ARG002 From 550331250f2618778da6ffe7bf3dabe3c36ba53c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Thu, 2 May 2024 00:58:19 +0200 Subject: [PATCH 26/42] refactor: mypy linter --- src/safeds/ml/nn/_model.py | 2 +- src/safeds/ml/nn/_output_conversion.py | 4 ++-- src/safeds/ml/nn/_output_conversion_image.py | 8 +++----- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 5fada61be..1e311d71b 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -32,7 +32,7 @@ IFT = TypeVar("IFT", TabularDataset, TimeSeries, ImageDataset) # InputFitType IPT = TypeVar("IPT", Table, TimeSeries, ImageList) # InputPredictType -OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset) # OutputType +OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset[Column], ImageDataset[Table], ImageDataset[ImageList]) # OutputType class NeuralNetworkRegressor(Generic[IFT, IPT, OT]): diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py index ce0845408..071eac408 100644 --- a/src/safeds/ml/nn/_output_conversion.py +++ 
b/src/safeds/ml/nn/_output_conversion.py @@ -9,10 +9,10 @@ if TYPE_CHECKING: from torch import Tensor -from safeds.data.tabular.containers import Table, TimeSeries +from safeds.data.tabular.containers import Table, TimeSeries, Column IT = TypeVar("IT", Table, TimeSeries, ImageList) -OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset) +OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset[Column], ImageDataset[Table], ImageDataset[ImageList]) class _OutputConversion(Generic[IT, OT], ABC): diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index a5760c559..362c3ce9b 100644 --- a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -1,7 +1,7 @@ from __future__ import annotations -from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any, TypeVar, Generic +from abc import ABC +from typing import TYPE_CHECKING, Any from safeds.data.image.containers import ImageList from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList @@ -16,10 +16,8 @@ from safeds.data.tabular.transformation import OneHotEncoder -T = TypeVar("T", ImageDataset[Column], ImageDataset[Table], ImageDataset[ImageList]) - -class _OutputConversionImage: +class _OutputConversionImage(_OutputConversion, ABC): pass # pragma: no cover From aedf2be4fe5aa9884b610c0d9388d9876638401d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Thu, 2 May 2024 01:08:01 +0200 Subject: [PATCH 27/42] refactor: mypy linter --- src/safeds/ml/nn/_model.py | 2 +- src/safeds/ml/nn/_output_conversion.py | 4 ++-- src/safeds/ml/nn/_output_conversion_image.py | 2 +- tests/safeds/ml/nn/test_output_conversion_image.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 1e311d71b..5fada61be 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -32,7 +32,7 @@ IFT = TypeVar("IFT", TabularDataset, TimeSeries, ImageDataset) # InputFitType IPT = TypeVar("IPT", Table, TimeSeries, ImageList) # InputPredictType -OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset[Column], ImageDataset[Table], ImageDataset[ImageList]) # OutputType +OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset) # OutputType class NeuralNetworkRegressor(Generic[IFT, IPT, OT]): diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py index 071eac408..ce0845408 100644 --- a/src/safeds/ml/nn/_output_conversion.py +++ b/src/safeds/ml/nn/_output_conversion.py @@ -9,10 +9,10 @@ if TYPE_CHECKING: from torch import Tensor -from safeds.data.tabular.containers import Table, TimeSeries, Column +from safeds.data.tabular.containers import Table, TimeSeries IT = TypeVar("IT", Table, TimeSeries, ImageList) -OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset[Column], ImageDataset[Table], ImageDataset[ImageList]) +OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset) class _OutputConversion(Generic[IT, OT], ABC): diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index 362c3ce9b..d9dbfa93c 100644 --- a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -17,7 +17,7 @@ from safeds.data.tabular.transformation import OneHotEncoder -class _OutputConversionImage(_OutputConversion, ABC): +class _OutputConversionImage: pass # pragma: no cover diff --git 
a/tests/safeds/ml/nn/test_output_conversion_image.py b/tests/safeds/ml/nn/test_output_conversion_image.py index a6f297e28..816a4bc32 100644 --- a/tests/safeds/ml/nn/test_output_conversion_image.py +++ b/tests/safeds/ml/nn/test_output_conversion_image.py @@ -5,7 +5,7 @@ from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from safeds.data.tabular.transformation import OneHotEncoder from safeds.ml.nn import OutputConversionImageToTable, OutputConversionImageToImage, OutputConversionImageToColumn -from safeds.ml.nn._output_conversion_image import _OutputConversionImage +from safeds.ml.nn._output_conversion import _OutputConversion class TestDataConversionImage: @@ -18,7 +18,7 @@ class TestDataConversionImage: (OutputConversionImageToImage(), {}), ] ) - def test_should_raise_if_input_data_is_multi_size(self, output_conversion: _OutputConversionImage, kwargs: dict) -> None: + def test_should_raise_if_input_data_is_multi_size(self, output_conversion: _OutputConversion, kwargs: dict) -> None: with pytest.raises(ValueError, match=r"The given input ImageList contains images of different sizes."): output_conversion._data_conversion(input_data=_MultiSizeImageList(), output_data=torch.empty(1), **kwargs) From 2953f36ae7f096266b372f50ae1657bef2246a4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Thu, 2 May 2024 01:11:16 +0200 Subject: [PATCH 28/42] refactor: mypy linter --- src/safeds/ml/nn/_output_conversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py index ce0845408..c2ddd02d6 100644 --- a/src/safeds/ml/nn/_output_conversion.py +++ b/src/safeds/ml/nn/_output_conversion.py @@ -12,7 +12,7 @@ from safeds.data.tabular.containers import Table, TimeSeries IT = TypeVar("IT", Table, TimeSeries, ImageList) -OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset) +OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset[Any]) class _OutputConversion(Generic[IT, OT], ABC): From 09748072e3479d5a22c32c0af48d73356c2ae9d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Thu, 2 May 2024 01:15:23 +0200 Subject: [PATCH 29/42] refactor: mypy linter --- src/safeds/ml/nn/_output_conversion.py | 2 +- src/safeds/ml/nn/_output_conversion_image.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py index c2ddd02d6..ce0845408 100644 --- a/src/safeds/ml/nn/_output_conversion.py +++ b/src/safeds/ml/nn/_output_conversion.py @@ -12,7 +12,7 @@ from safeds.data.tabular.containers import Table, TimeSeries IT = TypeVar("IT", Table, TimeSeries, ImageList) -OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset[Any]) +OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset) class _OutputConversion(Generic[IT, OT], ABC): diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index d9dbfa93c..937787679 100644 --- a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -21,7 +21,7 @@ class _OutputConversionImage: pass # pragma: no cover -class OutputConversionImageToColumn(_OutputConversion[ImageList, ImageDataset[Column]], _OutputConversionImage): +class OutputConversionImageToColumn(_OutputConversion[ImageList, ImageDataset], _OutputConversionImage): def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> 
ImageDataset[Column]: import torch @@ -49,7 +49,7 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: return im_dataset -class OutputConversionImageToTable(_OutputConversion[ImageList, ImageDataset[Table]], _OutputConversionImage): +class OutputConversionImageToTable(_OutputConversion[ImageList, ImageDataset], _OutputConversionImage): def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> ImageDataset[Table]: import torch @@ -74,7 +74,7 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: return im_dataset -class OutputConversionImageToImage(_OutputConversion[ImageList, ImageDataset[ImageList]], _OutputConversionImage): +class OutputConversionImageToImage(_OutputConversion[ImageList, ImageDataset], _OutputConversionImage): def _data_conversion( self, input_data: ImageList, output_data: Tensor, **kwargs: Any # noqa: ARG002 From ff407cd150b3d1fc649da7d0c37f77b5fc792506 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Wed, 1 May 2024 23:17:06 +0000 Subject: [PATCH 30/42] style: apply automated linter fixes --- .../image/containers/_empty_image_list.py | 4 +- src/safeds/data/image/containers/_image.py | 11 +- .../data/image/containers/_image_list.py | 14 +- .../containers/_multi_size_image_list.py | 2 +- .../containers/_single_size_image_list.py | 15 +- src/safeds/data/image/typing/_image_size.py | 2 +- .../data/labeled/containers/_image_dataset.py | 76 +++- .../transformation/_one_hot_encoder.py | 8 +- src/safeds/ml/nn/__init__.py | 9 +- src/safeds/ml/nn/_convolutional2d_layer.py | 129 ++++-- src/safeds/ml/nn/_flatten_layer.py | 6 +- src/safeds/ml/nn/_forward_layer.py | 6 +- src/safeds/ml/nn/_input_conversion.py | 10 +- src/safeds/ml/nn/_input_conversion_image.py | 17 +- src/safeds/ml/nn/_model.py | 98 +++-- src/safeds/ml/nn/_output_conversion.py | 2 +- src/safeds/ml/nn/_output_conversion_image.py | 38 +- src/safeds/ml/nn/_pooling2d_layer.py | 14 +- .../data/image/containers/test_image.py | 7 +- .../data/image/containers/test_image_list.py | 42 +- .../data/image/typing/test_image_size.py | 91 ++--- .../labeled/containers/test_image_dataset.py | 123 ++++-- .../transformation/test_one_hot_encoder.py | 2 +- tests/safeds/ml/nn/test_cnn_workflow.py | 128 ++++-- .../ml/nn/test_convolutional2d_layer.py | 80 +++- tests/safeds/ml/nn/test_flatten_layer.py | 8 +- tests/safeds/ml/nn/test_forward_layer.py | 18 +- .../ml/nn/test_input_conversion_image.py | 68 +++- tests/safeds/ml/nn/test_model.py | 384 +++++++++++++++--- .../ml/nn/test_output_conversion_image.py | 32 +- tests/safeds/ml/nn/test_pooling2d_layer.py | 12 +- 31 files changed, 1071 insertions(+), 385 deletions(-) diff --git a/src/safeds/data/image/containers/_empty_image_list.py b/src/safeds/data/image/containers/_empty_image_list.py index dff09549f..d69b50f92 100644 --- a/src/safeds/data/image/containers/_empty_image_list.py +++ b/src/safeds/data/image/containers/_empty_image_list.py @@ -4,8 +4,6 @@ from typing import TYPE_CHECKING, Self from safeds._utils import _structural_hash -from safeds.data.image.containers._image_list import ImageList -from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from safeds.data.image._utils._image_transformation_error_and_warning_checks import ( _check_add_noise_errors, _check_adjust_brightness_errors_and_warnings, @@ -17,6 +15,8 @@ _check_resize_errors, _check_sharpen_errors_and_warnings, ) +from 
safeds.data.image.containers._image_list import ImageList +from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from safeds.exceptions import IndexOutOfBoundsError if TYPE_CHECKING: diff --git a/src/safeds/data/image/containers/_image.py b/src/safeds/data/image/containers/_image.py index 82990bc45..b63bb31d8 100644 --- a/src/safeds/data/image/containers/_image.py +++ b/src/safeds/data/image/containers/_image.py @@ -22,11 +22,10 @@ from safeds.exceptions import IllegalFormatError if TYPE_CHECKING: + from numpy import dtype, ndarray from torch import Tensor from torch.types import Device - from numpy import ndarray, dtype - class Image: """ @@ -178,7 +177,13 @@ def __array__(self, numpy_dtype: str | dtype = None) -> ndarray: """ from numpy import uint8 - return self._image_tensor.permute(1, 2, 0).detach().cpu().numpy().astype(uint8 if numpy_dtype is None else numpy_dtype) + return ( + self._image_tensor.permute(1, 2, 0) + .detach() + .cpu() + .numpy() + .astype(uint8 if numpy_dtype is None else numpy_dtype) + ) def _repr_jpeg_(self) -> bytes | None: """ diff --git a/src/safeds/data/image/containers/_image_list.py b/src/safeds/data/image/containers/_image_list.py index 2fdded2fd..0af2c6931 100644 --- a/src/safeds/data/image/containers/_image_list.py +++ b/src/safeds/data/image/containers/_image_list.py @@ -5,7 +5,7 @@ import os from abc import ABCMeta, abstractmethod from pathlib import Path -from typing import TYPE_CHECKING, overload, Literal +from typing import TYPE_CHECKING, Literal, overload from safeds.data.image.containers._image import Image @@ -90,14 +90,20 @@ def from_files(path: str | Path | Sequence[str | Path], return_filenames: Litera @staticmethod @overload - def from_files(path: str | Path | Sequence[str | Path], return_filenames: Literal[True]) -> tuple[ImageList, list[str]]: ... + def from_files( + path: str | Path | Sequence[str | Path], return_filenames: Literal[True], + ) -> tuple[ImageList, list[str]]: ... @staticmethod @overload - def from_files(path: str | Path | Sequence[str | Path], return_filenames: bool) -> ImageList | tuple[ImageList, list[str]]: ... + def from_files( + path: str | Path | Sequence[str | Path], return_filenames: bool, + ) -> ImageList | tuple[ImageList, list[str]]: ... @staticmethod - def from_files(path: str | Path | Sequence[str | Path], return_filenames: bool = False) -> ImageList | tuple[ImageList, list[str]]: + def from_files( + path: str | Path | Sequence[str | Path], return_filenames: bool = False, + ) -> ImageList | tuple[ImageList, list[str]]: """ Create an ImageList from a directory or a list of files. 
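The `from_files` overloads just above pair `Literal[False]` and `Literal[True]` flag types with distinct return types, so callers get `ImageList` or `tuple[ImageList, list[str]]` directly instead of a union they must narrow by hand. A minimal sketch of the pattern, with illustrative names rather than the Safe-DS signatures:

from __future__ import annotations

from typing import Literal, overload

@overload
def load_all(path: str, return_names: Literal[False] = ...) -> list[bytes]: ...
@overload
def load_all(path: str, return_names: Literal[True]) -> tuple[list[bytes], list[str]]: ...
@overload
def load_all(path: str, return_names: bool) -> list[bytes] | tuple[list[bytes], list[str]]: ...
def load_all(path: str, return_names: bool = False) -> list[bytes] | tuple[list[bytes], list[str]]:
    data = [b""]  # stand-in for real file loading
    return (data, [path]) if return_names else data

With this, a checker types `load_all(p)` as `list[bytes]` and `load_all(p, return_names=True)` as the tuple; the third overload only exists for callers that pass a plain `bool` computed at runtime.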
diff --git a/src/safeds/data/image/containers/_multi_size_image_list.py b/src/safeds/data/image/containers/_multi_size_image_list.py index f1bda860e..e21f3eba0 100644 --- a/src/safeds/data/image/containers/_multi_size_image_list.py +++ b/src/safeds/data/image/containers/_multi_size_image_list.py @@ -6,11 +6,11 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash -from safeds.data.image.containers import Image, ImageList from safeds.data.image._utils._image_transformation_error_and_warning_checks import ( _check_blur_errors_and_warnings, _check_remove_images_with_size_errors, ) +from safeds.data.image.containers import Image, ImageList from safeds.exceptions import ( DuplicateIndexError, IllegalFormatError, diff --git a/src/safeds/data/image/containers/_single_size_image_list.py b/src/safeds/data/image/containers/_single_size_image_list.py index 54b5b29fc..32a4059d0 100644 --- a/src/safeds/data/image/containers/_single_size_image_list.py +++ b/src/safeds/data/image/containers/_single_size_image_list.py @@ -7,8 +7,6 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash -from safeds.data.image.containers._image import Image -from safeds.data.image.containers._image_list import ImageList from safeds.data.image._utils._image_transformation_error_and_warning_checks import ( _check_add_noise_errors, _check_adjust_brightness_errors_and_warnings, @@ -20,6 +18,8 @@ _check_resize_errors, _check_sharpen_errors_and_warnings, ) +from safeds.data.image.containers._image import Image +from safeds.data.image.containers._image_list import ImageList from safeds.data.image.typing import ImageSize from safeds.exceptions import ( DuplicateIndexError, @@ -132,7 +132,12 @@ def _get_batch(self, batch_number: int, batch_size: int | None = None) -> Tensor if batch_size * batch_number >= len(self): raise IndexOutOfBoundsError(batch_size * batch_number) max_index = batch_size * (batch_number + 1) if batch_size * (batch_number + 1) < len(self) else len(self) - return self._tensor[[self._indices_to_tensor_positions[index] for index in range(batch_size * batch_number, max_index)]].to(torch.float32) / 255 + return ( + self._tensor[ + [self._indices_to_tensor_positions[index] for index in range(batch_size * batch_number, max_index)] + ].to(torch.float32) + / 255 + ) def clone(self) -> ImageList: cloned_image_list = self._clone_without_tensor() @@ -224,7 +229,9 @@ def channel(self) -> int: @property def sizes(self) -> list[ImageSize]: - return [ImageSize(self._tensor.size(dim=3), self._tensor.size(dim=2), self._tensor.size(dim=1))] * self.number_of_images + return [ + ImageSize(self._tensor.size(dim=3), self._tensor.size(dim=2), self._tensor.size(dim=1)), + ] * self.number_of_images @property def number_of_sizes(self) -> int: diff --git a/src/safeds/data/image/typing/_image_size.py b/src/safeds/data/image/typing/_image_size.py index fe0faee54..e3283a584 100644 --- a/src/safeds/data/image/typing/_image_size.py +++ b/src/safeds/data/image/typing/_image_size.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING from safeds._utils import _structural_hash -from safeds.exceptions import OutOfBoundsError, ClosedBound +from safeds.exceptions import ClosedBound, OutOfBoundsError if TYPE_CHECKING: from safeds.data.image.containers import Image diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py index ee5e5e4fe..592f291bf 100644 --- a/src/safeds/data/labeled/containers/_image_dataset.py +++ 
b/src/safeds/data/labeled/containers/_image_dataset.py @@ -1,17 +1,23 @@ from __future__ import annotations import copy -from typing import TYPE_CHECKING, TypeVar, Generic +from typing import TYPE_CHECKING, Generic, TypeVar from safeds.data.image.containers import ImageList from safeds.data.image.containers._empty_image_list import _EmptyImageList from safeds.data.image.containers._multi_size_image_list import _MultiSizeImageList from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from safeds.data.image.typing import ImageSize -from safeds.data.tabular.containers import Table, Column +from safeds.data.tabular.containers import Column, Table from safeds.data.tabular.transformation import OneHotEncoder -from safeds.exceptions import NonNumericColumnError, OutputLengthMismatchError, IndexOutOfBoundsError, \ - TransformerNotFittedError, OutOfBoundsError, ClosedBound +from safeds.exceptions import ( + ClosedBound, + IndexOutOfBoundsError, + NonNumericColumnError, + OutOfBoundsError, + OutputLengthMismatchError, + TransformerNotFittedError, +) if TYPE_CHECKING: from torch import Tensor @@ -50,7 +56,10 @@ def __init__(self, input_data: ImageList, output_data: T, batch_size: int = 1, s else: self._input_size: ImageSize = ImageSize(input_data.widths[0], input_data.heights[0], input_data.channel) self._input: _SingleSizeImageList = input_data._as_single_size_image_list() - if ((isinstance(output_data, Table) or isinstance(output_data, Column)) and len(input_data) != output_data.number_of_rows) or (isinstance(output_data, ImageList) and len(input_data) != len(output_data)): + if ( + (isinstance(output_data, Column | Table)) + and len(input_data) != output_data.number_of_rows + ) or (isinstance(output_data, ImageList) and len(input_data) != len(output_data)): if isinstance(output_data, Table): output_len = output_data.number_of_rows else: @@ -62,7 +71,10 @@ def __init__(self, input_data: ImageList, output_data: T, batch_size: int = 1, s for column_name in output_data.column_names: if not output_data.get_column_type(column_name).is_numeric(): non_numerical_columns.append(column_name) - elif output_data.get_column(column_name).minimum() < 0 or output_data.get_column(column_name).maximum() > 1: + elif ( + output_data.get_column(column_name).minimum() < 0 + or output_data.get_column(column_name).maximum() > 1 + ): wrong_interval_columns.append(column_name) if len(non_numerical_columns) > 0: raise NonNumericColumnError(f"Columns {non_numerical_columns} are not numerical.") @@ -153,7 +165,6 @@ def get_output(self) -> T: def _get_batch(self, batch_number: int, batch_size: int | None = None) -> tuple[Tensor, Tensor]: import torch - from torch import Tensor if batch_size is None: batch_size = self._batch_size @@ -161,13 +172,35 @@ def _get_batch(self, batch_number: int, batch_size: int | None = None) -> tuple[ raise OutOfBoundsError(batch_size, name="batch_size", lower_bound=ClosedBound(1)) if batch_number < 0 or batch_size * batch_number >= len(self._input): raise IndexOutOfBoundsError(batch_size * batch_number) - max_index = batch_size * (batch_number + 1) if batch_size * (batch_number + 1) < len(self._input) else len(self._input) - input_tensor = self._input._tensor[self._shuffle_tensor_indices[[self._input._indices_to_tensor_positions[index] for index in range(batch_size * batch_number, max_index)]]].to(torch.float32) / 255 + max_index = ( + batch_size * (batch_number + 1) if batch_size * (batch_number + 1) < len(self._input) else len(self._input) + ) + input_tensor = ( + 
self._input._tensor[ + self._shuffle_tensor_indices[ + [ + self._input._indices_to_tensor_positions[index] + for index in range(batch_size * batch_number, max_index) + ] + ] + ].to(torch.float32) + / 255 + ) output_tensor: Tensor if isinstance(self._output, _SingleSizeImageList): - output_tensor = self._output._tensor[self._shuffle_tensor_indices[[self._output._indices_to_tensor_positions[index] for index in range(batch_size * batch_number, max_index)]]].to(torch.float32) / 255 + output_tensor = ( + self._output._tensor[ + self._shuffle_tensor_indices[ + [ + self._output._indices_to_tensor_positions[index] + for index in range(batch_size * batch_number, max_index) + ] + ] + ].to(torch.float32) + / 255 + ) else: # _output is instance of _TableAsTensor - output_tensor = self._output._tensor[self._shuffle_tensor_indices[batch_size * batch_number:max_index]] + output_tensor = self._output._tensor[self._shuffle_tensor_indices[batch_size * batch_number : max_index]] return input_tensor, output_tensor def shuffle(self) -> ImageDataset[T]: @@ -182,6 +215,7 @@ def shuffle(self) -> ImageDataset[T]: the shuffled `ImageDataset` """ import torch + im_dataset: ImageDataset[T] = copy.copy(self) im_dataset._shuffle_tensor_indices = torch.randperm(len(self)) im_dataset._next_batch_index = 0 @@ -197,21 +231,25 @@ def __init__(self, table: Table) -> None: self._tensor = torch.Tensor(table._data.to_numpy(copy=True)).to(torch.get_default_device()) if not torch.all(self._tensor.sum(dim=1) == torch.ones(self._tensor.size(dim=0))): - raise ValueError("The given table is not correctly one hot encoded as it contains rows that have a sum not equal to 1.") + raise ValueError( + "The given table is not correctly one hot encoded as it contains rows that have a sum not equal to 1.", + ) @staticmethod def _from_tensor(tensor: Tensor, column_names: list[str]) -> _TableAsTensor: if tensor.dim() != 2: raise ValueError(f"Tensor has an invalid amount of dimensions. 
Needed 2 dimensions but got {tensor.dim()}.") if tensor.size(dim=1) != len(column_names): - raise ValueError(f"Tensor and column_names have different amounts of classes ({tensor.size(dim=1)}!={len(column_names)}).") + raise ValueError( + f"Tensor and column_names have different amounts of classes ({tensor.size(dim=1)}!={len(column_names)}).", + ) table_as_tensor = _TableAsTensor.__new__(_TableAsTensor) table_as_tensor._tensor = tensor table_as_tensor._column_names = column_names return table_as_tensor def _to_table(self) -> Table: - return Table(dict(zip(self._column_names, self._tensor.T.tolist()))) + return Table(dict(zip(self._column_names, self._tensor.T.tolist(), strict=False))) class _ColumnAsTensor: @@ -222,7 +260,9 @@ def __init__(self, column: Column) -> None: self._column_name = column.name column_as_table = Table.from_columns([column]) self._one_hot_encoder = OneHotEncoder().fit(column_as_table, [self._column_name]) - self._tensor = torch.Tensor(self._one_hot_encoder.transform(column_as_table)._data.to_numpy(copy=True)).to(torch.get_default_device()) + self._tensor = torch.Tensor(self._one_hot_encoder.transform(column_as_table)._data.to_numpy(copy=True)).to( + torch.get_default_device(), + ) @staticmethod def _from_tensor(tensor: Tensor, column_name: str, one_hot_encoder: OneHotEncoder) -> _ColumnAsTensor: @@ -231,7 +271,9 @@ def _from_tensor(tensor: Tensor, column_name: str, one_hot_encoder: OneHotEncode if not one_hot_encoder.is_fitted: raise TransformerNotFittedError if tensor.size(dim=1) != len(one_hot_encoder.get_names_of_added_columns()): - raise ValueError(f"Tensor and one_hot_encoder have different amounts of classes ({tensor.size(dim=1)}!={len(one_hot_encoder.get_names_of_added_columns())}).") + raise ValueError( + f"Tensor and one_hot_encoder have different amounts of classes ({tensor.size(dim=1)}!={len(one_hot_encoder.get_names_of_added_columns())}).", + ) table_as_tensor = _ColumnAsTensor.__new__(_ColumnAsTensor) table_as_tensor._tensor = tensor table_as_tensor._column_name = column_name @@ -239,5 +281,5 @@ def _from_tensor(tensor: Tensor, column_name: str, one_hot_encoder: OneHotEncode return table_as_tensor def _to_column(self) -> Column: - table = Table(dict(zip(self._one_hot_encoder.get_names_of_added_columns(), self._tensor.T.tolist()))) + table = Table(dict(zip(self._one_hot_encoder.get_names_of_added_columns(), self._tensor.T.tolist(), strict=False))) return self._one_hot_encoder.inverse_transform(table).get_column(self._column_name) diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py index 9b5955f04..430a07dce 100644 --- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py +++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py @@ -68,10 +68,14 @@ def __init__(self) -> None: def __hash__(self) -> int: return super().__hash__() - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: if not isinstance(other, OneHotEncoder): return NotImplemented - return self._column_names == other._column_names and self._value_to_column == other._value_to_column and self._value_to_column_nans == other._value_to_column_nans + return ( + self._column_names == other._column_names + and self._value_to_column == other._value_to_column + and self._value_to_column_nans == other._value_to_column_nans + ) # noinspection PyProtectedMember def fit(self, table: Table, column_names: list[str] | None) -> OneHotEncoder: diff --git a/src/safeds/ml/nn/__init__.py 
b/src/safeds/ml/nn/__init__.py index 769d7b350..4eae85362 100644 --- a/src/safeds/ml/nn/__init__.py +++ b/src/safeds/ml/nn/__init__.py @@ -5,16 +5,19 @@ import apipkg if TYPE_CHECKING: - from ._pooling2d_layer import AvgPooling2DLayer from ._convolutional2d_layer import Convolutional2DLayer, ConvolutionalTranspose2DLayer from ._flatten_layer import FlattenLayer from ._forward_layer import ForwardLayer from ._input_conversion_image import InputConversionImage from ._input_conversion_table import InputConversionTable - from ._pooling2d_layer import MaxPooling2DLayer from ._model import NeuralNetworkClassifier, NeuralNetworkRegressor - from ._output_conversion_image import OutputConversionImageToImage, OutputConversionImageToTable, OutputConversionImageToColumn + from ._output_conversion_image import ( + OutputConversionImageToColumn, + OutputConversionImageToImage, + OutputConversionImageToTable, + ) from ._output_conversion_table import OutputConversionTable + from ._pooling2d_layer import AvgPooling2DLayer, MaxPooling2DLayer apipkg.initpkg( __name__, diff --git a/src/safeds/ml/nn/_convolutional2d_layer.py b/src/safeds/ml/nn/_convolutional2d_layer.py index e09162aaf..751aca9fb 100644 --- a/src/safeds/ml/nn/_convolutional2d_layer.py +++ b/src/safeds/ml/nn/_convolutional2d_layer.py @@ -1,7 +1,7 @@ from __future__ import annotations import math -from typing import TYPE_CHECKING, Literal, Unpack, Any, TypedDict +from typing import TYPE_CHECKING, Any, Literal from safeds.data.image.typing import ImageSize @@ -11,16 +11,48 @@ from safeds.ml.nn._layer import _Layer -def _create_internal_model(input_size: int, output_size: int, kernel_size: int, activation_function: Literal["sigmoid", "relu", "softmax"], padding: int, stride: int, transpose: bool, output_padding: int = 0) -> nn.Module: +def _create_internal_model( + input_size: int, + output_size: int, + kernel_size: int, + activation_function: Literal["sigmoid", "relu", "softmax"], + padding: int, + stride: int, + transpose: bool, + output_padding: int = 0, +) -> nn.Module: from torch import nn class _InternalLayer(nn.Module): - def __init__(self, input_size: int, output_size: int, kernel_size: int, activation_function: Literal["sigmoid", "relu", "softmax"], padding: int, stride: int, transpose: bool, output_padding: int): + def __init__( + self, + input_size: int, + output_size: int, + kernel_size: int, + activation_function: Literal["sigmoid", "relu", "softmax"], + padding: int, + stride: int, + transpose: bool, + output_padding: int, + ): super().__init__() if transpose: - self._layer = nn.ConvTranspose2d(in_channels=input_size, out_channels=output_size, kernel_size=kernel_size, padding=padding, stride=stride, output_padding=output_padding) + self._layer = nn.ConvTranspose2d( + in_channels=input_size, + out_channels=output_size, + kernel_size=kernel_size, + padding=padding, + stride=stride, + output_padding=output_padding, + ) else: - self._layer = nn.Conv2d(in_channels=input_size, out_channels=output_size, kernel_size=kernel_size, padding=padding, stride=stride) + self._layer = nn.Conv2d( + in_channels=input_size, + out_channels=output_size, + kernel_size=kernel_size, + padding=padding, + stride=stride, + ) match activation_function: case "sigmoid": self._fn = nn.Sigmoid() @@ -32,7 +64,9 @@ def __init__(self, input_size: int, output_size: int, kernel_size: int, activati def forward(self, x: Tensor) -> Tensor: return self._fn(self._layer(x)) - return _InternalLayer(input_size, output_size, kernel_size, activation_function, padding, stride, 
transpose, output_padding) + return _InternalLayer( + input_size, output_size, kernel_size, activation_function, padding, stride, transpose, output_padding, + ) class Convolutional2DLayer(_Layer): @@ -58,16 +92,30 @@ def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, pa self._input_size: ImageSize | None = None self._output_size: ImageSize | None = None - def _get_internal_layer(self, **kwargs: Any) -> nn.Module: # noqa: ARG002 + def _get_internal_layer(self, **kwargs: Any) -> nn.Module: if self._input_size is None: - raise ValueError("The input_size is not yet set. The internal layer can only be created when the input_size is set.") + raise ValueError( + "The input_size is not yet set. The internal layer can only be created when the input_size is set.", + ) if "activation_function" not in kwargs: - raise ValueError("The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs.") + raise ValueError( + "The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs.", + ) if kwargs.get("activation_function") not in ["sigmoid", "relu", "softmax"]: - raise ValueError(f"The activation_function '{kwargs.get('activation_function')}' is not supported. Please choose one of the following: ['sigmoid', 'relu', 'softmax'].") + raise ValueError( + f"The activation_function '{kwargs.get('activation_function')}' is not supported. Please choose one of the following: ['sigmoid', 'relu', 'softmax'].", + ) else: activation_function: Literal["sigmoid", "relu", "softmax"] = kwargs["activation_function"] - return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride, transpose=False) + return _create_internal_model( + self._input_size.channel, + self._output_channel, + self._kernel_size, + activation_function, + self._padding, + self._stride, + transpose=False, + ) @property def input_size(self) -> ImageSize: @@ -104,10 +152,16 @@ def output_size(self) -> ImageSize: If the input_size is not yet set """ if self._input_size is None: - raise ValueError("The input_size is not yet set. The layer cannot compute the output_size if the input_size is not set.") + raise ValueError( + "The input_size is not yet set. The layer cannot compute the output_size if the input_size is not set.", + ) if self._output_size is None: - new_width = math.ceil((self._input_size.width + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride)) - new_height = math.ceil((self._input_size.height + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride)) + new_width = math.ceil( + (self._input_size.width + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride), + ) + new_height = math.ceil( + (self._input_size.height + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride), + ) self._output_size = ImageSize(new_width, new_height, self._output_channel, _ignore_invalid_channel=True) return self._output_size @@ -120,7 +174,9 @@ def _set_input_size(self, input_size: int | ImageSize) -> None: class ConvolutionalTranspose2DLayer(Convolutional2DLayer): - def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, padding: int = 0, output_padding: int = 0): + def __init__( + self, output_channel: int, kernel_size: int, *, stride: int = 1, padding: int = 0, output_padding: int = 0, + ): """ Create a Convolutional Transpose 2D Layer. 
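# A quick worked check of the two output-size formulas in this file — an illustrative
# sketch only, using the 10x20x30 input with kernel_size=2, stride=2, padding=2 that
# tests/safeds/ml/nn/test_convolutional2d_layer.py (further below) asserts against.
import math

width, height, kernel_size, stride, padding, output_padding = 10, 20, 2, 2, 2, 0

# Convolutional2DLayer.output_size: ceil((size + 2*padding - kernel_size + 1) / stride)
conv_width = math.ceil((width + padding * 2 - kernel_size + 1) / (1.0 * stride))    # ceil(13 / 2) = 7
conv_height = math.ceil((height + padding * 2 - kernel_size + 1) / (1.0 * stride))  # ceil(23 / 2) = 12

# ConvolutionalTranspose2DLayer.output_size:
# (size - 1)*stride - 2*padding + kernel_size + output_padding
t_width = (width - 1) * stride - 2 * padding + kernel_size + output_padding    # 9*2 - 4 + 2 = 16
t_height = (height - 1) * stride - 2 * padding + kernel_size + output_padding  # 19*2 - 4 + 2 = 36

# These match the expected (7, 12) and (16, 36) widths/heights in the parametrized tests.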
@@ -140,23 +196,50 @@ def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, pa super().__init__(output_channel, kernel_size, stride=stride, padding=padding) self._output_padding = output_padding - def _get_internal_layer(self, **kwargs: Any) -> nn.Module: # noqa: ARG002 + def _get_internal_layer(self, **kwargs: Any) -> nn.Module: if self._input_size is None: - raise ValueError("The input_size is not yet set. The internal layer can only be created when the input_size is set.") + raise ValueError( + "The input_size is not yet set. The internal layer can only be created when the input_size is set.", + ) if "activation_function" not in kwargs: - raise ValueError("The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs.") + raise ValueError( + "The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs.", + ) if kwargs.get("activation_function") not in ["sigmoid", "relu", "softmax"]: - raise ValueError(f"The activation_function '{kwargs.get('activation_function')}' is not supported. Please choose one of the following: ['sigmoid', 'relu', 'softmax'].") + raise ValueError( + f"The activation_function '{kwargs.get('activation_function')}' is not supported. Please choose one of the following: ['sigmoid', 'relu', 'softmax'].", + ) else: activation_function: Literal["sigmoid", "relu", "softmax"] = kwargs["activation_function"] - return _create_internal_model(self._input_size.channel, self._output_channel, self._kernel_size, activation_function, self._padding, self._stride, transpose=True, output_padding=self._output_padding) + return _create_internal_model( + self._input_size.channel, + self._output_channel, + self._kernel_size, + activation_function, + self._padding, + self._stride, + transpose=True, + output_padding=self._output_padding, + ) @property def output_size(self) -> ImageSize: if self._input_size is None: - raise ValueError("The input_size is not yet set. The layer cannot compute the output_size if the input_size is not set.") + raise ValueError( + "The input_size is not yet set. The layer cannot compute the output_size if the input_size is not set.", + ) if self._output_size is None: - new_width = (self.input_size.width - 1) * self._stride - 2 * self._padding + self._kernel_size + self._output_padding - new_height = (self.input_size.height - 1) * self._stride - 2 * self._padding + self._kernel_size + self._output_padding + new_width = ( + (self.input_size.width - 1) * self._stride + - 2 * self._padding + + self._kernel_size + + self._output_padding + ) + new_height = ( + (self.input_size.height - 1) * self._stride + - 2 * self._padding + + self._kernel_size + + self._output_padding + ) self._output_size = ImageSize(new_width, new_height, self._output_channel, _ignore_invalid_channel=True) return self._output_size diff --git a/src/safeds/ml/nn/_flatten_layer.py b/src/safeds/ml/nn/_flatten_layer.py index 2734c2a4f..855e5829e 100644 --- a/src/safeds/ml/nn/_flatten_layer.py +++ b/src/safeds/ml/nn/_flatten_layer.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Unpack, Any, TypedDict +from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from torch import Tensor, nn @@ -68,7 +68,9 @@ def output_size(self) -> int: If the input_size is not yet set """ if self._input_size is None: - raise ValueError("The input_size is not yet set. 
The layer cannot compute the output_size if the input_size is not set.") + raise ValueError( + "The input_size is not yet set. The layer cannot compute the output_size if the input_size is not set.", + ) if self._output_size is None: self._output_size = self._input_size.width * self._input_size.height * self._input_size.channel return self._output_size diff --git a/src/safeds/ml/nn/_forward_layer.py b/src/safeds/ml/nn/_forward_layer.py index e8b212a84..26be38169 100644 --- a/src/safeds/ml/nn/_forward_layer.py +++ b/src/safeds/ml/nn/_forward_layer.py @@ -62,9 +62,11 @@ def __init__(self, output_size: int, input_size: int | None = None): raise OutOfBoundsError(actual=output_size, name="output_size", lower_bound=ClosedBound(1)) self._output_size = output_size - def _get_internal_layer(self, **kwargs: Any) -> nn.Module: # noqa: ARG002 + def _get_internal_layer(self, **kwargs: Any) -> nn.Module: if "activation_function" not in kwargs: - raise ValueError("The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs.") + raise ValueError( + "The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs.", + ) else: activation_function: str = kwargs["activation_function"] return _create_internal_model(self._input_size, self._output_size, activation_function) diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py index e0b093026..71bfe1a1e 100644 --- a/src/safeds/ml/nn/_input_conversion.py +++ b/src/safeds/ml/nn/_input_conversion.py @@ -1,17 +1,17 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Generic, TypeVar, Any +from typing import TYPE_CHECKING, Any, Generic, TypeVar if TYPE_CHECKING: from torch.utils.data import DataLoader - from safeds.data.image.typing import ImageSize from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList + from safeds.data.image.typing import ImageSize -from safeds.data.tabular.containers import Table, TimeSeries from safeds.data.image.containers import ImageList from safeds.data.labeled.containers import ImageDataset, TabularDataset +from safeds.data.tabular.containers import Table, TimeSeries FT = TypeVar("FT", TabularDataset, TimeSeries, ImageDataset) PT = TypeVar("PT", Table, TimeSeries, ImageList) @@ -26,7 +26,9 @@ def _data_size(self) -> int | ImageSize: pass # pragma: no cover @abstractmethod - def _data_conversion_fit(self, input_data: FT, batch_size: int, num_of_classes: int = 1) -> DataLoader | ImageDataset: + def _data_conversion_fit( + self, input_data: FT, batch_size: int, num_of_classes: int = 1, + ) -> DataLoader | ImageDataset: pass # pragma: no cover @abstractmethod diff --git a/src/safeds/ml/nn/_input_conversion_image.py b/src/safeds/ml/nn/_input_conversion_image.py index c8b76eb14..37d4553f5 100644 --- a/src/safeds/ml/nn/_input_conversion_image.py +++ b/src/safeds/ml/nn/_input_conversion_image.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Type, Any +from typing import TYPE_CHECKING, Any from safeds.data.image.containers import ImageList from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList @@ -31,14 +31,14 @@ def __init__(self, image_size: ImageSize) -> None: self._one_hot_encoder: OneHotEncoder | None = None self._column_name: str | None = None self._column_names: list[str] | None = None - self._output_type: Type | 
None = None + self._output_type: type | None = None @property def _data_size(self) -> ImageSize: return self._input_size def _data_conversion_fit( - self, input_data: ImageDataset, batch_size: int, num_of_classes: int = 1 # noqa: ARG002 + self, input_data: ImageDataset, batch_size: int, num_of_classes: int = 1, # noqa: ARG002 ) -> ImageDataset: return input_data @@ -55,7 +55,10 @@ def _is_fit_data_valid(self, input_data: ImageDataset) -> bool: if self._column_name is None and self._one_hot_encoder is None: self._one_hot_encoder = input_data._output._one_hot_encoder self._column_name = input_data._output._column_name - elif self._column_name != input_data._output._column_name or self._one_hot_encoder != input_data._output._one_hot_encoder: + elif ( + self._column_name != input_data._output._column_name + or self._one_hot_encoder != input_data._output._one_hot_encoder + ): return False elif isinstance(input_data._output, _TableAsTensor): if self._column_names is None: @@ -68,4 +71,8 @@ def _is_predict_data_valid(self, input_data: ImageList) -> bool: return isinstance(input_data, _SingleSizeImageList) and input_data.sizes[0] == self._input_size def _get_output_configuration(self) -> dict[str, Any]: - return {"column_names": self._column_names, "column_name": self._column_name, "one_hot_encoder": self._one_hot_encoder} + return { + "column_names": self._column_names, + "column_name": self._column_name, + "one_hot_encoder": self._one_hot_encoder, + } diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 5fada61be..7b08c70d6 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -5,17 +5,24 @@ from safeds.data.image.containers import ImageList from safeds.data.labeled.containers import ImageDataset, TabularDataset -from safeds.data.tabular.containers import Table, TimeSeries, Column +from safeds.data.tabular.containers import Table, TimeSeries from safeds.exceptions import ( ClosedBound, FeatureDataMismatchError, - InvalidModelStructureError, InputSizeError, + InvalidModelStructureError, ModelNotFittedError, OutOfBoundsError, ) -from safeds.ml.nn import InputConversionImage, FlattenLayer, OutputConversionImageToTable, Convolutional2DLayer, \ - ForwardLayer, OutputConversionImageToImage, OutputConversionImageToColumn +from safeds.ml.nn import ( + Convolutional2DLayer, + FlattenLayer, + ForwardLayer, + InputConversionImage, + OutputConversionImageToColumn, + OutputConversionImageToImage, + OutputConversionImageToTable, +) from safeds.ml.nn._output_conversion_image import _OutputConversionImage from safeds.ml.nn._pooling2d_layer import _Pooling2DLayer @@ -24,12 +31,11 @@ from torch import Tensor, nn + from safeds.data.image.typing import ImageSize from safeds.ml.nn._input_conversion import _InputConversion from safeds.ml.nn._layer import _Layer from safeds.ml.nn._output_conversion import _OutputConversion - from safeds.data.image.typing import ImageSize - IFT = TypeVar("IFT", TabularDataset, TimeSeries, ImageDataset) # InputFitType IPT = TypeVar("IPT", Table, TimeSeries, ImageList) # InputPredictType OT = TypeVar("OT", TabularDataset, TimeSeries, ImageDataset) # OutputType @@ -64,26 +70,42 @@ def __init__( raise InvalidModelStructureError("You need to provide at least one layer to a neural network.") if isinstance(input_conversion, InputConversionImage): if not isinstance(output_conversion, _OutputConversionImage): - raise InvalidModelStructureError("The defined model uses an input conversion for images but no output conversion for images.") - elif 
isinstance(output_conversion, OutputConversionImageToTable) or isinstance(output_conversion, OutputConversionImageToColumn): - raise InvalidModelStructureError("A NeuralNetworkRegressor cannot be used with images as input and 1-dimensional data as output.") + raise InvalidModelStructureError( + "The defined model uses an input conversion for images but no output conversion for images.", + ) + elif isinstance(output_conversion, OutputConversionImageToColumn | OutputConversionImageToTable): + raise InvalidModelStructureError( + "A NeuralNetworkRegressor cannot be used with images as input and 1-dimensional data as output.", + ) data_dimensions = 2 for layer in layers: - if data_dimensions == 2 and (isinstance(layer, Convolutional2DLayer) or isinstance(layer, _Pooling2DLayer)): + if data_dimensions == 2 and ( + isinstance(layer, Convolutional2DLayer | _Pooling2DLayer) + ): continue elif data_dimensions == 2 and isinstance(layer, FlattenLayer): data_dimensions = 1 elif data_dimensions == 1 and isinstance(layer, ForwardLayer): continue else: - raise InvalidModelStructureError("The 2-dimensional data has to be flattened before using a 1-dimensional layer." if data_dimensions == 2 else "You cannot use a 2-dimensional layer with 1-dimensional data.") + raise InvalidModelStructureError( + "The 2-dimensional data has to be flattened before using a 1-dimensional layer." + if data_dimensions == 2 + else "You cannot use a 2-dimensional layer with 1-dimensional data.", + ) if data_dimensions == 1 and isinstance(output_conversion, OutputConversionImageToImage): - raise InvalidModelStructureError("The output data would be 1-dimensional but the provided output conversion uses 2-dimensional data.") + raise InvalidModelStructureError( + "The output data would be 1-dimensional but the provided output conversion uses 2-dimensional data.", + ) elif isinstance(output_conversion, _OutputConversionImage): - raise InvalidModelStructureError("The defined model uses an output conversion for images but no input conversion for images.") + raise InvalidModelStructureError( + "The defined model uses an output conversion for images but no input conversion for images.", + ) else: for layer in layers: - if isinstance(layer, Convolutional2DLayer) or isinstance(layer, _Pooling2DLayer) or isinstance(layer, FlattenLayer): + if ( + isinstance(layer, Convolutional2DLayer | FlattenLayer | _Pooling2DLayer) + ): raise InvalidModelStructureError("You cannot use a 2-dimensional layer with 1-dimensional data.") self._input_conversion: _InputConversion[IFT, IPT] = input_conversion @@ -218,7 +240,9 @@ def predict(self, test_data: IPT) -> OT: for x in dataloader: elem = self._model(x) predictions.append(elem.squeeze(dim=1)) - return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0), **self._input_conversion._get_output_configuration()) + return self._output_conversion._data_conversion( + test_data, torch.cat(predictions, dim=0), **self._input_conversion._get_output_configuration(), + ) @property def is_fitted(self) -> bool: @@ -257,24 +281,40 @@ def __init__( raise InvalidModelStructureError("A NeuralNetworkClassifier cannot be used with images as output.") elif isinstance(input_conversion, InputConversionImage): if not isinstance(output_conversion, _OutputConversionImage): - raise InvalidModelStructureError("The defined model uses an input conversion for images but no output conversion for images.") + raise InvalidModelStructureError( + "The defined model uses an input conversion for images but no output 
conversion for images.", + ) data_dimensions = 2 for layer in layers: - if data_dimensions == 2 and (isinstance(layer, Convolutional2DLayer) or isinstance(layer, _Pooling2DLayer)): + if data_dimensions == 2 and ( + isinstance(layer, Convolutional2DLayer | _Pooling2DLayer) + ): continue elif data_dimensions == 2 and isinstance(layer, FlattenLayer): data_dimensions = 1 elif data_dimensions == 1 and isinstance(layer, ForwardLayer): continue else: - raise InvalidModelStructureError("The 2-dimensional data has to be flattened before using a 1-dimensional layer." if data_dimensions == 2 else "You cannot use a 2-dimensional layer with 1-dimensional data.") - if data_dimensions == 2 and (isinstance(output_conversion, OutputConversionImageToTable) or isinstance(output_conversion, OutputConversionImageToColumn)): - raise InvalidModelStructureError("The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data.") + raise InvalidModelStructureError( + "The 2-dimensional data has to be flattened before using a 1-dimensional layer." + if data_dimensions == 2 + else "You cannot use a 2-dimensional layer with 1-dimensional data.", + ) + if data_dimensions == 2 and ( + isinstance(output_conversion, OutputConversionImageToColumn | OutputConversionImageToTable) + ): + raise InvalidModelStructureError( + "The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data.", + ) elif isinstance(output_conversion, _OutputConversionImage): - raise InvalidModelStructureError("The defined model uses an output conversion for images but no input conversion for images.") + raise InvalidModelStructureError( + "The defined model uses an output conversion for images but no input conversion for images.", + ) else: for layer in layers: - if isinstance(layer, Convolutional2DLayer) or isinstance(layer, _Pooling2DLayer) or isinstance(layer, FlattenLayer): + if ( + isinstance(layer, Convolutional2DLayer | FlattenLayer | _Pooling2DLayer) + ): raise InvalidModelStructureError("You cannot use a 2-dimensional layer with 1-dimensional data.") self._input_conversion: _InputConversion[IFT, IPT] = input_conversion @@ -283,7 +323,9 @@ def __init__( self._input_size = self._model.input_size self._batch_size = 1 self._is_fitted = False - self._num_of_classes = layers[-1].output_size if isinstance(layers[-1].output_size, int) else -1 # Is always int but linter doesn't know + self._num_of_classes = ( + layers[-1].output_size if isinstance(layers[-1].output_size, int) else -1 + ) # Is always int but linter doesn't know self._total_number_of_batches_done = 0 self._total_number_of_epochs_done = 0 @@ -420,7 +462,9 @@ def predict(self, test_data: IPT) -> OT: predictions.append(torch.argmax(elem, dim=1)) else: predictions.append(elem.squeeze(dim=1).round()) - return self._output_conversion._data_conversion(test_data, torch.cat(predictions, dim=0), **self._input_conversion._get_output_configuration()) + return self._output_conversion._data_conversion( + test_data, torch.cat(predictions, dim=0), **self._input_conversion._get_output_configuration(), + ) @property def is_fitted(self) -> bool: @@ -428,7 +472,9 @@ def is_fitted(self) -> bool: return self._is_fitted -def _create_internal_model(input_conversion: _InputConversion[IFT, IPT], layers: list[_Layer], is_for_classification: bool) -> nn.Module: +def _create_internal_model( + input_conversion: _InputConversion[IFT, IPT], layers: list[_Layer], is_for_classification: bool, +) -> nn.Module: from torch import nn class 
_InternalModel(nn.Module): @@ -444,7 +490,7 @@ def __init__(self, layers: list[_Layer], is_for_classification: bool) -> None: layer._set_input_size(previous_output_size) elif isinstance(input_conversion, InputConversionImage): layer._set_input_size(input_conversion._data_size) - if isinstance(layer, FlattenLayer) or isinstance(layer, _Pooling2DLayer): + if isinstance(layer, FlattenLayer | _Pooling2DLayer): internal_layers.append(layer._get_internal_layer()) else: internal_layers.append(layer._get_internal_layer(activation_function="relu")) diff --git a/src/safeds/ml/nn/_output_conversion.py b/src/safeds/ml/nn/_output_conversion.py index ce0845408..fec11a082 100644 --- a/src/safeds/ml/nn/_output_conversion.py +++ b/src/safeds/ml/nn/_output_conversion.py @@ -1,7 +1,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Generic, TypeVar, Any +from typing import TYPE_CHECKING, Any, Generic, TypeVar from safeds.data.image.containers import ImageList from safeds.data.labeled.containers import ImageDataset, TabularDataset diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index 937787679..1026745d2 100644 --- a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -1,20 +1,18 @@ from __future__ import annotations -from abc import ABC from typing import TYPE_CHECKING, Any from safeds.data.image.containers import ImageList from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from safeds.data.labeled.containers import ImageDataset -from safeds.data.labeled.containers._image_dataset import _TableAsTensor, _ColumnAsTensor -from safeds.data.tabular.containers import Table, Column +from safeds.data.labeled.containers._image_dataset import _ColumnAsTensor, _TableAsTensor +from safeds.data.tabular.containers import Column, Table if TYPE_CHECKING: - from torch import Tensor, LongTensor - -from safeds.ml.nn._output_conversion import _OutputConversion + from torch import Tensor from safeds.data.tabular.transformation import OneHotEncoder +from safeds.ml.nn._output_conversion import _OutputConversion class _OutputConversionImage: @@ -29,9 +27,13 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: if not isinstance(input_data, _SingleSizeImageList): raise ValueError("The given input ImageList contains images of different sizes.") # noqa: TRY004 if "column_name" not in kwargs or not isinstance(kwargs.get("column_name"), str): - raise ValueError("The column_name is not set. The data can only be converted if the column_name is provided as `str` in the kwargs.") + raise ValueError( + "The column_name is not set. The data can only be converted if the column_name is provided as `str` in the kwargs.", + ) if "one_hot_encoder" not in kwargs or not isinstance(kwargs.get("one_hot_encoder"), OneHotEncoder): - raise ValueError("The one_hot_encoder is not set. The data can only be converted if the one_hot_encoder is provided as `OneHotEncoder` in the kwargs.") + raise ValueError( + "The one_hot_encoder is not set. 
The data can only be converted if the one_hot_encoder is provided as `OneHotEncoder` in the kwargs.", + ) one_hot_encoder: OneHotEncoder = kwargs["one_hot_encoder"] column_name: str = kwargs["column_name"] @@ -56,8 +58,14 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: if not isinstance(input_data, _SingleSizeImageList): raise ValueError("The given input ImageList contains images of different sizes.") # noqa: TRY004 - if "column_names" not in kwargs or not isinstance(kwargs.get("column_names"), list) and all(isinstance(element, str) for element in kwargs["column_names"]): - raise ValueError("The column_names are not set. The data can only be converted if the column_names are provided as `list[str]` in the kwargs.") + if ( + "column_names" not in kwargs + or not isinstance(kwargs.get("column_names"), list) + and all(isinstance(element, str) for element in kwargs["column_names"]) + ): + raise ValueError( + "The column_names are not set. The data can only be converted if the column_names are provided as `list[str]` in the kwargs.", + ) column_names: list[str] = kwargs["column_names"] output = torch.zeros(len(input_data), len(column_names)) @@ -77,12 +85,16 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: class OutputConversionImageToImage(_OutputConversion[ImageList, ImageDataset], _OutputConversionImage): def _data_conversion( - self, input_data: ImageList, output_data: Tensor, **kwargs: Any # noqa: ARG002 + self, input_data: ImageList, output_data: Tensor, **kwargs: Any, # noqa: ARG002 ) -> ImageDataset[ImageList]: import torch if not isinstance(input_data, _SingleSizeImageList): raise ValueError("The given input ImageList contains images of different sizes.") # noqa: TRY004 - return ImageDataset[ImageList](input_data, _SingleSizeImageList._create_from_tensor((output_data * 255).to(torch.uint8), list( - range(output_data.size(dim=0))))) + return ImageDataset[ImageList]( + input_data, + _SingleSizeImageList._create_from_tensor( + (output_data * 255).to(torch.uint8), list(range(output_data.size(dim=0))), + ), + ) diff --git a/src/safeds/ml/nn/_pooling2d_layer.py b/src/safeds/ml/nn/_pooling2d_layer.py index 146919139..1f8e67b52 100644 --- a/src/safeds/ml/nn/_pooling2d_layer.py +++ b/src/safeds/ml/nn/_pooling2d_layer.py @@ -1,7 +1,7 @@ from __future__ import annotations import math -from typing import TYPE_CHECKING, Literal, Unpack, Any, TypedDict +from typing import TYPE_CHECKING, Any, Literal from safeds.data.image.typing import ImageSize @@ -90,10 +90,16 @@ def output_size(self) -> ImageSize: If the input_size is not yet set """ if self._input_size is None: - raise ValueError("The input_size is not yet set. The layer cannot compute the output_size if the input_size is not set.") + raise ValueError( + "The input_size is not yet set. 
The layer cannot compute the output_size if the input_size is not set.", + ) if self._output_size is None: - new_width = math.ceil((self.input_size.width + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride)) - new_height = math.ceil((self.input_size.height + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride)) + new_width = math.ceil( + (self.input_size.width + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride), + ) + new_height = math.ceil( + (self.input_size.height + self._padding * 2 - self._kernel_size + 1) / (1.0 * self._stride), + ) self._output_size = ImageSize(new_width, new_height, self._input_size.channel, _ignore_invalid_channel=True) return self._output_size diff --git a/tests/safeds/data/image/containers/test_image.py b/tests/safeds/data/image/containers/test_image.py index ea4f251a4..2c5334903 100644 --- a/tests/safeds/data/image/containers/test_image.py +++ b/tests/safeds/data/image/containers/test_image.py @@ -3,8 +3,8 @@ from pathlib import Path from tempfile import NamedTemporaryFile -import PIL.Image import numpy as np +import PIL.Image import pytest import torch from safeds.data.image.containers import Image @@ -16,6 +16,8 @@ from tests.helpers import ( device_cuda, + get_devices, + get_devices_ids, grayscale_jpg_id, grayscale_jpg_path, grayscale_png_id, @@ -31,10 +33,11 @@ resolve_resource_path, rgba_png_id, rgba_png_path, + skip_if_device_not_available, white_square_jpg_id, white_square_jpg_path, white_square_png_id, - white_square_png_path, get_devices, get_devices_ids, skip_if_device_not_available, + white_square_png_path, ) diff --git a/tests/safeds/data/image/containers/test_image_list.py b/tests/safeds/data/image/containers/test_image_list.py index 709d34e1f..e0a3a9a73 100644 --- a/tests/safeds/data/image/containers/test_image_list.py +++ b/tests/safeds/data/image/containers/test_image_list.py @@ -6,8 +6,6 @@ import pytest import torch -from torch import Tensor - from safeds._config import _get_device from safeds.data.image.containers import Image, ImageList from safeds.data.image.containers._empty_image_list import _EmptyImageList @@ -16,6 +14,7 @@ from safeds.data.tabular.containers import Table from safeds.exceptions import DuplicateIndexError, IllegalFormatError, IndexOutOfBoundsError, OutOfBoundsError from syrupy import SnapshotAssertion +from torch import Tensor from tests.helpers import ( grayscale_jpg_path, @@ -155,7 +154,11 @@ def test_from_files(self, resource_path1: str, resource_path2: str, resource_pat assert image_list.channel == expected_channel # Test sizes - assert image_list.sizes == [image1_with_expected_channel.size, image2_with_expected_channel.size, image3_with_expected_channel.size] + assert image_list.sizes == [ + image1_with_expected_channel.size, + image2_with_expected_channel.size, + image3_with_expected_channel.size, + ] # Test number_of_sizes assert image_list.number_of_sizes == len({(image.width, image.height) for image in [image1, image2, image3]}) @@ -454,7 +457,9 @@ class TestFromFiles: def test_from_files_creation(self, resource_path: str | Path, snapshot_png_image_list: SnapshotAssertion) -> None: torch.set_default_device(torch.device("cpu")) image_list = ImageList.from_files(resolve_resource_path(resource_path)) - image_list_returned_filenames, filenames = ImageList.from_files(resolve_resource_path(resource_path), return_filenames=True) + image_list_returned_filenames, filenames = ImageList.from_files( + resolve_resource_path(resource_path), return_filenames=True, + ) assert 
image_list == snapshot_png_image_list assert image_list == image_list_returned_filenames assert len(image_list) == len(filenames) @@ -1220,7 +1225,7 @@ class TestSingleSizeImageList: "tensor", [ torch.ones(4, 1, 1), - ] + ], ) def test_create_from_tensor_3_dim(self, tensor: Tensor) -> None: expected_tensor = tensor.unsqueeze(dim=1) @@ -1235,7 +1240,7 @@ def test_create_from_tensor_3_dim(self, tensor: Tensor) -> None: "tensor", [ torch.ones(4, 3, 1, 1), - ] + ], ) def test_create_from_tensor_4_dim(self, tensor: Tensor) -> None: image_list = _SingleSizeImageList._create_from_tensor(tensor, list(range(tensor.size(0)))) @@ -1245,23 +1250,18 @@ def test_create_from_tensor_4_dim(self, tensor: Tensor) -> None: assert image_list.heights[0] == tensor.size(2) assert image_list.channel == tensor.size(1) - @pytest.mark.parametrize( - "tensor", - [ - torch.ones(4, 3, 1, 1, 1), - torch.ones(4, 3) - ], - ids=["5-dim", "2-dim"] - ) + @pytest.mark.parametrize("tensor", [torch.ones(4, 3, 1, 1, 1), torch.ones(4, 3)], ids=["5-dim", "2-dim"]) def test_should_raise_from_invalid_tensor(self, tensor: Tensor) -> None: - with pytest.raises(ValueError, match=rf"Invalid Tensor. This Tensor requires 3 or 4 dimensions but has {tensor.dim()}"): + with pytest.raises( + ValueError, match=rf"Invalid Tensor. This Tensor requires 3 or 4 dimensions but has {tensor.dim()}", + ): _SingleSizeImageList._create_from_tensor(tensor, list(range(tensor.size(0)))) @pytest.mark.parametrize( "tensor", [ torch.randn(16, 4, 4), - ] + ], ) def test_get_batch_and_iterate_3_dim(self, tensor: Tensor) -> None: expected_tensor = tensor.unsqueeze(dim=1) @@ -1269,8 +1269,12 @@ def test_get_batch_and_iterate_3_dim(self, tensor: Tensor) -> None: batch_size = math.ceil(expected_tensor.size(0) / 1.999) assert image_list._get_batch(0, batch_size).size(0) == batch_size assert torch.all(torch.eq(image_list._get_batch(0, 1), image_list._get_batch(0))) - assert torch.all(torch.eq(image_list._get_batch(0, batch_size), expected_tensor[:batch_size].to(torch.float32) / 255)) - assert torch.all(torch.eq(image_list._get_batch(1, batch_size), expected_tensor[batch_size:].to(torch.float32) / 255)) + assert torch.all( + torch.eq(image_list._get_batch(0, batch_size), expected_tensor[:batch_size].to(torch.float32) / 255), + ) + assert torch.all( + torch.eq(image_list._get_batch(1, batch_size), expected_tensor[batch_size:].to(torch.float32) / 255), + ) iterate_image_list = iter(image_list) assert iterate_image_list == image_list assert iterate_image_list is not image_list @@ -1286,7 +1290,7 @@ def test_get_batch_and_iterate_3_dim(self, tensor: Tensor) -> None: "tensor", [ torch.randn(16, 4, 4, 4), - ] + ], ) def test_get_batch_and_iterate_4_dim(self, tensor: Tensor) -> None: image_list = _SingleSizeImageList._create_from_tensor(tensor, list(range(tensor.size(0)))) diff --git a/tests/safeds/data/image/typing/test_image_size.py b/tests/safeds/data/image/typing/test_image_size.py index c980ea3e6..d54174a2c 100644 --- a/tests/safeds/data/image/typing/test_image_size.py +++ b/tests/safeds/data/image/typing/test_image_size.py @@ -2,23 +2,26 @@ from typing import Any import pytest -from torch.types import Device - from safeds.data.image.containers import Image from safeds.data.image.typing import ImageSize from safeds.exceptions import OutOfBoundsError -from tests.helpers import resolve_resource_path, images_all, images_all_ids, get_devices, get_devices_ids, \ - skip_if_device_not_available, plane_png_path +from torch.types import Device + +from tests.helpers import ( + 
get_devices, + get_devices_ids, + images_all, + images_all_ids, + plane_png_path, + resolve_resource_path, + skip_if_device_not_available, +) class TestFromImage: @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids()) - @pytest.mark.parametrize( - "resource_path", - images_all(), - ids=images_all_ids() - ) + @pytest.mark.parametrize("resource_path", images_all(), ids=images_all_ids()) def test_should_create(self, resource_path: str, device: Device) -> None: skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) @@ -28,25 +31,11 @@ def test_should_create(self, resource_path: str, device: Device) -> None: class TestEq: - @pytest.mark.parametrize( - ("image_size", "width", "height", "channel"), - [ - ( - ImageSize(1, 2, 3), 1, 2, 3 - ) - ] - ) + @pytest.mark.parametrize(("image_size", "width", "height", "channel"), [(ImageSize(1, 2, 3), 1, 2, 3)]) def test_should_be_equal(self, image_size: ImageSize, width: int, height: int, channel: int) -> None: assert image_size == ImageSize(width, height, channel) - @pytest.mark.parametrize( - ("image_size", "width", "height", "channel"), - [ - ( - ImageSize(1, 2, 3), 3, 2, 1 - ) - ] - ) + @pytest.mark.parametrize(("image_size", "width", "height", "channel"), [(ImageSize(1, 2, 3), 3, 2, 1)]) def test_should_not_be_equal(self, image_size: ImageSize, width: int, height: int, channel: int) -> None: assert image_size != ImageSize(width, height, channel) @@ -56,7 +45,7 @@ def test_should_not_be_equal(self, image_size: ImageSize, width: int, height: in (ImageSize(1, 2, 3), None), (ImageSize(1, 2, 3), Image.from_file(resolve_resource_path(plane_png_path))), ], - ids=["None", "Image"] + ids=["None", "Image"], ) def test_should_be_not_implemented(self, image_size: ImageSize, other: Any) -> None: assert image_size.__eq__(other) is NotImplemented @@ -80,82 +69,52 @@ def test_hash_should_not_be_equal(self) -> None: class TestSizeOf: - @pytest.mark.parametrize( - "image_size", - [ImageSize(1, 2, 3)] - ) + @pytest.mark.parametrize("image_size", [ImageSize(1, 2, 3)]) def test_should_size_be_greater_than_normal_object(self, image_size: ImageSize) -> None: assert sys.getsizeof(image_size) >= sys.getsizeof(0) * 3 class TestStr: - @pytest.mark.parametrize( - "image_size", - [ImageSize(1, 2, 3)] - ) + @pytest.mark.parametrize("image_size", [ImageSize(1, 2, 3)]) def test_should_size_be_greater_than_normal_object(self, image_size: ImageSize) -> None: assert str(image_size) == f"{image_size.width}x{image_size.height}x{image_size.channel} (WxHxC)" class TestProperties: - @pytest.mark.parametrize( - "width", - list(range(1, 5)) - ) - @pytest.mark.parametrize( - "height", - list(range(1, 5)) - ) - @pytest.mark.parametrize( - "channel", - [1, 3, 4] - ) + @pytest.mark.parametrize("width", list(range(1, 5))) + @pytest.mark.parametrize("height", list(range(1, 5))) + @pytest.mark.parametrize("channel", [1, 3, 4]) def test_width_height_channel(self, width: int, height: int, channel: int) -> None: image_size = ImageSize(width, height, channel) assert image_size.width == width assert image_size.height == height assert image_size.channel == channel - @pytest.mark.parametrize( - "channel", - [2, 5, 6] - ) + @pytest.mark.parametrize("channel", [2, 5, 6]) def test_should_ignore_invalid_channel(self, channel: int) -> None: assert ImageSize(1, 1, channel, _ignore_invalid_channel=True).channel == channel class TestErrors: - @pytest.mark.parametrize( - "width", - [-1, 0] - ) + @pytest.mark.parametrize("width", [-1, 0]) def 
test_should_raise_invalid_width(self, width: int) -> None: with pytest.raises(OutOfBoundsError, match=rf"{width} is not inside \[1, \u221e\)."): ImageSize(width, 1, 1) - @pytest.mark.parametrize( - "height", - [-1, 0] - ) + @pytest.mark.parametrize("height", [-1, 0]) def test_should_raise_invalid_height(self, height: int) -> None: with pytest.raises(OutOfBoundsError, match=rf"{height} is not inside \[1, \u221e\)."): ImageSize(1, height, 1) - @pytest.mark.parametrize( - "channel", - [-1, 0, 2, 5] - ) + @pytest.mark.parametrize("channel", [-1, 0, 2, 5]) def test_should_raise_invalid_channel(self, channel: int) -> None: with pytest.raises(ValueError, match=rf"Channel {channel} is not a valid channel option. Use either 1, 3 or 4"): ImageSize(1, 1, channel) - @pytest.mark.parametrize( - "channel", - [-1, 0] - ) + @pytest.mark.parametrize("channel", [-1, 0]) def test_should_raise_negative_channel_ignore_invalid_channel(self, channel: int) -> None: with pytest.raises(OutOfBoundsError, match=rf"channel \(={channel}\) is not inside \[1, \u221e\)."): ImageSize(1, 1, channel, _ignore_invalid_channel=True) diff --git a/tests/safeds/data/labeled/containers/test_image_dataset.py b/tests/safeds/data/labeled/containers/test_image_dataset.py index b3a1d6d2f..c3437ba55 100644 --- a/tests/safeds/data/labeled/containers/test_image_dataset.py +++ b/tests/safeds/data/labeled/containers/test_image_dataset.py @@ -1,21 +1,25 @@ import math -from typing import Type, TypeVar +from typing import TypeVar import pytest import torch -from torch import Tensor - from safeds.data.image.containers import ImageList from safeds.data.image.containers._empty_image_list import _EmptyImageList from safeds.data.image.containers._multi_size_image_list import _MultiSizeImageList from safeds.data.labeled.containers import ImageDataset -from safeds.data.labeled.containers._image_dataset import _TableAsTensor, _ColumnAsTensor +from safeds.data.labeled.containers._image_dataset import _ColumnAsTensor, _TableAsTensor from safeds.data.tabular.containers import Column, Table from safeds.data.tabular.transformation import OneHotEncoder -from safeds.exceptions import OutputLengthMismatchError, NonNumericColumnError, IndexOutOfBoundsError, OutOfBoundsError, \ - TransformerNotFittedError -from tests.helpers import resolve_resource_path, plane_png_path, white_square_png_path, images_all +from safeds.exceptions import ( + IndexOutOfBoundsError, + NonNumericColumnError, + OutOfBoundsError, + OutputLengthMismatchError, + TransformerNotFittedError, +) +from torch import Tensor +from tests.helpers import images_all, plane_png_path, resolve_resource_path, white_square_png_path T = TypeVar("T", Column, Table, ImageList) @@ -25,19 +29,66 @@ class TestImageDatasetInit: @pytest.mark.parametrize( ("input_data", "output_data", "error", "error_msg"), [ - (_MultiSizeImageList(), Table(), ValueError, r"The given input ImageList contains images of different sizes."), + ( + _MultiSizeImageList(), + Table(), + ValueError, + r"The given input ImageList contains images of different sizes.", + ), (_EmptyImageList(), Table(), ValueError, r"The given input ImageList contains no images."), - (ImageList.from_files(resolve_resource_path([plane_png_path, plane_png_path])), ImageList.from_files(resolve_resource_path([plane_png_path, white_square_png_path])), ValueError, r"The given output ImageList contains images of different sizes."), - (ImageList.from_files(resolve_resource_path(plane_png_path)), _EmptyImageList(), OutputLengthMismatchError, r"The length of the output 
container differs"), - (ImageList.from_files(resolve_resource_path(plane_png_path)), Table(), OutputLengthMismatchError, r"The length of the output container differs"), - (ImageList.from_files(resolve_resource_path(plane_png_path)), Column("column", [1, 2]), OutputLengthMismatchError, r"The length of the output container differs"), - (ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path([plane_png_path, plane_png_path])), OutputLengthMismatchError, r"The length of the output container differs"), - (ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"a": ["1"]}), NonNumericColumnError, r"Tried to do a numerical operation on one or multiple non-numerical columns: \nColumns \['a'\] are not numerical."), - (ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"a": [2]}), ValueError, r"Columns \['a'\] have values outside of the interval \[0, 1\]."), - (ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"a": [-1]}), ValueError, r"Columns \['a'\] have values outside of the interval \[0, 1\]."), - ] + ( + ImageList.from_files(resolve_resource_path([plane_png_path, plane_png_path])), + ImageList.from_files(resolve_resource_path([plane_png_path, white_square_png_path])), + ValueError, + r"The given output ImageList contains images of different sizes.", + ), + ( + ImageList.from_files(resolve_resource_path(plane_png_path)), + _EmptyImageList(), + OutputLengthMismatchError, + r"The length of the output container differs", + ), + ( + ImageList.from_files(resolve_resource_path(plane_png_path)), + Table(), + OutputLengthMismatchError, + r"The length of the output container differs", + ), + ( + ImageList.from_files(resolve_resource_path(plane_png_path)), + Column("column", [1, 2]), + OutputLengthMismatchError, + r"The length of the output container differs", + ), + ( + ImageList.from_files(resolve_resource_path(plane_png_path)), + ImageList.from_files(resolve_resource_path([plane_png_path, plane_png_path])), + OutputLengthMismatchError, + r"The length of the output container differs", + ), + ( + ImageList.from_files(resolve_resource_path(plane_png_path)), + Table({"a": ["1"]}), + NonNumericColumnError, + r"Tried to do a numerical operation on one or multiple non-numerical columns: \nColumns \['a'\] are not numerical.", + ), + ( + ImageList.from_files(resolve_resource_path(plane_png_path)), + Table({"a": [2]}), + ValueError, + r"Columns \['a'\] have values outside of the interval \[0, 1\].", + ), + ( + ImageList.from_files(resolve_resource_path(plane_png_path)), + Table({"a": [-1]}), + ValueError, + r"Columns \['a'\] have values outside of the interval \[0, 1\].", + ), + ], ) - def test_should_raise_with_invalid_data(self, input_data: ImageList, output_data: T, error: Type[Exception], error_msg: str) -> None: + def test_should_raise_with_invalid_data( + self, input_data: ImageList, output_data: T, error: type[Exception], error_msg: str, + ) -> None: with pytest.raises(error, match=error_msg): ImageDataset(input_data, output_data) @@ -72,7 +123,7 @@ class TestBatch: (2, math.ceil(len(images_all()) / 2)), (3, math.ceil(len(images_all()) / 3)), (4, math.ceil(len(images_all()) / 4)), - ] + ], ) def test_should_raise_index_out_of_bounds_error(self, batch_number: int, batch_size: int) -> None: image_list = ImageList.from_files(resolve_resource_path(images_all())).resize(10, 10) @@ -90,7 +141,10 @@ def test_should_raise_out_of_bounds_error(self) -> None: class TestTableAsTensor: def 
test_should_raise_if_not_one_hot_encoded(self) -> None: - with pytest.raises(ValueError, match=r"The given table is not correctly one hot encoded as it contains rows that have a sum not equal to 1."): + with pytest.raises( + ValueError, + match=r"The given table is not correctly one hot encoded as it contains rows that have a sum not equal to 1.", + ): _TableAsTensor(Table({"a": [0.2, 0.2, 0.2, 0.3, 0.2]})) @pytest.mark.parametrize( @@ -99,7 +153,7 @@ def test_should_raise_if_not_one_hot_encoded(self) -> None: (torch.randn(10), r"Tensor has an invalid amount of dimensions. Needed 2 dimensions but got 1."), (torch.randn(10, 10, 10), r"Tensor has an invalid amount of dimensions. Needed 2 dimensions but got 3."), (torch.randn(10, 10), r"Tensor and column_names have different amounts of classes \(10!=2\)."), - ] + ], ) def test_should_raise_from_tensor(self, tensor: Tensor, error_msg: str) -> None: with pytest.raises(ValueError, match=error_msg): @@ -111,12 +165,29 @@ class TestColumnAsTensor: @pytest.mark.parametrize( ("tensor", "one_hot_encoder", "error", "error_msg"), [ - (torch.randn(10), OneHotEncoder(), ValueError, r"Tensor has an invalid amount of dimensions. Needed 2 dimensions but got 1."), - (torch.randn(10, 10, 10), OneHotEncoder(), ValueError, r"Tensor has an invalid amount of dimensions. Needed 2 dimensions but got 3."), + ( + torch.randn(10), + OneHotEncoder(), + ValueError, + r"Tensor has an invalid amount of dimensions. Needed 2 dimensions but got 1.", + ), + ( + torch.randn(10, 10, 10), + OneHotEncoder(), + ValueError, + r"Tensor has an invalid amount of dimensions. Needed 2 dimensions but got 3.", + ), (torch.randn(10, 10), OneHotEncoder(), TransformerNotFittedError, r""), - (torch.randn(10, 10), OneHotEncoder().fit(Table({"b": ["a", "b", "c"]}), None), ValueError, r"Tensor and one_hot_encoder have different amounts of classes \(10!=3\)."), - ] + ( + torch.randn(10, 10), + OneHotEncoder().fit(Table({"b": ["a", "b", "c"]}), None), + ValueError, + r"Tensor and one_hot_encoder have different amounts of classes \(10!=3\).", + ), + ], ) - def test_should_raise_from_tensor(self, tensor: Tensor, one_hot_encoder: OneHotEncoder, error: Type[Exception], error_msg: str) -> None: + def test_should_raise_from_tensor( + self, tensor: Tensor, one_hot_encoder: OneHotEncoder, error: type[Exception], error_msg: str, + ) -> None: with pytest.raises(error, match=error_msg): _ColumnAsTensor._from_tensor(tensor, "a", one_hot_encoder) diff --git a/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py b/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py index a36d93f05..06934452f 100644 --- a/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py +++ b/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py @@ -26,7 +26,7 @@ def test_should_be_equal(self) -> None: [ (Table({"a": ["a", "b", "c"], "b": ["a", "b", "c"]}), Table({"a": ["a", "b", "c"], "aa": ["a", "b", "c"]})), (Table({"a": ["a", "b", "c"], "b": ["a", "b", "c"]}), Table({"a": ["a", "b", "c"], "b": ["a", "b", "d"]})), - ] + ], ) def test_should_be_not_equal(self, table1: Table, table2: Table) -> None: assert OneHotEncoder().fit(table1, None) != OneHotEncoder().fit(table2, None) diff --git a/tests/safeds/ml/nn/test_cnn_workflow.py b/tests/safeds/ml/nn/test_cnn_workflow.py index d3c905890..4e92d3687 100644 --- a/tests/safeds/ml/nn/test_cnn_workflow.py +++ b/tests/safeds/ml/nn/test_cnn_workflow.py @@ -3,18 +3,27 @@ import pytest import torch -from syrupy import SnapshotAssertion -from torch.types 
import Device - from safeds.data.image.containers import ImageList from safeds.data.labeled.containers import ImageDataset -from safeds.data.tabular.containers import Table, Column +from safeds.data.tabular.containers import Column, Table from safeds.data.tabular.transformation import OneHotEncoder -from safeds.ml.nn import NeuralNetworkClassifier, InputConversionImage, Convolutional2DLayer, MaxPooling2DLayer, \ - FlattenLayer, ForwardLayer, OutputConversionImageToTable, ConvolutionalTranspose2DLayer, NeuralNetworkRegressor, \ - AvgPooling2DLayer +from safeds.ml.nn import ( + AvgPooling2DLayer, + Convolutional2DLayer, + ConvolutionalTranspose2DLayer, + FlattenLayer, + ForwardLayer, + InputConversionImage, + MaxPooling2DLayer, + NeuralNetworkClassifier, + NeuralNetworkRegressor, + OutputConversionImageToTable, +) from safeds.ml.nn._output_conversion_image import OutputConversionImageToColumn, OutputConversionImageToImage -from tests.helpers import resolve_resource_path, images_all, device_cuda, device_cpu, skip_if_device_not_available +from syrupy import SnapshotAssertion +from torch.types import Device + +from tests.helpers import device_cpu, device_cuda, images_all, resolve_resource_path, skip_if_device_not_available if TYPE_CHECKING: from safeds.ml.nn._layer import _Layer @@ -25,14 +34,36 @@ class TestImageToTableClassifier: @pytest.mark.parametrize( ("seed", "device", "layer_3_bias", "prediction_label"), [ - (1234, device_cuda, [0.5809096097946167, -0.32418742775917053, 0.026058292016386986, 0.5801554918289185], ["grayscale"] * 7), - (4711, device_cuda, [-0.8114155530929565, -0.9443624019622803, 0.8557258248329163, -0.848240852355957], ["white_square"] * 7), - (1234, device_cpu, [-0.6926110982894897, 0.33004942536354065, -0.32962560653686523, 0.5768553614616394], ["grayscale"] * 7), - (4711, device_cpu, [-0.9051575660705566, -0.8625037670135498, 0.24682046473026276, -0.2612163722515106], ["white_square"] * 7), + ( + 1234, + device_cuda, + [0.5809096097946167, -0.32418742775917053, 0.026058292016386986, 0.5801554918289185], + ["grayscale"] * 7, + ), + ( + 4711, + device_cuda, + [-0.8114155530929565, -0.9443624019622803, 0.8557258248329163, -0.848240852355957], + ["white_square"] * 7, + ), + ( + 1234, + device_cpu, + [-0.6926110982894897, 0.33004942536354065, -0.32962560653686523, 0.5768553614616394], + ["grayscale"] * 7, + ), + ( + 4711, + device_cpu, + [-0.9051575660705566, -0.8625037670135498, 0.24682046473026276, -0.2612163722515106], + ["white_square"] * 7, + ), ], - ids=["seed-1234-cuda", "seed-4711-cuda", "seed-1234-cpu", "seed-4711-cpu"] + ids=["seed-1234-cuda", "seed-4711-cuda", "seed-1234-cpu", "seed-4711-cpu"], ) - def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[float], prediction_label: list[str], device: Device) -> None: + def test_should_train_and_predict_model( + self, seed: int, layer_3_bias: list[float], prediction_label: list[str], device: Device, + ) -> None: skip_if_device_not_available(device) torch.set_default_device(device) torch.manual_seed(seed) @@ -49,14 +80,10 @@ def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[floa image_classes_one_hot_encoded = one_hot_encoder.transform(image_classes) image_dataset = ImageDataset(image_list, image_classes_one_hot_encoded) num_of_classes: int = image_dataset.output_size if isinstance(image_dataset.output_size, int) else 0 - layers = [ - Convolutional2DLayer(1, 2), - MaxPooling2DLayer(10), - FlattenLayer(), - ForwardLayer(num_of_classes) - ] - nn_original = 
NeuralNetworkClassifier(InputConversionImage(image_dataset.input_size), layers, - OutputConversionImageToTable()) + layers = [Convolutional2DLayer(1, 2), MaxPooling2DLayer(10), FlattenLayer(), ForwardLayer(num_of_classes)] + nn_original = NeuralNetworkClassifier( + InputConversionImage(image_dataset.input_size), layers, OutputConversionImageToTable(), + ) nn = nn_original.fit(image_dataset, epoch_size=2) assert str(nn_original._model.state_dict().values()) != str(nn._model.state_dict().values()) assert nn._model.state_dict()["_pytorch_layers.3._layer.bias"].tolist() == layer_3_bias @@ -69,14 +96,36 @@ class TestImageToColumnClassifier: @pytest.mark.parametrize( ("seed", "device", "layer_3_bias", "prediction_label"), [ - (1234, device_cuda, [0.5805736780166626, -0.32432740926742554, 0.02629312314093113, 0.5803964138031006], ["grayscale"] * 7), - (4711, device_cuda, [-0.8114045262336731, -0.9443488717079163, 0.8557113409042358, -0.8482510447502136], ["white_square"] * 7), - (1234, device_cpu, [-0.69260174036026, 0.33002084493637085, -0.32964015007019043, 0.5768893957138062], ["grayscale"] * 7), - (4711, device_cpu, [-0.9051562547683716, -0.8625034093856812, 0.24682027101516724, -0.26121777296066284], ["white_square"] * 7), + ( + 1234, + device_cuda, + [0.5805736780166626, -0.32432740926742554, 0.02629312314093113, 0.5803964138031006], + ["grayscale"] * 7, + ), + ( + 4711, + device_cuda, + [-0.8114045262336731, -0.9443488717079163, 0.8557113409042358, -0.8482510447502136], + ["white_square"] * 7, + ), + ( + 1234, + device_cpu, + [-0.69260174036026, 0.33002084493637085, -0.32964015007019043, 0.5768893957138062], + ["grayscale"] * 7, + ), + ( + 4711, + device_cpu, + [-0.9051562547683716, -0.8625034093856812, 0.24682027101516724, -0.26121777296066284], + ["white_square"] * 7, + ), ], - ids=["seed-1234-cuda", "seed-4711-cuda", "seed-1234-cpu", "seed-4711-cpu"] + ids=["seed-1234-cuda", "seed-4711-cuda", "seed-1234-cpu", "seed-4711-cpu"], ) - def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[float], prediction_label: list[str], device: Device) -> None: + def test_should_train_and_predict_model( + self, seed: int, layer_3_bias: list[float], prediction_label: list[str], device: Device, + ) -> None: skip_if_device_not_available(device) torch.set_default_device(device) torch.manual_seed(seed) @@ -92,14 +141,10 @@ def test_should_train_and_predict_model(self, seed: int, layer_3_bias: list[floa image_dataset = ImageDataset(image_list, image_classes, shuffle=True) num_of_classes: int = image_dataset.output_size if isinstance(image_dataset.output_size, int) else 0 - layers = [ - Convolutional2DLayer(1, 2), - AvgPooling2DLayer(10), - FlattenLayer(), - ForwardLayer(num_of_classes) - ] - nn_original = NeuralNetworkClassifier(InputConversionImage(image_dataset.input_size), layers, - OutputConversionImageToColumn()) + layers = [Convolutional2DLayer(1, 2), AvgPooling2DLayer(10), FlattenLayer(), ForwardLayer(num_of_classes)] + nn_original = NeuralNetworkClassifier( + InputConversionImage(image_dataset.input_size), layers, OutputConversionImageToColumn(), + ) nn = nn_original.fit(image_dataset, epoch_size=2) assert str(nn_original._model.state_dict().values()) != str(nn._model.state_dict().values()) assert nn._model.state_dict()["_pytorch_layers.3._layer.bias"].tolist() == layer_3_bias @@ -117,9 +162,11 @@ class TestImageToImageRegressor: (1234, device_cpu, [-0.1637762188911438, 0.02012808807194233, -0.22295698523521423, 0.1689515858888626]), (4711, device_cpu, [-0.030541712418198586, 
-0.15364733338356018, 0.1741572618484497, 0.015837203711271286]), ], - ids=["seed-1234-cuda", "seed-4711-cuda", "seed-1234-cpu", "seed-4711-cpu"] + ids=["seed-1234-cuda", "seed-4711-cuda", "seed-1234-cpu", "seed-4711-cpu"], ) - def test_should_train_and_predict_model(self, seed: int, snapshot_png_image_list: SnapshotAssertion, layer_3_bias: list[float], device: Device) -> None: + def test_should_train_and_predict_model( + self, seed: int, snapshot_png_image_list: SnapshotAssertion, layer_3_bias: list[float], device: Device, + ) -> None: skip_if_device_not_available(device) torch.set_default_device(device) torch.manual_seed(seed) @@ -135,8 +182,9 @@ def test_should_train_and_predict_model(self, seed: int, snapshot_png_image_list ConvolutionalTranspose2DLayer(6, 2), ConvolutionalTranspose2DLayer(4, 2), ] - nn_original = NeuralNetworkRegressor(InputConversionImage(image_dataset.input_size), layers, - OutputConversionImageToImage()) + nn_original = NeuralNetworkRegressor( + InputConversionImage(image_dataset.input_size), layers, OutputConversionImageToImage(), + ) nn = nn_original.fit(image_dataset, epoch_size=20) assert str(nn_original._model.state_dict().values()) != str(nn._model.state_dict().values()) assert nn._model.state_dict()["_pytorch_layers.3._layer.bias"].tolist() == layer_3_bias diff --git a/tests/safeds/ml/nn/test_convolutional2d_layer.py b/tests/safeds/ml/nn/test_convolutional2d_layer.py index c76625bd6..60509e3b8 100644 --- a/tests/safeds/ml/nn/test_convolutional2d_layer.py +++ b/tests/safeds/ml/nn/test_convolutional2d_layer.py @@ -1,30 +1,48 @@ -from typing import Literal, Type +from typing import Literal import pytest -from torch import nn - from safeds.data.image.typing import ImageSize from safeds.ml.nn import Convolutional2DLayer, ConvolutionalTranspose2DLayer +from torch import nn class TestConvolutional2DLayer: @pytest.mark.parametrize( ("activation_function", "activation_layer"), - [ - ("sigmoid", nn.Sigmoid), - ("relu", nn.ReLU), - ("softmax", nn.Softmax) - ], + [("sigmoid", nn.Sigmoid), ("relu", nn.ReLU), ("softmax", nn.Softmax)], ) @pytest.mark.parametrize( - ("conv_type", "torch_layer", "output_channel", "kernel_size", "stride", "padding", "out_channel", "out_width", "out_height"), + ( + "conv_type", + "torch_layer", + "output_channel", + "kernel_size", + "stride", + "padding", + "out_channel", + "out_width", + "out_height", + ), [ (Convolutional2DLayer, nn.Conv2d, 30, 2, 2, 2, 30, 7, 12), (ConvolutionalTranspose2DLayer, nn.ConvTranspose2d, 30, 2, 2, 2, 30, 16, 36), ], ) - def test_should_create_pooling_layer(self, activation_function: Literal["sigmoid", "relu", "softmax"], activation_layer: Type[nn.Module], conv_type: Type[Convolutional2DLayer], torch_layer: Type[nn.Module], output_channel: int, kernel_size: int, stride: int, padding: int, out_channel: int, out_width: int, out_height: int) -> None: + def test_should_create_pooling_layer( + self, + activation_function: Literal["sigmoid", "relu", "softmax"], + activation_layer: type[nn.Module], + conv_type: type[Convolutional2DLayer], + torch_layer: type[nn.Module], + output_channel: int, + kernel_size: int, + stride: int, + padding: int, + out_channel: int, + out_width: int, + out_height: int, + ) -> None: layer = conv_type(output_channel, kernel_size, stride=stride, padding=padding) input_size = ImageSize(10, 20, 30, _ignore_invalid_channel=True) layer._set_input_size(input_size) @@ -49,13 +67,27 @@ def test_should_create_pooling_layer(self, activation_function: Literal["sigmoid (ConvolutionalTranspose2DLayer, 30, 
2, 2, 2), ], ) - def test_should_raise_if_input_size_not_set(self, activation_function: Literal["sigmoid", "relu", "softmax"], conv_type: Type[Convolutional2DLayer], output_channel: int, kernel_size: int, stride: int, padding: int) -> None: + def test_should_raise_if_input_size_not_set( + self, + activation_function: Literal["sigmoid", "relu", "softmax"], + conv_type: type[Convolutional2DLayer], + output_channel: int, + kernel_size: int, + stride: int, + padding: int, + ) -> None: layer = conv_type(output_channel, kernel_size, stride=stride, padding=padding) with pytest.raises(ValueError, match=r"The input_size is not yet set."): layer.input_size # noqa: B018 - with pytest.raises(ValueError, match=r"The input_size is not yet set. The layer cannot compute the output_size if the input_size is not set."): + with pytest.raises( + ValueError, + match=r"The input_size is not yet set. The layer cannot compute the output_size if the input_size is not set.", + ): layer.output_size # noqa: B018 - with pytest.raises(ValueError, match=r"The input_size is not yet set. The internal layer can only be created when the input_size is set."): + with pytest.raises( + ValueError, + match=r"The input_size is not yet set. The internal layer can only be created when the input_size is set.", + ): layer._get_internal_layer(activation_function=activation_function) @pytest.mark.parametrize( @@ -65,11 +97,16 @@ def test_should_raise_if_input_size_not_set(self, activation_function: Literal[" (ConvolutionalTranspose2DLayer, 30, 2, 2, 2), ], ) - def test_should_raise_if_activation_function_not_set(self, conv_type: Type[Convolutional2DLayer], output_channel: int, kernel_size: int, stride: int, padding: int) -> None: + def test_should_raise_if_activation_function_not_set( + self, conv_type: type[Convolutional2DLayer], output_channel: int, kernel_size: int, stride: int, padding: int, + ) -> None: layer = conv_type(output_channel, kernel_size, stride=stride, padding=padding) input_size = ImageSize(10, 20, 30, _ignore_invalid_channel=True) layer._set_input_size(input_size) - with pytest.raises(ValueError, match=r"The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs."): + with pytest.raises( + ValueError, + match=r"The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs.", + ): layer._get_internal_layer() @pytest.mark.parametrize( @@ -79,11 +116,16 @@ def test_should_raise_if_activation_function_not_set(self, conv_type: Type[Convo (ConvolutionalTranspose2DLayer, 30, 2, 2, 2), ], ) - def test_should_raise_if_unsupported_activation_function_is_set(self, conv_type: Type[Convolutional2DLayer], output_channel: int, kernel_size: int, stride: int, padding: int) -> None: + def test_should_raise_if_unsupported_activation_function_is_set( + self, conv_type: type[Convolutional2DLayer], output_channel: int, kernel_size: int, stride: int, padding: int, + ) -> None: layer = conv_type(output_channel, kernel_size, stride=stride, padding=padding) input_size = ImageSize(10, 20, 30, _ignore_invalid_channel=True) layer._set_input_size(input_size) - with pytest.raises(ValueError, match=r"The activation_function 'unknown' is not supported. Please choose one of the following: \['sigmoid', 'relu', 'softmax'\]."): + with pytest.raises( + ValueError, + match=r"The activation_function 'unknown' is not supported. 
Please choose one of the following: \['sigmoid', 'relu', 'softmax'\].", + ): layer._get_internal_layer(activation_function="unknown") @pytest.mark.parametrize( @@ -93,7 +135,9 @@ def test_should_raise_if_unsupported_activation_function_is_set(self, conv_type: (ConvolutionalTranspose2DLayer, 30, 2, 2, 2), ], ) - def test_should_raise_if_input_size_is_set_with_int(self, conv_type: Type[Convolutional2DLayer], output_channel: int, kernel_size: int, stride: int, padding: int) -> None: + def test_should_raise_if_input_size_is_set_with_int( + self, conv_type: type[Convolutional2DLayer], output_channel: int, kernel_size: int, stride: int, padding: int, + ) -> None: layer = conv_type(output_channel, kernel_size, stride=stride, padding=padding) with pytest.raises(TypeError, match=r"The input_size of a convolution layer has to be of type ImageSize."): layer._set_input_size(1) diff --git a/tests/safeds/ml/nn/test_flatten_layer.py b/tests/safeds/ml/nn/test_flatten_layer.py index f95476341..a5d679383 100644 --- a/tests/safeds/ml/nn/test_flatten_layer.py +++ b/tests/safeds/ml/nn/test_flatten_layer.py @@ -1,8 +1,7 @@ import pytest -from torch import nn - from safeds.data.image.typing import ImageSize from safeds.ml.nn import FlattenLayer +from torch import nn class TestFlattenLayer: @@ -19,7 +18,10 @@ def test_should_raise_if_input_size_not_set(self) -> None: layer = FlattenLayer() with pytest.raises(ValueError, match=r"The input_size is not yet set."): layer.input_size # noqa: B018 - with pytest.raises(ValueError, match=r"The input_size is not yet set. The layer cannot compute the output_size if the input_size is not set."): + with pytest.raises( + ValueError, + match=r"The input_size is not yet set. The layer cannot compute the output_size if the input_size is not set.", + ): layer.output_size # noqa: B018 def test_should_raise_if_input_size_is_set_with_int(self) -> None: diff --git a/tests/safeds/ml/nn/test_forward_layer.py b/tests/safeds/ml/nn/test_forward_layer.py index 9fd8c2ece..28791c22c 100644 --- a/tests/safeds/ml/nn/test_forward_layer.py +++ b/tests/safeds/ml/nn/test_forward_layer.py @@ -2,11 +2,10 @@ from typing import Any import pytest -from torch import nn - from safeds.data.image.typing import ImageSize from safeds.exceptions import OutOfBoundsError from safeds.ml.nn import ForwardLayer +from torch import nn @pytest.mark.parametrize( @@ -47,8 +46,14 @@ def test_should_raise_if_input_size_doesnt_match(input_size: int) -> None: ids=["sigmoid", "relu", "softmax", "none"], ) def test_should_accept_activation_function(activation_function: str, expected_activation_function: type | None) -> None: - forward_layer = ForwardLayer(output_size=1, input_size=1)._get_internal_layer(activation_function=activation_function) - assert forward_layer._fn is None if expected_activation_function is None else isinstance(forward_layer._fn, expected_activation_function) + forward_layer = ForwardLayer(output_size=1, input_size=1)._get_internal_layer( + activation_function=activation_function, + ) + assert ( + forward_layer._fn is None + if expected_activation_function is None + else isinstance(forward_layer._fn, expected_activation_function) + ) @pytest.mark.parametrize( @@ -101,7 +106,10 @@ def test_should_raise_if_input_size_is_set_with_image_size() -> None: def test_should_raise_if_activation_function_not_set() -> None: layer = ForwardLayer(1) - with pytest.raises(ValueError, match=r"The activation_function is not set. 
The internal layer can only be created when the activation_function is provided in the kwargs."): + with pytest.raises( + ValueError, + match=r"The activation_function is not set. The internal layer can only be created when the activation_function is provided in the kwargs.", + ): layer._get_internal_layer() diff --git a/tests/safeds/ml/nn/test_input_conversion_image.py b/tests/safeds/ml/nn/test_input_conversion_image.py index 33ca77e70..d1da75ab0 100644 --- a/tests/safeds/ml/nn/test_input_conversion_image.py +++ b/tests/safeds/ml/nn/test_input_conversion_image.py @@ -1,10 +1,10 @@ import pytest - from safeds.data.image.containers import ImageList from safeds.data.labeled.containers import ImageDataset from safeds.data.tabular.containers import Column, Table from safeds.ml.nn import InputConversionImage -from tests.helpers import resolve_resource_path, images_all + +from tests.helpers import images_all, resolve_resource_path _test_image_list = ImageList.from_files(resolve_resource_path(images_all())).resize(10, 10) @@ -14,22 +14,58 @@ class TestIsFitDataValid: @pytest.mark.parametrize( ("image_dataset_valid", "image_dataset_invalid"), [ - (ImageDataset(_test_image_list, Column("images", images_all())), ImageDataset(_test_image_list, _test_image_list)), - (ImageDataset(_test_image_list, Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]})), ImageDataset(_test_image_list, _test_image_list)), - (ImageDataset(_test_image_list, _test_image_list), ImageDataset(_test_image_list, Column("images", images_all()))), - (ImageDataset(_test_image_list, _test_image_list), ImageDataset(_test_image_list, Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]}))), - (ImageDataset(_test_image_list, Column("images", images_all())), ImageDataset(_test_image_list.resize(20, 20), Column("images", images_all()))), - (ImageDataset(_test_image_list, Column("images", images_all())), ImageDataset(_test_image_list, Column("ims", images_all()))), - (ImageDataset(_test_image_list, Column("images", images_all())), ImageDataset(_test_image_list, Column("images", [s + "10" for s in images_all()]))), - (ImageDataset(_test_image_list, Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]})), ImageDataset(_test_image_list.resize(20, 20), Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]}))), - (ImageDataset(_test_image_list, Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]})), ImageDataset(_test_image_list, Table({"b": [0, 0, 1, 1, 0, 1, 0], "c": [1, 1, 0, 0, 1, 0, 1]}))), - (ImageDataset(_test_image_list, _test_image_list), ImageDataset(_test_image_list.resize(20, 20), _test_image_list)), - (ImageDataset(_test_image_list, _test_image_list), ImageDataset(_test_image_list, _test_image_list.resize(20, 20))), - ] + ( + ImageDataset(_test_image_list, Column("images", images_all())), + ImageDataset(_test_image_list, _test_image_list), + ), + ( + ImageDataset(_test_image_list, Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]})), + ImageDataset(_test_image_list, _test_image_list), + ), + ( + ImageDataset(_test_image_list, _test_image_list), + ImageDataset(_test_image_list, Column("images", images_all())), + ), + ( + ImageDataset(_test_image_list, _test_image_list), + ImageDataset(_test_image_list, Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]})), + ), + ( + ImageDataset(_test_image_list, Column("images", images_all())), + ImageDataset(_test_image_list.resize(20, 20), Column("images", images_all())), + ), + ( + ImageDataset(_test_image_list, 
Column("images", images_all())), + ImageDataset(_test_image_list, Column("ims", images_all())), + ), + ( + ImageDataset(_test_image_list, Column("images", images_all())), + ImageDataset(_test_image_list, Column("images", [s + "10" for s in images_all()])), + ), + ( + ImageDataset(_test_image_list, Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]})), + ImageDataset( + _test_image_list.resize(20, 20), Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]}), + ), + ), + ( + ImageDataset(_test_image_list, Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]})), + ImageDataset(_test_image_list, Table({"b": [0, 0, 1, 1, 0, 1, 0], "c": [1, 1, 0, 0, 1, 0, 1]})), + ), + ( + ImageDataset(_test_image_list, _test_image_list), + ImageDataset(_test_image_list.resize(20, 20), _test_image_list), + ), + ( + ImageDataset(_test_image_list, _test_image_list), + ImageDataset(_test_image_list, _test_image_list.resize(20, 20)), + ), + ], ) - def test_should_return_false_if_fit_data_is_invalid(self, image_dataset_valid: ImageDataset, image_dataset_invalid: ImageDataset) -> None: + def test_should_return_false_if_fit_data_is_invalid( + self, image_dataset_valid: ImageDataset, image_dataset_invalid: ImageDataset, + ) -> None: input_conversion = InputConversionImage(image_dataset_valid.input_size) assert input_conversion._is_fit_data_valid(image_dataset_valid) assert input_conversion._is_fit_data_valid(image_dataset_valid) assert not input_conversion._is_fit_data_valid(image_dataset_invalid) - diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index 5f4008846..582965218 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -1,17 +1,28 @@ import pytest - from safeds.data.image.typing import ImageSize from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.containers import Table -from safeds.exceptions import FeatureDataMismatchError, InputSizeError, ModelNotFittedError, OutOfBoundsError, \ - InvalidModelStructureError +from safeds.exceptions import ( + FeatureDataMismatchError, + InputSizeError, + InvalidModelStructureError, + ModelNotFittedError, + OutOfBoundsError, +) from safeds.ml.nn import ( + AvgPooling2DLayer, + Convolutional2DLayer, + ConvolutionalTranspose2DLayer, + FlattenLayer, ForwardLayer, + InputConversionImage, InputConversionTable, + MaxPooling2DLayer, NeuralNetworkClassifier, NeuralNetworkRegressor, - OutputConversionTable, OutputConversionImageToTable, OutputConversionImageToImage, Convolutional2DLayer, - ConvolutionalTranspose2DLayer, MaxPooling2DLayer, AvgPooling2DLayer, FlattenLayer, InputConversionImage, + OutputConversionImageToImage, + OutputConversionImageToTable, + OutputConversionTable, ) from safeds.ml.nn._input_conversion import _InputConversion from safeds.ml.nn._layer import _Layer @@ -240,40 +251,201 @@ def callback_was_called(self) -> bool: @pytest.mark.parametrize( ("input_conversion", "layers", "output_conversion", "error_msg"), [ - (InputConversionTable([], ""), [FlattenLayer()], OutputConversionImageToTable(), r"The defined model uses an output conversion for images but no input conversion for images."), - (InputConversionTable([], ""), [FlattenLayer()], OutputConversionImageToColumn(), r"The defined model uses an output conversion for images but no input conversion for images."), - (InputConversionTable([], ""), [FlattenLayer()], OutputConversionImageToImage(), r"A NeuralNetworkClassifier cannot be used with images as output."), - (InputConversionTable([], 
""), [Convolutional2DLayer(1, 1)], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionTable([], ""), [ConvolutionalTranspose2DLayer(1, 1)], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionTable([], ""), [MaxPooling2DLayer(1)], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionTable([], ""), [AvgPooling2DLayer(1)], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionTable([], ""), [FlattenLayer()], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer()], OutputConversionTable(), r"The defined model uses an input conversion for images but no output conversion for images."), - (InputConversionImage(ImageSize(1, 1, 1)), [Convolutional2DLayer(1, 1)], OutputConversionImageToTable(), r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [Convolutional2DLayer(1, 1)], OutputConversionImageToColumn(), r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [ConvolutionalTranspose2DLayer(1, 1)], OutputConversionImageToTable(), r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [ConvolutionalTranspose2DLayer(1, 1)], OutputConversionImageToColumn(), r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [MaxPooling2DLayer(1)], OutputConversionImageToTable(), r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [MaxPooling2DLayer(1)], OutputConversionImageToColumn(), r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [AvgPooling2DLayer(1)], OutputConversionImageToTable(), r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [AvgPooling2DLayer(1)], OutputConversionImageToColumn(), r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), Convolutional2DLayer(1, 1)], OutputConversionImageToTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), Convolutional2DLayer(1, 1)], OutputConversionImageToColumn(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), ConvolutionalTranspose2DLayer(1, 1)], OutputConversionImageToTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), ConvolutionalTranspose2DLayer(1, 1)], OutputConversionImageToColumn(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), MaxPooling2DLayer(1)], OutputConversionImageToTable(), r"You cannot use a 2-dimensional layer with 
1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), MaxPooling2DLayer(1)], OutputConversionImageToColumn(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), AvgPooling2DLayer(1)], OutputConversionImageToTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), AvgPooling2DLayer(1)], OutputConversionImageToColumn(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), FlattenLayer()], OutputConversionImageToTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), FlattenLayer()], OutputConversionImageToColumn(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [ForwardLayer(1)], OutputConversionImageToTable(), r"The 2-dimensional data has to be flattened before using a 1-dimensional layer."), - (InputConversionImage(ImageSize(1, 1, 1)), [ForwardLayer(1)], OutputConversionImageToColumn(), r"The 2-dimensional data has to be flattened before using a 1-dimensional layer."), - (InputConversionImage(ImageSize(1, 1, 1)), [], OutputConversionImageToTable(), r"You need to provide at least one layer to a neural network."), - (InputConversionImage(ImageSize(1, 1, 1)), [], OutputConversionImageToColumn(), r"You need to provide at least one layer to a neural network."), - ] + ( + InputConversionTable([], ""), + [FlattenLayer()], + OutputConversionImageToTable(), + r"The defined model uses an output conversion for images but no input conversion for images.", + ), + ( + InputConversionTable([], ""), + [FlattenLayer()], + OutputConversionImageToColumn(), + r"The defined model uses an output conversion for images but no input conversion for images.", + ), + ( + InputConversionTable([], ""), + [FlattenLayer()], + OutputConversionImageToImage(), + r"A NeuralNetworkClassifier cannot be used with images as output.", + ), + ( + InputConversionTable([], ""), + [Convolutional2DLayer(1, 1)], + OutputConversionTable(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionTable([], ""), + [ConvolutionalTranspose2DLayer(1, 1)], + OutputConversionTable(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionTable([], ""), + [MaxPooling2DLayer(1)], + OutputConversionTable(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionTable([], ""), + [AvgPooling2DLayer(1)], + OutputConversionTable(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionTable([], ""), + [FlattenLayer()], + OutputConversionTable(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer()], + OutputConversionTable(), + r"The defined model uses an input conversion for images but no output conversion for images.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [Convolutional2DLayer(1, 1)], + OutputConversionImageToTable(), + r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [Convolutional2DLayer(1, 1)], + OutputConversionImageToColumn(), + r"The output data would be 2-dimensional but the 
provided output conversion uses 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [ConvolutionalTranspose2DLayer(1, 1)], + OutputConversionImageToTable(), + r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [ConvolutionalTranspose2DLayer(1, 1)], + OutputConversionImageToColumn(), + r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [MaxPooling2DLayer(1)], + OutputConversionImageToTable(), + r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [MaxPooling2DLayer(1)], + OutputConversionImageToColumn(), + r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [AvgPooling2DLayer(1)], + OutputConversionImageToTable(), + r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [AvgPooling2DLayer(1)], + OutputConversionImageToColumn(), + r"The output data would be 2-dimensional but the provided output conversion uses 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer(), Convolutional2DLayer(1, 1)], + OutputConversionImageToTable(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer(), Convolutional2DLayer(1, 1)], + OutputConversionImageToColumn(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer(), ConvolutionalTranspose2DLayer(1, 1)], + OutputConversionImageToTable(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer(), ConvolutionalTranspose2DLayer(1, 1)], + OutputConversionImageToColumn(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer(), MaxPooling2DLayer(1)], + OutputConversionImageToTable(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer(), MaxPooling2DLayer(1)], + OutputConversionImageToColumn(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer(), AvgPooling2DLayer(1)], + OutputConversionImageToTable(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer(), AvgPooling2DLayer(1)], + OutputConversionImageToColumn(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer(), FlattenLayer()], + OutputConversionImageToTable(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer(), FlattenLayer()], + OutputConversionImageToColumn(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [ForwardLayer(1)], + OutputConversionImageToTable(), + r"The 2-dimensional data has to be flattened 
before using a 1-dimensional layer.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [ForwardLayer(1)], + OutputConversionImageToColumn(), + r"The 2-dimensional data has to be flattened before using a 1-dimensional layer.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [], + OutputConversionImageToTable(), + r"You need to provide at least one layer to a neural network.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [], + OutputConversionImageToColumn(), + r"You need to provide at least one layer to a neural network.", + ), + ], ) - def test_should_raise_if_model_has_invalid_structure(self, input_conversion: _InputConversion, layers: list[_Layer], output_conversion: _OutputConversion, error_msg: str) -> None: + def test_should_raise_if_model_has_invalid_structure( + self, + input_conversion: _InputConversion, + layers: list[_Layer], + output_conversion: _OutputConversion, + error_msg: str, + ) -> None: with pytest.raises(InvalidModelStructureError, match=error_msg): NeuralNetworkClassifier(input_conversion, layers, output_conversion) @@ -473,26 +645,122 @@ def callback_was_called(self) -> bool: @pytest.mark.parametrize( ("input_conversion", "layers", "output_conversion", "error_msg"), [ - (InputConversionTable([], ""), [FlattenLayer()], OutputConversionImageToImage(), r"The defined model uses an output conversion for images but no input conversion for images."), - (InputConversionTable([], ""), [Convolutional2DLayer(1, 1)], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionTable([], ""), [ConvolutionalTranspose2DLayer(1, 1)], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionTable([], ""), [MaxPooling2DLayer(1)], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionTable([], ""), [AvgPooling2DLayer(1)], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionTable([], ""), [FlattenLayer()], OutputConversionTable(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer()], OutputConversionTable(), r"The defined model uses an input conversion for images but no output conversion for images."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer()], OutputConversionImageToImage(), r"The output data would be 1-dimensional but the provided output conversion uses 2-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), ForwardLayer(1)], OutputConversionImageToImage(), r"The output data would be 1-dimensional but the provided output conversion uses 2-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), Convolutional2DLayer(1, 1)], OutputConversionImageToImage(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), ConvolutionalTranspose2DLayer(1, 1)], OutputConversionImageToImage(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), MaxPooling2DLayer(1)], OutputConversionImageToImage(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), AvgPooling2DLayer(1)], OutputConversionImageToImage(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - 
(InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer(), FlattenLayer()], OutputConversionImageToImage(), r"You cannot use a 2-dimensional layer with 1-dimensional data."), - (InputConversionImage(ImageSize(1, 1, 1)), [ForwardLayer(1)], OutputConversionImageToImage(), r"The 2-dimensional data has to be flattened before using a 1-dimensional layer."), - (InputConversionImage(ImageSize(1, 1, 1)), [], OutputConversionImageToImage(), r"You need to provide at least one layer to a neural network."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer()], OutputConversionImageToTable(), r"A NeuralNetworkRegressor cannot be used with images as input and 1-dimensional data as output."), - (InputConversionImage(ImageSize(1, 1, 1)), [FlattenLayer()], OutputConversionImageToColumn(), r"A NeuralNetworkRegressor cannot be used with images as input and 1-dimensional data as output."), - ] + ( + InputConversionTable([], ""), + [FlattenLayer()], + OutputConversionImageToImage(), + r"The defined model uses an output conversion for images but no input conversion for images.", + ), + ( + InputConversionTable([], ""), + [Convolutional2DLayer(1, 1)], + OutputConversionTable(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionTable([], ""), + [ConvolutionalTranspose2DLayer(1, 1)], + OutputConversionTable(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionTable([], ""), + [MaxPooling2DLayer(1)], + OutputConversionTable(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionTable([], ""), + [AvgPooling2DLayer(1)], + OutputConversionTable(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionTable([], ""), + [FlattenLayer()], + OutputConversionTable(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer()], + OutputConversionTable(), + r"The defined model uses an input conversion for images but no output conversion for images.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer()], + OutputConversionImageToImage(), + r"The output data would be 1-dimensional but the provided output conversion uses 2-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer(), ForwardLayer(1)], + OutputConversionImageToImage(), + r"The output data would be 1-dimensional but the provided output conversion uses 2-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer(), Convolutional2DLayer(1, 1)], + OutputConversionImageToImage(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer(), ConvolutionalTranspose2DLayer(1, 1)], + OutputConversionImageToImage(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer(), MaxPooling2DLayer(1)], + OutputConversionImageToImage(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer(), AvgPooling2DLayer(1)], + OutputConversionImageToImage(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer(), FlattenLayer()], + OutputConversionImageToImage(), + r"You cannot use a 2-dimensional layer with 1-dimensional data.", + ), + 
( + InputConversionImage(ImageSize(1, 1, 1)), + [ForwardLayer(1)], + OutputConversionImageToImage(), + r"The 2-dimensional data has to be flattened before using a 1-dimensional layer.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [], + OutputConversionImageToImage(), + r"You need to provide at least one layer to a neural network.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer()], + OutputConversionImageToTable(), + r"A NeuralNetworkRegressor cannot be used with images as input and 1-dimensional data as output.", + ), + ( + InputConversionImage(ImageSize(1, 1, 1)), + [FlattenLayer()], + OutputConversionImageToColumn(), + r"A NeuralNetworkRegressor cannot be used with images as input and 1-dimensional data as output.", + ), + ], ) - def test_should_raise_if_model_has_invalid_structure(self, input_conversion: _InputConversion, layers: list[_Layer], output_conversion: _OutputConversion, error_msg: str) -> None: + def test_should_raise_if_model_has_invalid_structure( + self, + input_conversion: _InputConversion, + layers: list[_Layer], + output_conversion: _OutputConversion, + error_msg: str, + ) -> None: with pytest.raises(InvalidModelStructureError, match=error_msg): NeuralNetworkRegressor(input_conversion, layers, output_conversion) diff --git a/tests/safeds/ml/nn/test_output_conversion_image.py b/tests/safeds/ml/nn/test_output_conversion_image.py index 816a4bc32..f786df0d6 100644 --- a/tests/safeds/ml/nn/test_output_conversion_image.py +++ b/tests/safeds/ml/nn/test_output_conversion_image.py @@ -1,10 +1,9 @@ import pytest import torch - from safeds.data.image.containers._multi_size_image_list import _MultiSizeImageList from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from safeds.data.tabular.transformation import OneHotEncoder -from safeds.ml.nn import OutputConversionImageToTable, OutputConversionImageToImage, OutputConversionImageToColumn +from safeds.ml.nn import OutputConversionImageToColumn, OutputConversionImageToImage, OutputConversionImageToTable from safeds.ml.nn._output_conversion import _OutputConversion @@ -16,7 +15,7 @@ class TestDataConversionImage: (OutputConversionImageToColumn(), {"column_name": "a", "one_hot_encoder": OneHotEncoder()}), (OutputConversionImageToTable(), {"column_names": ["a"]}), (OutputConversionImageToImage(), {}), - ] + ], ) def test_should_raise_if_input_data_is_multi_size(self, output_conversion: _OutputConversion, kwargs: dict) -> None: with pytest.raises(ValueError, match=r"The given input ImageList contains images of different sizes."): @@ -26,16 +25,31 @@ def test_should_raise_if_input_data_is_multi_size(self, output_conversion: _Outp class TestOutputConversionImageToColumn: def test_should_raise_if_column_name_not_set(self) -> None: - with pytest.raises(ValueError, match=r"The column_name is not set. The data can only be converted if the column_name is provided as `str` in the kwargs."): - OutputConversionImageToColumn()._data_conversion(input_data=_SingleSizeImageList(), output_data=torch.empty(1), one_hot_encoder=OneHotEncoder()) + with pytest.raises( + ValueError, + match=r"The column_name is not set. 
The data can only be converted if the column_name is provided as `str` in the kwargs.", + ): + OutputConversionImageToColumn()._data_conversion( + input_data=_SingleSizeImageList(), output_data=torch.empty(1), one_hot_encoder=OneHotEncoder(), + ) def test_should_raise_if_one_hot_encoder_not_set(self) -> None: - with pytest.raises(ValueError, match=r"The one_hot_encoder is not set. The data can only be converted if the one_hot_encoder is provided as `OneHotEncoder` in the kwargs."): - OutputConversionImageToColumn()._data_conversion(input_data=_SingleSizeImageList(), output_data=torch.empty(1), column_name="column_name") + with pytest.raises( + ValueError, + match=r"The one_hot_encoder is not set. The data can only be converted if the one_hot_encoder is provided as `OneHotEncoder` in the kwargs.", + ): + OutputConversionImageToColumn()._data_conversion( + input_data=_SingleSizeImageList(), output_data=torch.empty(1), column_name="column_name", + ) class TestOutputConversionImageToTable: def test_should_raise_if_column_names_not_set(self) -> None: - with pytest.raises(ValueError, match=r"The column_names are not set. The data can only be converted if the column_names are provided as `list\[str\]` in the kwargs."): - OutputConversionImageToTable()._data_conversion(input_data=_SingleSizeImageList(), output_data=torch.empty(1)) + with pytest.raises( + ValueError, + match=r"The column_names are not set. The data can only be converted if the column_names are provided as `list\[str\]` in the kwargs.", + ): + OutputConversionImageToTable()._data_conversion( + input_data=_SingleSizeImageList(), output_data=torch.empty(1), + ) diff --git a/tests/safeds/ml/nn/test_pooling2d_layer.py b/tests/safeds/ml/nn/test_pooling2d_layer.py index 1af4d279d..e6e389a80 100644 --- a/tests/safeds/ml/nn/test_pooling2d_layer.py +++ b/tests/safeds/ml/nn/test_pooling2d_layer.py @@ -1,10 +1,9 @@ -from typing import Literal, Type +from typing import Literal import pytest -from torch import nn - from safeds.data.image.typing import ImageSize from safeds.ml.nn._pooling2d_layer import _Pooling2DLayer +from torch import nn class TestPooling2DLayer: @@ -16,7 +15,7 @@ class TestPooling2DLayer: ("avg", nn.AvgPool2d), ], ) - def test_should_create_pooling_layer(self, strategy: Literal["max", "avg"], torch_layer: Type[nn.Module]) -> None: + def test_should_create_pooling_layer(self, strategy: Literal["max", "avg"], torch_layer: type[nn.Module]) -> None: layer = _Pooling2DLayer(strategy, 2, stride=2, padding=2) input_size = ImageSize(10, 20, 30, _ignore_invalid_channel=True) with pytest.raises(TypeError, match=r"The input_size of a pooling layer has to be of type ImageSize."): @@ -37,7 +36,10 @@ def test_should_raise_if_input_size_not_set(self, strategy: Literal["max", "avg" layer = _Pooling2DLayer(strategy, 2, stride=2, padding=2) with pytest.raises(ValueError, match=r"The input_size is not yet set."): layer.input_size # noqa: B018 - with pytest.raises(ValueError, match=r"The input_size is not yet set. The layer cannot compute the output_size if the input_size is not set."): + with pytest.raises( + ValueError, + match=r"The input_size is not yet set. 
The layer cannot compute the output_size if the input_size is not set.", + ): layer.output_size # noqa: B018 @pytest.mark.parametrize( From 8b50d3bf210b329b6ddba4f9a0b6ad27fd9f72b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Thu, 2 May 2024 01:21:17 +0200 Subject: [PATCH 31/42] refactor: ruff linter --- src/safeds/ml/nn/_input_conversion_image.py | 5 ++++- src/safeds/ml/nn/_output_conversion_image.py | 14 +++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/safeds/ml/nn/_input_conversion_image.py b/src/safeds/ml/nn/_input_conversion_image.py index 37d4553f5..5da5b7edd 100644 --- a/src/safeds/ml/nn/_input_conversion_image.py +++ b/src/safeds/ml/nn/_input_conversion_image.py @@ -38,7 +38,10 @@ def _data_size(self) -> ImageSize: return self._input_size def _data_conversion_fit( - self, input_data: ImageDataset, batch_size: int, num_of_classes: int = 1, # noqa: ARG002 + self, + input_data: ImageDataset, + batch_size: int, # noqa: ARG002 + num_of_classes: int = 1, # noqa: ARG002 ) -> ImageDataset: return input_data diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index 1026745d2..fab38f181 100644 --- a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -1,5 +1,6 @@ from __future__ import annotations +from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Any from safeds.data.image.containers import ImageList @@ -15,11 +16,14 @@ from safeds.ml.nn._output_conversion import _OutputConversion -class _OutputConversionImage: - pass # pragma: no cover +class _OutputConversionImage(_OutputConversion[ImageList, ImageDataset], ABC): + @abstractmethod + def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> ImageDataset: + pass -class OutputConversionImageToColumn(_OutputConversion[ImageList, ImageDataset], _OutputConversionImage): + +class OutputConversionImageToColumn(_OutputConversionImage): def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> ImageDataset[Column]: import torch @@ -51,7 +55,7 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: return im_dataset -class OutputConversionImageToTable(_OutputConversion[ImageList, ImageDataset], _OutputConversionImage): +class OutputConversionImageToTable(_OutputConversionImage): def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> ImageDataset[Table]: import torch @@ -82,7 +86,7 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: return im_dataset -class OutputConversionImageToImage(_OutputConversion[ImageList, ImageDataset], _OutputConversionImage): +class OutputConversionImageToImage(_OutputConversionImage): def _data_conversion( self, input_data: ImageList, output_data: Tensor, **kwargs: Any, # noqa: ARG002 From 8bd40b10e31fa6d82ae75b86bf912df3597ce04a Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Wed, 1 May 2024 23:23:02 +0000 Subject: [PATCH 32/42] style: apply automated linter fixes --- .../data/image/containers/_image_list.py | 9 ++-- .../data/labeled/containers/_image_dataset.py | 11 ++--- src/safeds/ml/nn/_convolutional2d_layer.py | 17 ++++++- src/safeds/ml/nn/_input_conversion.py | 5 ++- src/safeds/ml/nn/_model.py | 44 ++++++++++--------- src/safeds/ml/nn/_output_conversion_image.py | 8 +++- .../data/image/containers/test_image_list.py | 6 ++- 
.../labeled/containers/test_image_dataset.py | 12 ++++- tests/safeds/ml/nn/test_cnn_workflow.py | 30 ++++++++++--- .../ml/nn/test_convolutional2d_layer.py | 21 +++++++-- .../ml/nn/test_input_conversion_image.py | 7 ++- .../ml/nn/test_output_conversion_image.py | 11 +++-- 12 files changed, 129 insertions(+), 52 deletions(-) diff --git a/src/safeds/data/image/containers/_image_list.py b/src/safeds/data/image/containers/_image_list.py index 0af2c6931..ad3043e95 100644 --- a/src/safeds/data/image/containers/_image_list.py +++ b/src/safeds/data/image/containers/_image_list.py @@ -91,18 +91,21 @@ def from_files(path: str | Path | Sequence[str | Path], return_filenames: Litera @staticmethod @overload def from_files( - path: str | Path | Sequence[str | Path], return_filenames: Literal[True], + path: str | Path | Sequence[str | Path], + return_filenames: Literal[True], ) -> tuple[ImageList, list[str]]: ... @staticmethod @overload def from_files( - path: str | Path | Sequence[str | Path], return_filenames: bool, + path: str | Path | Sequence[str | Path], + return_filenames: bool, ) -> ImageList | tuple[ImageList, list[str]]: ... @staticmethod def from_files( - path: str | Path | Sequence[str | Path], return_filenames: bool = False, + path: str | Path | Sequence[str | Path], + return_filenames: bool = False, ) -> ImageList | tuple[ImageList, list[str]]: """ Create an ImageList from a directory or a list of files. diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py index 592f291bf..1ae0765df 100644 --- a/src/safeds/data/labeled/containers/_image_dataset.py +++ b/src/safeds/data/labeled/containers/_image_dataset.py @@ -56,10 +56,9 @@ def __init__(self, input_data: ImageList, output_data: T, batch_size: int = 1, s else: self._input_size: ImageSize = ImageSize(input_data.widths[0], input_data.heights[0], input_data.channel) self._input: _SingleSizeImageList = input_data._as_single_size_image_list() - if ( - (isinstance(output_data, Column | Table)) - and len(input_data) != output_data.number_of_rows - ) or (isinstance(output_data, ImageList) and len(input_data) != len(output_data)): + if ((isinstance(output_data, Column | Table)) and len(input_data) != output_data.number_of_rows) or ( + isinstance(output_data, ImageList) and len(input_data) != len(output_data) + ): if isinstance(output_data, Table): output_len = output_data.number_of_rows else: @@ -281,5 +280,7 @@ def _from_tensor(tensor: Tensor, column_name: str, one_hot_encoder: OneHotEncode return table_as_tensor def _to_column(self) -> Column: - table = Table(dict(zip(self._one_hot_encoder.get_names_of_added_columns(), self._tensor.T.tolist(), strict=False))) + table = Table( + dict(zip(self._one_hot_encoder.get_names_of_added_columns(), self._tensor.T.tolist(), strict=False)), + ) return self._one_hot_encoder.inverse_transform(table).get_column(self._column_name) diff --git a/src/safeds/ml/nn/_convolutional2d_layer.py b/src/safeds/ml/nn/_convolutional2d_layer.py index 751aca9fb..2dc4e50e8 100644 --- a/src/safeds/ml/nn/_convolutional2d_layer.py +++ b/src/safeds/ml/nn/_convolutional2d_layer.py @@ -65,7 +65,14 @@ def forward(self, x: Tensor) -> Tensor: return self._fn(self._layer(x)) return _InternalLayer( - input_size, output_size, kernel_size, activation_function, padding, stride, transpose, output_padding, + input_size, + output_size, + kernel_size, + activation_function, + padding, + stride, + transpose, + output_padding, ) @@ -175,7 +182,13 @@ def _set_input_size(self, 
input_size: int | ImageSize) -> None: class ConvolutionalTranspose2DLayer(Convolutional2DLayer): def __init__( - self, output_channel: int, kernel_size: int, *, stride: int = 1, padding: int = 0, output_padding: int = 0, + self, + output_channel: int, + kernel_size: int, + *, + stride: int = 1, + padding: int = 0, + output_padding: int = 0, ): """ Create a Convolutional Transpose 2D Layer. diff --git a/src/safeds/ml/nn/_input_conversion.py b/src/safeds/ml/nn/_input_conversion.py index 71bfe1a1e..c2f5f519f 100644 --- a/src/safeds/ml/nn/_input_conversion.py +++ b/src/safeds/ml/nn/_input_conversion.py @@ -27,7 +27,10 @@ def _data_size(self) -> int | ImageSize: @abstractmethod def _data_conversion_fit( - self, input_data: FT, batch_size: int, num_of_classes: int = 1, + self, + input_data: FT, + batch_size: int, + num_of_classes: int = 1, ) -> DataLoader | ImageDataset: pass # pragma: no cover diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 7b08c70d6..662dc70d3 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -79,9 +79,7 @@ def __init__( ) data_dimensions = 2 for layer in layers: - if data_dimensions == 2 and ( - isinstance(layer, Convolutional2DLayer | _Pooling2DLayer) - ): + if data_dimensions == 2 and (isinstance(layer, Convolutional2DLayer | _Pooling2DLayer)): continue elif data_dimensions == 2 and isinstance(layer, FlattenLayer): data_dimensions = 1 @@ -89,9 +87,11 @@ def __init__( continue else: raise InvalidModelStructureError( - "The 2-dimensional data has to be flattened before using a 1-dimensional layer." - if data_dimensions == 2 - else "You cannot use a 2-dimensional layer with 1-dimensional data.", + ( + "The 2-dimensional data has to be flattened before using a 1-dimensional layer." + if data_dimensions == 2 + else "You cannot use a 2-dimensional layer with 1-dimensional data." + ), ) if data_dimensions == 1 and isinstance(output_conversion, OutputConversionImageToImage): raise InvalidModelStructureError( @@ -103,9 +103,7 @@ def __init__( ) else: for layer in layers: - if ( - isinstance(layer, Convolutional2DLayer | FlattenLayer | _Pooling2DLayer) - ): + if isinstance(layer, Convolutional2DLayer | FlattenLayer | _Pooling2DLayer): raise InvalidModelStructureError("You cannot use a 2-dimensional layer with 1-dimensional data.") self._input_conversion: _InputConversion[IFT, IPT] = input_conversion @@ -241,7 +239,9 @@ def predict(self, test_data: IPT) -> OT: elem = self._model(x) predictions.append(elem.squeeze(dim=1)) return self._output_conversion._data_conversion( - test_data, torch.cat(predictions, dim=0), **self._input_conversion._get_output_configuration(), + test_data, + torch.cat(predictions, dim=0), + **self._input_conversion._get_output_configuration(), ) @property @@ -286,9 +286,7 @@ def __init__( ) data_dimensions = 2 for layer in layers: - if data_dimensions == 2 and ( - isinstance(layer, Convolutional2DLayer | _Pooling2DLayer) - ): + if data_dimensions == 2 and (isinstance(layer, Convolutional2DLayer | _Pooling2DLayer)): continue elif data_dimensions == 2 and isinstance(layer, FlattenLayer): data_dimensions = 1 @@ -296,9 +294,11 @@ def __init__( continue else: raise InvalidModelStructureError( - "The 2-dimensional data has to be flattened before using a 1-dimensional layer." - if data_dimensions == 2 - else "You cannot use a 2-dimensional layer with 1-dimensional data.", + ( + "The 2-dimensional data has to be flattened before using a 1-dimensional layer." 
+ if data_dimensions == 2 + else "You cannot use a 2-dimensional layer with 1-dimensional data." + ), ) if data_dimensions == 2 and ( isinstance(output_conversion, OutputConversionImageToColumn | OutputConversionImageToTable) @@ -312,9 +312,7 @@ def __init__( ) else: for layer in layers: - if ( - isinstance(layer, Convolutional2DLayer | FlattenLayer | _Pooling2DLayer) - ): + if isinstance(layer, Convolutional2DLayer | FlattenLayer | _Pooling2DLayer): raise InvalidModelStructureError("You cannot use a 2-dimensional layer with 1-dimensional data.") self._input_conversion: _InputConversion[IFT, IPT] = input_conversion @@ -463,7 +461,9 @@ def predict(self, test_data: IPT) -> OT: else: predictions.append(elem.squeeze(dim=1).round()) return self._output_conversion._data_conversion( - test_data, torch.cat(predictions, dim=0), **self._input_conversion._get_output_configuration(), + test_data, + torch.cat(predictions, dim=0), + **self._input_conversion._get_output_configuration(), ) @property @@ -473,7 +473,9 @@ def is_fitted(self) -> bool: def _create_internal_model( - input_conversion: _InputConversion[IFT, IPT], layers: list[_Layer], is_for_classification: bool, + input_conversion: _InputConversion[IFT, IPT], + layers: list[_Layer], + is_for_classification: bool, ) -> nn.Module: from torch import nn diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index fab38f181..5bc67f8ed 100644 --- a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -89,7 +89,10 @@ def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: class OutputConversionImageToImage(_OutputConversionImage): def _data_conversion( - self, input_data: ImageList, output_data: Tensor, **kwargs: Any, # noqa: ARG002 + self, + input_data: ImageList, + output_data: Tensor, + **kwargs: Any, # noqa: ARG002 ) -> ImageDataset[ImageList]: import torch @@ -99,6 +102,7 @@ def _data_conversion( return ImageDataset[ImageList]( input_data, _SingleSizeImageList._create_from_tensor( - (output_data * 255).to(torch.uint8), list(range(output_data.size(dim=0))), + (output_data * 255).to(torch.uint8), + list(range(output_data.size(dim=0))), ), ) diff --git a/tests/safeds/data/image/containers/test_image_list.py b/tests/safeds/data/image/containers/test_image_list.py index e0a3a9a73..341893410 100644 --- a/tests/safeds/data/image/containers/test_image_list.py +++ b/tests/safeds/data/image/containers/test_image_list.py @@ -458,7 +458,8 @@ def test_from_files_creation(self, resource_path: str | Path, snapshot_png_image torch.set_default_device(torch.device("cpu")) image_list = ImageList.from_files(resolve_resource_path(resource_path)) image_list_returned_filenames, filenames = ImageList.from_files( - resolve_resource_path(resource_path), return_filenames=True, + resolve_resource_path(resource_path), + return_filenames=True, ) assert image_list == snapshot_png_image_list assert image_list == image_list_returned_filenames @@ -1253,7 +1254,8 @@ def test_create_from_tensor_4_dim(self, tensor: Tensor) -> None: @pytest.mark.parametrize("tensor", [torch.ones(4, 3, 1, 1, 1), torch.ones(4, 3)], ids=["5-dim", "2-dim"]) def test_should_raise_from_invalid_tensor(self, tensor: Tensor) -> None: with pytest.raises( - ValueError, match=rf"Invalid Tensor. This Tensor requires 3 or 4 dimensions but has {tensor.dim()}", + ValueError, + match=rf"Invalid Tensor. 
This Tensor requires 3 or 4 dimensions but has {tensor.dim()}", ): _SingleSizeImageList._create_from_tensor(tensor, list(range(tensor.size(0)))) diff --git a/tests/safeds/data/labeled/containers/test_image_dataset.py b/tests/safeds/data/labeled/containers/test_image_dataset.py index c3437ba55..c3b1bee1a 100644 --- a/tests/safeds/data/labeled/containers/test_image_dataset.py +++ b/tests/safeds/data/labeled/containers/test_image_dataset.py @@ -87,7 +87,11 @@ class TestImageDatasetInit: ], ) def test_should_raise_with_invalid_data( - self, input_data: ImageList, output_data: T, error: type[Exception], error_msg: str, + self, + input_data: ImageList, + output_data: T, + error: type[Exception], + error_msg: str, ) -> None: with pytest.raises(error, match=error_msg): ImageDataset(input_data, output_data) @@ -187,7 +191,11 @@ class TestColumnAsTensor: ], ) def test_should_raise_from_tensor( - self, tensor: Tensor, one_hot_encoder: OneHotEncoder, error: type[Exception], error_msg: str, + self, + tensor: Tensor, + one_hot_encoder: OneHotEncoder, + error: type[Exception], + error_msg: str, ) -> None: with pytest.raises(error, match=error_msg): _ColumnAsTensor._from_tensor(tensor, "a", one_hot_encoder) diff --git a/tests/safeds/ml/nn/test_cnn_workflow.py b/tests/safeds/ml/nn/test_cnn_workflow.py index 4e92d3687..bd15fd86e 100644 --- a/tests/safeds/ml/nn/test_cnn_workflow.py +++ b/tests/safeds/ml/nn/test_cnn_workflow.py @@ -62,7 +62,11 @@ class TestImageToTableClassifier: ids=["seed-1234-cuda", "seed-4711-cuda", "seed-1234-cpu", "seed-4711-cpu"], ) def test_should_train_and_predict_model( - self, seed: int, layer_3_bias: list[float], prediction_label: list[str], device: Device, + self, + seed: int, + layer_3_bias: list[float], + prediction_label: list[str], + device: Device, ) -> None: skip_if_device_not_available(device) torch.set_default_device(device) @@ -82,7 +86,9 @@ def test_should_train_and_predict_model( num_of_classes: int = image_dataset.output_size if isinstance(image_dataset.output_size, int) else 0 layers = [Convolutional2DLayer(1, 2), MaxPooling2DLayer(10), FlattenLayer(), ForwardLayer(num_of_classes)] nn_original = NeuralNetworkClassifier( - InputConversionImage(image_dataset.input_size), layers, OutputConversionImageToTable(), + InputConversionImage(image_dataset.input_size), + layers, + OutputConversionImageToTable(), ) nn = nn_original.fit(image_dataset, epoch_size=2) assert str(nn_original._model.state_dict().values()) != str(nn._model.state_dict().values()) @@ -124,7 +130,11 @@ class TestImageToColumnClassifier: ids=["seed-1234-cuda", "seed-4711-cuda", "seed-1234-cpu", "seed-4711-cpu"], ) def test_should_train_and_predict_model( - self, seed: int, layer_3_bias: list[float], prediction_label: list[str], device: Device, + self, + seed: int, + layer_3_bias: list[float], + prediction_label: list[str], + device: Device, ) -> None: skip_if_device_not_available(device) torch.set_default_device(device) @@ -143,7 +153,9 @@ def test_should_train_and_predict_model( layers = [Convolutional2DLayer(1, 2), AvgPooling2DLayer(10), FlattenLayer(), ForwardLayer(num_of_classes)] nn_original = NeuralNetworkClassifier( - InputConversionImage(image_dataset.input_size), layers, OutputConversionImageToColumn(), + InputConversionImage(image_dataset.input_size), + layers, + OutputConversionImageToColumn(), ) nn = nn_original.fit(image_dataset, epoch_size=2) assert str(nn_original._model.state_dict().values()) != str(nn._model.state_dict().values()) @@ -165,7 +177,11 @@ class TestImageToImageRegressor: 
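For reference, the image-to-image round-trip that this `TestImageToImageRegressor` hunk reformats reduces to a handful of calls. The following is a minimal sketch assuming only the public API visible in these diffs; the directory path is a placeholder, and the layer sizes are illustrative (the last transposed layer's channel count has to match the target images — 3 here, assuming RGB input):

    from safeds.data.image.containers import ImageList
    from safeds.data.labeled.containers import ImageDataset
    from safeds.ml.nn import (
        Convolutional2DLayer,
        ConvolutionalTranspose2DLayer,
        InputConversionImage,
        NeuralNetworkRegressor,
        OutputConversionImageToImage,
    )

    # Placeholder path; any directory of images works once resized to one size.
    images = ImageList.from_files("path/to/images").resize(16, 16)
    dataset = ImageDataset(images, images)  # target = input: learn to reconstruct

    layers = [
        Convolutional2DLayer(6, 2),           # kernel 2 shrinks 16x16 to 15x15
        ConvolutionalTranspose2DLayer(3, 2),  # and the transpose restores 16x16
    ]
    model = NeuralNetworkRegressor(
        InputConversionImage(dataset.input_size),
        layers,
        OutputConversionImageToImage(),
    )
    fitted = model.fit(dataset, epoch_size=20)
    reconstructed = fitted.predict(images)  # an ImageList of the same size
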
ids=["seed-1234-cuda", "seed-4711-cuda", "seed-1234-cpu", "seed-4711-cpu"], ) def test_should_train_and_predict_model( - self, seed: int, snapshot_png_image_list: SnapshotAssertion, layer_3_bias: list[float], device: Device, + self, + seed: int, + snapshot_png_image_list: SnapshotAssertion, + layer_3_bias: list[float], + device: Device, ) -> None: skip_if_device_not_available(device) torch.set_default_device(device) @@ -183,7 +199,9 @@ def test_should_train_and_predict_model( ConvolutionalTranspose2DLayer(4, 2), ] nn_original = NeuralNetworkRegressor( - InputConversionImage(image_dataset.input_size), layers, OutputConversionImageToImage(), + InputConversionImage(image_dataset.input_size), + layers, + OutputConversionImageToImage(), ) nn = nn_original.fit(image_dataset, epoch_size=20) assert str(nn_original._model.state_dict().values()) != str(nn._model.state_dict().values()) diff --git a/tests/safeds/ml/nn/test_convolutional2d_layer.py b/tests/safeds/ml/nn/test_convolutional2d_layer.py index 60509e3b8..48a21dad8 100644 --- a/tests/safeds/ml/nn/test_convolutional2d_layer.py +++ b/tests/safeds/ml/nn/test_convolutional2d_layer.py @@ -98,7 +98,12 @@ def test_should_raise_if_input_size_not_set( ], ) def test_should_raise_if_activation_function_not_set( - self, conv_type: type[Convolutional2DLayer], output_channel: int, kernel_size: int, stride: int, padding: int, + self, + conv_type: type[Convolutional2DLayer], + output_channel: int, + kernel_size: int, + stride: int, + padding: int, ) -> None: layer = conv_type(output_channel, kernel_size, stride=stride, padding=padding) input_size = ImageSize(10, 20, 30, _ignore_invalid_channel=True) @@ -117,7 +122,12 @@ def test_should_raise_if_activation_function_not_set( ], ) def test_should_raise_if_unsupported_activation_function_is_set( - self, conv_type: type[Convolutional2DLayer], output_channel: int, kernel_size: int, stride: int, padding: int, + self, + conv_type: type[Convolutional2DLayer], + output_channel: int, + kernel_size: int, + stride: int, + padding: int, ) -> None: layer = conv_type(output_channel, kernel_size, stride=stride, padding=padding) input_size = ImageSize(10, 20, 30, _ignore_invalid_channel=True) @@ -136,7 +146,12 @@ def test_should_raise_if_unsupported_activation_function_is_set( ], ) def test_should_raise_if_input_size_is_set_with_int( - self, conv_type: type[Convolutional2DLayer], output_channel: int, kernel_size: int, stride: int, padding: int, + self, + conv_type: type[Convolutional2DLayer], + output_channel: int, + kernel_size: int, + stride: int, + padding: int, ) -> None: layer = conv_type(output_channel, kernel_size, stride=stride, padding=padding) with pytest.raises(TypeError, match=r"The input_size of a convolution layer has to be of type ImageSize."): diff --git a/tests/safeds/ml/nn/test_input_conversion_image.py b/tests/safeds/ml/nn/test_input_conversion_image.py index d1da75ab0..f8cfea0be 100644 --- a/tests/safeds/ml/nn/test_input_conversion_image.py +++ b/tests/safeds/ml/nn/test_input_conversion_image.py @@ -45,7 +45,8 @@ class TestIsFitDataValid: ( ImageDataset(_test_image_list, Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]})), ImageDataset( - _test_image_list.resize(20, 20), Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]}), + _test_image_list.resize(20, 20), + Table({"a": [0, 0, 1, 1, 0, 1, 0], "b": [1, 1, 0, 0, 1, 0, 1]}), ), ), ( @@ -63,7 +64,9 @@ class TestIsFitDataValid: ], ) def test_should_return_false_if_fit_data_is_invalid( - self, image_dataset_valid: ImageDataset, 
image_dataset_invalid: ImageDataset, + self, + image_dataset_valid: ImageDataset, + image_dataset_invalid: ImageDataset, ) -> None: input_conversion = InputConversionImage(image_dataset_valid.input_size) assert input_conversion._is_fit_data_valid(image_dataset_valid) diff --git a/tests/safeds/ml/nn/test_output_conversion_image.py b/tests/safeds/ml/nn/test_output_conversion_image.py index f786df0d6..8c4123209 100644 --- a/tests/safeds/ml/nn/test_output_conversion_image.py +++ b/tests/safeds/ml/nn/test_output_conversion_image.py @@ -30,7 +30,9 @@ def test_should_raise_if_column_name_not_set(self) -> None: match=r"The column_name is not set. The data can only be converted if the column_name is provided as `str` in the kwargs.", ): OutputConversionImageToColumn()._data_conversion( - input_data=_SingleSizeImageList(), output_data=torch.empty(1), one_hot_encoder=OneHotEncoder(), + input_data=_SingleSizeImageList(), + output_data=torch.empty(1), + one_hot_encoder=OneHotEncoder(), ) def test_should_raise_if_one_hot_encoder_not_set(self) -> None: @@ -39,7 +41,9 @@ def test_should_raise_if_one_hot_encoder_not_set(self) -> None: match=r"The one_hot_encoder is not set. The data can only be converted if the one_hot_encoder is provided as `OneHotEncoder` in the kwargs.", ): OutputConversionImageToColumn()._data_conversion( - input_data=_SingleSizeImageList(), output_data=torch.empty(1), column_name="column_name", + input_data=_SingleSizeImageList(), + output_data=torch.empty(1), + column_name="column_name", ) @@ -51,5 +55,6 @@ def test_should_raise_if_column_names_not_set(self) -> None: match=r"The column_names are not set. The data can only be converted if the column_names are provided as `list\[str\]` in the kwargs.", ): OutputConversionImageToTable()._data_conversion( - input_data=_SingleSizeImageList(), output_data=torch.empty(1), + input_data=_SingleSizeImageList(), + output_data=torch.empty(1), ) From a1e7415e24555de08fc4d6cb2e5f36d89571f824 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Thu, 2 May 2024 01:29:43 +0200 Subject: [PATCH 33/42] refactor: codecov --- src/safeds/ml/nn/_output_conversion_image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index fab38f181..deb3408c2 100644 --- a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -20,7 +20,7 @@ class _OutputConversionImage(_OutputConversion[ImageList, ImageDataset], ABC): @abstractmethod def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> ImageDataset: - pass + pass # pragma: no cover class OutputConversionImageToColumn(_OutputConversionImage): From a3b9336db5045a33fcdb779c0c3bcb0f6d06295e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 3 May 2024 02:02:10 +0200 Subject: [PATCH 34/42] refactor: finish merge --- src/safeds/ml/nn/_convolutional2d_layer.py | 4 ++-- src/safeds/ml/nn/_flatten_layer.py | 4 ++-- src/safeds/ml/nn/_forward_layer.py | 2 +- src/safeds/ml/nn/_input_conversion_image.py | 4 ++-- src/safeds/ml/nn/_input_conversion_table.py | 2 +- src/safeds/ml/nn/_model.py | 4 +--- src/safeds/ml/nn/_output_conversion_image.py | 4 ++-- src/safeds/ml/nn/_output_conversion_table.py | 2 +- src/safeds/ml/nn/_pooling2d_layer.py | 4 ++-- tests/safeds/ml/nn/test_model.py | 16 +++++++--------- .../safeds/ml/nn/test_output_conversion_image.py | 4 ++-- 11 files changed, 23 insertions(+), 27 
deletions(-) diff --git a/src/safeds/ml/nn/_convolutional2d_layer.py b/src/safeds/ml/nn/_convolutional2d_layer.py index 2dc4e50e8..063b9255c 100644 --- a/src/safeds/ml/nn/_convolutional2d_layer.py +++ b/src/safeds/ml/nn/_convolutional2d_layer.py @@ -8,7 +8,7 @@ if TYPE_CHECKING: from torch import Tensor, nn -from safeds.ml.nn._layer import _Layer +from safeds.ml.nn import Layer def _create_internal_model( @@ -76,7 +76,7 @@ def forward(self, x: Tensor) -> Tensor: ) -class Convolutional2DLayer(_Layer): +class Convolutional2DLayer(Layer): def __init__(self, output_channel: int, kernel_size: int, *, stride: int = 1, padding: int = 0): """ Create a Convolutional 2D Layer. diff --git a/src/safeds/ml/nn/_flatten_layer.py b/src/safeds/ml/nn/_flatten_layer.py index 855e5829e..eca4e253c 100644 --- a/src/safeds/ml/nn/_flatten_layer.py +++ b/src/safeds/ml/nn/_flatten_layer.py @@ -7,7 +7,7 @@ from safeds.data.image.typing import ImageSize -from safeds.ml.nn._layer import _Layer +from safeds.ml.nn import Layer def _create_internal_model() -> nn.Module: @@ -24,7 +24,7 @@ def forward(self, x: Tensor) -> Tensor: return _InternalLayer() -class FlattenLayer(_Layer): +class FlattenLayer(Layer): def __init__(self) -> None: """Create a Flatten Layer.""" self._input_size: ImageSize | None = None diff --git a/src/safeds/ml/nn/_forward_layer.py b/src/safeds/ml/nn/_forward_layer.py index aca1fab7b..baa91c17f 100644 --- a/src/safeds/ml/nn/_forward_layer.py +++ b/src/safeds/ml/nn/_forward_layer.py @@ -9,7 +9,7 @@ from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OutOfBoundsError -from safeds.ml.nn._layer import Layer +from safeds.ml.nn import Layer def _create_internal_model(input_size: int, output_size: int, activation_function: str) -> nn.Module: diff --git a/src/safeds/ml/nn/_input_conversion_image.py b/src/safeds/ml/nn/_input_conversion_image.py index 5da5b7edd..3ca9352d5 100644 --- a/src/safeds/ml/nn/_input_conversion_image.py +++ b/src/safeds/ml/nn/_input_conversion_image.py @@ -11,10 +11,10 @@ from safeds.data.image.typing import ImageSize from safeds.data.tabular.transformation import OneHotEncoder -from safeds.ml.nn._input_conversion import _InputConversion +from safeds.ml.nn import InputConversion -class InputConversionImage(_InputConversion[ImageDataset, ImageList]): +class InputConversionImage(InputConversion[ImageDataset, ImageList]): """The input conversion for a neural network, defines the input parameters for the neural network.""" def __init__(self, image_size: ImageSize) -> None: diff --git a/src/safeds/ml/nn/_input_conversion_table.py b/src/safeds/ml/nn/_input_conversion_table.py index ca3233363..5ac205ed0 100644 --- a/src/safeds/ml/nn/_input_conversion_table.py +++ b/src/safeds/ml/nn/_input_conversion_table.py @@ -7,7 +7,7 @@ from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.containers import Table -from safeds.ml.nn._input_conversion import InputConversion +from safeds.ml.nn import InputConversion class InputConversionTable(InputConversion[TabularDataset, Table]): diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 2d7991866..e763c09c8 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -32,9 +32,7 @@ from torch import Tensor, nn from safeds.data.image.typing import ImageSize - from safeds.ml.nn._input_conversion import InputConversion - from safeds.ml.nn._layer import Layer - from safeds.ml.nn._output_conversion import OutputConversion + from safeds.ml.nn import InputConversion, 
Layer, OutputConversion IFT = TypeVar("IFT", TabularDataset, TimeSeries, ImageDataset) # InputFitType IPT = TypeVar("IPT", Table, TimeSeries, ImageList) # InputPredictType diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index b102fcb1c..6065b059f 100644 --- a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -13,10 +13,10 @@ from torch import Tensor from safeds.data.tabular.transformation import OneHotEncoder -from safeds.ml.nn._output_conversion import _OutputConversion +from safeds.ml.nn import OutputConversion -class _OutputConversionImage(_OutputConversion[ImageList, ImageDataset], ABC): +class _OutputConversionImage(OutputConversion[ImageList, ImageDataset], ABC): @abstractmethod def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> ImageDataset: diff --git a/src/safeds/ml/nn/_output_conversion_table.py b/src/safeds/ml/nn/_output_conversion_table.py index bba03d852..a77b9862f 100644 --- a/src/safeds/ml/nn/_output_conversion_table.py +++ b/src/safeds/ml/nn/_output_conversion_table.py @@ -7,7 +7,7 @@ from safeds.data.labeled.containers import TabularDataset from safeds.data.tabular.containers import Column, Table -from safeds.ml.nn._output_conversion import OutputConversion +from safeds.ml.nn import OutputConversion class OutputConversionTable(OutputConversion[Table, TabularDataset]): diff --git a/src/safeds/ml/nn/_pooling2d_layer.py b/src/safeds/ml/nn/_pooling2d_layer.py index 1f8e67b52..0767e7dda 100644 --- a/src/safeds/ml/nn/_pooling2d_layer.py +++ b/src/safeds/ml/nn/_pooling2d_layer.py @@ -8,7 +8,7 @@ if TYPE_CHECKING: from torch import Tensor, nn -from safeds.ml.nn._layer import _Layer +from safeds.ml.nn import Layer def _create_internal_model(strategy: Literal["max", "avg"], kernel_size: int, padding: int, stride: int) -> nn.Module: @@ -29,7 +29,7 @@ def forward(self, x: Tensor) -> Tensor: return _InternalLayer(strategy, kernel_size, padding, stride) -class _Pooling2DLayer(_Layer): +class _Pooling2DLayer(Layer): def __init__(self, strategy: Literal["max", "avg"], kernel_size: int, *, stride: int = -1, padding: int = 0): """ Create a Pooling 2D Layer. 
diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index 582965218..72ae915ec 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -24,9 +24,7 @@ OutputConversionImageToTable, OutputConversionTable, ) -from safeds.ml.nn._input_conversion import _InputConversion -from safeds.ml.nn._layer import _Layer -from safeds.ml.nn._output_conversion import _OutputConversion +from safeds.ml.nn import InputConversion, Layer, OutputConversion from safeds.ml.nn._output_conversion_image import OutputConversionImageToColumn @@ -441,9 +439,9 @@ def callback_was_called(self) -> bool: ) def test_should_raise_if_model_has_invalid_structure( self, - input_conversion: _InputConversion, - layers: list[_Layer], - output_conversion: _OutputConversion, + input_conversion: InputConversion, + layers: list[Layer], + output_conversion: OutputConversion, error_msg: str, ) -> None: with pytest.raises(InvalidModelStructureError, match=error_msg): @@ -757,9 +755,9 @@ def callback_was_called(self) -> bool: ) def test_should_raise_if_model_has_invalid_structure( self, - input_conversion: _InputConversion, - layers: list[_Layer], - output_conversion: _OutputConversion, + input_conversion: InputConversion, + layers: list[Layer], + output_conversion: OutputConversion, error_msg: str, ) -> None: with pytest.raises(InvalidModelStructureError, match=error_msg): diff --git a/tests/safeds/ml/nn/test_output_conversion_image.py b/tests/safeds/ml/nn/test_output_conversion_image.py index 8c4123209..98c0105be 100644 --- a/tests/safeds/ml/nn/test_output_conversion_image.py +++ b/tests/safeds/ml/nn/test_output_conversion_image.py @@ -4,7 +4,7 @@ from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from safeds.data.tabular.transformation import OneHotEncoder from safeds.ml.nn import OutputConversionImageToColumn, OutputConversionImageToImage, OutputConversionImageToTable -from safeds.ml.nn._output_conversion import _OutputConversion +from safeds.ml.nn._output_conversion_image import _OutputConversionImage class TestDataConversionImage: @@ -17,7 +17,7 @@ class TestDataConversionImage: (OutputConversionImageToImage(), {}), ], ) - def test_should_raise_if_input_data_is_multi_size(self, output_conversion: _OutputConversion, kwargs: dict) -> None: + def test_should_raise_if_input_data_is_multi_size(self, output_conversion: _OutputConversionImage, kwargs: dict) -> None: with pytest.raises(ValueError, match=r"The given input ImageList contains images of different sizes."): output_conversion._data_conversion(input_data=_MultiSizeImageList(), output_data=torch.empty(1), **kwargs) From de465aa5765ed1b380dce68b2b9a3b45f80e21e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 3 May 2024 04:50:49 +0200 Subject: [PATCH 35/42] feat: added and improved various `__hash__`, `__sizeof__` and `__eq__` methods --- src/safeds/data/image/containers/_image.py | 2 +- .../containers/_multi_size_image_list.py | 2 + .../containers/_single_size_image_list.py | 2 +- src/safeds/data/image/typing/_image_size.py | 2 +- .../data/labeled/containers/_image_dataset.py | 87 +++++++++++++++ src/safeds/ml/nn/_convolutional2d_layer.py | 104 +++++++++++++++++ src/safeds/ml/nn/_flatten_layer.py | 43 +++++++ src/safeds/ml/nn/_input_conversion_image.py | 56 ++++++++++ src/safeds/ml/nn/_layer.py | 12 ++ src/safeds/ml/nn/_output_conversion_image.py | 41 +++++++ src/safeds/ml/nn/_pooling2d_layer.py | 56 ++++++++++ .../data/image/containers/test_image.py | 2 +- 
.../labeled/containers/test_image_dataset.py | 96 ++++++++++++++++ .../ml/nn/test_convolutional2d_layer.py | 103 +++++++++++++++++ tests/safeds/ml/nn/test_flatten_layer.py | 21 ++++ .../ml/nn/test_input_conversion_image.py | 81 ++++++++++++++ .../ml/nn/test_output_conversion_image.py | 65 +++++++++++ tests/safeds/ml/nn/test_pooling2d_layer.py | 105 ++++++++++++++++++ 18 files changed, 876 insertions(+), 4 deletions(-) diff --git a/src/safeds/data/image/containers/_image.py b/src/safeds/data/image/containers/_image.py index b63bb31d8..7a6a00830 100644 --- a/src/safeds/data/image/containers/_image.py +++ b/src/safeds/data/image/containers/_image.py @@ -139,7 +139,7 @@ def __eq__(self, other: object) -> bool: if not isinstance(other, Image): return NotImplemented - return ( + return (self is other) or ( self._image_tensor.size() == other._image_tensor.size() and torch.all(torch.eq(self._image_tensor, other._set_device(self.device)._image_tensor)).item() ) diff --git a/src/safeds/data/image/containers/_multi_size_image_list.py b/src/safeds/data/image/containers/_multi_size_image_list.py index e21f3eba0..d01142c5a 100644 --- a/src/safeds/data/image/containers/_multi_size_image_list.py +++ b/src/safeds/data/image/containers/_multi_size_image_list.py @@ -112,6 +112,8 @@ def __eq__(self, other: object) -> bool: return NotImplemented if not isinstance(other, _MultiSizeImageList) or set(other._image_list_dict) != set(self._image_list_dict): return False + if self is other: + return True for image_list_key, image_list_value in self._image_list_dict.items(): if image_list_value != other._image_list_dict[image_list_key]: return False diff --git a/src/safeds/data/image/containers/_single_size_image_list.py b/src/safeds/data/image/containers/_single_size_image_list.py index 32a4059d0..0502b1613 100644 --- a/src/safeds/data/image/containers/_single_size_image_list.py +++ b/src/safeds/data/image/containers/_single_size_image_list.py @@ -179,7 +179,7 @@ def __eq__(self, other: object) -> bool: return NotImplemented if not isinstance(other, _SingleSizeImageList): return False - return ( + return (self is other) or ( self._tensor.size() == other._tensor.size() and set(self._tensor_positions_to_indices) == set(self._tensor_positions_to_indices) and set(self._indices_to_tensor_positions) == set(self._indices_to_tensor_positions) diff --git a/src/safeds/data/image/typing/_image_size.py b/src/safeds/data/image/typing/_image_size.py index e3283a584..df6ce4ccd 100644 --- a/src/safeds/data/image/typing/_image_size.py +++ b/src/safeds/data/image/typing/_image_size.py @@ -62,7 +62,7 @@ def from_image(image: Image) -> ImageSize: def __eq__(self, other: object) -> bool: if not isinstance(other, ImageSize): return NotImplemented - return self._width == other._width and self._height == other._height and self._channel == other._channel + return (self is other) or (self._width == other._width and self._height == other._height and self._channel == other._channel) def __hash__(self) -> int: return _structural_hash(self._width, self._height, self._channel) diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py index 1ae0765df..72fa6e798 100644 --- a/src/safeds/data/labeled/containers/_image_dataset.py +++ b/src/safeds/data/labeled/containers/_image_dataset.py @@ -1,8 +1,10 @@ from __future__ import annotations import copy +import sys from typing import TYPE_CHECKING, Generic, TypeVar +from safeds._utils import _structural_hash from safeds.data.image.containers 
import ImageList from safeds.data.image.containers._empty_image_list import _EmptyImageList from safeds.data.image.containers._multi_size_image_list import _MultiSizeImageList @@ -110,6 +112,56 @@ def __next__(self) -> tuple[Tensor, Tensor]: def __len__(self) -> int: return self._input.number_of_images + def __eq__(self, other: object) -> bool: + """ + Compare two image datasets. + + Parameters + ---------- + other: + The image dataset to compare to. + + Returns + ------- + equals: + Whether the two image datasets are the same. + """ + if not isinstance(other, ImageDataset): + return NotImplemented + return (self is other) or (self._shuffle_after_epoch == other._shuffle_after_epoch and self._batch_size == other._batch_size and isinstance(other._output, type(self._output)) and (self._input == other._input) and (self._output == other._output)) + + def __hash__(self) -> int: + """ + Return a deterministic hash value for this image dataset. + + Returns + ------- + hash: + the hash value + """ + return _structural_hash(self._input, self._output, self._shuffle_after_epoch, self._batch_size) + + def __sizeof__(self) -> int: + """ + Return the complete size of this object. + + Returns + ------- + size: + Size of this object in bytes. + """ + return ( + sys.getsizeof(self._shuffle_tensor_indices) + + self._shuffle_tensor_indices.element_size() * self._shuffle_tensor_indices.nelement() + + sys.getsizeof(self._input) + + sys.getsizeof(self._output) + + sys.getsizeof(self._input_size) + + sys.getsizeof(self._output_size) + + sys.getsizeof(self._shuffle_after_epoch) + + sys.getsizeof(self._batch_size) + + sys.getsizeof(self._next_batch_index) + ) + @property def input_size(self) -> ImageSize: """ @@ -234,6 +286,23 @@ def __init__(self, table: Table) -> None: "The given table is not correctly one hot encoded as it contains rows that have a sum not equal to 1.", ) + def __eq__(self, other: object) -> bool: + import torch + + if not isinstance(other, _TableAsTensor): + return NotImplemented + return (self is other) or (self._column_names == other._column_names and torch.all(torch.eq(self._tensor, other._tensor)).item()) + + def __hash__(self) -> int: + return _structural_hash(self._tensor.size(), self._column_names) + + def __sizeof__(self) -> int: + return ( + sys.getsizeof(self._tensor) + + self._tensor.element_size() * self._tensor.nelement() + + sys.getsizeof(self._column_names) + ) + @staticmethod def _from_tensor(tensor: Tensor, column_names: list[str]) -> _TableAsTensor: if tensor.dim() != 2: @@ -263,6 +332,24 @@ def __init__(self, column: Column) -> None: torch.get_default_device(), ) + def __eq__(self, other: object) -> bool: + import torch + + if not isinstance(other, _ColumnAsTensor): + return NotImplemented + return (self is other) or (self._column_name == other._column_name and self._one_hot_encoder == other._one_hot_encoder and torch.all(torch.eq(self._tensor, other._tensor)).item()) + + def __hash__(self) -> int: + return _structural_hash(self._tensor.size(), self._column_name, self._one_hot_encoder) + + def __sizeof__(self) -> int: + return ( + sys.getsizeof(self._tensor) + + self._tensor.element_size() * self._tensor.nelement() + + sys.getsizeof(self._column_name) + + sys.getsizeof(self._one_hot_encoder) + ) + @staticmethod def _from_tensor(tensor: Tensor, column_name: str, one_hot_encoder: OneHotEncoder) -> _ColumnAsTensor: if tensor.dim() != 2: diff --git a/src/safeds/ml/nn/_convolutional2d_layer.py b/src/safeds/ml/nn/_convolutional2d_layer.py index 063b9255c..604384446 100644 --- 
a/src/safeds/ml/nn/_convolutional2d_layer.py +++ b/src/safeds/ml/nn/_convolutional2d_layer.py @@ -1,8 +1,10 @@ from __future__ import annotations import math +import sys from typing import TYPE_CHECKING, Any, Literal +from safeds._utils import _structural_hash from safeds.data.image.typing import ImageSize if TYPE_CHECKING: @@ -178,6 +180,60 @@ def _set_input_size(self, input_size: int | ImageSize) -> None: self._input_size = input_size self._output_size = None + def __hash__(self) -> int: + """ + Return a deterministic hash value for this convolutional 2d layer. + + Returns + ------- + hash: + the hash value + """ + return _structural_hash(self._output_channel, self._kernel_size, self._stride, self._padding, self._input_size, self._output_size) + + def __eq__(self, other: object) -> bool: + """ + Compare two convolutional 2d layer. + + Parameters + ---------- + other: + The convolutional 2d layer to compare to. + + Returns + ------- + equals: + Whether the two convolutional 2d layer are the same. + """ + if not isinstance(other, Convolutional2DLayer) or isinstance(other, ConvolutionalTranspose2DLayer): + return NotImplemented + return (self is other) or ( + self._output_channel == other._output_channel + and self._kernel_size == other._kernel_size + and self._stride == other._stride + and self._padding == other._padding + and self._input_size == other._input_size + and self._output_size == other._output_size + ) + + def __sizeof__(self) -> int: + """ + Return the complete size of this object. + + Returns + ------- + size: + Size of this object in bytes. + """ + return ( + sys.getsizeof(self._output_channel) + + sys.getsizeof(self._kernel_size) + + sys.getsizeof(self._stride) + + sys.getsizeof(self._padding) + + sys.getsizeof(self._input_size) + + sys.getsizeof(self._output_size) + ) + class ConvolutionalTranspose2DLayer(Convolutional2DLayer): @@ -256,3 +312,51 @@ def output_size(self) -> ImageSize: ) self._output_size = ImageSize(new_width, new_height, self._output_channel, _ignore_invalid_channel=True) return self._output_size + + def __hash__(self) -> int: + """ + Return a deterministic hash value for this convolutional transpose 2d layer. + + Returns + ------- + hash: + the hash value + """ + return _structural_hash(super().__hash__(), self._output_padding) + + def __eq__(self, other: object) -> bool: + """ + Compare two convolutional transpose 2d layer. + + Parameters + ---------- + other: + The convolutional transpose 2d layer to compare to. + + Returns + ------- + equals: + Whether the two convolutional transpose 2d layer are the same. + """ + if not isinstance(other, ConvolutionalTranspose2DLayer): + return NotImplemented + return (self is other) or ( + self._output_channel == other._output_channel + and self._kernel_size == other._kernel_size + and self._stride == other._stride + and self._padding == other._padding + and self._input_size == other._input_size + and self._output_size == other._output_size + and self._output_padding == other._output_padding + ) + + def __sizeof__(self) -> int: + """ + Return the complete size of this object. + + Returns + ------- + size: + Size of this object in bytes. 
+ """ + return sys.getsizeof(self._output_padding) + super().__sizeof__() diff --git a/src/safeds/ml/nn/_flatten_layer.py b/src/safeds/ml/nn/_flatten_layer.py index eca4e253c..50269b448 100644 --- a/src/safeds/ml/nn/_flatten_layer.py +++ b/src/safeds/ml/nn/_flatten_layer.py @@ -1,7 +1,10 @@ from __future__ import annotations +import sys from typing import TYPE_CHECKING, Any +from safeds._utils import _structural_hash + if TYPE_CHECKING: from torch import Tensor, nn @@ -80,3 +83,43 @@ def _set_input_size(self, input_size: int | ImageSize) -> None: raise TypeError("The input_size of a flatten layer has to be of type ImageSize.") self._input_size = input_size self._output_size = None + + def __hash__(self) -> int: + """ + Return a deterministic hash value for this flatten layer. + + Returns + ------- + hash: + the hash value + """ + return _structural_hash(self._input_size, self._output_size) + + def __eq__(self, other: object) -> bool: + """ + Compare two flatten layer. + + Parameters + ---------- + other: + The flatten layer to compare to. + + Returns + ------- + equals: + Whether the two flatten layer are the same. + """ + if not isinstance(other, FlattenLayer): + return NotImplemented + return (self is other) or (self._input_size == other._input_size and self._output_size == other._output_size) + + def __sizeof__(self) -> int: + """ + Return the complete size of this object. + + Returns + ------- + size: + Size of this object in bytes. + """ + return sys.getsizeof(self._input_size) + sys.getsizeof(self._output_size) diff --git a/src/safeds/ml/nn/_input_conversion_image.py b/src/safeds/ml/nn/_input_conversion_image.py index 3ca9352d5..798d48cfd 100644 --- a/src/safeds/ml/nn/_input_conversion_image.py +++ b/src/safeds/ml/nn/_input_conversion_image.py @@ -1,7 +1,9 @@ from __future__ import annotations +import sys from typing import TYPE_CHECKING, Any +from safeds._utils import _structural_hash from safeds.data.image.containers import ImageList from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from safeds.data.labeled.containers import ImageDataset @@ -79,3 +81,57 @@ def _get_output_configuration(self) -> dict[str, Any]: "column_name": self._column_name, "one_hot_encoder": self._one_hot_encoder, } + + def __hash__(self) -> int: + """ + Return a deterministic hash value for this InputConversionImage. + + Returns + ------- + hash: + the hash value + """ + return _structural_hash(self._input_size, self._output_size, self._one_hot_encoder, self._column_name, self._column_names, self._output_type) + + def __eq__(self, other: object) -> bool: + """ + Compare two InputConversionImage instances. + + Parameters + ---------- + other: + The InputConversionImage instance to compare to. + + Returns + ------- + equals: + Whether the instances are the same. + """ + if not isinstance(other, InputConversionImage): + return NotImplemented + return (self is other) or ( + self._input_size == other._input_size + and self._output_size == other._output_size + and self._one_hot_encoder == other._one_hot_encoder + and self._column_name == other._column_name + and self._column_names == other._column_names + and self._output_type == other._output_type + ) + + def __sizeof__(self) -> int: + """ + Return the complete size of this object. + + Returns + ------- + size: + Size of this object in bytes. 
+ """ + return ( + sys.getsizeof(self._input_size) + + sys.getsizeof(self._output_size) + + sys.getsizeof(self._one_hot_encoder) + + sys.getsizeof(self._column_name) + + sys.getsizeof(self._column_names) + + sys.getsizeof(self._output_type) + ) diff --git a/src/safeds/ml/nn/_layer.py b/src/safeds/ml/nn/_layer.py index eaaf23322..c1c305c21 100644 --- a/src/safeds/ml/nn/_layer.py +++ b/src/safeds/ml/nn/_layer.py @@ -31,3 +31,15 @@ def output_size(self) -> int | ImageSize: @abstractmethod def _set_input_size(self, input_size: int | ImageSize) -> None: pass # pragma: no cover + + @abstractmethod + def __hash__(self) -> int: + pass # pragma: no cover + + @abstractmethod + def __eq__(self, other: object) -> bool: + pass # pragma: no cover + + @abstractmethod + def __sizeof__(self) -> int: + pass # pragma: no cover diff --git a/src/safeds/ml/nn/_output_conversion_image.py b/src/safeds/ml/nn/_output_conversion_image.py index 6065b059f..1973d9473 100644 --- a/src/safeds/ml/nn/_output_conversion_image.py +++ b/src/safeds/ml/nn/_output_conversion_image.py @@ -3,6 +3,7 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Any +from safeds._utils import _structural_hash from safeds.data.image.containers import ImageList from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList from safeds.data.labeled.containers import ImageDataset @@ -22,6 +23,46 @@ class _OutputConversionImage(OutputConversion[ImageList, ImageDataset], ABC): def _data_conversion(self, input_data: ImageList, output_data: Tensor, **kwargs: Any) -> ImageDataset: pass # pragma: no cover + def __hash__(self) -> int: + """ + Return a deterministic hash value for this OutputConversionImage. + + Returns + ------- + hash: + the hash value + """ + return _structural_hash(self.__class__.__name__) + + def __eq__(self, other: object) -> bool: + """ + Compare two OutputConversionImage instances. + + Parameters + ---------- + other: + The OutputConversionImage instance to compare to. + + Returns + ------- + equals: + Whether the instances are the same. + """ + if not isinstance(other, type(self)): + return NotImplemented + return True + + def __sizeof__(self) -> int: + """ + Return the complete size of this object. + + Returns + ------- + size: + Size of this object in bytes. + """ + return 0 + class OutputConversionImageToColumn(_OutputConversionImage): diff --git a/src/safeds/ml/nn/_pooling2d_layer.py b/src/safeds/ml/nn/_pooling2d_layer.py index 0767e7dda..7ee39f3ea 100644 --- a/src/safeds/ml/nn/_pooling2d_layer.py +++ b/src/safeds/ml/nn/_pooling2d_layer.py @@ -1,8 +1,10 @@ from __future__ import annotations import math +import sys from typing import TYPE_CHECKING, Any, Literal +from safeds._utils import _structural_hash from safeds.data.image.typing import ImageSize if TYPE_CHECKING: @@ -109,6 +111,60 @@ def _set_input_size(self, input_size: int | ImageSize) -> None: self._input_size = input_size self._output_size = None + def __hash__(self) -> int: + """ + Return a deterministic hash value for this pooling 2d layer. + + Returns + ------- + hash: + the hash value + """ + return _structural_hash(self._strategy, self._kernel_size, self._stride, self._padding, self._input_size, self._output_size) + + def __eq__(self, other: object) -> bool: + """ + Compare two pooling 2d layer. + + Parameters + ---------- + other: + The pooling 2d layer to compare to. + + Returns + ------- + equals: + Whether the two pooling 2d layer are the same. 
+ """ + if not isinstance(other, type(self)): + return NotImplemented + return (self is other) or ( + self._input_size == other._input_size + and self._output_size == other._output_size + and self._strategy == other._strategy + and self._kernel_size == other._kernel_size + and self._stride == other._stride + and self._padding == other._padding + ) + + def __sizeof__(self) -> int: + """ + Return the complete size of this object. + + Returns + ------- + size: + Size of this object in bytes. + """ + return ( + sys.getsizeof(self._input_size) + + sys.getsizeof(self._output_size) + + sys.getsizeof(self._strategy) + + sys.getsizeof(self._kernel_size) + + sys.getsizeof(self._stride) + + sys.getsizeof(self._padding) + ) + class MaxPooling2DLayer(_Pooling2DLayer): diff --git a/tests/safeds/data/image/containers/test_image.py b/tests/safeds/data/image/containers/test_image.py index 2c5334903..ac1bb0b65 100644 --- a/tests/safeds/data/image/containers/test_image.py +++ b/tests/safeds/data/image/containers/test_image.py @@ -333,7 +333,7 @@ def test_should_not_be_equal_different_devices(self) -> None: images_all(), ids=images_all_ids(), ) - def test_should_raise(self, resource_path: str, device: Device) -> None: + def test_should_be_not_implemented(self, resource_path: str, device: Device) -> None: skip_if_device_not_available(device) image = Image.from_file(resolve_resource_path(resource_path), device) other = Table() diff --git a/tests/safeds/data/labeled/containers/test_image_dataset.py b/tests/safeds/data/labeled/containers/test_image_dataset.py index c3b1bee1a..4d8cdc122 100644 --- a/tests/safeds/data/labeled/containers/test_image_dataset.py +++ b/tests/safeds/data/labeled/containers/test_image_dataset.py @@ -1,4 +1,5 @@ import math +import sys from typing import TypeVar import pytest @@ -104,6 +105,101 @@ def test_should_return_length(self) -> None: assert len(image_dataset) == 1 +class TestEq: + + @pytest.mark.parametrize( + ("image_dataset1", "image_dataset2"), + [ + (ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1])), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1]))), + (ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [1]})), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [1]}))), + (ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(plane_png_path))), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(plane_png_path)))), + ] + ) + def test_should_be_equal(self, image_dataset1: ImageDataset, image_dataset2: ImageDataset) -> None: + assert image_dataset1 == image_dataset2 + + @pytest.mark.parametrize( + "image_dataset1", + [ + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1])), + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [1]})), + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(plane_png_path))), + ] + ) + @pytest.mark.parametrize( + "image_dataset2", + [ + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("ims", [1])), + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"ims": [1]})), + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), 
Column("images", [0])), + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [0], "others": [1]})), + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(white_square_png_path))), + ImageDataset(ImageList.from_files(resolve_resource_path(white_square_png_path)), Column("images", [1])), + ImageDataset(ImageList.from_files(resolve_resource_path(white_square_png_path)), Table({"images": [1]})), + ImageDataset(ImageList.from_files(resolve_resource_path(white_square_png_path)), ImageList.from_files(resolve_resource_path(plane_png_path))), + ] + ) + def test_should_not_be_equal(self, image_dataset1: ImageDataset, image_dataset2: ImageDataset) -> None: + assert image_dataset1 != image_dataset2 + + def test_should_be_not_implemented(self) -> None: + image_dataset = ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1])) + other = Table() + assert image_dataset.__eq__(other) is NotImplemented + + +class TestHash: + + @pytest.mark.parametrize( + ("image_dataset1", "image_dataset2"), + [ + (ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1])), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1]))), + (ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [1]})), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [1]}))), + (ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(plane_png_path))), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(plane_png_path)))), + ] + ) + def test_hash_should_be_equal(self, image_dataset1: ImageDataset, image_dataset2: ImageDataset) -> None: + assert hash(image_dataset1) == hash(image_dataset2) + + @pytest.mark.parametrize( + "image_dataset1", + [ + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1])), + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [1]})), + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(plane_png_path))), + ] + ) + @pytest.mark.parametrize( + "image_dataset2", + [ + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("ims", [1])), + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"ims": [1]})), + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [0])), + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [0], "others": [1]})), + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(white_square_png_path))), + ImageDataset(ImageList.from_files(resolve_resource_path(white_square_png_path)), Column("images", [1])), + ImageDataset(ImageList.from_files(resolve_resource_path(white_square_png_path)), Table({"images": [1]})), + ImageDataset(ImageList.from_files(resolve_resource_path(white_square_png_path)), ImageList.from_files(resolve_resource_path(plane_png_path))), + ] + ) + def test_hash_should_not_be_equal(self, image_dataset1: ImageDataset, image_dataset2: ImageDataset) -> None: + assert hash(image_dataset1) != hash(image_dataset2) + + +class TestSizeOf: + + 
@pytest.mark.parametrize( + "image_dataset", + [ + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1])), + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [1]})), + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(plane_png_path))), + ] + ) + def test_should_size_be_greater_than_normal_object(self, image_dataset: ImageDataset) -> None: + assert sys.getsizeof(image_dataset) > sys.getsizeof(object()) + + class TestShuffle: def test_should_be_different_order(self) -> None: diff --git a/tests/safeds/ml/nn/test_convolutional2d_layer.py b/tests/safeds/ml/nn/test_convolutional2d_layer.py index 48a21dad8..2f84d3fd2 100644 --- a/tests/safeds/ml/nn/test_convolutional2d_layer.py +++ b/tests/safeds/ml/nn/test_convolutional2d_layer.py @@ -1,3 +1,4 @@ +import sys from typing import Literal import pytest @@ -156,3 +157,105 @@ def test_should_raise_if_input_size_is_set_with_int( layer = conv_type(output_channel, kernel_size, stride=stride, padding=padding) with pytest.raises(TypeError, match=r"The input_size of a convolution layer has to be of type ImageSize."): layer._set_input_size(1) + + class TestEq: + + @pytest.mark.parametrize( + ("conv2dlayer1", "conv2dlayer2"), + [ + (Convolutional2DLayer(1, 2), Convolutional2DLayer(1, 2)), + (Convolutional2DLayer(1, 2, stride=3, padding=4), Convolutional2DLayer(1, 2, stride=3, padding=4)), + (ConvolutionalTranspose2DLayer(1, 2), ConvolutionalTranspose2DLayer(1, 2)), + (ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=5), ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=5)), + ] + ) + def test_should_be_equal(self, conv2dlayer1: Convolutional2DLayer, conv2dlayer2: Convolutional2DLayer) -> None: + assert conv2dlayer1 == conv2dlayer2 + assert conv2dlayer2 == conv2dlayer1 + + @pytest.mark.parametrize( + "conv2dlayer1", + [ + Convolutional2DLayer(1, 2), + Convolutional2DLayer(1, 2, stride=3, padding=4), + ConvolutionalTranspose2DLayer(1, 2), + ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=5), + ] + ) + @pytest.mark.parametrize( + "conv2dlayer2", + [ + Convolutional2DLayer(2, 2), + Convolutional2DLayer(1, 1), + Convolutional2DLayer(1, 2, stride=4, padding=4), + Convolutional2DLayer(1, 2, stride=3, padding=3), + ConvolutionalTranspose2DLayer(1, 1), + ConvolutionalTranspose2DLayer(2, 2), + ConvolutionalTranspose2DLayer(1, 2, stride=4, padding=4, output_padding=5), + ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=3, output_padding=5), + ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=4), + ] + ) + def test_should_not_be_equal(self, conv2dlayer1: Convolutional2DLayer, conv2dlayer2: Convolutional2DLayer) -> None: + assert conv2dlayer1 != conv2dlayer2 + assert conv2dlayer2 != conv2dlayer1 + + def test_should_be_not_implemented(self) -> None: + conv2dlayer = Convolutional2DLayer(1, 2) + convtranspose2dlayer = ConvolutionalTranspose2DLayer(1, 2) + assert conv2dlayer.__eq__(convtranspose2dlayer) is NotImplemented + assert convtranspose2dlayer.__eq__(conv2dlayer) is NotImplemented + + class TestHash: + + @pytest.mark.parametrize( + ("conv2dlayer1", "conv2dlayer2"), + [ + (Convolutional2DLayer(1, 2), Convolutional2DLayer(1, 2)), + (Convolutional2DLayer(1, 2, stride=3, padding=4), Convolutional2DLayer(1, 2, stride=3, padding=4)), + (ConvolutionalTranspose2DLayer(1, 2), ConvolutionalTranspose2DLayer(1, 2)), + 
(ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=5), ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=5)), + ] + ) + def test_hash_should_be_equal(self, conv2dlayer1: Convolutional2DLayer, conv2dlayer2: Convolutional2DLayer) -> None: + assert hash(conv2dlayer1) == hash(conv2dlayer2) + + @pytest.mark.parametrize( + "conv2dlayer1", + [ + Convolutional2DLayer(1, 2), + Convolutional2DLayer(1, 2, stride=3, padding=4), + ConvolutionalTranspose2DLayer(1, 2), + ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=5), + ] + ) + @pytest.mark.parametrize( + "conv2dlayer2", + [ + Convolutional2DLayer(2, 2), + Convolutional2DLayer(1, 1), + Convolutional2DLayer(1, 2, stride=4, padding=4), + Convolutional2DLayer(1, 2, stride=3, padding=3), + ConvolutionalTranspose2DLayer(1, 1), + ConvolutionalTranspose2DLayer(2, 2), + ConvolutionalTranspose2DLayer(1, 2, stride=4, padding=4, output_padding=5), + ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=3, output_padding=5), + ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=4), + ] + ) + def test_hash_should_not_be_equal(self, conv2dlayer1: Convolutional2DLayer, conv2dlayer2: Convolutional2DLayer) -> None: + assert hash(conv2dlayer1) != hash(conv2dlayer2) + + class TestSizeOf: + + @pytest.mark.parametrize( + "conv2dlayer", + [ + Convolutional2DLayer(1, 2), + Convolutional2DLayer(1, 2, stride=3, padding=4), + ConvolutionalTranspose2DLayer(1, 2), + ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=5), + ] + ) + def test_should_size_be_greater_than_normal_object(self, conv2dlayer: Convolutional2DLayer) -> None: + assert sys.getsizeof(conv2dlayer) > sys.getsizeof(object()) diff --git a/tests/safeds/ml/nn/test_flatten_layer.py b/tests/safeds/ml/nn/test_flatten_layer.py index a5d679383..c846bb2c6 100644 --- a/tests/safeds/ml/nn/test_flatten_layer.py +++ b/tests/safeds/ml/nn/test_flatten_layer.py @@ -1,5 +1,8 @@ +import sys + import pytest from safeds.data.image.typing import ImageSize +from safeds.data.tabular.containers import Table from safeds.ml.nn import FlattenLayer from torch import nn @@ -28,3 +31,21 @@ def test_should_raise_if_input_size_is_set_with_int(self) -> None: layer = FlattenLayer() with pytest.raises(TypeError, match=r"The input_size of a flatten layer has to be of type ImageSize."): layer._set_input_size(1) + + class TestEq: + + def test_should_be_equal(self): + assert FlattenLayer() == FlattenLayer() + + def test_should_be_not_implemented(self): + assert FlattenLayer().__eq__(Table()) is NotImplemented + + class TestHash: + + def test_hash_should_be_equal(self): + assert hash(FlattenLayer()) == hash(FlattenLayer()) + + class TestSizeOf: + + def test_should_size_be_greater_than_normal_object(self): + assert sys.getsizeof(FlattenLayer()) > sys.getsizeof(object()) diff --git a/tests/safeds/ml/nn/test_input_conversion_image.py b/tests/safeds/ml/nn/test_input_conversion_image.py index f8cfea0be..3e53fae46 100644 --- a/tests/safeds/ml/nn/test_input_conversion_image.py +++ b/tests/safeds/ml/nn/test_input_conversion_image.py @@ -1,5 +1,8 @@ +import sys + import pytest from safeds.data.image.containers import ImageList +from safeds.data.image.typing import ImageSize from safeds.data.labeled.containers import ImageDataset from safeds.data.tabular.containers import Column, Table from safeds.ml.nn import InputConversionImage @@ -72,3 +75,81 @@ def test_should_return_false_if_fit_data_is_invalid( assert 
input_conversion._is_fit_data_valid(image_dataset_valid) assert input_conversion._is_fit_data_valid(image_dataset_valid) assert not input_conversion._is_fit_data_valid(image_dataset_invalid) + + +class TestEq: + + @pytest.mark.parametrize( + ("input_conversion_image1", "input_conversion_image2"), + [ + (InputConversionImage(ImageSize(1, 2, 3)), InputConversionImage(ImageSize(1, 2, 3))) + ] + ) + def test_should_be_equal(self, input_conversion_image1: InputConversionImage, input_conversion_image2: InputConversionImage) -> None: + assert input_conversion_image1 == input_conversion_image2 + + @pytest.mark.parametrize( + "input_conversion_image1", + [ + InputConversionImage(ImageSize(1, 2, 3)) + ] + ) + @pytest.mark.parametrize( + "input_conversion_image2", + [ + InputConversionImage(ImageSize(2, 2, 3)), + InputConversionImage(ImageSize(1, 1, 3)), + InputConversionImage(ImageSize(1, 2, 1)), + InputConversionImage(ImageSize(1, 2, 4)), + ] + ) + def test_should_not_be_equal(self, input_conversion_image1: InputConversionImage, input_conversion_image2: InputConversionImage) -> None: + assert input_conversion_image1 != input_conversion_image2 + + def test_should_be_not_implemented(self) -> None: + input_conversion_image = InputConversionImage(ImageSize(1, 2, 3)) + other = Table() + assert input_conversion_image.__eq__(other) is NotImplemented + + +class TestHash: + + @pytest.mark.parametrize( + ("input_conversion_image1", "input_conversion_image2"), + [ + (InputConversionImage(ImageSize(1, 2, 3)), InputConversionImage(ImageSize(1, 2, 3))) + ] + ) + def test_hash_should_be_equal(self, input_conversion_image1: InputConversionImage, input_conversion_image2: InputConversionImage) -> None: + assert hash(input_conversion_image1) == hash(input_conversion_image2) + + @pytest.mark.parametrize( + "input_conversion_image1", + [ + InputConversionImage(ImageSize(1, 2, 3)) + ] + ) + @pytest.mark.parametrize( + "input_conversion_image2", + [ + InputConversionImage(ImageSize(2, 2, 3)), + InputConversionImage(ImageSize(1, 1, 3)), + InputConversionImage(ImageSize(1, 2, 1)), + InputConversionImage(ImageSize(1, 2, 4)), + ] + ) + def test_hash_should_not_be_equal(self, input_conversion_image1: InputConversionImage, input_conversion_image2: InputConversionImage) -> None: + assert hash(input_conversion_image1) != hash(input_conversion_image2) + + +class TestSizeOf: + + @pytest.mark.parametrize( + "input_conversion_image", + [ + InputConversionImage(ImageSize(1, 2, 3)) + ] + ) + def test_should_size_be_greater_than_normal_object(self, input_conversion_image: InputConversionImage) -> None: + assert sys.getsizeof(input_conversion_image) > sys.getsizeof(object()) + diff --git a/tests/safeds/ml/nn/test_output_conversion_image.py b/tests/safeds/ml/nn/test_output_conversion_image.py index 98c0105be..6ec2b6548 100644 --- a/tests/safeds/ml/nn/test_output_conversion_image.py +++ b/tests/safeds/ml/nn/test_output_conversion_image.py @@ -1,7 +1,10 @@ +import sys + import pytest import torch from safeds.data.image.containers._multi_size_image_list import _MultiSizeImageList from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList +from safeds.data.tabular.containers import Table from safeds.data.tabular.transformation import OneHotEncoder from safeds.ml.nn import OutputConversionImageToColumn, OutputConversionImageToImage, OutputConversionImageToTable from safeds.ml.nn._output_conversion_image import _OutputConversionImage @@ -21,6 +24,68 @@ def test_should_raise_if_input_data_is_multi_size(self, 
output_conversion: _Outp with pytest.raises(ValueError, match=r"The given input ImageList contains images of different sizes."): output_conversion._data_conversion(input_data=_MultiSizeImageList(), output_data=torch.empty(1), **kwargs) + class TestEq: + + @pytest.mark.parametrize( + ("output_conversion_image1", "output_conversion_image2"), + [ + (OutputConversionImageToColumn(), OutputConversionImageToColumn()), + (OutputConversionImageToTable(), OutputConversionImageToTable()), + (OutputConversionImageToImage(), OutputConversionImageToImage()), + ] + ) + def test_should_be_equal(self, output_conversion_image1: _OutputConversionImage, output_conversion_image2: _OutputConversionImage) -> None: + assert output_conversion_image1 == output_conversion_image2 + + def test_should_be_not_implemented(self) -> None: + output_conversion_image_to_image = OutputConversionImageToImage() + output_conversion_image_to_table = OutputConversionImageToTable() + output_conversion_image_to_column = OutputConversionImageToColumn() + other = Table() + assert output_conversion_image_to_image.__eq__(other) is NotImplemented + assert output_conversion_image_to_image.__eq__(output_conversion_image_to_table) is NotImplemented + assert output_conversion_image_to_image.__eq__(output_conversion_image_to_column) is NotImplemented + assert output_conversion_image_to_table.__eq__(other) is NotImplemented + assert output_conversion_image_to_table.__eq__(output_conversion_image_to_image) is NotImplemented + assert output_conversion_image_to_table.__eq__(output_conversion_image_to_column) is NotImplemented + assert output_conversion_image_to_column.__eq__(other) is NotImplemented + assert output_conversion_image_to_column.__eq__(output_conversion_image_to_table) is NotImplemented + assert output_conversion_image_to_column.__eq__(output_conversion_image_to_image) is NotImplemented + + class TestHash: + + @pytest.mark.parametrize( + ("output_conversion_image1", "output_conversion_image2"), + [ + (OutputConversionImageToColumn(), OutputConversionImageToColumn()), + (OutputConversionImageToTable(), OutputConversionImageToTable()), + (OutputConversionImageToImage(), OutputConversionImageToImage()), + ] + ) + def test_hash_should_be_equal(self, output_conversion_image1: _OutputConversionImage, output_conversion_image2: _OutputConversionImage) -> None: + assert hash(output_conversion_image1) == hash(output_conversion_image2) + + def test_hash_should_not_be_equal(self) -> None: + output_conversion_image_to_image = OutputConversionImageToImage() + output_conversion_image_to_table = OutputConversionImageToTable() + output_conversion_image_to_column = OutputConversionImageToColumn() + assert hash(output_conversion_image_to_image) != hash(output_conversion_image_to_table) + assert hash(output_conversion_image_to_image) != hash(output_conversion_image_to_column) + assert hash(output_conversion_image_to_table) != hash(output_conversion_image_to_column) + + class TestSizeOf: + + @pytest.mark.parametrize( + "output_conversion_image", + [ + OutputConversionImageToColumn(), + OutputConversionImageToTable(), + OutputConversionImageToImage(), + ] + ) + def test_should_size_be_greater_than_normal_object(self, output_conversion_image: _OutputConversionImage) -> None: + assert sys.getsizeof(output_conversion_image) > sys.getsizeof(object()) + class TestOutputConversionImageToColumn: diff --git a/tests/safeds/ml/nn/test_pooling2d_layer.py b/tests/safeds/ml/nn/test_pooling2d_layer.py index e6e389a80..a30d4c4a8 100644 --- 
a/tests/safeds/ml/nn/test_pooling2d_layer.py +++ b/tests/safeds/ml/nn/test_pooling2d_layer.py @@ -1,7 +1,10 @@ +import sys from typing import Literal import pytest from safeds.data.image.typing import ImageSize +from safeds.data.tabular.containers import Table +from safeds.ml.nn import MaxPooling2DLayer, AvgPooling2DLayer from safeds.ml.nn._pooling2d_layer import _Pooling2DLayer from torch import nn @@ -53,3 +56,105 @@ def test_should_raise_if_input_size_is_set_with_int(self, strategy: Literal["max layer = _Pooling2DLayer(strategy, 2, stride=2, padding=2) with pytest.raises(TypeError, match=r"The input_size of a pooling layer has to be of type ImageSize."): layer._set_input_size(1) + + class TestEq: + + @pytest.mark.parametrize( + ("pooling_2d_layer_1", "pooling_2d_layer_2"), + [ + (MaxPooling2DLayer(2), MaxPooling2DLayer(2)), + (MaxPooling2DLayer(2, stride=3, padding=4), MaxPooling2DLayer(2, stride=3, padding=4)), + (AvgPooling2DLayer(2), AvgPooling2DLayer(2)), + (AvgPooling2DLayer(2, stride=3, padding=4), AvgPooling2DLayer(2, stride=3, padding=4)), + ] + ) + def test_should_be_equal(self, pooling_2d_layer_1: _Pooling2DLayer, pooling_2d_layer_2: _Pooling2DLayer) -> None: + assert pooling_2d_layer_1 == pooling_2d_layer_2 + + @pytest.mark.parametrize( + "pooling_2d_layer_1", + [ + MaxPooling2DLayer(2), + MaxPooling2DLayer(2, stride=3, padding=4), + AvgPooling2DLayer(2), + AvgPooling2DLayer(2, stride=3, padding=4), + ] + ) + @pytest.mark.parametrize( + "pooling_2d_layer_2", + [ + MaxPooling2DLayer(1), + MaxPooling2DLayer(1, stride=3, padding=4), + MaxPooling2DLayer(2, stride=1, padding=4), + MaxPooling2DLayer(2, stride=3, padding=1), + AvgPooling2DLayer(1), + AvgPooling2DLayer(1, stride=3, padding=4), + AvgPooling2DLayer(2, stride=1, padding=4), + AvgPooling2DLayer(2, stride=3, padding=1), + ] + ) + def test_should_not_be_equal(self, pooling_2d_layer_1: _Pooling2DLayer, pooling_2d_layer_2: _Pooling2DLayer) -> None: + assert pooling_2d_layer_1 != pooling_2d_layer_2 + + def test_should_be_not_implemented(self) -> None: + max_pooling_2d_layer = MaxPooling2DLayer(1) + avg_pooling_2d_layer = AvgPooling2DLayer(1) + other = Table() + assert max_pooling_2d_layer.__eq__(other) is NotImplemented + assert max_pooling_2d_layer.__eq__(avg_pooling_2d_layer) is NotImplemented + assert avg_pooling_2d_layer.__eq__(other) is NotImplemented + assert avg_pooling_2d_layer.__eq__(max_pooling_2d_layer) is NotImplemented + + class TestHash: + + @pytest.mark.parametrize( + ("pooling_2d_layer_1", "pooling_2d_layer_2"), + [ + (MaxPooling2DLayer(2), MaxPooling2DLayer(2)), + (MaxPooling2DLayer(2, stride=3, padding=4), MaxPooling2DLayer(2, stride=3, padding=4)), + (AvgPooling2DLayer(2), AvgPooling2DLayer(2)), + (AvgPooling2DLayer(2, stride=3, padding=4), AvgPooling2DLayer(2, stride=3, padding=4)), + ] + ) + def test_hash_should_be_equal(self, pooling_2d_layer_1: _Pooling2DLayer, pooling_2d_layer_2: _Pooling2DLayer) -> None: + assert hash(pooling_2d_layer_1) == hash(pooling_2d_layer_2) + + @pytest.mark.parametrize( + "pooling_2d_layer_1", + [ + MaxPooling2DLayer(2), + MaxPooling2DLayer(2, stride=3, padding=4), + AvgPooling2DLayer(2), + AvgPooling2DLayer(2, stride=3, padding=4), + ] + ) + @pytest.mark.parametrize( + "pooling_2d_layer_2", + [ + MaxPooling2DLayer(1), + MaxPooling2DLayer(1, stride=3, padding=4), + MaxPooling2DLayer(2, stride=1, padding=4), + MaxPooling2DLayer(2, stride=3, padding=1), + AvgPooling2DLayer(1), + AvgPooling2DLayer(1, stride=3, padding=4), + AvgPooling2DLayer(2, stride=1, padding=4), + 
AvgPooling2DLayer(2, stride=3, padding=1), + ] + ) + def test_hash_should_not_be_equal(self, pooling_2d_layer_1: _Pooling2DLayer, pooling_2d_layer_2: _Pooling2DLayer) -> None: + assert hash(pooling_2d_layer_1) != hash(pooling_2d_layer_2) + + class TestSizeOf: + + @pytest.mark.parametrize( + "pooling_2d_layer", + [ + MaxPooling2DLayer(2), + MaxPooling2DLayer(2, stride=3, padding=4), + AvgPooling2DLayer(2), + AvgPooling2DLayer(2, stride=3, padding=4), + ] + ) + def test_should_size_be_greater_than_normal_object(self, pooling_2d_layer: _Pooling2DLayer) -> None: + assert sys.getsizeof(pooling_2d_layer) > sys.getsizeof(object()) + From 3c800eaa53c5349a4777c03bf7d5a56156e6113d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 3 May 2024 04:54:43 +0200 Subject: [PATCH 36/42] refactor: mypy --- tests/safeds/ml/nn/test_cnn_workflow.py | 4 ++-- tests/safeds/ml/nn/test_flatten_layer.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/safeds/ml/nn/test_cnn_workflow.py b/tests/safeds/ml/nn/test_cnn_workflow.py index bd15fd86e..058ee9d78 100644 --- a/tests/safeds/ml/nn/test_cnn_workflow.py +++ b/tests/safeds/ml/nn/test_cnn_workflow.py @@ -26,7 +26,7 @@ from tests.helpers import device_cpu, device_cuda, images_all, resolve_resource_path, skip_if_device_not_available if TYPE_CHECKING: - from safeds.ml.nn._layer import _Layer + from safeds.ml.nn import Layer class TestImageToTableClassifier: @@ -192,7 +192,7 @@ def test_should_train_and_predict_model( image_list_grayscale = image_list.convert_to_grayscale() image_dataset = ImageDataset(image_list, image_list_grayscale) - layers: list[_Layer] = [ + layers: list[Layer] = [ Convolutional2DLayer(6, 2), Convolutional2DLayer(12, 2), ConvolutionalTranspose2DLayer(6, 2), diff --git a/tests/safeds/ml/nn/test_flatten_layer.py b/tests/safeds/ml/nn/test_flatten_layer.py index c846bb2c6..e3319db9a 100644 --- a/tests/safeds/ml/nn/test_flatten_layer.py +++ b/tests/safeds/ml/nn/test_flatten_layer.py @@ -34,18 +34,18 @@ def test_should_raise_if_input_size_is_set_with_int(self) -> None: class TestEq: - def test_should_be_equal(self): + def test_should_be_equal(self) -> None: assert FlattenLayer() == FlattenLayer() - def test_should_be_not_implemented(self): + def test_should_be_not_implemented(self) -> None: assert FlattenLayer().__eq__(Table()) is NotImplemented class TestHash: - def test_hash_should_be_equal(self): + def test_hash_should_be_equal(self) -> None: assert hash(FlattenLayer()) == hash(FlattenLayer()) class TestSizeOf: - def test_should_size_be_greater_than_normal_object(self): + def test_should_size_be_greater_than_normal_object(self) -> None: assert sys.getsizeof(FlattenLayer()) > sys.getsizeof(object()) From c8437bb670071e18506b33f59633d6ef168ce753 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Fri, 3 May 2024 04:57:21 +0200 Subject: [PATCH 37/42] test: added missing tests --- tests/safeds/data/labeled/containers/test_image_dataset.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/safeds/data/labeled/containers/test_image_dataset.py b/tests/safeds/data/labeled/containers/test_image_dataset.py index 4d8cdc122..d81df8644 100644 --- a/tests/safeds/data/labeled/containers/test_image_dataset.py +++ b/tests/safeds/data/labeled/containers/test_image_dataset.py @@ -259,6 +259,9 @@ def test_should_raise_from_tensor(self, tensor: Tensor, error_msg: str) -> None: with pytest.raises(ValueError, match=error_msg): _TableAsTensor._from_tensor(tensor, ["a", "b"]) + def 
test_eq_should_be_not_implemented(self) -> None: + assert _TableAsTensor(Table()).__eq__(Table()) is NotImplemented + class TestColumnAsTensor: @@ -295,3 +298,6 @@ def test_should_raise_from_tensor( ) -> None: with pytest.raises(error, match=error_msg): _ColumnAsTensor._from_tensor(tensor, "a", one_hot_encoder) + + def test_eq_should_be_not_implemented(self) -> None: + assert _ColumnAsTensor(Column("column", [1])).__eq__(Table()) is NotImplemented From 3204c260f81a6b4a05f9b7d80c6ef46845e0f13e Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 3 May 2024 02:58:59 +0000 Subject: [PATCH 38/42] style: apply automated linter fixes --- src/safeds/data/image/typing/_image_size.py | 4 +- .../data/labeled/containers/_image_dataset.py | 18 +++- src/safeds/ml/nn/_convolutional2d_layer.py | 4 +- src/safeds/ml/nn/_input_conversion_image.py | 9 +- src/safeds/ml/nn/_pooling2d_layer.py | 4 +- .../labeled/containers/test_image_dataset.py | 99 ++++++++++++++----- .../ml/nn/test_convolutional2d_layer.py | 36 ++++--- .../ml/nn/test_input_conversion_image.py | 50 ++++------ tests/safeds/ml/nn/test_model.py | 4 +- .../ml/nn/test_output_conversion_image.py | 22 +++-- tests/safeds/ml/nn/test_pooling2d_layer.py | 33 ++++--- 11 files changed, 190 insertions(+), 93 deletions(-) diff --git a/src/safeds/data/image/typing/_image_size.py b/src/safeds/data/image/typing/_image_size.py index df6ce4ccd..3a3e400fc 100644 --- a/src/safeds/data/image/typing/_image_size.py +++ b/src/safeds/data/image/typing/_image_size.py @@ -62,7 +62,9 @@ def from_image(image: Image) -> ImageSize: def __eq__(self, other: object) -> bool: if not isinstance(other, ImageSize): return NotImplemented - return (self is other) or (self._width == other._width and self._height == other._height and self._channel == other._channel) + return (self is other) or ( + self._width == other._width and self._height == other._height and self._channel == other._channel + ) def __hash__(self) -> int: return _structural_hash(self._width, self._height, self._channel) diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py index 72fa6e798..0fc52347c 100644 --- a/src/safeds/data/labeled/containers/_image_dataset.py +++ b/src/safeds/data/labeled/containers/_image_dataset.py @@ -128,7 +128,13 @@ def __eq__(self, other: object) -> bool: """ if not isinstance(other, ImageDataset): return NotImplemented - return (self is other) or (self._shuffle_after_epoch == other._shuffle_after_epoch and self._batch_size == other._batch_size and isinstance(other._output, type(self._output)) and (self._input == other._input) and (self._output == other._output)) + return (self is other) or ( + self._shuffle_after_epoch == other._shuffle_after_epoch + and self._batch_size == other._batch_size + and isinstance(other._output, type(self._output)) + and (self._input == other._input) + and (self._output == other._output) + ) def __hash__(self) -> int: """ @@ -291,7 +297,9 @@ def __eq__(self, other: object) -> bool: if not isinstance(other, _TableAsTensor): return NotImplemented - return (self is other) or (self._column_names == other._column_names and torch.all(torch.eq(self._tensor, other._tensor)).item()) + return (self is other) or ( + self._column_names == other._column_names and torch.all(torch.eq(self._tensor, other._tensor)).item() + ) def __hash__(self) -> int: return _structural_hash(self._tensor.size(), self._column_names) @@ -337,7 +345,11 @@ def __eq__(self, 
other: object) -> bool: if not isinstance(other, _ColumnAsTensor): return NotImplemented - return (self is other) or (self._column_name == other._column_name and self._one_hot_encoder == other._one_hot_encoder and torch.all(torch.eq(self._tensor, other._tensor)).item()) + return (self is other) or ( + self._column_name == other._column_name + and self._one_hot_encoder == other._one_hot_encoder + and torch.all(torch.eq(self._tensor, other._tensor)).item() + ) def __hash__(self) -> int: return _structural_hash(self._tensor.size(), self._column_name, self._one_hot_encoder) diff --git a/src/safeds/ml/nn/_convolutional2d_layer.py b/src/safeds/ml/nn/_convolutional2d_layer.py index 604384446..0f412f4ff 100644 --- a/src/safeds/ml/nn/_convolutional2d_layer.py +++ b/src/safeds/ml/nn/_convolutional2d_layer.py @@ -189,7 +189,9 @@ def __hash__(self) -> int: hash: the hash value """ - return _structural_hash(self._output_channel, self._kernel_size, self._stride, self._padding, self._input_size, self._output_size) + return _structural_hash( + self._output_channel, self._kernel_size, self._stride, self._padding, self._input_size, self._output_size, + ) def __eq__(self, other: object) -> bool: """ diff --git a/src/safeds/ml/nn/_input_conversion_image.py b/src/safeds/ml/nn/_input_conversion_image.py index 798d48cfd..e19b38b68 100644 --- a/src/safeds/ml/nn/_input_conversion_image.py +++ b/src/safeds/ml/nn/_input_conversion_image.py @@ -91,7 +91,14 @@ def __hash__(self) -> int: hash: the hash value """ - return _structural_hash(self._input_size, self._output_size, self._one_hot_encoder, self._column_name, self._column_names, self._output_type) + return _structural_hash( + self._input_size, + self._output_size, + self._one_hot_encoder, + self._column_name, + self._column_names, + self._output_type, + ) def __eq__(self, other: object) -> bool: """ diff --git a/src/safeds/ml/nn/_pooling2d_layer.py b/src/safeds/ml/nn/_pooling2d_layer.py index 7ee39f3ea..256afa841 100644 --- a/src/safeds/ml/nn/_pooling2d_layer.py +++ b/src/safeds/ml/nn/_pooling2d_layer.py @@ -120,7 +120,9 @@ def __hash__(self) -> int: hash: the hash value """ - return _structural_hash(self._strategy, self._kernel_size, self._stride, self._padding, self._input_size, self._output_size) + return _structural_hash( + self._strategy, self._kernel_size, self._stride, self._padding, self._input_size, self._output_size, + ) def __eq__(self, other: object) -> bool: """ diff --git a/tests/safeds/data/labeled/containers/test_image_dataset.py b/tests/safeds/data/labeled/containers/test_image_dataset.py index d81df8644..83b3dfa72 100644 --- a/tests/safeds/data/labeled/containers/test_image_dataset.py +++ b/tests/safeds/data/labeled/containers/test_image_dataset.py @@ -110,10 +110,25 @@ class TestEq: @pytest.mark.parametrize( ("image_dataset1", "image_dataset2"), [ - (ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1])), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1]))), - (ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [1]})), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [1]}))), - (ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(plane_png_path))), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(plane_png_path)))), - ] + ( + 
ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1])), + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1])), + ), + ( + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [1]})), + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [1]})), + ), + ( + ImageDataset( + ImageList.from_files(resolve_resource_path(plane_png_path)), + ImageList.from_files(resolve_resource_path(plane_png_path)), + ), + ImageDataset( + ImageList.from_files(resolve_resource_path(plane_png_path)), + ImageList.from_files(resolve_resource_path(plane_png_path)), + ), + ), + ], ) def test_should_be_equal(self, image_dataset1: ImageDataset, image_dataset2: ImageDataset) -> None: assert image_dataset1 == image_dataset2 @@ -123,8 +138,11 @@ def test_should_be_equal(self, image_dataset1: ImageDataset, image_dataset2: Ima [ ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1])), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [1]})), - ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(plane_png_path))), - ] + ImageDataset( + ImageList.from_files(resolve_resource_path(plane_png_path)), + ImageList.from_files(resolve_resource_path(plane_png_path)), + ), + ], ) @pytest.mark.parametrize( "image_dataset2", @@ -132,12 +150,20 @@ def test_should_be_equal(self, image_dataset1: ImageDataset, image_dataset2: Ima ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("ims", [1])), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"ims": [1]})), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [0])), - ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [0], "others": [1]})), - ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(white_square_png_path))), + ImageDataset( + ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [0], "others": [1]}), + ), + ImageDataset( + ImageList.from_files(resolve_resource_path(plane_png_path)), + ImageList.from_files(resolve_resource_path(white_square_png_path)), + ), ImageDataset(ImageList.from_files(resolve_resource_path(white_square_png_path)), Column("images", [1])), ImageDataset(ImageList.from_files(resolve_resource_path(white_square_png_path)), Table({"images": [1]})), - ImageDataset(ImageList.from_files(resolve_resource_path(white_square_png_path)), ImageList.from_files(resolve_resource_path(plane_png_path))), - ] + ImageDataset( + ImageList.from_files(resolve_resource_path(white_square_png_path)), + ImageList.from_files(resolve_resource_path(plane_png_path)), + ), + ], ) def test_should_not_be_equal(self, image_dataset1: ImageDataset, image_dataset2: ImageDataset) -> None: assert image_dataset1 != image_dataset2 @@ -153,10 +179,25 @@ class TestHash: @pytest.mark.parametrize( ("image_dataset1", "image_dataset2"), [ - (ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1])), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1]))), - (ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [1]})), 
ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [1]}))), - (ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(plane_png_path))), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(plane_png_path)))), - ] + ( + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1])), + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1])), + ), + ( + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [1]})), + ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [1]})), + ), + ( + ImageDataset( + ImageList.from_files(resolve_resource_path(plane_png_path)), + ImageList.from_files(resolve_resource_path(plane_png_path)), + ), + ImageDataset( + ImageList.from_files(resolve_resource_path(plane_png_path)), + ImageList.from_files(resolve_resource_path(plane_png_path)), + ), + ), + ], ) def test_hash_should_be_equal(self, image_dataset1: ImageDataset, image_dataset2: ImageDataset) -> None: assert hash(image_dataset1) == hash(image_dataset2) @@ -166,8 +207,11 @@ def test_hash_should_be_equal(self, image_dataset1: ImageDataset, image_dataset2 [ ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1])), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [1]})), - ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(plane_png_path))), - ] + ImageDataset( + ImageList.from_files(resolve_resource_path(plane_png_path)), + ImageList.from_files(resolve_resource_path(plane_png_path)), + ), + ], ) @pytest.mark.parametrize( "image_dataset2", @@ -175,12 +219,20 @@ def test_hash_should_be_equal(self, image_dataset1: ImageDataset, image_dataset2 ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("ims", [1])), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"ims": [1]})), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [0])), - ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [0], "others": [1]})), - ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(white_square_png_path))), + ImageDataset( + ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [0], "others": [1]}), + ), + ImageDataset( + ImageList.from_files(resolve_resource_path(plane_png_path)), + ImageList.from_files(resolve_resource_path(white_square_png_path)), + ), ImageDataset(ImageList.from_files(resolve_resource_path(white_square_png_path)), Column("images", [1])), ImageDataset(ImageList.from_files(resolve_resource_path(white_square_png_path)), Table({"images": [1]})), - ImageDataset(ImageList.from_files(resolve_resource_path(white_square_png_path)), ImageList.from_files(resolve_resource_path(plane_png_path))), - ] + ImageDataset( + ImageList.from_files(resolve_resource_path(white_square_png_path)), + ImageList.from_files(resolve_resource_path(plane_png_path)), + ), + ], ) def test_hash_should_not_be_equal(self, image_dataset1: ImageDataset, image_dataset2: ImageDataset) -> None: assert hash(image_dataset1) != hash(image_dataset2) @@ -193,8 +245,11 @@ class 
TestSizeOf: [ ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1])), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [1]})), - ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), ImageList.from_files(resolve_resource_path(plane_png_path))), - ] + ImageDataset( + ImageList.from_files(resolve_resource_path(plane_png_path)), + ImageList.from_files(resolve_resource_path(plane_png_path)), + ), + ], ) def test_should_size_be_greater_than_normal_object(self, image_dataset: ImageDataset) -> None: assert sys.getsizeof(image_dataset) > sys.getsizeof(object()) diff --git a/tests/safeds/ml/nn/test_convolutional2d_layer.py b/tests/safeds/ml/nn/test_convolutional2d_layer.py index 2f84d3fd2..edff59acd 100644 --- a/tests/safeds/ml/nn/test_convolutional2d_layer.py +++ b/tests/safeds/ml/nn/test_convolutional2d_layer.py @@ -166,8 +166,11 @@ class TestEq: (Convolutional2DLayer(1, 2), Convolutional2DLayer(1, 2)), (Convolutional2DLayer(1, 2, stride=3, padding=4), Convolutional2DLayer(1, 2, stride=3, padding=4)), (ConvolutionalTranspose2DLayer(1, 2), ConvolutionalTranspose2DLayer(1, 2)), - (ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=5), ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=5)), - ] + ( + ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=5), + ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=5), + ), + ], ) def test_should_be_equal(self, conv2dlayer1: Convolutional2DLayer, conv2dlayer2: Convolutional2DLayer) -> None: assert conv2dlayer1 == conv2dlayer2 @@ -180,7 +183,7 @@ def test_should_be_equal(self, conv2dlayer1: Convolutional2DLayer, conv2dlayer2: Convolutional2DLayer(1, 2, stride=3, padding=4), ConvolutionalTranspose2DLayer(1, 2), ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=5), - ] + ], ) @pytest.mark.parametrize( "conv2dlayer2", @@ -194,9 +197,11 @@ def test_should_be_equal(self, conv2dlayer1: Convolutional2DLayer, conv2dlayer2: ConvolutionalTranspose2DLayer(1, 2, stride=4, padding=4, output_padding=5), ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=3, output_padding=5), ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=4), - ] + ], ) - def test_should_not_be_equal(self, conv2dlayer1: Convolutional2DLayer, conv2dlayer2: Convolutional2DLayer) -> None: + def test_should_not_be_equal( + self, conv2dlayer1: Convolutional2DLayer, conv2dlayer2: Convolutional2DLayer, + ) -> None: assert conv2dlayer1 != conv2dlayer2 assert conv2dlayer2 != conv2dlayer1 @@ -214,10 +219,15 @@ class TestHash: (Convolutional2DLayer(1, 2), Convolutional2DLayer(1, 2)), (Convolutional2DLayer(1, 2, stride=3, padding=4), Convolutional2DLayer(1, 2, stride=3, padding=4)), (ConvolutionalTranspose2DLayer(1, 2), ConvolutionalTranspose2DLayer(1, 2)), - (ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=5), ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=5)), - ] + ( + ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=5), + ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=5), + ), + ], ) - def test_hash_should_be_equal(self, conv2dlayer1: Convolutional2DLayer, conv2dlayer2: Convolutional2DLayer) -> None: + def test_hash_should_be_equal( + self, conv2dlayer1: Convolutional2DLayer, conv2dlayer2: Convolutional2DLayer, + ) -> None: assert hash(conv2dlayer1) == hash(conv2dlayer2) 
@pytest.mark.parametrize( @@ -227,7 +237,7 @@ def test_hash_should_be_equal(self, conv2dlayer1: Convolutional2DLayer, conv2dla Convolutional2DLayer(1, 2, stride=3, padding=4), ConvolutionalTranspose2DLayer(1, 2), ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=5), - ] + ], ) @pytest.mark.parametrize( "conv2dlayer2", @@ -241,9 +251,11 @@ def test_hash_should_be_equal(self, conv2dlayer1: Convolutional2DLayer, conv2dla ConvolutionalTranspose2DLayer(1, 2, stride=4, padding=4, output_padding=5), ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=3, output_padding=5), ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=4), - ] + ], ) - def test_hash_should_not_be_equal(self, conv2dlayer1: Convolutional2DLayer, conv2dlayer2: Convolutional2DLayer) -> None: + def test_hash_should_not_be_equal( + self, conv2dlayer1: Convolutional2DLayer, conv2dlayer2: Convolutional2DLayer, + ) -> None: assert hash(conv2dlayer1) != hash(conv2dlayer2) class TestSizeOf: @@ -255,7 +267,7 @@ class TestSizeOf: Convolutional2DLayer(1, 2, stride=3, padding=4), ConvolutionalTranspose2DLayer(1, 2), ConvolutionalTranspose2DLayer(1, 2, stride=3, padding=4, output_padding=5), - ] + ], ) def test_should_size_be_greater_than_normal_object(self, conv2dlayer: Convolutional2DLayer) -> None: assert sys.getsizeof(conv2dlayer) > sys.getsizeof(object()) diff --git a/tests/safeds/ml/nn/test_input_conversion_image.py b/tests/safeds/ml/nn/test_input_conversion_image.py index 3e53fae46..1475341d7 100644 --- a/tests/safeds/ml/nn/test_input_conversion_image.py +++ b/tests/safeds/ml/nn/test_input_conversion_image.py @@ -81,19 +81,14 @@ class TestEq: @pytest.mark.parametrize( ("input_conversion_image1", "input_conversion_image2"), - [ - (InputConversionImage(ImageSize(1, 2, 3)), InputConversionImage(ImageSize(1, 2, 3))) - ] + [(InputConversionImage(ImageSize(1, 2, 3)), InputConversionImage(ImageSize(1, 2, 3)))], ) - def test_should_be_equal(self, input_conversion_image1: InputConversionImage, input_conversion_image2: InputConversionImage) -> None: + def test_should_be_equal( + self, input_conversion_image1: InputConversionImage, input_conversion_image2: InputConversionImage, + ) -> None: assert input_conversion_image1 == input_conversion_image2 - @pytest.mark.parametrize( - "input_conversion_image1", - [ - InputConversionImage(ImageSize(1, 2, 3)) - ] - ) + @pytest.mark.parametrize("input_conversion_image1", [InputConversionImage(ImageSize(1, 2, 3))]) @pytest.mark.parametrize( "input_conversion_image2", [ @@ -101,9 +96,11 @@ def test_should_be_equal(self, input_conversion_image1: InputConversionImage, in InputConversionImage(ImageSize(1, 1, 3)), InputConversionImage(ImageSize(1, 2, 1)), InputConversionImage(ImageSize(1, 2, 4)), - ] + ], ) - def test_should_not_be_equal(self, input_conversion_image1: InputConversionImage, input_conversion_image2: InputConversionImage) -> None: + def test_should_not_be_equal( + self, input_conversion_image1: InputConversionImage, input_conversion_image2: InputConversionImage, + ) -> None: assert input_conversion_image1 != input_conversion_image2 def test_should_be_not_implemented(self) -> None: @@ -116,19 +113,14 @@ class TestHash: @pytest.mark.parametrize( ("input_conversion_image1", "input_conversion_image2"), - [ - (InputConversionImage(ImageSize(1, 2, 3)), InputConversionImage(ImageSize(1, 2, 3))) - ] + [(InputConversionImage(ImageSize(1, 2, 3)), InputConversionImage(ImageSize(1, 2, 3)))], ) - def test_hash_should_be_equal(self, input_conversion_image1: 
InputConversionImage, input_conversion_image2: InputConversionImage) -> None: + def test_hash_should_be_equal( + self, input_conversion_image1: InputConversionImage, input_conversion_image2: InputConversionImage, + ) -> None: assert hash(input_conversion_image1) == hash(input_conversion_image2) - @pytest.mark.parametrize( - "input_conversion_image1", - [ - InputConversionImage(ImageSize(1, 2, 3)) - ] - ) + @pytest.mark.parametrize("input_conversion_image1", [InputConversionImage(ImageSize(1, 2, 3))]) @pytest.mark.parametrize( "input_conversion_image2", [ @@ -136,20 +128,16 @@ def test_hash_should_be_equal(self, input_conversion_image1: InputConversionImag InputConversionImage(ImageSize(1, 1, 3)), InputConversionImage(ImageSize(1, 2, 1)), InputConversionImage(ImageSize(1, 2, 4)), - ] + ], ) - def test_hash_should_not_be_equal(self, input_conversion_image1: InputConversionImage, input_conversion_image2: InputConversionImage) -> None: + def test_hash_should_not_be_equal( + self, input_conversion_image1: InputConversionImage, input_conversion_image2: InputConversionImage, + ) -> None: assert hash(input_conversion_image1) != hash(input_conversion_image2) class TestSizeOf: - @pytest.mark.parametrize( - "input_conversion_image", - [ - InputConversionImage(ImageSize(1, 2, 3)) - ] - ) + @pytest.mark.parametrize("input_conversion_image", [InputConversionImage(ImageSize(1, 2, 3))]) def test_should_size_be_greater_than_normal_object(self, input_conversion_image: InputConversionImage) -> None: assert sys.getsizeof(input_conversion_image) > sys.getsizeof(object()) - diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index 72ae915ec..3e03ad2fc 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -15,16 +15,18 @@ ConvolutionalTranspose2DLayer, FlattenLayer, ForwardLayer, + InputConversion, InputConversionImage, InputConversionTable, + Layer, MaxPooling2DLayer, NeuralNetworkClassifier, NeuralNetworkRegressor, + OutputConversion, OutputConversionImageToImage, OutputConversionImageToTable, OutputConversionTable, ) -from safeds.ml.nn import InputConversion, Layer, OutputConversion from safeds.ml.nn._output_conversion_image import OutputConversionImageToColumn diff --git a/tests/safeds/ml/nn/test_output_conversion_image.py b/tests/safeds/ml/nn/test_output_conversion_image.py index 6ec2b6548..dfa8d0183 100644 --- a/tests/safeds/ml/nn/test_output_conversion_image.py +++ b/tests/safeds/ml/nn/test_output_conversion_image.py @@ -20,7 +20,9 @@ class TestDataConversionImage: (OutputConversionImageToImage(), {}), ], ) - def test_should_raise_if_input_data_is_multi_size(self, output_conversion: _OutputConversionImage, kwargs: dict) -> None: + def test_should_raise_if_input_data_is_multi_size( + self, output_conversion: _OutputConversionImage, kwargs: dict, + ) -> None: with pytest.raises(ValueError, match=r"The given input ImageList contains images of different sizes."): output_conversion._data_conversion(input_data=_MultiSizeImageList(), output_data=torch.empty(1), **kwargs) @@ -32,9 +34,11 @@ class TestEq: (OutputConversionImageToColumn(), OutputConversionImageToColumn()), (OutputConversionImageToTable(), OutputConversionImageToTable()), (OutputConversionImageToImage(), OutputConversionImageToImage()), - ] + ], ) - def test_should_be_equal(self, output_conversion_image1: _OutputConversionImage, output_conversion_image2: _OutputConversionImage) -> None: + def test_should_be_equal( + self, output_conversion_image1: _OutputConversionImage, 
output_conversion_image2: _OutputConversionImage, + ) -> None: assert output_conversion_image1 == output_conversion_image2 def test_should_be_not_implemented(self) -> None: @@ -60,9 +64,11 @@ class TestHash: (OutputConversionImageToColumn(), OutputConversionImageToColumn()), (OutputConversionImageToTable(), OutputConversionImageToTable()), (OutputConversionImageToImage(), OutputConversionImageToImage()), - ] + ], ) - def test_hash_should_be_equal(self, output_conversion_image1: _OutputConversionImage, output_conversion_image2: _OutputConversionImage) -> None: + def test_hash_should_be_equal( + self, output_conversion_image1: _OutputConversionImage, output_conversion_image2: _OutputConversionImage, + ) -> None: assert hash(output_conversion_image1) == hash(output_conversion_image2) def test_hash_should_not_be_equal(self) -> None: @@ -81,9 +87,11 @@ class TestSizeOf: OutputConversionImageToColumn(), OutputConversionImageToTable(), OutputConversionImageToImage(), - ] + ], ) - def test_should_size_be_greater_than_normal_object(self, output_conversion_image: _OutputConversionImage) -> None: + def test_should_size_be_greater_than_normal_object( + self, output_conversion_image: _OutputConversionImage, + ) -> None: assert sys.getsizeof(output_conversion_image) > sys.getsizeof(object()) diff --git a/tests/safeds/ml/nn/test_pooling2d_layer.py b/tests/safeds/ml/nn/test_pooling2d_layer.py index a30d4c4a8..f5eee9f47 100644 --- a/tests/safeds/ml/nn/test_pooling2d_layer.py +++ b/tests/safeds/ml/nn/test_pooling2d_layer.py @@ -4,7 +4,7 @@ import pytest from safeds.data.image.typing import ImageSize from safeds.data.tabular.containers import Table -from safeds.ml.nn import MaxPooling2DLayer, AvgPooling2DLayer +from safeds.ml.nn import AvgPooling2DLayer, MaxPooling2DLayer from safeds.ml.nn._pooling2d_layer import _Pooling2DLayer from torch import nn @@ -66,9 +66,11 @@ class TestEq: (MaxPooling2DLayer(2, stride=3, padding=4), MaxPooling2DLayer(2, stride=3, padding=4)), (AvgPooling2DLayer(2), AvgPooling2DLayer(2)), (AvgPooling2DLayer(2, stride=3, padding=4), AvgPooling2DLayer(2, stride=3, padding=4)), - ] + ], ) - def test_should_be_equal(self, pooling_2d_layer_1: _Pooling2DLayer, pooling_2d_layer_2: _Pooling2DLayer) -> None: + def test_should_be_equal( + self, pooling_2d_layer_1: _Pooling2DLayer, pooling_2d_layer_2: _Pooling2DLayer, + ) -> None: assert pooling_2d_layer_1 == pooling_2d_layer_2 @pytest.mark.parametrize( @@ -78,7 +80,7 @@ def test_should_be_equal(self, pooling_2d_layer_1: _Pooling2DLayer, pooling_2d_l MaxPooling2DLayer(2, stride=3, padding=4), AvgPooling2DLayer(2), AvgPooling2DLayer(2, stride=3, padding=4), - ] + ], ) @pytest.mark.parametrize( "pooling_2d_layer_2", @@ -91,9 +93,11 @@ def test_should_be_equal(self, pooling_2d_layer_1: _Pooling2DLayer, pooling_2d_l AvgPooling2DLayer(1, stride=3, padding=4), AvgPooling2DLayer(2, stride=1, padding=4), AvgPooling2DLayer(2, stride=3, padding=1), - ] + ], ) - def test_should_not_be_equal(self, pooling_2d_layer_1: _Pooling2DLayer, pooling_2d_layer_2: _Pooling2DLayer) -> None: + def test_should_not_be_equal( + self, pooling_2d_layer_1: _Pooling2DLayer, pooling_2d_layer_2: _Pooling2DLayer, + ) -> None: assert pooling_2d_layer_1 != pooling_2d_layer_2 def test_should_be_not_implemented(self) -> None: @@ -114,9 +118,11 @@ class TestHash: (MaxPooling2DLayer(2, stride=3, padding=4), MaxPooling2DLayer(2, stride=3, padding=4)), (AvgPooling2DLayer(2), AvgPooling2DLayer(2)), (AvgPooling2DLayer(2, stride=3, padding=4), AvgPooling2DLayer(2, stride=3, padding=4)), - 
] + ], ) - def test_hash_should_be_equal(self, pooling_2d_layer_1: _Pooling2DLayer, pooling_2d_layer_2: _Pooling2DLayer) -> None: + def test_hash_should_be_equal( + self, pooling_2d_layer_1: _Pooling2DLayer, pooling_2d_layer_2: _Pooling2DLayer, + ) -> None: assert hash(pooling_2d_layer_1) == hash(pooling_2d_layer_2) @pytest.mark.parametrize( @@ -126,7 +132,7 @@ def test_hash_should_be_equal(self, pooling_2d_layer_1: _Pooling2DLayer, pooling MaxPooling2DLayer(2, stride=3, padding=4), AvgPooling2DLayer(2), AvgPooling2DLayer(2, stride=3, padding=4), - ] + ], ) @pytest.mark.parametrize( "pooling_2d_layer_2", @@ -139,9 +145,11 @@ def test_hash_should_be_equal(self, pooling_2d_layer_1: _Pooling2DLayer, pooling AvgPooling2DLayer(1, stride=3, padding=4), AvgPooling2DLayer(2, stride=1, padding=4), AvgPooling2DLayer(2, stride=3, padding=1), - ] + ], ) - def test_hash_should_not_be_equal(self, pooling_2d_layer_1: _Pooling2DLayer, pooling_2d_layer_2: _Pooling2DLayer) -> None: + def test_hash_should_not_be_equal( + self, pooling_2d_layer_1: _Pooling2DLayer, pooling_2d_layer_2: _Pooling2DLayer, + ) -> None: assert hash(pooling_2d_layer_1) != hash(pooling_2d_layer_2) class TestSizeOf: @@ -153,8 +161,7 @@ class TestSizeOf: MaxPooling2DLayer(2, stride=3, padding=4), AvgPooling2DLayer(2), AvgPooling2DLayer(2, stride=3, padding=4), - ] + ], ) def test_should_size_be_greater_than_normal_object(self, pooling_2d_layer: _Pooling2DLayer) -> None: assert sys.getsizeof(pooling_2d_layer) > sys.getsizeof(object()) - From b8cfd66eeae82dc5f575dfe0a06bf9bbc9e759b0 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Fri, 3 May 2024 03:00:28 +0000 Subject: [PATCH 39/42] style: apply automated linter fixes --- src/safeds/ml/nn/_convolutional2d_layer.py | 7 ++++++- src/safeds/ml/nn/_pooling2d_layer.py | 7 ++++++- .../labeled/containers/test_image_dataset.py | 6 ++++-- tests/safeds/ml/nn/test_convolutional2d_layer.py | 12 +++++++++--- .../safeds/ml/nn/test_input_conversion_image.py | 16 ++++++++++++---- .../safeds/ml/nn/test_output_conversion_image.py | 15 +++++++++++---- tests/safeds/ml/nn/test_pooling2d_layer.py | 16 ++++++++++++---- 7 files changed, 60 insertions(+), 19 deletions(-) diff --git a/src/safeds/ml/nn/_convolutional2d_layer.py b/src/safeds/ml/nn/_convolutional2d_layer.py index 0f412f4ff..59e0c2dde 100644 --- a/src/safeds/ml/nn/_convolutional2d_layer.py +++ b/src/safeds/ml/nn/_convolutional2d_layer.py @@ -190,7 +190,12 @@ def __hash__(self) -> int: the hash value """ return _structural_hash( - self._output_channel, self._kernel_size, self._stride, self._padding, self._input_size, self._output_size, + self._output_channel, + self._kernel_size, + self._stride, + self._padding, + self._input_size, + self._output_size, ) def __eq__(self, other: object) -> bool: diff --git a/src/safeds/ml/nn/_pooling2d_layer.py b/src/safeds/ml/nn/_pooling2d_layer.py index 256afa841..f3d777b01 100644 --- a/src/safeds/ml/nn/_pooling2d_layer.py +++ b/src/safeds/ml/nn/_pooling2d_layer.py @@ -121,7 +121,12 @@ def __hash__(self) -> int: the hash value """ return _structural_hash( - self._strategy, self._kernel_size, self._stride, self._padding, self._input_size, self._output_size, + self._strategy, + self._kernel_size, + self._stride, + self._padding, + self._input_size, + self._output_size, ) def __eq__(self, other: object) -> bool: diff --git a/tests/safeds/data/labeled/containers/test_image_dataset.py b/tests/safeds/data/labeled/containers/test_image_dataset.py index 
83b3dfa72..49b01e1e2 100644 --- a/tests/safeds/data/labeled/containers/test_image_dataset.py +++ b/tests/safeds/data/labeled/containers/test_image_dataset.py @@ -151,7 +151,8 @@ def test_should_be_equal(self, image_dataset1: ImageDataset, image_dataset2: Ima ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"ims": [1]})), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [0])), ImageDataset( - ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [0], "others": [1]}), + ImageList.from_files(resolve_resource_path(plane_png_path)), + Table({"images": [0], "others": [1]}), ), ImageDataset( ImageList.from_files(resolve_resource_path(plane_png_path)), @@ -220,7 +221,8 @@ def test_hash_should_be_equal(self, image_dataset1: ImageDataset, image_dataset2 ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"ims": [1]})), ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [0])), ImageDataset( - ImageList.from_files(resolve_resource_path(plane_png_path)), Table({"images": [0], "others": [1]}), + ImageList.from_files(resolve_resource_path(plane_png_path)), + Table({"images": [0], "others": [1]}), ), ImageDataset( ImageList.from_files(resolve_resource_path(plane_png_path)), diff --git a/tests/safeds/ml/nn/test_convolutional2d_layer.py b/tests/safeds/ml/nn/test_convolutional2d_layer.py index edff59acd..9a9a50d6c 100644 --- a/tests/safeds/ml/nn/test_convolutional2d_layer.py +++ b/tests/safeds/ml/nn/test_convolutional2d_layer.py @@ -200,7 +200,9 @@ def test_should_be_equal(self, conv2dlayer1: Convolutional2DLayer, conv2dlayer2: ], ) def test_should_not_be_equal( - self, conv2dlayer1: Convolutional2DLayer, conv2dlayer2: Convolutional2DLayer, + self, + conv2dlayer1: Convolutional2DLayer, + conv2dlayer2: Convolutional2DLayer, ) -> None: assert conv2dlayer1 != conv2dlayer2 assert conv2dlayer2 != conv2dlayer1 @@ -226,7 +228,9 @@ class TestHash: ], ) def test_hash_should_be_equal( - self, conv2dlayer1: Convolutional2DLayer, conv2dlayer2: Convolutional2DLayer, + self, + conv2dlayer1: Convolutional2DLayer, + conv2dlayer2: Convolutional2DLayer, ) -> None: assert hash(conv2dlayer1) == hash(conv2dlayer2) @@ -254,7 +258,9 @@ def test_hash_should_be_equal( ], ) def test_hash_should_not_be_equal( - self, conv2dlayer1: Convolutional2DLayer, conv2dlayer2: Convolutional2DLayer, + self, + conv2dlayer1: Convolutional2DLayer, + conv2dlayer2: Convolutional2DLayer, ) -> None: assert hash(conv2dlayer1) != hash(conv2dlayer2) diff --git a/tests/safeds/ml/nn/test_input_conversion_image.py b/tests/safeds/ml/nn/test_input_conversion_image.py index 1475341d7..f8928cd62 100644 --- a/tests/safeds/ml/nn/test_input_conversion_image.py +++ b/tests/safeds/ml/nn/test_input_conversion_image.py @@ -84,7 +84,9 @@ class TestEq: [(InputConversionImage(ImageSize(1, 2, 3)), InputConversionImage(ImageSize(1, 2, 3)))], ) def test_should_be_equal( - self, input_conversion_image1: InputConversionImage, input_conversion_image2: InputConversionImage, + self, + input_conversion_image1: InputConversionImage, + input_conversion_image2: InputConversionImage, ) -> None: assert input_conversion_image1 == input_conversion_image2 @@ -99,7 +101,9 @@ def test_should_be_equal( ], ) def test_should_not_be_equal( - self, input_conversion_image1: InputConversionImage, input_conversion_image2: InputConversionImage, + self, + input_conversion_image1: InputConversionImage, + input_conversion_image2: 
InputConversionImage, ) -> None: assert input_conversion_image1 != input_conversion_image2 @@ -116,7 +120,9 @@ class TestHash: [(InputConversionImage(ImageSize(1, 2, 3)), InputConversionImage(ImageSize(1, 2, 3)))], ) def test_hash_should_be_equal( - self, input_conversion_image1: InputConversionImage, input_conversion_image2: InputConversionImage, + self, + input_conversion_image1: InputConversionImage, + input_conversion_image2: InputConversionImage, ) -> None: assert hash(input_conversion_image1) == hash(input_conversion_image2) @@ -131,7 +137,9 @@ def test_hash_should_be_equal( ], ) def test_hash_should_not_be_equal( - self, input_conversion_image1: InputConversionImage, input_conversion_image2: InputConversionImage, + self, + input_conversion_image1: InputConversionImage, + input_conversion_image2: InputConversionImage, ) -> None: assert hash(input_conversion_image1) != hash(input_conversion_image2) diff --git a/tests/safeds/ml/nn/test_output_conversion_image.py b/tests/safeds/ml/nn/test_output_conversion_image.py index dfa8d0183..afa6a69db 100644 --- a/tests/safeds/ml/nn/test_output_conversion_image.py +++ b/tests/safeds/ml/nn/test_output_conversion_image.py @@ -21,7 +21,9 @@ class TestDataConversionImage: ], ) def test_should_raise_if_input_data_is_multi_size( - self, output_conversion: _OutputConversionImage, kwargs: dict, + self, + output_conversion: _OutputConversionImage, + kwargs: dict, ) -> None: with pytest.raises(ValueError, match=r"The given input ImageList contains images of different sizes."): output_conversion._data_conversion(input_data=_MultiSizeImageList(), output_data=torch.empty(1), **kwargs) @@ -37,7 +39,9 @@ class TestEq: ], ) def test_should_be_equal( - self, output_conversion_image1: _OutputConversionImage, output_conversion_image2: _OutputConversionImage, + self, + output_conversion_image1: _OutputConversionImage, + output_conversion_image2: _OutputConversionImage, ) -> None: assert output_conversion_image1 == output_conversion_image2 @@ -67,7 +71,9 @@ class TestHash: ], ) def test_hash_should_be_equal( - self, output_conversion_image1: _OutputConversionImage, output_conversion_image2: _OutputConversionImage, + self, + output_conversion_image1: _OutputConversionImage, + output_conversion_image2: _OutputConversionImage, ) -> None: assert hash(output_conversion_image1) == hash(output_conversion_image2) @@ -90,7 +96,8 @@ class TestSizeOf: ], ) def test_should_size_be_greater_than_normal_object( - self, output_conversion_image: _OutputConversionImage, + self, + output_conversion_image: _OutputConversionImage, ) -> None: assert sys.getsizeof(output_conversion_image) > sys.getsizeof(object()) diff --git a/tests/safeds/ml/nn/test_pooling2d_layer.py b/tests/safeds/ml/nn/test_pooling2d_layer.py index f5eee9f47..2218c7539 100644 --- a/tests/safeds/ml/nn/test_pooling2d_layer.py +++ b/tests/safeds/ml/nn/test_pooling2d_layer.py @@ -69,7 +69,9 @@ class TestEq: ], ) def test_should_be_equal( - self, pooling_2d_layer_1: _Pooling2DLayer, pooling_2d_layer_2: _Pooling2DLayer, + self, + pooling_2d_layer_1: _Pooling2DLayer, + pooling_2d_layer_2: _Pooling2DLayer, ) -> None: assert pooling_2d_layer_1 == pooling_2d_layer_2 @@ -96,7 +98,9 @@ def test_should_be_equal( ], ) def test_should_not_be_equal( - self, pooling_2d_layer_1: _Pooling2DLayer, pooling_2d_layer_2: _Pooling2DLayer, + self, + pooling_2d_layer_1: _Pooling2DLayer, + pooling_2d_layer_2: _Pooling2DLayer, ) -> None: assert pooling_2d_layer_1 != pooling_2d_layer_2 @@ -121,7 +125,9 @@ class TestHash: ], ) def 
test_hash_should_be_equal( - self, pooling_2d_layer_1: _Pooling2DLayer, pooling_2d_layer_2: _Pooling2DLayer, + self, + pooling_2d_layer_1: _Pooling2DLayer, + pooling_2d_layer_2: _Pooling2DLayer, ) -> None: assert hash(pooling_2d_layer_1) == hash(pooling_2d_layer_2) @@ -148,7 +154,9 @@ def test_hash_should_be_equal( ], ) def test_hash_should_not_be_equal( - self, pooling_2d_layer_1: _Pooling2DLayer, pooling_2d_layer_2: _Pooling2DLayer, + self, + pooling_2d_layer_1: _Pooling2DLayer, + pooling_2d_layer_2: _Pooling2DLayer, ) -> None: assert hash(pooling_2d_layer_1) != hash(pooling_2d_layer_2) From 16871cfffe889b2d106f810417d10c937f05cc63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Mon, 6 May 2024 16:40:44 +0200 Subject: [PATCH 40/42] feat: disabled warning from `OneHotEncoder` in `ImageDataset` --- src/safeds/data/labeled/containers/_image_dataset.py | 5 ++++- tests/safeds/data/labeled/containers/test_image_dataset.py | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py index 0fc52347c..1a102f569 100644 --- a/src/safeds/data/labeled/containers/_image_dataset.py +++ b/src/safeds/data/labeled/containers/_image_dataset.py @@ -2,6 +2,7 @@ import copy import sys +import warnings from typing import TYPE_CHECKING, Generic, TypeVar from safeds._utils import _structural_hash @@ -335,7 +336,9 @@ def __init__(self, column: Column) -> None: self._column_name = column.name column_as_table = Table.from_columns([column]) - self._one_hot_encoder = OneHotEncoder().fit(column_as_table, [self._column_name]) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message=rf"The columns \['{self._column_name}'\] contain numerical data. 
The OneHotEncoder is designed to encode non-numerical values into numerical values", category=UserWarning) + self._one_hot_encoder = OneHotEncoder().fit(column_as_table, [self._column_name]) self._tensor = torch.Tensor(self._one_hot_encoder.transform(column_as_table)._data.to_numpy(copy=True)).to( torch.get_default_device(), ) diff --git a/tests/safeds/data/labeled/containers/test_image_dataset.py b/tests/safeds/data/labeled/containers/test_image_dataset.py index 49b01e1e2..8369acc30 100644 --- a/tests/safeds/data/labeled/containers/test_image_dataset.py +++ b/tests/safeds/data/labeled/containers/test_image_dataset.py @@ -1,5 +1,6 @@ import math import sys +import warnings from typing import TypeVar import pytest @@ -358,3 +359,8 @@ def test_should_raise_from_tensor( def test_eq_should_be_not_implemented(self) -> None: assert _ColumnAsTensor(Column("column", [1])).__eq__(Table()) is NotImplemented + + def test_should_not_warn(self) -> None: + with warnings.catch_warnings(): + warnings.filterwarnings("error") + _ColumnAsTensor(Column("column", [1, 2, 3])) From 0397251d452470171dcc1341fd8d7bb0229b52cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Mon, 6 May 2024 16:47:24 +0200 Subject: [PATCH 41/42] refactor: completed merge --- src/safeds/data/labeled/containers/_image_dataset.py | 2 +- src/safeds/data/tabular/transformation/_one_hot_encoder.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py index 1a102f569..51cd955bb 100644 --- a/src/safeds/data/labeled/containers/_image_dataset.py +++ b/src/safeds/data/labeled/containers/_image_dataset.py @@ -89,7 +89,7 @@ def __init__(self, input_data: ImageList, output_data: T, batch_size: int = 1, s _output_size = len(_column_as_tensor._one_hot_encoder.get_names_of_added_columns()) _output = _column_as_tensor elif isinstance(output_data, _SingleSizeImageList): - _output = output_data.clone()._as_single_size_image_list() + _output = output_data._clone()._as_single_size_image_list() _output_size = ImageSize(output_data.widths[0], output_data.heights[0], output_data.channel) else: raise ValueError("The given output ImageList contains images of different sizes.") # noqa: TRY004 diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py index a4f26e8fe..f9694e6f9 100644 --- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py +++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py @@ -224,7 +224,7 @@ def transform(self, table: Table) -> Table: values_not_present_when_fitted.append((value, old_column_name)) for new_column in self._column_names[old_column_name]: - table = table.add_column(Column(new_column, encoded_values[new_column])) + table = table.add_columns([Column(new_column, encoded_values[new_column])]) if len(values_not_present_when_fitted) > 0: raise ValueNotPresentWhenFittedError(values_not_present_when_fitted) From 701091b355664a7668573d4a0eca31f9163b6b00 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Mon, 6 May 2024 14:49:06 +0000 Subject: [PATCH 42/42] style: apply automated linter fixes --- src/safeds/data/labeled/containers/_image_dataset.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/safeds/data/labeled/containers/_image_dataset.py b/src/safeds/data/labeled/containers/_image_dataset.py index 51cd955bb..1b2f72a8c 
100644 --- a/src/safeds/data/labeled/containers/_image_dataset.py +++ b/src/safeds/data/labeled/containers/_image_dataset.py @@ -337,7 +337,11 @@ def __init__(self, column: Column) -> None: self._column_name = column.name column_as_table = Table.from_columns([column]) with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message=rf"The columns \['{self._column_name}'\] contain numerical data. The OneHotEncoder is designed to encode non-numerical values into numerical values", category=UserWarning) + warnings.filterwarnings( + "ignore", + message=rf"The columns \['{self._column_name}'\] contain numerical data. The OneHotEncoder is designed to encode non-numerical values into numerical values", + category=UserWarning, + ) self._one_hot_encoder = OneHotEncoder().fit(column_as_table, [self._column_name]) self._tensor = torch.Tensor(self._one_hot_encoder.transform(column_as_table)._data.to_numpy(copy=True)).to( torch.get_default_device(),
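The TestEq/TestHash suites added and reformatted in the patches above all pin down the same standard Python equality contract. A minimal standalone sketch of that contract follows; the `_DemoLayer` class and its fields are invented for illustration and are not Safe-DS code, but the sketch mirrors the project's own `(self is other) or (...)` style and the three properties the tests assert: equal configuration implies equality, `__eq__` and `__hash__` derive from the same fields, and comparison against a foreign type returns `NotImplemented` rather than `False`.

    class _DemoLayer:
        """Toy stand-in for a pooling/convolution layer; fields are illustrative."""

        def __init__(self, kernel_size: int, stride: int = 1, padding: int = 0) -> None:
            self._kernel_size = kernel_size
            self._stride = stride
            self._padding = padding

        def __eq__(self, other: object) -> bool:
            if not isinstance(other, _DemoLayer):
                return NotImplemented  # let Python try the reflected comparison next
            return (self is other) or (
                self._kernel_size == other._kernel_size
                and self._stride == other._stride
                and self._padding == other._padding
            )

        def __hash__(self) -> int:
            # Hash exactly the fields __eq__ compares, so a == b implies hash(a) == hash(b).
            return hash((self._kernel_size, self._stride, self._padding))

    assert _DemoLayer(2, stride=3, padding=4) == _DemoLayer(2, stride=3, padding=4)
    assert hash(_DemoLayer(2)) == hash(_DemoLayer(2))
    assert _DemoLayer(2).__eq__("not a layer") is NotImplemented

Returning `NotImplemented` instead of `False` is what the `test_should_be_not_implemented` cases check: it gives the other operand's reflected `__eq__` a chance to run before Python falls back to identity comparison.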
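PATCH 40 silences the OneHotEncoder's numeric-data warning only inside `_ColumnAsTensor.__init__`, and its companion `test_should_not_warn` promotes any escaped warning to an error. A self-contained sketch of both halves of that pattern, where the warning text and the `emit_numeric_warning` helper are invented stand-ins, not the actual Safe-DS call:

    import warnings

    def emit_numeric_warning() -> None:
        # Stand-in for OneHotEncoder().fit(...) issuing its UserWarning.
        warnings.warn("The columns ['column'] contain numerical data.", UserWarning, stacklevel=2)

    # Production side: mute exactly this warning, in a narrow scope, by message regex.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message=r"The columns \['column'\] contain numerical data\.",
            category=UserWarning,
        )
        emit_numeric_warning()  # silenced

    # Test side (cf. test_should_not_warn): escalate warnings to errors so that a
    # warning leaking out of the guarded code fails loudly.
    with warnings.catch_warnings():
        warnings.filterwarnings("error")
        pass  # the guarded call goes here; it must complete without raising

Scoping the filter with `warnings.catch_warnings()` keeps the suppression local: the regex-matched `UserWarning` is ignored only inside the `with` block, and every other warning still propagates normally.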
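The `test_should_not_be_equal` and `test_hash_should_not_be_equal` suites above also rely on a pytest behavior worth spelling out: stacking two `@pytest.mark.parametrize` decorators on one test function yields the cross product of both argument lists, so every `*_1` candidate is compared against every `*_2` candidate. A minimal illustration with invented values:

    import pytest

    @pytest.mark.parametrize("left", [1, 2])
    @pytest.mark.parametrize("right", [10, 20, 30])
    def test_cross_product(left: int, right: int) -> None:
        # pytest collects 2 * 3 = 6 cases from this single test function.
        assert left != right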