Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add ImageDataset and Layer for ConvolutionalNeuralNetworks #645

Merged
merged 49 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
bc33cc0
feat: added `ImageDataset`
Marsmaennchen221 Apr 19, 2024
1dca8f0
Merge branch 'main' of https://github.com/Safe-DS/Stdlib into 579-add…
Marsmaennchen221 Apr 19, 2024
312feec
feat: added `Convolutional2DLayer`, `FlattenLayer`, `MaxPooling2DLaye…
Marsmaennchen221 Apr 23, 2024
7b97fb7
Merge branch 'main' of https://github.com/Safe-DS/Stdlib into 579-add…
Marsmaennchen221 Apr 23, 2024
7ae5b56
test: fixed one test
Marsmaennchen221 Apr 23, 2024
83a008e
test: added tests for `ImageSize` and `Image.size`
Marsmaennchen221 Apr 23, 2024
cf7bfa4
test: added tests for `ImageList.sizes`
Marsmaennchen221 Apr 24, 2024
a40ed0e
feat: changed `ImageDataset` to have generic output type
Marsmaennchen221 Apr 24, 2024
9137eac
test: corrected test in `ImageList`
Marsmaennchen221 Apr 24, 2024
ac452e4
test: corrected cnn workflow test to be os independent
Marsmaennchen221 Apr 24, 2024
3350ba6
Merge branch 'main' of https://github.com/Safe-DS/Stdlib into 579-add…
Marsmaennchen221 Apr 24, 2024
c3bcc20
feat: added `ConvolutionalTranspose2DLayer`
Marsmaennchen221 Apr 24, 2024
8312d99
test: made `TestImageToColumn.test_should_train_and_predict_model` os…
Marsmaennchen221 Apr 24, 2024
031677a
feat: added `Image.__array__` to convert a `Image` to a `numpy.ndarray`
Marsmaennchen221 Apr 28, 2024
c567b55
feat: added checks and errors for invalid CNNs
Marsmaennchen221 Apr 29, 2024
137d658
feat: added equals check to `OneHotEncoder`
Marsmaennchen221 Apr 29, 2024
732b1ce
test: added tests for `Convolutional2DLayer`, `ConvolutionalTranspose…
Marsmaennchen221 Apr 29, 2024
cf497a4
test: added tests for `OneHotEncoder.__eq__`
Marsmaennchen221 Apr 29, 2024
4d07c6a
refactor: ruff linter
Marsmaennchen221 Apr 29, 2024
99e0d54
refactor: mypy linter
Marsmaennchen221 Apr 30, 2024
75cce0a
Merge branch 'main' of https://github.com/Safe-DS/Stdlib into 579-add…
Marsmaennchen221 May 1, 2024
a43ecda
refactor: finish merge
Marsmaennchen221 May 1, 2024
e4e5239
refactor: linter
Marsmaennchen221 May 1, 2024
d87bc92
refactor: linter
Marsmaennchen221 May 1, 2024
5bdce23
refactor: linter
Marsmaennchen221 May 1, 2024
032f58f
refactor: mypy linter
Marsmaennchen221 May 1, 2024
4b30a58
refactor: mypy linter
Marsmaennchen221 May 1, 2024
33e8db8
refactor: mypy linter
Marsmaennchen221 May 1, 2024
19f1f24
refactor: mypy linter
Marsmaennchen221 May 1, 2024
5503312
refactor: mypy linter
Marsmaennchen221 May 1, 2024
aedf2be
refactor: mypy linter
Marsmaennchen221 May 1, 2024
2953f36
refactor: mypy linter
Marsmaennchen221 May 1, 2024
0974807
refactor: mypy linter
Marsmaennchen221 May 1, 2024
ff407cd
style: apply automated linter fixes
megalinter-bot May 1, 2024
8b50d3b
refactor: ruff linter
Marsmaennchen221 May 1, 2024
8bd40b1
style: apply automated linter fixes
megalinter-bot May 1, 2024
a1e7415
refactor: codecov
Marsmaennchen221 May 1, 2024
c717cdc
Merge branch '579-add-a-new-imagedataset-class' of https://github.com…
Marsmaennchen221 May 1, 2024
5cf56cd
Merge branch 'main' of https://github.com/Safe-DS/Stdlib into 579-add…
Marsmaennchen221 May 2, 2024
a3b9336
refactor: finish merge
Marsmaennchen221 May 3, 2024
de465aa
feat: added and improved various `__hash__`, `__sizeof__` and `__eq__…
Marsmaennchen221 May 3, 2024
3c800ea
refactor: mypy
Marsmaennchen221 May 3, 2024
c8437bb
test: added missing tests
Marsmaennchen221 May 3, 2024
3204c26
style: apply automated linter fixes
megalinter-bot May 3, 2024
b8cfd66
style: apply automated linter fixes
megalinter-bot May 3, 2024
16871cf
feat: disabled warning from `OneHotEncoder` in `ImageDataset`
Marsmaennchen221 May 6, 2024
06e1af9
Merge branch 'main' of https://github.com/Safe-DS/Stdlib into 579-add…
Marsmaennchen221 May 6, 2024
0397251
refactor: completed merge
Marsmaennchen221 May 6, 2024
701091b
style: apply automated linter fixes
megalinter-bot May 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/safeds/data/image/containers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,19 @@
if TYPE_CHECKING:
from ._image import Image
from ._image_list import ImageList
from ._image_dataset import ImageDataset

apipkg.initpkg(
__name__,
{
"Image": "._image:Image",
"ImageList": "._image_list:ImageList",
"ImageDataset": "._image_dataset:ImageDataset",
},
)

__all__ = [
"Image",
"ImageList",
"ImageDataset",
]
7 changes: 6 additions & 1 deletion src/safeds/data/image/containers/_empty_image_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from safeds._utils import _structural_hash
from safeds.data.image.containers._image_list import ImageList
from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList
from safeds.data.image.utils._image_transformation_error_and_warning_checks import (
from safeds.data.image._utils._image_transformation_error_and_warning_checks import (
_check_add_noise_errors,
_check_adjust_brightness_errors_and_warnings,
_check_adjust_color_balance_errors_and_warnings,
Expand All @@ -17,6 +17,7 @@
_check_resize_errors,
_check_sharpen_errors_and_warnings,
)
from safeds.data.image.typing import ImageSize
from safeds.exceptions import IndexOutOfBoundsError

if TYPE_CHECKING:
Expand Down Expand Up @@ -91,6 +92,10 @@ def heights(self) -> list[int]:
def channel(self) -> int:
return NotImplemented

@property
def sizes(self) -> list[ImageSize]:
    """
    Return the sizes of all images.

    Returns
    -------
    sizes:
        Always an empty list.
    """
    no_sizes: list[ImageSize] = []
    return no_sizes

@property
def number_of_sizes(self) -> int:
    """
    Return the number of different image sizes.

    Returns
    -------
    number_of_sizes:
        Always 0.
    """
    size_count = 0
    return size_count
Expand Down
15 changes: 14 additions & 1 deletion src/safeds/data/image/containers/_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from safeds._config import _get_device
from safeds._utils import _structural_hash
from safeds.data.image.utils._image_transformation_error_and_warning_checks import (
from safeds.data.image._utils._image_transformation_error_and_warning_checks import (
_check_add_noise_errors,
_check_adjust_brightness_errors_and_warnings,
_check_adjust_color_balance_errors_and_warnings,
Expand All @@ -18,6 +18,7 @@
_check_resize_errors,
_check_sharpen_errors_and_warnings,
)
from safeds.data.image.typing import ImageSize
from safeds.exceptions import IllegalFormatError

if TYPE_CHECKING:
Expand Down Expand Up @@ -261,6 +262,18 @@ def channel(self) -> int:
"""
return self._image_tensor.size(dim=0)

@property
def size(self) -> ImageSize:
    """
    Get the size of the image as an `ImageSize`.

    Returns
    -------
    image_size:
        An `ImageSize` built from the width, height and channel of the image.
    """
    width, height, channel = self.width, self.height, self.channel
    return ImageSize(width, height, channel)

@property
def device(self) -> Device:
"""
Expand Down
131 changes: 131 additions & 0 deletions src/safeds/data/image/containers/_image_dataset.py
Marsmaennchen221 marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
from __future__ import annotations

import copy
from typing import TYPE_CHECKING, TypeVar, Generic

from safeds._config import _get_device
from safeds.data.image.containers import ImageList
from safeds.data.image.containers._single_size_image_list import _SingleSizeImageList
from safeds.data.image.typing import ImageSize
from safeds.data.tabular.containers import Table
from safeds.exceptions import NonNumericColumnError, OutputLengthMismatchError, IndexOutOfBoundsError

if TYPE_CHECKING:
from torch import Tensor

T = TypeVar("T", Table, ImageList)


class ImageDataset(Generic[T]):
    """
    A dataset that pairs input images with an output table or output images and serves them in batches.

    Parameters
    ----------
    input_data:
        The input images. Must be an image list in which all images have the same size.
    output_data:
        The expected output: either a `Table` with one row per input image or an `ImageList` with one image per
        input image.
    batch_size:
        The number of samples per batch. Must be at least 1.
    shuffle:
        Whether a freshly shuffled copy of the dataset is used each time iteration starts.

    Raises
    ------
    ValueError
        If `batch_size` is smaller than 1, if the input or output images do not all have the same size, or if a
        column of the output table has values outside of the interval [0, 1].
    OutputLengthMismatchError
        If the number of outputs differs from the number of input images.
    NonNumericColumnError
        If the output table contains non-numeric columns.
    """

    def __init__(self, input_data: ImageList, output_data: T, batch_size: int = 1, shuffle: bool = False) -> None:
        import torch

        # A batch size below 1 would make iteration yield empty batches forever.
        if batch_size < 1:
            raise ValueError(f"The batch size must be at least 1 but was {batch_size}.")

        # Starts as the identity order; `shuffle` replaces it with a random permutation on a copy.
        self._shuffle_tensor_indices = torch.LongTensor(list(range(len(input_data))))
        self._shuffle_after_epoch = shuffle
        self._batch_size = batch_size
        self._next_batch_index = 0

        if not isinstance(input_data, _SingleSizeImageList):
            raise ValueError("The given input ImageList contains images of different sizes.")
        self._input_size = ImageSize(input_data.widths[0], input_data.heights[0], input_data.channel)
        self._input = input_data

        # Outputs that are neither a Table nor an ImageList skip the length check and are rejected further below.
        if isinstance(output_data, Table):
            number_of_outputs = output_data.number_of_rows
        elif isinstance(output_data, ImageList):
            number_of_outputs = len(output_data)
        else:
            number_of_outputs = None
        if number_of_outputs is not None and len(input_data) != number_of_outputs:
            raise OutputLengthMismatchError(f"{len(input_data)} != {number_of_outputs}")

        self._output: _SingleSizeImageList | _TableAsTensor
        if isinstance(output_data, Table):
            non_numerical_columns = []
            wrong_interval_columns = []
            for column_name in output_data.column_names:
                if not output_data.get_column_type(column_name).is_numeric():
                    non_numerical_columns.append(column_name)
                    continue
                column = output_data.get_column(column_name)
                if column.minimum() < 0 or column.maximum() > 1:
                    wrong_interval_columns.append(column_name)
            if len(non_numerical_columns) > 0:
                raise NonNumericColumnError(f"Columns {non_numerical_columns} are not numerical.")
            if len(wrong_interval_columns) > 0:
                raise ValueError(f"Columns {wrong_interval_columns} have values outside of the interval [0, 1].")
            self._output = _TableAsTensor(output_data)
        elif isinstance(output_data, _SingleSizeImageList):
            self._output = output_data.clone()._as_single_size_image_list()
        else:
            raise ValueError("The given output ImageList contains images of different sizes.")

    def __iter__(self) -> ImageDataset[T]:
        """Return a copy of this dataset (shuffled if requested) positioned at its first batch."""
        dataset = self.shuffle() if self._shuffle_after_epoch else copy.copy(self)
        dataset._next_batch_index = 0
        return dataset

    def __next__(self) -> tuple[Tensor, Tensor]:
        """Return the next batch as an (input, output) pair or raise `StopIteration` when all samples were served."""
        if self._next_batch_index * self._batch_size >= len(self._input):
            raise StopIteration
        self._next_batch_index += 1
        return self._get_batch(self._next_batch_index - 1)

    def __len__(self) -> int:
        """Return the number of input images."""
        return self._input.number_of_images

    @property
    def input_size(self) -> ImageSize:
        """
        Get the size of the input images.

        Returns
        -------
        input_size:
            The `ImageSize` built from the width and height of the first input image and the channel of the input.
        """
        return self._input_size

    def get_input(self) -> ImageList:
        """
        Get the input images of this dataset.

        Returns
        -------
        input:
            The stored input image list (not a copy).
        """
        return self._input

    def get_output(self) -> T:
        """
        Get the output of this dataset.

        Returns
        -------
        output:
            A `Table` rebuilt from the stored tensor if the output was given as a table, otherwise the stored
            output image list.
        """
        output = self._output
        if isinstance(output, _TableAsTensor):
            return output._to_table()
        return output

    def _get_batch(self, batch_number: int, batch_size: int | None = None) -> tuple[Tensor, Tensor]:
        """
        Build the tensors of a single batch.

        Parameters
        ----------
        batch_number:
            The zero-based number of the batch.
        batch_size:
            The number of samples per batch. Defaults to the batch size of this dataset.

        Returns
        -------
        batch:
            The input tensor and the output tensor of the batch. Image tensors are converted to float32 and divided
            by 255; table output is returned as stored.

        Raises
        ------
        ValueError
            If the batch size is smaller than 1.
        IndexOutOfBoundsError
            If the batch starts at or behind the last sample.
        """
        import torch

        if batch_size is None:
            batch_size = self._batch_size
        if batch_size < 1:
            raise ValueError(f"The batch size must be at least 1 but was {batch_size}.")

        number_of_samples = len(self._input)
        first_index = batch_size * batch_number
        if first_index >= number_of_samples:
            raise IndexOutOfBoundsError(first_index)
        # The last batch may be shorter than `batch_size`.
        max_index = min(batch_size * (batch_number + 1), number_of_samples)
        batch_indices = range(first_index, max_index)

        # NOTE(review): image batches look up `shuffle[position(index)]` while table batches use `shuffle[index]`.
        # Both agree when tensor positions equal indices - confirm that this always holds for the stored lists.
        input_positions = [self._input._indices_to_tensor_positions[index] for index in batch_indices]
        input_tensor = self._input._tensor[self._shuffle_tensor_indices[input_positions]].to(torch.float32) / 255

        if isinstance(self._output, _SingleSizeImageList):
            output_positions = [self._output._indices_to_tensor_positions[index] for index in batch_indices]
            output_tensor = (
                self._output._tensor[self._shuffle_tensor_indices[output_positions]].to(torch.float32) / 255
            )
        else:  # _output is instance of _TableAsTensor
            output_tensor = self._output._tensor[self._shuffle_tensor_indices[first_index:max_index]]
        return input_tensor, output_tensor

    def shuffle(self) -> ImageDataset[T]:
        """
        Return a shallow copy of this dataset with a new random sample order.

        The original dataset is not modified.

        Returns
        -------
        image_dataset:
            The shuffled copy, positioned at its first batch.
        """
        import torch

        im_dataset: ImageDataset[T] = copy.copy(self)
        im_dataset._shuffle_tensor_indices = torch.randperm(len(self))
        im_dataset._next_batch_index = 0
        return im_dataset


class _TableAsTensor:

def __init__(self, table: Table) -> None:
import torch

self._column_names = table.column_names
self._tensor = torch.Tensor(table._data.to_numpy(copy=True)).to(_get_device())

if not torch.all(self._tensor.sum(dim=1) == torch.ones(self._tensor.size(dim=0))):
raise ValueError("The given table is not correctly one hot encoded as it contains rows that have a sum not equal to 1.")

@staticmethod
def _from_tensor(tensor: Tensor) -> _TableAsTensor:
table_as_tensor = _TableAsTensor.__new__(_TableAsTensor)
table_as_tensor._tensor = tensor
return table_as_tensor

def _to_table(self) -> Table:
table = Table(dict(zip(self._column_names, self._tensor.T.tolist())))
return table

48 changes: 43 additions & 5 deletions src/safeds/data/image/containers/_image_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
import os
from abc import ABCMeta, abstractmethod
from pathlib import Path
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, overload, Literal

from safeds.data.image.containers._image import Image
from safeds.data.image.typing import ImageSize

if TYPE_CHECKING:
from collections.abc import Sequence
Expand Down Expand Up @@ -80,7 +81,23 @@ def from_images(images: list[Image]) -> ImageList:
return _SingleSizeImageList._create_image_list([image._image_tensor for image in images], indices)

@staticmethod
def from_files(path: str | Path | Sequence[str | Path]) -> ImageList:
@overload
def from_files(path: str | Path | Sequence[str | Path]) -> ImageList: ...

@staticmethod
@overload
def from_files(path: str | Path | Sequence[str | Path], return_filenames: Literal[False]) -> ImageList: ...

@staticmethod
@overload
def from_files(path: str | Path | Sequence[str | Path], return_filenames: Literal[True]) -> tuple[ImageList, list[str]]: ...

@staticmethod
@overload
def from_files(path: str | Path | Sequence[str | Path], return_filenames: bool) -> ImageList | tuple[ImageList, list[str]]: ...

@staticmethod
def from_files(path: str | Path | Sequence[str | Path], return_filenames: bool = False) -> ImageList | tuple[ImageList, list[str]]:
"""
Create an ImageList from a directory or a list of files.

Expand All @@ -90,6 +107,8 @@ def from_files(path: str | Path | Sequence[str | Path]) -> ImageList:
----------
path:
the path to the directory or a list of files
return_filenames:
if True the output will be a tuple which contains a list of the filenames in order of the images

Returns
-------
Expand All @@ -102,7 +121,7 @@ def from_files(path: str | Path | Sequence[str | Path]) -> ImageList:
If the directory or one of the files of the path cannot be found
"""
from PIL.Image import open as pil_image_open
from torchvision.transforms.functional import pil_to_tensor
from torchvision.transforms.v2.functional import pil_to_tensor

from safeds.data.image.containers._empty_image_list import _EmptyImageList
from safeds.data.image.containers._multi_size_image_list import _MultiSizeImageList
Expand All @@ -112,6 +131,7 @@ def from_files(path: str | Path | Sequence[str | Path]) -> ImageList:
return _EmptyImageList()

image_tensors = []
file_names = []
fixed_size = True

path_list: list[str | Path]
Expand All @@ -125,6 +145,7 @@ def from_files(path: str | Path | Sequence[str | Path]) -> ImageList:
path_list += sorted([p / name for name in os.listdir(p)])
else:
image_tensors.append(pil_to_tensor(pil_image_open(p)))
file_names.append(str(p))
if fixed_size and (
image_tensors[0].size(dim=2) != image_tensors[-1].size(dim=2)
or image_tensors[0].size(dim=1) != image_tensors[-1].size(dim=1)
Expand All @@ -137,9 +158,14 @@ def from_files(path: str | Path | Sequence[str | Path]) -> ImageList:
indices = list(range(len(image_tensors)))

if fixed_size:
return _SingleSizeImageList._create_image_list(image_tensors, indices)
image_list = _SingleSizeImageList._create_image_list(image_tensors, indices)
else:
image_list = _MultiSizeImageList._create_image_list(image_tensors, indices)

if return_filenames:
return image_list, file_names
else:
return _MultiSizeImageList._create_image_list(image_tensors, indices)
return image_list

@abstractmethod
def clone(self) -> ImageList:
Expand Down Expand Up @@ -300,6 +326,18 @@ def channel(self) -> int:
The channel of all images
"""

@property
@abstractmethod
def sizes(self) -> list[ImageSize]:
    """
    Return the sizes of all images.

    Returns
    -------
    sizes:
        The sizes of all images.
    """

@property
@abstractmethod
def number_of_sizes(self) -> int:
Expand Down
12 changes: 11 additions & 1 deletion src/safeds/data/image/containers/_multi_size_image_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@

from safeds._utils import _structural_hash
from safeds.data.image.containers import Image, ImageList
from safeds.data.image.utils._image_transformation_error_and_warning_checks import (
from safeds.data.image._utils._image_transformation_error_and_warning_checks import (
_check_blur_errors_and_warnings,
_check_remove_images_with_size_errors,
)
from safeds.data.image.typing import ImageSize
from safeds.exceptions import (
DuplicateIndexError,
IllegalFormatError,
Expand Down Expand Up @@ -158,6 +159,15 @@ def heights(self) -> list[int]:
def channel(self) -> int:
return next(iter(self._image_list_dict.values())).channel

@property
def sizes(self) -> list[ImageSize]:
    """
    Return the sizes of all images, ordered by ascending image index.

    Returns
    -------
    sizes:
        The sizes of all images.
    """
    sizes_by_index = {}
    for image_list in self._image_list_dict.values():
        indices = image_list._as_single_size_image_list()._tensor_positions_to_indices
        # Read the sub-list's sizes once instead of once per image.
        image_sizes = image_list.sizes
        for position, index in enumerate(indices):
            sizes_by_index[index] = image_sizes[position]
    return [sizes_by_index[index] for index in sorted(sizes_by_index)]

@property
def number_of_sizes(self) -> int:
return len(self._image_list_dict)
Expand Down
Loading
Loading