Skip to content

Commit

Permalink
Feature/add numpy to predict (#25)
Browse files Browse the repository at this point in the history
Co-authored-by: Eduardo Bocarruido Torres <[email protected]>
Co-authored-by: Sergio Gil Gavela <[email protected]>
  • Loading branch information
3 people authored Aug 4, 2023
1 parent 52b63be commit 3b42233
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 98 deletions.
48 changes: 48 additions & 0 deletions aisee/custom_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,3 +326,51 @@ def __getitem__(self, idx: int) -> tuple[torch.Tensor, float, str]:
label = np.nan

return img, label, self.data


class DatasetFromNumpy(torch.utils.data.Dataset):
    """
    Image Dataset for Numpy data.

    This class only works for making predictions: every item is returned
    with ``np.nan`` as label and ``'N/A'`` as path.

    Parameters
    ----------
    data : np.ndarray
        Images as a numpy object. ``data[idx]`` must be a single image that
        ``PIL.Image.fromarray`` can convert.
    transform : torchvision.transforms.Compose, default=None
        Transformations applied to each image after it is converted to a
        PIL image. If None, the PIL image is returned untouched.
    """

    def __init__(self, data: np.ndarray, transform: transforms.Compose = None) -> None:
        self.data = data
        self.transform = transform

    def __len__(self) -> int:
        """Return data size (length of the first axis of ``data``)."""
        return len(self.data)

    def __getitem__(self, idx: int) -> tuple[torch.Tensor, float, str]:
        """
        Return the image, label and image path.

        Parameters
        ----------
        idx : int
            Index of the element to return.

        Returns
        -------
        result : tuple[torch.Tensor, float, str]
            The tuple contains: (image, label, image path). The label is
            always ``np.nan`` and the path is always ``'N/A'``. The image is
            whatever ``transform`` returns, or a PIL image when no transform
            was given.
        """
        img = Image.fromarray(self.data[idx])

        # Compare against None explicitly so that a valid transform that
        # happens to evaluate as falsy is still applied.
        if self.transform is not None:
            img = self.transform(img)

        label = np.nan
        path = "N/A"

        return img, label, path
2 changes: 1 addition & 1 deletion aisee/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from .utils import get_data_split, get_n_classes, get_n_classes_multilabel
from .vision_classifier import VisionClassifier

Loss = TypeVar("LossFunction")
Loss = TypeVar("Loss")
Optimizer = TypeVar("Optimizer")

LOGGER = logging.getLogger(__name__)
Expand Down
30 changes: 30 additions & 0 deletions aisee/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import numpy as np
import pandas as pd
from PIL import Image


def check_single_label_data(data):
Expand Down Expand Up @@ -180,3 +181,32 @@ def check_multilabel_df(df):
raise ValueError(
f"Column '{col}' has values less than 0, values must be between 0 and 1.",
)


def numpy_image_from_jpg(
    path: str,
    rgb: bool = True,
    resize: tuple[int, int] = None,
) -> np.ndarray:
    """
    Get numpy array from image path.

    Parameters
    ----------
    path : str
        Image path.
    rgb : bool, default=True
        Convert the image to RGB, if it is not.
    resize : tuple[int, int], default=None
        Target size passed to ``PIL.Image.Image.resize``. If None (or any
        other falsy value), the image keeps its original size.

    Returns
    -------
    n : np.ndarray
        Numerical representation of an image, with dtype ``uint8``.
    """
    # Open through a context manager so the underlying file handle is
    # released deterministically, even if conversion or resizing fails.
    with Image.open(path) as image:
        if rgb and image.mode != "RGB":
            image = image.convert(mode="RGB")
        if resize:
            image = image.resize(resize)
        # Materialize the pixels while the file is still open.
        return np.array(image, "uint8")
25 changes: 19 additions & 6 deletions aisee/vision_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from .custom_datasets import (
DatasetFromDataFrame,
DatasetFromFolder,
DatasetFromNumpy,
DatasetFromSingleImage,
)

Expand Down Expand Up @@ -295,7 +296,7 @@ def create_default_transform(self) -> dict[str, transforms.Compose]:

def create_dataloader(
self,
data: Union[pd.Series, pd.DataFrame, str],
data: Union[pd.Series, pd.DataFrame, str, np.ndarray],
num_workers: int = 2,
data_transform: transforms.Compose = None,
batch_size: int = 8,
Expand All @@ -306,8 +307,10 @@ def create_dataloader(
Parameters
----------
data : pandas.DataFrame or str
A DataFrame or a string which contains the training data:
data : pandas.DataFrame, str or numpy.ndarray
Numpy array images only for predict.
A DataFrame, a string, or array which contains the data:
- If it is a dataframe:
- If it is a multiclass problem: the dataframe must contain a `path`
Expand Down Expand Up @@ -385,6 +388,9 @@ class in the problem. The classes that belong to that image will be
├── dog1.jpg
└── dog2.jpg
- If it is a numpy array it must be a numpy representation of images:
np.array(nº images, height, width, channels)
num_workers : int, default=2
Subprocesses to use for data loading.
data_transform : torchvision.transforms.Compose, default=None
Expand All @@ -409,6 +415,10 @@ class in the problem. The classes that belong to that image will be
data_transform,
self.class_to_idx,
)

elif isinstance(data, np.ndarray):
image_dataset = DatasetFromNumpy(data, transform=data_transform)

elif isinstance(data, (str, Path)) and Path(data).exists():
if Path(data).is_file():
image_dataset = DatasetFromSingleImage(data, transform=data_transform)
Expand All @@ -420,7 +430,7 @@ class in the problem. The classes that belong to that image will be
)
else:
raise ValueError(
"Data must be a valid directory path (str), an image path (str) or a Pandas Dataframe.",
"Data must be a valid directory path (str), an image path (str) a Pandas Dataframe or numpy array.",
)

return torch.utils.data.DataLoader(
Expand All @@ -442,8 +452,8 @@ def predict(
Parameters
----------
data : pandas.DataFrame or str
It must be a dataframe or a string:
data : pandas.DataFrame, str or numpy.ndarray
It must be a dataframe, a string or a numpy.ndarray:
- If it is a dataframe:
- If it is a multiclass problem: the dataframe must contain a `path`
Expand Down Expand Up @@ -508,6 +518,9 @@ class in the problem. The classes that belong to that image will be
├── dog1.jpg
└── dog2.jpg
- If it is a numpy array it must be a numpy representation of images:
np.array(nº images, height, width, channels)
num_workers : int, default=2
Subprocesses to use for data loading.
data_transform : torchvision.transforms.Compose, default=None
Expand Down
129 changes: 38 additions & 91 deletions tests/test_vision_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
)

from aisee import VisionClassifier
from aisee.utils import numpy_image_from_jpg

TEST_PATH = Path(__file__).resolve().parent
if not Path(TEST_PATH, "resources").exists():
Expand All @@ -33,6 +34,14 @@
columns=["path", "label", "fold"],
)

# Two validation images loaded as numpy arrays. Both are resized to the same
# 800x800 size so that they can be stacked into a single array below.
NUMPY_IMAGE1 = numpy_image_from_jpg(
f"{TEST_PATH}/resources/images/val/cat/cat3.jpg",
resize=(800, 800))
NUMPY_IMAGE2 = numpy_image_from_jpg(
f"{TEST_PATH}/resources/images/val/dog/dog3.jpg",
resize=(800, 800))
# Repeat the pair 8 times and stack along a new leading axis: 16 images in total.
NUMPY_DATA = np.stack([NUMPY_IMAGE1, NUMPY_IMAGE2]*8)

MODEL_TEST = "mobilenetv2_050"
MODEL_TEST_COMPOSITE_CLASSIFIER = "vgg11_bn"

Expand Down Expand Up @@ -77,106 +86,32 @@ def test_vision_classifier_load_model_custom_weights():
assert isinstance(vc, VisionClassifier)


def test_vision_classifier_predict_single_label_series():
@pytest.mark.parametrize("data_type, data", [("pd_series", SINGLE_LABEL_DATAFRAME["path"]),
("pd_df", SINGLE_LABEL_DATAFRAME),
("NUMPY_DATA", NUMPY_DATA),
("path_one_image", f"{TEST_PATH}/resources/images/train/cat/cat1.jpg"),
("path_folder", f"{TEST_PATH}/resources/images/val"),
])
def test_vision_classifier_predict_single_label(data_type, data):
"""Check that VisionClassifier predict single label problem with Pandas Series."""
vc = VisionClassifier(
model_name=MODEL_TEST,
num_classes=2,
task="single_label",
)

predictions = vc.predict(SINGLE_LABEL_DATAFRAME["path"])

assert len(predictions) == len(SINGLE_LABEL_DATAFRAME)

for pred in predictions:
assert all(
key in pred
for key in ["image_path", "probabilities", "prediction", "real_label"]
)

assert isinstance(predictions[0]["image_path"], str)

for key in ["probabilities", "prediction", "real_label"]:
assert isinstance(predictions[0][key], np.ndarray)

assert all(
(predictions[0]["probabilities"] >= 0) & (predictions[0]["probabilities"] <= 1),
)


def test_vision_classifier_predict_single_label_dataframe():
"""Check that VisionClassifier predict single label problem with Pandas DataFrame."""
vc = VisionClassifier(
model_name=MODEL_TEST,
num_classes=2,
task="single_label",
)

predictions = vc.predict(SINGLE_LABEL_DATAFRAME)

assert len(predictions) == len(SINGLE_LABEL_DATAFRAME)

for pred in predictions:
assert all(
key in pred
for key in ["image_path", "probabilities", "prediction", "real_label"]
)

assert isinstance(predictions[0]["image_path"], str)

for key in ["probabilities", "prediction", "real_label"]:
assert isinstance(predictions[0][key], np.ndarray)

assert all(
(predictions[0]["probabilities"] >= 0) & (predictions[0]["probabilities"] <= 1),
)

if data_type == "path_one_image":
data_length = 1
elif data_type == "path_folder":
data_length = 0
for _, _, files in os.walk(data):
data_length += len(files)
else:
data_length = len(data)

def test_vision_classifier_predict_single_label_one_image():
"""Check that VisionClassifier predict single label problem with one image."""
vc = VisionClassifier(
model_name=MODEL_TEST,
num_classes=2,
task="single_label",
)

predictions = vc.predict(f"{TEST_PATH}/resources/images/train/cat/cat1.jpg")

assert len(predictions) == 1

for pred in predictions:
assert all(
key in pred
for key in ["image_path", "probabilities", "prediction", "real_label"]
)
assert isinstance(predictions[0]["image_path"], str)

for key in ["probabilities", "prediction", "real_label"]:
assert isinstance(predictions[0][key], np.ndarray)

assert all(
(predictions[0]["probabilities"] >= 0) & (predictions[0]["probabilities"] <= 1),
)


def test_vision_classifier_predict_single_label_path():
"""Check that VisionClassifier predict single label problem with path to directory."""
vc = VisionClassifier(
model_name=MODEL_TEST,
num_classes=2,
task="single_label",
)

data_dir = f"{TEST_PATH}/resources/images/val"

number_of_images = 0
for _, _, files in os.walk(data_dir):
number_of_images += len(files)

predictions = vc.predict(data_dir)
predictions = vc.predict(data)

assert len(predictions) == number_of_images
assert len(predictions) == data_length

for pred in predictions:
assert all(
Expand Down Expand Up @@ -399,3 +334,15 @@ def test_vision_classifier_missing_columns_error(task, data):

with pytest.raises(ValueError):
vc.predict(data)


@pytest.mark.parametrize("rgb", [False, True])
@pytest.mark.parametrize("resize", [None, (600, 600)])
def test_numpy_image_from_jpg(rgb, resize):
    """
    Check util function numpy_image_from_jpg.

    Besides the return type, verify that the ``rgb`` and ``resize`` arguments
    actually take effect on the resulting array.
    """
    img_array = numpy_image_from_jpg(
        f"{TEST_PATH}/resources/images/val/cat/cat3.jpg",
        rgb=rgb,
        resize=resize,
    )

    assert isinstance(img_array, np.ndarray)
    assert img_array.dtype == np.uint8

    if rgb:
        # An RGB image must come back as (height, width, 3).
        assert img_array.ndim == 3
        assert img_array.shape[2] == 3

    if resize is not None:
        # PIL sizes are (width, height); numpy shapes are (height, width, ...).
        width, height = resize
        assert img_array.shape[:2] == (height, width)

0 comments on commit 3b42233

Please sign in to comment.