Skip to content

Commit

Permalink
Feature/sg 815 fix override dataset params (#1092)
Browse files Browse the repository at this point in the history
* Fix dataset interpolation issue

* Added makefile to ease running tests locally

* Fix bug in unpacking batch that may have more than two elements

* Fix bug in unpacking batch that may have more than two elements

* Add more unit-test coverage by exercising both CUDA and CPU devices, since the test was failing on a Linux machine but not on Windows

* Exclude crowd in metrics tests since our ref values are excluding them

* Fix test

* Fix test

* Fix test

* Fix test

* Added ensure_is_tuple_of_two to allow override input_dim=512

* Added test case to check whether we can handle transforms

* Fixed case when passing manually instantiated transforms

* Fix type of input_dim in preprocessing params. It is now tuple, not list
  • Loading branch information
BloodAxe authored Jun 1, 2023
1 parent 75c8af6 commit 7907c48
Show file tree
Hide file tree
Showing 10 changed files with 191 additions and 81 deletions.
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
unit_tests:
python -m unittest tests/deci_core_unit_test_suite_runner.py

integration_tests:
python -m unittest tests/deci_core_integration_test_suite_runner.py

yolo_nas_integration_tests:
python -m unittest tests/integration_tests/yolo_nas_integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ train_dataset_params:
prob: 0.5 # probability to apply per-sample mixup
flip_prob: 0.5 # probability to apply horizontal flip
- DetectionPaddedRescale:
input_dim: [640, 640]
input_dim: ${dataset_params.train_dataset_params.input_dim}
max_targets: 120
pad_value: 114
- DetectionStandardize:
Expand Down
41 changes: 34 additions & 7 deletions src/super_gradients/training/dataloaders/dataloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import hydra
import numpy as np
import torch
from omegaconf import OmegaConf, UnsupportedValueType
from torch.utils.data import BatchSampler, DataLoader, TensorDataset, RandomSampler

import super_gradients
Expand Down Expand Up @@ -81,14 +82,40 @@ def get_data_loader(config_name: str, dataset_cls: object, train: bool, dataset_
return dataloader


def _process_dataset_params(cfg, dataset_params, train: bool):
    """
    Merge the default dataset config with the user-provided overrides.
    This function handles variable interpolation in the dataset config.

    :param cfg:            Default dataset config (OmegaConf node with `train_dataset_params`
                           and `val_dataset_params` sub-nodes).
    :param dataset_params: User-provided overrides.
    :param train:          Boolean flag indicating whether we are processing train or val dataset params.
    :return: New dataset params (merged defaults and overrides, where overrides take precedence).
    """
    try:
        # No, we can't simplify the following lines to:
        # >>> default_dataset_params = cfg.train_dataset_params if train else cfg.val_dataset_params
        # >>> dataset_params = OmegaConf.merge(default_dataset_params, dataset_params)
        # >>> return hydra.utils.instantiate(dataset_params)
        # For some reason this breaks interpolation :shrug:
        # Merging back into `cfg` keeps interpolations (e.g. ${dataset_params.train_dataset_params.input_dim})
        # resolvable because they reference other nodes of the full config tree.
        if train:
            cfg.train_dataset_params = OmegaConf.merge(cfg.train_dataset_params, dataset_params)
            return hydra.utils.instantiate(cfg.train_dataset_params)
        else:
            cfg.val_dataset_params = OmegaConf.merge(cfg.val_dataset_params, dataset_params)
            return hydra.utils.instantiate(cfg.val_dataset_params)

    except UnsupportedValueType:
        # Fallback for the case when the user-provided overrides contain non-primitive types
        # (e.g. already-instantiated transform objects). OmegaConf cannot merge such values,
        # so interpolation is not possible; instead, keep the user overrides as-is and only
        # fill in keys the user did not provide (or explicitly set to None) from the defaults.
        default_dataset_params = hydra.utils.instantiate(cfg.train_dataset_params if train else cfg.val_dataset_params)
        for key, val in default_dataset_params.items():
            if key not in dataset_params.keys() or dataset_params[key] is None:
                dataset_params[key] = val
        return dataset_params


def _process_dataloader_params(cfg, dataloader_params, dataset, train):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from super_gradients.common.factories.transforms_factory import TransformsFactory
from super_gradients.training.datasets.data_formats.default_formats import XYXY_LABEL
from super_gradients.training.datasets.data_formats.formats import ConcatenatedTensorFormat
from super_gradients.training.utils.utils import ensure_is_tuple_of_two

logger = get_logger(__name__)

Expand Down Expand Up @@ -76,7 +77,7 @@ def __init__(
max_num_samples: int = None,
cache: bool = False,
cache_dir: str = None,
input_dim: Optional[Tuple[int, int]] = None,
input_dim: Union[int, Tuple[int, int], None] = None,
transforms: List[DetectionTransform] = [],
all_classes_list: Optional[List[str]] = [],
class_inclusion_list: Optional[List[str]] = None,
Expand All @@ -89,7 +90,10 @@ def __init__(
"""Detection dataset.
:param data_dir: Where the data is stored
:param input_dim: Image size (when loaded, before transforms).
:param input_dim: Image size (when loaded, before transforms). Can be None, scalar or tuple (rows, cols).
None means that the image will be loaded as is.
Scalar (size) - Image will be resized to (size, size)
Tuple (rows,cols) - Image will be resized to (rows, cols)
:param original_target_format: Format of targets stored on disk. raw data format, the output format might
differ based on transforms.
:param max_num_samples: If not None, set the maximum size of the dataset by only indexing the first n annotations/images.
Expand Down Expand Up @@ -129,7 +133,7 @@ def __init__(
if not isinstance(self.n_available_samples, int) or self.n_available_samples < 1:
raise ValueError(f"_setup_data_source() should return the number of available samples but got {self.n_available_samples}")

self.input_dim = input_dim
self.input_dim = ensure_is_tuple_of_two(input_dim)
self.original_target_format = original_target_format
self.max_num_samples = max_num_samples

Expand Down
38 changes: 24 additions & 14 deletions src/super_gradients/training/transforms/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
_shift_bboxes,
_rescale_xyxy_bboxes,
)
from super_gradients.training.utils.utils import ensure_is_tuple_of_two

IMAGE_RESAMPLE_MODE = Image.BILINEAR
MASK_RESAMPLE_MODE = Image.NEAREST
Expand Down Expand Up @@ -459,10 +460,10 @@ class DetectionMosaic(DetectionTransform):
:param border_value: Value for filling borders after applying transforms.
"""

def __init__(self, input_dim: tuple, prob: float = 1.0, enable_mosaic: bool = True, border_value=114):
def __init__(self, input_dim: Union[int, Tuple[int, int]], prob: float = 1.0, enable_mosaic: bool = True, border_value=114):
super(DetectionMosaic, self).__init__(additional_samples_count=3)
self.prob = prob
self.input_dim = input_dim
self.input_dim = ensure_is_tuple_of_two(input_dim)
self.enable_mosaic = enable_mosaic
self.border_value = border_value

Expand Down Expand Up @@ -566,7 +567,7 @@ def __init__(
translate: Union[tuple, float] = 0.1,
scales: Union[tuple, float] = 0.1,
shear: Union[tuple, float] = 10,
target_size: Optional[Tuple[int, int]] = (640, 640),
target_size: Union[int, Tuple[int, int], None] = (640, 640),
filter_box_candidates: bool = False,
wh_thr: float = 2,
ar_thr: float = 20,
Expand All @@ -578,7 +579,7 @@ def __init__(
self.translate = translate
self.scale = scales
self.shear = shear
self.target_size = target_size
self.target_size = ensure_is_tuple_of_two(target_size)
self.enable = True
self.filter_box_candidates = filter_box_candidates
self.wh_thr = wh_thr
Expand Down Expand Up @@ -624,9 +625,17 @@ class DetectionMixup(DetectionTransform):
:param border_value: Value for filling borders after applying transform.
"""

def __init__(self, input_dim: tuple, mixup_scale: tuple, prob: float = 1.0, enable_mixup: bool = True, flip_prob: float = 0.5, border_value: int = 114):
def __init__(
self,
input_dim: Union[int, Tuple[int, int], None],
mixup_scale: tuple,
prob: float = 1.0,
enable_mixup: bool = True,
flip_prob: float = 0.5,
border_value: int = 114,
):
super(DetectionMixup, self).__init__(additional_samples_count=1, non_empty_targets=True)
self.input_dim = input_dim
self.input_dim = ensure_is_tuple_of_two(input_dim)
self.mixup_scale = mixup_scale
self.prob = prob
self.enable_mixup = enable_mixup
Expand Down Expand Up @@ -736,15 +745,15 @@ class DetectionPadToSize(DetectionTransform):
Note: This transformation assume that dimensions of input image is equal or less than `output_size`.
"""

def __init__(self, output_size: Tuple[int, int], pad_value: int):
def __init__(self, output_size: Union[int, Tuple[int, int], None], pad_value: int):
"""
Constructor for DetectionPadToSize transform.
:param output_size: Output image size (rows, cols)
:param pad_value: Padding value for image
"""
super().__init__()
self.output_size = output_size
self.output_size = ensure_is_tuple_of_two(output_size)
self.pad_value = pad_value

def __call__(self, sample: dict) -> dict:
Expand Down Expand Up @@ -775,9 +784,9 @@ class DetectionPaddedRescale(DetectionTransform):
:param pad_value: Padding value for image.
"""

def __init__(self, input_dim: Tuple, swap: Tuple[int, ...] = (2, 0, 1), max_targets: int = 50, pad_value: int = 114):
def __init__(self, input_dim: Union[int, Tuple[int, int], None], swap: Tuple[int, ...] = (2, 0, 1), max_targets: int = 50, pad_value: int = 114):
self.swap = swap
self.input_dim = input_dim
self.input_dim = ensure_is_tuple_of_two(input_dim)
self.max_targets = max_targets
self.pad_value = pad_value

Expand Down Expand Up @@ -834,14 +843,14 @@ class DetectionRescale(DetectionTransform):
:param output_shape: (rows, cols)
"""

def __init__(self, output_shape: Tuple[int, int]):
def __init__(self, output_shape: Union[int, Tuple[int, int]]):
super().__init__()
self.output_shape = output_shape
self.output_shape = ensure_is_tuple_of_two(output_shape)

def __call__(self, sample: dict) -> dict:
image, targets, crowd_targets = sample["image"], sample["target"], sample.get("crowd_target")

sy, sx = (self.output_shape[0] / image.shape[0], self.output_shape[1] / image.shape[1])
sy, sx = float(self.output_shape[0]) / float(image.shape[0]), float(self.output_shape[1]) / float(image.shape[1])

sample["image"] = _rescale_image(image=image, target_shape=self.output_shape)
sample["target"] = _rescale_bboxes(targets, scale_factors=(sy, sx))
Expand Down Expand Up @@ -1010,7 +1019,7 @@ class DetectionTargetsFormatTransform(DetectionTransform):
@resolve_param("output_format", ConcatenatedTensorFormatFactory())
def __init__(
self,
input_dim: Optional[tuple] = None,
input_dim: Union[int, Tuple[int, int], None] = None,
input_format: ConcatenatedTensorFormat = XYXY_LABEL,
output_format: ConcatenatedTensorFormat = LABEL_CXCYWH,
min_bbox_edge_size: float = 1,
Expand All @@ -1031,6 +1040,7 @@ def __init__(
self.input_dim = None

if input_dim is not None:
input_dim = ensure_is_tuple_of_two(input_dim)
self._setup_input_dim_related_params(input_dim)

def _setup_input_dim_related_params(self, input_dim: tuple):
Expand Down
40 changes: 28 additions & 12 deletions src/super_gradients/training/utils/utils.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
import collections
import math
import os
import tarfile
import random
import re
import math
import tarfile
import time

import inspect
from functools import lru_cache, wraps
from importlib import import_module
from itertools import islice

from pathlib import Path
from typing import Mapping, Optional, Tuple, Union, List, Dict, Any, Iterable
from zipfile import ZipFile
from jsonschema import validate
from itertools import islice

from PIL import Image, ExifTags
import numpy as np
import torch
import torch.nn as nn

# These functions changed from torch 1.2 to torch 1.3

import random
import numpy as np
from importlib import import_module
from PIL import Image, ExifTags
from jsonschema import validate

from super_gradients.common.abstractions.abstract_logger import get_logger

# These functions changed from torch 1.2 to torch 1.3

logger = get_logger(__name__)


Expand Down Expand Up @@ -581,3 +581,19 @@ def generate_batch(iterable: Iterable, batch_size: int) -> Iterable:
yield batch
else:
return


def ensure_is_tuple_of_two(inputs: Union[Any, Iterable[Any], None]) -> Union[Tuple[Any, Any], None]:
    """
    Checks input and converts it to a tuple of length two. If input is None returns None.

    :param inputs: Input argument; either a single value, an iterable of exactly two values, or None.
    :return: Tuple of two values if input is not None, otherwise - None.
    :raises ValueError: If an iterable input does not contain exactly two elements.
    """
    if inputs is None:
        return None

    # BUGFIX: `collections.Iterable` was removed in Python 3.10 — the ABC lives in
    # `collections.abc`. Strings are iterable but must be treated as scalars here.
    if isinstance(inputs, collections.abc.Iterable) and not isinstance(inputs, str):
        a, b = inputs
        return a, b

    return inputs, inputs
44 changes: 44 additions & 0 deletions tests/unit_tests/detection_dataset_test.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import unittest
from pathlib import Path

from super_gradients.training.dataloaders import coco2017_train_yolo_nas
from super_gradients.training.datasets import COCODetectionDataset
from super_gradients.training.datasets.data_formats.default_formats import LABEL_CXCYWH
from super_gradients.training.exceptions.dataset_exceptions import DatasetValidationException, ParameterMismatchException
from super_gradients.training.transforms import DetectionMosaic, DetectionTargetsFormatTransform, DetectionPaddedRescale


class DetectionDatasetTest(unittest.TestCase):
Expand Down Expand Up @@ -44,6 +47,47 @@ def test_coco_dataset_creation_with_subset_classes(self):
with self.assertRaises(ParameterMismatchException):
COCODetectionDataset(**train_dataset_params)

def test_coco_detection_dataset_override_image_size(self):
train_dataset_params = {
"data_dir": self.mini_coco_data_dir,
"input_dim": [512, 512],
}
train_dataloader_params = {"num_workers": 0}
dataloader = coco2017_train_yolo_nas(dataset_params=train_dataset_params, dataloader_params=train_dataloader_params)
batch = next(iter(dataloader))
print(batch[0].shape)
self.assertEqual(batch[0].shape[2], 512)
self.assertEqual(batch[0].shape[3], 512)

def test_coco_detection_dataset_override_image_size_single_scalar(self):
train_dataset_params = {
"data_dir": self.mini_coco_data_dir,
"input_dim": 384,
}
train_dataloader_params = {"num_workers": 0}
dataloader = coco2017_train_yolo_nas(dataset_params=train_dataset_params, dataloader_params=train_dataloader_params)
batch = next(iter(dataloader))
print(batch[0].shape)
self.assertEqual(batch[0].shape[2], 384)
self.assertEqual(batch[0].shape[3], 384)

def test_coco_detection_dataset_override_with_objects(self):
train_dataset_params = {
"data_dir": self.mini_coco_data_dir,
"input_dim": 384,
"transforms": [
DetectionMosaic(input_dim=384),
DetectionPaddedRescale(input_dim=384, max_targets=10),
DetectionTargetsFormatTransform(max_targets=10, output_format=LABEL_CXCYWH),
],
}
train_dataloader_params = {"num_workers": 0}
dataloader = coco2017_train_yolo_nas(dataset_params=train_dataset_params, dataloader_params=train_dataloader_params)
batch = next(iter(dataloader))
print(batch[0].shape)
self.assertEqual(batch[0].shape[2], 384)
self.assertEqual(batch[0].shape[3], 384)


if __name__ == "__main__":
unittest.main()
Loading

0 comments on commit 7907c48

Please sign in to comment.