Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[transforms] small random resize improvement #1584

Merged
merged 5 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 24 additions & 4 deletions doctr/transforms/modules/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,18 +220,30 @@ class RandomResize(torch.nn.Module):

>>> import torch
>>> from doctr.transforms import RandomResize
>>> transfo = RandomResize((0.3, 0.9), p=0.5)
>>> transfo = RandomResize((0.3, 0.9), preserve_aspect_ratio=True, symmetric_pad=True, p=0.5)
>>> out = transfo(torch.rand((3, 64, 64)))

Args:
----
scale_range: range of the resizing factor for width and height (independently)
preserve_aspect_ratio: whether to preserve the aspect ratio of the image;
if a float is given, the aspect ratio is preserved with that probability
symmetric_pad: whether to pad the image symmetrically;
if a float is given, symmetric padding is applied with that probability
p: probability to apply the transformation
"""

def __init__(self, scale_range: Tuple[float, float] = (0.3, 0.9), p: float = 0.5) -> None:
def __init__(
self,
scale_range: Tuple[float, float] = (0.3, 0.9),
preserve_aspect_ratio: Union[bool, float] = False,
symmetric_pad: Union[bool, float] = False,
p: float = 0.5,
) -> None:
super().__init__()
self.scale_range = scale_range
self.preserve_aspect_ratio = preserve_aspect_ratio
self.symmetric_pad = symmetric_pad
self.p = p
self._resize = Resize

Expand All @@ -241,10 +253,18 @@ def forward(self, img: torch.Tensor, target: np.ndarray) -> Tuple[torch.Tensor,
scale_w = np.random.uniform(*self.scale_range)
new_size = (int(img.shape[-2] * scale_h), int(img.shape[-1] * scale_w))

_img, _target = self._resize(new_size, preserve_aspect_ratio=True, symmetric_pad=True)(img, target)
_img, _target = self._resize(
new_size,
preserve_aspect_ratio=self.preserve_aspect_ratio
if isinstance(self.preserve_aspect_ratio, bool)
else bool(torch.rand(1) <= self.preserve_aspect_ratio),
symmetric_pad=self.symmetric_pad
if isinstance(self.symmetric_pad, bool)
else bool(torch.rand(1) <= self.symmetric_pad),
)(img, target)

return _img, _target
return img, target

def extra_repr(self) -> str:
return f"scale_range={self.scale_range}, p={self.p}"
return f"scale_range={self.scale_range}, preserve_aspect_ratio={self.preserve_aspect_ratio}, symmetric_pad={self.symmetric_pad}, p={self.p}" # noqa: E501
28 changes: 24 additions & 4 deletions doctr/transforms/modules/tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,18 +523,30 @@ class RandomResize(NestedObject):

>>> import tensorflow as tf
>>> from doctr.transforms import RandomResize
>>> transfo = RandomResize((0.3, 0.9), p=0.5)
>>> transfo = RandomResize((0.3, 0.9), preserve_aspect_ratio=True, symmetric_pad=True, p=0.5)
>>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1))

Args:
----
scale_range: range of the resizing factor for width and height (independently)
preserve_aspect_ratio: whether to preserve the aspect ratio of the image;
if a float is given, the aspect ratio is preserved with that probability
symmetric_pad: whether to pad the image symmetrically;
if a float is given, symmetric padding is applied with that probability
p: probability to apply the transformation
"""

def __init__(self, scale_range: Tuple[float, float] = (0.3, 0.9), p: float = 0.5) -> None:
def __init__(
self,
scale_range: Tuple[float, float] = (0.3, 0.9),
preserve_aspect_ratio: Union[bool, float] = False,
symmetric_pad: Union[bool, float] = False,
p: float = 0.5,
):
super().__init__()
self.scale_range = scale_range
self.preserve_aspect_ratio = preserve_aspect_ratio
self.symmetric_pad = symmetric_pad
self.p = p
self._resize = Resize

Expand All @@ -544,10 +556,18 @@ def __call__(self, img: tf.Tensor, target: np.ndarray) -> Tuple[tf.Tensor, np.nd
scale_w = random.uniform(*self.scale_range)
new_size = (int(img.shape[-3] * scale_h), int(img.shape[-2] * scale_w))

_img, _target = self._resize(new_size, preserve_aspect_ratio=True, symmetric_pad=True)(img, target)
_img, _target = self._resize(
new_size,
preserve_aspect_ratio=self.preserve_aspect_ratio
if isinstance(self.preserve_aspect_ratio, bool)
else bool(np.random.rand(1) <= self.preserve_aspect_ratio),
symmetric_pad=self.symmetric_pad
if isinstance(self.symmetric_pad, bool)
else bool(np.random.rand(1) <= self.symmetric_pad),
)(img, target)

return _img, _target
return img, target

def extra_repr(self) -> str:
return f"scale_range={self.scale_range}, p={self.p}"
return f"scale_range={self.scale_range}, preserve_aspect_ratio={self.preserve_aspect_ratio}, symmetric_pad={self.symmetric_pad}, p={self.p}" # noqa: E501
4 changes: 2 additions & 2 deletions references/detection/train_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ def main(args):
T.RandomHorizontalFlip(0.15),
T.OneOf([
T.RandomApply(T.RandomCrop(ratio=(0.6, 1.33)), 0.25),
T.RandomResize(scale_range=(0.4, 0.9), p=0.25),
T.RandomResize(scale_range=(0.4, 0.9), preserve_aspect_ratio=0.5, symmetric_pad=0.5, p=0.25),
]),
T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True),
]
Expand All @@ -288,7 +288,7 @@ def main(args):
T.RandomHorizontalFlip(0.15),
T.OneOf([
T.RandomApply(T.RandomCrop(ratio=(0.6, 1.33)), 0.25),
T.RandomResize(scale_range=(0.4, 0.9), p=0.25),
T.RandomResize(scale_range=(0.4, 0.9), preserve_aspect_ratio=0.5, symmetric_pad=0.5, p=0.25),
]),
# Rotation augmentation
T.Resize(args.input_size, preserve_aspect_ratio=True),
Expand Down
4 changes: 2 additions & 2 deletions references/detection/train_tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def main(args):
T.RandomHorizontalFlip(0.15),
T.OneOf([
T.RandomApply(T.RandomCrop(ratio=(0.6, 1.33)), 0.25),
T.RandomResize(scale_range=(0.4, 0.9), p=0.25),
T.RandomResize(scale_range=(0.4, 0.9), preserve_aspect_ratio=0.5, symmetric_pad=0.5, p=0.25),
]),
T.Resize((args.input_size, args.input_size), preserve_aspect_ratio=True, symmetric_pad=True),
]
Expand All @@ -246,7 +246,7 @@ def main(args):
T.RandomHorizontalFlip(0.15),
T.OneOf([
T.RandomApply(T.RandomCrop(ratio=(0.6, 1.33)), 0.25),
T.RandomResize(scale_range=(0.4, 0.9), p=0.25),
T.RandomResize(scale_range=(0.4, 0.9), preserve_aspect_ratio=0.5, symmetric_pad=0.5, p=0.25),
]),
# Rotation augmentation
T.Resize(args.input_size, preserve_aspect_ratio=True),
Expand Down
25 changes: 16 additions & 9 deletions tests/pytorch/test_transforms_pt.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def test_random_crop(target):
# Check the scale
assert img.shape[-1] * img.shape[-2] >= 0.4 * input_t.shape[-1] * input_t.shape[-2]
# Check aspect ratio
assert 0.65 <= img.shape[-2] / img.shape[-1] <= 1.5
assert 0.65 <= img.shape[-2] / img.shape[-1] <= 1.6
# Check the target
assert np.all(target >= 0)
if target.ndim == 2:
Expand Down Expand Up @@ -330,17 +330,24 @@ def test_random_shadow(input_dtype, input_shape):


@pytest.mark.parametrize(
"p,target",
"p,preserve_aspect_ratio,symmetric_pad,target",
[
[1, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)],
[0, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)],
[1, np.array([[[0.1, 0.8], [0.3, 0.1], [0.3, 0.4], [0.8, 0.4]]], dtype=np.float32)],
[0, np.array([[[0.1, 0.8], [0.3, 0.1], [0.3, 0.4], [0.8, 0.4]]], dtype=np.float32)],
[1, True, False, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)],
[0, True, False, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)],
[1, True, False, np.array([[[0.1, 0.8], [0.3, 0.1], [0.3, 0.4], [0.8, 0.4]]], dtype=np.float32)],
[0, True, False, np.array([[[0.1, 0.8], [0.3, 0.1], [0.3, 0.4], [0.8, 0.4]]], dtype=np.float32)],
[1, 0.5, 0.5, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)],
[0, 0.5, 0.5, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)],
],
)
def test_random_resize(p, target):
transfo = RandomResize(scale_range=(0.3, 1.3), p=p)
assert repr(transfo) == f"RandomResize(scale_range=(0.3, 1.3), p={p})"
def test_random_resize(p, preserve_aspect_ratio, symmetric_pad, target):
transfo = RandomResize(
scale_range=(0.3, 1.3), preserve_aspect_ratio=preserve_aspect_ratio, symmetric_pad=symmetric_pad, p=p
)
assert (
repr(transfo)
== f"RandomResize(scale_range=(0.3, 1.3), preserve_aspect_ratio={preserve_aspect_ratio}, symmetric_pad={symmetric_pad}, p={p})" # noqa: E501
)

img = torch.rand((3, 64, 64))
# Apply the transformation
Expand Down
25 changes: 16 additions & 9 deletions tests/tensorflow/test_transforms_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ def test_random_crop(target):
# Check the scale (take a margin)
assert img.shape[0] * img.shape[1] >= 0.4 * input_t.shape[0] * input_t.shape[1]
# Check aspect ratio (take a margin)
assert 0.65 <= img.shape[0] / img.shape[1] <= 1.5
assert 0.65 <= img.shape[0] / img.shape[1] <= 1.6
# Check the target
assert np.all(target >= 0)
if target.ndim == 2:
Expand Down Expand Up @@ -471,17 +471,24 @@ def test_random_shadow(input_dtype, input_shape):


@pytest.mark.parametrize(
"p,target",
"p,preserve_aspect_ratio,symmetric_pad,target",
[
[1, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)],
[0, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)],
[1, np.array([[[0.1, 0.8], [0.3, 0.1], [0.3, 0.4], [0.8, 0.4]]], dtype=np.float32)],
[0, np.array([[[0.1, 0.8], [0.3, 0.1], [0.3, 0.4], [0.8, 0.4]]], dtype=np.float32)],
[1, True, False, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)],
[0, True, False, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)],
[1, True, False, np.array([[[0.1, 0.8], [0.3, 0.1], [0.3, 0.4], [0.8, 0.4]]], dtype=np.float32)],
[0, True, False, np.array([[[0.1, 0.8], [0.3, 0.1], [0.3, 0.4], [0.8, 0.4]]], dtype=np.float32)],
[1, 0.5, 0.5, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)],
[0, 0.5, 0.5, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)],
],
)
def test_random_resize(p, target):
transfo = T.RandomResize(scale_range=(0.3, 1.3), p=p)
assert repr(transfo) == f"RandomResize(scale_range=(0.3, 1.3), p={p})"
def test_random_resize(p, preserve_aspect_ratio, symmetric_pad, target):
transfo = T.RandomResize(
scale_range=(0.3, 1.3), preserve_aspect_ratio=preserve_aspect_ratio, symmetric_pad=symmetric_pad, p=p
)
assert (
repr(transfo)
== f"RandomResize(scale_range=(0.3, 1.3), preserve_aspect_ratio={preserve_aspect_ratio}, symmetric_pad={symmetric_pad}, p={p})" # noqa: E501
)

img = tf.random.uniform((64, 64, 3))
# Apply the transformation
Expand Down
Loading