From cd9fc8d22bfe321ebd14d62c0f967694127c16b7 Mon Sep 17 00:00:00 2001 From: Vladimir Iglovikov Date: Wed, 2 Oct 2024 16:44:17 -0700 Subject: [PATCH 1/7] Empty-Commit From fa34657114412404218b1535db288ab0ae7c2e13 Mon Sep 17 00:00:00 2001 From: Vladimir Iglovikov Date: Thu, 21 Nov 2024 17:17:27 -0800 Subject: [PATCH 2/7] Fix in parameter check in AdditiveNoise --- albumentations/augmentations/transforms.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/albumentations/augmentations/transforms.py b/albumentations/augmentations/transforms.py index 5cf4ffc97..7c2bfaf25 100644 --- a/albumentations/augmentations/transforms.py +++ b/albumentations/augmentations/transforms.py @@ -5193,26 +5193,26 @@ def validate_ranges(cls, v: list[tuple[float, float]]) -> list[tuple[float, floa class GaussianParams(NoiseParamsBase): noise_type: Literal["gaussian"] = "gaussian" - mean_range: tuple[float, float] = Field(default=(0.0, 0.0), description="Range for mean value", ge=-1, le=1) - std_range: tuple[float, float] = Field(default=(0.1, 0.1), description="Range for standard deviation", ge=0, le=1) + mean_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(min_val=-1, max_val=1))] + std_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(min_val=0, max_val=1))] class LaplaceParams(NoiseParamsBase): noise_type: Literal["laplace"] = "laplace" - mean_range: tuple[float, float] = Field(default=(0.0, 0.0), description="Range for location parameter", ge=-1, le=1) - scale_range: tuple[float, float] = Field(default=(0.1, 0.1), description="Range for scale parameter", ge=0, le=1) + mean_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(min_val=-1, max_val=1))] + scale_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(min_val=0, max_val=1))] class BetaParams(NoiseParamsBase): noise_type: Literal["beta"] = "beta" - alpha_range: tuple[float, float] = Field(default=(2.0, 2.0), 
description="Range for alpha parameter", gt=0) - beta_range: tuple[float, float] = Field(default=(2.0, 2.0), description="Range for beta parameter", gt=0) - scale_range: tuple[float, float] = Field(default=(1.0, 1.0), description="Range for scale parameter", ge=0, le=1) + alpha_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(min_val=0))] + beta_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(min_val=0))] + scale_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(min_val=0, max_val=1))] class PoissonParams(NoiseParamsBase): noise_type: Literal["poisson"] = "poisson" - lambda_range: tuple[float, float] = Field(default=(1.0, 1.0), description="Range for lambda parameter", gt=0) + lambda_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(min_val=0))] NoiseParams = Annotated[ From 6f833b90ffcfdcb7839eed5d2a41bbc0c848523d Mon Sep 17 00:00:00 2001 From: Vladimir Iglovikov Date: Mon, 2 Dec 2024 15:36:46 -0800 Subject: [PATCH 3/7] Refactoring --- .pre-commit-config.yaml | 4 +- .../augmentations/blur/functional.py | 2 +- .../augmentations/blur/transforms.py | 2 +- .../augmentations/crops/functional.py | 8 +- .../augmentations/crops/transforms.py | 12 +- .../domain_adaptation/functional.py | 4 +- .../domain_adaptation/transforms.py | 2 +- .../augmentations/dropout/functional.py | 8 +- .../augmentations/dropout/xy_masking.py | 2 +- albumentations/augmentations/functional.py | 83 +- .../augmentations/geometric/functional.py | 46 +- .../augmentations/geometric/resize.py | 2 +- .../augmentations/geometric/rotate.py | 2 +- .../augmentations/geometric/transforms.py | 22 +- .../augmentations/spectrogram/transform.py | 4 +- albumentations/augmentations/transforms.py | 1046 +++++++++++++---- albumentations/augmentations/utils.py | 6 +- albumentations/core/bbox_utils.py | 12 +- albumentations/core/composition.py | 10 +- albumentations/core/keypoints_utils.py | 4 +-
albumentations/core/serialization.py | 2 +- pyproject.toml | 9 +- 22 files changed, 932 insertions(+), 360 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a09ef5716..9b9fff927 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -51,7 +51,7 @@ repos: files: setup.py - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.7.4 + rev: v0.8.1 hooks: # Run the linter. - id: ruff @@ -73,7 +73,7 @@ repos: - id: codespell additional_dependencies: ["tomli"] - repo: https://github.com/igorshubovych/markdownlint-cli - rev: v0.42.0 + rev: v0.43.0 hooks: - id: markdownlint - repo: https://github.com/tox-dev/pyproject-fmt diff --git a/albumentations/augmentations/blur/functional.py b/albumentations/augmentations/blur/functional.py index aebf55786..d455da316 100644 --- a/albumentations/augmentations/blur/functional.py +++ b/albumentations/augmentations/blur/functional.py @@ -16,7 +16,7 @@ from albumentations.augmentations.geometric.functional import scale from albumentations.core.types import EIGHT, ScaleIntType -__all__ = ["blur", "median_blur", "gaussian_blur", "glass_blur", "defocus", "central_zoom", "zoom_blur"] +__all__ = ["blur", "central_zoom", "defocus", "gaussian_blur", "glass_blur", "median_blur", "zoom_blur"] @preserve_channel_dim diff --git a/albumentations/augmentations/blur/transforms.py b/albumentations/augmentations/blur/transforms.py index 244bf11a2..bf5466b6b 100644 --- a/albumentations/augmentations/blur/transforms.py +++ b/albumentations/augmentations/blur/transforms.py @@ -22,7 +22,7 @@ from . 
import functional as fblur -__all__ = ["Blur", "MotionBlur", "GaussianBlur", "GlassBlur", "AdvancedBlur", "MedianBlur", "Defocus", "ZoomBlur"] +__all__ = ["AdvancedBlur", "Blur", "Defocus", "GaussianBlur", "GlassBlur", "MedianBlur", "MotionBlur", "ZoomBlur"] HALF = 0.5 diff --git a/albumentations/augmentations/crops/functional.py b/albumentations/augmentations/crops/functional.py index bf1fce1b7..cef20ae1a 100644 --- a/albumentations/augmentations/crops/functional.py +++ b/albumentations/augmentations/crops/functional.py @@ -12,14 +12,14 @@ from albumentations.core.types import ColorType __all__ = [ - "get_crop_coords", - "crop_bboxes_by_coords", - "crop_keypoints_by_coords", - "get_center_crop_coords", "crop", "crop_and_pad", "crop_and_pad_bboxes", "crop_and_pad_keypoints", + "crop_bboxes_by_coords", + "crop_keypoints_by_coords", + "get_center_crop_coords", + "get_crop_coords", ] diff --git a/albumentations/augmentations/crops/transforms.py b/albumentations/augmentations/crops/transforms.py index 95628c12b..982872706 100644 --- a/albumentations/augmentations/crops/transforms.py +++ b/albumentations/augmentations/crops/transforms.py @@ -38,17 +38,17 @@ from . 
import functional as fcrops __all__ = [ - "RandomCrop", + "BBoxSafeRandomCrop", "CenterCrop", "Crop", + "CropAndPad", "CropNonEmptyMaskIfExists", - "RandomSizedCrop", - "RandomResizedCrop", + "RandomCrop", + "RandomCropFromBorders", "RandomCropNearBBox", + "RandomResizedCrop", "RandomSizedBBoxSafeCrop", - "CropAndPad", - "RandomCropFromBorders", - "BBoxSafeRandomCrop", + "RandomSizedCrop", ] diff --git a/albumentations/augmentations/domain_adaptation/functional.py b/albumentations/augmentations/domain_adaptation/functional.py index a5c2e4a11..5e9e7622a 100644 --- a/albumentations/augmentations/domain_adaptation/functional.py +++ b/albumentations/augmentations/domain_adaptation/functional.py @@ -14,9 +14,9 @@ from albumentations.core.types import MONO_CHANNEL_DIMENSIONS __all__ = [ - "fourier_domain_adaptation", - "apply_histogram", "adapt_pixel_distribution", + "apply_histogram", + "fourier_domain_adaptation", ] diff --git a/albumentations/augmentations/domain_adaptation/transforms.py b/albumentations/augmentations/domain_adaptation/transforms.py index caf3b5e0e..e4a175056 100644 --- a/albumentations/augmentations/domain_adaptation/transforms.py +++ b/albumentations/augmentations/domain_adaptation/transforms.py @@ -21,8 +21,8 @@ from albumentations.core.types import ScaleFloatType __all__ = [ - "HistogramMatching", "FDA", + "HistogramMatching", "PixelDistributionAdaptation", "TemplateTransform", ] diff --git a/albumentations/augmentations/dropout/functional.py b/albumentations/augmentations/dropout/functional.py index 19e35fa00..13a7af69b 100644 --- a/albumentations/augmentations/dropout/functional.py +++ b/albumentations/augmentations/dropout/functional.py @@ -18,13 +18,13 @@ from albumentations.core.types import MONO_CHANNEL_DIMENSIONS, ColorType, DropoutFillValue, InpaintMethod __all__ = [ - "cutout", + "calculate_grid_dimensions", "channel_dropout", - "filter_keypoints_in_holes", - "generate_random_fill", + "cutout", "filter_bboxes_by_holes", - 
"calculate_grid_dimensions", + "filter_keypoints_in_holes", "generate_grid_holes", + "generate_random_fill", ] diff --git a/albumentations/augmentations/dropout/xy_masking.py b/albumentations/augmentations/dropout/xy_masking.py index 39a036982..90071b4b1 100644 --- a/albumentations/augmentations/dropout/xy_masking.py +++ b/albumentations/augmentations/dropout/xy_masking.py @@ -154,7 +154,7 @@ def generate_masks( max_length: tuple[int, int] | None, axis: str, ) -> list[tuple[int, int, int, int]]: - if max_length is None or max_length == 0 or isinstance(num_masks, (int, float)) and num_masks == 0: + if max_length is None or max_length == 0 or (isinstance(num_masks, (int, float)) and num_masks == 0): return [] masks = [] diff --git a/albumentations/augmentations/functional.py b/albumentations/augmentations/functional.py index f084ba8c5..329aec769 100644 --- a/albumentations/augmentations/functional.py +++ b/albumentations/augmentations/functional.py @@ -8,6 +8,7 @@ import numpy as np from albucore import ( MAX_VALUES_BY_DTYPE, + add, add_array, add_constant, add_weighted, @@ -49,9 +50,9 @@ __all__ = [ "add_fog", + "add_gravel", "add_rain", "add_shadow", - "add_gravel", "add_snow_bleach", "add_snow_texture", "add_sun_flare_overlay", @@ -61,10 +62,13 @@ "adjust_hue_torchvision", "adjust_saturation_torchvision", "channel_shuffle", + "chromatic_aberration", "clahe", "convolve", + "dilate", "downscale", "equalize", + "erode", "fancy_pca", "gamma_transform", "image_compression", @@ -79,9 +83,6 @@ "superpixels", "to_gray", "unsharp_mask", - "chromatic_aberration", - "erode", - "dilate", ] @@ -1828,7 +1829,7 @@ def planckian_jitter(img: np.ndarray, temperature: int, mode: Literal["blackbody @clipped def add_noise(img: np.ndarray, noise: np.ndarray) -> np.ndarray: - return add_array(img, noise, inplace=False) + return add(img, noise, inplace=False) def slic(image: np.ndarray, n_segments: int, compactness: float = 10.0, max_iterations: int = 10) -> np.ndarray: @@ -1983,7 
+1984,7 @@ def get_safe_brightness_contrast_params( def generate_noise( - noise_type: Literal["uniform", "gaussian", "laplace", "beta", "poisson"], + noise_type: Literal["uniform", "gaussian", "laplace", "beta"], spatial_mode: Literal["constant", "per_pixel", "shared"], shape: tuple[int, ...], params: dict[str, Any] | None, @@ -2019,7 +2020,7 @@ def generate_noise( def generate_constant_noise( - noise_type: Literal["uniform", "gaussian", "laplace", "beta", "poisson"], + noise_type: Literal["uniform", "gaussian", "laplace", "beta"], shape: tuple[int, ...], params: dict[str, Any], max_value: float, @@ -2031,7 +2032,7 @@ def generate_constant_noise( def generate_per_pixel_noise( - noise_type: Literal["uniform", "gaussian", "laplace", "beta", "poisson"], + noise_type: Literal["uniform", "gaussian", "laplace", "beta"], shape: tuple[int, ...], params: dict[str, Any], max_value: float, @@ -2042,7 +2043,7 @@ def generate_per_pixel_noise( def sample_noise( - noise_type: Literal["uniform", "gaussian", "laplace", "beta", "poisson"], + noise_type: Literal["uniform", "gaussian", "laplace", "beta"], size: tuple[int, ...], params: dict[str, Any], max_value: float, @@ -2057,21 +2058,40 @@ def sample_noise( return sample_laplace(size, params, random_generator) * max_value if noise_type == "beta": return sample_beta(size, params, random_generator) * max_value - if noise_type == "poisson": - return sample_poisson(size, params, random_generator, max_value) raise ValueError(f"Unknown noise type: {noise_type}") -def sample_uniform(size: tuple[int, ...], params: dict[str, Any], random_generator: np.random.Generator) -> np.ndarray: - """Sample from uniform distribution.""" +def sample_uniform( + size: tuple[int, ...], + params: dict[str, Any], + random_generator: np.random.Generator, +) -> np.ndarray | float: + """Sample from uniform distribution. + + Args: + size: Output shape. If length is 1, generates constant noise per channel. 
+ params: Must contain 'ranges' key with list of (min, max) tuples. + If only one range is provided, it will be used for all channels. + random_generator: NumPy random generator instance + + Returns: + Noise array of specified size. For single-channel constant mode, + returns scalar instead of array with shape (1,). + """ if len(size) == 1: # constant mode - if len(params["ranges"]) < size[0]: - raise ValueError(f"Not enough ranges provided. Expected {size[0]}, got {len(params['ranges'])}") - return np.array([random_generator.uniform(low, high) for low, high in params["ranges"][: size[0]]]) + ranges = params["ranges"] + num_channels = size[0] + + if len(ranges) == 1: + ranges = ranges * num_channels + elif len(ranges) < num_channels: + raise ValueError(f"Not enough ranges provided. Expected {num_channels}, got {len(ranges)}") + + return np.array([random_generator.uniform(low, high) for low, high in ranges[:num_channels]]) # use first range for spatial noise - low, high = params["ranges"][0] # use first range for spatial noise + low, high = params["ranges"][0] return random_generator.uniform(low, high, size=size) @@ -2108,35 +2128,8 @@ def sample_beta(size: tuple[int, ...], params: dict[str, Any], random_generator: return (2 * samples - 1) * scale -def sample_poisson( - size: tuple[int, ...], - params: dict[str, Any], - random_generator: np.random.Generator, - max_value: float, -) -> np.ndarray: - """Sample from Poisson distribution. - - For uint8 images (max_value=255), lambda is scaled accordingly as Poisson noise - is intensity-dependent. 
- """ - lam = random_generator.uniform(*params["lambda_range"]) - - # Scale lambda based on max_value as Poisson noise is intensity-dependent - scaled_lam = lam * max_value - - # Generate Poisson samples - samples = random_generator.poisson(lam=scaled_lam, size=size) - - # Center around 0 and normalize by standard deviation - # For Poisson, variance = lambda - noise = (samples - scaled_lam) / np.sqrt(scaled_lam) - - # Scale to match max_value range - return np.clip(noise * max_value, -max_value, max_value) - - def generate_shared_noise( - noise_type: Literal["uniform", "gaussian", "laplace", "beta", "poisson"], + noise_type: Literal["uniform", "gaussian", "laplace", "beta"], shape: tuple[int, ...], params: dict[str, Any], max_value: float, diff --git a/albumentations/augmentations/geometric/functional.py b/albumentations/augmentations/geometric/functional.py index 19985dac9..5c860ca06 100644 --- a/albumentations/augmentations/geometric/functional.py +++ b/albumentations/augmentations/geometric/functional.py @@ -24,37 +24,37 @@ ) __all__ = [ - "remap", - "remap_keypoints", - "remap_bboxes", - "pad", - "pad_with_params", - "resize", - "scale", "_func_max_size", - "longest_max_size", - "smallest_max_size", - "perspective", - "rotation2d_matrix_to_euler_angles", - "is_identity_matrix", - "warp_affine", - "to_distance_maps", - "from_distance_maps", - "transpose", - "d4", "bboxes_d4", - "keypoints_d4", + "bboxes_hflip", "bboxes_rot90", - "keypoints_rot90", "bboxes_transpose", - "keypoints_transpose", "bboxes_vflip", - "keypoints_vflip", - "bboxes_hflip", - "keypoints_hflip", "center", "center_bbox", + "d4", + "from_distance_maps", "generate_grid", + "is_identity_matrix", + "keypoints_d4", + "keypoints_hflip", + "keypoints_rot90", + "keypoints_transpose", + "keypoints_vflip", + "longest_max_size", + "pad", + "pad_with_params", + "perspective", + "remap", + "remap_bboxes", + "remap_keypoints", + "resize", + "rotation2d_matrix_to_euler_angles", + "scale", + 
"smallest_max_size", + "to_distance_maps", + "transpose", + "warp_affine", ] PAIR = 2 diff --git a/albumentations/augmentations/geometric/resize.py b/albumentations/augmentations/geometric/resize.py index a6cefdbbe..25ed3927c 100644 --- a/albumentations/augmentations/geometric/resize.py +++ b/albumentations/augmentations/geometric/resize.py @@ -14,7 +14,7 @@ from . import functional as fgeometric -__all__ = ["RandomScale", "LongestMaxSize", "SmallestMaxSize", "Resize"] +__all__ = ["LongestMaxSize", "RandomScale", "Resize", "SmallestMaxSize"] class RandomScale(DualTransform): diff --git a/albumentations/augmentations/geometric/rotate.py b/albumentations/augmentations/geometric/rotate.py index 06a9230f4..17a207b57 100644 --- a/albumentations/augmentations/geometric/rotate.py +++ b/albumentations/augmentations/geometric/rotate.py @@ -20,7 +20,7 @@ from . import functional as fgeometric -__all__ = ["Rotate", "RandomRotate90", "SafeRotate", "RotateAndProject"] +__all__ = ["RandomRotate90", "Rotate", "RotateAndProject", "SafeRotate"] SMALL_NUMBER = 1e-10 diff --git a/albumentations/augmentations/geometric/transforms.py b/albumentations/augmentations/geometric/transforms.py index ab79e6373..f776d1bcf 100644 --- a/albumentations/augmentations/geometric/transforms.py +++ b/albumentations/augmentations/geometric/transforms.py @@ -38,23 +38,23 @@ from . 
import functional as fgeometric __all__ = [ - "ShiftScaleRotate", - "ElasticTransform", - "Perspective", + "D4", "Affine", - "PiecewiseAffine", - "VerticalFlip", - "HorizontalFlip", + "ElasticTransform", "Flip", - "Transpose", - "OpticalDistortion", "GridDistortion", - "PadIfNeeded", - "D4", "GridElasticDeform", - "RandomGridShuffle", + "HorizontalFlip", + "OpticalDistortion", "Pad", + "PadIfNeeded", + "Perspective", + "PiecewiseAffine", + "RandomGridShuffle", + "ShiftScaleRotate", "ThinPlateSpline", + "Transpose", + "VerticalFlip", ] NUM_PADS_XY = 2 diff --git a/albumentations/augmentations/spectrogram/transform.py b/albumentations/augmentations/spectrogram/transform.py index 1c54e95f0..ba18b9340 100644 --- a/albumentations/augmentations/spectrogram/transform.py +++ b/albumentations/augmentations/spectrogram/transform.py @@ -10,9 +10,9 @@ from albumentations.core.types import Targets __all__ = [ - "TimeReverse", - "TimeMasking", "FrequencyMasking", + "TimeMasking", + "TimeReverse", ] diff --git a/albumentations/augmentations/transforms.py b/albumentations/augmentations/transforms.py index 7c2bfaf25..128881193 100644 --- a/albumentations/augmentations/transforms.py +++ b/albumentations/augmentations/transforms.py @@ -24,7 +24,15 @@ normalize_per_image, to_float, ) -from pydantic import AfterValidator, BaseModel, ConfigDict, Field, ValidationInfo, field_validator, model_validator +from pydantic import ( + AfterValidator, + BaseModel, + ConfigDict, + Field, + ValidationInfo, + field_validator, + model_validator, +) from scipy import special from scipy.ndimage import gaussian_filter from typing_extensions import Literal, Self, TypedDict @@ -34,7 +42,11 @@ from albumentations.augmentations.blur import functional as fblur from albumentations.augmentations.blur.transforms import BlurInitSchema from albumentations.augmentations.utils import check_range, non_rgb_error -from albumentations.core.bbox_utils import BboxProcessor, denormalize_bboxes, normalize_bboxes +from 
albumentations.core.bbox_utils import ( + BboxProcessor, + denormalize_bboxes, + normalize_bboxes, +) from albumentations.core.keypoints_utils import KeypointsProcessor from albumentations.core.pydantic import ( InterpolationType, @@ -77,54 +89,54 @@ from . import functional as fmain __all__ = [ - "Normalize", - "RandomGamma", - "HueSaturationValue", - "RGBShift", - "GaussNoise", "CLAHE", + "AdditiveNoise", + "AutoContrast", "ChannelShuffle", - "InvertImg", - "ToGray", - "ToRGB", - "ToSepia", - "ImageCompression", - "ToFloat", + "ChromaticAberration", + "ColorJitter", + "Downscale", + "Emboss", + "Equalize", + "FancyPCA", "FromFloat", + "GaussNoise", + "HueSaturationValue", + "ISONoise", + "Illumination", + "ImageCompression", + "InvertImg", + "Lambda", + "Morphological", + "MultiplicativeNoise", + "Normalize", + "PixelDropout", + "PlanckianJitter", + "PlasmaBrightnessContrast", + "PlasmaShadow", + "Posterize", + "RGBShift", "RandomBrightnessContrast", - "RandomSnow", + "RandomFog", + "RandomGamma", "RandomGravel", "RandomRain", - "RandomFog", - "RandomSunFlare", "RandomShadow", + "RandomSnow", + "RandomSunFlare", "RandomToneCurve", - "Lambda", - "ISONoise", - "Solarize", - "Equalize", - "Posterize", - "Downscale", - "MultiplicativeNoise", - "FancyPCA", - "ColorJitter", + "RingingOvershoot", + "SaltAndPepper", "Sharpen", - "Emboss", + "ShotNoise", + "Solarize", + "Spatter", "Superpixels", - "RingingOvershoot", + "ToFloat", + "ToGray", + "ToRGB", + "ToSepia", "UnsharpMask", - "PixelDropout", - "Spatter", - "ChromaticAberration", - "Morphological", - "PlanckianJitter", - "ShotNoise", - "AdditiveNoise", - "SaltAndPepper", - "PlasmaBrightnessContrast", - "PlasmaShadow", - "Illumination", - "AutoContrast", ] NUM_BITS_ARRAY_LENGTH = 3 @@ -215,10 +227,11 @@ def validate_normalization(self) -> Self: if ( self.mean is None or self.std is None - or self.max_pixel_value is None - and self.normalization == "standard" + or (self.max_pixel_value is None and self.normalization == 
"standard") ): - raise ValueError("mean, std, and max_pixel_value must be provided for standard normalization.") + raise ValueError( + "mean, std, and max_pixel_value must be provided for standard normalization.", + ) return self def __init__( @@ -226,7 +239,13 @@ def __init__( mean: ColorType | None = (0.485, 0.456, 0.406), std: ColorType | None = (0.229, 0.224, 0.225), max_pixel_value: float | None = 255.0, - normalization: Literal["standard", "image", "image_per_channel", "min_max", "min_max_per_channel"] = "standard", + normalization: Literal[ + "standard", + "image", + "image_per_channel", + "min_max", + "min_max_per_channel", + ] = "standard", always_apply: bool | None = None, p: float = 1.0, ): @@ -234,7 +253,9 @@ def __init__( self.mean = mean self.mean_np = np.array(mean, dtype=np.float32) * max_pixel_value self.std = std - self.denominator = np.reciprocal(np.array(std, dtype=np.float32) * max_pixel_value) + self.denominator = np.reciprocal( + np.array(std, dtype=np.float32) * max_pixel_value, + ) self.max_pixel_value = max_pixel_value self.normalization = normalization @@ -306,7 +327,11 @@ class ImageCompression(ImageOnlyTransform): """ class InitSchema(BaseTransformInitSchema): - quality_range: Annotated[tuple[int, int], AfterValidator(check_1plus), AfterValidator(nondecreasing)] + quality_range: Annotated[ + tuple[int, int], + AfterValidator(check_1plus), + AfterValidator(nondecreasing), + ] quality_lower: int | None = Field( ge=1, @@ -345,7 +370,9 @@ def validate_ranges(self) -> Self: # Validate the quality_range if not (1 <= self.quality_range[0] <= MAX_JPEG_QUALITY and 1 <= self.quality_range[1] <= MAX_JPEG_QUALITY): - raise ValueError(f"Quality range values should be within [1, {MAX_JPEG_QUALITY}] range.") + raise ValueError( + f"Quality range values should be within [1, {MAX_JPEG_QUALITY}] range.", + ) return self @@ -362,7 +389,13 @@ def __init__( self.quality_range = quality_range self.compression_type = compression_type - def apply(self, img: 
np.ndarray, quality: int, image_type: Literal[".jpg", ".webp"], **params: Any) -> np.ndarray: + def apply( + self, + img: np.ndarray, + quality: int, + image_type: Literal[".jpg", ".webp"], + **params: Any, + ) -> np.ndarray: return fmain.image_compression(img, quality, image_type) def get_params(self) -> dict[str, int | str]: @@ -458,7 +491,11 @@ class RandomSnow(ImageOnlyTransform): """ class InitSchema(BaseTransformInitSchema): - snow_point_range: Annotated[tuple[float, float], AfterValidator(check_01), AfterValidator(nondecreasing)] + snow_point_range: Annotated[ + tuple[float, float], + AfterValidator(check_01), + AfterValidator(nondecreasing), + ] snow_point_lower: float | None = Field( gt=0, @@ -496,7 +533,9 @@ def validate_ranges(self) -> Self: # Validate the snow_point_range if not (0 < self.snow_point_range[0] <= self.snow_point_range[1] < 1): - raise ValueError("snow_point_range values should be increasing within (0, 1) range.") + raise ValueError( + "snow_point_range values should be increasing within (0, 1) range.", + ) return self @@ -529,7 +568,13 @@ def apply( if self.method == "bleach": return fmain.add_snow_bleach(img, snow_point, self.brightness_coeff) if self.method == "texture": - return fmain.add_snow_texture(img, snow_point, self.brightness_coeff, snow_texture, sparkle_mask) + return fmain.add_snow_texture( + img, + snow_point, + self.brightness_coeff, + snow_texture, + sparkle_mask, + ) raise ValueError(f"Unknown snow method: {self.method}") @@ -653,7 +698,10 @@ def __init__( self.gravel_roi = gravel_roi self.number_of_patches = number_of_patches - def generate_gravel_patch(self, rectangular_roi: tuple[int, int, int, int]) -> np.ndarray: + def generate_gravel_patch( + self, + rectangular_roi: tuple[int, int, int, int], + ) -> np.ndarray: x_min, y_min, x_max, y_max = rectangular_roi area = abs((x_max - x_min) * (y_max - y_min)) count = area // 10 @@ -662,10 +710,19 @@ def generate_gravel_patch(self, rectangular_roi: tuple[int, int, int, int]) 
-> n gravels[:, 1] = self.random_generator.integers(y_min, y_max, count) return gravels - def apply(self, img: np.ndarray, gravels_infos: list[Any], **params: Any) -> np.ndarray: + def apply( + self, + img: np.ndarray, + gravels_infos: list[Any], + **params: Any, + ) -> np.ndarray: return fmain.add_gravel(img, gravels_infos) - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, np.ndarray]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, np.ndarray]: height, width = params["shape"][:2] # Calculate ROI in pixels @@ -871,7 +928,11 @@ def apply( rain_drops, ) - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, Any]: slant = int(self.py_random.uniform(*self.slant_range)) height, width = params["shape"][:2] @@ -985,16 +1046,28 @@ class InitSchema(BaseTransformInitSchema): ge=0, le=1, ) - fog_coef_range: Annotated[tuple[float, float], AfterValidator(check_01), AfterValidator(nondecreasing)] + fog_coef_range: Annotated[ + tuple[float, float], + AfterValidator(check_01), + AfterValidator(nondecreasing), + ] alpha_coef: float = Field(ge=0, le=1) @model_validator(mode="after") def validate_fog_coefficients(self) -> Self: if self.fog_coef_lower is not None: - warn("`fog_coef_lower` is deprecated, use `fog_coef_range` instead.", DeprecationWarning, stacklevel=2) + warn( + "`fog_coef_lower` is deprecated, use `fog_coef_range` instead.", + DeprecationWarning, + stacklevel=2, + ) if self.fog_coef_upper is not None: - warn("`fog_coef_upper` is deprecated, use `fog_coef_range` instead.", DeprecationWarning, stacklevel=2) + warn( + "`fog_coef_upper` is deprecated, use `fog_coef_range` instead.", + DeprecationWarning, + stacklevel=2, + ) lower = self.fog_coef_lower if self.fog_coef_lower is not None else 
self.fog_coef_range[0] upper = self.fog_coef_upper if self.fog_coef_upper is not None else self.fog_coef_range[1] @@ -1034,7 +1107,11 @@ def apply( radiuses, ) - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, Any]: # Select a random fog intensity within the specified range intensity = self.py_random.uniform(*self.fog_coef_range) @@ -1063,12 +1140,20 @@ def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, A while current_width > fog_region_size and current_height > fog_region_size and iteration < max_iterations: # Calculate the number of particles for this region area = current_width * current_height - particles_in_region = int(area / (fog_region_size * fog_region_size) * intensity * 10) + particles_in_region = int( + area / (fog_region_size * fog_region_size) * intensity * 10, + ) for _ in range(particles_in_region): # Generate random positions within the current region - x = self.py_random.randint(center_x - current_width // 2, center_x + current_width // 2) - y = self.py_random.randint(center_y - current_height // 2, center_y + current_height // 2) + x = self.py_random.randint( + center_x - current_width // 2, + center_x + current_width // 2, + ) + y = self.py_random.randint( + center_y - current_height // 2, + center_y + current_height // 2, + ) particle_positions.append((x, y)) # Shrink the region for the next iteration @@ -1228,7 +1313,11 @@ class InitSchema(BaseTransformInitSchema): src_radius: int = Field(gt=1) src_color: tuple[int, ...] 
- angle_range: Annotated[tuple[float, float], AfterValidator(check_01), AfterValidator(nondecreasing)] + angle_range: Annotated[ + tuple[float, float], + AfterValidator(check_01), + AfterValidator(nondecreasing), + ] num_flare_circles_range: Annotated[ tuple[int, int], @@ -1239,7 +1328,12 @@ class InitSchema(BaseTransformInitSchema): @model_validator(mode="after") def validate_parameters(self) -> Self: - flare_center_lower_x, flare_center_lower_y, flare_center_upper_x, flare_center_upper_y = self.flare_roi + ( + flare_center_lower_x, + flare_center_lower_y, + flare_center_upper_x, + flare_center_upper_y, + ) = self.flare_roi if ( not 0 <= flare_center_lower_x < flare_center_upper_x <= 1 or not 0 <= flare_center_lower_y < flare_center_upper_y <= 1 @@ -1344,7 +1438,11 @@ def apply( raise ValueError(f"Invalid method: {self.method}") - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, Any]: height, width = params["shape"][:2] diagonal = math.sqrt(height**2 + width**2) @@ -1363,7 +1461,10 @@ def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, A color_range = int(max(self.src_color) * 0.2) # 20% of max color value def line(t: float) -> tuple[float, float]: - return (flare_center_x + t * math.cos(angle), flare_center_y + t * math.sin(angle)) + return ( + flare_center_x + t * math.cos(angle), + flare_center_y + t * math.sin(angle), + ) # Generate points along the flare line t_range = range(-flare_center_x, width - flare_center_x, step_size) @@ -1478,7 +1579,11 @@ class RandomShadow(ImageOnlyTransform): class InitSchema(BaseTransformInitSchema): shadow_roi: tuple[float, float, float, float] - num_shadows_limit: Annotated[tuple[int, int], AfterValidator(check_1plus), AfterValidator(nondecreasing)] + num_shadows_limit: Annotated[ + tuple[int, int], + AfterValidator(check_1plus), + 
AfterValidator(nondecreasing), + ] num_shadows_lower: int | None num_shadows_upper: int | None shadow_dimension: int = Field(ge=3) @@ -1535,7 +1640,9 @@ def validate_shadows(self) -> Self: f"Got: {self.shadow_intensity_range}", ) else: - raise TypeError("shadow_intensity_range should be an float or a tuple of floats.") + raise TypeError( + "shadow_intensity_range should be an float or a tuple of floats.", + ) return self @@ -1566,7 +1673,11 @@ def apply( ) -> np.ndarray: return fmain.add_shadow(img, vertices_list, intensities) - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, list[np.ndarray]]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, list[np.ndarray]]: height, width = params["shape"][:2] num_shadows = self.py_random.randint(*self.num_shadows_limit) @@ -1581,8 +1692,16 @@ def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, A vertices_list = [ np.stack( [ - self.random_generator.integers(x_min, x_max, size=self.shadow_dimension), - self.random_generator.integers(y_min, y_max, size=self.shadow_dimension), + self.random_generator.integers( + x_min, + x_max, + size=self.shadow_dimension, + ), + self.random_generator.integers( + y_min, + y_max, + size=self.shadow_dimension, + ), ], axis=1, ) @@ -1693,15 +1812,35 @@ def apply( ) -> np.ndarray: return fmain.move_tone_curve(img, low_y, high_y) - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, Any]: image = data["image"] if "image" in data else data["images"][0] num_channels = get_num_channels(image) if self.per_channel and num_channels != 1: return { - "low_y": np.clip(self.random_generator.normal(loc=0.25, scale=self.scale, size=(num_channels,)), 0, 1), - "high_y": np.clip(self.random_generator.normal(loc=0.75, 
scale=self.scale, size=(num_channels,)), 0, 1), + "low_y": np.clip( + self.random_generator.normal( + loc=0.25, + scale=self.scale, + size=(num_channels,), + ), + 0, + 1, + ), + "high_y": np.clip( + self.random_generator.normal( + loc=0.75, + scale=self.scale, + size=(num_channels,), + ), + 0, + 1, + ), } # Same values for all channels low_y = np.clip(self.random_generator.normal(loc=0.25, scale=self.scale), 0, 1) @@ -1885,7 +2024,11 @@ class InitSchema(BaseTransformInitSchema): default=None, deprecated="threshold parameter is deprecated. Use threshold_range instead.", ) - threshold_range: Annotated[tuple[float, float], AfterValidator(check_01), AfterValidator(nondecreasing)] + threshold_range: Annotated[ + tuple[float, float], + AfterValidator(check_01), + AfterValidator(nondecreasing), + ] @staticmethod def normalize_threshold( @@ -1900,7 +2043,10 @@ def normalize_threshold( @model_validator(mode="after") def process_threshold(self) -> Self: - self.threshold_range = self.normalize_threshold(self.threshold, self.threshold_range) + self.threshold_range = self.normalize_threshold( + self.threshold, + self.threshold_range, + ) return self def __init__( @@ -2000,18 +2146,21 @@ class InitSchema(BaseTransformInitSchema): @field_validator("num_bits") @classmethod - def validate_num_bits(cls, num_bits: Any) -> tuple[int, int] | list[tuple[int, int]]: + def validate_num_bits( + cls, + num_bits: Any, + ) -> tuple[int, int] | list[tuple[int, int]]: if isinstance(num_bits, int): return to_tuple(num_bits, num_bits) - if isinstance(num_bits, Sequence) and len(num_bits) == NUM_BITS_ARRAY_LENGTH: + if isinstance(num_bits, Sequence): return [to_tuple(i, 0) for i in num_bits] return cast(tuple[int, int], to_tuple(num_bits, 0)) def __init__( self, num_bits: int | tuple[int, int] | list[tuple[int, int]] = 4, - always_apply: bool | None = None, p: float = 0.5, + always_apply: bool | None = None, ): super().__init__(p=p, always_apply=always_apply) self.num_bits = cast(Union[tuple[int, 
...], list[tuple[int, ...]]], num_bits) @@ -2020,10 +2169,15 @@ def apply(self, img: np.ndarray, num_bits: int, **params: Any) -> np.ndarray: return fmain.posterize(img, num_bits) def get_params(self) -> dict[str, Any]: - if len(self.num_bits) == NUM_BITS_ARRAY_LENGTH: - return {"num_bits": [self.py_random.randint(int(i[0]), int(i[1])) for i in self.num_bits]} # type: ignore[index] - num_bits = self.num_bits - return {"num_bits": self.py_random.randint(int(num_bits[0]), int(num_bits[1]))} # type: ignore[arg-type] + if isinstance(self.num_bits, list): + num_bits = [self.py_random.randint(int(i[0]), int(i[1])) for i in self.num_bits] + return {"num_bits": num_bits} + return { + "num_bits": self.py_random.randint( + int(self.num_bits[0]), + int(self.num_bits[1]), + ), + } def get_transform_init_args_names(self) -> tuple[str, ...]: return ("num_bits",) @@ -2129,16 +2283,27 @@ def __init__( self.mask_params = mask_params def apply(self, img: np.ndarray, mask: np.ndarray, **params: Any) -> np.ndarray: - return fmain.equalize(img, mode=self.mode, by_channels=self.by_channels, mask=mask) + return fmain.equalize( + img, + mode=self.mode, + by_channels=self.by_channels, + mask=mask, + ) - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, Any]: if not callable(self.mask): return {"mask": self.mask} mask_params = {"image": data["image"]} for key in self.mask_params: if key not in data: - raise KeyError(f"Required parameter '{key}' for mask function is missing in data.") + raise KeyError( + f"Required parameter '{key}' for mask function is missing in data.", + ) mask_params[key] = data[key] return {"mask": self.mask(**mask_params)} @@ -2263,10 +2428,20 @@ def __init__( self.brightness_by_max = brightness_by_max self.ensure_safe_range = ensure_safe_range - def apply(self, img: np.ndarray, alpha: float, beta: float, 
**params: Any) -> np.ndarray: + def apply( + self, + img: np.ndarray, + alpha: float, + beta: float, + **params: Any, + ) -> np.ndarray: return albucore.multiply_add(img, alpha, beta, inplace=False) - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, float]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, float]: image = data["image"] if "image" in data else data["images"][0] # Sample initial values @@ -2279,7 +2454,11 @@ def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, A # Clip values to safe ranges if needed if self.ensure_safe_range: - alpha, beta = fmain.get_safe_brightness_contrast_params(alpha, beta, max_value) + alpha, beta = fmain.get_safe_brightness_contrast_params( + alpha, + beta, + max_value, + ) return { "alpha": alpha, @@ -2287,7 +2466,12 @@ def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, A } def get_transform_init_args_names(self) -> tuple[str, ...]: - return "brightness_limit", "contrast_limit", "brightness_by_max", "ensure_safe_range" + return ( + "brightness_limit", + "contrast_limit", + "brightness_by_max", + "ensure_safe_range", + ) class GaussNoise(ImageOnlyTransform): @@ -2351,7 +2535,11 @@ class InitSchema(BaseTransformInitSchema): mean: float | None = Field( deprecated="mean parameter is deprecated. 
Use mean_range instead.", ) - std_range: Annotated[tuple[float, float], AfterValidator(check_01), AfterValidator(nondecreasing)] + std_range: Annotated[ + tuple[float, float], + AfterValidator(check_01), + AfterValidator(nondecreasing), + ] mean_range: Annotated[ tuple[float, float], AfterValidator(check_range_bounds(-1, 1)), @@ -2369,7 +2557,10 @@ def check_range(self) -> Self: self.std_range = (math.sqrt(10 / 255), math.sqrt(50 / 255)) else: # Already normalized variance, convert to std dev - self.std_range = (math.sqrt(self.var_limit[0]), math.sqrt(self.var_limit[1])) + self.std_range = ( + math.sqrt(self.var_limit[0]), + math.sqrt(self.var_limit[1]), + ) if self.mean is not None: self.mean_range = (0.0, 0.0) @@ -2402,10 +2593,19 @@ def __init__( self.var_limit = var_limit - def apply(self, img: np.ndarray, noise_map: np.ndarray, **params: Any) -> np.ndarray: + def apply( + self, + img: np.ndarray, + noise_map: np.ndarray, + **params: Any, + ) -> np.ndarray: return fmain.add_noise(img, noise_map) - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, float]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, float]: image = data["image"] if "image" in data else data["images"][0] max_value = MAX_VALUES_BY_DTYPE[image.dtype] @@ -2485,8 +2685,16 @@ class ISONoise(ImageOnlyTransform): """ class InitSchema(BaseTransformInitSchema): - color_shift: Annotated[tuple[float, float], AfterValidator(check_01), AfterValidator(nondecreasing)] - intensity: Annotated[tuple[float, float], AfterValidator(check_0plus), AfterValidator(nondecreasing)] + color_shift: Annotated[ + tuple[float, float], + AfterValidator(check_01), + AfterValidator(nondecreasing), + ] + intensity: Annotated[ + tuple[float, float], + AfterValidator(check_0plus), + AfterValidator(nondecreasing), + ] def __init__( self, @@ -2508,9 +2716,18 @@ def apply( **params: Any, ) -> np.ndarray: non_rgb_error(img) - 
return fmain.iso_noise(img, color_shift, intensity, np.random.default_rng(random_seed)) + return fmain.iso_noise( + img, + color_shift, + intensity, + np.random.default_rng(random_seed), + ) - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, Any]: random_seed = self.random_generator.integers(0, 2**32 - 1) return { "color_shift": self.py_random.uniform(*self.color_shift), @@ -2618,10 +2835,19 @@ class ChannelShuffle(ImageOnlyTransform): """ - def apply(self, img: np.ndarray, channels_shuffled: tuple[int, ...], **params: Any) -> np.ndarray: + def apply( + self, + img: np.ndarray, + channels_shuffled: tuple[int, ...], + **params: Any, + ) -> np.ndarray: return fmain.channel_shuffle(img, channels_shuffled) - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, Any]: ch_arr = list(range(params["shape"][2])) self.random_generator.shuffle(ch_arr) return {"channels_shuffled": ch_arr} @@ -2742,7 +2968,9 @@ def apply(self, img: np.ndarray, gamma: float, **params: Any) -> np.ndarray: return fmain.gamma_transform(img, gamma=gamma) def get_params(self) -> dict[str, float]: - return {"gamma": self.py_random.uniform(self.gamma_limit[0], self.gamma_limit[1]) / 100.0} + return { + "gamma": self.py_random.uniform(self.gamma_limit[0], self.gamma_limit[1]) / 100.0, + } def get_transform_init_args_names(self) -> tuple[str, ...]: return ("gamma_limit",) @@ -2795,13 +3023,31 @@ class ToGray(ImageOnlyTransform): """ class InitSchema(BaseTransformInitSchema): - num_output_channels: int = Field(default=3, description="The number of output channels.", ge=1) - method: Literal["weighted_average", "from_lab", "desaturation", "average", "max", "pca"] + num_output_channels: 
int = Field( + default=3, + description="The number of output channels.", + ge=1, + ) + method: Literal[ + "weighted_average", + "from_lab", + "desaturation", + "average", + "max", + "pca", + ] def __init__( self, num_output_channels: int = 3, - method: Literal["weighted_average", "from_lab", "desaturation", "average", "max", "pca"] = "weighted_average", + method: Literal[ + "weighted_average", + "from_lab", + "desaturation", + "average", + "max", + "pca", + ] = "weighted_average", always_apply: bool | None = None, p: float = 0.5, ): @@ -2816,7 +3062,12 @@ def apply(self, img: np.ndarray, **params: Any) -> np.ndarray: num_channels = get_num_channels(img) - if num_channels != NUM_RGB_CHANNELS and self.method not in {"desaturation", "average", "max", "pca"}: + if num_channels != NUM_RGB_CHANNELS and self.method not in { + "desaturation", + "average", + "max", + "pca", + }: msg = "ToGray transformation expects 3-channel images." raise TypeError(msg) @@ -2864,7 +3115,12 @@ class ToRGB(ImageOnlyTransform): class InitSchema(BaseTransformInitSchema): num_output_channels: int = Field(ge=1) - def __init__(self, num_output_channels: int = 3, p: float = 1.0, always_apply: bool | None = None): + def __init__( + self, + num_output_channels: int = 3, + p: float = 1.0, + always_apply: bool | None = None, + ): super().__init__(p=p, always_apply=always_apply) self.num_output_channels = num_output_channels @@ -2877,7 +3133,10 @@ def apply(self, img: np.ndarray, **params: Any) -> np.ndarray: msg = "ToRGB transformation expects 2-dim images or 3-dim with the last dimension equal to 1." 
raise TypeError(msg) - return fmain.grayscale_to_multichannel(img, num_output_channels=self.num_output_channels) + return fmain.grayscale_to_multichannel( + img, + num_output_channels=self.num_output_channels, + ) def get_transform_init_args_names(self) -> tuple[str]: return ("num_output_channels",) @@ -3020,7 +3279,12 @@ class ToFloat(ImageOnlyTransform): class InitSchema(BaseTransformInitSchema): max_value: float | None - def __init__(self, max_value: float | None = None, p: float = 1.0, always_apply: bool | None = None): + def __init__( + self, + max_value: float | None = None, + p: float = 1.0, + always_apply: bool | None = None, + ): super().__init__(p, always_apply) self.max_value = max_value @@ -3161,11 +3425,18 @@ class InitSchema(BaseTransformInitSchema): scale_max: float | None interpolation: int | Interpolation | InterpolationDict | None = Field( - default_factory=lambda: Interpolation(downscale=cv2.INTER_NEAREST, upscale=cv2.INTER_NEAREST), + default_factory=lambda: Interpolation( + downscale=cv2.INTER_NEAREST, + upscale=cv2.INTER_NEAREST, + ), ) interpolation_pair: InterpolationPydantic - scale_range: Annotated[tuple[float, float], AfterValidator(check_01), AfterValidator(nondecreasing)] + scale_range: Annotated[ + tuple[float, float], + AfterValidator(check_01), + AfterValidator(nondecreasing), + ] @model_validator(mode="after") def validate_params(self) -> Self: @@ -3188,7 +3459,9 @@ def validate_params(self) -> Self: ) if isinstance(self.interpolation, dict): - self.interpolation_pair = InterpolationPydantic(**self.interpolation) + self.interpolation_pair = InterpolationPydantic( + **self.interpolation, + ) elif isinstance(self.interpolation, int): self.interpolation_pair = InterpolationPydantic( upscale=self.interpolation, @@ -3398,7 +3671,11 @@ class MultiplicativeNoise(ImageOnlyTransform): """ class InitSchema(BaseTransformInitSchema): - multiplier: Annotated[tuple[float, float], AfterValidator(check_0plus), AfterValidator(nondecreasing)] + 
multiplier: Annotated[ + tuple[float, float], + AfterValidator(check_0plus), + AfterValidator(nondecreasing), + ] per_channel: bool elementwise: bool @@ -3423,7 +3700,11 @@ def apply( ) -> np.ndarray: return multiply(img, multiplier) - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, Any]: image = data["image"] if "image" in data else data["images"][0] num_channels = get_num_channels(image) @@ -3433,7 +3714,11 @@ def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, A else: shape = (num_channels,) if self.per_channel else (1,) - multiplier = self.random_generator.uniform(self.multiplier[0], self.multiplier[1], shape).astype(np.float32) + multiplier = self.random_generator.uniform( + self.multiplier[0], + self.multiplier[1], + shape, + ).astype(np.float32) if not self.per_channel and num_channels > 1: # Replicate the multiplier for all channels if not per_channel @@ -3501,17 +3786,33 @@ class FancyPCA(ImageOnlyTransform): class InitSchema(BaseTransformInitSchema): alpha: float = Field(ge=0) - def __init__(self, alpha: float = 0.1, p: float = 0.5, always_apply: bool | None = None): + def __init__( + self, + alpha: float = 0.1, + p: float = 0.5, + always_apply: bool | None = None, + ): super().__init__(p=p, always_apply=always_apply) self.alpha = alpha - def apply(self, img: np.ndarray, alpha_vector: np.ndarray, **params: Any) -> np.ndarray: + def apply( + self, + img: np.ndarray, + alpha_vector: np.ndarray, + **params: Any, + ) -> np.ndarray: return fmain.fancy_pca(img, alpha_vector) - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, Any]: shape = params["shape"] num_channels = shape[-1] if len(shape) == 
NUM_MULTI_CHANNEL_DIMENSIONS else 1 - alpha_vector = self.random_generator.normal(0, self.alpha, num_channels).astype(np.float32) + alpha_vector = self.random_generator.normal(0, self.alpha, num_channels).astype( + np.float32, + ) return {"alpha_vector": alpha_vector} def get_transform_init_args_names(self) -> tuple[str]: @@ -3599,7 +3900,11 @@ class InitSchema(BaseTransformInitSchema): @field_validator("brightness", "contrast", "saturation", "hue") @classmethod - def check_ranges(cls, value: ScaleFloatType, info: ValidationInfo) -> tuple[float, float]: + def check_ranges( + cls, + value: ScaleFloatType, + info: ValidationInfo, + ) -> tuple[float, float]: if info.field_name == "hue": bounds = -0.5, 0.5 bias = 0 @@ -3611,7 +3916,9 @@ def check_ranges(cls, value: ScaleFloatType, info: ValidationInfo) -> tuple[floa if isinstance(value, numbers.Number): if value < 0: - raise ValueError(f"If {info.field_name} is a single number, it must be non negative.") + raise ValueError( + f"If {info.field_name} is a single number, it must be non negative.", + ) left = bias - value if clip: left = max(left, 0) @@ -3831,7 +4138,10 @@ def __init__( self.sigma = sigma @staticmethod - def __generate_sharpening_matrix(alpha: np.ndarray, lightness: np.ndarray) -> np.ndarray: + def __generate_sharpening_matrix( + alpha: np.ndarray, + lightness: np.ndarray, + ) -> np.ndarray: matrix_nochange = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]], dtype=np.float32) matrix_effect = np.array( [[-1, -1, -1], [-1, 8 + lightness, -1], [-1, -1, -1]], @@ -3845,7 +4155,13 @@ def get_params(self) -> dict[str, Any]: if self.method == "kernel": lightness = self.py_random.uniform(*self.lightness) - return {"alpha": alpha, "sharpening_matrix": self.__generate_sharpening_matrix(alpha, lightness)} + return { + "alpha": alpha, + "sharpening_matrix": self.__generate_sharpening_matrix( + alpha, + lightness, + ), + } return {"alpha": alpha, "sharpening_matrix": None} @@ -3931,7 +4247,10 @@ def __init__( self.strength = 
strength @staticmethod - def __generate_emboss_matrix(alpha_sample: np.ndarray, strength_sample: np.ndarray) -> np.ndarray: + def __generate_emboss_matrix( + alpha_sample: np.ndarray, + strength_sample: np.ndarray, + ) -> np.ndarray: matrix_nochange = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]], dtype=np.float32) matrix_effect = np.array( [ @@ -3946,10 +4265,18 @@ def __generate_emboss_matrix(alpha_sample: np.ndarray, strength_sample: np.ndarr def get_params(self) -> dict[str, np.ndarray]: alpha = self.py_random.uniform(*self.alpha) strength = self.py_random.uniform(*self.strength) - emboss_matrix = self.__generate_emboss_matrix(alpha_sample=alpha, strength_sample=strength) + emboss_matrix = self.__generate_emboss_matrix( + alpha_sample=alpha, + strength_sample=strength, + ) return {"emboss_matrix": emboss_matrix} - def apply(self, img: np.ndarray, emboss_matrix: np.ndarray, **params: Any) -> np.ndarray: + def apply( + self, + img: np.ndarray, + emboss_matrix: np.ndarray, + **params: Any, + ) -> np.ndarray: return fmain.convolve(img, emboss_matrix) def get_transform_init_args_names(self) -> tuple[str, str]: @@ -4072,7 +4399,10 @@ def get_transform_init_args_names(self) -> tuple[str, ...]: def get_params(self) -> dict[str, Any]: n_segments = self.py_random.randint(*self.n_segments) p = self.py_random.uniform(*self.p_replace) - return {"replace_samples": self.random_generator.random(n_segments) < p, "n_segments": n_segments} + return { + "replace_samples": self.random_generator.random(n_segments) < p, + "n_segments": n_segments, + } def apply( self, @@ -4081,7 +4411,13 @@ def apply( n_segments: int, **kwargs: Any, ) -> np.ndarray: - return fmain.superpixels(img, n_segments, replace_samples, self.max_size, self.interpolation) + return fmain.superpixels( + img, + n_segments, + replace_samples, + self.max_size, + self.interpolation, + ) class RingingOvershoot(ImageOnlyTransform): @@ -4173,7 +4509,11 @@ class InitSchema(BlurInitSchema): @field_validator("cutoff") 
@classmethod - def check_cutoff(cls, v: tuple[float, float], info: ValidationInfo) -> tuple[float, float]: + def check_cutoff( + cls, + v: tuple[float, float], + info: ValidationInfo, + ) -> tuple[float, float]: bounds = 0, np.pi check_range(v, *bounds, info.field_name) return v @@ -4200,7 +4540,9 @@ def get_params(self) -> dict[str, np.ndarray]: with np.errstate(divide="ignore", invalid="ignore"): kernel = np.fromfunction( lambda x, y: cutoff - * special.j1(cutoff * np.sqrt((x - (ksize - 1) / 2) ** 2 + (y - (ksize - 1) / 2) ** 2)) + * special.j1( + cutoff * np.sqrt((x - (ksize - 1) / 2) ** 2 + (y - (ksize - 1) / 2) ** 2), + ) / (2 * np.pi * np.sqrt((x - (ksize - 1) / 2) ** 2 + (y - (ksize - 1) / 2) ** 2)), [ksize, ksize], ) @@ -4289,7 +4631,11 @@ class InitSchema(BaseTransformInitSchema): @field_validator("blur_limit") @classmethod - def process_blur(cls, value: ScaleIntType, info: ValidationInfo) -> tuple[int, int]: + def process_blur( + cls, + value: ScaleIntType, + info: ValidationInfo, + ) -> tuple[int, int]: return fblur.process_blur_limit(value, info, min_value=3) def __init__( @@ -4309,13 +4655,30 @@ def __init__( def get_params(self) -> dict[str, Any]: return { - "ksize": self.py_random.randrange(self.blur_limit[0], self.blur_limit[1] + 1, 2), + "ksize": self.py_random.randrange( + self.blur_limit[0], + self.blur_limit[1] + 1, + 2, + ), "sigma": self.py_random.uniform(*self.sigma_limit), "alpha": self.py_random.uniform(*self.alpha), } - def apply(self, img: np.ndarray, ksize: int, sigma: int, alpha: float, **params: Any) -> np.ndarray: - return fmain.unsharp_mask(img, ksize, sigma=sigma, alpha=alpha, threshold=self.threshold) + def apply( + self, + img: np.ndarray, + ksize: int, + sigma: int, + alpha: float, + **params: Any, + ) -> np.ndarray: + return fmain.unsharp_mask( + img, + ksize, + sigma=sigma, + alpha=alpha, + threshold=self.threshold, + ) def get_transform_init_args_names(self) -> tuple[str, ...]: return "blur_limit", "sigma_limit", "alpha", 
"threshold" @@ -4420,7 +4783,12 @@ def apply( ) -> np.ndarray: return fmain.pixel_dropout(img, drop_mask, drop_value) - def apply_to_mask(self, mask: np.ndarray, drop_mask: np.ndarray, **params: Any) -> np.ndarray: + def apply_to_mask( + self, + mask: np.ndarray, + drop_mask: np.ndarray, + **params: Any, + ) -> np.ndarray: if self.mask_drop_value is None: return mask @@ -4429,7 +4797,12 @@ def apply_to_mask(self, mask: np.ndarray, drop_mask: np.ndarray, **params: Any) return fmain.pixel_dropout(mask, drop_mask, self.mask_drop_value) - def apply_to_bboxes(self, bboxes: np.ndarray, drop_mask: np.ndarray | None, **params: Any) -> np.ndarray: + def apply_to_bboxes( + self, + bboxes: np.ndarray, + drop_mask: np.ndarray | None, + **params: Any, + ) -> np.ndarray: if drop_mask is None or self.per_channel: return bboxes @@ -4451,7 +4824,12 @@ def apply_to_bboxes(self, bboxes: np.ndarray, drop_mask: np.ndarray | None, **pa return normalize_bboxes(result, image_shape) - def apply_to_keypoints(self, keypoints: np.ndarray, drop_mask: np.ndarray | None, **params: Any) -> np.ndarray: + def apply_to_keypoints( + self, + keypoints: np.ndarray, + drop_mask: np.ndarray | None, + **params: Any, + ) -> np.ndarray: if drop_mask is None or self.per_channel: return keypoints @@ -4462,13 +4840,21 @@ def apply_to_keypoints(self, keypoints: np.ndarray, drop_mask: np.ndarray | None return fdropout.mask_dropout_keypoints(keypoints, drop_mask) - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, Any]: image = data["image"] if "image" in data else data["images"][0] shape = image.shape if self.per_channel else image.shape[:2] # Use choice to create boolean matrix, if we will use binomial after that we will need type conversion - drop_mask = self.random_generator.choice([True, False], shape, p=[self.dropout_prob, 1 - 
self.dropout_prob]) + drop_mask = self.random_generator.choice( + [True, False], + shape, + p=[self.dropout_prob, 1 - self.dropout_prob], + ) drop_value: float | Sequence[float] | np.ndarray @@ -4485,7 +4871,11 @@ def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, A dtype=image.dtype, ) elif image.dtype == np.float32: - drop_value = self.random_generator.uniform(0, 1, size=drop_shape).astype(image.dtype) + drop_value = self.random_generator.uniform( + 0, + 1, + size=drop_shape, + ).astype(image.dtype) else: raise ValueError(f"Unsupported dtype: {image.dtype}") else: @@ -4558,7 +4948,10 @@ class InitSchema(BaseTransformInitSchema): @field_validator("mode") @classmethod - def check_mode(cls, mode: SpatterMode | Sequence[SpatterMode]) -> Sequence[SpatterMode]: + def check_mode( + cls, + mode: SpatterMode | Sequence[SpatterMode], + ) -> Sequence[SpatterMode]: if isinstance(mode, str): return [mode] return mode @@ -4579,7 +4972,9 @@ def check_color(self) -> Self: if mode not in self.color: raise ValueError(f"Color for mode {mode} is not specified.") if len(self.color[mode]) != NUM_RGB_CHANNELS: - raise ValueError(f"Color for mode {mode} must be in RGB format.") + raise ValueError( + f"Color for mode {mode} must be in RGB format.", + ) result[mode] = self.color[mode] else: msg = "Color must be a list of RGB values or a dict mapping mode to RGB values." 
@@ -4619,7 +5014,11 @@ def apply( non_rgb_error(img) return fmain.spatter(img, non_mud, mud, drops, mode) - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, Any]: height, width = params["shape"][:2] mean = self.py_random.uniform(*self.mean) @@ -4630,7 +5029,11 @@ def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, A intensity = self.py_random.uniform(*self.intensity) color = np.array(self.color[mode]) / 255.0 - liquid_layer = self.random_generator.normal(size=(height, width), loc=mean, scale=std) + liquid_layer = self.random_generator.normal( + size=(height, width), + loc=mean, + scale=std, + ) liquid_layer = gaussian_filter(liquid_layer, sigma=sigma, mode="nearest") liquid_layer[liquid_layer < cutout_threshold] = 0 @@ -4670,7 +5073,15 @@ def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, A } def get_transform_init_args_names(self) -> tuple[str, str, str, str, str, str, str]: - return "mean", "std", "gauss_sigma", "intensity", "cutout_threshold", "mode", "color" + return ( + "mean", + "std", + "gauss_sigma", + "intensity", + "cutout_threshold", + "mode", + "color", + ) class ChromaticAberration(ImageOnlyTransform): @@ -4759,8 +5170,14 @@ def __init__( always_apply: bool | None = None, ): super().__init__(p=p, always_apply=always_apply) - self.primary_distortion_limit = cast(tuple[float, float], primary_distortion_limit) - self.secondary_distortion_limit = cast(tuple[float, float], secondary_distortion_limit) + self.primary_distortion_limit = cast( + tuple[float, float], + primary_distortion_limit, + ) + self.secondary_distortion_limit = cast( + tuple[float, float], + secondary_distortion_limit, + ) self.mode = mode self.interpolation = interpolation @@ -4785,21 +5202,43 @@ def apply( def get_params(self) -> dict[str, float]: primary_distortion_red 
= self.py_random.uniform(*self.primary_distortion_limit) - secondary_distortion_red = self.py_random.uniform(*self.secondary_distortion_limit) + secondary_distortion_red = self.py_random.uniform( + *self.secondary_distortion_limit, + ) primary_distortion_blue = self.py_random.uniform(*self.primary_distortion_limit) - secondary_distortion_blue = self.py_random.uniform(*self.secondary_distortion_limit) + secondary_distortion_blue = self.py_random.uniform( + *self.secondary_distortion_limit, + ) - secondary_distortion_red = self._match_sign(primary_distortion_red, secondary_distortion_red) - secondary_distortion_blue = self._match_sign(primary_distortion_blue, secondary_distortion_blue) + secondary_distortion_red = self._match_sign( + primary_distortion_red, + secondary_distortion_red, + ) + secondary_distortion_blue = self._match_sign( + primary_distortion_blue, + secondary_distortion_blue, + ) if self.mode == "green_purple": # distortion coefficients of the red and blue channels have the same sign - primary_distortion_blue = self._match_sign(primary_distortion_red, primary_distortion_blue) - secondary_distortion_blue = self._match_sign(secondary_distortion_red, secondary_distortion_blue) + primary_distortion_blue = self._match_sign( + primary_distortion_red, + primary_distortion_blue, + ) + secondary_distortion_blue = self._match_sign( + secondary_distortion_red, + secondary_distortion_blue, + ) if self.mode == "red_blue": # distortion coefficients of the red and blue channels have the opposite sign - primary_distortion_blue = self._unmatch_sign(primary_distortion_red, primary_distortion_blue) - secondary_distortion_blue = self._unmatch_sign(secondary_distortion_red, secondary_distortion_blue) + primary_distortion_blue = self._unmatch_sign( + primary_distortion_red, + primary_distortion_blue, + ) + secondary_distortion_blue = self._unmatch_sign( + secondary_distortion_red, + secondary_distortion_blue, + ) return { "primary_distortion_red": primary_distortion_red, @@ 
-4823,7 +5262,12 @@ def _unmatch_sign(a: float, b: float) -> float: return b def get_transform_init_args_names(self) -> tuple[str, str, str, str]: - return "primary_distortion_limit", "secondary_distortion_limit", "mode", "interpolation" + return ( + "primary_distortion_limit", + "secondary_distortion_limit", + "mode", + "interpolation", + ) class Morphological(DualTransform): @@ -4881,19 +5325,39 @@ def __init__( self.scale = cast(tuple[int, int], scale) self.operation = operation - def apply(self, img: np.ndarray, kernel: tuple[int, int], **params: Any) -> np.ndarray: + def apply( + self, + img: np.ndarray, + kernel: tuple[int, int], + **params: Any, + ) -> np.ndarray: return fmain.morphology(img, kernel, self.operation) - def apply_to_bboxes(self, bboxes: np.ndarray, kernel: tuple[int, int], **params: Any) -> np.ndarray: + def apply_to_bboxes( + self, + bboxes: np.ndarray, + kernel: tuple[int, int], + **params: Any, + ) -> np.ndarray: image_shape = params["shape"] denormalized_boxes = denormalize_bboxes(bboxes, image_shape) - result = fmain.bboxes_morphology(denormalized_boxes, kernel, self.operation, image_shape) + result = fmain.bboxes_morphology( + denormalized_boxes, + kernel, + self.operation, + image_shape, + ) return normalize_bboxes(result, image_shape) - def apply_to_keypoints(self, keypoints: np.ndarray, kernel: tuple[int, int], **params: Any) -> np.ndarray: + def apply_to_keypoints( + self, + keypoints: np.ndarray, + kernel: tuple[int, int], + **params: Any, + ) -> np.ndarray: return keypoints def get_params(self) -> dict[str, float]: @@ -4906,7 +5370,10 @@ def get_transform_init_args_names(self) -> tuple[str, ...]: PLANKIAN_JITTER_CONST = { - "MAX_TEMP": max(*fmain.PLANCKIAN_COEFFS["blackbody"].keys(), *fmain.PLANCKIAN_COEFFS["cied"].keys()), + "MAX_TEMP": max( + *fmain.PLANCKIAN_COEFFS["blackbody"].keys(), + *fmain.PLANCKIAN_COEFFS["cied"].keys(), + ), "MIN_BLACKBODY_TEMP": min(fmain.PLANCKIAN_COEFFS["blackbody"].keys()), "MIN_CIED_TEMP": 
min(fmain.PLANCKIAN_COEFFS["cied"].keys()), "WHITE_TEMP": 6_000, @@ -5011,23 +5478,35 @@ def validate_temperature(self) -> Self: if self.temperature_limit is None: if self.mode == "blackbody": - self.temperature_limit = int(PLANKIAN_JITTER_CONST["MIN_BLACKBODY_TEMP"]), max_temp + self.temperature_limit = ( + int(PLANKIAN_JITTER_CONST["MIN_BLACKBODY_TEMP"]), + max_temp, + ) elif self.mode == "cied": - self.temperature_limit = int(PLANKIAN_JITTER_CONST["MIN_CIED_TEMP"]), max_temp + self.temperature_limit = ( + int(PLANKIAN_JITTER_CONST["MIN_CIED_TEMP"]), + max_temp, + ) else: if self.mode == "blackbody" and ( min(self.temperature_limit) < PLANKIAN_JITTER_CONST["MIN_BLACKBODY_TEMP"] or max(self.temperature_limit) > max_temp ): - raise ValueError("Temperature limits for blackbody should be in [3000, 15000] range") + raise ValueError( + "Temperature limits for blackbody should be in [3000, 15000] range", + ) if self.mode == "cied" and ( min(self.temperature_limit) < PLANKIAN_JITTER_CONST["MIN_CIED_TEMP"] or max(self.temperature_limit) > max_temp ): - raise ValueError("Temperature limits for CIED should be in [4000, 15000] range") + raise ValueError( + "Temperature limits for CIED should be in [4000, 15000] range", + ) if not self.temperature_limit[0] <= PLANKIAN_JITTER_CONST["WHITE_TEMP"] <= self.temperature_limit[1]: - raise ValueError("White temperature should be within the temperature limits") + raise ValueError( + "White temperature should be within the temperature limits", + ) return self @@ -5088,7 +5567,11 @@ def get_params(self) -> dict[str, Any]: raise ValueError(f"Unknown sampling method: {self.sampling_method}") # Ensure temperature is within the valid range - temperature = np.clip(temperature, self.temperature_limit[0], self.temperature_limit[1]) + temperature = np.clip( + temperature, + self.temperature_limit[0], + self.temperature_limit[1], + ) return {"temperature": int(temperature)} @@ -5152,13 +5635,28 @@ class ShotNoise(ImageOnlyTransform): """ class 
InitSchema(BaseTransformInitSchema): - scale_range: Annotated[tuple[float, float], AfterValidator(nondecreasing), AfterValidator(check_0plus)] + scale_range: Annotated[ + tuple[float, float], + AfterValidator(nondecreasing), + AfterValidator(check_0plus), + ] - def __init__(self, scale_range: tuple[float, float] = (0.1, 0.3), p: float = 0.5, always_apply: bool = False): + def __init__( + self, + scale_range: tuple[float, float] = (0.1, 0.3), + p: float = 0.5, + always_apply: bool = False, + ): super().__init__(p=p, always_apply=always_apply) self.scale_range = scale_range - def apply(self, img: np.ndarray, scale: float, random_seed: int, **params: Any) -> np.ndarray: + def apply( + self, + img: np.ndarray, + scale: float, + random_seed: int, + **params: Any, + ) -> np.ndarray: return fmain.shot_noise(img, scale, np.random.default_rng(random_seed)) def get_params(self) -> dict[str, Any]: @@ -5180,43 +5678,67 @@ class NoiseParamsBase(BaseModel): class UniformParams(NoiseParamsBase): noise_type: Literal["uniform"] = "uniform" - ranges: list[tuple[float, float]] = Field(description="List of (min, max) ranges for each channel", min_length=1) + ranges: list[Sequence[float]] = Field( + description="List of (min, max) ranges for each channel", + min_length=1, + ) @field_validator("ranges", mode="after") @classmethod - def validate_ranges(cls, v: list[tuple[float, float]]) -> list[tuple[float, float]]: - for min_val, max_val in v: + def validate_ranges(cls, v: list[Sequence[float]]) -> list[tuple[float, float]]: + result = [] + for range_values in v: + if len(range_values) != PAIR: + raise ValueError("Each range must have exactly 2 values") + min_val, max_val = range_values if not (-1 <= min_val <= max_val <= 1): raise ValueError("Range values must be in [-1, 1] and min <= max") - return v + result.append((float(min_val), float(max_val))) + return result class GaussianParams(NoiseParamsBase): noise_type: Literal["gaussian"] = "gaussian" - mean_range: Annotated[tuple[float, 
float], AfterValidator(check_range_bounds(min_val=-1, max_val=1))] - std_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(min_val=0, max_val=1))] + mean_range: Annotated[ + Sequence[float], + AfterValidator(check_range_bounds(min_val=-1, max_val=1)), + ] + std_range: Annotated[ + Sequence[float], + AfterValidator(check_range_bounds(min_val=0, max_val=1)), + ] class LaplaceParams(NoiseParamsBase): noise_type: Literal["laplace"] = "laplace" - mean_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(min_val=-1, max_val=1))] - scale_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(min_val=0, max_val=1))] + mean_range: Annotated[ + Sequence[float], + AfterValidator(check_range_bounds(min_val=-1, max_val=1)), + ] + scale_range: Annotated[ + Sequence[float], + AfterValidator(check_range_bounds(min_val=0, max_val=1)), + ] class BetaParams(NoiseParamsBase): noise_type: Literal["beta"] = "beta" - alpha_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(min_val=0))] - beta_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(min_val=0))] - scale_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(min_val=0, max_val=1))] - - -class PoissonParams(NoiseParamsBase): - noise_type: Literal["poisson"] = "poisson" - lambda_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(min_val=0))] + alpha_range: Annotated[ + Sequence[float], + AfterValidator(check_range_bounds(min_val=0)), + ] + beta_range: Annotated[ + Sequence[float], + AfterValidator(check_range_bounds(min_val=0)), + ] + scale_range: Annotated[ + Sequence[float], + AfterValidator(check_range_bounds(min_val=0, max_val=1)), + ] NoiseParams = Annotated[ - Union[UniformParams, GaussianParams, LaplaceParams, BetaParams, PoissonParams], + Union[UniformParams, GaussianParams, LaplaceParams, BetaParams], Field(discriminator="noise_type"), ] @@ -5234,7 +5756,6 @@ class 
AdditiveNoise(ImageOnlyTransform): - "gaussian": Normal distribution, models natural random processes - "laplace": Similar to Gaussian but with heavier tails, good for outliers - "beta": Flexible bounded distribution, can be symmetric or skewed - - "poisson": Models sensor/shot noise, intensity-dependent spatial_mode: How to generate and apply the noise. Options: - "constant": One noise value per channel, fastest @@ -5255,8 +5776,10 @@ class AdditiveNoise(ImageOnlyTransform): ranges: list[tuple[float, float]] List of (min, max) ranges for each channel. Each range must be in [-1, 1]. - Length must match number of channels. - Example: [(-0.2, 0.2), (-0.1, 0.1), (-0.1, 0.1)] for RGB + If only one range is provided, it will be used for all channels. + + [(-0.2, 0.2)] # Same range for all channels + [(-0.2, 0.2), (-0.1, 0.1), (-0.1, 0.1)] # Different ranges for RGB gaussian: mean_range: tuple[float, float], default (0.0, 0.0) @@ -5271,18 +5794,16 @@ class AdditiveNoise(ImageOnlyTransform): Range for sampling scale parameter, in [0, 1] beta: - alpha_range: tuple[float, float], default (2.0, 2.0) + alpha_range: tuple[float, float], default (0.5, 1.5) + Value < 1 = U-shaped, Value > 1 = Bell-shaped Range for sampling first shape parameter, in (0, inf) - beta_range: tuple[float, float], default (2.0, 2.0) + beta_range: tuple[float, float], default (0.5, 1.5) + Value < 1 = U-shaped, Value > 1 = Bell-shaped Range for sampling second shape parameter, in (0, inf) - scale_range: tuple[float, float], default (1.0, 1.0) + scale_range: tuple[float, float], default (0.1, 0.3) + Smaller scale for subtler noise Range for sampling output scale, in [0, 1] - poisson: - lambda_range: tuple[float, float], default (1.0, 1.0) - Range for sampling intensity parameter, in (0, inf) - Higher values = more noise - Note: Performance considerations: - "constant" mode is fastest as it generates only C values (C = number of channels) @@ -5294,13 +5815,11 @@ class AdditiveNoise(ImageOnlyTransform): - 
gaussian: Bell-shaped, symmetric, good for natural noise - laplace: Like gaussian but with heavier tails, good for outliers - beta: Very flexible shape, can be uniform, bell-shaped, or U-shaped - - poisson: Discrete, variance increases with intensity, models sensor noise Implementation details: - All noise is generated in normalized range and scaled by image max value - For uint8 images, final noise range is [-255, 255] - For float images, final noise range is [-1, 1] - - Poisson noise is handled differently as it's intensity-dependent Examples: Constant RGB shift with different ranges per channel: @@ -5317,41 +5836,50 @@ class AdditiveNoise(ImageOnlyTransform): ... noise_params={"mean_range": (0.0, 0.0), "std_range": (0.05, 0.15)} ... ) - Poisson noise for modeling sensor noise: - >>> transform = AdditiveNoise( - ... noise_type="poisson", - ... spatial_mode="per_pixel", - ... noise_params={"lambda_range": (1.0, 5.0)} - ... ) """ class InitSchema(BaseTransformInitSchema): - noise_type: Literal["uniform", "gaussian", "laplace", "beta", "poisson"] + noise_type: Literal["uniform", "gaussian", "laplace", "beta"] spatial_mode: Literal["constant", "per_pixel", "shared"] noise_params: dict[str, Any] | None approximation: float = Field(ge=0.0, le=1.0) @model_validator(mode="after") def validate_noise_params(self) -> Self: - if isinstance(self.noise_params, dict): - # Convert dict to appropriate NoiseParams object - params_class = { - "uniform": UniformParams, - "gaussian": GaussianParams, - "laplace": LaplaceParams, - "beta": BetaParams, - "poisson": PoissonParams, - }[self.noise_type] - - # Add noise_type to params if not present - params_dict = {**self.noise_params, "noise_type": self.noise_type} - self.noise_params = params_class(**params_dict) + # Default parameters for each noise type + default_params = { + "uniform": { + "ranges": [(-0.1, 0.1)], # Single channel by default + }, + "gaussian": {"mean_range": (0.0, 0.0), "std_range": (0.05, 0.15)}, + "laplace": 
{"mean_range": (0.0, 0.0), "scale_range": (0.05, 0.15)}, + "beta": { + "alpha_range": (0.5, 1.5), + "beta_range": (0.5, 1.5), + "scale_range": (0.1, 0.3), + }, + } + + # Use default params if none provided + params_dict = self.noise_params if self.noise_params is not None else default_params[self.noise_type] + + # Convert dict to appropriate NoiseParams object + params_class = { + "uniform": UniformParams, + "gaussian": GaussianParams, + "laplace": LaplaceParams, + "beta": BetaParams, + }[self.noise_type] + + # Add noise_type to params if not present + params_dict = {**params_dict, "noise_type": self.noise_type} # type: ignore[dict-item] + self.noise_params = params_class(**params_dict) return self def __init__( self, - noise_type: Literal["uniform", "gaussian", "laplace", "beta", "poisson"] = "uniform", + noise_type: Literal["uniform", "gaussian", "laplace", "beta"] = "uniform", spatial_mode: Literal["constant", "per_pixel", "shared"] = "constant", noise_params: dict[str, Any] | None = None, approximation: float = 1.0, @@ -5364,10 +5892,19 @@ def __init__( self.noise_params = noise_params self.approximation = approximation - def apply(self, img: np.ndarray, noise_map: np.ndarray, **params: Any) -> np.ndarray: + def apply( + self, + img: np.ndarray, + noise_map: np.ndarray, + **params: Any, + ) -> np.ndarray: return fmain.add_noise(img, noise_map) - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, Any]: image = data["image"] if "image" in data else data["images"][0] max_value = MAX_VALUES_BY_DTYPE[image.dtype] @@ -5460,7 +5997,7 @@ class RGBShift(AdditiveNoise): See Also: - AdditiveNoise: More general noise transform with various options: - * Different noise distributions (uniform, gaussian, laplace, beta, poisson) + * Different noise distributions (uniform, gaussian, laplace, beta) * Spatial modes 
(constant, per-pixel, shared) * Approximation for faster computation - RandomToneCurve: For non-linear color transformations @@ -5609,7 +6146,11 @@ def __init__( self.amount = amount self.salt_vs_pepper = salt_vs_pepper - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, Any]: image = data["image"] if "image" in data else data["images"][0] # Sample total amount and salt ratio @@ -5629,7 +6170,13 @@ def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, A "pepper_mask": pepper_mask, } - def apply(self, img: np.ndarray, salt_mask: np.ndarray, pepper_mask: np.ndarray, **params: Any) -> np.ndarray: + def apply( + self, + img: np.ndarray, + salt_mask: np.ndarray, + pepper_mask: np.ndarray, + **params: Any, + ) -> np.ndarray: return fmain.apply_salt_and_pepper(img, salt_mask, pepper_mask) def get_transform_init_args_names(self) -> tuple[str, ...]: @@ -5753,8 +6300,14 @@ class PlasmaBrightnessContrast(ImageOnlyTransform): """ class InitSchema(BaseTransformInitSchema): - brightness_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(-1, 1))] - contrast_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(-1, 1))] + brightness_range: Annotated[ + tuple[float, float], + AfterValidator(check_range_bounds(-1, 1)), + ] + contrast_range: Annotated[ + tuple[float, float], + AfterValidator(check_range_bounds(-1, 1)), + ] plasma_size: int = Field(default=256, gt=0) roughness: float = Field(default=3.0, gt=0) @@ -5773,7 +6326,11 @@ def __init__( self.plasma_size = plasma_size self.roughness = roughness - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, Any]: image = data["image"] if "image" in 
data else data["images"][0] # Sample adjustment strengths @@ -5927,7 +6484,11 @@ def __init__( self.plasma_size = plasma_size self.roughness = roughness - def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]: + def get_params_dependent_on_data( + self, + params: dict[str, Any], + data: dict[str, Any], + ) -> dict[str, Any]: image = data["image"] if "image" in data else data["images"][0] # Sample shadow intensity @@ -6091,11 +6652,23 @@ class Illumination(ImageOnlyTransform): class InitSchema(BaseTransformInitSchema): mode: Literal["linear", "corner", "gaussian"] - intensity_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(0.01, 0.2))] + intensity_range: Annotated[ + tuple[float, float], + AfterValidator(check_range_bounds(0.01, 0.2)), + ] effect_type: Literal["brighten", "darken", "both"] - angle_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(0, 360))] - center_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(0, 1))] - sigma_range: Annotated[tuple[float, float], AfterValidator(check_range_bounds(0.2, 1.0))] + angle_range: Annotated[ + tuple[float, float], + AfterValidator(check_range_bounds(0, 360)), + ] + center_range: Annotated[ + tuple[float, float], + AfterValidator(check_range_bounds(0, 1)), + ] + sigma_range: Annotated[ + tuple[float, float], + AfterValidator(check_range_bounds(0.2, 1.0)), + ] def __init__( self, @@ -6172,7 +6745,14 @@ def apply(self, img: np.ndarray, **params: Any) -> np.ndarray: ) def get_transform_init_args_names(self) -> tuple[str, ...]: - return "mode", "intensity_range", "effect_type", "angle_range", "center_range", "sigma_range" + return ( + "mode", + "intensity_range", + "effect_type", + "angle_range", + "center_range", + "sigma_range", + ) class AutoContrast(ImageOnlyTransform): diff --git a/albumentations/augmentations/utils.py b/albumentations/augmentations/utils.py index 0c9b0e262..d0d919cc1 100644 --- 
a/albumentations/augmentations/utils.py +++ b/albumentations/augmentations/utils.py @@ -20,11 +20,11 @@ __all__ = [ - "read_bgr_image", - "read_rgb_image", - "read_grayscale", "angle_2pi_range", "non_rgb_error", + "read_bgr_image", + "read_grayscale", + "read_rgb_image", ] P = ParamSpec("P") diff --git a/albumentations/core/bbox_utils.py b/albumentations/core/bbox_utils.py index c8f414376..071c93ba5 100644 --- a/albumentations/core/bbox_utils.py +++ b/albumentations/core/bbox_utils.py @@ -11,15 +11,15 @@ from .utils import DataProcessor, Params __all__ = [ - "normalize_bboxes", - "denormalize_bboxes", - "convert_bboxes_to_albumentations", - "convert_bboxes_from_albumentations", + "BboxParams", + "BboxProcessor", "check_bboxes", + "convert_bboxes_from_albumentations", + "convert_bboxes_to_albumentations", + "denormalize_bboxes", "filter_bboxes", + "normalize_bboxes", "union_of_bboxes", - "BboxProcessor", - "BboxParams", ] BBOX_WITH_LABEL_SHAPE = 5 diff --git a/albumentations/core/composition.py b/albumentations/core/composition.py index 6eb9ec119..0c4cf3541 100644 --- a/albumentations/core/composition.py +++ b/albumentations/core/composition.py @@ -25,16 +25,16 @@ __all__ = [ "BaseCompose", + "BboxParams", "Compose", - "SomeOf", + "KeypointParams", "OneOf", "OneOrOther", - "BboxParams", - "KeypointParams", + "RandomOrder", "ReplayCompose", - "Sequential", "SelectiveChannelTransform", - "RandomOrder", + "Sequential", + "SomeOf", ] NUM_ONEOF_TRANSFORMS = 2 diff --git a/albumentations/core/keypoints_utils.py b/albumentations/core/keypoints_utils.py index a53e94475..8994f6dd6 100644 --- a/albumentations/core/keypoints_utils.py +++ b/albumentations/core/keypoints_utils.py @@ -11,13 +11,13 @@ from .utils import DataProcessor, Params __all__ = [ + "KeypointParams", + "KeypointsProcessor", "angle_to_2pi_range", "check_keypoints", "convert_keypoints_from_albumentations", "convert_keypoints_to_albumentations", "filter_keypoints", - "KeypointsProcessor", - "KeypointParams", ] 
keypoint_formats = {"xy", "yx", "xya", "xys", "xyas", "xysa"} diff --git a/albumentations/core/serialization.py b/albumentations/core/serialization.py index ca91bdd9e..9bc499ca4 100644 --- a/albumentations/core/serialization.py +++ b/albumentations/core/serialization.py @@ -19,7 +19,7 @@ from albumentations import __version__ -__all__ = ["to_dict", "from_dict", "save", "load"] +__all__ = ["from_dict", "load", "save", "to_dict"] SERIALIZABLE_REGISTRY: dict[str, SerializableMeta] = {} diff --git a/pyproject.toml b/pyproject.toml index 384fee3df..88e4f37c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -128,8 +128,7 @@ format.skip-magic-trailing-comma = false # Like Black, automatically detect the appropriate line ending. lint.select = [ "ALL" ] lint.ignore = [ - "ANN101", - "ANN102", + "ANN001", "ANN204", "ANN401", "ARG001", @@ -156,9 +155,9 @@ lint.ignore = [ "PLR0913", "PTH123", "S311", - "TCH001", - "TCH002", - "TCH003", + "TC001", + "TC002", + "TC003", "TRY003", ] From 34e24356904dcf05a3a1534bca8e48299f2268d2 Mon Sep 17 00:00:00 2001 From: Vladimir Iglovikov Date: Mon, 2 Dec 2024 16:15:36 -0800 Subject: [PATCH 4/7] Refcactoring --- albumentations/augmentations/functional.py | 372 +++++++++++++++++---- albumentations/augmentations/transforms.py | 28 +- tests/aug_definitions.py | 32 +- tests/test_serialization.py | 140 ++++++-- 4 files changed, 451 insertions(+), 121 deletions(-) diff --git a/albumentations/augmentations/functional.py b/albumentations/augmentations/functional.py index 329aec769..263000855 100644 --- a/albumentations/augmentations/functional.py +++ b/albumentations/augmentations/functional.py @@ -88,7 +88,12 @@ @uint8_io @preserve_channel_dim -def shift_hsv(img: np.ndarray, hue_shift: float, sat_shift: float, val_shift: float) -> np.ndarray: +def shift_hsv( + img: np.ndarray, + hue_shift: float, + sat_shift: float, + val_shift: float, +) -> np.ndarray: if hue_shift == 0 and sat_shift == 0 and val_shift == 0: return img @@ -162,7 +167,7 @@ def 
solarize(img: np.ndarray, threshold: float) -> np.ndarray: @uint8_io @clipped -def posterize(img: np.ndarray, bits: Literal[0, 1, 2, 3, 4, 5, 6, 7, 8]) -> np.ndarray: +def posterize(img: np.ndarray, bits: Literal[1, 2, 3, 4, 5, 6, 7, 8]) -> np.ndarray: """Reduce the number of bits for each color channel. Args: @@ -176,8 +181,6 @@ def posterize(img: np.ndarray, bits: Literal[0, 1, 2, 3, 4, 5, 6, 7, 8]) -> np.n bits_array = np.uint8(bits) if not bits_array.shape or len(bits_array) == 1: - if bits_array == 0: - return np.zeros_like(img) if bits_array == EIGHT: return img @@ -189,9 +192,7 @@ def posterize(img: np.ndarray, bits: Literal[0, 1, 2, 3, 4, 5, 6, 7, 8]) -> np.n result_img = np.empty_like(img) for i, channel_bits in enumerate(bits_array): - if channel_bits == 0: - result_img[..., i] = np.zeros_like(img[..., i]) - elif channel_bits == EIGHT: + if channel_bits == EIGHT: result_img[..., i] = img[..., i].copy() else: lut = np.arange(0, 256, dtype=np.uint8) @@ -251,10 +252,16 @@ def _equalize_cv(img: np.ndarray, mask: np.ndarray | None = None) -> np.ndarray: return sz_lut(img, lut, inplace=True) -def _check_preconditions(img: np.ndarray, mask: np.ndarray | None, by_channels: bool) -> None: +def _check_preconditions( + img: np.ndarray, + mask: np.ndarray | None, + by_channels: bool, +) -> None: if mask is not None: if is_rgb_image(mask) and is_grayscale_image(img): - raise ValueError(f"Wrong mask shape. Image shape: {img.shape}. Mask shape: {mask.shape}") + raise ValueError( + f"Wrong mask shape. Image shape: {img.shape}. Mask shape: {mask.shape}", + ) if not by_channels and not is_grayscale_image(mask): msg = f"When by_channels=False only 1-channel mask supports. 
Mask shape: {mask.shape}" raise ValueError(msg) @@ -356,7 +363,11 @@ def move_tone_curve( """ t = np.linspace(0.0, 1.0, 256) - def evaluate_bez(t: np.ndarray, low_y: float | np.ndarray, high_y: float | np.ndarray) -> np.ndarray: + def evaluate_bez( + t: np.ndarray, + low_y: float | np.ndarray, + high_y: float | np.ndarray, + ) -> np.ndarray: one_minus_t = 1 - t return (3 * one_minus_t**2 * t * low_y + 3 * one_minus_t * t**2 * high_y + t**3) * 255 @@ -366,7 +377,11 @@ def evaluate_bez(t: np.ndarray, low_y: float | np.ndarray, high_y: float | np.nd lut = clip(np.rint(evaluate_bez(t, low_y, high_y)), np.uint8, inplace=False) return sz_lut(img, lut, inplace=False) if isinstance(low_y, np.ndarray) and isinstance(high_y, np.ndarray): - luts = clip(np.rint(evaluate_bez(t[:, np.newaxis], low_y, high_y).T), np.uint8, inplace=False) + luts = clip( + np.rint(evaluate_bez(t[:, np.newaxis], low_y, high_y).T), + np.uint8, + inplace=False, + ) return cv2.merge( [sz_lut(img[:, :, i], np.ascontiguousarray(luts[i]), inplace=False) for i in range(num_channels)], ) @@ -377,13 +392,20 @@ def evaluate_bez(t: np.ndarray, low_y: float | np.ndarray, high_y: float | np.nd @clipped -def linear_transformation_rgb(img: np.ndarray, transformation_matrix: np.ndarray) -> np.ndarray: +def linear_transformation_rgb( + img: np.ndarray, + transformation_matrix: np.ndarray, +) -> np.ndarray: return cv2.transform(img, transformation_matrix) @uint8_io @preserve_channel_dim -def clahe(img: np.ndarray, clip_limit: float, tile_grid_size: tuple[int, int]) -> np.ndarray: +def clahe( + img: np.ndarray, + clip_limit: float, + tile_grid_size: tuple[int, int], +) -> np.ndarray: """Apply Contrast Limited Adaptive Histogram Equalization (CLAHE) to the input image. This function enhances the contrast of the input image using CLAHE. 
For color images, @@ -436,7 +458,11 @@ def convolve(img: np.ndarray, kernel: np.ndarray) -> np.ndarray: @uint8_io @preserve_channel_dim -def image_compression(img: np.ndarray, quality: int, image_type: Literal[".jpg", ".webp"]) -> np.ndarray: +def image_compression( + img: np.ndarray, + quality: int, + image_type: Literal[".jpg", ".webp"], +) -> np.ndarray: """Apply compression to image. Args: @@ -493,7 +519,11 @@ def image_compression(img: np.ndarray, quality: int, image_type: Literal[".jpg", @uint8_io -def add_snow_bleach(img: np.ndarray, snow_point: float, brightness_coeff: float) -> np.ndarray: +def add_snow_bleach( + img: np.ndarray, + snow_point: float, + brightness_coeff: float, +) -> np.ndarray: """Adds a simple snow effect to the image by bleaching out pixels. This function simulates a basic snow effect by increasing the brightness of pixels @@ -644,7 +674,11 @@ def add_snow_texture( img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV).astype(np.float32) # Increase brightness - img_hsv[:, :, 2] = np.clip(img_hsv[:, :, 2] * (1 + brightness_coeff * snow_point), 0, max_value) + img_hsv[:, :, 2] = np.clip( + img_hsv[:, :, 2] * (1 + brightness_coeff * snow_point), + 0, + max_value, + ) # Generate snow texture snow_texture = cv2.GaussianBlur(snow_texture, (0, 0), sigmaX=1, sigmaY=1) @@ -658,7 +692,9 @@ def add_snow_texture( snow_texture *= depth_effect # Apply snow texture - snow_layer = (np.dstack([snow_texture] * 3) * max_value * snow_point).astype(np.float32) + snow_layer = (np.dstack([snow_texture] * 3) * max_value * snow_point).astype( + np.float32, + ) # Blend snow with original image img_with_snow = cv2.add(img_hsv, snow_layer) @@ -666,7 +702,13 @@ def add_snow_texture( # Add a slight blue tint to simulate cool snow color blue_tint = np.full_like(img_with_snow, (0.6, 0.75, 1)) # Slight blue in HSV - img_with_snow = cv2.addWeighted(img_with_snow, 0.85, blue_tint, 0.15 * snow_point, 0) + img_with_snow = cv2.addWeighted( + img_with_snow, + 0.85, + blue_tint, + 
0.15 * snow_point, + 0, + ) # Convert back to RGB img_with_snow = cv2.cvtColor(img_with_snow.astype(np.uint8), cv2.COLOR_HSV2RGB) @@ -989,8 +1031,18 @@ def add_sun_flare_physics_based( # Add chromatic aberration channels = list(cv2.split(flare_layer)) - channels[0] = cv2.GaussianBlur(channels[0], (0, 0), sigmaX=3, sigmaY=3) # Blue channel - channels[2] = cv2.GaussianBlur(channels[2], (0, 0), sigmaX=5, sigmaY=5) # Red channel + channels[0] = cv2.GaussianBlur( + channels[0], + (0, 0), + sigmaX=3, + sigmaY=3, + ) # Blue channel + channels[2] = cv2.GaussianBlur( + channels[2], + (0, 0), + sigmaX=5, + sigmaY=5, + ) # Red channel flare_layer = cv2.merge(channels) # Blend the flare with the original image using screen blending @@ -999,7 +1051,11 @@ def add_sun_flare_physics_based( @uint8_io @preserve_channel_dim -def add_shadow(img: np.ndarray, vertices_list: list[np.ndarray], intensities: np.ndarray) -> np.ndarray: +def add_shadow( + img: np.ndarray, + vertices_list: list[np.ndarray], + intensities: np.ndarray, +) -> np.ndarray: """Add shadows to the image by reducing the intensity of the pixel values in specified regions. 
Args: @@ -1102,11 +1158,21 @@ def iso_noise( hls = cv2.cvtColor(image, cv2.COLOR_RGB2HLS) _, stddev = cv2.meanStdDev(hls) - luminance_noise = random_generator.poisson(stddev[1] * intensity, size=hls.shape[:2]) - color_noise = random_generator.normal(0, color_shift * intensity, size=hls.shape[:2]) + luminance_noise = random_generator.poisson( + stddev[1] * intensity, + size=hls.shape[:2], + ) + color_noise = random_generator.normal( + 0, + color_shift * intensity, + size=hls.shape[:2], + ) hls[..., 0] += color_noise - hls[..., 1] = add_array(hls[..., 1], luminance_noise * intensity * (1.0 - hls[..., 1])) + hls[..., 1] = add_array( + hls[..., 1], + luminance_noise * intensity * (1.0 - hls[..., 1]), + ) noised_hls = cv2.cvtColor(hls, cv2.COLOR_HLS2RGB) return np.clip(noised_hls, 0, 1, out=noised_hls) # Ensure output is in [0, 1] range @@ -1302,7 +1368,14 @@ def to_gray_pca(img: np.ndarray) -> np.ndarray: def to_gray( img: np.ndarray, num_output_channels: int, - method: Literal["weighted_average", "from_lab", "desaturation", "average", "max", "pca"], + method: Literal[ + "weighted_average", + "from_lab", + "desaturation", + "average", + "max", + "pca", + ], ) -> np.ndarray: if method == "weighted_average": result = to_gray_weighted_average(img) @@ -1322,7 +1395,10 @@ def to_gray( return grayscale_to_multichannel(result, num_output_channels) -def grayscale_to_multichannel(grayscale_image: np.ndarray, num_output_channels: int = 3) -> np.ndarray: +def grayscale_to_multichannel( + grayscale_image: np.ndarray, + num_output_channels: int = 3, +) -> np.ndarray: """Convert a grayscale image to a multi-channel image. 
This function takes a 2D grayscale image or a 3D image with a single channel @@ -1363,7 +1439,13 @@ def downscale( if need_cast: img = to_float(img) - downscaled = cv2.resize(img, None, fx=scale, fy=scale, interpolation=down_interpolation) + downscaled = cv2.resize( + img, + None, + fx=scale, + fy=scale, + interpolation=down_interpolation, + ) upscaled = cv2.resize(downscaled, (width, height), interpolation=up_interpolation) return from_float(upscaled, target_dtype=np.uint8) if need_cast else upscaled @@ -1434,7 +1516,10 @@ def fancy_pca(img: np.ndarray, alpha_vector: np.ndarray) -> np.ndarray: eig_vecs = eig_vecs[:, sort_perm] # Create noise vector - noise = np.dot(np.dot(eig_vecs, np.diag(alpha_vector * eig_vals)), img_centered.T).T + noise = np.dot( + np.dot(eig_vecs, np.diag(alpha_vector * eig_vals)), + img_centered.T, + ).T # Add noise to the image img_pca = img_reshaped + noise @@ -1473,7 +1558,11 @@ def adjust_contrast_torchvision(img: np.ndarray, factor: float) -> np.ndarray: @clipped @preserve_channel_dim -def adjust_saturation_torchvision(img: np.ndarray, factor: float, gamma: float = 0) -> np.ndarray: +def adjust_saturation_torchvision( + img: np.ndarray, + factor: float, + gamma: float = 0, +) -> np.ndarray: if factor == 1 or is_grayscale_image(img): return img @@ -1581,7 +1670,11 @@ def unsharp_mask( alpha: float = 0.2, threshold: int = 10, ) -> np.ndarray: - blur_fn = maybe_process_in_chunks(cv2.GaussianBlur, ksize=(ksize, ksize), sigmaX=sigma) + blur_fn = maybe_process_in_chunks( + cv2.GaussianBlur, + ksize=(ksize, ksize), + sigmaX=sigma, + ) if image.ndim == NUM_MULTI_CHANNEL_DIMENSIONS and get_num_channels(image) == 1: image = np.squeeze(image, axis=-1) @@ -1599,11 +1692,19 @@ def unsharp_mask( soft_mask = blur_fn(mask) - return add_array(multiply(sharp, soft_mask), multiply(image, 1 - soft_mask), inplace=True) + return add_array( + multiply(sharp, soft_mask), + multiply(image, 1 - soft_mask), + inplace=True, + ) @preserve_channel_dim -def 
pixel_dropout(image: np.ndarray, drop_mask: np.ndarray, drop_value: float | Sequence[float]) -> np.ndarray: +def pixel_dropout( + image: np.ndarray, + drop_mask: np.ndarray, + drop_value: float | Sequence[float], +) -> np.ndarray: if isinstance(drop_value, (int, float)) and drop_value == 0: drop_values = np.zeros_like(image) else: @@ -1660,8 +1761,14 @@ def chromatic_aberration( camera_mat[1, 2] = height / 2.0 # Build distortion coefficients - distortion_coeffs_red = np.array([primary_distortion_red, secondary_distortion_red, 0, 0], dtype=np.float32) - distortion_coeffs_blue = np.array([primary_distortion_blue, secondary_distortion_blue, 0, 0], dtype=np.float32) + distortion_coeffs_red = np.array( + [primary_distortion_red, secondary_distortion_red, 0, 0], + dtype=np.float32, + ) + distortion_coeffs_blue = np.array( + [primary_distortion_blue, secondary_distortion_blue, 0, 0], + dtype=np.float32, + ) # Distort the red and blue channels red_distorted = _distort_channel( @@ -1719,7 +1826,11 @@ def dilate(img: np.ndarray, kernel: np.ndarray) -> np.ndarray: return cv2.dilate(img, kernel, iterations=1) -def morphology(img: np.ndarray, kernel: np.ndarray, operation: Literal["dilation", "erosion"]) -> np.ndarray: +def morphology( + img: np.ndarray, + kernel: np.ndarray, + operation: Literal["dilation", "erosion"], +) -> np.ndarray: if operation == "dilation": return dilate(img, kernel) if operation == "erosion": @@ -1799,7 +1910,11 @@ def bboxes_morphology( @clipped -def planckian_jitter(img: np.ndarray, temperature: int, mode: Literal["blackbody", "cied"]) -> np.ndarray: +def planckian_jitter( + img: np.ndarray, + temperature: int, + mode: Literal["blackbody", "cied"], +) -> np.ndarray: img = img.copy() # Get the min and max temperatures for the given mode min_temp = min(PLANCKIAN_COEFFS[mode].keys()) @@ -1810,8 +1925,14 @@ def planckian_jitter(img: np.ndarray, temperature: int, mode: Literal["blackbody # Linearly interpolate between 2 closest temperatures step = 500 - 
t_left = max((temperature // step) * step, min_temp) # Ensure t_left doesn't go below min_temp - t_right = min((temperature // step + 1) * step, max_temp) # Ensure t_right doesn't exceed max_temp + t_left = max( + (temperature // step) * step, + min_temp, + ) # Ensure t_left doesn't go below min_temp + t_right = min( + (temperature // step + 1) * step, + max_temp, + ) # Ensure t_right doesn't exceed max_temp # Handle the case where temperature is at or near min_temp or max_temp if t_left == t_right: @@ -1819,10 +1940,20 @@ def planckian_jitter(img: np.ndarray, temperature: int, mode: Literal["blackbody else: w_right = (temperature - t_left) / (t_right - t_left) w_left = 1 - w_right - coeffs = w_left * np.array(PLANCKIAN_COEFFS[mode][t_left]) + w_right * np.array(PLANCKIAN_COEFFS[mode][t_right]) + coeffs = w_left * np.array(PLANCKIAN_COEFFS[mode][t_left]) + w_right * np.array( + PLANCKIAN_COEFFS[mode][t_right], + ) - img[:, :, 0] = multiply_by_constant(img[:, :, 0], coeffs[0] / coeffs[1], inplace=True) - img[:, :, 2] = multiply_by_constant(img[:, :, 2], coeffs[2] / coeffs[1], inplace=True) + img[:, :, 0] = multiply_by_constant( + img[:, :, 0], + coeffs[0] / coeffs[1], + inplace=True, + ) + img[:, :, 2] = multiply_by_constant( + img[:, :, 2], + coeffs[2] / coeffs[1], + inplace=True, + ) return img @@ -1832,7 +1963,12 @@ def add_noise(img: np.ndarray, noise: np.ndarray) -> np.ndarray: return add(img, noise, inplace=False) -def slic(image: np.ndarray, n_segments: int, compactness: float = 10.0, max_iterations: int = 10) -> np.ndarray: +def slic( + image: np.ndarray, + n_segments: int, + compactness: float = 10.0, + max_iterations: int = 10, +) -> np.ndarray: """Simple Linear Iterative Clustering (SLIC) superpixel segmentation using OpenCV and NumPy. 
Args: @@ -1857,7 +1993,9 @@ def slic(image: np.ndarray, n_segments: int, compactness: float = 10.0, max_iter grid_step = int((num_pixels / n_segments) ** 0.5) x_range = np.arange(grid_step // 2, width, grid_step) y_range = np.arange(grid_step // 2, height, grid_step) - centers = np.array([(x, y) for y in y_range for x in x_range if x < width and y < height]) + centers = np.array( + [(x, y) for y in y_range for x in x_range if x < width and y < height], + ) # Initialize labels and distances labels = -1 * np.ones((height, width), dtype=np.int32) @@ -1896,7 +2034,11 @@ def slic(image: np.ndarray, n_segments: int, compactness: float = 10.0, max_iter @preserve_channel_dim @float32_io -def shot_noise(img: np.ndarray, scale: float, random_generator: np.random.Generator) -> np.ndarray: +def shot_noise( + img: np.ndarray, + scale: float, + random_generator: np.random.Generator, +) -> np.ndarray: """Apply shot noise to the image by simulating photon counting in linear light space. This function simulates photon shot noise, which occurs due to the quantum nature of light. 
@@ -1944,7 +2086,11 @@ def shot_noise(img: np.ndarray, scale: float, random_generator: np.random.Genera scaled_img = (img_linear + scale * 1e-6) / scale # Generate Poisson noise - noisy_img = multiply_by_constant(random_generator.poisson(scaled_img).astype(np.float32), scale, inplace=True) + noisy_img = multiply_by_constant( + random_generator.poisson(scaled_img).astype(np.float32), + scale, + inplace=True, + ) # Scale back and apply gamma correction return power(np.clip(noisy_img, 0, 1, out=noisy_img), 1 / 2.2) @@ -1996,12 +2142,30 @@ def generate_noise( return np.zeros(shape, dtype=np.float32) """Generate noise with optional approximation for speed.""" if spatial_mode == "constant": - return generate_constant_noise(noise_type, shape, params, max_value, random_generator) + return generate_constant_noise( + noise_type, + shape, + params, + max_value, + random_generator, + ) if approximation == 1.0: if spatial_mode == "shared": - return generate_shared_noise(noise_type, shape, params, max_value, random_generator) - return generate_per_pixel_noise(noise_type, shape, params, max_value, random_generator) + return generate_shared_noise( + noise_type, + shape, + params, + max_value, + random_generator, + ) + return generate_per_pixel_noise( + noise_type, + shape, + params, + max_value, + random_generator, + ) # Calculate reduced size for noise generation height, width = shape[:2] @@ -2011,9 +2175,21 @@ def generate_noise( # Generate noise at reduced resolution if spatial_mode == "shared": - noise = generate_shared_noise(noise_type, reduced_shape, params, max_value, random_generator) + noise = generate_shared_noise( + noise_type, + reduced_shape, + params, + max_value, + random_generator, + ) else: # per_pixel - noise = generate_per_pixel_noise(noise_type, reduced_shape, params, max_value, random_generator) + noise = generate_per_pixel_noise( + noise_type, + reduced_shape, + params, + max_value, + random_generator, + ) # Resize noise to original size using existing resize 
function return fgeometric.resize(noise, (height, width), interpolation=cv2.INTER_LINEAR) @@ -2028,7 +2204,13 @@ def generate_constant_noise( ) -> np.ndarray: """Generate one value per channel.""" num_channels = shape[-1] if len(shape) > MONO_CHANNEL_DIMENSIONS else 1 - return sample_noise(noise_type, (num_channels,), params, max_value, random_generator) + return sample_noise( + noise_type, + (num_channels,), + params, + max_value, + random_generator, + ) def generate_per_pixel_noise( @@ -2086,23 +2268,35 @@ def sample_uniform( if len(ranges) == 1: ranges = ranges * num_channels elif len(ranges) < num_channels: - raise ValueError(f"Not enough ranges provided. Expected {num_channels}, got {len(ranges)}") + raise ValueError( + f"Not enough ranges provided. Expected {num_channels}, got {len(ranges)}", + ) - return np.array([random_generator.uniform(low, high) for low, high in ranges[:num_channels]]) + return np.array( + [random_generator.uniform(low, high) for low, high in ranges[:num_channels]], + ) # use first range for spatial noise low, high = params["ranges"][0] return random_generator.uniform(low, high, size=size) -def sample_gaussian(size: tuple[int, ...], params: dict[str, Any], random_generator: np.random.Generator) -> np.ndarray: +def sample_gaussian( + size: tuple[int, ...], + params: dict[str, Any], + random_generator: np.random.Generator, +) -> np.ndarray: """Sample from Gaussian distribution.""" mean = random_generator.uniform(*params["mean_range"]) std = random_generator.uniform(*params["std_range"]) return random_generator.normal(mean, std, size=size) -def sample_laplace(size: tuple[int, ...], params: dict[str, Any], random_generator: np.random.Generator) -> np.ndarray: +def sample_laplace( + size: tuple[int, ...], + params: dict[str, Any], + random_generator: np.random.Generator, +) -> np.ndarray: """Sample from Laplace distribution. The Laplace distribution is also known as the double exponential distribution. 
@@ -2113,7 +2307,11 @@ def sample_laplace(size: tuple[int, ...], params: dict[str, Any], random_generat return random_generator.laplace(loc=loc, scale=scale, size=size) -def sample_beta(size: tuple[int, ...], params: dict[str, Any], random_generator: np.random.Generator) -> np.ndarray: +def sample_beta( + size: tuple[int, ...], + params: dict[str, Any], + random_generator: np.random.Generator, +) -> np.ndarray: """Sample from Beta distribution. The Beta distribution is bounded by [0, 1] and then scaled and shifted to [-scale, scale]. @@ -2150,7 +2348,13 @@ def generate_shared_noise( """ # Generate noise for (H, W) height, width = shape[:2] - noise_map = sample_noise(noise_type, (height, width), params, max_value, random_generator) + noise_map = sample_noise( + noise_type, + (height, width), + params, + max_value, + random_generator, + ) # If input is multichannel, broadcast noise to all channels if len(shape) > MONO_CHANNEL_DIMENSIONS: @@ -2159,9 +2363,19 @@ def generate_shared_noise( @preserve_channel_dim -def sharpen_gaussian(img: np.ndarray, alpha: float, kernel_size: int, sigma: float) -> np.ndarray: +def sharpen_gaussian( + img: np.ndarray, + alpha: float, + kernel_size: int, + sigma: float, +) -> np.ndarray: """Sharpen image using Gaussian blur.""" - blurred = cv2.GaussianBlur(img, ksize=(kernel_size, kernel_size), sigmaX=sigma, sigmaY=sigma) + blurred = cv2.GaussianBlur( + img, + ksize=(kernel_size, kernel_size), + sigmaX=sigma, + sigmaY=sigma, + ) return add_weighted(blurred, 1 - alpha, img, alpha) @@ -2192,12 +2406,20 @@ def get_grid_size(size: int, target_shape: tuple[int, int]) -> int: return 2 ** int(np.ceil(np.log2(max(size, *target_shape)))) -def random_offset(current_size: int, total_size: int, roughness: float, random_generator: np.random.Generator) -> float: +def random_offset( + current_size: int, + total_size: int, + roughness: float, + random_generator: np.random.Generator, +) -> float: """Calculate random offset based on current grid size.""" 
return (random_generator.random() - 0.5) * (current_size / total_size) ** (roughness / 2) -def initialize_grid(grid_size: int, random_generator: np.random.Generator) -> np.ndarray: +def initialize_grid( + grid_size: int, + random_generator: np.random.Generator, +) -> np.ndarray: """Initialize grid with random corners.""" pattern = np.zeros((grid_size + 1, grid_size + 1), dtype=np.float32) for corner in [(0, 0), (0, -1), (-1, 0), (-1, -1)]: @@ -2221,7 +2443,12 @@ def square_step( pattern[y + step, x], # bottom-left pattern[y + step, x + step], # bottom-right ] - return sum(corners) / 4.0 + random_offset(step, grid_size, roughness, random_generator) + return sum(corners) / 4.0 + random_offset( + step, + grid_size, + roughness, + random_generator, + ) def diamond_step( @@ -2244,7 +2471,12 @@ def diamond_step( if x + half <= grid_size: points.append(pattern[y, x + half]) - return sum(points) / len(points) + random_offset(half * 2, grid_size, roughness, random_generator) + return sum(points) / len(points) + random_offset( + half * 2, + grid_size, + roughness, + random_generator, + ) def generate_plasma_pattern( @@ -2296,7 +2528,15 @@ def generate_plasma_pattern( # Diamond step for y in range(0, grid_size + 1, half_step): for x in range((y + half_step) % step_size, grid_size + 1, step_size): - pattern[y, x] = diamond_step(pattern, y, x, half_step, grid_size, roughness, random_generator) + pattern[y, x] = diamond_step( + pattern, + y, + x, + half_step, + grid_size, + roughness, + random_generator, + ) step_size = half_step @@ -2479,7 +2719,9 @@ def apply_gaussian_illumination( center_x = width * center[0] center_y = height * center[1] sigma_pixels = max(height, width) * sigma - gaussian = np.exp(-((x - center_x) ** 2 + (y - center_y) ** 2) / (2 * sigma_pixels**2)) + gaussian = np.exp( + -((x - center_x) ** 2 + (y - center_y) ** 2) / (2 * sigma_pixels**2), + ) return apply_illumination_pattern(result, gaussian, intensity) diff --git 
a/albumentations/augmentations/transforms.py b/albumentations/augmentations/transforms.py index 128881193..ce9573427 100644 --- a/albumentations/augmentations/transforms.py +++ b/albumentations/augmentations/transforms.py @@ -2079,8 +2079,8 @@ class Posterize(ImageOnlyTransform): Args: num_bits (int | tuple[int, int] | list[int] | list[tuple[int, int]]): Defines the number of bits to keep for each color channel. Can be specified in several ways: - - Single int: Same number of bits for all channels. Range: [0, 8]. - - tuple of two ints: (min_bits, max_bits) to randomly choose from. Range for each: [0, 8]. + - Single int: Same number of bits for all channels. Range: [1, 8]. + - tuple of two ints: (min_bits, max_bits) to randomly choose from. Range for each: [1, 8]. - list of three ints: Specific number of bits for each channel [r_bits, g_bits, b_bits]. - list of three tuples: Ranges for each channel [(r_min, r_max), (g_min, g_max), (b_min, b_max)]. Default: 4 @@ -2139,10 +2139,7 @@ class Posterize(ImageOnlyTransform): """ class InitSchema(BaseTransformInitSchema): - num_bits: Annotated[ - int | tuple[int, int] | list[tuple[int, int]], - Field(default=4, description="Number of high bits"), - ] + num_bits: int | tuple[int, int] | list[tuple[int, int]] @field_validator("num_bits") @classmethod @@ -2151,10 +2148,10 @@ def validate_num_bits( num_bits: Any, ) -> tuple[int, int] | list[tuple[int, int]]: if isinstance(num_bits, int): - return to_tuple(num_bits, num_bits) - if isinstance(num_bits, Sequence): - return [to_tuple(i, 0) for i in num_bits] - return cast(tuple[int, int], to_tuple(num_bits, 0)) + return (num_bits, num_bits) + if isinstance(num_bits, Sequence) and len(num_bits) > PAIR: + return [to_tuple(i, i) for i in num_bits] + return cast(tuple[int, int], to_tuple(num_bits, num_bits)) def __init__( self, @@ -2163,21 +2160,16 @@ def __init__( always_apply: bool | None = None, ): super().__init__(p=p, always_apply=always_apply) - self.num_bits = 
cast(Union[tuple[int, ...], list[tuple[int, ...]]], num_bits) + self.num_bits = cast(Union[tuple[int, int], list[tuple[int, int]]], num_bits) def apply(self, img: np.ndarray, num_bits: int, **params: Any) -> np.ndarray: return fmain.posterize(img, num_bits) def get_params(self) -> dict[str, Any]: if isinstance(self.num_bits, list): - num_bits = [self.py_random.randint(int(i[0]), int(i[1])) for i in self.num_bits] + num_bits = [self.py_random.randint(*i) for i in self.num_bits] return {"num_bits": num_bits} - return { - "num_bits": self.py_random.randint( - int(self.num_bits[0]), - int(self.num_bits[1]), - ), - } + return {"num_bits": self.py_random.randint(*self.num_bits)} def get_transform_init_args_names(self) -> tuple[str, ...]: return ("num_bits",) diff --git a/tests/aug_definitions.py b/tests/aug_definitions.py index f35fb1cfe..aeb7a97fc 100644 --- a/tests/aug_definitions.py +++ b/tests/aug_definitions.py @@ -21,10 +21,20 @@ [A.MotionBlur, {"blur_limit": 3}], [A.MedianBlur, {"blur_limit": 3}], [A.GaussianBlur, {"blur_limit": 3}], - [A.GaussNoise, {"std_range": (0.2, 0.44), "mean_range": (0.0, 0.0), "per_channel": False}], + [ + A.GaussNoise, + {"std_range": (0.2, 0.44), "mean_range": (0.0, 0.0), "per_channel": False}, + ], [A.CLAHE, {"clip_limit": 2, "tile_grid_size": (12, 12)}], [A.RandomGamma, {"gamma_limit": (10, 90)}], - [A.CoarseDropout, {"num_holes_range": (2, 5), "hole_height_range": (3, 4), "hole_width_range": (4, 6)}], + [ + A.CoarseDropout, + { + "num_holes_range": (2, 5), + "hole_height_range": (3, 4), + "hole_width_range": (4, 6), + }, + ], [ A.RandomSnow, {"snow_point_range": (0.2, 0.4), "brightness_coeff": 4}, @@ -163,7 +173,7 @@ [A.LongestMaxSize, {"max_size": 128, "interpolation": cv2.INTER_CUBIC}], [A.RandomGridShuffle, {"grid": (4, 4)}], [A.Solarize, {"threshold": 32}], - [A.Posterize, {"num_bits": 1}], + [A.Posterize, {"num_bits": (3, 5)}], [A.Equalize, {"mode": "pil", "by_channels": False}], [ A.MultiplicativeNoise, @@ -364,7 +374,21 @@ 
font_path="./tests/filesLiberationSerif-Bold.ttf", font_size_range=(0.8, 0.9), color="red", - stopwords=["a", "the", "is", "of", "it", "and", "to", "in", "on", "with", "for", "at", "by"], + stopwords=[ + "a", + "the", + "is", + "of", + "it", + "and", + "to", + "in", + "on", + "with", + "for", + "at", + "by", + ], ), ], [A.GridElasticDeform, {"num_grid_xy": (10, 10), "magnitude": 10}], diff --git a/tests/test_serialization.py b/tests/test_serialization.py index b243d3e5c..0813a971a 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -12,7 +12,13 @@ from albumentations.core.serialization import SERIALIZABLE_REGISTRY, shorten_class_name from albumentations.core.transforms_interface import ImageOnlyTransform from tests.aug_definitions import AUGMENTATION_CLS_PARAMS -from tests.conftest import FLOAT32_IMAGES, IMAGES, SQUARE_FLOAT_IMAGE, SQUARE_UINT8_IMAGE, UINT8_IMAGES +from tests.conftest import ( + FLOAT32_IMAGES, + IMAGES, + SQUARE_FLOAT_IMAGE, + SQUARE_UINT8_IMAGE, + UINT8_IMAGES, +) from .utils import ( OpenMock, @@ -235,7 +241,9 @@ def test_augmentations_for_bboxes_serialization( seed, albumentations_bboxes, ): - image = SQUARE_FLOAT_IMAGE if augmentation_cls == A.FromFloat else SQUARE_UINT8_IMAGE + image = ( + SQUARE_FLOAT_IMAGE if augmentation_cls == A.FromFloat else SQUARE_UINT8_IMAGE + ) aug = augmentation_cls(p=p, **params) aug.set_random_seed(seed) data = {"image": image, "bboxes": albumentations_bboxes} @@ -299,8 +307,12 @@ def test_augmentations_for_bboxes_serialization( ) @pytest.mark.parametrize("p", [0.5, 1]) @pytest.mark.parametrize("seed", TEST_SEEDS) -def test_augmentations_for_keypoints_serialization(augmentation_cls, params, p, seed, keypoints): - image = SQUARE_FLOAT_IMAGE if augmentation_cls == A.FromFloat else SQUARE_UINT8_IMAGE +def test_augmentations_for_keypoints_serialization( + augmentation_cls, params, p, seed, keypoints +): + image = ( + SQUARE_FLOAT_IMAGE if augmentation_cls == A.FromFloat else 
SQUARE_UINT8_IMAGE + ) aug = augmentation_cls(p=p, **params) aug.set_random_seed(seed) data = {"image": image, "keypoints": keypoints} @@ -315,7 +327,9 @@ def test_augmentations_for_keypoints_serialization(augmentation_cls, params, p, aug_data = aug(**data) deserialized_aug_data = deserialized_aug(**data) np.testing.assert_array_equal(aug_data["image"], deserialized_aug_data["image"]) - np.testing.assert_array_equal(aug_data["keypoints"], deserialized_aug_data["keypoints"]) + np.testing.assert_array_equal( + aug_data["keypoints"], deserialized_aug_data["keypoints"] + ) @pytest.mark.parametrize( @@ -370,7 +384,9 @@ def test_transform_pipeline_serialization(seed, image): A.Compose( [ A.Resize(1024, 1024), - A.RandomSizedCrop(min_max_height=(256, 1024), size=(512, 512), p=1), + A.RandomSizedCrop( + min_max_height=(256, 1024), size=(512, 512), p=1 + ), A.OneOf( [ A.RandomSizedCrop( @@ -390,7 +406,9 @@ def test_transform_pipeline_serialization(seed, image): A.Compose( [ A.Resize(1024, 1024), - A.RandomSizedCrop(min_max_height=(256, 1025), size=(256, 256), p=1), + A.RandomSizedCrop( + min_max_height=(256, 1025), size=(256, 256), p=1 + ), A.OneOf([A.HueSaturationValue(p=0.5), A.RGBShift(p=0.7)], p=1), ], ), @@ -430,7 +448,9 @@ def test_transform_pipeline_serialization(seed, image): ) @pytest.mark.parametrize("seed", TEST_SEEDS) @pytest.mark.parametrize("image", IMAGES) -def test_transform_pipeline_serialization_with_bboxes(seed, image, bboxes, bbox_format, labels): +def test_transform_pipeline_serialization_with_bboxes( + seed, image, bboxes, bbox_format, labels +): aug = A.Compose( [ A.OneOrOther( @@ -480,7 +500,9 @@ def test_transform_pipeline_serialization_with_bboxes(seed, image, bboxes, bbox_ ) @pytest.mark.parametrize("seed", TEST_SEEDS) @pytest.mark.parametrize("image", IMAGES) -def test_transform_pipeline_serialization_with_keypoints(seed, image, keypoints, keypoint_format, labels): +def test_transform_pipeline_serialization_with_keypoints( + seed, image, 
keypoints, keypoint_format, labels +): aug = A.Compose( [ A.OneOrOther( @@ -517,10 +539,14 @@ def test_transform_pipeline_serialization_with_keypoints(seed, image, keypoints, deserialized_aug.set_random_seed(seed) aug_data = aug(image=image, keypoints=keypoints, labels=labels) - deserialized_aug_data = deserialized_aug(image=image, keypoints=keypoints, labels=labels) + deserialized_aug_data = deserialized_aug( + image=image, keypoints=keypoints, labels=labels + ) np.testing.assert_array_equal(aug_data["image"], deserialized_aug_data["image"]) - np.testing.assert_array_equal(aug_data["keypoints"], deserialized_aug_data["keypoints"]) + np.testing.assert_array_equal( + aug_data["keypoints"], deserialized_aug_data["keypoints"] + ) @pytest.mark.parametrize( @@ -536,19 +562,24 @@ def test_transform_pipeline_serialization_with_keypoints(seed, image, keypoints, ), ) @pytest.mark.parametrize("seed", TEST_SEEDS) -def test_additional_targets_for_image_only_serialization(augmentation_cls, params, seed): - image = SQUARE_FLOAT_IMAGE if augmentation_cls == A.FromFloat else SQUARE_UINT8_IMAGE +def test_additional_targets_for_image_only_serialization( + augmentation_cls, params, seed +): + image = ( + SQUARE_FLOAT_IMAGE if augmentation_cls == A.FromFloat else SQUARE_UINT8_IMAGE + ) aug = A.Compose( [augmentation_cls(p=1.0, **params)], additional_targets={"image2": "image"}, + seed=seed, ) - aug.set_random_seed(seed) image2 = image.copy() serialized_aug = A.to_dict(aug) deserialized_aug = A.from_dict(serialized_aug) deserialized_aug.set_random_seed(seed) + aug_data = aug(image=image, image2=image2) deserialized_aug_data = deserialized_aug(image=image, image2=image2) @@ -586,21 +617,32 @@ def vflip_keypoint(keypoints, **kwargs): serialized_aug = A.to_dict(aug) deserialized_aug = A.from_dict(serialized_aug, nonserializable={"vflip": aug}) deserialized_aug.set_random_seed(seed) - aug_data = aug(image=image, mask=mask, bboxes=albumentations_bboxes, keypoints=keypoints) - 
deserialized_aug_data = deserialized_aug(image=image, mask=mask, bboxes=albumentations_bboxes, keypoints=keypoints) + aug_data = aug( + image=image, mask=mask, bboxes=albumentations_bboxes, keypoints=keypoints + ) + deserialized_aug_data = deserialized_aug( + image=image, mask=mask, bboxes=albumentations_bboxes, keypoints=keypoints + ) np.testing.assert_array_equal(aug_data["image"], deserialized_aug_data["image"]) np.testing.assert_array_equal(aug_data["mask"], deserialized_aug_data["mask"]) np.testing.assert_array_equal(aug_data["bboxes"], deserialized_aug_data["bboxes"]) - np.testing.assert_array_equal(aug_data["keypoints"], deserialized_aug_data["keypoints"]) + np.testing.assert_array_equal( + aug_data["keypoints"], deserialized_aug_data["keypoints"] + ) @pytest.mark.parametrize( "transform_file_name", - ["transform_v1.1.0_without_totensor.json", "transform_serialization_v2_without_totensor.json"], + [ + "transform_v1.1.0_without_totensor.json", + "transform_serialization_v2_without_totensor.json", + ], ) @pytest.mark.parametrize("data_format", ("yaml", "json")) @pytest.mark.parametrize("seed", TEST_SEEDS) -def test_serialization_conversion_without_totensor(transform_file_name, data_format, seed): +def test_serialization_conversion_without_totensor( + transform_file_name, data_format, seed +): image = SQUARE_UINT8_IMAGE # Step 1: Load transform from file @@ -621,22 +663,34 @@ def test_serialization_conversion_without_totensor(transform_file_name, data_for buffer.close() assert ( - DeepDiff(transform.to_dict(), transform_from_buffer.to_dict(), ignore_type_in_groups=[(tuple, list)]) == {} + DeepDiff( + transform.to_dict(), + transform_from_buffer.to_dict(), + ignore_type_in_groups=[(tuple, list)], + ) + == {} ), f"The loaded transform is not equal to the original one {DeepDiff(transform.to_dict(), transform_from_buffer.to_dict(), ignore_type_in_groups=[(tuple, list)])}" image1 = transform(image=image)["image"] image2 = transform_from_buffer(image=image)["image"] 
- assert np.array_equal(image1, image2), f"The transformed images are not equal {(image1 - image2).mean()}" + assert np.array_equal( + image1, image2 + ), f"The transformed images are not equal {(image1 - image2).mean()}" @pytest.mark.parametrize( "transform_file_name", - ["transform_v1.1.0_with_totensor.json", "transform_serialization_v2_with_totensor.json"], + [ + "transform_v1.1.0_with_totensor.json", + "transform_serialization_v2_with_totensor.json", + ], ) @pytest.mark.parametrize("data_format", ("yaml", "json")) @pytest.mark.parametrize("seed", TEST_SEEDS) -def test_serialization_conversion_with_totensor(transform_file_name: str, data_format: str, seed: int) -> None: +def test_serialization_conversion_with_totensor( + transform_file_name: str, data_format: str, seed: int +) -> None: image = SQUARE_UINT8_IMAGE # Load transform from file @@ -658,13 +712,21 @@ def test_serialization_conversion_with_totensor(transform_file_name: str, data_f buffer.close() # Ensure the buffer is closed after use assert ( - DeepDiff(transform.to_dict(), transform_from_buffer.to_dict(), ignore_type_in_groups=[(tuple, list)]) == {} + DeepDiff( + transform.to_dict(), + transform_from_buffer.to_dict(), + ignore_type_in_groups=[(tuple, list)], + ) + == {} ), f"The loaded transform is not equal to the original one {DeepDiff(transform.to_dict(), transform_from_buffer.to_dict(), ignore_type_in_groups=[(tuple, list)])}" image1 = transform(image=image)["image"] image2 = transform_from_buffer(image=image)["image"] - np.testing.assert_array_equal(image1, image2), f"The transformed images are not equal {(image1 - image2).mean()}" + ( + np.testing.assert_array_equal(image1, image2), + f"The transformed images are not equal {(image1 - image2).mean()}", + ) def test_custom_transform_with_overlapping_name(): @@ -672,7 +734,10 @@ class HorizontalFlip(ImageOnlyTransform): pass assert SERIALIZABLE_REGISTRY["HorizontalFlip"] == A.HorizontalFlip - assert 
SERIALIZABLE_REGISTRY["tests.test_serialization.HorizontalFlip"] == HorizontalFlip + assert ( + SERIALIZABLE_REGISTRY["tests.test_serialization.HorizontalFlip"] + == HorizontalFlip + ) def test_serialization_v2_to_dict() -> None: @@ -703,14 +768,18 @@ def test_shorten_class_name(class_fullname, expected_short_class_name): @pytest.mark.parametrize("seed", TEST_SEEDS) @pytest.mark.parametrize("p", [1]) -def test_template_transform_serialization(template: np.ndarray, seed: int, p: float) -> None: +def test_template_transform_serialization( + template: np.ndarray, seed: int, p: float +) -> None: image = SQUARE_UINT8_IMAGE template_transform = A.TemplateTransform(name="template", templates=template, p=p) aug = A.Compose([A.HorizontalFlip(p=1), template_transform, A.Blur(p=1)]) aug.set_random_seed(seed) serialized_aug = A.to_dict(aug) - deserialized_aug = A.from_dict(serialized_aug, nonserializable={"template": template_transform}) + deserialized_aug = A.from_dict( + serialized_aug, nonserializable={"template": template_transform} + ) deserialized_aug.set_random_seed(seed) aug_data = aug(image=image) deserialized_aug_data = deserialized_aug(image=image) @@ -758,7 +827,9 @@ def test_template_transform_serialization(template: np.ndarray, seed: int, p: fl }, ), ) -def test_augmentations_serialization(augmentation_cls: A.BasicTransform, params: Dict[str, Any]) -> None: +def test_augmentations_serialization( + augmentation_cls: A.BasicTransform, params: Dict[str, Any] +) -> None: instance = augmentation_cls(**params) def get_all_init_schema_fields(model_cls: A.BasicTransform) -> Set[str]: @@ -776,11 +847,12 @@ def get_all_init_schema_fields(model_cls: A.BasicTransform) -> Set[str]: if hasattr(model_cls, "InitSchema"): for field_name, field in model_cls.InitSchema.model_fields.items(): # Check if field is deprecated either directly or in its default annotation - is_deprecated = ( - field.deprecated is not None - or (hasattr(field.default, "metadata") - and any(getattr(m, 
"deprecated", None) is not None - for m in field.default.metadata)) + is_deprecated = field.deprecated is not None or ( + hasattr(field.default, "metadata") + and any( + getattr(m, "deprecated", None) is not None + for m in field.default.metadata + ) ) if not is_deprecated: fields.add(field_name) From d7d0f4b2aa962491734dbc5b1eb3a5745121df2b Mon Sep 17 00:00:00 2001 From: Vladimir Iglovikov Date: Mon, 2 Dec 2024 16:20:41 -0800 Subject: [PATCH 5/7] Fix in min value for Posterize --- albumentations/augmentations/transforms.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/albumentations/augmentations/transforms.py b/albumentations/augmentations/transforms.py index ce9573427..5565c266a 100644 --- a/albumentations/augmentations/transforms.py +++ b/albumentations/augmentations/transforms.py @@ -70,6 +70,7 @@ NoOp, ) from albumentations.core.types import ( + EIGHT, MAX_RAIN_ANGLE, MONO_CHANNEL_DIMENSIONS, NUM_RGB_CHANNELS, @@ -2148,6 +2149,8 @@ def validate_num_bits( num_bits: Any, ) -> tuple[int, int] | list[tuple[int, int]]: if isinstance(num_bits, int): + if num_bits < 1 or num_bits > EIGHT: + raise ValueError("num_bits must be in the range [1, 8]") return (num_bits, num_bits) if isinstance(num_bits, Sequence) and len(num_bits) > PAIR: return [to_tuple(i, i) for i in num_bits] From 288056c0d5feec10ec48c6e5d32b57edf42fa4e4 Mon Sep 17 00:00:00 2001 From: Vladimir Iglovikov Date: Mon, 2 Dec 2024 18:17:02 -0800 Subject: [PATCH 6/7] Tests for AutoContrast --- albumentations/augmentations/functional.py | 16 +- tests/functional/test_functional.py | 438 ++++++++++++++++----- 2 files changed, 351 insertions(+), 103 deletions(-) diff --git a/albumentations/augmentations/functional.py b/albumentations/augmentations/functional.py index 263000855..a00c19063 100644 --- a/albumentations/augmentations/functional.py +++ b/albumentations/augmentations/functional.py @@ -2747,9 +2747,7 @@ def auto_contrast(img: np.ndarray) -> np.ndarray: 4. 
Uses lookup table for scaling """ result = img.copy() - num_channels = get_num_channels(img) - max_value = MAX_VALUES_BY_DTYPE[img.dtype] for i in range(num_channels): @@ -2761,17 +2759,21 @@ def auto_contrast(img: np.ndarray) -> np.ndarray: # Calculate cumulative distribution cdf = hist.cumsum() - min_value = cdf.min() - max_value = cdf.max() + # Find the minimum and maximum non-zero values in the CDF + if cdf[cdf > 0].size == 0: + continue # Skip if the channel is constant or empty + + cdf_min = cdf[cdf > 0].min() + cdf_max = cdf.max() - if min_value == max_value: + if cdf_min == cdf_max: continue # Normalize CDF - cdf = (cdf - min_value) * max_value / (max_value - min_value + 1e-6) + cdf = (cdf - cdf_min) * max_value / (cdf_max - cdf_min) # Create lookup table - lut = clip(np.around(cdf), np.uint8) + lut = np.clip(np.around(cdf), 0, max_value).astype(np.uint8) # Apply lookup table if img.ndim > MONO_CHANNEL_DIMENSIONS: diff --git a/tests/functional/test_functional.py b/tests/functional/test_functional.py index b6242fc61..aa19a83fa 100644 --- a/tests/functional/test_functional.py +++ b/tests/functional/test_functional.py @@ -3,25 +3,43 @@ import cv2 import numpy as np import pytest -from albucore import MAX_VALUES_BY_DTYPE, clip, get_num_channels, is_multispectral_image, to_float +from albucore import ( + MAX_VALUES_BY_DTYPE, + clip, + get_num_channels, + is_multispectral_image, + to_float, +) from numpy.testing import assert_array_almost_equal_nulp import albumentations.augmentations.functional as F import albumentations.augmentations.geometric.functional as fgeometric from albumentations.core.types import d4_group_elements -from tests.conftest import IMAGES, RECTANGULAR_IMAGES, RECTANGULAR_UINT8_IMAGE, SQUARE_UINT8_IMAGE, UINT8_IMAGES +from tests.conftest import ( + IMAGES, + RECTANGULAR_IMAGES, + RECTANGULAR_UINT8_IMAGE, + SQUARE_UINT8_IMAGE, + UINT8_IMAGES, +) from tests.utils import convert_2d_to_target_format from copy import deepcopy 
-@pytest.mark.parametrize(["input_shape", "expected_shape"], [[(128, 64), (64, 128)], [(128, 64, 3), (64, 128, 3)]]) +@pytest.mark.parametrize( + ["input_shape", "expected_shape"], + [[(128, 64), (64, 128)], [(128, 64, 3), (64, 128, 3)]], +) def test_transpose(input_shape, expected_shape): img = np.random.randint(low=0, high=256, size=input_shape, dtype=np.uint8) transposed = fgeometric.transpose(img) assert transposed.shape == expected_shape -@pytest.mark.parametrize(["input_shape", "expected_shape"], [[(128, 64), (64, 128)], [(128, 64, 3), (64, 128, 3)]]) +@pytest.mark.parametrize( + ["input_shape", "expected_shape"], + [[(128, 64), (64, 128)], [(128, 64, 3), (64, 128, 3)]], +) def test_transpose_float(input_shape, expected_shape): img = np.random.uniform(low=0.0, high=1.0, size=input_shape).astype("float32") transposed = fgeometric.transpose(img) @@ -39,8 +57,12 @@ def test_rot90(target): @pytest.mark.parametrize("target", ["image", "image_4_channels"]) def test_rot90_float(target): - img = np.array([[0.0, 0.0, 0.4], [0.0, 0.0, 0.4], [0.0, 0.0, 0.4]], dtype=np.float32) - expected = np.array([[0.4, 0.4, 0.4], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], dtype=np.float32) + img = np.array( + [[0.0, 0.0, 0.4], [0.0, 0.0, 0.4], [0.0, 0.0, 0.4]], dtype=np.float32 + ) + expected = np.array( + [[0.4, 0.4, 0.4], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], dtype=np.float32 + ) img, expected = convert_2d_to_target_format([img, expected], target=target) rotated = fgeometric.rot90(img, factor=1) assert_array_almost_equal_nulp(rotated, expected) @@ -49,9 +71,13 @@ def test_rot90_float(target): @pytest.mark.parametrize("target", ["image", "mask"]) def test_pad(target): img = np.array([[1, 2], [3, 4]], dtype=np.uint8) - expected = np.array([[4, 3, 4, 3], [2, 1, 2, 1], [4, 3, 4, 3], [2, 1, 2, 1]], dtype=np.uint8) + expected = np.array( + [[4, 3, 4, 3], [2, 1, 2, 1], [4, 3, 4, 3], [2, 1, 2, 1]], dtype=np.uint8 + ) img, expected = convert_2d_to_target_format([img, expected], target=target) - padded 
= fgeometric.pad(img, min_height=4, min_width=4, border_mode=cv2.BORDER_REFLECT_101, value=None) + padded = fgeometric.pad( + img, min_height=4, min_width=4, border_mode=cv2.BORDER_REFLECT_101, value=None + ) assert np.array_equal(padded, expected) @@ -59,11 +85,18 @@ def test_pad(target): def test_pad_float(target): img = np.array([[0.1, 0.2], [0.3, 0.4]], dtype=np.float32) expected = np.array( - [[0.4, 0.3, 0.4, 0.3], [0.2, 0.1, 0.2, 0.1], [0.4, 0.3, 0.4, 0.3], [0.2, 0.1, 0.2, 0.1]], + [ + [0.4, 0.3, 0.4, 0.3], + [0.2, 0.1, 0.2, 0.1], + [0.4, 0.3, 0.4, 0.3], + [0.2, 0.1, 0.2, 0.1], + ], dtype=np.float32, ) img, expected = convert_2d_to_target_format([img, expected], target=target) - padded_img = fgeometric.pad(img, min_height=4, min_width=4, value=None, border_mode=cv2.BORDER_REFLECT_101) + padded_img = fgeometric.pad( + img, min_height=4, min_width=4, value=None, border_mode=cv2.BORDER_REFLECT_101 + ) assert_array_almost_equal_nulp(padded_img, expected) @@ -126,26 +159,39 @@ def test_longest_max_size(target): expected = np.array([[2, 3], [6, 7], [10, 11]], dtype=np.uint8) img, expected = convert_2d_to_target_format([img, expected], target=target) - scaled = fgeometric.longest_max_size(img, max_size=3, interpolation=cv2.INTER_LINEAR) + scaled = fgeometric.longest_max_size( + img, max_size=3, interpolation=cv2.INTER_LINEAR + ) assert np.array_equal(scaled, expected) @pytest.mark.parametrize("target", ["image", "mask"]) def test_smallest_max_size(target): img = np.array( - [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12], [12, 13, 14, 15, 16, 17], [18, 19, 20, 21, 22, 23]], + [ + [1, 2, 3, 4, 5, 6], + [7, 8, 9, 10, 11, 12], + [12, 13, 14, 15, 16, 17], + [18, 19, 20, 21, 22, 23], + ], dtype=np.uint8, ) - expected = np.array([[2, 4, 5, 7], [10, 11, 13, 14], [17, 19, 20, 22]], dtype=np.uint8) + expected = np.array( + [[2, 4, 5, 7], [10, 11, 13, 14], [17, 19, 20, 22]], dtype=np.uint8 + ) img, expected = convert_2d_to_target_format([img, expected], target=target) - scaled = 
fgeometric.smallest_max_size(img, max_size=3, interpolation=cv2.INTER_LINEAR) + scaled = fgeometric.smallest_max_size( + img, max_size=3, interpolation=cv2.INTER_LINEAR + ) assert np.array_equal(scaled, expected) @pytest.mark.parametrize("target", ["image", "mask"]) def test_resize_linear_interpolation(target): - img = np.array([[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3], [4, 4, 4, 4]], dtype=np.uint8) + img = np.array( + [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3], [4, 4, 4, 4]], dtype=np.uint8 + ) expected = np.array([[2, 2], [4, 4]], dtype=np.uint8) img, expected = convert_2d_to_target_format([img, expected], target=target) resized_img = fgeometric.resize(img, (2, 2), interpolation=cv2.INTER_LINEAR) @@ -157,7 +203,9 @@ def test_resize_linear_interpolation(target): @pytest.mark.parametrize("target", ["image", "mask"]) def test_resize_nearest_interpolation(target): - img = np.array([[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3], [4, 4, 4, 4]], dtype=np.uint8) + img = np.array( + [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3], [4, 4, 4, 4]], dtype=np.uint8 + ) expected = np.array([[1, 1], [3, 3]], dtype=np.uint8) img, expected = convert_2d_to_target_format([img, expected], target=target) resized_img = fgeometric.resize(img, (2, 2), interpolation=cv2.INTER_NEAREST) @@ -183,7 +231,12 @@ def test_resize_different_height_and_width(target): @pytest.mark.parametrize("target", ["image", "mask"]) def test_resize_default_interpolation_float(target): img = np.array( - [[0.1, 0.1, 0.1, 0.1], [0.2, 0.2, 0.2, 0.2], [0.3, 0.3, 0.3, 0.3], [0.4, 0.4, 0.4, 0.4]], + [ + [0.1, 0.1, 0.1, 0.1], + [0.2, 0.2, 0.2, 0.2], + [0.3, 0.3, 0.3, 0.3], + [0.4, 0.4, 0.4, 0.4], + ], dtype=np.float32, ) expected = np.array([[0.15, 0.15], [0.35, 0.35]], dtype=np.float32) @@ -198,7 +251,12 @@ def test_resize_default_interpolation_float(target): @pytest.mark.parametrize("target", ["image", "mask"]) def test_resize_nearest_interpolation_float(target): img = np.array( - [[0.1, 0.1, 0.1, 0.1], [0.2, 0.2, 0.2, 0.2], 
[0.3, 0.3, 0.3, 0.3], [0.4, 0.4, 0.4, 0.4]], + [ + [0.1, 0.1, 0.1, 0.1], + [0.2, 0.2, 0.2, 0.2], + [0.3, 0.3, 0.3, 0.3], + [0.4, 0.4, 0.4, 0.4], + ], dtype=np.float32, ) expected = np.array([[0.1, 0.1], [0.3, 0.3]], dtype=np.float32) @@ -243,7 +301,9 @@ def test_fun_max_size(): target_width = 256 img = np.empty((330, 49), dtype=np.uint8) - out = fgeometric.smallest_max_size(img, target_width, interpolation=cv2.INTER_LINEAR) + out = fgeometric.smallest_max_size( + img, target_width, interpolation=cv2.INTER_LINEAR + ) assert out.shape == (1724, target_width) @@ -336,7 +396,7 @@ def test_swap_tiles_on_image(img, tiles, mapping, expected): @pytest.mark.parametrize("image", IMAGES) -@pytest.mark.parametrize("threshold", [0.0, 1/3, 2/3, 1.0, 1.1]) +@pytest.mark.parametrize("threshold", [0.0, 1 / 3, 2 / 3, 1.0, 1.1]) def test_solarize(image, threshold): max_value = MAX_VALUES_BY_DTYPE[image.dtype] check_img = image.copy() @@ -377,7 +437,9 @@ def test_solarize(image, threshold): ), ], ) -def test_equalize_checks(img_shape, img_dtype, mask_shape, by_channels, expected_error, expected_message): +def test_equalize_checks( + img_shape, img_dtype, mask_shape, by_channels, expected_error, expected_message +): img = ( np.random.randint(0, 255, img_shape).astype(img_dtype) if img_dtype == np.uint8 @@ -416,7 +478,10 @@ def test_equalize_grayscale_mask(): mask = np.zeros([256, 256], dtype=bool) mask[:10, :10] = True - assert np.all(cv2.equalizeHist(img[:10, :10]) == F.equalize(img, mask=mask, mode="cv")[:10, :10]) + assert np.all( + cv2.equalizeHist(img[:10, :10]) + == F.equalize(img, mask=mask, mode="cv")[:10, :10] + ) def test_equalize_rgb_mask(): @@ -434,7 +499,9 @@ def test_equalize_rgb_mask(): img_cv = _img.copy()[:10, :10] img_cv[..., 0] = cv2.equalizeHist(img_cv[..., 0]) img_cv = cv2.cvtColor(img_cv, cv2.COLOR_YCrCb2RGB) - assert np.all(img_cv == F.equalize(img, mask=mask, mode="cv", by_channels=False)[:10, :10]) + assert np.all( + img_cv == F.equalize(img, mask=mask, 
mode="cv", by_channels=False)[:10, :10] + ) mask = np.zeros([256, 256, 3], dtype=bool) mask[:10, :10, 0] = True @@ -471,7 +538,10 @@ def test_downscale_random(): @pytest.mark.parametrize( "img", - [np.random.randint(0, 256, [100, 100], dtype=np.uint8), np.random.random([100, 100]).astype(np.float32)], + [ + np.random.randint(0, 256, [100, 100], dtype=np.uint8), + np.random.random([100, 100]).astype(np.float32), + ], ) def test_shift_hsv_gray(img): F.shift_hsv(img, 0.5, 0.5, 0.5) @@ -481,9 +551,15 @@ def test_shift_hsv_gray(img): "tiles, expected", [ # Simple case with two different shapes - (np.array([[0, 0, 2, 2], [0, 2, 2, 4], [2, 0, 4, 2], [2, 2, 4, 4]]), {(2, 2): [0, 1, 2, 3]}), + ( + np.array([[0, 0, 2, 2], [0, 2, 2, 4], [2, 0, 4, 2], [2, 2, 4, 4]]), + {(2, 2): [0, 1, 2, 3]}, + ), # Tiles with three different shapes - (np.array([[0, 0, 1, 3], [0, 3, 1, 6], [1, 0, 4, 3], [1, 3, 4, 6]]), {(1, 3): [0, 1], (3, 3): [2, 3]}), + ( + np.array([[0, 0, 1, 3], [0, 3, 1, 6], [1, 0, 4, 3], [1, 3, 4, 6]]), + {(1, 3): [0, 1], (3, 3): [2, 3]}, + ), # Single tile (np.array([[0, 0, 1, 1]]), {(1, 1): [0]}), # No tiles @@ -497,7 +573,9 @@ def test_create_shape_groups(tiles, expected): assert len(result) == len(expected), "Incorrect number of shape groups" for shape in expected: assert shape in result, f"Shape {shape} is not in the result" - assert sorted(result[shape]) == sorted(expected[shape]), f"Incorrect indices for shape {shape}" + assert sorted(result[shape]) == sorted( + expected[shape] + ), f"Incorrect indices for shape {shape}" @pytest.mark.parametrize( @@ -514,8 +592,12 @@ def test_create_shape_groups(tiles, expected): def test_shuffle_tiles_within_shape_groups(shape_groups, random_seed, expected_output): generator = np.random.default_rng(random_seed) shape_groups_original = deepcopy(shape_groups) - actual_output = fgeometric.shuffle_tiles_within_shape_groups(shape_groups, generator) - assert shape_groups == shape_groups_original, "Input shape groups should not be 
modified" + actual_output = fgeometric.shuffle_tiles_within_shape_groups( + shape_groups, generator + ) + assert ( + shape_groups == shape_groups_original + ), "Input shape groups should not be modified" np.testing.assert_array_equal(actual_output, expected_output) @@ -523,19 +605,33 @@ def test_shuffle_tiles_within_shape_groups(shape_groups, random_seed, expected_o "group_member,expected", [ ("e", np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])), # Identity - ("r90", np.array([[2, 5, 8], [1, 4, 7], [0, 3, 6]])), # Rotate 90 degrees counterclockwise + ( + "r90", + np.array([[2, 5, 8], [1, 4, 7], [0, 3, 6]]), + ), # Rotate 90 degrees counterclockwise ("r180", np.array([[8, 7, 6], [5, 4, 3], [2, 1, 0]])), # Rotate 180 degrees - ("r270", np.array([[6, 3, 0], [7, 4, 1], [8, 5, 2]])), # Rotate 270 degrees counterclockwise + ( + "r270", + np.array([[6, 3, 0], [7, 4, 1], [8, 5, 2]]), + ), # Rotate 270 degrees counterclockwise ("v", np.array([[6, 7, 8], [3, 4, 5], [0, 1, 2]])), # Vertical flip - ("t", np.array([[0, 3, 6], [1, 4, 7], [2, 5, 8]])), # Transpose (reflect over main diagonal) + ( + "t", + np.array([[0, 3, 6], [1, 4, 7], [2, 5, 8]]), + ), # Transpose (reflect over main diagonal) ("h", np.array([[2, 1, 0], [5, 4, 3], [8, 7, 6]])), # Horizontal flip - ("hvt", np.array([[8, 5, 2], [7, 4, 1], [6, 3, 0]])), # Transpose (reflect over anti-diagonal) + ( + "hvt", + np.array([[8, 5, 2], [7, 4, 1], [6, 3, 0]]), + ), # Transpose (reflect over anti-diagonal) ], ) def test_d4_transformations(group_member, expected): img = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]], dtype=np.uint8) transformed_img = fgeometric.d4(img, group_member) - assert np.array_equal(transformed_img, expected), f"Failed for transformation {group_member}" + assert np.array_equal( + transformed_img, expected + ), f"Failed for transformation {group_member}" def get_md5_hash(image): @@ -551,7 +647,9 @@ def test_d4_unique(image): for element in d4_group_elements: hashes.add(get_md5_hash(fgeometric.d4(image, 
element))) - assert len(hashes) == len(set(hashes)), "d4 should generate unique images for all group elements" + assert len(hashes) == len( + set(hashes) + ), "d4 should generate unique images for all group elements" @pytest.mark.parametrize("image", RECTANGULAR_IMAGES) @@ -559,7 +657,9 @@ def test_d4_unique(image): def test_d4_output_shape_with_group(image, group_member): result = fgeometric.d4(image, group_member) if group_member in ["r90", "r270", "t", "hvt"]: - assert result.shape[:2] == image.shape[:2][::-1], "Output shape should be the transpose of input shape" + assert ( + result.shape[:2] == image.shape[:2][::-1] + ), "Output shape should be the transpose of input shape" else: assert result.shape == image.shape, "Output shape should match input shape" @@ -567,7 +667,9 @@ def test_d4_output_shape_with_group(image, group_member): @pytest.mark.parametrize("image", RECTANGULAR_IMAGES) def test_transpose_output_shape(image): result = fgeometric.transpose(image) - assert result.shape[:2] == image.shape[:2][::-1], "Output shape should be the transpose of input shape" + assert ( + result.shape[:2] == image.shape[:2][::-1] + ), "Output shape should be the transpose of input shape" @pytest.mark.parametrize("image", RECTANGULAR_IMAGES) @@ -575,7 +677,9 @@ def test_transpose_output_shape(image): def test_d4_output_shape_with_factor(image, factor): result = fgeometric.rot90(image, factor) if factor in {1, 3}: - assert result.shape[:2] == image.shape[:2][::-1], "Output shape should be the transpose of input shape" + assert ( + result.shape[:2] == image.shape[:2][::-1] + ), "Output shape should be the transpose of input shape" else: assert result.shape == image.shape, "Output shape should match input shape" @@ -606,42 +710,126 @@ def test_transpose_2(shape): def test_planckian_jitter_blackbody(): img = np.array( [ - [[0.4963, 0.6977, 0.1759], [0.7682, 0.8, 0.2698], [0.0885, 0.161, 0.1507], [0.132, 0.2823, 0.0317]], - [[0.3074, 0.6816, 0.2081], [0.6341, 0.9152, 0.9298], 
[0.4901, 0.3971, 0.7231], [0.8964, 0.8742, 0.7423]], - [[0.4556, 0.4194, 0.5263], [0.6323, 0.5529, 0.2437], [0.3489, 0.9527, 0.5846], [0.4017, 0.0362, 0.0332]], - [[0.0223, 0.1852, 0.1387], [0.1689, 0.3734, 0.2422], [0.2939, 0.3051, 0.8155], [0.5185, 0.932, 0.7932]], + [ + [0.4963, 0.6977, 0.1759], + [0.7682, 0.8, 0.2698], + [0.0885, 0.161, 0.1507], + [0.132, 0.2823, 0.0317], + ], + [ + [0.3074, 0.6816, 0.2081], + [0.6341, 0.9152, 0.9298], + [0.4901, 0.3971, 0.7231], + [0.8964, 0.8742, 0.7423], + ], + [ + [0.4556, 0.4194, 0.5263], + [0.6323, 0.5529, 0.2437], + [0.3489, 0.9527, 0.5846], + [0.4017, 0.0362, 0.0332], + ], + [ + [0.0223, 0.1852, 0.1387], + [0.1689, 0.3734, 0.2422], + [0.2939, 0.3051, 0.8155], + [0.5185, 0.932, 0.7932], + ], ], ) expected_blackbody_plankian_jitter = np.array( [ - [[0.735, 0.6977, 0.0691], [1.0, 0.8, 0.1059], [0.1311, 0.161, 0.0592], [0.1955, 0.2823, 0.0124]], - [[0.4553, 0.6816, 0.0817], [0.9391, 0.9152, 0.365], [0.7258, 0.3971, 0.2839], [1.0, 0.8742, 0.2914]], - [[0.6748, 0.4194, 0.2066], [0.9364, 0.5529, 0.0957], [0.5167, 0.9527, 0.2295], [0.5949, 0.0362, 0.013]], - [[0.033, 0.1852, 0.0545], [0.2501, 0.3734, 0.0951], [0.4353, 0.3051, 0.3202], [0.7679, 0.932, 0.3114]], + [ + [0.735, 0.6977, 0.0691], + [1.0, 0.8, 0.1059], + [0.1311, 0.161, 0.0592], + [0.1955, 0.2823, 0.0124], + ], + [ + [0.4553, 0.6816, 0.0817], + [0.9391, 0.9152, 0.365], + [0.7258, 0.3971, 0.2839], + [1.0, 0.8742, 0.2914], + ], + [ + [0.6748, 0.4194, 0.2066], + [0.9364, 0.5529, 0.0957], + [0.5167, 0.9527, 0.2295], + [0.5949, 0.0362, 0.013], + ], + [ + [0.033, 0.1852, 0.0545], + [0.2501, 0.3734, 0.0951], + [0.4353, 0.3051, 0.3202], + [0.7679, 0.932, 0.3114], + ], ], ) - blackbody_plankian_jitter = F.planckian_jitter(img, temperature=3500, mode="blackbody") - assert np.allclose(blackbody_plankian_jitter, expected_blackbody_plankian_jitter, atol=1e-4) + blackbody_plankian_jitter = F.planckian_jitter( + img, temperature=3500, mode="blackbody" + ) + assert np.allclose( + 
blackbody_plankian_jitter, expected_blackbody_plankian_jitter, atol=1e-4 + ) def test_planckian_jitter_cied(): img = np.array( [ - [[0.4963, 0.6977, 0.1759], [0.7682, 0.8, 0.2698], [0.0885, 0.161, 0.1507], [0.132, 0.2823, 0.0317]], - [[0.3074, 0.6816, 0.2081], [0.6341, 0.9152, 0.9298], [0.4901, 0.3971, 0.7231], [0.8964, 0.8742, 0.7423]], - [[0.4556, 0.4194, 0.5263], [0.6323, 0.5529, 0.2437], [0.3489, 0.9527, 0.5846], [0.4017, 0.0362, 0.0332]], - [[0.0223, 0.1852, 0.1387], [0.1689, 0.3734, 0.2422], [0.2939, 0.3051, 0.8155], [0.5185, 0.932, 0.7932]], + [ + [0.4963, 0.6977, 0.1759], + [0.7682, 0.8, 0.2698], + [0.0885, 0.161, 0.1507], + [0.132, 0.2823, 0.0317], + ], + [ + [0.3074, 0.6816, 0.2081], + [0.6341, 0.9152, 0.9298], + [0.4901, 0.3971, 0.7231], + [0.8964, 0.8742, 0.7423], + ], + [ + [0.4556, 0.4194, 0.5263], + [0.6323, 0.5529, 0.2437], + [0.3489, 0.9527, 0.5846], + [0.4017, 0.0362, 0.0332], + ], + [ + [0.0223, 0.1852, 0.1387], + [0.1689, 0.3734, 0.2422], + [0.2939, 0.3051, 0.8155], + [0.5185, 0.932, 0.7932], + ], ], ) expected_cied_plankian_jitter = np.array( [ - [[0.6058, 0.6977, 0.1149], [0.9377, 0.8000, 0.1762], [0.1080, 0.1610, 0.0984], [0.1611, 0.2823, 0.0207]], - [[0.3752, 0.6816, 0.1359], [0.7740, 0.9152, 0.6072], [0.5982, 0.3971, 0.4722], [1.0000, 0.8742, 0.4848]], - [[0.5561, 0.4194, 0.3437], [0.7718, 0.5529, 0.1592], [0.4259, 0.9527, 0.3818], [0.4903, 0.0362, 0.0217]], - [[0.0272, 0.1852, 0.0906], [0.2062, 0.3734, 0.1582], [0.3587, 0.3051, 0.5326], [0.6329, 0.9320, 0.5180]], + [ + [0.6058, 0.6977, 0.1149], + [0.9377, 0.8000, 0.1762], + [0.1080, 0.1610, 0.0984], + [0.1611, 0.2823, 0.0207], + ], + [ + [0.3752, 0.6816, 0.1359], + [0.7740, 0.9152, 0.6072], + [0.5982, 0.3971, 0.4722], + [1.0000, 0.8742, 0.4848], + ], + [ + [0.5561, 0.4194, 0.3437], + [0.7718, 0.5529, 0.1592], + [0.4259, 0.9527, 0.3818], + [0.4903, 0.0362, 0.0217], + ], + [ + [0.0272, 0.1852, 0.0906], + [0.2062, 0.3734, 0.1582], + [0.3587, 0.3051, 0.5326], + [0.6329, 0.9320, 0.5180], + ], 
], ) cied_plankian_jitter = F.planckian_jitter(img, temperature=4500, mode="cied") @@ -697,7 +885,10 @@ def test_planckian_jitter_interpolation(): result_mid = F.planckian_jitter(img, (temp1 + temp2) // 2, mode) # The mid-temperature result should be between the two extremes - assert np.all((result_mid >= np.minimum(result1, result2)) & (result_mid <= np.maximum(result1, result2))) + assert np.all( + (result_mid >= np.minimum(result1, result2)) + & (result_mid <= np.maximum(result1, result2)) + ) @pytest.mark.parametrize("mode", ["blackbody", "cied"]) @@ -726,7 +917,9 @@ def test_random_tone_curve(image): num_channels = get_num_channels(image) result_float_value = F.move_tone_curve(image, low_y, high_y) - result_array_value = F.move_tone_curve(image, np.array([low_y] * num_channels), np.array([high_y] * num_channels)) + result_array_value = F.move_tone_curve( + image, np.array([low_y] * num_channels), np.array([high_y] * num_channels) + ) np.testing.assert_allclose(result_float_value, result_array_value) @@ -735,11 +928,14 @@ def test_random_tone_curve(image): @pytest.mark.parametrize("image", UINT8_IMAGES) -@pytest.mark.parametrize("color_shift, intensity", [ - (0, 0), # No noise - (0.5, 0.5), # Medium noise - (1, 1), # Maximum noise -]) +@pytest.mark.parametrize( + "color_shift, intensity", + [ + (0, 0), # No noise + (0.5, 0.5), # Medium noise + (1, 1), # Maximum noise + ], +) def test_iso_noise(image, color_shift, intensity): """Test that iso_noise produces expected noise levels.""" # Convert image to float and back @@ -748,18 +944,12 @@ def test_iso_noise(image, color_shift, intensity): # Generate noise using the same random state instance rng = np.random.default_rng(42) result_uint8 = F.iso_noise( - image, - color_shift=color_shift, - intensity=intensity, - random_generator=rng + image, color_shift=color_shift, intensity=intensity, random_generator=rng ) rng = np.random.default_rng(42) result_float = F.iso_noise( - float_image, - color_shift=color_shift, - 
intensity=intensity, - random_generator=rng + float_image, color_shift=color_shift, intensity=intensity, random_generator=rng ) # Convert float result back to uint8 @@ -779,7 +969,6 @@ def test_iso_noise(image, color_shift, intensity): np.testing.assert_allclose(result_uint8, result_float, rtol=1e-5, atol=1) - @pytest.mark.parametrize( "input_image, num_output_channels, expected_shape", [ @@ -838,7 +1027,10 @@ def test_to_gray_from_lab(dtype): def test_to_gray_desaturation(dtype, channels): img = create_test_image(10, 10, channels, dtype) result = F.to_gray_desaturation(img) - expected = (np.max(img.astype(np.float32), axis=-1) + np.min(img.astype(np.float32), axis=-1)) / 2 + expected = ( + np.max(img.astype(np.float32), axis=-1) + + np.min(img.astype(np.float32), axis=-1) + ) / 2 if dtype == np.uint8: expected = expected.astype(np.uint8) np.testing.assert_allclose(result, expected, rtol=1e-5, atol=1) @@ -895,7 +1087,9 @@ def test_float32_uint8_consistency(func): result_uint8 = func(img_uint8) result_float32 = func(img_float32) - np.testing.assert_allclose(result_uint8 / 255.0, result_float32, rtol=1e-5, atol=1e-2) + np.testing.assert_allclose( + result_uint8 / 255.0, result_float32, rtol=1e-5, atol=1e-2 + ) @pytest.mark.parametrize( @@ -925,7 +1119,7 @@ def test_fancy_pca_mean_preservation(shape): image = np.random.rand(*shape).astype(np.float32) alpha_vector = np.random.uniform(-0.1, 0.1, shape[-1]) result = F.fancy_pca(image, alpha_vector) - np.testing.assert_almost_equal(np.mean(image), np.mean(result), decimal=5) + np.testing.assert_almost_equal(np.mean(image), np.mean(result), decimal=4) @pytest.mark.parametrize( @@ -948,23 +1142,23 @@ def test_fancy_pca_zero_alpha(shape, dtype): np.testing.assert_array_equal(image, result) + @pytest.mark.parametrize( ["image_type", "quality", "shape", "expected_shape"], [ # Test JPEG compression (".jpg", 80, (100, 100, 3), (100, 100, 3)), # RGB image - (".jpg", 10, (50, 50, 1), (50, 50, 1)), # Grayscale image - (".jpg", 90, 
(30, 30, 2), (30, 30, 2)), # 2-channel image - (".jpg", 70, (40, 40, 4), (40, 40, 4)), # RGBA image - (".jpg", 50, (60, 60, 5), (60, 60, 5)), # 5-channel image - + (".jpg", 10, (50, 50, 1), (50, 50, 1)), # Grayscale image + (".jpg", 90, (30, 30, 2), (30, 30, 2)), # 2-channel image + (".jpg", 70, (40, 40, 4), (40, 40, 4)), # RGBA image + (".jpg", 50, (60, 60, 5), (60, 60, 5)), # 5-channel image # Test WebP compression - (".webp", 80, (100, 100, 3), (100, 100, 3)), # RGB image - (".webp", 10, (50, 50, 1), (50, 50, 1)), # Grayscale image - (".webp", 90, (30, 30, 2), (30, 30, 2)), # 2-channel image - (".webp", 70, (40, 40, 4), (40, 40, 4)), # RGBA image - (".webp", 50, (60, 60, 5), (60, 60, 5)), # 5-channel image - ] + (".webp", 80, (100, 100, 3), (100, 100, 3)), # RGB image + (".webp", 10, (50, 50, 1), (50, 50, 1)), # Grayscale image + (".webp", 90, (30, 30, 2), (30, 30, 2)), # 2-channel image + (".webp", 70, (40, 40, 4), (40, 40, 4)), # RGBA image + (".webp", 50, (60, 60, 5), (60, 60, 5)), # 5-channel image + ], ) def test_image_compression_shapes(image_type, quality, shape, expected_shape): """Test that image_compression preserves input shapes.""" @@ -993,13 +1187,12 @@ def test_image_compression_channel_consistency(): [ # Test JPEG compression - only supports 1 and 3 channels (".jpg", 80, (100, 100, 3)), # RGB image - (".jpg", 10, (50, 50, 1)), # Grayscale image - + (".jpg", 10, (50, 50, 1)), # Grayscale image # Test WebP compression - supports 1, 3, and 4 channels - (".webp", 80, (100, 100, 3)), # RGB image - (".webp", 10, (50, 50, 1)), # Grayscale image - (".webp", 70, (40, 40, 4)), # RGBA image - ] + (".webp", 80, (100, 100, 3)), # RGB image + (".webp", 10, (50, 50, 1)), # Grayscale image + (".webp", 70, (40, 40, 4)), # RGBA image + ], ) def test_image_compression_supported_shapes(image_type, quality, shape): """Test image_compression with supported channel counts.""" @@ -1009,9 +1202,7 @@ def test_image_compression_supported_shapes(image_type, quality, shape): 
assert compressed.dtype == np.uint8 -@pytest.mark.parametrize( - "image_type", [".jpg", ".webp"] -) +@pytest.mark.parametrize("image_type", [".jpg", ".webp"]) def test_image_compression_quality_with_patterns(image_type): """Test that lower quality results in more compression artifacts.""" # Create an image with high frequency patterns that are sensitive to compression @@ -1027,4 +1218,59 @@ def test_image_compression_quality_with_patterns(image_type): high_diff = np.abs(image - high_quality).mean() low_diff = np.abs(image - low_quality).mean() - assert low_diff > high_diff, f"Low quality diff ({low_diff}) should be greater than high quality diff ({high_diff})" + assert ( + low_diff > high_diff + ), f"Low quality diff ({low_diff}) should be greater than high quality diff ({high_diff})" + + +@pytest.mark.parametrize( + "img, expected", + [ + # Test with a normal RGB image + ( + np.array( + [[[50, 100, 150], [200, 250, 100]], [[100, 150, 200], [50, 100, 150]]], + dtype=np.uint8, + ), + "non_constant", # We expect the function to adjust contrast, so we check if it's not constant + ), + # Test with a constant channel image + ( + np.array( + [ + [[100, 100, 100], [100, 100, 100]], + [[100, 100, 100], [100, 100, 100]], + ], + dtype=np.uint8, + ), + "constant", # The output should remain constant + ), + # Test with a grayscale image + ( + np.array([[50, 100], [150, 200]], dtype=np.uint8), + "constant", # The output should remain constant + ), + # Test with an image already using full intensity range + ( + np.array([[0, 85], [170, 255]], dtype=np.uint8), + "constant", # The output should remain constant + ), + # Test with an all-zero image + ( + np.zeros((2, 2, 3), dtype=np.uint8), + "constant", # The output should remain constant + ), + ], +) +def test_auto_contrast(img, expected): + result = F.auto_contrast(img) + + if expected == "constant": + ( + np.testing.assert_array_equal(result, img), + "The output should remain constant for constant input.", + ) + elif expected == 
"non_constant": + assert not np.all( + result == img + ), "The output should change for non-constant input." From 9291e0df0772450ad13efc636fe65943f2e17aaf Mon Sep 17 00:00:00 2001 From: Vladimir Iglovikov Date: Mon, 2 Dec 2024 19:39:49 -0800 Subject: [PATCH 7/7] Cleanup --- albumentations/augmentations/functional.py | 2 +- tests/functional/test_functional.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/albumentations/augmentations/functional.py b/albumentations/augmentations/functional.py index a00c19063..5639c26dd 100644 --- a/albumentations/augmentations/functional.py +++ b/albumentations/augmentations/functional.py @@ -172,7 +172,7 @@ def posterize(img: np.ndarray, bits: Literal[1, 2, 3, 4, 5, 6, 7, 8]) -> np.ndar Args: img: image to posterize. - bits: number of high bits. Must be in range [0, 8] + bits: number of high bits. Must be in range [1, 8] Returns: Image with reduced color channels. diff --git a/tests/functional/test_functional.py b/tests/functional/test_functional.py index aa19a83fa..b0b37ef72 100644 --- a/tests/functional/test_functional.py +++ b/tests/functional/test_functional.py @@ -18,7 +18,6 @@ from tests.conftest import ( IMAGES, RECTANGULAR_IMAGES, - RECTANGULAR_UINT8_IMAGE, SQUARE_UINT8_IMAGE, UINT8_IMAGES, )