Adding image random text wrapper (#254)

Summary: Pull Request resolved: #254 Adding a function to add text to an image that can do the following after a pre-defined text is given: - Randomly generates x and y positions to start writing the text - Breaks the text into N lines, so that it can fit in the image - Moves the x start point to the left if the text cannot fit - Reduces the font size if the text still cannot fit - Randomly selects a color Reviewed By: erikbrinkman, jbitton Differential Revision: D64933482 fbshipit-source-id: e76853b2e2b39cdcac71c35f6baa4bda7901925f
facebookresearch · Oct 29, 2024 · ac18f48 · ac18f48
1 parent 1113b1d
commit ac18f48
Show file tree

Hide file tree

Showing 10 changed files with 337 additions and 2 deletions.
diff --git a/augly/image/__init__.py b/augly/image/__init__.py
@@ -31,6 +31,7 @@
     overlay_onto_screenshot,
     overlay_stripes,
     overlay_text,
+    overlay_wrap_text,
     pad,
     pad_square,
     perspective_transform,
@@ -106,6 +107,7 @@
     OverlayOntoScreenshot,
     OverlayStripes,
     OverlayText,
+    OverlayWrapText,
     Pad,
     PadSquare,
     PerspectiveTransform,
@@ -153,6 +155,7 @@
     "OverlayOntoScreenshot",
     "OverlayStripes",
     "OverlayText",
+    "OverlayWrapText",
     "Pad",
     "PadSquare",
     "PerspectiveTransform",
@@ -195,6 +198,7 @@
     "overlay_onto_screenshot",
     "overlay_stripes",
     "overlay_text",
+    "overlay_wrap_text",
     "pad",
     "pad_square",
     "perspective_transform",

diff --git a/augly/image/functional.py b/augly/image/functional.py
@@ -11,12 +11,14 @@
 import math
 import os
 import pickle
+import random
 from copy import deepcopy
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import numpy as np
 from augly import utils
 from augly.image import utils as imutils
+from augly.image.helpers import fit_text_in_bbox
 from augly.image.utils.bboxes import spatial_bbox_helper
 from PIL import Image, ImageDraw, ImageEnhance, ImageFilter, ImageFont
 
@@ -1630,6 +1632,114 @@ def overlay_text(
     return imutils.ret_and_save_image(image, output_path, src_mode)
 
 
+def overlay_wrap_text(
+    image: Image.Image,
+    text: str,
+    output_path: Optional[str] = None,
+    min_font_size_ratio: float = 0.02,
+    max_font_size_ratio: float = 0.2,
+    font_file: str = utils.DEFAULT_TEXT_OVERLAY_FONT_PATH,
+    font_size: Optional[float] = None,
+    color: Optional[tuple[int, int, int]] = None,
+    metadata: Optional[list[dict[str, object]]] = None,
+    random_seed: Optional[int] = None,
+) -> Image.Image:
+    """Overlay a pre-defined text on an image at a random position, wrapping it
+    onto several lines when needed. Raises a ValueError (from `fit_text_in_bbox`)
+    if the text cannot be made to fit onto the image
+
+    @param image: Image to overlay text on
+
+    @param text: Text to overlay on image
+
+    @param output_path: Path to save resulting image
+
+    @param min_font_size_ratio: Minimum font size ratio w.r.t. the image to use for
+        text; a value in [0.0, 1.0] applied to the smaller image dimension
+
+    @param max_font_size_ratio: Maximum font size ratio w.r.t. the image to use for
+        text; a value in [0.0, 1.0] applied to the smaller image dimension
+
+    @param font_file: Path to the font file to use for text; it is resolved with
+        `utils.pathmgr.get_local_path` before the font is loaded
+
+    @param font_size: Font size to use for text, given as a ratio in [0.0, 1.0] of
+        the smaller image dimension. The resulting size is clamped to the sizes
+        derived from the min/max ratios. If not given (or 0), a ratio is drawn at
+        random between `min_font_size_ratio` and `max_font_size_ratio`
+
+    @param color: RGB color to use for text. If not given, a random color is used
+
+    @param metadata: List to store metadata about the function execution
+
+    @param random_seed: Seed of the random number generator used to pick the font
+        size, the text position and the color
+
+    @returns: Image with text overlaid
+    """
+    rand = random.Random(random_seed)
+
+    assert (
+        0.0 <= min_font_size_ratio <= 1.0
+    ), "Font size must be a value in the range [0.0, 1.0]"
+
+    assert (
+        0.0 <= max_font_size_ratio <= 1.0
+    ), "Font size must be a value in the range [0.0, 1.0]"
+
+    # a font_size of 0.0 is falsy: it skips this check and is later handled like None
+    if font_size:
+        assert (
+            0.0 <= font_size <= 1.0
+        ), "Font size must be a value in the range [0.0, 1.0]"
+
+    if color:
+        utils.validate_rgb_color(color)
+
+    image = imutils.validate_and_load_image(image)
+
+    # locals() is snapshotted at this exact point, i.e. the parameters plus `rand`.
+    # NOTE(review): presumably the metadata is keyed by these local names (the
+    # expected metadata contains a "rand" entry) - confirm before renaming a local
+    # or introducing a new one above this line
+    func_kwargs = imutils.get_func_kwargs(metadata, locals())
+    src_mode = image.mode
+
+    width, height = image.size
+
+    # font size bounds in pixels, relative to the smaller image dimension
+    min_font_size = int(min(width, height) * min_font_size_ratio)
+    max_font_size = int(min(width, height) * max_font_size_ratio)
+
+    if not font_size:
+        # get a random font size between min_font_size_ratio and max_font_size_ratio of the image size
+        font_size = rand.uniform(min_font_size_ratio, max_font_size_ratio)
+
+    # convert the ratio into a pixel size
+    font_size = int(min(width, height) * font_size)
+    # if font size is too small, increase it to min_font_size of the image size
+    font_size = max(font_size, min_font_size)
+    # if font size is too large, decrease it to max_font_size of the image size
+    font_size = min(font_size, max_font_size)
+
+    local_font_path = utils.pathmgr.get_local_path(font_file)
+
+    # picks the start position and may shrink the font until the wrapped text fits
+    random_x, random_y, lines, line_height, font = fit_text_in_bbox(
+        text,
+        height,
+        width,
+        local_font_path,
+        font_size,
+        min_font_size,
+        rand,
+    )
+
+    if not color:
+        # get a random color
+        # NOTE(review): randrange(255) yields 0..254, so a channel value of 255 is
+        # never drawn; widening the bound would likely change the colors produced
+        # for an existing seed
+        color = (rand.randrange(255), rand.randrange(255), rand.randrange(255))
+
+    red, green, blue = color
+    draw = ImageDraw.Draw(image)
+    for line in lines:
+        # draw text on the image
+        draw.text(
+            (random_x, random_y),
+            line,
+            fill=(red, green, blue),
+            font=font,  # pyre-ignore [6]
+        )
+        # move down by one line height for the next line of text
+        random_y = random_y + line_height
+
+    imutils.get_metadata(
+        metadata=metadata,
+        function_name="overlay_wrap_text",
+        aug_image=image,
+        **func_kwargs,
+    )
+
+    return imutils.ret_and_save_image(image, output_path, src_mode)
+
+
 def pad(
     image: Union[str, Image.Image],
     output_path: Optional[str] = None,

diff --git a/augly/image/helpers.py b/augly/image/helpers.py
@@ -7,10 +7,11 @@
 
 # pyre-unsafe
 
-from typing import Callable
+import random
+from typing import Callable, List, Tuple
 
 import numpy as np
-from PIL import Image
+from PIL import Image, ImageFont
 
 
 def aug_np_wrapper(
@@ -30,3 +31,101 @@ def aug_np_wrapper(
     pil_image = Image.fromarray(image)
     aug_image = aug_function(pil_image, **kwargs)
     return np.array(aug_image)
+
+
+def fit_text_in_bbox(
+    text: str,
+    img_height: int,
+    img_width: int,
+    font_path: str,
+    font_size: int,
+    min_font_size: int,
+    rand: random.Random,
+) -> Tuple[int, int, List[str], int, ImageFont.FreeTypeFont]:
+    """Fits text into a bounding box by adjusting font size and x-coordinate
+
+    @param text: Text to fit into bounding box
+
+    @param img_height: Height of image
+
+    @param img_width: Width of image
+
+    @param font_path: Path to font file
+
+    @param font_size: Font size to start with
+
+    @param min_font_size: Minimum font size to try
+
+    @param rand: Random number generator
+
+    @returns: x and y coordinates to start writing, text split into lines, line heigh, and font style
+    """
+    x_min = int(img_width * 0.05)  # reserves 5% on the left
+    x_max = int(img_width * 0.5)  # starts writing at the center of the image
+    random_x = rand.randint(
+        x_min, x_max
+    )  # generate random x-coordinate to start writing
+
+    max_img_width = int(img_width * 0.95)  # reserves 5% on the right side of image
+
+    while True:
+        # loads font
+        font = ImageFont.truetype(font_path, font_size)
+
+        # wrap text around image
+        lines = wrap_text_for_image_overlay(text, font, int(max_img_width - random_x))
+        _, _, _, line_height = font.getbbox("hg")
+
+        y_min = int(img_height * 0.05)  # reserves 5% on the top
+        y_max = int(img_height * 0.9)  # reseves 10% to the bottom
+        y_max -= (
+            len(lines) * line_height
+        )  # adjust max y-coordinate for text height and number of lines
+
+        if y_max < y_min:
+            if random_x > x_min:
+                # adjust x-coordinate by 10% to try to fit text
+                random_x = int(max(random_x - 0.1 * max_img_width, x_min))
+
+            elif font_size > min_font_size:
+                # reduces font size by 1pt to try to fit text
+                font_size -= 1
+            else:
+                raise ValueError("Text too long to fit onto image!")
+        else:
+            random_y = rand.randint(
+                y_min, y_max
+            )  # generate random y-coordinate to start writing
+            return random_x, random_y, lines, line_height, font
+
+
+def wrap_text_for_image_overlay(
+    text: str, font: ImageFont.FreeTypeFont, max_width: int
+) -> List[str]:
+    """Wraps text around an image
+
+    @param text (str): Text to wrap
+
+    @param font (PIL.ImageFont): Font to use for text
+
+    @param max_width (int): Maximum width of the image
+
+    @returns: List of wrapped text, where each element is a line of text
+    """
+    lines = []
+
+    if font.getbbox(text)[2] <= max_width:
+        return [text]
+    else:
+        words = text.split(" ")
+        line_words = []
+        lines = []
+        for word in words:
+            if font.getbbox(" ".join(line_words + [word]))[2] <= max_width:
+                line_words.append(word)
+            else:
+                lines.append(" ".join(line_words))
+                line_words = [word]
+        lines.append(" ".join(line_words))
+
+    return lines
diff --git a/augly/image/transforms.py b/augly/image/transforms.py
@@ -1493,6 +1493,88 @@ def apply_transform(
         )
 
 
+class OverlayWrapText(BaseTransform):
+    def __init__(
+        self,
+        text: str,
+        min_font_size_ratio: float = 0.02,
+        max_font_size_ratio: float = 0.2,
+        font_file: str = utils.DEFAULT_TEXT_OVERLAY_FONT_PATH,
+        font_size: Optional[float] = None,
+        color: Optional[tuple[int, int, int]] = None,
+        random_seed: Optional[int] = None,
+        p: float = 1.0,
+    ):
+        """Randomly overlay a pre-defined text on an image
+
+        @param img: Image to overlay text on
+
+        @param text: Text to overlay on image
+
+        @param output_path Path to save resulting image
+
+        @param min_font_size_ratio: Minimum font size ratio w.r.t. the image to use for text
+
+        @param max_font_size_ratio: Maximum font size ratio w.r.t. the image to use for text
+
+        @param font_size: Font size to use for text
+
+        @param color: Color to use for text
+
+        @param metadata : List to store metadata about the function execution
+
+        @param p: the probability of the transform being applied; default value is 1.0
+
+        @returns: Image with text overlayed
+        """
+        super().__init__(p)
+        self.text, self.color = text, color
+        self.min_font_size_ratio, self.max_font_size_ratio = (
+            min_font_size_ratio,
+            max_font_size_ratio,
+        )
+        self.font_file, self.font_size = font_file, font_size
+        self.random_seed = random_seed
+
+    def apply_transform(
+        self,
+        image: Image.Image,
+        metadata: Optional[List[Dict[str, Any]]] = None,
+        bboxes: Optional[List[Tuple]] = None,
+        bbox_format: Optional[str] = None,
+    ) -> Image.Image:
+        """
+        Randomly overlay a pre-defined text on an image
+
+        @param image: PIL Image to be augmented
+
+        @param metadata: if set to be a list, metadata about the function execution
+            including its name, the source & dest width, height, etc. will be appended to
+            the inputted list. If set to None, no metadata will be appended or returned
+
+        @param bboxes: a list of bounding boxes can be passed in here if desired. If
+            provided, this list will be modified in place such that each bounding box is
+            transformed according to this function
+
+        @param bbox_format: signifies what bounding box format was used in `bboxes`. Must
+            specify `bbox_format` if `bboxes` is provided. Supported bbox_format values
+            are "pascal_voc", "pascal_voc_norm", "coco", and "yolo"
+
+        @returns: Augmented PIL Image
+        """
+        return F.overlay_wrap_text(
+            image,
+            text=self.text,
+            min_font_size_ratio=self.min_font_size_ratio,
+            max_font_size_ratio=self.max_font_size_ratio,
+            font_file=self.font_file,
+            font_size=self.font_size,
+            color=self.color,
+            metadata=metadata,
+            random_seed=self.random_seed,
+        )
+
+
 class Pad(BaseTransform):
     def __init__(
         self,

diff --git a/augly/tests/assets/expected_metadata/image_tests/expected_metadata.json b/augly/tests/assets/expected_metadata/image_tests/expected_metadata.json
@@ -428,6 +428,25 @@
             "y_pos": 0.5
         }
     ],
+    "overlay_wrap_text": [
+        {
+            "text": "Testing if the function can wrap this awesome text and not go out of bounds",
+            "output_path": null,
+            "max_font_size_ratio": 0.2,
+            "min_font_size_ratio": 0.02,
+            "font_file": "fonts/Allura-Regular.ttf",
+            "font_size": 0.2,
+            "color": null,
+            "intensity": 0,
+            "name": "overlay_wrap_text",
+            "random_seed": 42,
+            "rand": null,
+            "dst_height": 1080,
+            "dst_width": 1920,
+            "src_height": 1080,
+            "src_width": 1920
+        }
+    ],
     "pad": [
         {
             "bbox_format": "yolo",

diff --git a/augly/tests/assets/image/dfdc_expected_output/test_overlay_wrap_text.png b/augly/tests/assets/image/dfdc_expected_output/test_overlay_wrap_text.png