Overlay Elements 🔗
The code for this transform is based on https://github.com/danaaubakirova/doc-augmentation by Dana Aubakirova.
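OverlayElements pastes externally rendered images (here, re-rendered text snippets) onto the input image at locations supplied via per-call metadata. The rest of this notebook renders replacement text for a handful of document regions and pastes it back in place.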
%load_ext autoreload
%autoreload 2
The autoreload extension is already loaded. To reload it, use: %reload_ext autoreload
!pip install -U pillow
Requirement already satisfied: pillow in /opt/homebrew/Caskroom/miniconda/base/envs/albumentations_examples/lib/python3.9/site-packages (11.1.0)
%matplotlib inline
import json

import cv2
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image, ImageDraw, ImageFont

import albumentations as A
def visualize(image):
    plt.figure(figsize=(20, 10))
    plt.axis("off")
    plt.imshow(image)
font_path = "../data/documents/LiberationSerif-Regular.ttf"
image = cv2.imread("../data/documents/docs.png", cv2.IMREAD_COLOR_RGB)
with open("../data/documents/text.json") as f:
    labels = json.load(f)
visualize(image)

transform = A.Compose([A.OverlayElements(p=1)], strict=True, seed=137)
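Here strict=True asks Compose to validate the call signature (unknown arguments raise an error instead of being silently ignored), and seed=137 fixes the pipeline's random state so repeated runs produce the same augmentations; both are optional.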
Render images to paste 🔗
def render_text(bbox_shape, text, font):
    bbox_height, bbox_width = bbox_shape

    # Create an empty RGB image with the size of the bounding box
    bbox_img = Image.new("RGB", (bbox_width, bbox_height), color="white")
    draw = ImageDraw.Draw(bbox_img)

    # Draw the text in red
    draw.text((0, 0), text, fill="red", font=font)

    return np.array(bbox_img)
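To sanity-check the helper, you can render an arbitrary string into a patch; the size, text, and font size below are made up for illustration:

# Hypothetical quick check: render a string into a 40x300-pixel white patch
sample_font = ImageFont.truetype(font_path, 32)
patch = render_text((40, 300), "sample text", sample_font)
print(patch.shape, patch.dtype)  # (40, 300, 3) uint8
visualize(patch)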
# Pick 10 text regions to re-render (np.random.choice samples with replacement by default)
bbox_indices_to_update = np.random.choice(range(len(labels["text"])), 10)
labels.keys()
dict_keys(['text', 'bbox', 'poly', 'score'])
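The JSON holds parallel lists per text region: text (the strings) and bbox (normalized left, top, width, height, as unpacked below) are used here; poly and score are presumably the region polygons and detection confidences, and are not needed for this example.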
image_height, image_width = image.shape[:2]
num_channels = image.shape[2] if len(image.shape) == 3 else 1
metadata = []

for index in bbox_indices_to_update:
    selected_bbox = labels["bbox"][index]

    # You may apply any transform you want to the text, such as random deletion,
    # swapping words, or replacing them with synonyms
    text = labels["text"][index]

    left, top, width_norm, height_norm = selected_bbox

    bbox_height = int(image_height * height_norm)
    bbox_width = int(image_width * width_norm)

    font = ImageFont.truetype(font_path, int(0.90 * bbox_height))

    overlay_image = render_text((bbox_height, bbox_width), text, font)

    metadata += [
        {
            "image": overlay_image,
            "bbox": (left, top, left + width_norm, top + height_norm),
        },
    ]
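Each entry now pairs a rendered patch with where it should land; the bbox is in normalized (x_min, y_min, x_max, y_max) coordinates, matching how it was unpacked above. A quick way to inspect one entry:

# Inspect the first metadata entry (exact values depend on the random indices)
entry = metadata[0]
print(entry["image"].shape)  # pixel-space patch, e.g. (bbox_height, bbox_width, 3)
print(entry["bbox"])         # normalized (x_min, y_min, x_max, y_max)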
Paste the new text onto the image 🔗
transformed = transform(image=image, overlay_metadata=metadata)
visualize(transformed["image"])

As part of an augmentation pipeline 🔗
transform_complex = A.Compose(
    [
        A.OverlayElements(p=1),
        A.RandomCrop(p=1, height=1024, width=1024),
        A.PlanckianJitter(p=1),
        A.Affine(p=1, scale=0.9, shear=10, translate_percent=0.05, rotate=10),
    ],
)
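Note the ordering: OverlayElements comes first, so the pasted text is baked into the image before RandomCrop, PlanckianJitter, and Affine run, and the overlays are cropped, color-shifted, and warped together with the rest of the document.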
transformed = transform_complex(image=image, overlay_metadata=metadata)
visualize(transformed["image"])
