Skip to content

Overlay Elements

The code for this transform is based on https://github.com/danaaubakirova/doc-augmentation by Dana Aubakirova.

Python
%load_ext autoreload
%autoreload 2
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Python
!pip install -U pillow
Requirement already satisfied: pillow in /Users/vladimiriglovikov/anaconda3/envs/albumentations/lib/python3.8/site-packages (10.3.0)
Python
%matplotlib inline
Python
import cv2
from matplotlib import pyplot as plt
Python
from PIL import ImageDraw, ImageFont, Image
Python
from pylab import *
Python
import albumentations as A
Python
import json
Python
def visualize(image, figsize=(20, 10)):
    """Display an image in a new matplotlib figure with the axes hidden.

    Args:
        image: Image data accepted by ``plt.imshow``.
        figsize: Figure size forwarded to ``plt.figure``. Defaults to
            ``(20, 10)``, the size this helper always used before.
    """
    plt.figure(figsize=figsize)
    plt.axis("off")
    plt.imshow(image)
Python
def load_rgb(image_path):
    """Read an image file and return it with channels in RGB order.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The array produced by ``cv2.imread`` after conversion with
        ``cv2.COLOR_BGR2RGB``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an opaque error inside cvtColor.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
Python
font_path = "../data/documents/LiberationSerif-Regular.ttf"
Python
image = load_rgb("../data/documents/docs.png")
Python
# Pin the encoding so non-ASCII label text decodes the same way regardless of
# the platform's default locale encoding.
with open("../data/documents/text.json", encoding="utf-8") as f:
    labels = json.load(f)
Python
visualize(image)

png

Python
# Pipeline containing a single step: OverlayElements with p=1.
transform = A.Compose(
    [
        A.OverlayElements(p=1),
    ]
)

Render images to paste

Python
def render_text(bbox_shape, text, font):
    """Render ``text`` in red on a white RGB canvas sized to a bounding box.

    Args:
        bbox_shape: ``(height, width)`` pair giving the canvas size.
        text: String to draw, starting at the top-left corner ``(0, 0)``.
        font: Font object passed to ``ImageDraw.text``.

    Returns:
        A NumPy array built from the rendered PIL image.
    """
    canvas_height, canvas_width = bbox_shape

    # Image.new takes its size as (width, height), the reverse of bbox_shape.
    canvas = Image.new("RGB", (canvas_width, canvas_height), color="white")

    # Draw directly on the blank canvas in red.
    ImageDraw.Draw(canvas).text((0, 0), text, fill="red", font=font)

    return np.array(canvas)
Python
# Pick 10 random label indices. np.random.choice samples with replacement by
# default, so the same index may be drawn more than once.
candidate_indices = range(len(labels["text"]))
bbox_indices_to_update = np.random.choice(candidate_indices, size=10)
Python
labels.keys()
dict_keys(['text', 'bbox', 'poly', 'score'])
Python
# Image dimensions; an array without a third axis is treated as having a
# single channel.
image_height, image_width = image.shape[:2]
if len(image.shape) == 3:
    num_channels = image.shape[2]
else:
    num_channels = 1
Python
# Build the overlay descriptions passed to the transform: one rendered text
# patch plus its target box for every sampled label index.
metadata = []
for index in bbox_indices_to_update:
    selected_bbox = labels["bbox"][index]

    # You may apply any transforms you want to the text here, such as random
    # deletion, swapping words, substituting synonyms, etc.
    text = labels["text"][index]

    # Label boxes are unpacked as (left, top, width, height); width and height
    # are scaled by the image size below, i.e. treated as fractions of it.
    left, top, width_norm, height_norm = selected_bbox

    bbox_height = int(image_height * height_norm)
    bbox_width = int(image_width * width_norm)

    # A box that rounds down to zero pixels cannot hold any rendered text.
    if bbox_height < 1 or bbox_width < 1:
        continue

    # Font size is 90% of the box height, clamped to at least 1: a 1-pixel-high
    # box would otherwise give int(0.9) == 0, which is not a valid font size.
    font_size = max(1, int(0.90 * bbox_height))
    font = ImageFont.truetype(font_path, font_size)

    overlay_image = render_text((bbox_height, bbox_width), text, font)

    metadata.append(
        {
            "image": overlay_image,
            # Target box converted to (x_min, y_min, x_max, y_max).
            "bbox": (left, top, left + width_norm, top + height_norm),
        }
    )

Paste new text onto the image

Python
# Run the overlay-only pipeline; the rendered patches are passed alongside
# the image through the overlay_metadata keyword.
transformed = transform(image=image, overlay_metadata=metadata)
Python
visualize(transformed["image"])

png

As a part of the augmentation pipeline

Python
# Overlay pasting followed by crop, colour jitter and affine steps, each
# configured with p=1.
transform_complex = A.Compose(
    [
        A.OverlayElements(p=1),
        A.RandomCrop(height=1024, width=1024, p=1),
        A.PlanckianJitter(p=1),
        A.Affine(p=1),
    ]
)
Python
# Run the multi-step pipeline on the same image and overlay metadata; this
# rebinds `transformed` to the new result.
transformed = transform_complex(image=image, overlay_metadata=metadata)
Python
visualize(transformed["image"])

png

Python