Overlay Elements 🔗
The code for this transform is based on https://github.com/danaaubakirova/doc-augmentation by Dana Aubakirova.
%load_ext autoreload
%autoreload 2
The autoreload extension is already loaded. To reload it, use: %reload_ext autoreload
!pip install -U pillow
Requirement already satisfied: pillow in /opt/homebrew/Caskroom/miniconda/base/envs/albumentations_examples/lib/python3.9/site-packages (11.1.0)
%matplotlib inline
import cv2
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image, ImageDraw, ImageFont
from pylab import *
import albumentations as A
import json
def visualize(image):
    """Display ``image`` in a new 20x10-inch matplotlib figure with the axes hidden."""
    figure_size = (20, 10)
    plt.figure(figsize=figure_size)
    # Hide ticks and frame so only the picture itself is shown.
    plt.axis("off")
    plt.imshow(image)
# Inputs for the demo: the font used to render replacement text, the document
# image, and the JSON file with its text annotations.
font_path = "../data/documents/LiberationSerif-Regular.ttf"
image_path = "../data/documents/docs.png"

image = cv2.imread(image_path, cv2.IMREAD_COLOR_RGB)
# cv2.imread reports a missing or unreadable file by returning None rather than
# raising, so fail here with a clear message instead of later inside plotting.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Read the annotations with an explicit encoding so decoding does not depend on
# the platform's locale default.
with open("../data/documents/text.json", encoding="utf-8") as f:
    labels = json.load(f)

visualize(image)

# Pipeline containing only the overlay transform, applied on every call (p=1).
transform = A.Compose([A.OverlayElements(p=1)], strict=True, seed=137)
Render images to paste 🔗
def render_text(bbox_shape, text, font):
    """Render ``text`` in red on a white RGB canvas sized like a bounding box.

    Args:
        bbox_shape: ``(height, width)`` of the canvas in pixels.
        text: String to draw.
        font: Font object handed to ``ImageDraw.text``.

    Returns:
        The rendered canvas converted with ``np.array``.
    """
    height, width = bbox_shape
    # PIL takes sizes as (width, height), the reverse of the incoming shape.
    canvas = Image.new("RGB", (width, height), color="white")
    # Text is anchored at the canvas' top-left corner.
    ImageDraw.Draw(canvas).text((0, 0), text, fill="red", font=font)
    return np.array(canvas)
# Pick up to 10 distinct annotations to re-render. Sampling without replacement
# avoids choosing the same box twice, the size is capped so short label lists do
# not raise, and the seeded generator makes the selection repeatable.
num_labels = len(labels["text"])
rng = np.random.default_rng(137)
bbox_indices_to_update = rng.choice(num_labels, size=min(10, num_labels), replace=False)
# Inspect which annotation fields the loaded JSON provides.
labels.keys()
dict_keys(['text', 'bbox', 'poly', 'score'])
# Pixel size of the target image; box coordinates are scaled by it below.
image_height, image_width = image.shape[:2]
num_channels = image.shape[2] if len(image.shape) == 3 else 1

# One entry per selected annotation: the rendered text image plus the box it
# should be pasted into, in the form passed to the transform as overlay_metadata.
metadata = []
for index in bbox_indices_to_update:
    selected_bbox = labels["bbox"][index]

    # You may apply any transforms you want to the text here, such as random
    # deletion, swapping words, applying synonyms, etc.
    text = labels["text"][index]

    # The box is unpacked as (left, top, width, height) and treated as
    # fractions of the image size.
    left, top, width_norm, height_norm = selected_bbox
    bbox_height = int(image_height * height_norm)
    bbox_width = int(image_width * width_norm)
    if bbox_height <= 0 or bbox_width <= 0:
        # The box rounds down to zero pixels, so there is nothing to render.
        continue

    # Font is sized to 90% of the box height; clamp to 1 because Pillow
    # requires a positive font size.
    font_size = max(1, int(0.90 * bbox_height))
    font = ImageFont.truetype(font_path, font_size)

    overlay_image = render_text((bbox_height, bbox_width), text, font)

    metadata.append(
        {
            "image": overlay_image,
            # Convert (left, top, width, height) to (left, top, right, bottom).
            "bbox": (left, top, left + width_norm, top + height_norm),
        },
    )
Paste new text onto the image 🔗
# Run the overlay pipeline, handing it the rendered patches and their boxes.
transformed = transform(image=image, overlay_metadata=metadata)

# Display the image with the new text pasted in.
pasted_image = transformed["image"]
visualize(pasted_image)
As a part of the augmentation pipeline 🔗
# Overlay followed by geometric and colour augmentations. `strict` and `seed`
# match the single-transform pipeline defined earlier so that arguments are
# validated and the random crop, jitter and affine results are repeatable.
transform_complex = A.Compose(
    [
        A.OverlayElements(p=1),
        A.RandomCrop(p=1, height=1024, width=1024),
        A.PlanckianJitter(p=1),
        A.Affine(p=1, scale=0.9, shear=10, translate_percent=0.05, rotate=10),
    ],
    strict=True,
    seed=137,
)

transformed = transform_complex(image=image, overlay_metadata=metadata)

visualize(transformed["image"])