Geometric functional transforms (augmentations.geometric.functional)¶

def bbox_d4 (bbox, group_member, rows=None, cols=None) [view source on GitHub]¶

Applies a D_4 symmetry group transformation to a bounding box.

The function transforms a bounding box according to the specified group member from the D_4 group. These transformations include rotations and reflections, specified to work on an image's bounding box given its dimensions.

• bbox (BoxInternalType): The bounding box to transform. This should be a structure specifying coordinates like (xmin, ymin, xmax, ymax).
• group_member (D4Type): A string identifier for the D_4 group transformation to apply. Valid values are 'e', 'r90', 'r180', 'r270', 'v', 'hvt', 'h', 't'.
• rows (int): The number of rows in the image, used to adjust transformations that depend on image dimensions.
• cols (int): The number of columns in the image, used for the same purposes as rows.
• BoxInternalType: The transformed bounding box.
• ValueError: If an invalid group member is specified.

Examples:

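• Applying a 90-degree rotation: bbox_d4((0.1, 0.2, 0.5, 0.6), 'r90', 100, 100) returns (0.2, 0.5, 0.6, 0.9). Note that the coordinates are normalized to the [0, 1] range (the helpers compute values such as 1 - x_max), so pixel coordinates must be normalized before calling this function.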
Source code in albumentations/augmentations/geometric/functional.py
Python
def bbox_d4(
    bbox: BoxInternalType,
    group_member: D4Type,
    rows: int | None = None,
    cols: int | None = None,
) -> BoxInternalType:
    """Apply one element of the D_4 symmetry group to a bounding box.

    The eight group members cover the identity, rotations by multiples of 90 degrees,
    the two axis flips and the two diagonal reflections. The actual coordinate work is
    delegated to ``bbox_rot90``, ``bbox_vflip``, ``bbox_hflip`` and ``bbox_transpose``.

    Args:
        bbox: A bounding box whose first four items are (x_min, y_min, x_max, y_max).
            The helpers mirror coordinates as ``1 - value``, i.e. they work on
            coordinates normalized to the [0, 1] range.
        group_member: One of 'e', 'r90', 'r180', 'r270', 'v', 'hvt', 'h', 't'.
        rows: Image rows. Only forwarded to ``bbox_vflip``.
        cols: Image cols. Only forwarded to ``bbox_vflip``.

    Returns:
        The transformed bounding box.

    Raises:
        ValueError: If ``group_member`` is not one of the eight valid identifiers.

    """
    # Lambdas keep helper lookup lazy: only the selected operation is ever evaluated.
    operations = {
        "e": lambda box: box,  # identity
        "r90": lambda box: bbox_rot90(box, 1),  # one quarter turn
        "r180": lambda box: bbox_rot90(box, 2),  # two quarter turns
        "r270": lambda box: bbox_rot90(box, 3),  # three quarter turns
        "v": lambda box: bbox_vflip(box, rows, cols),  # vertical flip
        "hvt": lambda box: bbox_transpose(bbox_rot90(box, 2)),  # reflect over the anti-diagonal
        "h": lambda box: bbox_hflip(box),  # horizontal flip
        "t": lambda box: bbox_transpose(box),  # reflect over the main diagonal
    }

    operation = operations.get(group_member)
    if operation is None:
        raise ValueError(f"Invalid group member: {group_member}")
    return operation(bbox)


def bbox_flip (bbox, d, rows=None, cols=None) [view source on GitHub]¶

Flip a bounding box either vertically, horizontally or both depending on the value of d.

Parameters:

Name Type Description
bbox BoxInternalType

A bounding box (x_min, y_min, x_max, y_max).

d int

Flip direction: 0 for a vertical flip, 1 for a horizontal flip, -1 for both (a horizontal flip followed by a vertical flip).

rows int | None

Image rows.

cols int | None

Image cols.

Returns:

Type Description
BoxInternalType

A bounding box (x_min, y_min, x_max, y_max).

Exceptions:

Type Description
ValueError

if value of d is not -1, 0 or 1.

Source code in albumentations/augmentations/geometric/functional.py
Python
def bbox_flip(bbox: BoxInternalType, d: int, rows: int | None = None, cols: int | None = None) -> BoxInternalType:
    """Flip a bounding box vertically, horizontally, or along both axes.

    Args:
        bbox: A bounding box (x_min, y_min, x_max, y_max).
        d: Flip selector:
            * 0 - vertical flip,
            * 1 - horizontal flip,
            * -1 - horizontal flip followed by a vertical flip.
        rows: Image rows. Accepted for signature compatibility; not used here.
        cols: Image cols. Accepted for signature compatibility; not used here.

    Returns:
        A bounding box (x_min, y_min, x_max, y_max).

    Raises:
        ValueError: if value of d is not -1, 0 or 1.

    """
    if d == 0:
        return bbox_vflip(bbox)
    if d == 1:
        return bbox_hflip(bbox)
    if d == -1:
        # Both axes: mirror horizontally first, then vertically.
        return bbox_vflip(bbox_hflip(bbox))
    raise ValueError(f"Invalid d value {d}. Valid values are -1, 0 and 1")


def bbox_hflip (bbox, rows=None, cols=None) [view source on GitHub]¶

Flip a bounding box horizontally around the y-axis.

Parameters:

Name Type Description
bbox BoxInternalType

A bounding box (x_min, y_min, x_max, y_max).

rows int | None

Image rows.

cols int | None

Image cols.

Returns:

Type Description
BoxInternalType

A bounding box (x_min, y_min, x_max, y_max).

Source code in albumentations/augmentations/geometric/functional.py
Python
def bbox_hflip(bbox: BoxInternalType, rows: int | None = None, cols: int | None = None) -> BoxInternalType:
    """Mirror a bounding box left-to-right.

    Each x coordinate is mapped to ``1 - x`` (so the box is expected in coordinates
    normalized to [0, 1]); the old right edge becomes the new left edge and vice versa.
    Only the first four items of ``bbox`` are used; anything after them is dropped.

    Args:
        bbox: A bounding box (x_min, y_min, x_max, y_max).
        rows: Image rows. Not used by this function.
        cols: Image cols. Not used by this function.

    Returns:
        A bounding box (x_min, y_min, x_max, y_max).

    """
    left, top, right, bottom = bbox[:4]
    mirrored_left = 1 - right
    mirrored_right = 1 - left
    return mirrored_left, top, mirrored_right, bottom


def bbox_rot90 (bbox, factor, rows=None, cols=None) [view source on GitHub]¶

Rotates a bounding box by 90 degrees CCW (see np.rot90)

Parameters:

Name Type Description
bbox BoxInternalType

A bounding box tuple (x_min, y_min, x_max, y_max).

factor int

Number of CCW rotations. Must be in set {0, 1, 2, 3} See np.rot90.

rows int | None

Image rows.

cols int | None

Image cols.

Returns:

Type Description
tuple

A bounding box tuple (x_min, y_min, x_max, y_max).

Source code in albumentations/augmentations/geometric/functional.py
Python
def bbox_rot90(bbox: BoxInternalType, factor: int, rows: int | None = None, cols: int | None = None) -> BoxInternalType:
    """Rotate a bounding box counter-clockwise in 90-degree steps (see np.rot90).

    Coordinates are mirrored as ``1 - value``, so the box is expected in coordinates
    normalized to [0, 1]. With ``factor == 0`` the input is returned unchanged.

    Args:
        bbox: A bounding box tuple (x_min, y_min, x_max, y_max).
        factor: Number of CCW quarter turns. Must be in set {0, 1, 2, 3}.
        rows: Image rows. Not used by this function.
        cols: Image cols. Not used by this function.

    Returns:
        tuple: A bounding box tuple (x_min, y_min, x_max, y_max).

    Raises:
        ValueError: If ``factor`` is not in {0, 1, 2, 3}.

    """
    if factor not in {0, 1, 2, 3}:
        raise ValueError("Parameter n must be in set {0, 1, 2, 3}")

    left, top, right, bottom = bbox[:4]
    if factor == 1:
        return top, 1 - right, bottom, 1 - left
    if factor == ROT90_180_FACTOR:
        return 1 - right, 1 - bottom, 1 - left, 1 - top
    if factor == ROT90_270_FACTOR:
        return 1 - bottom, left, 1 - top, right
    # No rotation requested: hand the box back untouched.
    return bbox


def bbox_rotate (bbox, angle, method, rows, cols) [view source on GitHub]¶

Rotates a bounding box by angle degrees.

Parameters:

Name Type Description
bbox BoxInternalType

A bounding box (x_min, y_min, x_max, y_max).

angle float

Angle of rotation in degrees.

method str

Rotation method used. Should be one of: "largest_box", "ellipse". This argument is required; the function signature defines no default value.

rows int

Image rows.

cols int

Image cols.

Returns:

Type Description
BoxInternalType

A bounding box (x_min, y_min, x_max, y_max).

Source code in albumentations/augmentations/geometric/functional.py
Python
def bbox_rotate(bbox: BoxInternalType, angle: float, method: str, rows: int, cols: int) -> BoxInternalType:
    """Rotate a bounding box by ``angle`` degrees and return its axis-aligned envelope.

    The box is shifted by -0.5 so the rotation happens around the point (0.5, 0.5),
    rotated, and shifted back. The x coordinates are scaled by ``cols / rows`` before
    the rotation and unscaled afterwards to account for the image aspect ratio.

    Args:
        bbox: A bounding box (x_min, y_min, x_max, y_max).
        angle: Angle of rotation in degrees.
        method: Rotation method used. Should be one of: "largest_box", "ellipse".
            "largest_box" rotates the four corners of the box; "ellipse" rotates
            360 points sampled on the ellipse inscribed in the box.
        rows: Image rows.
        cols: Image cols.

    Returns:
        A bounding box (x_min, y_min, x_max, y_max).

    Raises:
        ValueError: If ``method`` is neither "largest_box" nor "ellipse".

    Reference:
        https://arxiv.org/abs/2109.13488

    """
    x_min, y_min, x_max, y_max = bbox[:4]
    scale = cols / float(rows)
    if method == "largest_box":
        x = np.array([x_min, x_max, x_max, x_min]) - 0.5
        y = np.array([y_min, y_min, y_max, y_max]) - 0.5
    elif method == "ellipse":
        w = (x_max - x_min) / 2
        h = (y_max - y_min) / 2
        data = np.arange(0, 360, dtype=np.float32)
        x = w * np.sin(np.radians(data)) + (w + x_min - 0.5)
        y = h * np.cos(np.radians(data)) + (h + y_min - 0.5)
    else:
        raise ValueError(f"Method {method} is not a valid rotation method.")

    # The public contract is degrees, but np.cos / np.sin operate on radians.
    angle_rad = np.deg2rad(angle)
    cos_a = np.cos(angle_rad)
    sin_a = np.sin(angle_rad)

    x_t = (cos_a * x * scale + sin_a * y) / scale + 0.5
    y_t = (-sin_a * x * scale + cos_a * y) + 0.5

    return min(x_t), min(y_t), max(x_t), max(y_t)


def bbox_transpose (bbox, rows=None, cols=None) [view source on GitHub]¶

Transposes a bounding box by swapping its x and y coordinates (reflection over the main diagonal).

Parameters:

Name Type Description
bbox KeypointInternalType

A bounding box (x_min, y_min, x_max, y_max).

rows int | None

Image rows.

cols int | None

Image cols.

Returns:

Type Description
KeypointInternalType

A bounding box tuple (x_min, y_min, x_max, y_max).

Exceptions:

Type Description
ValueError

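Stale entry: the function takes no axis argument and its body contains no raise statement, so this error is not raised by the current implementation.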

Source code in albumentations/augmentations/geometric/functional.py
Python
def bbox_transpose(
    bbox: KeypointInternalType,
    rows: int | None = None,
    cols: int | None = None,
) -> KeypointInternalType:
    """Transpose a bounding box by swapping its x and y coordinates.

    Only the first four items of ``bbox`` are used; anything after them is dropped.

    Args:
        bbox: A bounding box (x_min, y_min, x_max, y_max).
        rows: Image rows. Not used by this function.
        cols: Image cols. Not used by this function.

    Returns:
        A bounding box tuple (x_min, y_min, x_max, y_max) with the axes exchanged.

    """
    left, top, right, bottom = bbox[:4]
    # The old y-extent becomes the new x-extent and vice versa.
    swapped = (top, left, bottom, right)
    return swapped


def bbox_vflip (bbox, rows=None, cols=None) [view source on GitHub]¶

Flip a bounding box vertically around the x-axis.

Parameters:

Name Type Description
bbox BoxInternalType

A bounding box (x_min, y_min, x_max, y_max).

rows int | None

Image rows.

cols int | None

Image cols.

Returns:

Type Description
tuple

A bounding box (x_min, y_min, x_max, y_max).

Source code in albumentations/augmentations/geometric/functional.py
Python
def bbox_vflip(bbox: BoxInternalType, rows: int | None = None, cols: int | None = None) -> BoxInternalType:
    """Mirror a bounding box top-to-bottom.

    Each y coordinate is mapped to ``1 - y`` (so the box is expected in coordinates
    normalized to [0, 1]); the old bottom edge becomes the new top edge and vice versa.
    Only the first four items of ``bbox`` are used; anything after them is dropped.

    Args:
        bbox: A bounding box (x_min, y_min, x_max, y_max).
        rows: Image rows. Not used by this function.
        cols: Image cols. Not used by this function.

    Returns:
        tuple: A bounding box (x_min, y_min, x_max, y_max).

    """
    left, top, right, bottom = bbox[:4]
    mirrored_top = 1 - bottom
    mirrored_bottom = 1 - top
    return left, mirrored_top, right, mirrored_bottom


def d4 (img, group_member) [view source on GitHub]¶

Applies a D_4 symmetry group transformation to an image array.

This function manipulates an image using transformations such as rotations and flips, corresponding to the D_4 dihedral group symmetry operations. Each transformation is identified by a unique group member code.

• img (np.ndarray): The input image array to transform.
• group_member (D4Type): A string identifier indicating the specific transformation to apply. Valid codes include:
• 'e': Identity (no transformation).
• 'r90': Rotate 90 degrees counterclockwise.
• 'r180': Rotate 180 degrees.
• 'r270': Rotate 270 degrees counterclockwise.
• 'v': Vertical flip.
• 'hvt': Transpose over second diagonal
• 'h': Horizontal flip.
• 't': Transpose (reflect over the main diagonal).
• np.ndarray: The transformed image array.
• ValueError: If an invalid group member is specified.

Examples:

• Rotating an image by 90 degrees: transformed_image = d4(original_image, 'r90')
• Applying a horizontal flip to an image: transformed_image = d4(original_image, 'h')
Source code in albumentations/augmentations/geometric/functional.py
Python
def d4(img: np.ndarray, group_member: D4Type) -> np.ndarray:
    """Apply one element of the D_4 symmetry group to an image array.

    The work is delegated to the module's ``rot90``, ``vflip``, ``hflip`` and
    ``transpose`` helpers, selected by the group member code.

    Args:
        img: The input image array to transform.
        group_member: Code of the transformation to apply:
            * 'e' - identity (no transformation),
            * 'r90' - rotate 90 degrees counterclockwise,
            * 'r180' - rotate 180 degrees,
            * 'r270' - rotate 270 degrees counterclockwise,
            * 'v' - vertical flip,
            * 'hvt' - reflect over the anti-diagonal (rotate 180 degrees, then transpose),
            * 'h' - horizontal flip,
            * 't' - transpose (reflect over the main diagonal).

    Returns:
        The transformed image array.

    Raises:
        ValueError: If ``group_member`` is not one of the codes listed above.

    Examples:
        transformed_image = d4(original_image, 'r90')
        transformed_image = d4(original_image, 'h')

    """
    operations = {
        "e": lambda image: image,  # identity
        "r90": lambda image: rot90(image, 1),  # one quarter turn
        "r180": lambda image: rot90(image, 2),  # two quarter turns
        "r270": lambda image: rot90(image, 3),  # three quarter turns
        "v": vflip,  # vertical flip
        "hvt": lambda image: transpose(rot90(image, 2)),  # reflect over the anti-diagonal
        "h": hflip,  # horizontal flip
        "t": transpose,  # reflect over the main diagonal
    }

    operation = operations.get(group_member)
    if operation is None:
        raise ValueError(f"Invalid group member: {group_member}")
    return operation(img)


def elastic_transform (img, alpha, sigma, interpolation, border_mode, value=None, random_state=None, approximate=False, same_dxdy=False) [view source on GitHub]¶

Apply an elastic transformation to an image.

Source code in albumentations/augmentations/geometric/functional.py
Python
@preserve_channel_dim
def elastic_transform(
    img: np.ndarray,
    alpha: float,
    sigma: float,
    interpolation: int,
    border_mode: int,
    value: ColorType | None = None,
    random_state: np.random.RandomState | None = None,
    approximate: bool = False,
    same_dxdy: bool = False,
) -> np.ndarray:
    """Apply an elastic transformation to an image.

    Dispatches to ``elastic_transform_approximate`` when ``approximate`` is truthy and
    to ``elastic_transform_precise`` otherwise; all remaining arguments are forwarded
    unchanged and the chosen implementation's result is returned.
    """
    implementation = elastic_transform_approximate if approximate else elastic_transform_precise
    return implementation(
        img,
        alpha,
        sigma,
        interpolation,
        border_mode,
        value,
        random_state,
        same_dxdy,
    )


def elastic_transform_approximate (img, alpha, sigma, interpolation, border_mode, value, random_state, same_dxdy=False) [view source on GitHub]¶

Apply an approximate elastic transformation to an image.

Source code in albumentations/augmentations/geometric/functional.py
Python
def elastic_transform_approximate(
    img: np.ndarray,
    alpha: float,
    sigma: float,
    interpolation: int,
    border_mode: int,
    value: ColorType | None,
    random_state: np.random.RandomState | None,
    same_dxdy: bool = False,
) -> np.ndarray:
    """Apply an approximate elastic transformation to an image.

    Forwards every argument to ``elastic_transform_helper`` with a fixed 17x17
    ``kernel_size`` and returns its result.
    """
    fixed_kernel = (17, 17)
    return elastic_transform_helper(
        img,
        alpha,
        sigma,
        interpolation,
        border_mode,
        value,
        random_state,
        same_dxdy,
        kernel_size=fixed_kernel,
    )


def elastic_transform_precise (img, alpha, sigma, interpolation, border_mode, value, random_state, same_dxdy=False) [view source on GitHub]¶

Apply a precise elastic transformation to an image.

This function applies an elastic deformation to the input image using a precise method. The transformation involves creating random displacement fields, smoothing them using Gaussian blur with adaptive kernel size, and then remapping the image according to the smoothed displacement fields.

Parameters:

Name Type Description
img np.ndarray

Input image.

alpha float

Scaling factor for the random displacement fields.

sigma float

Standard deviation for Gaussian blur applied to the displacement fields.

interpolation int

Interpolation method to be used (e.g., cv2.INTER_LINEAR).

border_mode int

Pixel extrapolation method (e.g., cv2.BORDER_CONSTANT).

value ColorType | None

Border value if border_mode is cv2.BORDER_CONSTANT.

random_state np.random.RandomState | None

Random state for reproducibility.

same_dxdy bool

If True, use the same displacement field for both x and y directions.

Returns:

Type Description
np.ndarray

Transformed image with precise elastic deformation applied.

Source code in albumentations/augmentations/geometric/functional.py
Python
def elastic_transform_precise(
    img: np.ndarray,
    alpha: float,
    sigma: float,
    interpolation: int,
    border_mode: int,
    value: ColorType | None,
    random_state: np.random.RandomState | None,
    same_dxdy: bool = False,
) -> np.ndarray:
    """Apply a precise elastic transformation to an image.

    Forwards every argument to ``elastic_transform_helper`` with ``kernel_size=(0, 0)``
    and returns its result.
    NOTE(review): a (0, 0) kernel presumably lets the blur derive its kernel size from
    ``sigma`` (the "adaptive kernel size" of the precise method) - confirm in the helper.

    Args:
        img (np.ndarray): Input image.
        alpha (float): Scaling factor for the random displacement fields.
        sigma (float): Standard deviation for Gaussian blur applied to the displacement fields.
        interpolation (int): Interpolation method to be used (e.g., cv2.INTER_LINEAR).
        border_mode (int): Pixel extrapolation method (e.g., cv2.BORDER_CONSTANT).
        value (ColorType | None): Border value if border_mode is cv2.BORDER_CONSTANT.
        random_state (np.random.RandomState | None): Random state for reproducibility.
        same_dxdy (bool, optional): If True, use the same displacement field for both x and y directions.

    Returns:
        np.ndarray: Transformed image with precise elastic deformation applied.
    """
    sigma_derived_kernel = (0, 0)
    return elastic_transform_helper(
        img,
        alpha,
        sigma,
        interpolation,
        border_mode,
        value,
        random_state,
        same_dxdy,
        kernel_size=sigma_derived_kernel,
    )


def find_keypoint (position, distance_map, threshold, inverted) [view source on GitHub]¶

Determine if a valid keypoint can be found at the given position.

Source code in albumentations/augmentations/geometric/functional.py
Python
def find_keypoint(
    position: tuple[int, int],
    distance_map: np.ndarray,
    threshold: float | None,
    inverted: bool,
) -> tuple[float, float] | None:
    """Decide whether the map value at ``position`` qualifies as a keypoint.

    Args:
        position: Index into ``distance_map`` given as (y, x).
        distance_map: Map that is indexed as ``distance_map[y, x]``.
        threshold: Acceptance threshold, or None to accept unconditionally.
        inverted: Selects the comparison direction. When falsy, values greater than or
            equal to the threshold are rejected; when truthy, values below it are rejected.

    Returns:
        The keypoint as (x, y) floats, or None when the value is rejected.
    """
    row, col = position
    # Always read the value first so an out-of-range position fails even without a threshold.
    value = distance_map[row, col]

    if threshold is not None:
        rejected = value < threshold if inverted else value >= threshold
        if rejected:
            return None

    return float(col), float(row)


def flip_bboxes (bboxes, flip_horizontal=False, flip_vertical=False, rows=1, cols=1) [view source on GitHub]¶

Flip bounding boxes horizontally and/or vertically.

Parameters:

Name Type Description
bboxes np.ndarray

Array of bounding boxes with shape (n, m) where each row is [x_min, y_min, x_max, y_max, ...].

flip_horizontal bool

Whether to flip horizontally.

flip_vertical bool

Whether to flip vertically.

rows int

Height of the image.

cols int

Width of the image.

Returns:

Type Description
np.ndarray

Flipped bounding boxes.

Source code in albumentations/augmentations/geometric/functional.py
Python
def flip_bboxes(
    bboxes: np.ndarray,
    flip_horizontal: bool = False,
    flip_vertical: bool = False,
    rows: int = 1,
    cols: int = 1,
) -> np.ndarray:
    """Flip bounding boxes horizontally and/or vertically.

    The input array is copied, so the caller's data is left untouched. Columns beyond
    the first four are carried over unchanged.

    Args:
        bboxes (np.ndarray): Array of bounding boxes with shape (n, m) where each row is
            [x_min, y_min, x_max, y_max, ...].
        flip_horizontal (bool): Whether to flip horizontally (x becomes ``cols - x``).
        flip_vertical (bool): Whether to flip vertically (y becomes ``rows - y``).
        rows (int): Height of the image.
        cols (int): Width of the image.

    Returns:
        np.ndarray: Flipped bounding boxes.
    """
    result = bboxes.copy()

    if flip_horizontal:
        # Mirroring swaps the roles of the min and max edges.
        new_x_min = cols - result[:, 2]
        new_x_max = cols - result[:, 0]
        result[:, 0] = new_x_min
        result[:, 2] = new_x_max

    if flip_vertical:
        new_y_min = rows - result[:, 3]
        new_y_max = rows - result[:, 1]
        result[:, 1] = new_y_min
        result[:, 3] = new_y_max

    return result


def from_distance_maps (distance_maps, inverted, if_not_found_coords, threshold) [view source on GitHub]¶

Convert outputs of to_distance_maps to KeypointsOnImage. This is the inverse of to_distance_maps.

Source code in albumentations/augmentations/geometric/functional.py
Python
def from_distance_maps(
    distance_maps: np.ndarray,
    inverted: bool,
    if_not_found_coords: Sequence[int] | dict[str, Any] | None,
    threshold: float | None,
) -> list[tuple[float, float]]:
    """Recover keypoint coordinates from a stack of distance maps.

    This is the inverse of to_distance_maps. For every channel the extremum of the map
    (argmax when ``inverted``, argmin otherwise) is located and handed to
    ``find_keypoint``. When no keypoint is accepted for a channel, either nothing is
    emitted or the fallback coordinates are used, as decided by
    ``validate_if_not_found_coords(if_not_found_coords)``.

    Args:
        distance_maps: Array of shape (height, width, nb_keypoints).
        inverted: Whether the maps are inverted (keypoint at the maximum).
        if_not_found_coords: Fallback specification passed to ``validate_if_not_found_coords``.
        threshold: Threshold forwarded to ``find_keypoint``; None disables thresholding.

    Returns:
        A list of (x, y) tuples.

    Raises:
        ValueError: If ``distance_maps`` does not have the expected number of dimensions.
    """
    if distance_maps.ndim != NUM_MULTI_CHANNEL_DIMENSIONS:
        msg = f"Expected three-dimensional input, got {distance_maps.ndim} dimensions and shape {distance_maps.shape}."
        raise ValueError(msg)
    height, width, nb_keypoints = distance_maps.shape

    drop_if_not_found, if_not_found_x, if_not_found_y = validate_if_not_found_coords(if_not_found_coords)

    locate_extremum = np.argmax if inverted else np.argmin
    fallback = (if_not_found_x, if_not_found_y)

    recovered = []
    for channel in range(nb_keypoints):
        channel_map = distance_maps[:, :, channel]
        flat_index = locate_extremum(channel_map)
        grid_index = np.unravel_index(flat_index, (height, width))
        found = find_keypoint(grid_index, channel_map, threshold, inverted)
        if found:
            recovered.append(found)
            continue
        if not drop_if_not_found:
            recovered.append(fallback)

    return recovered


def generate_reflected_bboxes (bboxes, grid_dims, rows, cols) [view source on GitHub]¶

Generate reflected bounding boxes for the entire reflection grid.

Parameters:

Name Type Description
bboxes np.ndarray

Original bounding boxes.

grid_dims dict[str, tuple[int, int]]

Grid dimensions and original position.

rows int

Height of the original image.

cols int

Width of the original image.

Returns:

Type Description
np.ndarray

Array of reflected and shifted bounding boxes for the entire grid.

Source code in albumentations/augmentations/geometric/functional.py
Python
def generate_reflected_bboxes(
    bboxes: np.ndarray,
    grid_dims: dict[str, tuple[int, int]],
    rows: int,
    cols: int,
) -> np.ndarray:
    """Generate reflected bounding boxes for the entire reflection grid.

    Four variants of the boxes are prepared (as-is, horizontally flipped, vertically
    flipped, flipped both ways), each shifted to the original image's grid position.
    Every grid cell then picks the variant matching the parity of its row/column offset
    from the original cell and shifts it by that offset.

    Args:
        bboxes (np.ndarray): Original bounding boxes.
        grid_dims (dict[str, tuple[int, int]]): Grid dimensions and original position, under
            the keys "grid_shape" and "original_position".
        rows (int): Height of the original image.
        cols (int): Width of the original image.

    Returns:
        np.ndarray: Array of reflected and shifted bounding boxes for the entire grid.
    """
    grid_rows, grid_cols = grid_dims["grid_shape"]
    origin_row, origin_col = grid_dims["original_position"]

    # Flipped copies of the input boxes.
    h_flipped = flip_bboxes(bboxes, flip_horizontal=True, rows=rows, cols=cols)
    v_flipped = flip_bboxes(bboxes, flip_vertical=True, rows=rows, cols=cols)
    hv_flipped = flip_bboxes(bboxes, flip_horizontal=True, flip_vertical=True, rows=rows, cols=cols)

    # Move every variant to the original image's cell; keyed by (row parity, col parity).
    origin_shift = np.array([origin_col * cols, origin_row * rows, origin_col * cols, origin_row * rows])
    variants = {
        (0, 0): shift_bboxes(bboxes, origin_shift),
        (0, 1): shift_bboxes(h_flipped, origin_shift),
        (1, 0): shift_bboxes(v_flipped, origin_shift),
        (1, 1): shift_bboxes(hv_flipped, origin_shift),
    }

    tiles = []
    for grid_row in range(grid_rows):
        row_offset = grid_row - origin_row
        for grid_col in range(grid_cols):
            col_offset = grid_col - origin_col
            # Odd offsets along an axis mean the tile is mirrored along that axis.
            source = variants[(row_offset % 2, col_offset % 2)]
            cell_shift = np.array(
                [
                    col_offset * cols,
                    row_offset * rows,
                    col_offset * cols,
                    row_offset * rows,
                ],
            )
            tiles.append(shift_bboxes(source, cell_shift))

    return np.vstack(tiles)


def get_pad_grid_dimensions (pad_top, pad_bottom, pad_left, pad_right, rows, cols) [view source on GitHub]¶

Calculate the dimensions of the grid needed for reflection padding and the position of the original image.

Parameters:

Name Type Description
pad_top int

Number of pixels to pad above the image.

pad_bottom int

Number of pixels to pad below the image.

pad_left int

Number of pixels to pad to the left of the image.

pad_right int

Number of pixels to pad to the right of the image.

rows int

Height of the original image in pixels.

cols int

Width of the original image in pixels.

Returns:

Type Description
dict[str, tuple[int, int]]

A dictionary containing: - 'grid_shape': A tuple (grid_rows, grid_cols) where: - grid_rows (int): Number of times the image needs to be repeated vertically. - grid_cols (int): Number of times the image needs to be repeated horizontally. - 'original_position': A tuple (original_row, original_col) where: - original_row (int): Row index of the original image in the grid. - original_col (int): Column index of the original image in the grid.

Source code in albumentations/augmentations/geometric/functional.py
Python
def get_pad_grid_dimensions(
    pad_top: int,
    pad_bottom: int,
    pad_left: int,
    pad_right: int,
    rows: int,
    cols: int,
) -> dict[str, tuple[int, int]]:
    """Calculate the dimensions of the grid needed for reflection padding and the position of the original image.

    The padded area is covered by whole copies of the image: each side needs as many
    copies as it takes to span its padding (rounded up), plus one cell for the original.

    Args:
        pad_top (int): Number of pixels to pad above the image.
        pad_bottom (int): Number of pixels to pad below the image.
        pad_left (int): Number of pixels to pad to the left of the image.
        pad_right (int): Number of pixels to pad to the right of the image.
        rows (int): Height of the original image in pixels.
        cols (int): Width of the original image in pixels.

    Returns:
        dict[str, tuple[int, int]]: A dictionary containing:
            - 'grid_shape': A tuple (grid_rows, grid_cols) where:
                - grid_rows (int): Number of times the image needs to be repeated vertically.
                - grid_cols (int): Number of times the image needs to be repeated horizontally.
            - 'original_position': A tuple (original_row, original_col) where:
                - original_row (int): Row index of the original image in the grid.
                - original_col (int): Column index of the original image in the grid.
    """
    # Ceiling division on integers: -(-a // b) == ceil(a / b) without float rounding.
    copies_above = -(-pad_top // rows)
    copies_below = -(-pad_bottom // rows)
    copies_left = -(-pad_left // cols)
    copies_right = -(-pad_right // cols)

    grid_rows = 1 + copies_above + copies_below
    grid_cols = 1 + copies_left + copies_right

    # The original image sits right after the copies stacked above / to the left of it.
    original_row = copies_above
    original_col = copies_left

    return {"grid_shape": (grid_rows, grid_cols), "original_position": (original_row, original_col)}


def keypoint_d4 (keypoint, group_member, rows, cols, ** params) [view source on GitHub]¶

Applies a D_4 symmetry group transformation to a keypoint.

This function adjusts a keypoint's coordinates according to the specified D_4 group transformation, which includes rotations and reflections suitable for image processing tasks. These transformations account for the dimensions of the image to ensure the keypoint remains within its boundaries.

• keypoint (KeypointInternalType): The keypoint to transform. This should be a structure or tuple specifying coordinates like (x, y, [additional parameters]).
• group_member (D4Type): A string identifier for the D_4 group transformation to apply. Valid values are 'e', 'r90', 'r180', 'r270', 'v', 'hvt', 'h', 't'.
• rows (int): The number of rows in the image.
• cols (int): The number of columns in the image.
• params (Any): Not used
• KeypointInternalType: The transformed keypoint.
• ValueError: If an invalid group member is specified, indicating that the specified transformation does not exist.

Examples:

• Rotating a keypoint by 90 degrees in a 100x100 image: keypoint_d4((50, 30, 0, 1), 'r90', 100, 100) This moves the keypoint's position from (50, 30) to (30, 49): the new x is the old y, and the new y is (cols - 1) - x. The keypoint must be given as (x, y, angle, scale).
Source code in albumentations/augmentations/geometric/functional.py
Python
def keypoint_d4(
    keypoint: KeypointInternalType,
    group_member: D4Type,
    rows: int,
    cols: int,
    **params: Any,
) -> KeypointInternalType:
    """Apply one element of the D_4 symmetry group to a keypoint.

    The coordinate work is delegated to ``keypoint_rot90``, ``keypoint_vflip``,
    ``keypoint_hflip`` and ``keypoint_transpose``, each of which receives the image
    dimensions.

    Args:
        keypoint: The keypoint to transform, in the format (x, y, angle, scale).
        group_member: One of 'e', 'r90', 'r180', 'r270', 'v', 'hvt', 'h', 't'.
        rows: The number of rows in the image.
        cols: The number of columns in the image.
        **params: Not used.

    Returns:
        The transformed keypoint.

    Raises:
        ValueError: If ``group_member`` is not one of the eight valid identifiers.

    Examples:
        keypoint_d4((50, 30, 0, 1), 'r90', 100, 100) moves the keypoint's position
        from (50, 30) to (30, 49).

    """
    # Lambdas keep helper lookup lazy: only the selected operation is ever evaluated.
    operations = {
        "e": lambda point: point,  # identity
        "r90": lambda point: keypoint_rot90(point, 1, rows, cols),  # one quarter turn
        "r180": lambda point: keypoint_rot90(point, 2, rows, cols),  # two quarter turns
        "r270": lambda point: keypoint_rot90(point, 3, rows, cols),  # three quarter turns
        "v": lambda point: keypoint_vflip(point, rows, cols),  # vertical flip
        "hvt": lambda point: keypoint_transpose(keypoint_rot90(point, 2, rows, cols), rows, cols),  # anti-diagonal
        "h": lambda point: keypoint_hflip(point, rows, cols),  # horizontal flip
        "t": lambda point: keypoint_transpose(point, rows, cols),  # main diagonal
    }

    operation = operations.get(group_member)
    if operation is None:
        raise ValueError(f"Invalid group member: {group_member}")
    return operation(keypoint)


def keypoint_flip (keypoint, d, rows, cols) [view source on GitHub]¶

Flip a keypoint either vertically, horizontally or both depending on the value of d.

Parameters:

Name Type Description
keypoint KeypointInternalType

A keypoint (x, y, angle, scale).

d int

Number of flip. Must be -1, 0 or 1: * 0 - vertical flip, * 1 - horizontal flip, * -1 - vertical and horizontal flip.

rows int

Image height.

cols int

Image width.

Returns:

Type Description
KeypointInternalType

A keypoint (x, y, angle, scale).

Exceptions:

Type Description
ValueError

if value of d is not -1, 0 or 1.

Source code in albumentations/augmentations/geometric/functional.py
Python
@angle_2pi_range
def keypoint_flip(keypoint: KeypointInternalType, d: int, rows: int, cols: int) -> KeypointInternalType:
    """Flip a keypoint vertically, horizontally, or along both axes.

    Args:
        keypoint: A keypoint (x, y, angle, scale).
        d: Flip selector. Must be -1, 0 or 1:
            * 0 - vertical flip,
            * 1 - horizontal flip,
            * -1 - horizontal flip followed by a vertical flip.
        rows: Image height.
        cols: Image width.

    Returns:
        A keypoint (x, y, angle, scale).

    Raises:
        ValueError: if value of d is not -1, 0 or 1.

    """
    if d == 0:
        return keypoint_vflip(keypoint, rows, cols)
    if d == 1:
        return keypoint_hflip(keypoint, rows, cols)
    if d == -1:
        # Both axes: mirror horizontally first, then vertically.
        mirrored = keypoint_hflip(keypoint, rows, cols)
        return keypoint_vflip(mirrored, rows, cols)
    raise ValueError(f"Invalid d value {d}. Valid values are -1, 0 and 1")


def keypoint_hflip (keypoint, rows, cols) [view source on GitHub]¶

Flip a keypoint horizontally around the y-axis.

Parameters:

Name Type Description
keypoint KeypointInternalType

A keypoint (x, y, angle, scale).

rows int

Image height.

cols int

Image width.

Returns:

Type Description
KeypointInternalType

A keypoint (x, y, angle, scale).

Source code in albumentations/augmentations/geometric/functional.py
Python
@angle_2pi_range
def keypoint_hflip(keypoint: KeypointInternalType, rows: int, cols: int) -> KeypointInternalType:
    """Mirror a keypoint left-to-right.

    The x coordinate becomes ``(cols - 1) - x`` and the angle becomes ``pi - angle``;
    y and scale are passed through. Only the first four items of ``keypoint`` are used.

    Args:
        keypoint: A keypoint (x, y, angle, scale).
        rows: Image height. Not used by this function.
        cols: Image width.

    Returns:
        A keypoint (x, y, angle, scale).

    """
    x, y, angle, scale = keypoint[:4]
    last_column = cols - 1
    mirrored_x = last_column - x
    mirrored_angle = math.pi - angle
    return mirrored_x, y, mirrored_angle, scale


def keypoint_rot90 (keypoint, factor, rows, cols, ** params) [view source on GitHub]¶

Rotate a keypoint by 90 degrees counter-clockwise (CCW) a specified number of times.

Parameters:

Name Type Description
keypoint KeypointInternalType

A keypoint in the format (x, y, angle, scale).

factor int

The number of 90 degree CCW rotations to apply. Must be in the range [0, 3].

rows int

The height of the image the keypoint belongs to.

cols int

The width of the image the keypoint belongs to.

**params Any

Returns:

Type Description
KeypointInternalType

The rotated keypoint in the format (x, y, angle, scale).

Exceptions:

Type Description
ValueError

If the factor is not in the set {0, 1, 2, 3}.

Source code in albumentations/augmentations/geometric/functional.py
Python
@angle_2pi_range
def keypoint_rot90(
    keypoint: KeypointInternalType,
    factor: int,
    rows: int,
    cols: int,
    **params: Any,
) -> KeypointInternalType:
    """Rotate a keypoint by 90 degrees counter-clockwise (CCW) a specified number of times.

    Args:
        keypoint (KeypointInternalType): A keypoint in the format (x, y, angle, scale).
            Only the first four entries are used; any trailing entries are ignored.
        factor (int): The number of 90 degree CCW rotations to apply. Must be in the range [0, 3].
        rows (int): The height of the image the keypoint belongs to.
        cols (int): The width of the image the keypoint belongs to.
        **params (Any): Extra keyword arguments; accepted and ignored.

    Returns:
        KeypointInternalType: The rotated keypoint in the format (x, y, angle, scale).

    Raises:
        ValueError: If the factor is not in the set {0, 1, 2, 3}.
    """
    # Fail fast on a bad factor before touching the keypoint.
    if factor not in {0, 1, 2, 3}:
        raise ValueError("Parameter factor must be in set {0, 1, 2, 3}")

    # Slice to four entries, as the sibling keypoint helpers do, so keypoints that
    # carry extra trailing fields do not fail with "too many values to unpack".
    x, y, angle, scale = keypoint[:4]

    if factor == 1:
        x, y, angle = y, (cols - 1) - x, angle - math.pi / 2
    elif factor == ROT90_180_FACTOR:
        x, y, angle = (cols - 1) - x, (rows - 1) - y, angle - math.pi
    elif factor == ROT90_270_FACTOR:
        x, y, angle = (rows - 1) - y, x, angle + math.pi / 2
    # factor == 0 leaves the keypoint unchanged.

    return x, y, angle, scale


def keypoint_rotate (keypoint, angle, rows, cols, ** params) [view source on GitHub]¶

Rotate a keypoint by a specified angle.

Parameters:

Name Type Description
keypoint KeypointInternalType

A keypoint in the format (x, y, angle, scale).

angle float

The angle by which to rotate the keypoint, in degrees.

rows int

The height of the image the keypoint belongs to.

cols int

The width of the image the keypoint belongs to.

**params Any

Returns:

Type Description
KeypointInternalType

The rotated keypoint in the format (x, y, angle, scale).

Note

The rotation is performed around the center of the image.

Source code in albumentations/augmentations/geometric/functional.py
Python
@angle_2pi_range
def keypoint_rotate(
    keypoint: KeypointInternalType,
    angle: float,
    rows: int,
    cols: int,
    **params: Any,
) -> KeypointInternalType:
    """Rotate a keypoint by the given angle.

    Args:
        keypoint (KeypointInternalType): A keypoint in the format (x, y, angle, scale).
            Only the first four entries are used.
        angle (float): Rotation angle in degrees.
        rows (int): The height of the image the keypoint belongs to.
        cols (int): The width of the image the keypoint belongs to.
        **params (Any): Extra keyword arguments; accepted and ignored.

    Returns:
        KeypointInternalType: The rotated keypoint in the format (x, y, angle, scale).

    Note:
        The pivot of the rotation is the point returned by ``center(cols, rows)``.
    """
    # Build the 2x3 affine matrix for a pure rotation (scale factor 1.0) about the pivot.
    rotation = cv2.getRotationMatrix2D(center(cols, rows), angle, 1.0)

    kp_x, kp_y, kp_angle, kp_size = keypoint[:4]

    # cv2.transform expects a (1, 1, 2) point array; squeeze() brings it back to (x, y).
    point = np.array([[[kp_x, kp_y]]])
    new_x, new_y = cv2.transform(point, rotation).squeeze()

    # The keypoint angle is stored in radians while `angle` is given in degrees.
    return new_x, new_y, kp_angle + math.radians(angle), kp_size


def keypoint_scale (keypoint, scale_x, scale_y) [view source on GitHub]¶

Scales a keypoint by scale_x and scale_y.

Parameters:

Name Type Description
keypoint KeypointInternalType

A keypoint (x, y, angle, scale).

scale_x float

Scale coefficient x-axis.

scale_y float

Scale coefficient y-axis.

Returns:

Type Description
KeypointInternalType

A keypoint (x, y, angle, scale).

Source code in albumentations/augmentations/geometric/functional.py
Python
def keypoint_scale(keypoint: KeypointInternalType, scale_x: float, scale_y: float) -> KeypointInternalType:
    """Scale a keypoint's coordinates and size by per-axis factors.

    Args:
        keypoint: A keypoint (x, y, angle, scale). Only the first four entries are used.
        scale_x: Scale coefficient applied to the x coordinate.
        scale_y: Scale coefficient applied to the y coordinate.

    Returns:
        A keypoint (x, y, angle, scale). The angle is unchanged; the keypoint scale is
        multiplied by the larger of ``scale_x`` and ``scale_y``.

    """
    x, y, angle, scale = keypoint[:4]
    scaled_x = x * scale_x
    scaled_y = y * scale_y
    scaled_size = scale * max(scale_x, scale_y)
    return scaled_x, scaled_y, angle, scaled_size


def keypoint_transpose (keypoint, rows, cols) [view source on GitHub]¶

Transposes a keypoint over the main diagonal (swaps its x and y coordinates).

Parameters:

Name Type Description
keypoint KeypointInternalType

A keypoint (x, y, angle, scale).

rows int

Total number of rows (height) in the image.

cols int

Total number of columns (width) in the image.

Returns:

Type Description
KeypointInternalType

A transformed keypoint (x, y, angle, scale).

Exceptions:

Type Description
ValueError

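Documented as "If axis is not 0 or 1", but the function takes no axis argument and the implementation shown below never raises it.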

Source code in albumentations/augmentations/geometric/functional.py
Python
@angle_2pi_range
def keypoint_transpose(keypoint: KeypointInternalType, rows: int, cols: int) -> KeypointInternalType:
    """Transpose a keypoint over the main diagonal.

    Args:
        keypoint: A keypoint (x, y, angle, scale). Only the first four entries are used.
        rows: Total number of rows (height) in the image. Not used by this function.
        cols: Total number of columns (width) in the image. Not used by this function.

    Returns:
        A transformed keypoint (x, y, angle, scale) with x and y swapped and the
        angle adjusted accordingly.

    """
    x, y, angle, scale = keypoint[:4]

    # Angles up to pi are mapped to pi/2 - angle; larger angles use 3*pi/2 - angle.
    if angle <= np.pi:
        transposed_angle = np.pi / 2 - angle
    else:
        transposed_angle = 3 * np.pi / 2 - angle

    # Swapping the coordinates is the transposition itself.
    return y, x, transposed_angle, scale


def keypoint_vflip (keypoint, rows, cols) [view source on GitHub]¶

Flip a keypoint vertically around the x-axis.

Parameters:

Name Type Description
keypoint KeypointInternalType

A keypoint (x, y, angle, scale).

rows int

Image height.

cols int

Image width.

Returns:

Type Description
tuple

A keypoint (x, y, angle, scale).

Source code in albumentations/augmentations/geometric/functional.py
Python
@angle_2pi_range
def keypoint_vflip(keypoint: KeypointInternalType, rows: int, cols: int) -> KeypointInternalType:
    """Mirror a keypoint vertically (top-bottom), i.e. around the horizontal axis.

    Args:
        keypoint: A keypoint (x, y, angle, scale). Only the first four entries are used.
        rows: Image height.
        cols: Image width. Not used by this function.

    Returns:
        tuple: A keypoint (x, y, angle, scale) with the y coordinate mirrored and the
        angle negated.

    """
    # NOTE(review): `angle_2pi_range` post-processes the returned angle; presumably it
    # wraps the negated angle into [0, 2*pi) - confirm against the decorator's definition.
    x, y, angle, scale = keypoint[:4]
    mirrored_y = (rows - 1) - y
    return x, mirrored_y, -angle, scale


def optical_distortion (img, k, dx, dy, interpolation, border_mode, value=None) [view source on GitHub]¶

Barrel / pincushion distortion. Unconventional augment.

Source code in albumentations/augmentations/geometric/functional.py
Python
@preserve_channel_dim
def optical_distortion(
    img: np.ndarray,
    k: int,
    dx: int,
    dy: int,
    interpolation: int,
    border_mode: int,
    value: ColorType | None = None,
) -> np.ndarray:
    """Barrel / pincushion distortion. Unconventional augment.

    Args:
        img: Input image; only its first two dimensions (height, width) are read here.
        k: Distortion coefficient, used for the first two entries of the distortion vector.
        dx: Horizontal offset added to the horizontal image midpoint (width / 2).
        dy: Vertical offset added to the vertical image midpoint (height / 2).
        interpolation: Interpolation flag forwarded to ``cv2.remap``.
        border_mode: Border mode forwarded to ``cv2.remap``.
        value: Border value forwarded to ``cv2.remap``.

    Returns:
        The remapped image.

    Reference:
        |  https://stackoverflow.com/questions/6199636/formulas-for-barrel-pincushion-distortion
        |  https://stackoverflow.com/questions/10364201/image-transformation-in-opencv
        |  https://stackoverflow.com/questions/2477774/correcting-fisheye-distortion-programmatically
    """
    height, width = img.shape[:2]

    # Pinhole-style camera matrix: focal lengths equal the image size, and the
    # principal point is the image midpoint shifted by (dx, dy).
    camera_matrix = np.array(
        [
            [width, 0, width * 0.5 + dx],
            [0, height, height * 0.5 + dy],
            [0, 0, 1],
        ],
        dtype=np.float32,
    )

    # Only the first two coefficients are non-zero; both are set to k.
    distortion_coeffs = np.array([k, k, 0, 0, 0], dtype=np.float32)

    map_x, map_y = cv2.initUndistortRectifyMap(
        camera_matrix,
        distortion_coeffs,
        None,
        None,
        (width, height),
        cv2.CV_32FC1,
    )
    return cv2.remap(img, map_x, map_y, interpolation=interpolation, borderMode=border_mode, borderValue=value)


def rotation2d_matrix_to_euler_angles (matrix, y_up) [view source on GitHub]¶

matrix (np.ndarray): Rotation matrix. y_up (bool): Whether the Y axis points up (True) or down (False).

Source code in albumentations/augmentations/geometric/functional.py
Python
def rotation2d_matrix_to_euler_angles(matrix: np.ndarray, y_up: bool) -> float:
    """Recover the rotation angle, in radians, encoded in a 2D rotation matrix.

    Args:
        matrix (np.ndarray): Rotation matrix; only ``matrix[0, 0]`` and ``matrix[1, 0]`` are read.
        y_up (bool): Whether the Y axis points up (True) or down (False). When False
            the sign of ``matrix[1, 0]`` is flipped before the angle is computed.

    Returns:
        float: ``arctan2`` of the (possibly negated) ``matrix[1, 0]`` and ``matrix[0, 0]``.

    """
    sin_component = matrix[1, 0] if y_up else -matrix[1, 0]
    return np.arctan2(sin_component, matrix[0, 0])


def shift_bboxes (bboxes, shift_vector) [view source on GitHub]¶

Shift bounding boxes by a given vector.

Parameters:

Name Type Description
bboxes np.ndarray

Array of bounding boxes with shape (n, m) where n is the number of bboxes and m >= 4. The first 4 columns are [x_min, y_min, x_max, y_max].

shift_vector np.ndarray

Vector to shift the bounding boxes by, with shape (4,) for [shift_x, shift_y, shift_x, shift_y].

Returns:

Type Description
np.ndarray

Shifted bounding boxes with the same shape as input.

Source code in albumentations/augmentations/geometric/functional.py
Python
def shift_bboxes(bboxes: np.ndarray, shift_vector: np.ndarray) -> np.ndarray:
    """Shift bounding boxes by a given vector.

    Args:
        bboxes (np.ndarray): Array of bounding boxes with shape (n, m) where n is the number of bboxes
            and m >= 4. The first 4 columns are [x_min, y_min, x_max, y_max].
        shift_vector (np.ndarray): Vector to shift the bounding boxes by, with shape (4,) for
            [shift_x, shift_y, shift_x, shift_y].

    Returns:
        np.ndarray: Shifted bounding boxes with the same shape as input. The input array is
        never modified. The dtype matches the input unless the shift cannot be stored in it
        (e.g. integer boxes with a float shift), in which case the result uses the promoted dtype.
    """
    shift = np.asarray(shift_vector)

    # Work on a copy so the caller's array is left untouched.
    shifted_bboxes = bboxes.copy()

    # An in-place add fails when the sum's dtype cannot be cast back into the box dtype
    # under NumPy's "same_kind" rule (e.g. int64 boxes += float64 shift). Promote only
    # in that case so every input that worked before keeps its original dtype.
    sum_dtype = np.result_type(shifted_bboxes.dtype, shift.dtype)
    if not np.can_cast(sum_dtype, shifted_bboxes.dtype, casting="same_kind"):
        shifted_bboxes = shifted_bboxes.astype(sum_dtype)

    # Only the four coordinate columns are shifted; extra columns pass through.
    shifted_bboxes[:, :4] += shift

    return shifted_bboxes


def to_distance_maps (keypoints, height, width, inverted=False) [view source on GitHub]¶

Generate a (H,W,N) array of distance maps for N keypoints.

The n-th distance map contains at every location (y, x) the euclidean distance to the n-th keypoint.

This function can be used as a helper when augmenting keypoints with a method that only supports the augmentation of images.

Parameters:

Name Type Description
keypoints Sequence[tuple[float, float]]

keypoint coordinates

height int

image height

width int

image width

inverted bool

If True, inverted distance maps are returned where each distance value d is replaced by 1/(d+1), i.e. the distance maps have values in the range (0.0, 1.0] with 1.0 denoting exactly the position of the respective keypoint.

Returns:

Type Description
np.ndarray

(H, W, N) ndarray A float32 array containing N distance maps for N keypoints. Each location (y, x, n) in the array denotes the euclidean distance at (y, x) to the n-th keypoint. If inverted is True, the distance d is replaced by 1/(d+1). The height and width of the array match the height and width arguments.

Source code in albumentations/augmentations/geometric/functional.py
Python
def to_distance_maps(
    keypoints: Sequence[tuple[float, float]],
    height: int,
    width: int,
    inverted: bool = False,
) -> np.ndarray:
    """Build one euclidean distance map per keypoint, stacked as an (H, W, N) array.

    The n-th map holds, at every location (y, x), the euclidean distance from
    (x, y) to the n-th keypoint.

    This function can be used as a helper when augmenting keypoints with a
    method that only supports the augmentation of images.

    Args:
        keypoints: keypoint coordinates as (x, y) pairs
        height: image height
        width: image width
        inverted (bool): If True, each distance value d is replaced by 1/(d+1),
            so the maps take values in (0.0, 1.0] with 1.0 exactly at the
            position of the respective keypoint.

    Returns:
        (H, W, N) ndarray
            A float32 array containing N distance maps for N keypoints, with
            H == height and W == width. If inverted is True, each distance d
            is replaced by 1/(d+1).

    """
    maps = np.zeros((height, width, len(keypoints)), dtype=np.float32)

    # Broadcasting a (1, W) row of x indices against an (H, 1) column of y indices
    # yields the same (H, W) grid of squared distances as an explicit meshgrid.
    col_idx = np.arange(0, width)[np.newaxis, :]
    row_idx = np.arange(0, height)[:, np.newaxis]

    for index, (kp_x, kp_y) in enumerate(keypoints):
        maps[:, :, index] = (col_idx - kp_x) ** 2 + (row_idx - kp_y) ** 2

    maps = np.sqrt(maps)
    return 1 / (maps + 1) if inverted else maps


def transpose (img) [view source on GitHub]¶

Transposes the first two dimensions of an array of any dimensionality. Retains the order of any additional dimensions.

Parameters:

Name Type Description
img np.ndarray

Input array.

Returns:

Type Description
np.ndarray

Transposed array.

Source code in albumentations/augmentations/geometric/functional.py
Python
def transpose(img: np.ndarray) -> np.ndarray:
    """Swap the first two axes of an array, leaving any further axes in place.

    Args:
        img (np.ndarray): Input array.

    Returns:
        np.ndarray: Transposed array.
    """
    # Exchanging axes 0 and 1 is the same permutation as (1, 0, 2, ..., ndim - 1).
    return img.swapaxes(0, 1)


def validate_bboxes (bboxes, image_shape) [view source on GitHub]¶

Validate bounding boxes and remove invalid ones.

Parameters:

Name Type Description
bboxes np.ndarray

Array of bounding boxes with shape (n, 4) where each row is [x_min, y_min, x_max, y_max].

image_shape tuple[int, int]

Shape of the image as (height, width).

Returns:

Type Description
np.ndarray

Array of valid bounding boxes, potentially with fewer boxes than the input.

Examples:

Python
>>> bboxes = np.array([[10, 20, 30, 40], [-10, -10, 5, 5], [100, 100, 120, 120]])
>>> valid_bboxes = validate_bboxes(bboxes, (100, 100))
>>> print(valid_bboxes)
[[ 10  20  30  40]
 [-10 -10   5   5]]

Source code in albumentations/augmentations/geometric/functional.py
Python
def validate_bboxes(bboxes: np.ndarray, image_shape: tuple[int, ...]) -> np.ndarray:
    """Validate bounding boxes and remove invalid ones.

    A box is kept when it overlaps the image area, i.e. when ``x_max > 0``, ``y_max > 0``,
    ``x_min < width`` and ``y_min < height``. Boxes that only partially overlap the image
    are kept as they are (they are not clipped).

    Args:
        bboxes (np.ndarray): Array of bounding boxes with shape (n, 4) where each row is [x_min, y_min, x_max, y_max].
        image_shape (tuple[int, ...]): Shape of the image as (height, width). Any further
            entries (e.g. the channel count of ``img.shape``) are ignored.

    Returns:
        np.ndarray: Array of valid bounding boxes, potentially with fewer boxes than the input.

    Example:
        >>> bboxes = np.array([[10, 20, 30, 40], [-10, -10, 5, 5], [100, 100, 120, 120]])
        >>> valid_bboxes = validate_bboxes(bboxes, (100, 100))
        >>> print(valid_bboxes)
        [[ 10  20  30  40]
         [-10 -10   5   5]]
    """
    # Only height and width matter, so a full image shape (H, W, C) is accepted too.
    rows, cols = image_shape[:2]

    x_min, y_min, x_max, y_max = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3]

    # Drop boxes lying entirely left of / above the origin or entirely past the
    # right / bottom edge of the image.
    valid_indices = (x_max > 0) & (y_max > 0) & (x_min < cols) & (y_min < rows)

    return bboxes[valid_indices]


def validate_if_not_found_coords (if_not_found_coords) [view source on GitHub]¶

Validate and process if_not_found_coords parameter.

Source code in albumentations/augmentations/geometric/functional.py
Python
def validate_if_not_found_coords(
    if_not_found_coords: Sequence[int] | dict[str, Any] | None,
) -> tuple[bool, int, int]:
    """Validate and process if_not_found_coords parameter.

    Args:
        if_not_found_coords: ``None``, a two-element tuple/list ``(x, y)``, or a dict
            with ``"x"`` and ``"y"`` keys.

    Returns:
        A ``(flag, x, y)`` triple: ``(True, -1, -1)`` when the input is ``None``,
        otherwise ``(False, x, y)`` with the coordinates taken from the input.
        NOTE(review): the flag presumably tells the caller to drop keypoints that
        are not found - confirm against the call sites.

    Raises:
        ValueError: If a tuple/list does not have exactly two entries, or if the
            input is none of the supported types.
    """
    if if_not_found_coords is None:
        return True, -1, -1

    if isinstance(if_not_found_coords, dict):
        return False, if_not_found_coords["x"], if_not_found_coords["y"]

    if not isinstance(if_not_found_coords, (tuple, list)):
        msg = "Expected if_not_found_coords to be None, tuple, list, or dict."
        raise ValueError(msg)

    if len(if_not_found_coords) != TWO:
        msg = "Expected tuple/list 'if_not_found_coords' to contain exactly two entries."
        raise ValueError(msg)

    return False, if_not_found_coords[0], if_not_found_coords[1]