Geometric functional transforms (augmentations.geometric.functional)¶
def bbox_d4 (bbox, group_member, rows=None, cols=None)
[view source on GitHub]¶
Applies a D_4
symmetry group transformation to a bounding box.
The function transforms a bounding box according to the specified group member from the D_4
group. These transformations include rotations and reflections, specified to work on an image's bounding box given its dimensions.
- bbox (BoxInternalType): The bounding box to transform. This should be a structure specifying coordinates like (xmin, ymin, xmax, ymax).
- group_member (D4Type): A string identifier for the `D_4` group transformation to apply. Valid values are 'e', 'r90', 'r180', 'r270', 'v', 'hvt', 'h', 't'.
- rows (int): The number of rows in the image, used to adjust transformations that depend on image dimensions.
- cols (int): The number of columns in the image, used for the same purposes as rows.
- BoxInternalType: The transformed bounding box.
- ValueError: If an invalid group member is specified.
Examples:
- Applying a 90-degree rotation:
bbox_d4((0.1, 0.2, 0.3, 0.4), 'r90', 100, 100)
This rotates the bounding box, given in normalized coordinates, 90 degrees counterclockwise within a 100x100 image and returns (0.2, 0.7, 0.4, 0.9).
Source code in albumentations/augmentations/geometric/functional.py
def bbox_d4(
    bbox: BoxInternalType,
    group_member: D4Type,
    rows: int | None = None,
    cols: int | None = None,
) -> BoxInternalType:
    """Apply one element of the `D_4` symmetry group to a bounding box.

    The eight group members cover the identity, three quarter-turn rotations and four
    reflections. The actual coordinate work is delegated to `bbox_rot90`, `bbox_vflip`,
    `bbox_hflip` and `bbox_transpose`.

    Args:
        bbox: The bounding box to transform, laid out as `(x_min, y_min, x_max, y_max)`.
            NOTE(review): the helper transforms compute `1 - coordinate`, so coordinates
            appear to be normalized to `[0, 1]` - confirm against callers.
        group_member: Identifier of the `D_4` element to apply. Valid values are
            'e', 'r90', 'r180', 'r270', 'v', 'hvt', 'h', 't'.
        rows: Number of rows in the image. Only forwarded to `bbox_vflip` (member 'v').
        cols: Number of columns in the image. Only forwarded to `bbox_vflip` (member 'v').

    Returns:
        The transformed bounding box.

    Raises:
        ValueError: If `group_member` is not one of the valid identifiers.

    Examples:
        - Applying a 90-degree rotation:
          `bbox_d4((0.1, 0.2, 0.3, 0.4), 'r90', 100, 100)`
    """
    # Every entry is a lambda, so nothing is computed until the chosen member is called.
    dispatch = {
        "e": lambda box: box,  # identity
        "r90": lambda box: bbox_rot90(box, 1),  # one quarter turn
        "r180": lambda box: bbox_rot90(box, 2),  # two quarter turns
        "r270": lambda box: bbox_rot90(box, 3),  # three quarter turns
        "v": lambda box: bbox_vflip(box, rows, cols),  # vertical flip
        "hvt": lambda box: bbox_transpose(bbox_rot90(box, 2)),  # reflect over anti-diagonal
        "h": lambda box: bbox_hflip(box),  # horizontal flip
        "t": lambda box: bbox_transpose(box),  # reflect over main diagonal
    }

    handler = dispatch.get(group_member)
    if handler is None:
        raise ValueError(f"Invalid group member: {group_member}")
    return handler(bbox)
def bbox_flip (bbox, d, rows=None, cols=None)
[view source on GitHub]¶
Flip a bounding box either vertically, horizontally or both depending on the value of `d`.
Parameters:
Name | Type | Description |
---|---|---|
bbox | BoxInternalType | A bounding box |
d | int | dimension. 0 for vertical flip, 1 for horizontal flip, -1 for both vertical and horizontal flip |
rows | int | None | Image rows. |
cols | int | None | Image cols. |
Returns:
Type | Description |
---|---|
BoxInternalType | A bounding box |
Exceptions:
Type | Description |
---|---|
ValueError | if value of `d` is not -1, 0 or 1. |
Source code in albumentations/augmentations/geometric/functional.py
def bbox_flip(bbox: BoxInternalType, d: int, rows: int | None = None, cols: int | None = None) -> BoxInternalType:
    """Flip a bounding box vertically, horizontally, or both, as selected by `d`.

    Args:
        bbox: A bounding box `(x_min, y_min, x_max, y_max)`.
        d: Flip code. 0 for a vertical flip, 1 for a horizontal flip,
            -1 for a horizontal flip followed by a vertical flip.
        rows: Image rows. Not used by this function.
        cols: Image cols. Not used by this function.

    Returns:
        A bounding box `(x_min, y_min, x_max, y_max)`.

    Raises:
        ValueError: if value of `d` is not -1, 0 or 1.
    """
    if d not in (-1, 0, 1):
        raise ValueError(f"Invalid d value {d}. Valid values are -1, 0 and 1")

    # Codes 1 and -1 both include the horizontal flip; it is applied first.
    if d != 0:
        bbox = bbox_hflip(bbox)
    # Codes 0 and -1 both include the vertical flip.
    if d != 1:
        bbox = bbox_vflip(bbox)
    return bbox
def bbox_hflip (bbox, rows=None, cols=None)
[view source on GitHub]¶
Flip a bounding box horizontally around the y-axis.
Parameters:
Name | Type | Description |
---|---|---|
bbox | BoxInternalType | A bounding box |
rows | int | None | Image rows. |
cols | int | None | Image cols. |
Returns:
Type | Description |
---|---|
BoxInternalType | A bounding box |
Source code in albumentations/augmentations/geometric/functional.py
def bbox_hflip(bbox: BoxInternalType, rows: int | None = None, cols: int | None = None) -> BoxInternalType:
    """Mirror a bounding box left-to-right (flip around the vertical axis).

    Only the first four entries of `bbox` are read; x coordinates are mapped to
    `1 - x`, which swaps the roles of `x_min` and `x_max`.

    Args:
        bbox: A bounding box `(x_min, y_min, x_max, y_max)`.
        rows: Image rows. Not used by this function.
        cols: Image cols. Not used by this function.

    Returns:
        A bounding box `(x_min, y_min, x_max, y_max)`.
    """
    left, top, right, bottom = bbox[:4]
    # The old right edge becomes the new left edge and vice versa.
    return (1 - right, top, 1 - left, bottom)
def bbox_rot90 (bbox, factor, rows=None, cols=None)
[view source on GitHub]¶
Rotates a bounding box by 90 degrees CCW (see np.rot90)
Parameters:
Name | Type | Description |
---|---|---|
bbox | BoxInternalType | A bounding box tuple (x_min, y_min, x_max, y_max). |
factor | int | Number of CCW rotations. Must be in set {0, 1, 2, 3} See np.rot90. |
rows | int | None | Image rows. |
cols | int | None | Image cols. |
Returns:
Type | Description |
---|---|
tuple | A bounding box tuple (x_min, y_min, x_max, y_max). |
Source code in albumentations/augmentations/geometric/functional.py
def bbox_rot90(bbox: BoxInternalType, factor: int, rows: int | None = None, cols: int | None = None) -> BoxInternalType:
    """Rotates a bounding box by 90 degrees CCW (see np.rot90)

    Args:
        bbox: A bounding box tuple (x_min, y_min, x_max, y_max).
        factor: Number of CCW rotations. Must be in set {0, 1, 2, 3} See np.rot90.
        rows: Image rows. Not used by this function.
        cols: Image cols. Not used by this function.

    Returns:
        tuple: A bounding box tuple (x_min, y_min, x_max, y_max). For `factor == 0`
        the input `bbox` object is returned unchanged.

    Raises:
        ValueError: If `factor` is not in the set {0, 1, 2, 3}.
    """
    if factor not in {0, 1, 2, 3}:
        # The message names the actual parameter (`factor`), matching `keypoint_rot90`.
        msg = "Parameter factor must be in set {0, 1, 2, 3}"
        raise ValueError(msg)

    x_min, y_min, x_max, y_max = bbox[:4]
    # Coordinates are mirrored with `1 - value`, i.e. they are treated as fractions of the image size.
    if factor == 1:
        bbox = y_min, 1 - x_max, y_max, 1 - x_min
    elif factor == ROT90_180_FACTOR:
        bbox = 1 - x_max, 1 - y_max, 1 - x_min, 1 - y_min
    elif factor == ROT90_270_FACTOR:
        bbox = 1 - y_max, x_min, 1 - y_min, x_max
    return bbox
def bbox_rotate (bbox, angle, method, rows, cols)
[view source on GitHub]¶
Rotates a bounding box by angle degrees.
Parameters:
Name | Type | Description |
---|---|---|
bbox | BoxInternalType | A bounding box |
angle | float | Angle of rotation in degrees. |
method | str | Rotation method used. Should be one of: "largest_box", "ellipse". This argument is required (the signature defines no default). |
rows | int | Image rows. |
cols | int | Image cols. |
Returns:
Type | Description |
---|---|
BoxInternalType | A bounding box |
Reference
Source code in albumentations/augmentations/geometric/functional.py
def bbox_rotate(bbox: BoxInternalType, angle: float, method: str, rows: int, cols: int) -> BoxInternalType:
    """Rotate a bounding box by `angle` degrees and return its axis-aligned envelope.

    The box is represented by a set of sample points centred on (0.5, 0.5), the points
    are rotated, and the min/max of the rotated points form the result.

    Args:
        bbox: A bounding box `(x_min, y_min, x_max, y_max)`.
        angle: Angle of rotation in degrees.
        method: Rotation method used. Should be one of: "largest_box", "ellipse".
            "largest_box" rotates the four corners; "ellipse" rotates 360 points sampled
            on the ellipse inscribed in the box.
        rows: Image rows.
        cols: Image cols.

    Returns:
        A bounding box `(x_min, y_min, x_max, y_max)`.

    Raises:
        ValueError: If `method` is neither "largest_box" nor "ellipse".

    Reference:
        https://arxiv.org/abs/2109.13488
    """
    left, top, right, bottom = bbox[:4]
    # Width/height ratio, used to compensate for non-square images during the rotation.
    aspect = cols / float(rows)

    if method == "largest_box":
        # Four corners, shifted so the rotation pivot (0.5, 0.5) sits at the origin.
        xs = np.array([left, right, right, left]) - 0.5
        ys = np.array([top, top, bottom, bottom]) - 0.5
    elif method == "ellipse":
        half_w = (right - left) / 2
        half_h = (bottom - top) / 2
        degrees = np.arange(0, 360, dtype=np.float32)
        # One sample per degree on the inscribed ellipse, also shifted to the origin.
        xs = half_w * np.sin(np.radians(degrees)) + (half_w + left - 0.5)
        ys = half_h * np.cos(np.radians(degrees)) + (half_h + top - 0.5)
    else:
        raise ValueError(f"Method {method} is not a valid rotation method.")

    theta = np.deg2rad(angle)
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    # Rotate about the origin, then shift back to the (0.5, 0.5) pivot.
    rotated_x = (cos_t * xs * aspect + sin_t * ys) / aspect + 0.5
    rotated_y = -sin_t * xs * aspect + cos_t * ys + 0.5

    new_left, new_right = min(rotated_x), max(rotated_x)
    new_top, new_bottom = min(rotated_y), max(rotated_y)
    return new_left, new_top, new_right, new_bottom
def bbox_transpose (bbox, rows=None, cols=None)
[view source on GitHub]¶
Transposes a bounding box over the main diagonal by swapping its x and y coordinates.
Parameters:
Name | Type | Description |
---|---|---|
bbox | KeypointInternalType | A bounding box |
rows | int | None | Image rows. |
cols | int | None | Image cols. |
Returns:
Type | Description |
---|---|
KeypointInternalType | A bounding box tuple |
Exceptions:
Type | Description |
---|---|
ValueError | If `bbox` has fewer than four elements (the function has no `axis` parameter). |
Source code in albumentations/augmentations/geometric/functional.py
def bbox_transpose(
    bbox: KeypointInternalType,
    rows: int | None = None,
    cols: int | None = None,
) -> KeypointInternalType:
    """Transpose a bounding box over the main diagonal by swapping x and y.

    NOTE(review): the annotations use `KeypointInternalType` although the value is a
    bounding box; sibling bbox helpers use `BoxInternalType` - confirm which is intended.

    Args:
        bbox: A bounding box `(x_min, y_min, x_max, y_max)`. Only the first four
            entries are read.
        rows: Image rows. Not used by this function.
        cols: Image cols. Not used by this function.

    Returns:
        A bounding box tuple `(x_min, y_min, x_max, y_max)`.

    Raises:
        ValueError: If `bbox` has fewer than four elements (tuple unpacking fails).
    """
    left, top, right, bottom = bbox[:4]
    # Swapping each (x, y) pair reflects the box across the main diagonal.
    transposed = (top, left, bottom, right)
    return transposed
def bbox_vflip (bbox, rows=None, cols=None)
[view source on GitHub]¶
Flip a bounding box vertically around the x-axis.
Parameters:
Name | Type | Description |
---|---|---|
bbox | BoxInternalType | A bounding box |
rows | int | None | Image rows. |
cols | int | None | Image cols. |
Returns:
Type | Description |
---|---|
tuple | A bounding box |
Source code in albumentations/augmentations/geometric/functional.py
def bbox_vflip(bbox: BoxInternalType, rows: int | None = None, cols: int | None = None) -> BoxInternalType:
    """Mirror a bounding box top-to-bottom (flip around the horizontal axis).

    Only the first four entries of `bbox` are read; y coordinates are mapped to
    `1 - y`, which swaps the roles of `y_min` and `y_max`.

    Args:
        bbox: A bounding box `(x_min, y_min, x_max, y_max)`.
        rows: Image rows. Not used by this function.
        cols: Image cols. Not used by this function.

    Returns:
        tuple: A bounding box `(x_min, y_min, x_max, y_max)`.
    """
    left, top, right, bottom = bbox[:4]
    # The old bottom edge becomes the new top edge and vice versa.
    return (left, 1 - bottom, right, 1 - top)
def d4 (img, group_member)
[view source on GitHub]¶
Applies a D_4
symmetry group transformation to an image array.
This function manipulates an image using transformations such as rotations and flips, corresponding to the D_4
dihedral group symmetry operations. Each transformation is identified by a unique group member code.
- img (np.ndarray): The input image array to transform.
- group_member (D4Type): A string identifier indicating the specific transformation to apply. Valid codes include:
- 'e': Identity (no transformation).
- 'r90': Rotate 90 degrees counterclockwise.
- 'r180': Rotate 180 degrees.
- 'r270': Rotate 270 degrees counterclockwise.
- 'v': Vertical flip.
- 'hvt': Transpose over second diagonal
- 'h': Horizontal flip.
- 't': Transpose (reflect over the main diagonal).
- np.ndarray: The transformed image array.
- ValueError: If an invalid group member is specified.
Examples:
- Rotating an image by 90 degrees:
transformed_image = d4(original_image, 'r90')
- Applying a horizontal flip to an image:
transformed_image = d4(original_image, 'h')
Source code in albumentations/augmentations/geometric/functional.py
def d4(img: np.ndarray, group_member: D4Type) -> np.ndarray:
    """Apply one element of the `D_4` dihedral symmetry group to an image array.

    The image-level helpers `rot90`, `vflip`, `hflip` and `transpose` do the work;
    the result is always passed through `np.ascontiguousarray` before being returned.

    Args:
        img: The input image array to transform.
        group_member: Identifier of the transformation to apply. Valid codes:
            - 'e': Identity (no transformation).
            - 'r90': Rotate 90 degrees counterclockwise.
            - 'r180': Rotate 180 degrees.
            - 'r270': Rotate 270 degrees counterclockwise.
            - 'v': Vertical flip.
            - 'hvt': Reflect over the anti-diagonal (180-degree rotation, then transpose).
            - 'h': Horizontal flip.
            - 't': Transpose (reflect over the main diagonal).

    Returns:
        The transformed image array.

    Raises:
        ValueError: If an invalid group member is specified.

    Examples:
        - Rotating an image by 90 degrees:
          `transformed_image = d4(original_image, 'r90')`
        - Applying a horizontal flip to an image:
          `transformed_image = d4(original_image, 'h')`
    """
    dispatch = {
        "e": lambda image: image,  # identity
        "r90": lambda image: rot90(image, 1),  # one quarter turn
        "r180": lambda image: rot90(image, 2),  # two quarter turns
        "r270": lambda image: rot90(image, 3),  # three quarter turns
        "v": vflip,  # vertical flip
        "hvt": lambda image: transpose(rot90(image, 2)),  # reflect over anti-diagonal
        "h": hflip,  # horizontal flip
        "t": transpose,  # reflect over main diagonal
    }

    operation = dispatch.get(group_member)
    if operation is None:
        raise ValueError(f"Invalid group member: {group_member}")
    return np.ascontiguousarray(operation(img))
def elastic_transform (img, alpha, sigma, interpolation, border_mode, value=None, random_state=None, approximate=False, same_dxdy=False)
[view source on GitHub]¶
Apply an elastic transformation to an image.
Source code in albumentations/augmentations/geometric/functional.py
@preserve_channel_dim
def elastic_transform(
    img: np.ndarray,
    alpha: float,
    sigma: float,
    interpolation: int,
    border_mode: int,
    value: ColorType | None = None,
    random_state: np.random.RandomState | None = None,
    approximate: bool = False,
    same_dxdy: bool = False,
) -> np.ndarray:
    """Apply an elastic transformation to an image.

    Dispatches to `elastic_transform_approximate` when `approximate` is truthy and to
    `elastic_transform_precise` otherwise; all remaining arguments are forwarded
    positionally and unchanged.
    """
    backend = elastic_transform_approximate if approximate else elastic_transform_precise
    return backend(
        img,
        alpha,
        sigma,
        interpolation,
        border_mode,
        value,
        random_state,
        same_dxdy,
    )
def elastic_transform_approximate (img, alpha, sigma, interpolation, border_mode, value, random_state, same_dxdy=False)
[view source on GitHub]¶
Apply an approximate elastic transformation to an image.
Source code in albumentations/augmentations/geometric/functional.py
def elastic_transform_approximate(
    img: np.ndarray,
    alpha: float,
    sigma: float,
    interpolation: int,
    border_mode: int,
    value: ColorType | None,
    random_state: np.random.RandomState | None,
    same_dxdy: bool = False,
) -> np.ndarray:
    """Apply an approximate elastic transformation to an image.

    Thin wrapper around `elastic_transform_helper` that pins `kernel_size` to
    `(17, 17)`; every other argument is forwarded unchanged.
    """
    fixed_kernel = (17, 17)
    return elastic_transform_helper(
        img,
        alpha,
        sigma,
        interpolation,
        border_mode,
        value,
        random_state,
        same_dxdy,
        kernel_size=fixed_kernel,
    )
def elastic_transform_precise (img, alpha, sigma, interpolation, border_mode, value, random_state, same_dxdy=False)
[view source on GitHub]¶
Apply a precise elastic transformation to an image.
This function applies an elastic deformation to the input image using a precise method. The transformation involves creating random displacement fields, smoothing them using Gaussian blur with adaptive kernel size, and then remapping the image according to the smoothed displacement fields.
Parameters:
Name | Type | Description |
---|---|---|
img | np.ndarray | Input image. |
alpha | float | Scaling factor for the random displacement fields. |
sigma | float | Standard deviation for Gaussian blur applied to the displacement fields. |
interpolation | int | Interpolation method to be used (e.g., cv2.INTER_LINEAR). |
border_mode | int | Pixel extrapolation method (e.g., cv2.BORDER_CONSTANT). |
value | ColorType | None | Border value if border_mode is cv2.BORDER_CONSTANT. |
random_state | np.random.RandomState | None | Random state for reproducibility. |
same_dxdy | bool | If True, use the same displacement field for both x and y directions. |
Returns:
Type | Description |
---|---|
np.ndarray | Transformed image with precise elastic deformation applied. |
Source code in albumentations/augmentations/geometric/functional.py
def elastic_transform_precise(
    img: np.ndarray,
    alpha: float,
    sigma: float,
    interpolation: int,
    border_mode: int,
    value: ColorType | None,
    random_state: np.random.RandomState | None,
    same_dxdy: bool = False,
) -> np.ndarray:
    """Apply a precise elastic transformation to an image.

    Thin wrapper around `elastic_transform_helper` that passes `kernel_size=(0, 0)`.
    NOTE(review): a zero kernel size presumably lets the blur derive the kernel from
    `sigma` ("adaptive kernel size") - confirm in `elastic_transform_helper`.

    Args:
        img (np.ndarray): Input image.
        alpha (float): Scaling factor for the random displacement fields.
        sigma (float): Standard deviation for Gaussian blur applied to the displacement fields.
        interpolation (int): Interpolation method to be used (e.g., cv2.INTER_LINEAR).
        border_mode (int): Pixel extrapolation method (e.g., cv2.BORDER_CONSTANT).
        value (ColorType | None): Border value if border_mode is cv2.BORDER_CONSTANT.
        random_state (np.random.RandomState | None): Random state for reproducibility.
        same_dxdy (bool, optional): If True, use the same displacement field for both x and y directions.

    Returns:
        np.ndarray: Transformed image with precise elastic deformation applied.
    """
    derived_kernel = (0, 0)
    return elastic_transform_helper(
        img,
        alpha,
        sigma,
        interpolation,
        border_mode,
        value,
        random_state,
        same_dxdy,
        kernel_size=derived_kernel,
    )
def find_keypoint (position, distance_map, threshold, inverted)
[view source on GitHub]¶
Determine if a valid keypoint can be found at the given position.
Source code in albumentations/augmentations/geometric/functional.py
def find_keypoint(
    position: tuple[int, int],
    distance_map: np.ndarray,
    threshold: float | None,
    inverted: bool,
) -> tuple[float, float] | None:
    """Determine if a valid keypoint can be found at the given position.

    Args:
        position: `(y, x)` index into `distance_map`.
        distance_map: Two-dimensional map indexed as `distance_map[y, x]`.
        threshold: Acceptance threshold, or None to accept the position unconditionally.
        inverted: Selects the direction of the threshold test. When False the position
            is rejected if its value is `>= threshold`; when True it is rejected if its
            value is `< threshold`.

    Returns:
        `(x, y)` as floats when the position is accepted, otherwise None.
    """
    row, col = position
    value = distance_map[row, col]

    if threshold is not None:
        rejected = value < threshold if inverted else value >= threshold
        if rejected:
            return None

    # Note the order swap: positions are (y, x) but keypoints are reported as (x, y).
    return float(col), float(row)
def from_distance_maps (distance_maps, inverted, if_not_found_coords, threshold)
[view source on GitHub]¶
Convert outputs of `to_distance_maps` back to keypoint coordinates (a list of `(x, y)` tuples). This is the inverse of `to_distance_maps`.
Source code in albumentations/augmentations/geometric/functional.py
def from_distance_maps(
    distance_maps: np.ndarray,
    inverted: bool,
    if_not_found_coords: Sequence[int] | dict[str, Any] | None,
    threshold: float | None,
) -> list[tuple[float, float]]:
    """Convert outputs of `to_distance_maps` back to keypoint coordinates.

    This is the inverse of `to_distance_maps`. For every channel the extremum of the map
    (maximum when `inverted`, minimum otherwise) is located and handed to `find_keypoint`
    for the threshold test.

    Args:
        distance_maps: Array unpacked as `(height, width, nb_keypoints)`.
        inverted: Whether the maps are inverted (search for the maximum instead of the minimum).
        if_not_found_coords: Fallback specification passed to `validate_if_not_found_coords`,
            which yields a drop flag and fallback x/y coordinates.
        threshold: Threshold forwarded to `find_keypoint`; None disables the test.

    Returns:
        One `(x, y)` tuple per channel, except channels whose keypoint was rejected
        while the drop flag is set.

    Raises:
        ValueError: If `distance_maps.ndim` differs from `NUM_MULTI_CHANNEL_DIMENSIONS`.
    """
    if distance_maps.ndim != NUM_MULTI_CHANNEL_DIMENSIONS:
        msg = f"Expected three-dimensional input, got {distance_maps.ndim} dimensions and shape {distance_maps.shape}."
        raise ValueError(msg)

    height, width, nb_keypoints = distance_maps.shape
    drop_if_not_found, fallback_x, fallback_y = validate_if_not_found_coords(if_not_found_coords)

    # The search direction does not depend on the channel, so pick it once.
    locate_extremum = np.argmax if inverted else np.argmin

    keypoints = []
    for channel in range(nb_keypoints):
        channel_map = distance_maps[..., channel]
        flat_index = locate_extremum(channel_map)
        position = np.unravel_index(flat_index, (height, width))
        found = find_keypoint(position, channel_map, threshold, inverted)
        if found:
            keypoints.append(found)
        elif not drop_if_not_found:
            keypoints.append((fallback_x, fallback_y))
    return keypoints
def keypoint_d4 (keypoint, group_member, rows, cols, ** params)
[view source on GitHub]¶
Applies a D_4
symmetry group transformation to a keypoint.
This function adjusts a keypoint's coordinates according to the specified D_4
group transformation, which includes rotations and reflections suitable for image processing tasks. These transformations account for the dimensions of the image to ensure the keypoint remains within its boundaries.
- keypoint (KeypointInternalType): The keypoint to transform. This should be a structure or tuple specifying coordinates like (x, y, [additional parameters]).
- group_member (D4Type): A string identifier for the `D_4` group transformation to apply. Valid values are 'e', 'r90', 'r180', 'r270', 'v', 'hvt', 'h', 't'.
- rows (int): The number of rows in the image.
- cols (int): The number of columns in the image.
- params (Any): Not used
- KeypointInternalType: The transformed keypoint.
- ValueError: If an invalid group member is specified, indicating that the specified transformation does not exist.
Examples:
- Rotating a keypoint by 90 degrees in a 100x100 image:
keypoint_d4((50, 30, 0, 1), 'r90', 100, 100)
This would move the keypoint from (50, 30) to (30, 49): a 90-degree counterclockwise rotation maps (x, y) to (y, cols - 1 - x).
Source code in albumentations/augmentations/geometric/functional.py
def keypoint_d4(
    keypoint: KeypointInternalType,
    group_member: D4Type,
    rows: int,
    cols: int,
    **params: Any,
) -> KeypointInternalType:
    """Apply one element of the `D_4` symmetry group to a keypoint.

    The eight group members cover the identity, three quarter-turn rotations and four
    reflections. The work is delegated to `keypoint_rot90`, `keypoint_vflip`,
    `keypoint_hflip` and `keypoint_transpose`, which receive the image dimensions.

    Args:
        keypoint: The keypoint to transform, e.g. `(x, y, angle, scale)`.
        group_member: Identifier of the `D_4` element to apply. Valid values are
            'e', 'r90', 'r180', 'r270', 'v', 'hvt', 'h', 't'.
        rows: The number of rows in the image.
        cols: The number of columns in the image.
        **params: Not used.

    Returns:
        The transformed keypoint.

    Raises:
        ValueError: If `group_member` is not one of the valid identifiers.

    Examples:
        - Rotating a keypoint by 90 degrees in a 100x100 image:
          `keypoint_d4((50, 30, 0, 1), 'r90', 100, 100)`
          This moves the position from (50, 30) to (30, 49), because a quarter turn
          maps (x, y) to (y, cols - 1 - x).
    """
    # Every entry is a lambda, so nothing is computed until the chosen member is called.
    dispatch = {
        "e": lambda kp: kp,  # identity
        "r90": lambda kp: keypoint_rot90(kp, 1, rows, cols),  # one quarter turn
        "r180": lambda kp: keypoint_rot90(kp, 2, rows, cols),  # two quarter turns
        "r270": lambda kp: keypoint_rot90(kp, 3, rows, cols),  # three quarter turns
        "v": lambda kp: keypoint_vflip(kp, rows, cols),  # vertical flip
        # Reflect over the anti-diagonal: half turn followed by a transpose.
        "hvt": lambda kp: keypoint_transpose(keypoint_rot90(kp, 2, rows, cols), rows, cols),
        "h": lambda kp: keypoint_hflip(kp, rows, cols),  # horizontal flip
        "t": lambda kp: keypoint_transpose(kp, rows, cols),  # reflect over main diagonal
    }

    handler = dispatch.get(group_member)
    if handler is None:
        raise ValueError(f"Invalid group member: {group_member}")
    return handler(keypoint)
def keypoint_flip (keypoint, d, rows, cols)
[view source on GitHub]¶
Flip a keypoint either vertically, horizontally or both depending on the value of `d`.
Parameters:
Name | Type | Description |
---|---|---|
keypoint | KeypointInternalType | A keypoint |
d | int | Number of flip. Must be -1, 0 or 1: * 0 - vertical flip, * 1 - horizontal flip, * -1 - vertical and horizontal flip. |
rows | int | Image height. |
cols | int | Image width. |
Returns:
Type | Description |
---|---|
KeypointInternalType | A keypoint |
Exceptions:
Type | Description |
---|---|
ValueError | if value of `d` is not -1, 0 or 1. |
Source code in albumentations/augmentations/geometric/functional.py
@angle_2pi_range
def keypoint_flip(keypoint: KeypointInternalType, d: int, rows: int, cols: int) -> KeypointInternalType:
    """Flip a keypoint vertically, horizontally, or both, as selected by `d`.

    Args:
        keypoint: A keypoint `(x, y, angle, scale)`.
        d: Flip code. Must be -1, 0 or 1:
            * 0 - vertical flip,
            * 1 - horizontal flip,
            * -1 - horizontal flip followed by vertical flip.
        rows: Image height.
        cols: Image width.

    Returns:
        A keypoint `(x, y, angle, scale)`.

    Raises:
        ValueError: if value of `d` is not -1, 0 or 1.
    """
    if d not in (-1, 0, 1):
        raise ValueError(f"Invalid d value {d}. Valid values are -1, 0 and 1")

    # Codes 1 and -1 both include the horizontal flip; it is applied first.
    if d != 0:
        keypoint = keypoint_hflip(keypoint, rows, cols)
    # Codes 0 and -1 both include the vertical flip.
    if d != 1:
        keypoint = keypoint_vflip(keypoint, rows, cols)
    return keypoint
def keypoint_hflip (keypoint, rows, cols)
[view source on GitHub]¶
Flip a keypoint horizontally around the y-axis.
Parameters:
Name | Type | Description |
---|---|---|
keypoint | KeypointInternalType | A keypoint |
rows | int | Image height. |
cols | int | Image width. |
Returns:
Type | Description |
---|---|
KeypointInternalType | A keypoint |
Source code in albumentations/augmentations/geometric/functional.py
@angle_2pi_range
def keypoint_hflip(keypoint: KeypointInternalType, rows: int, cols: int) -> KeypointInternalType:
    """Mirror a keypoint left-to-right (flip around the vertical axis).

    Args:
        keypoint: A keypoint `(x, y, angle, scale)`. Only the first four entries are read.
        rows: Image height. Not used by this function.
        cols: Image width.

    Returns:
        A keypoint `(x, y, angle, scale)` with `x` mapped to `(cols - 1) - x` and
        `angle` mapped to `pi - angle`.
    """
    x, y, angle, scale = keypoint[:4]
    mirrored_x = (cols - 1) - x
    mirrored_angle = math.pi - angle
    return mirrored_x, y, mirrored_angle, scale
def keypoint_rot90 (keypoint, factor, rows, cols, ** params)
[view source on GitHub]¶
Rotate a keypoint by 90 degrees counter-clockwise (CCW) a specified number of times.
Parameters:
Name | Type | Description |
---|---|---|
keypoint | KeypointInternalType | A keypoint in the format `(x, y, angle, scale)`. |
factor | int | The number of 90 degree CCW rotations to apply. Must be in the range [0, 3]. |
rows | int | The height of the image the keypoint belongs to. |
cols | int | The width of the image the keypoint belongs to. |
**params | Any | Additional parameters. |
Returns:
Type | Description |
---|---|
KeypointInternalType | The rotated keypoint in the format `(x, y, angle, scale)`. |
Exceptions:
Type | Description |
---|---|
ValueError | If the factor is not in the set {0, 1, 2, 3}. |
Source code in albumentations/augmentations/geometric/functional.py
@angle_2pi_range
def keypoint_rot90(
    keypoint: KeypointInternalType,
    factor: int,
    rows: int,
    cols: int,
    **params: Any,
) -> KeypointInternalType:
    """Rotate a keypoint by 90 degrees counter-clockwise (CCW) a specified number of times.

    Args:
        keypoint (KeypointInternalType): A keypoint in the format `(x, y, angle, scale)`.
            Only the first four entries are read, so keypoints carrying extra fields
            are accepted, as in the sibling keypoint helpers.
        factor (int): The number of 90 degree CCW rotations to apply. Must be in the range [0, 3].
        rows (int): The height of the image the keypoint belongs to.
        cols (int): The width of the image the keypoint belongs to.
        **params: Additional parameters. Not used.

    Returns:
        KeypointInternalType: The rotated keypoint in the format `(x, y, angle, scale)`.

    Raises:
        ValueError: If the factor is not in the set {0, 1, 2, 3}.
    """
    # Slice before unpacking so keypoints with more than four entries do not raise.
    x, y, angle, scale = keypoint[:4]

    if factor not in {0, 1, 2, 3}:
        raise ValueError("Parameter factor must be in set {0, 1, 2, 3}")

    # Pixel indices run from 0 to size - 1, hence the `(size - 1) - value` mirroring.
    if factor == 1:
        x, y, angle = y, (cols - 1) - x, angle - math.pi / 2
    elif factor == ROT90_180_FACTOR:
        x, y, angle = (cols - 1) - x, (rows - 1) - y, angle - math.pi
    elif factor == ROT90_270_FACTOR:
        x, y, angle = (rows - 1) - y, x, angle + math.pi / 2

    return x, y, angle, scale
def keypoint_rotate (keypoint, angle, rows, cols, ** params)
[view source on GitHub]¶
Rotate a keypoint by a specified angle.
Parameters:
Name | Type | Description |
---|---|---|
keypoint | KeypointInternalType | A keypoint in the format `(x, y, angle, scale)`. |
angle | float | The angle by which to rotate the keypoint, in degrees. |
rows | int | The height of the image the keypoint belongs to. |
cols | int | The width of the image the keypoint belongs to. |
**params | Any | Additional parameters. |
Returns:
Type | Description |
---|---|
KeypointInternalType | The rotated keypoint in the format `(x, y, angle, scale)`. |
Note
The rotation is performed around the center of the image.
Source code in albumentations/augmentations/geometric/functional.py
@angle_2pi_range
def keypoint_rotate(
    keypoint: KeypointInternalType,
    angle: float,
    rows: int,
    cols: int,
    **params: Any,
) -> KeypointInternalType:
    """Rotate a keypoint by a specified angle.

    Args:
        keypoint (KeypointInternalType): A keypoint in the format `(x, y, angle, scale)`.
            Only the first four entries are read.
        angle (float): The angle by which to rotate the keypoint, in degrees.
        rows (int): The height of the image the keypoint belongs to.
        cols (int): The width of the image the keypoint belongs to.
        **params: Additional parameters. Not used.

    Returns:
        KeypointInternalType: The rotated keypoint in the format `(x, y, angle, scale)`.
        The keypoint angle is increased by `angle` converted to radians; the scale is unchanged.

    Note:
        The rotation pivot is whatever `center(cols, rows)` returns, and the rotation
        matrix is built by `cv2.getRotationMatrix2D` with unit scale.
    """
    rotation = cv2.getRotationMatrix2D(center(cols, rows), angle, 1.0)

    x, y, kp_angle, kp_scale = keypoint[:4]
    # cv2.transform expects an array of points; wrap the single point and unwrap the result.
    point = np.array([[[x, y]]])
    new_x, new_y = cv2.transform(point, rotation).squeeze()

    return new_x, new_y, kp_angle + math.radians(angle), kp_scale
def keypoint_scale (keypoint, scale_x, scale_y)
[view source on GitHub]¶
Scales a keypoint by scale_x and scale_y.
Parameters:
Name | Type | Description |
---|---|---|
keypoint | KeypointInternalType | A keypoint |
scale_x | float | Scale coefficient x-axis. |
scale_y | float | Scale coefficient y-axis. |
Returns:
Type | Description |
---|---|
KeypointInternalType | A keypoint |
Source code in albumentations/augmentations/geometric/functional.py
def keypoint_scale(keypoint: KeypointInternalType, scale_x: float, scale_y: float) -> KeypointInternalType:
    """Scale a keypoint's position by `scale_x` / `scale_y` and its size by the larger factor.

    Args:
        keypoint: A keypoint `(x, y, angle, scale)`. Only the first four entries are read.
        scale_x: Scale coefficient x-axis.
        scale_y: Scale coefficient y-axis.

    Returns:
        A keypoint `(x, y, angle, scale)`; the angle is passed through unchanged.
    """
    x, y, angle, size = keypoint[:4]
    # The keypoint's own scale follows the larger of the two axis factors.
    dominant_factor = max(scale_x, scale_y)
    return (x * scale_x, y * scale_y, angle, size * dominant_factor)
def keypoint_transpose (keypoint, rows, cols)
[view source on GitHub]¶
Transposes a keypoint along a specified axis: main diagonal
Parameters:
Name | Type | Description |
---|---|---|
keypoint | KeypointInternalType | A keypoint |
rows | int | Total number of rows (height) in the image. |
cols | int | Total number of columns (width) in the image. |
Returns:
Type | Description |
---|---|
KeypointInternalType | A transformed keypoint |
Exceptions:
Type | Description |
---|---|
ValueError | If `keypoint` has fewer than four elements (the function has no `axis` parameter). |
Source code in albumentations/augmentations/geometric/functional.py
@angle_2pi_range
def keypoint_transpose(keypoint: KeypointInternalType, rows: int, cols: int) -> KeypointInternalType:
    """Transpose a keypoint over the main diagonal (swap x and y).

    Args:
        keypoint: A keypoint `(x, y, angle, scale)`. Only the first four entries are read.
        rows: Total number of rows (height) in the image. Not used by this function.
        cols: Total number of columns (width) in the image. Not used by this function.

    Returns:
        A transformed keypoint `(x, y, angle, scale)`.

    Raises:
        ValueError: If `keypoint` has fewer than four elements (tuple unpacking fails).
    """
    x, y, angle, scale = keypoint[:4]

    # Reflect the orientation to match the coordinate swap; the formula depends on
    # which half of the circle the incoming angle lies in.
    if angle <= np.pi:
        reflected_angle = np.pi / 2 - angle
    else:
        reflected_angle = 3 * np.pi / 2 - angle

    # Swapping x and y is the reflection over the main diagonal.
    return y, x, reflected_angle, scale
def keypoint_vflip (keypoint, rows, cols)
[view source on GitHub]¶
Flip a keypoint vertically around the x-axis.
Parameters:
Name | Type | Description |
---|---|---|
keypoint | KeypointInternalType | A keypoint |
rows | int | Image height. |
cols | int | Image width. |
Returns:
Type | Description |
---|---|
tuple | A keypoint |
Source code in albumentations/augmentations/geometric/functional.py
@angle_2pi_range
def keypoint_vflip(keypoint: KeypointInternalType, rows: int, cols: int) -> KeypointInternalType:
    """Mirror a keypoint top-to-bottom (flip around the horizontal axis).

    Args:
        keypoint: A keypoint `(x, y, angle, scale)`. Only the first four entries are read.
        rows: Image height.
        cols: Image width. Not used by this function.

    Returns:
        tuple: A keypoint `(x, y, angle, scale)` with `y` mapped to `(rows - 1) - y`
        and the angle negated.
    """
    x, y, angle, scale = keypoint[:4]
    mirrored_y = (rows - 1) - y
    return x, mirrored_y, -angle, scale
def optical_distortion (img, k, dx, dy, interpolation, border_mode, value=None)
[view source on GitHub]¶
Barrel / pincushion distortion. Unconventional augment.
Reference
| https://stackoverflow.com/questions/6199636/formulas-for-barrel-pincushion-distortion | https://stackoverflow.com/questions/10364201/image-transformation-in-opencv | https://stackoverflow.com/questions/2477774/correcting-fisheye-distortion-programmatically | http://www.coldvision.io/2017/03/02/advanced-lane-finding-using-opencv/
Source code in albumentations/augmentations/geometric/functional.py
@preserve_channel_dim
def optical_distortion(
    img: np.ndarray,
    k: int,
    dx: int,
    dy: int,
    interpolation: int,
    border_mode: int,
    value: ColorType | None = None,
) -> np.ndarray:
    """Barrel / pincushion distortion. Unconventional augment.

    Builds a camera matrix whose focal lengths are the image width and height
    and whose principal point is the image center shifted by `(dx, dy)`, then
    remaps the image through OpenCV's undistortion maps using `k` for the
    first two distortion coefficients.

    Args:
        img: Input image; the first two dimensions are read as height and width.
        k: Value used for the first two distortion coefficients.
        dx: Horizontal shift added to the principal point.
        dy: Vertical shift added to the principal point.
        interpolation: Interpolation flag forwarded to `cv2.remap`.
        border_mode: Border mode forwarded to `cv2.remap`.
        value: Border value forwarded to `cv2.remap`.

    Returns:
        The remapped image.

    Reference:
        |  https://stackoverflow.com/questions/6199636/formulas-for-barrel-pincushion-distortion
        |  https://stackoverflow.com/questions/10364201/image-transformation-in-opencv
        |  https://stackoverflow.com/questions/2477774/correcting-fisheye-distortion-programmatically
        |  http://www.coldvision.io/2017/03/02/advanced-lane-finding-using-opencv/

    """
    height, width = img.shape[:2]

    # Principal point: image center displaced by the requested offsets.
    center_x = width * 0.5 + dx
    center_y = height * 0.5 + dy

    # Focal lengths are taken to be the image width (x) and height (y).
    camera_matrix = np.array(
        [
            [width, 0, center_x],
            [0, height, center_y],
            [0, 0, 1],
        ],
        dtype=np.float32,
    )

    # Only the first two coefficients are non-zero; both are set to k.
    distortion_coeffs = np.array([k, k, 0, 0, 0], dtype=np.float32)

    map_x, map_y = cv2.initUndistortRectifyMap(
        camera_matrix,
        distortion_coeffs,
        None,
        None,
        (width, height),
        cv2.CV_32FC1,
    )
    return cv2.remap(
        img,
        map_x,
        map_y,
        interpolation=interpolation,
        borderMode=border_mode,
        borderValue=value,
    )
def rotation2d_matrix_to_euler_angles (matrix, y_up)
[view source on GitHub]¶
Args: matrix (np.ndarray): Rotation matrix. y_up (bool): Whether the Y axis points up or down.
Source code in albumentations/augmentations/geometric/functional.py
def to_distance_maps (keypoints, height, width, inverted=False)
[view source on GitHub]¶
Generate a `(H,W,N)` array of distance maps for `N` keypoints.
The `n`-th distance map contains at every location `(y, x)` the euclidean distance to the `n`-th keypoint.
This function can be used as a helper when augmenting keypoints with a method that only supports the augmentation of images.
Parameters:
Name | Type | Description |
---|---|---|
keypoints | Sequence[tuple[float, float]] | keypoint coordinates |
height | int | image height |
width | int | image width |
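inverted | bool | If `True`, inverted distance maps are returned where each distance value `d` is replaced by `1/(d+1)`, i.e. the distance maps have values in the range `(0.0, 1.0]` with `1.0` denoting exactly the position of the respective keypoint. |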
Returns:
Type | Description |
---|---|
np.ndarray | (H, W, N) ndarray: a `float32` array containing `N` distance maps for `N` keypoints. |
Source code in albumentations/augmentations/geometric/functional.py
def to_distance_maps(
    keypoints: Sequence[tuple[float, float]],
    height: int,
    width: int,
    inverted: bool = False,
) -> np.ndarray:
    """Generate a ``(H,W,N)`` array of distance maps for ``N`` keypoints.

    The ``n``-th distance map contains at every location ``(y, x)`` the
    euclidean distance to the ``n``-th keypoint.

    This function can be used as a helper when augmenting keypoints with a
    method that only supports the augmentation of images.

    Args:
        keypoints: keypoint coordinates, one ``(x, y)`` pair per keypoint.
        height: image height
        width: image width
        inverted (bool): If ``True``, inverted distance maps are returned where each
            distance value ``d`` is replaced by ``1/(d+1)``, i.e. the distance
            maps have values in the range ``(0.0, 1.0]`` with ``1.0`` denoting
            exactly the position of the respective keypoint.

    Returns:
        (H, W, N) ndarray
            A ``float32`` array containing ``N`` distance maps for ``N``
            keypoints. Each location ``(y, x, n)`` in the array denotes the
            euclidean distance at ``(y, x)`` to the ``n``-th keypoint.
            If `inverted` is ``True``, the distance ``d`` is replaced
            by ``1/(d+1)``. The height and width of the array match the
            `height` and `width` arguments.

    """
    maps = np.zeros((height, width, len(keypoints)), dtype=np.float32)

    # A (1, W) row of x indices and an (H, 1) column of y indices broadcast to
    # the full (H, W) grid when the two squared offsets are added.
    col_index = np.arange(0, width)[np.newaxis, :]
    row_index = np.arange(0, height)[:, np.newaxis]

    for channel, (kp_x, kp_y) in enumerate(keypoints):
        # Squared distance first; the square root is taken once for all maps.
        maps[..., channel] = (col_index - kp_x) ** 2 + (row_index - kp_y) ** 2

    maps = np.sqrt(maps)
    return 1 / (maps + 1) if inverted else maps
def transpose (img)
[view source on GitHub]¶
Transposes the first two dimensions of an array of any dimensionality. Retains the order of any additional dimensions.
Parameters:
Name | Type | Description |
---|---|---|
img | np.ndarray | Input array. |
Returns:
Type | Description |
---|---|
np.ndarray | Transposed array. |
Source code in albumentations/augmentations/geometric/functional.py
def transpose(img: np.ndarray) -> np.ndarray:
    """Swap the first two dimensions of an array of any dimensionality.

    Any additional dimensions keep their original order.

    Args:
        img (np.ndarray): Input array.

    Returns:
        np.ndarray: Transposed array.

    """
    # Exchanging axes 0 and 1 leaves every trailing axis where it was.
    return img.swapaxes(0, 1)
def validate_if_not_found_coords (if_not_found_coords)
[view source on GitHub]¶
Validate and process the `if_not_found_coords` parameter.
Source code in albumentations/augmentations/geometric/functional.py
def validate_if_not_found_coords(
    if_not_found_coords: Sequence[int] | dict[str, Any] | None,
) -> tuple[bool, int, int]:
    """Validate and process `if_not_found_coords` parameter.

    Args:
        if_not_found_coords: Either ``None``, a tuple/list with exactly two
            entries, or a dict with ``"x"`` and ``"y"`` keys.

    Returns:
        A ``(flag, x, y)`` tuple. ``flag`` is ``True`` only when the input is
        ``None``, in which case both coordinates are ``-1``; otherwise it is
        ``False`` and the coordinates are taken from the input.

    Raises:
        ValueError: If a tuple/list does not contain exactly two entries, or
            if the input is not ``None``, a tuple, a list, or a dict.

    """
    coords = if_not_found_coords

    if coords is None:
        return True, -1, -1

    if isinstance(coords, dict):
        return False, coords["x"], coords["y"]

    if not isinstance(coords, (tuple, list)):
        msg = "Expected if_not_found_coords to be None, tuple, list, or dict."
        raise ValueError(msg)

    if len(coords) != TWO:
        msg = "Expected tuple/list 'if_not_found_coords' to contain exactly two entries."
        raise ValueError(msg)

    return False, coords[0], coords[1]