Geometric functional transforms (augmentations.geometric.functional)¶
def
bbox_flip (bbox, d, rows, cols)
[view source on GitHub]¶
Flip a bounding box either vertically, horizontally or both depending on the value of d
.
Parameters:
Name | Type | Description |
---|---|---|
bbox |
Tuple[float, float, float, float] |
A bounding box |
d |
int |
dimension. 0 for vertical flip, 1 for horizontal flip, -1 for both vertical and horizontal flip |
rows |
int |
Image rows. |
cols |
int |
Image cols. |
Returns:
Type | Description |
---|---|
Tuple[float, float, float, float] |
A bounding box |
Exceptions:
Type | Description |
---|---|
ValueError |
if value of `d` is not -1, 0 or 1. |
Source code in albumentations/augmentations/geometric/functional.py
def bbox_flip(bbox: BoxInternalType, d: int, rows: int, cols: int) -> BoxInternalType:
    """Flip a bounding box vertically, horizontally, or along both axes.

    Args:
        bbox: A bounding box `(x_min, y_min, x_max, y_max)`.
        d: Flip code. 0 flips vertically, 1 flips horizontally and -1 applies a
            horizontal flip followed by a vertical flip.
        rows: Image rows.
        cols: Image cols.

    Returns:
        A bounding box `(x_min, y_min, x_max, y_max)`.

    Raises:
        ValueError: if value of `d` is not -1, 0 or 1.

    """
    if d == 0:
        return bbox_vflip(bbox, rows, cols)
    if d == 1:
        return bbox_hflip(bbox, rows, cols)
    if d == -1:
        # Both axes: mirror left-right first, then top-bottom.
        return bbox_vflip(bbox_hflip(bbox, rows, cols), rows, cols)
    raise ValueError(f"Invalid d value {d}. Valid values are -1, 0 and 1")
def
bbox_hflip (bbox, rows, cols)
[view source on GitHub]¶
Flip a bounding box horizontally around the y-axis.
Parameters:
Name | Type | Description |
---|---|---|
bbox |
Tuple[float, float, float, float] |
A bounding box |
rows |
int |
Image rows. |
cols |
int |
Image cols. |
Returns:
Type | Description |
---|---|
Tuple[float, float, float, float] |
A bounding box |
Source code in albumentations/augmentations/geometric/functional.py
def bbox_hflip(bbox: BoxInternalType, rows: int, cols: int) -> BoxInternalType:
    """Mirror a bounding box left-to-right (flip around the y-axis).

    Every x coordinate is mapped to `1 - x`; the y coordinates are passed through.
    Only the first four entries of `bbox` are used.

    Args:
        bbox: A bounding box `(x_min, y_min, x_max, y_max)`.
        rows: Image rows (not used by the computation).
        cols: Image cols (not used by the computation).

    Returns:
        A bounding box `(x_min, y_min, x_max, y_max)`.

    """
    left, top, right, bottom = bbox[:4]
    # Mirroring swaps the roles of the left and right edges.
    flipped_left = 1 - right
    flipped_right = 1 - left
    return flipped_left, top, flipped_right, bottom
def
bbox_rot90 (bbox, factor, rows, cols)
[view source on GitHub]¶
Rotates a bounding box by 90 degrees CCW (see np.rot90)
Parameters:
Name | Type | Description |
---|---|---|
bbox |
Tuple[float, float, float, float] |
A bounding box tuple (x_min, y_min, x_max, y_max). |
factor |
int |
Number of CCW rotations. Must be in set {0, 1, 2, 3} See np.rot90. |
rows |
int |
Image rows. |
cols |
int |
Image cols. |
Returns:
Type | Description |
---|---|
tuple |
A bounding box tuple (x_min, y_min, x_max, y_max). |
Source code in albumentations/augmentations/geometric/functional.py
def bbox_rot90(bbox: BoxInternalType, factor: int, rows: int, cols: int) -> BoxInternalType:
    """Rotate a bounding box by multiples of 90 degrees CCW (see np.rot90).

    Args:
        bbox: A bounding box tuple (x_min, y_min, x_max, y_max).
        factor: Number of CCW rotations. Must be in set {0, 1, 2, 3} See np.rot90.
        rows: Image rows (not used by the computation).
        cols: Image cols (not used by the computation).

    Returns:
        tuple: A bounding box tuple (x_min, y_min, x_max, y_max). With `factor == 0`
            the input `bbox` is returned as is.

    Raises:
        ValueError: if `factor` is not in set {0, 1, 2, 3}.

    """
    if factor not in {0, 1, 2, 3}:
        raise ValueError("Parameter n must be in set {0, 1, 2, 3}")

    left, top, right, bottom = bbox[:4]
    if factor == 1:
        return top, 1 - right, bottom, 1 - left
    if factor == TWO:
        return 1 - right, 1 - bottom, 1 - left, 1 - top
    if factor == THREE:
        return 1 - bottom, left, 1 - top, right
    # factor == 0: nothing to rotate.
    return bbox
def
bbox_rotate (bbox, angle, method, rows, cols)
[view source on GitHub]¶
Rotates a bounding box by angle degrees.
Parameters:
Name | Type | Description |
---|---|---|
bbox |
Tuple[float, float, float, float] |
A bounding box |
angle |
float |
Angle of rotation in degrees. |
method |
str |
Rotation method used. Should be one of: "largest_box", "ellipse". Default: "largest_box". |
rows |
int |
Image rows. |
cols |
int |
Image cols. |
Returns:
Type | Description |
---|---|
Tuple[float, float, float, float] |
A bounding box |
References
Source code in albumentations/augmentations/geometric/functional.py
def bbox_rotate(bbox: BoxInternalType, angle: float, method: str, rows: int, cols: int) -> BoxInternalType:
    """Rotate a bounding box by `angle` degrees and return the enclosing axis-aligned box.

    Args:
        bbox: A bounding box `(x_min, y_min, x_max, y_max)`.
        angle: Angle of rotation in degrees.
        method: Rotation method used. Should be one of: "largest_box", "ellipse".
            "largest_box" rotates the four corners of the box; "ellipse" rotates 360
            points sampled on the ellipse inscribed in the box.
        rows: Image rows.
        cols: Image cols.

    Returns:
        A bounding box `(x_min, y_min, x_max, y_max)`.

    Raises:
        ValueError: if `method` is neither "largest_box" nor "ellipse".

    References:
        https://arxiv.org/abs/2109.13488

    """
    left, top, right, bottom = bbox[:4]
    # Width/height ratio, used to compensate for non-square images during rotation.
    aspect = cols / float(rows)

    # Sample points are expressed relative to the centre (0.5, 0.5).
    if method == "largest_box":
        xs = np.array([left, right, right, left]) - 0.5
        ys = np.array([top, top, bottom, bottom]) - 0.5
    elif method == "ellipse":
        half_w = (right - left) / 2
        half_h = (bottom - top) / 2
        phi = np.radians(np.arange(0, 360, dtype=np.float32))
        xs = half_w * np.sin(phi) + (half_w + left - 0.5)
        ys = half_h * np.cos(phi) + (half_h + top - 0.5)
    else:
        raise ValueError(f"Method {method} is not a valid rotation method.")

    theta = np.deg2rad(angle)
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    # Rotate, then shift back from centre-relative coordinates.
    rotated_x = (cos_t * xs * aspect + sin_t * ys) / aspect + 0.5
    rotated_y = -sin_t * xs * aspect + cos_t * ys + 0.5

    return min(rotated_x), min(rotated_y), max(rotated_x), max(rotated_y)
def
bbox_shift_scale_rotate (bbox, angle, scale, dx, dy, rotate_method, rows, cols, **
kwargs)
[view source on GitHub]¶
Rotates, shifts and scales a bounding box. Rotation is made by angle degrees, scaling is made by scale factor and shifting is made by dx and dy.
Parameters:
Name | Type | Description |
---|---|---|
bbox |
tuple |
A bounding box |
angle |
int |
Angle of rotation in degrees. |
scale |
int |
Scale factor. |
dx |
int |
Shift along x-axis in pixel units. |
dy |
int |
Shift along y-axis in pixel units. |
rotate_method |
str |
Rotation method used. Should be one of: "largest_box", "ellipse". Default: "largest_box". |
rows |
int |
Image rows. |
cols |
int |
Image cols. |
Returns:
Type | Description |
---|---|
Tuple[float, float, float, float] |
A bounding box |
Source code in albumentations/augmentations/geometric/functional.py
def bbox_shift_scale_rotate(
    bbox: BoxInternalType,
    angle: float,
    scale: float,
    dx: int,
    dy: int,
    rotate_method: str,
    rows: int,
    cols: int,
    **kwargs: Any,
) -> BoxInternalType:
    """Rotate, scale and shift a bounding box, returning the enclosing axis-aligned box.

    Args:
        bbox (tuple): A bounding box `(x_min, y_min, x_max, y_max)`.
        angle (float): Angle of rotation in degrees.
        scale (float): Scale factor.
        dx: Shift along x-axis. It is multiplied by the image width before being
            added to the translation term of the affine matrix.
        dy: Shift along y-axis. It is multiplied by the image height before being
            added to the translation term of the affine matrix.
        rotate_method(str): Rotation method used. Should be one of: "largest_box", "ellipse".
            With "ellipse" the rotation is delegated to `bbox_rotate` and the affine
            matrix only scales and shifts; any other value rotates the box corners
            with the affine matrix itself.
        rows (int): Image rows.
        cols (int): Image cols.
        **kwargs: Accepted and ignored.

    Returns:
        A bounding box `(x_min, y_min, x_max, y_max)`.

    """
    pivot = (cols / 2, rows / 2)

    if rotate_method == "ellipse":
        # Rotation already applied to the box, so the matrix uses angle 0.
        left, top, right, bottom = bbox_rotate(bbox, angle, rotate_method, rows, cols)
        affine = cv2.getRotationMatrix2D(pivot, 0, scale)
    else:
        left, top, right, bottom = bbox[:4]
        affine = cv2.getRotationMatrix2D(pivot, angle, scale)

    affine[0, 2] += dx * cols
    affine[1, 2] += dy * rows

    # Homogeneous corner coordinates, one corner per row.
    corners = np.vstack(
        [
            np.array([left, right, right, left]),
            np.array([top, top, bottom, bottom]),
            np.ones(shape=4),
        ]
    ).transpose()
    # Scale up by the image size for the affine transform ...
    corners[:, 0] *= cols
    corners[:, 1] *= rows
    moved = affine.dot(corners.T).T
    # ... and back down again afterwards.
    moved[:, 0] /= cols
    moved[:, 1] /= rows

    xs = moved[:, 0]
    ys = moved[:, 1]
    return min(xs), min(ys), max(xs), max(ys)
def
bbox_transpose (bbox, axis, rows, cols)
[view source on GitHub]¶
Transposes a bounding box along given axis.
Parameters:
Name | Type | Description |
---|---|---|
bbox |
Tuple[float, float, float, float] |
A bounding box |
axis |
int |
0 - main axis, 1 - secondary axis. |
rows |
int |
Image rows. |
cols |
int |
Image cols. |
Returns:
Type | Description |
---|---|
Tuple[float, float, float, float] |
A bounding box tuple |
Exceptions:
Type | Description |
---|---|
ValueError |
If axis not equal to 0 or 1. |
Source code in albumentations/augmentations/geometric/functional.py
def bbox_transpose(bbox: BoxInternalType, axis: int, rows: int, cols: int) -> BoxInternalType:
    """Transpose a bounding box along the given axis.

    Args:
        bbox: A bounding box `(x_min, y_min, x_max, y_max)`.
        axis: 0 - main axis, 1 - secondary axis.
        rows: Image rows (not used by the computation).
        cols: Image cols (not used by the computation).

    Returns:
        A bounding box tuple `(x_min, y_min, x_max, y_max)`.

    Raises:
        ValueError: If axis not equal to 0 or 1.

    """
    # Validate before touching the box so a bad axis is always reported as such.
    if axis not in {0, 1}:
        msg = "Axis must be either 0 or 1."
        raise ValueError(msg)

    x_min, y_min, x_max, y_max = bbox[:4]
    if axis == 0:
        # Main diagonal: x and y simply swap.
        return y_min, x_min, y_max, x_max
    # Secondary diagonal: swap and mirror both coordinates.
    return 1 - y_max, 1 - x_max, 1 - y_min, 1 - x_min
def
bbox_vflip (bbox, rows, cols)
[view source on GitHub]¶
Flip a bounding box vertically around the x-axis.
Parameters:
Name | Type | Description |
---|---|---|
bbox |
Tuple[float, float, float, float] |
A bounding box |
rows |
int |
Image rows. |
cols |
int |
Image cols. |
Returns:
Type | Description |
---|---|
tuple |
A bounding box |
Source code in albumentations/augmentations/geometric/functional.py
def bbox_vflip(bbox: BoxInternalType, rows: int, cols: int) -> BoxInternalType:
    """Mirror a bounding box top-to-bottom (flip around the x-axis).

    Every y coordinate is mapped to `1 - y`; the x coordinates are passed through.
    Only the first four entries of `bbox` are used.

    Args:
        bbox: A bounding box `(x_min, y_min, x_max, y_max)`.
        rows: Image rows (not used by the computation).
        cols: Image cols (not used by the computation).

    Returns:
        tuple: A bounding box `(x_min, y_min, x_max, y_max)`.

    """
    left, top, right, bottom = bbox[:4]
    # Mirroring swaps the roles of the top and bottom edges.
    flipped_top = 1 - bottom
    flipped_bottom = 1 - top
    return left, flipped_top, right, flipped_bottom
def
elastic_transform (img, alpha, sigma, alpha_affine, interpolation=1, border_mode=4, value=None, random_state=None, approximate=False, same_dxdy=False)
[view source on GitHub]¶
Elastic deformation of images as described in [Simard2003]_ (with modifications). Based on https://gist.github.com/ernestum/601cdf56d2b424757de5
.. [Simard2003] Simard, Steinkraus and Platt, "Best Practices for Convolutional Neural Networks applied to Visual Document Analysis", in Proc. of the International Conference on Document Analysis and Recognition, 2003.
Source code in albumentations/augmentations/geometric/functional.py
@preserve_shape
def elastic_transform(
    img: np.ndarray,
    alpha: float,
    sigma: float,
    alpha_affine: float,
    interpolation: int = cv2.INTER_LINEAR,
    border_mode: int = cv2.BORDER_REFLECT_101,
    value: Optional[ImageColorType] = None,
    random_state: Optional[np.random.RandomState] = None,
    approximate: bool = False,
    same_dxdy: bool = False,
) -> np.ndarray:
    """Elastic deformation of images as described in [Simard2003]_ (with modifications).

    The image is first warped by a random affine transform and then remapped through
    a smoothed random displacement field.

    Based on https://gist.github.com/ernestum/601cdf56d2b424757de5

    .. [Simard2003] Simard, Steinkraus and Platt, "Best Practices for
         Convolutional Neural Networks applied to Visual Document Analysis", in
         Proc. of the International Conference on Document Analysis and
         Recognition, 2003.

    Args:
        img: Input image; the first two dimensions are taken as height and width.
        alpha: Multiplier applied to the smoothed displacement field.
        sigma: Sigma of the Gaussian used to smooth the random displacement field.
        alpha_affine: Half-width of the uniform range the affine control points are jittered by.
        interpolation: Interpolation flag passed to `cv2.warpAffine` and `cv2.remap`.
        border_mode: Border mode passed to `cv2.warpAffine` and `cv2.remap`.
        value: Border value passed to `cv2.warpAffine` and `cv2.remap`.
        random_state: Random state forwarded to the `random_utils` samplers.
        approximate: If True, smooth the displacement field with a fixed 17x17
            `cv2.GaussianBlur` kernel instead of `gaussian_filter`.
        same_dxdy: If True, reuse the x displacement field for y instead of sampling a second one.

    Returns:
        The deformed image.

    """
    height, width = img.shape[:2]
    alpha, sigma, alpha_affine = float(alpha), float(sigma), float(alpha_affine)

    # Stage 1: random affine warp defined by three jittered control points.
    centre = np.array((height, width), dtype=np.float32) // 2
    half_side = min((height, width)) // 3
    src_pts = np.array(
        [
            centre + half_side,
            [centre[0] + half_side, centre[1] - half_side],
            centre - half_side,
        ],
        dtype=np.float32,
    )
    jitter = random_utils.uniform(-alpha_affine, alpha_affine, size=src_pts.shape, random_state=random_state)
    dst_pts = src_pts + jitter.astype(np.float32)
    affine = cv2.getAffineTransform(src_pts, dst_pts)

    warp = _maybe_process_in_chunks(
        cv2.warpAffine, M=affine, dsize=(width, height), flags=interpolation, borderMode=border_mode, borderValue=value
    )
    img = warp(img)

    # Stage 2: smoothed random displacement field(s).
    def displacement_field() -> np.ndarray:
        noise = random_utils.rand(height, width, random_state=random_state)
        if approximate:
            # Approximate computation smooth displacement map with a large enough kernel.
            # On large images (512+) this is approximately 2X times faster
            field = noise.astype(np.float32) * 2 - 1
            cv2.GaussianBlur(field, (17, 17), sigma, dst=field)
            field *= alpha
            return field
        return np.float32(gaussian_filter((noise * 2 - 1), sigma) * alpha)

    dx = displacement_field()
    # Sharing one field for both axes skips a second sample-and-smooth pass.
    dy = dx if same_dxdy else displacement_field()

    grid_x, grid_y = np.meshgrid(np.arange(width), np.arange(height))
    remap = _maybe_process_in_chunks(
        cv2.remap,
        map1=np.float32(grid_x + dx),
        map2=np.float32(grid_y + dy),
        interpolation=interpolation,
        borderMode=border_mode,
        borderValue=value,
    )
    return remap(img)
def
elastic_transform_approx (img, alpha, sigma, alpha_affine, interpolation=1, border_mode=4, value=None, random_state=None)
[view source on GitHub]¶
Elastic deformation of images as described in [Simard2003]_ (with modifications for speed). Based on https://gist.github.com/ernestum/601cdf56d2b424757de5
.. [Simard2003] Simard, Steinkraus and Platt, "Best Practices for Convolutional Neural Networks applied to Visual Document Analysis", in Proc. of the International Conference on Document Analysis and Recognition, 2003.
Source code in albumentations/augmentations/geometric/functional.py
@preserve_shape
def elastic_transform_approx(
    img: np.ndarray,
    alpha: float,
    sigma: float,
    alpha_affine: float,
    interpolation: int = cv2.INTER_LINEAR,
    border_mode: int = cv2.BORDER_REFLECT_101,
    value: Optional[ImageColorType] = None,
    random_state: Optional[np.random.RandomState] = None,
) -> np.ndarray:
    """Elastic deformation of images as described in [Simard2003]_ (with modifications for speed).

    The image is first warped by a random affine transform and then remapped through
    two random displacement fields, each smoothed with a fixed 17x17 `cv2.GaussianBlur` kernel.

    Based on https://gist.github.com/ernestum/601cdf56d2b424757de5

    .. [Simard2003] Simard, Steinkraus and Platt, "Best Practices for
         Convolutional Neural Networks applied to Visual Document Analysis", in
         Proc. of the International Conference on Document Analysis and
         Recognition, 2003.

    Args:
        img: Input image; the first two dimensions are taken as height and width.
        alpha: Multiplier applied to the smoothed displacement fields.
        sigma: Sigma passed to `cv2.GaussianBlur` when smoothing the displacement fields.
        alpha_affine: Half-width of the uniform range the affine control points are jittered by.
        interpolation: Interpolation flag passed to `cv2.warpAffine` and `cv2.remap`.
        border_mode: Border mode passed to `cv2.warpAffine` and `cv2.remap`.
        value: Border value passed to `cv2.warpAffine` and `cv2.remap`.
        random_state: Random state forwarded to the `random_utils` samplers.

    Returns:
        The deformed image.

    """
    height, width = img.shape[:2]
    alpha, sigma, alpha_affine = float(alpha), float(sigma), float(alpha_affine)

    # Stage 1: random affine warp defined by three jittered control points.
    centre = np.array((height, width), dtype=np.float32) // 2
    half_side = min((height, width)) // 3
    src_pts = np.array(
        [
            centre + half_side,
            [centre[0] + half_side, centre[1] - half_side],
            centre - half_side,
        ],
        dtype=np.float32,
    )
    jitter = random_utils.uniform(-alpha_affine, alpha_affine, size=src_pts.shape, random_state=random_state)
    dst_pts = src_pts + jitter.astype(np.float32)
    affine = cv2.getAffineTransform(src_pts, dst_pts)

    warp = _maybe_process_in_chunks(
        cv2.warpAffine,
        M=affine,
        dsize=(width, height),
        flags=interpolation,
        borderMode=border_mode,
        borderValue=value,
    )
    img = warp(img)

    # Stage 2: two independently sampled displacement fields (x first, then y),
    # each in [-1, 1) before blurring and scaling by alpha.
    fields = []
    for _ in range(2):
        field = random_utils.rand(height, width, random_state=random_state).astype(np.float32) * 2 - 1
        cv2.GaussianBlur(field, (17, 17), sigma, dst=field)
        field *= alpha
        fields.append(field)
    dx, dy = fields

    grid_x, grid_y = np.meshgrid(np.arange(width), np.arange(height))
    remap = _maybe_process_in_chunks(
        cv2.remap,
        map1=np.float32(grid_x + dx),
        map2=np.float32(grid_y + dy),
        interpolation=interpolation,
        borderMode=border_mode,
        borderValue=value,
    )
    return remap(img)
def
find_keypoint (position, distance_map, threshold, inverted)
[view source on GitHub]¶
Determine if a valid keypoint can be found at the given position.
Source code in albumentations/augmentations/geometric/functional.py
def find_keypoint(
    position: Tuple[int, int], distance_map: np.ndarray, threshold: Optional[float], inverted: bool
) -> Optional[Tuple[float, float]]:
    """Determine if a valid keypoint can be found at the given position.

    Args:
        position: `(y, x)` index into `distance_map`.
        distance_map: Map that is read at `position`.
        threshold: If None, the position is always accepted. Otherwise the value at
            `position` must be below the threshold (or, when `inverted`, at least
            the threshold) for the position to be accepted.
        inverted: Selects which side of `threshold` counts as a hit.

    Returns:
        `(x, y)` as floats when the position is accepted, otherwise None.

    """
    row, col = position
    score = distance_map[row, col]

    if threshold is not None:
        rejected = score < threshold if inverted else score >= threshold
        if rejected:
            return None

    return float(col), float(row)
def
from_distance_maps (distance_maps, inverted, if_not_found_coords, threshold=None)
[view source on GitHub]¶
Convert outputs of to_distance_maps
to KeypointsOnImage
.
This is the inverse of to_distance_maps
.
Source code in albumentations/augmentations/geometric/functional.py
def from_distance_maps(
    distance_maps: np.ndarray,
    inverted: bool,
    if_not_found_coords: Optional[Union[Sequence[int], Dict[str, Any]]],
    threshold: Optional[float] = None,
) -> List[Tuple[float, float]]:
    """Convert outputs of `to_distance_maps` back to a list of keypoint coordinates.

    This is the inverse of `to_distance_maps`.

    Args:
        distance_maps: Three-dimensional array whose last axis indexes the keypoints.
        inverted: If True the keypoint is looked up at the maximum of each map,
            otherwise at the minimum.
        if_not_found_coords: Fallback handling for keypoints rejected by `find_keypoint`,
            as interpreted by `validate_if_not_found_coords`.
        threshold: Threshold forwarded to `find_keypoint`.

    Returns:
        One `(x, y)` tuple per recovered keypoint. Rejected keypoints are either
        dropped or replaced by the fallback coordinates.

    Raises:
        ValueError: if `distance_maps` is not three-dimensional.

    """
    if distance_maps.ndim != THREE:
        msg = f"Expected three-dimensional input, got {distance_maps.ndim} dimensions and shape {distance_maps.shape}."
        raise ValueError(msg)

    height, width, nb_keypoints = distance_maps.shape
    drop_if_not_found, fallback_x, fallback_y = validate_if_not_found_coords(if_not_found_coords)

    # Inverted maps peak at the keypoint; plain distance maps bottom out there.
    locate = np.argmax if inverted else np.argmin

    keypoints = []
    for channel in range(nb_keypoints):
        single_map = distance_maps[..., channel]
        hit = np.unravel_index(locate(single_map), (height, width))
        found = find_keypoint(hit, single_map, threshold, inverted)
        if found:
            keypoints.append(found)
        elif not drop_if_not_found:
            keypoints.append((fallback_x, fallback_y))

    return keypoints
def
grid_distortion (img, num_steps=10, xsteps=(), ysteps=(), interpolation=1, border_mode=4, value=None)
[view source on GitHub]¶
Perform a grid distortion of an input image.
Source code in albumentations/augmentations/geometric/functional.py
def _grid_axis_map(size: int, num_steps: int, steps: Sequence[float]) -> np.ndarray:
    """Build the 1-D source-coordinate lookup for one image axis of a grid distortion."""
    if len(steps) == 0:
        # No multipliers supplied: keep every cell at its nominal width (no distortion)
        # instead of failing with IndexError on the first lookup.
        steps = [1.0] * num_steps

    cell = size // num_steps
    axis_map = np.zeros(size, np.float32)
    prev = 0
    for idx in range(num_steps + 1):
        start = idx * cell
        end = start + cell
        if end > size:
            # Trailing partial cell: stretch it so the map ends exactly at the border.
            end = size
            cur = size
        else:
            cur = prev + cell * steps[idx]
        axis_map[start:end] = np.linspace(prev, cur, end - start)
        prev = cur
    return axis_map


@preserve_shape
def grid_distortion(
    img: np.ndarray,
    num_steps: int = 10,
    xsteps: Sequence[float] = (),
    ysteps: Sequence[float] = (),
    interpolation: int = cv2.INTER_LINEAR,
    border_mode: int = cv2.BORDER_REFLECT_101,
    value: Optional[ImageColorType] = None,
) -> np.ndarray:
    """Perform a grid distortion of an input image.

    Each axis is divided into `num_steps` cells of equal width; the i-th cell is
    resampled from a source interval whose width is the cell width multiplied by
    the i-th step value.

    Args:
        img: Input image; the first two dimensions are taken as height and width.
        num_steps: Number of grid cells along each axis.
        xsteps: Per-cell multipliers along the x-axis. An empty sequence leaves the
            axis undistorted.
        ysteps: Per-cell multipliers along the y-axis. An empty sequence leaves the
            axis undistorted.
        interpolation: Interpolation flag passed to `cv2.remap`.
        border_mode: Border mode passed to `cv2.remap`.
        value: Border value passed to `cv2.remap`.

    Returns:
        The distorted image.

    Reference:
        http://pythology.blogspot.sg/2014/03/interpolation-on-regular-distorted-grid.html

    """
    height, width = img.shape[:2]

    xx = _grid_axis_map(width, num_steps, xsteps)
    yy = _grid_axis_map(height, num_steps, ysteps)

    map_x, map_y = np.meshgrid(xx, yy)
    map_x = map_x.astype(np.float32)
    map_y = map_y.astype(np.float32)

    remap_fn = _maybe_process_in_chunks(
        cv2.remap,
        map1=map_x,
        map2=map_y,
        interpolation=interpolation,
        borderMode=border_mode,
        borderValue=value,
    )
    return remap_fn(img)
def
keypoint_flip (keypoint, d, rows, cols)
[view source on GitHub]¶
Flip a keypoint either vertically, horizontally or both depending on the value of d
.
Parameters:
Name | Type | Description |
---|---|---|
keypoint |
Tuple[float, float, float, float] |
A keypoint |
d |
int |
Number of flip. Must be -1, 0 or 1: * 0 - vertical flip, * 1 - horizontal flip, * -1 - vertical and horizontal flip. |
rows |
int |
Image height. |
cols |
int |
Image width. |
Returns:
Type | Description |
---|---|
Tuple[float, float, float, float] |
A keypoint |
Exceptions:
Type | Description |
---|---|
ValueError |
if value of `d` is not -1, 0 or 1. |
Source code in albumentations/augmentations/geometric/functional.py
def keypoint_flip(keypoint: KeypointInternalType, d: int, rows: int, cols: int) -> KeypointInternalType:
    """Flip a keypoint vertically, horizontally, or along both axes.

    Args:
        keypoint: A keypoint `(x, y, angle, scale)`.
        d: Number of flip. Must be -1, 0 or 1:
            * 0 - vertical flip,
            * 1 - horizontal flip,
            * -1 - vertical and horizontal flip.
        rows: Image height.
        cols: Image width.

    Returns:
        A keypoint `(x, y, angle, scale)`.

    Raises:
        ValueError: if value of `d` is not -1, 0 or 1.

    """
    if d == 0:
        return keypoint_vflip(keypoint, rows, cols)
    if d == 1:
        return keypoint_hflip(keypoint, rows, cols)
    if d == -1:
        # Both axes: mirror left-right first, then top-bottom.
        return keypoint_vflip(keypoint_hflip(keypoint, rows, cols), rows, cols)
    raise ValueError(f"Invalid d value {d}. Valid values are -1, 0 and 1")
def
keypoint_hflip (keypoint, rows, cols)
[view source on GitHub]¶
Flip a keypoint horizontally around the y-axis.
Parameters:
Name | Type | Description |
---|---|---|
keypoint |
Tuple[float, float, float, float] |
A keypoint |
rows |
int |
Image height. |
cols |
int |
Image width. |
Returns:
Type | Description |
---|---|
Tuple[float, float, float, float] |
A keypoint |
Source code in albumentations/augmentations/geometric/functional.py
@angle_2pi_range
def keypoint_hflip(keypoint: KeypointInternalType, rows: int, cols: int) -> KeypointInternalType:
    """Mirror a keypoint left-to-right (flip around the y-axis).

    The x coordinate is mapped to `(cols - 1) - x` and the angle to `pi - angle`;
    y and scale are passed through.

    Args:
        keypoint: A keypoint `(x, y, angle, scale)`.
        rows: Image height (not used by the computation).
        cols: Image width.

    Returns:
        A keypoint `(x, y, angle, scale)`.

    """
    col, row, theta, size = keypoint[:4]
    mirrored_col = (cols - 1) - col
    mirrored_theta = math.pi - theta
    return mirrored_col, row, mirrored_theta, size
def
keypoint_rot90 (keypoint, factor, rows, cols, **
params)
[view source on GitHub]¶
Rotates a keypoint by 90 degrees CCW (see np.rot90)
Parameters:
Name | Type | Description |
---|---|---|
keypoint |
Tuple[float, float, float, float] |
A keypoint |
factor |
int |
Number of CCW rotations. Must be in range [0;3] See np.rot90. |
rows |
int |
Image height. |
cols |
int |
Image width. |
Returns:
Type | Description |
---|---|
tuple |
A keypoint |
Exceptions:
Type | Description |
---|---|
ValueError |
if factor not in set {0, 1, 2, 3} |
Source code in albumentations/augmentations/geometric/functional.py
@angle_2pi_range
def keypoint_rot90(
    keypoint: KeypointInternalType, factor: int, rows: int, cols: int, **params: Any
) -> KeypointInternalType:
    """Rotate a keypoint by multiples of 90 degrees CCW (see np.rot90).

    Args:
        keypoint: A keypoint `(x, y, angle, scale)`.
        factor: Number of CCW rotations. Must be in range [0;3] See np.rot90.
        rows: Image height.
        cols: Image width.
        **params: Accepted and ignored.

    Returns:
        tuple: A keypoint `(x, y, angle, scale)`. With `factor == 0` the first four
            entries of the keypoint are returned unchanged.

    Raises:
        ValueError: if factor not in set {0, 1, 2, 3}

    """
    x, y, angle, scale = keypoint[:4]

    if factor not in {0, 1, 2, 3}:
        raise ValueError("Parameter n must be in set {0, 1, 2, 3}")

    if factor == 1:
        return y, (cols - 1) - x, angle - math.pi / 2, scale
    if factor == TWO:
        return (cols - 1) - x, (rows - 1) - y, angle - math.pi, scale
    if factor == THREE:
        return (rows - 1) - y, x, angle + math.pi / 2, scale
    # factor == 0: nothing to rotate.
    return x, y, angle, scale
def
keypoint_rotate (keypoint, angle, rows, cols, **
params)
[view source on GitHub]¶
Rotate a keypoint by angle.
Parameters:
Name | Type | Description |
---|---|---|
keypoint |
Tuple[float, float, float, float] |
A keypoint |
angle |
float |
Rotation angle. |
rows |
int |
Image height. |
cols |
int |
Image width. |
Returns:
Type | Description |
---|---|
Tuple[float, float, float, float] |
A keypoint |
Source code in albumentations/augmentations/geometric/functional.py
@angle_2pi_range
def keypoint_rotate(
    keypoint: KeypointInternalType, angle: float, rows: int, cols: int, **params: Any
) -> KeypointInternalType:
    """Rotate a keypoint by `angle` around the image centre.

    Args:
        keypoint: A keypoint `(x, y, angle, scale)`.
        angle: Rotation angle. It is passed to `cv2.getRotationMatrix2D` as is and
            converted with `math.radians` before being added to the keypoint angle.
        rows: Image height.
        cols: Image width.
        **params: Accepted and ignored.

    Returns:
        A keypoint `(x, y, angle, scale)`.

    """
    pivot = ((cols - 1) * 0.5, (rows - 1) * 0.5)
    rotation = cv2.getRotationMatrix2D(pivot, angle, 1.0)

    x, y, theta, size = keypoint[:4]
    # cv2.transform is given a 1x1 array of two-channel points; squeeze back to (x, y).
    point = np.array([[[x, y]]])
    rotated_x, rotated_y = cv2.transform(point, rotation).squeeze()

    return rotated_x, rotated_y, theta + math.radians(angle), size
def
keypoint_scale (keypoint, scale_x, scale_y)
[view source on GitHub]¶
Scales a keypoint by scale_x and scale_y.
Parameters:
Name | Type | Description |
---|---|---|
keypoint |
Tuple[float, float, float, float] |
A keypoint |
scale_x |
float |
Scale coefficient x-axis. |
scale_y |
float |
Scale coefficient y-axis. |
Returns:
Type | Description |
---|---|
Tuple[float, float, float, float] |
A keypoint |
Source code in albumentations/augmentations/geometric/functional.py
def keypoint_scale(keypoint: KeypointInternalType, scale_x: float, scale_y: float) -> KeypointInternalType:
    """Scale a keypoint by `scale_x` and `scale_y`.

    The coordinates are scaled per axis, the angle is passed through and the
    keypoint scale is multiplied by the larger of the two coefficients.

    Args:
        keypoint: A keypoint `(x, y, angle, scale)`.
        scale_x: Scale coefficient x-axis.
        scale_y: Scale coefficient y-axis.

    Returns:
        A keypoint `(x, y, angle, scale)`.

    """
    x, y, angle, size = keypoint[:4]
    scaled_x = x * scale_x
    scaled_y = y * scale_y
    scaled_size = size * max(scale_x, scale_y)
    return scaled_x, scaled_y, angle, scaled_size
def
keypoint_transpose (keypoint)
[view source on GitHub]¶
Transposes a keypoint by swapping its x and y coordinates and adjusting its angle.
Parameters:
Name | Type | Description |
---|---|---|
keypoint |
Tuple[float, float, float, float] |
A keypoint |
Returns:
Type | Description |
---|---|
Tuple[float, float, float, float] |
A keypoint |
Source code in albumentations/augmentations/geometric/functional.py
def keypoint_transpose(keypoint: KeypointInternalType) -> KeypointInternalType:
    """Transpose a keypoint by swapping its x and y coordinates.

    The angle is mapped to `pi - angle` when it is at most `pi` and to
    `3 * pi - angle` otherwise; the scale is passed through.

    Args:
        keypoint: A keypoint `(x, y, angle, scale)`.

    Returns:
        A keypoint `(x, y, angle, scale)`.

    """
    x, y, angle, scale = keypoint[:4]

    if angle <= np.pi:
        transposed_angle = np.pi - angle
    else:
        transposed_angle = 3 * np.pi - angle

    return y, x, transposed_angle, scale
def
keypoint_vflip (keypoint, rows, cols)
[view source on GitHub]¶
Flip a keypoint vertically around the x-axis.
Parameters:
Name | Type | Description |
---|---|---|
keypoint |
Tuple[float, float, float, float] |
A keypoint |
rows |
int |
Image height. |
cols |
int |
Image width. |
Returns:
Type | Description |
---|---|
tuple |
A keypoint |
Source code in albumentations/augmentations/geometric/functional.py
@angle_2pi_range
def keypoint_vflip(keypoint: KeypointInternalType, rows: int, cols: int) -> KeypointInternalType:
    """Mirror a keypoint top-to-bottom (flip around the x-axis).

    The y coordinate is mapped to `(rows - 1) - y` and the angle is negated;
    x and scale are passed through.

    Args:
        keypoint: A keypoint `(x, y, angle, scale)`.
        rows: Image height.
        cols: Image width (not used by the computation).

    Returns:
        tuple: A keypoint `(x, y, angle, scale)`.

    """
    col, row, theta, size = keypoint[:4]
    mirrored_row = (rows - 1) - row
    return col, mirrored_row, -theta, size
def
optical_distortion (img, k=0, dx=0, dy=0, interpolation=1, border_mode=4, value=None)
[view source on GitHub]¶
Barrel / pincushion distortion. Unconventional augment.
Reference
| https://stackoverflow.com/questions/6199636/formulas-for-barrel-pincushion-distortion | https://stackoverflow.com/questions/10364201/image-transformation-in-opencv | https://stackoverflow.com/questions/2477774/correcting-fisheye-distortion-programmatically | http://www.coldvision.io/2017/03/02/advanced-lane-finding-using-opencv/
Source code in albumentations/augmentations/geometric/functional.py
@preserve_shape
def optical_distortion(
    img: np.ndarray,
    k: int = 0,
    dx: int = 0,
    dy: int = 0,
    interpolation: int = cv2.INTER_LINEAR,
    border_mode: int = cv2.BORDER_REFLECT_101,
    value: Optional[ImageColorType] = None,
) -> np.ndarray:
    """Barrel / pincushion distortion. Unconventional augment.

    Args:
        img: Input image; the first two dimensions are taken as height and width.
        k: Distortion coefficient, used for both of the first two entries of the
            distortion vector passed to `cv2.initUndistortRectifyMap`.
        dx: Offset added to the x coordinate of the image centre in the camera matrix.
        dy: Offset added to the y coordinate of the image centre in the camera matrix.
        interpolation: Interpolation flag passed to `cv2.remap`.
        border_mode: Border mode passed to `cv2.remap`.
        value: Border value passed to `cv2.remap`.

    Returns:
        The distorted image.

    Reference:
        |  https://stackoverflow.com/questions/6199636/formulas-for-barrel-pincushion-distortion
        |  https://stackoverflow.com/questions/10364201/image-transformation-in-opencv
        |  https://stackoverflow.com/questions/2477774/correcting-fisheye-distortion-programmatically
        |  http://www.coldvision.io/2017/03/02/advanced-lane-finding-using-opencv/

    """
    height, width = img.shape[:2]

    # Camera matrix: focal lengths equal to the image size, centre shifted by (dx, dy).
    centre_x = width * 0.5 + dx
    centre_y = height * 0.5 + dy
    camera_matrix = np.array(
        [
            [width, 0, centre_x],
            [0, height, centre_y],
            [0, 0, 1],
        ],
        dtype=np.float32,
    )
    coefficients = np.array([k, k, 0, 0, 0], dtype=np.float32)

    map_x, map_y = cv2.initUndistortRectifyMap(
        camera_matrix, coefficients, None, None, (width, height), cv2.CV_32FC1
    )
    return cv2.remap(img, map_x, map_y, interpolation=interpolation, borderMode=border_mode, borderValue=value)
def
rotation2d_matrix_to_euler_angles (matrix, y_up=False)
[view source on GitHub]¶
Parameters: matrix (np.ndarray): rotation matrix; y_up (bool): whether the Y axis points up or down.
Source code in albumentations/augmentations/geometric/functional.py
def
to_distance_maps (keypoints, height, width, inverted=False)
[view source on GitHub]¶
Generate a (H,W,N)
array of distance maps for N
keypoints.
The n
-th distance map contains at every location (y, x)
the
euclidean distance to the n
-th keypoint.
This function can be used as a helper when augmenting keypoints with a method that only supports the augmentation of images.
Parameters:
Name | Type | Description |
---|---|---|
keypoints |
Sequence[Tuple[float, float]] |
keypoint coordinates |
height |
int |
image height |
width |
int |
image width |
inverted |
bool |
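If True, inverted distance maps are returned where each distance value d is replaced by 1/(d+1), i.e. the distance maps have values in the range (0.0, 1.0] with 1.0 denoting exactly the position of the respective keypoint. |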
Returns:
Type | Description |
---|---|
ndarray |
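(H, W, N) ndarray
A float32 array containing N distance maps for N keypoints. Each location (y, x, n) in the array denotes the euclidean distance at (y, x) to the n-th keypoint. If inverted is True, the distance d is replaced by 1/(d+1). |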
Source code in albumentations/augmentations/geometric/functional.py
def to_distance_maps(
    keypoints: Sequence[Tuple[float, float]], height: int, width: int, inverted: bool = False
) -> np.ndarray:
    """Generate a ``(H,W,N)`` array of distance maps for ``N`` keypoints.

    The ``n``-th distance map contains at every location ``(y, x)`` the
    euclidean distance to the ``n``-th keypoint.

    This function can be used as a helper when augmenting keypoints with a
    method that only supports the augmentation of images.

    Args:
        keypoints: keypoint coordinates as ``(x, y)`` pairs
        height: image height
        width: image width
        inverted (bool): If ``True``, inverted distance maps are returned where each
            distance value d is replaced by ``1/(d+1)``, i.e. the distance
            maps have values in the range ``(0.0, 1.0]`` with ``1.0`` denoting
            exactly the position of the respective keypoint.

    Returns:
        (H, W, N) ndarray
            A ``float32`` array containing ``N`` distance maps for ``N``
            keypoints. Each location ``(y, x, n)`` in the array denotes the
            euclidean distance at ``(y, x)`` to the ``n``-th keypoint.
            If `inverted` is ``True``, the distance ``d`` is replaced
            by ``1/(d+1)``.

    """
    distance_maps = np.zeros((height, width, len(keypoints)), dtype=np.float32)

    # Column and row index vectors shaped so that they broadcast to (height, width).
    col_idx = np.arange(0, width)[np.newaxis, :]
    row_idx = np.arange(0, height)[:, np.newaxis]

    for channel, (x, y) in enumerate(keypoints):
        distance_maps[:, :, channel] = (col_idx - x) ** 2 + (row_idx - y) ** 2

    distance_maps = np.sqrt(distance_maps)
    return 1 / (distance_maps + 1) if inverted else distance_maps
def
validate_if_not_found_coords (if_not_found_coords)
[view source on GitHub]¶
Validate and process if_not_found_coords
parameter.
Source code in albumentations/augmentations/geometric/functional.py
def validate_if_not_found_coords(
    if_not_found_coords: Optional[Union[Sequence[int], Dict[str, Any]]],
) -> Tuple[bool, int, int]:
    """Validate and process `if_not_found_coords` parameter.

    Args:
        if_not_found_coords: None, a two-element tuple/list `(x, y)`, or a dict with
            keys "x" and "y".

    Returns:
        A `(drop_if_not_found, x, y)` triple. For None the flag is True and both
        coordinates are -1; otherwise the flag is False and the coordinates are
        taken from the argument.

    Raises:
        ValueError: if a tuple/list does not have exactly two entries, or if the
            argument is of any other unsupported type.

    """
    coords = if_not_found_coords

    if coords is None:
        return True, -1, -1

    if isinstance(coords, (tuple, list)):
        if len(coords) != TWO:
            raise ValueError("Expected tuple/list 'if_not_found_coords' to contain exactly two entries.")
        return False, coords[0], coords[1]

    if isinstance(coords, dict):
        return False, coords["x"], coords["y"]

    raise ValueError("Expected if_not_found_coords to be None, tuple, list, or dict.")