Functional transforms (augmentations.functional)¶
def add_fog (img, fog_coef, alpha_coef, haze_list)
[view source on GitHub]¶
Add fog to the image.
From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
Parameters:
Name | Type | Description |
---|---|---|
img | ndarray | Image. |
fog_coef | float | Fog coefficient. |
alpha_coef | float | Alpha coefficient. |
haze_list | List[Tuple[int, int]] |
Returns:
Type | Description |
---|---|
ndarray | Image. |
Source code in albumentations/augmentations/functional.py
@preserve_channel_dim
def add_fog(img: np.ndarray, fog_coef: float, alpha_coef: float, haze_list: List[Tuple[int, int]]) -> np.ndarray:
    """Overlay translucent white discs on the image and blur the result to imitate fog.

    From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

    Args:
        img: Image, uint8 or float32. float32 input is converted to uint8 for
            processing and converted back before returning.
        fog_coef: Fog coefficient. Scales both the disc size (relative to the
            image width) and the blending weight.
        alpha_coef: Alpha coefficient. Multiplied by `fog_coef` to obtain the
            blending weight of every disc.
        haze_list: `(x, y)` points. Each point yields one filled white disc whose
            centre is offset from the point by half of the disc size.

    Returns:
        Image with the same dtype as the input.

    Raises:
        ValueError: If the image dtype is neither uint8 nor float32.

    """
    non_rgb_warning(img)

    came_as_float = img.dtype == np.float32
    if came_as_float:
        img = from_float(img, dtype=np.dtype("uint8"))
    elif img.dtype != np.uint8:
        raise ValueError(f"Unexpected dtype {img.dtype} for RandomFog augmentation")

    # Disc size derives from a third of the image width and never drops below 10 px,
    # which also keeps the blur kernel below at least 1x1.
    hw = max(int(img.shape[1] // 3 * fog_coef), 10)
    half = hw // 2
    blend_weight = alpha_coef * fog_coef
    white = (255, 255, 255)

    for x, y in haze_list:
        overlay = img.copy()
        blended = img.copy()
        cv2.circle(overlay, (x + half, y + half), int(half), white, -1)
        # Blend the disc layer into the running result in place.
        cv2.addWeighted(overlay, blend_weight, blended, 1 - blend_weight, 0, blended)
        img = blended

    kernel = hw // 10
    foggy = cv2.blur(img, (kernel, kernel))

    return to_float(foggy, max_value=255) if came_as_float else foggy
def add_gravel (img, gravels)
[view source on GitHub]¶
Add gravel to the image.
From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
Parameters:
Name | Type | Description |
---|---|---|
img | numpy.ndarray | image to add gravel to |
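gravels | list | List of gravel patches. Each entry is unpacked into five values: the first two bound the column (x) range, the next two bound the row (y) range, and the fifth is the value written to the HLS lightness channel over that rectangle. |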
Returns:
Type | Description |
---|---|
numpy.ndarray |
Source code in albumentations/augmentations/functional.py
@ensure_contiguous
@preserve_channel_dim
def add_gravel(img: np.ndarray, gravels: List[Any]) -> np.ndarray:
    """Paint rectangular gravel patches by overwriting the HLS lightness channel.

    From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

    Args:
        img (numpy.ndarray): Image to add gravel to, uint8 or float32. float32 input
            is converted to uint8 for processing and converted back before returning.
        gravels (list): Gravel patches. Each entry is unpacked into five values: the
            first two bound the column range, the next two bound the row range, and
            the fifth is the value written into channel 1 (lightness) of the HLS
            image over that rectangle.

    Returns:
        numpy.ndarray: Image with the same dtype as the input.

    Raises:
        ValueError: If the image dtype is neither uint8 nor float32.

    """
    non_rgb_warning(img)

    came_as_float = img.dtype == np.float32
    if came_as_float:
        img = from_float(img, dtype=np.dtype("uint8"))
    elif img.dtype != np.uint8:
        raise ValueError(f"Unexpected dtype {img.dtype} for AddGravel augmentation")

    image_hls = cv2.cvtColor(img, cv2.COLOR_RGB2HLS)

    for col_start, col_stop, row_start, row_stop, value in gravels:
        image_hls[row_start:row_stop, col_start:col_stop, 1] = value

    gravelled = cv2.cvtColor(image_hls, cv2.COLOR_HLS2RGB)

    return to_float(gravelled, max_value=255) if came_as_float else gravelled
def add_rain (img, slant, drop_length, drop_width, drop_color, blur_value, brightness_coefficient, rain_drops)
[view source on GitHub]¶
From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
Parameters:
Name | Type | Description |
---|---|---|
img | ndarray | Image. |
slant | int | |
drop_length | int | |
drop_width | int | |
drop_color | Tuple[int, int, int] | |
blur_value | int | Size of the square blur kernel. Rainy views are blurry. |
brightness_coefficient | float | Rainy days are usually shady. |
rain_drops | List[Tuple[int, int]] |
Returns:
Type | Description |
---|---|
ndarray | Image |
Source code in albumentations/augmentations/functional.py
@preserve_channel_dim
def add_rain(
    img: np.ndarray,
    slant: int,
    drop_length: int,
    drop_width: int,
    drop_color: Tuple[int, int, int],
    blur_value: int,
    brightness_coefficient: float,
    rain_drops: List[Tuple[int, int]],
) -> np.ndarray:
    """Draw rain streaks on the image, then blur and darken it.

    From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

    Args:
        img: Image, uint8 or float32. float32 input is converted to uint8 for
            processing and converted back before returning.
        slant: Horizontal offset added to a drop's start x to obtain its end x.
        drop_length: Vertical offset added to a drop's start y to obtain its end y.
        drop_width: Line thickness used to draw each drop.
        drop_color: Colour used to draw each drop.
        blur_value: Size of the square blur kernel. Rainy views are blurry.
        brightness_coefficient: Multiplier applied to the HSV value channel.
            Rainy days are usually shady.
        rain_drops: `(x, y)` start points of the drops.

    Returns:
        Image with the same dtype as the input.

    Raises:
        ValueError: If the image dtype is neither uint8 nor float32.

    """
    non_rgb_warning(img)

    input_dtype = img.dtype
    needs_float = False

    if input_dtype == np.float32:
        img = from_float(img, dtype=np.dtype("uint8"))
        needs_float = True
    elif input_dtype not in (np.uint8, np.float32):
        raise ValueError(f"Unexpected dtype {input_dtype} for RandomRain augmentation")

    image = img.copy()

    for rain_drop_x0, rain_drop_y0 in rain_drops:
        rain_drop_x1 = rain_drop_x0 + slant
        rain_drop_y1 = rain_drop_y0 + drop_length

        cv2.line(
            image,
            (rain_drop_x0, rain_drop_y0),
            (rain_drop_x1, rain_drop_y1),
            drop_color,
            drop_width,
        )

    image = cv2.blur(image, (blur_value, blur_value))  # rainy views are blurry
    image_hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV).astype(np.float32)

    value_channel = image_hsv[:, :, 2]
    value_channel *= brightness_coefficient
    # Keep the scaled value channel inside the uint8 range so that a coefficient
    # above 1 (or below 0) saturates instead of wrapping around in the cast below.
    np.clip(value_channel, 0, 255, out=value_channel)

    image_rgb = cv2.cvtColor(image_hsv.astype(np.uint8), cv2.COLOR_HSV2RGB)

    if needs_float:
        return to_float(image_rgb, max_value=255)

    return image_rgb
def add_shadow (img, vertices_list)
[view source on GitHub]¶
Add shadows to the image.
Parameters:
Name | Type | Description |
---|---|---|
img | numpy.ndarray | |
vertices_list | list[numpy.ndarray] |
Returns:
Type | Description |
---|---|
numpy.ndarray |
Source code in albumentations/augmentations/functional.py
@ensure_contiguous
@preserve_channel_dim
def add_shadow(img: np.ndarray, vertices_list: List[np.ndarray]) -> np.ndarray:
    """Darken polygonal regions of the image to imitate shadows.

    Args:
        img (numpy.ndarray): Image, uint8 or float32. float32 input is converted to
            uint8 for processing and converted back before returning.
        vertices_list (list[numpy.ndarray]): Polygons, passed unchanged to
            `cv2.fillPoly`, that delimit the shadowed areas.

    Returns:
        numpy.ndarray: Image with the same dtype as the input, with the HLS lightness
            halved inside the polygons.

    Raises:
        ValueError: If the image dtype is neither uint8 nor float32.

    Reference:
        https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

    """
    non_rgb_warning(img)

    came_as_float = img.dtype == np.float32
    if came_as_float:
        img = from_float(img, dtype=np.dtype("uint8"))
    elif img.dtype != np.uint8:
        raise ValueError(f"Unexpected dtype {img.dtype} for RandomShadow augmentation")

    image_hls = cv2.cvtColor(img, cv2.COLOR_RGB2HLS)

    # Rasterise every polygon onto a blank canvas; the scalar colour 255 only
    # lights up the first channel, which is the one inspected below.
    canvas = np.zeros_like(img)
    cv2.fillPoly(canvas, vertices_list, 255)
    in_shadow = canvas[:, :, 0] == MAX_VALUES_BY_DTYPE[np.dtype("uint8")]

    # Halve the lightness wherever a polygon was drawn (writes through the view).
    lightness = image_hls[:, :, 1]
    lightness[in_shadow] = lightness[in_shadow] * 0.5

    shadowed = cv2.cvtColor(image_hls, cv2.COLOR_HLS2RGB)

    if came_as_float:
        return to_float(shadowed, max_value=255)
    return shadowed
def add_snow (img, snow_point, brightness_coeff)
[view source on GitHub]¶
Bleaches out pixels, imitation snow.
From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
Parameters:
Name | Type | Description |
---|---|---|
img | ndarray | Image. |
snow_point | float | Number of snow points. |
brightness_coeff | float | Brightness coefficient. |
Returns:
Type | Description |
---|---|
ndarray | Image. |
Source code in albumentations/augmentations/functional.py
@preserve_channel_dim
def add_snow(img: np.ndarray, snow_point: float, brightness_coeff: float) -> np.ndarray:
    """Bleach out darker pixels to imitate snow.

    From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

    Args:
        img: Image, uint8 or float32. float32 input is converted to uint8 for
            processing and converted back before returning.
        snow_point: Snow point. Rescaled to `snow_point * 127.5 + 85` and used as
            the lightness threshold below which pixels are brightened.
        brightness_coeff: Brightness coefficient. Multiplier applied to the HLS
            lightness of every pixel under the threshold.

    Returns:
        Image with the same dtype as the input.

    Raises:
        ValueError: If the image dtype is neither uint8 nor float32.

    """
    non_rgb_warning(img)

    # 127.5 = 255 / 2 and 85 = 255 / 3
    lightness_threshold = snow_point * 127.5 + 85

    came_as_float = img.dtype == np.float32
    if came_as_float:
        img = from_float(img, dtype=np.dtype("uint8"))
    elif img.dtype != np.uint8:
        raise ValueError(f"Unexpected dtype {img.dtype} for RandomSnow augmentation")

    # Work in float32 so the brightened lightness can exceed 255 before clipping.
    hls = cv2.cvtColor(img, cv2.COLOR_RGB2HLS).astype(np.float32)

    lightness = hls[:, :, 1]  # view into hls
    lightness[lightness < lightness_threshold] *= brightness_coeff
    lightness[:] = clip(lightness, np.uint8, 255)

    snowy = cv2.cvtColor(hls.astype(np.uint8), cv2.COLOR_HLS2RGB)

    return to_float(snowy, max_value=255) if came_as_float else snowy
def add_sun_flare (img, flare_center_x, flare_center_y, src_radius, src_color, circles)
[view source on GitHub]¶
Add sun flare.
From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
Parameters:
Name | Type | Description |
---|---|---|
img | numpy.ndarray | |
flare_center_x | float | |
flare_center_y | float | |
src_radius | int | |
src_color | int, int, int | |
circles | list |
Returns:
Type | Description |
---|---|
numpy.ndarray |
Source code in albumentations/augmentations/functional.py
@preserve_channel_dim
def add_sun_flare(
    img: np.ndarray,
    flare_center_x: float,
    flare_center_y: float,
    src_radius: int,
    src_color: ColorType,
    circles: List[Any],
) -> np.ndarray:
    """Add a sun flare: a set of translucent circles plus a glowing light source.

    From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

    Args:
        img (numpy.ndarray): Image, uint8 or float32. float32 input is converted to
            uint8 for processing and converted back before returning.
        flare_center_x (float): x coordinate of the light source (truncated to int).
        flare_center_y (float): y coordinate of the light source (truncated to int).
        src_radius: Radius of the light source. `src_radius // 10` concentric discs
            are drawn, so a radius below 10 draws no source at all.
        src_color (int, int, int): Colour of the light source.
        circles (list): Flare circles, each unpacked as
            `(alpha, (x, y), radius, (r, g, b))`.

    Returns:
        numpy.ndarray: Image with the same dtype as the input.

    Raises:
        ValueError: If the image dtype is neither uint8 nor float32.

    """
    non_rgb_warning(img)

    input_dtype = img.dtype
    needs_float = False

    if input_dtype == np.float32:
        img = from_float(img, dtype=np.dtype("uint8"))
        needs_float = True
    elif input_dtype not in (np.uint8, np.float32):
        raise ValueError(f"Unexpected dtype {input_dtype} for RandomSunFlare augmentation")

    overlay = img.copy()
    output = img.copy()

    # Circles accumulate on the overlay; each one is blended into the output
    # with its own weight.
    for alpha, (x, y), rad3, (r_color, g_color, b_color) in circles:
        cv2.circle(overlay, (x, y), rad3, (r_color, g_color, b_color), -1)
        cv2.addWeighted(overlay, alpha, output, 1 - alpha, 0, output)

    point = (int(flare_center_x), int(flare_center_y))

    overlay = output.copy()
    num_times = src_radius // 10
    alpha = np.linspace(0.0, 1, num=num_times)
    rad = np.linspace(1, src_radius, num=num_times)

    # Discs grow from radius 1 to src_radius while their blending weight falls
    # off cubically, which makes the source brightest at its centre.
    for radius, fade in zip(rad, alpha[::-1]):
        cv2.circle(overlay, point, int(radius), src_color, -1)
        alp = fade * fade * fade
        cv2.addWeighted(overlay, alp, output, 1 - alp, 0, output)

    image_rgb = output

    if needs_float:
        image_rgb = to_float(image_rgb, max_value=255)

    return image_rgb
def almost_equal_intervals (n, parts)
[view source on GitHub]¶
Generates an array of nearly equal integer intervals that sum up to `n`.
This function divides the number `n` into `parts` nearly equal parts. It ensures that the sum of all parts equals `n`, and the difference between any two parts is at most one. This is useful for distributing a total amount into nearly equal discrete parts.
Parameters:
Name | Type | Description |
---|---|---|
n | int | The total value to be split. |
parts | int | The number of parts to split into. |
Returns:
Type | Description |
---|---|
np.ndarray | An array of integers where each integer represents the size of a part. |
Examples:
>>> almost_equal_intervals(20, 3)
array([7, 7, 6]) # Splits 20 into three parts: 7, 7, and 6
>>> almost_equal_intervals(16, 4)
array([4, 4, 4, 4]) # Splits 16 into four equal parts
Source code in albumentations/augmentations/functional.py
def almost_equal_intervals(n: int, parts: int) -> np.ndarray:
    """Split `n` into `parts` integer sizes that differ from one another by at most one.

    The sizes add up to `n`; when `n` is not divisible by `parts`, the leading
    `n % parts` entries are one larger than the rest.

    Args:
        n (int): The total value to be split.
        parts (int): The number of parts to split into.

    Returns:
        np.ndarray: An array of integers where each integer represents the size of a part.

    Example:
        Splitting 20 into three parts gives 7, 7 and 6:

        >>> almost_equal_intervals(20, 3)
        array([7, 7, 6])

        Splitting 16 into four parts gives four equal parts:

        >>> almost_equal_intervals(16, 4)
        array([4, 4, 4, 4])

    """
    base_size, leftover = divmod(n, parts)

    # Start with every part at the base size, then hand one extra unit to each of
    # the first `leftover` parts.
    sizes = np.array([base_size] * parts)
    sizes[:leftover] += 1
    return sizes
def bbox_from_mask (mask)
[view source on GitHub]¶
Create bounding box from binary mask (fast version)
Parameters:
Name | Type | Description |
---|---|---|
mask | numpy.ndarray | binary mask. |
Returns:
Type | Description |
---|---|
tuple | A bounding box tuple |
Source code in albumentations/augmentations/functional.py
def bbox_from_mask(mask: np.ndarray) -> Tuple[int, int, int, int]:
    """Compute the tight bounding box of the non-zero area of a binary mask (fast version).

    Args:
        mask (numpy.ndarray): binary mask.

    Returns:
        tuple: A bounding box tuple `(x_min, y_min, x_max, y_max)` with exclusive
            maximum coordinates, or `(-1, -1, -1, -1)` when the mask has no
            non-zero element.

    """
    occupied_rows = np.any(mask, axis=1)
    if not occupied_rows.any():
        return -1, -1, -1, -1

    occupied_cols = np.any(mask, axis=0)

    row_indices = np.nonzero(occupied_rows)[0]
    col_indices = np.nonzero(occupied_cols)[0]

    # Indices come back in ascending order, so the ends are the extremes.
    return col_indices[0], row_indices[0], col_indices[-1] + 1, row_indices[-1] + 1
def create_shape_groups (tiles)
[view source on GitHub]¶
Groups tiles by their shape and stores the indices for each shape.
Source code in albumentations/augmentations/functional.py
def create_shape_groups(tiles: np.ndarray) -> Dict[Tuple[int, int], List[int]]:
    """Group tile indices by tile shape.

    Args:
        tiles: Tiles given as `(start_y, start_x, end_y, end_x)` rows.

    Returns:
        A mapping from `(height, width)` to the indices of the tiles having that
        shape, in their order of appearance.

    """
    groups = defaultdict(list)
    for tile_index, tile in enumerate(tiles):
        top, left, bottom, right = tile
        groups[(bottom - top, right - left)].append(tile_index)
    return groups
def fancy_pca (img, alpha=0.1)
[view source on GitHub]¶
Perform 'Fancy PCA' augmentation from: http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf
Parameters:
Name | Type | Description |
---|---|---|
img | ndarray | numpy array with (h, w, rgb) shape, as ints between 0-255 |
alpha | float | how much to perturb/scale the eigen vecs and vals the paper used std=0.1 |
Returns:
Type | Description |
---|---|
ndarray | numpy image-like array as uint8 range(0, 255) |
Source code in albumentations/augmentations/functional.py
def fancy_pca(img: np.ndarray, alpha: float = 0.1) -> np.ndarray:
    """Perform 'Fancy PCA' augmentation from:
    http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf

    The colour covariance of the image is decomposed into eigenvalues and
    eigenvectors, and a single colour offset built from them is added to every pixel.

    Args:
        img: numpy array with (h, w, rgb) shape, as ints between 0-255
        alpha: how much to perturb/scale the eigen vecs and vals
            the paper used std=0.1. The value is used directly as the multiplier
            of the eigenvalues; no random draw happens inside this function.

    Returns:
        numpy image-like array as uint8 range(0, 255)

    Raises:
        TypeError: If the image is not an RGB image or is not uint8.

    """
    if not is_rgb_image(img) or img.dtype != np.uint8:
        msg = "Image must be RGB image in uint8 format."
        raise TypeError(msg)

    # Float copy of the untouched pixels; the colour offset is added to this copy.
    orig_img = img.astype(float).copy()

    img = img / 255.0  # rescale to 0 to 1 range

    # flatten image to one row per pixel, one column per RGB channel
    img_rs = img.reshape(-1, 3)

    # center mean
    img_centered = img_rs - np.mean(img_rs, axis=0)

    # paper says 3x3 covariance matrix (rowvar=False: columns are the variables)
    img_cov = np.cov(img_centered, rowvar=False)

    # eigen values and eigen vectors (eigh returns the eigenvalues in ascending order)
    eig_vals, eig_vecs = np.linalg.eigh(img_cov)

    # sort values and vectors:
    # sort_perm is the argsort of the reversed eigenvalues; the in-place sort of the
    # reversed view leaves eig_vals itself in descending order.
    # NOTE(review): sort_perm indexes positions of the reversed array but is applied
    # to the un-reversed columns; this lines up for distinct eigenvalues, confirm the
    # behaviour for repeated eigenvalues.
    sort_perm = eig_vals[::-1].argsort()
    eig_vals[::-1].sort()
    eig_vecs = eig_vecs[:, sort_perm]

    # > get [p1, p2, p3]
    # NOTE(review): column_stack iterates over the rows of the 2-D array and stacks
    # them as columns, i.e. m1 is the transpose of eig_vecs; confirm this orientation
    # is the intended one.
    m1 = np.column_stack(eig_vecs)

    # 3x1 matrix of eigen values, each scaled by alpha
    m2 = np.zeros((3, 1))
    # according to the paper alpha should only be drawn once per augmentation
    # (not once per channel); here the caller supplies the already drawn value
    # > alpha = np.random.normal(0, alpha_std)

    # broadcast to speed things up
    m2[:, 0] = alpha * eig_vals[:]

    # this is the vector that we're going to add to each pixel in a moment
    add_vect = np.array(m1) @ np.array(m2)

    # add_vect was computed on the 0-1 scale, so scale it back to 0-255
    for idx in range(3):  # RGB
        orig_img[..., idx] += add_vect[idx] * 255

    # the result stays on the 0-255 scale, so it is clipped there directly
    # > orig_img /= 255.0
    orig_img = np.clip(orig_img, 0.0, 255.0)

    # > orig_img *= 255
    return orig_img.astype(np.uint8)
def generate_shuffled_splits (size, divisions, random_state=None)
[view source on GitHub]¶
Generate shuffled splits for a given dimension size and number of divisions.
Parameters:
Name | Type | Description |
---|---|---|
size | int | Total size of the dimension (height or width). |
divisions | int | Number of divisions (rows or columns). |
random_state | Optional[np.random.RandomState] | Seed for the random number generator for reproducibility. |
Returns:
Type | Description |
---|---|
np.ndarray | Cumulative edges of the shuffled intervals. |
Source code in albumentations/augmentations/functional.py
def generate_shuffled_splits(
    size: int,
    divisions: int,
    random_state: Optional[np.random.RandomState] = None,
) -> np.ndarray:
    """Cut a dimension into nearly equal intervals, shuffle them and return their edges.

    Args:
        size (int): Total size of the dimension (height or width).
        divisions (int): Number of divisions (rows or columns).
        random_state (Optional[np.random.RandomState]): Random state forwarded to the
            shuffle for reproducibility.

    Returns:
        np.ndarray: Cumulative edges of the shuffled intervals, starting with 0.

    """
    interval_sizes = almost_equal_intervals(size, divisions)
    shuffled_sizes = random_utils.shuffle(interval_sizes, random_state=random_state)
    right_edges = np.cumsum(shuffled_sizes)
    # Prepend the leading edge so that consecutive pairs delimit the intervals.
    return np.insert(right_edges, 0, 0)
def iso_noise (image, color_shift=0.05, intensity=0.5, random_state=None, ** kwargs)
[view source on GitHub]¶
Apply poisson noise to image to simulate camera sensor noise.
Parameters:
Name | Type | Description |
---|---|---|
image | numpy.ndarray | Input image, currently, only RGB, uint8 images are supported. |
color_shift | float | |
intensity | float | Multiplication factor for noise values. Values of ~0.5 produce a noticeable, yet acceptable level of noise. |
random_state | Optional[int] | |
**kwargs | Any |
Returns:
Type | Description |
---|---|
numpy.ndarray | Noised image |
Source code in albumentations/augmentations/functional.py
@clipped
def iso_noise(
    image: np.ndarray,
    color_shift: float = 0.05,
    intensity: float = 0.5,
    random_state: Optional[int] = None,
    **kwargs: Any,
) -> np.ndarray:
    """Simulate camera sensor noise with Poisson lightness noise and Gaussian hue noise.

    Args:
        image (numpy.ndarray): Input image, currently, only RGB, uint8 images are supported.
        color_shift (float): Scales the standard deviation of the hue noise
            (`color_shift * 360 * intensity`).
        intensity (float): Multiplication factor for noise values. Values of ~0.5 produce
            a noticeable, yet acceptable level of noise.
        random_state: Forwarded to both random draws.
        **kwargs: Not used by this function.

    Returns:
        numpy.ndarray: Noised image

    Raises:
        TypeError: If the image is not uint8 or is not RGB.

    """
    if image.dtype != np.uint8:
        msg = "Image must have uint8 channel type"
        raise TypeError(msg)
    if not is_rgb_image(image):
        msg = "Image must be RGB"
        raise TypeError(msg)

    scale = float(1.0 / 255.0)
    unit_image = np.multiply(image, scale, dtype=np.float32)
    hls = cv2.cvtColor(unit_image, cv2.COLOR_RGB2HLS)
    _, channel_stddev = cv2.meanStdDev(hls)

    plane_shape = hls.shape[:2]
    # Draw order matters for reproducibility: lightness noise first, hue noise second.
    lightness_noise = random_utils.poisson(
        channel_stddev[1] * intensity * 255,
        size=plane_shape,
        random_state=random_state,
    )
    hue_noise = random_utils.normal(
        0,
        color_shift * 360 * intensity,
        size=plane_shape,
        random_state=random_state,
    )

    # Both channels are views into `hls`, so the in-place updates modify it directly.
    hue_channel = hls[..., 0]
    hue_channel += hue_noise
    hue_channel %= 360

    lightness_channel = hls[..., 1]
    # The noise is scaled by the remaining headroom (1 - lightness).
    lightness_channel += (lightness_noise / 255) * (1.0 - lightness_channel)

    noisy = cv2.cvtColor(hls, cv2.COLOR_HLS2RGB) * 255
    return noisy.astype(np.uint8)
def mask_from_bbox (img, bbox)
[view source on GitHub]¶
Create binary mask from bounding box
Parameters:
Name | Type | Description |
---|---|---|
img | ndarray | input image |
bbox | Tuple[int, int, int, int] | A bounding box tuple |
Returns:
Type | Description |
---|---|
mask | binary mask |
Source code in albumentations/augmentations/functional.py
def mask_from_bbox(img: np.ndarray, bbox: Tuple[int, int, int, int]) -> np.ndarray:
    """Build a binary mask that is 1 inside a bounding box and 0 elsewhere.

    Args:
        img: input image; only its first two dimensions are used, as the mask shape.
        bbox: A bounding box tuple `(x_min, y_min, x_max, y_max)`; the maximum
            coordinates are exclusive.

    Returns:
        mask: binary uint8 mask

    """
    mask = np.zeros(img.shape[:2], dtype=np.uint8)
    x_min, y_min, x_max, y_max = bbox
    box_region = (slice(y_min, y_max), slice(x_min, x_max))
    mask[box_region] = 1
    return mask
def move_tone_curve (img, low_y, high_y)
[view source on GitHub]¶
Rescales the relationship between bright and dark areas of the image by manipulating its tone curve.
Parameters:
Name | Type | Description |
---|---|---|
img | ndarray | RGB or grayscale image. |
low_y | float | y-position of a Bezier control point used to adjust the tone curve, must be in range [0, 1] |
high_y | float | y-position of a Bezier control point used to adjust image tone curve, must be in range [0, 1] |
Source code in albumentations/augmentations/functional.py
@preserve_channel_dim
def move_tone_curve(img: np.ndarray, low_y: float, high_y: float) -> np.ndarray:
    """Rescales the relationship between bright and dark areas of the image by manipulating its tone curve.

    The tone curve is a four-point Bezier curve whose end values are fixed at 0 and 1
    and whose two inner control values are `low_y` and `high_y`. It is sampled at 256
    points to build a lookup table that is applied to the image.

    Args:
        img: RGB or grayscale image. Must be uint8.
        low_y: y-position of a Bezier control point used
            to adjust the tone curve, must be in range [0, 1]
        high_y: y-position of a Bezier control point used
            to adjust image tone curve, must be in range [0, 1]

    Returns:
        Image remapped through the tone curve.

    Raises:
        ValueError: If `low_y` or `high_y` is outside [0, 1], or the image is not uint8.

    """
    input_dtype = img.dtype

    if not 0 <= low_y <= 1:
        msg = "low_y must be in range [0, 1]"
        raise ValueError(msg)
    if not 0 <= high_y <= 1:
        msg = "high_y must be in range [0, 1]"
        raise ValueError(msg)

    if input_dtype != np.uint8:
        raise ValueError(f"Unsupported image type {input_dtype}")

    t = np.linspace(0.0, 1.0, 256)

    # Defines response of a four-point Bezier curve
    def evaluate_bez(t: np.ndarray) -> np.ndarray:
        return 3 * (1 - t) ** 2 * t * low_y + 3 * (1 - t) * t**2 * high_y + t**3

    # Evaluated element by element, exactly as before, so the lookup table is unchanged.
    bezier_response = np.vectorize(evaluate_bez)

    remapping = np.rint(bezier_response(t) * 255).astype(np.uint8)

    lut_fn = _maybe_process_in_chunks(cv2.LUT, lut=remapping)
    return lut_fn(img)
def multiply (img, multiplier)
[view source on GitHub]¶
Parameters:
Name | Type | Description |
---|---|---|
img | ndarray | Image. |
multiplier | ndarray | Multiplier coefficient. |
Returns:
Type | Description |
---|---|
ndarray | Image multiplied by `multiplier` coefficient. |
Source code in albumentations/augmentations/functional.py
def multiply(img: np.ndarray, multiplier: np.ndarray) -> np.ndarray:
    """Multiply an image by a coefficient, dispatching on the image dtype.

    Args:
        img: Image.
        multiplier: Multiplier coefficient. For uint8 images a one-dimensional
            multiplier takes the optimized code path.

    Returns:
        Image multiplied by `multiplier` coefficient.

    """
    if img.dtype != np.uint8:
        return _multiply_non_uint8(img, multiplier)

    is_one_dimensional = len(multiplier.shape) == 1
    if is_one_dimensional:
        return _multiply_uint8_optimized(img, multiplier)
    return _multiply_uint8(img, multiplier)
def normalize_per_image (img, normalization)
[view source on GitHub]¶
Apply per-image normalization based on the specified strategy.
Parameters:
Name | Type | Description |
---|---|---|
img | np.ndarray | The image to be normalized, expected to be in HWC format. |
normalization | str | The normalization strategy to apply. Options include: "image", "image_per_channel", "min_max", "min_max_per_channel". |
Returns:
Type | Description |
---|---|
np.ndarray | The normalized image. |
Reference
https://github.com/ChristofHenkel/kaggle-landmark-2021-1st-place/blob/main/data/ch_ds_1.py
Source code in albumentations/augmentations/functional.py
@preserve_channel_dim
def normalize_per_image(
    img: np.ndarray,
    normalization: Literal["image", "image_per_channel", "min_max", "min_max_per_channel"],
) -> np.ndarray:
    """Apply per-image normalization based on the specified strategy.

    Args:
        img (np.ndarray): The image to be normalized, expected to be in HWC format.
        normalization (str): The normalization strategy to apply. Options include:
            "image", "image_per_channel", "min_max", "min_max_per_channel".

    Returns:
        np.ndarray: The normalized float32 image. In the min-max modes a constant
            image (or constant channel) maps to zeros rather than NaN.

    Raises:
        ValueError: If `normalization` is not one of the supported strategies.

    Reference:
        https://github.com/ChristofHenkel/kaggle-landmark-2021-1st-place/blob/main/data/ch_ds_1.py

    """
    img = img.astype(np.float32)

    if img.ndim == GRAYSCALE_SHAPE_LENGTH:
        img = np.expand_dims(img, axis=-1)  # Ensure the image is at least 3D

    if normalization == "image":
        # Normalize the whole image based on its global mean and std
        mean = img.mean()
        std = img.std() + 1e-4  # Adding a small epsilon to avoid division by zero
        normalized_img = (img - mean) / std
        normalized_img = normalized_img.clip(-20, 20)  # Clipping outliers

    elif normalization == "image_per_channel":
        # Normalize the image per channel based on each channel's mean and std
        pixel_mean = img.mean(axis=(0, 1))
        pixel_std = img.std(axis=(0, 1)) + 1e-4
        normalized_img = (img - pixel_mean[None, None, :]) / pixel_std[None, None, :]
        normalized_img = normalized_img.clip(-20, 20)

    elif normalization == "min_max":
        # Apply min-max normalization to the whole image
        img_min = img.min()
        value_range = img.max() - img_min
        # A constant image has zero range; dividing by 1 instead maps it to zeros
        # and leaves every non-degenerate result untouched.
        if value_range == 0:
            value_range = np.float32(1)
        normalized_img = (img - img_min) / value_range

    elif normalization == "min_max_per_channel":
        # Apply min-max normalization per channel
        img_min = img.min(axis=(0, 1), keepdims=True)
        value_range = img.max(axis=(0, 1), keepdims=True) - img_min
        # Same guard per channel; the in-place write keeps the float32 dtype.
        value_range[value_range == 0] = 1
        normalized_img = (img - img_min) / value_range

    else:
        raise ValueError(f"Unknown normalization method: {normalization}")

    return normalized_img
def posterize (img, bits)
[view source on GitHub]¶
Reduce the number of bits for each color channel.
Parameters:
Name | Type | Description |
---|---|---|
img | ndarray | image to posterize. |
bits | int | number of high bits. Must be in range [0, 8] |
Returns:
Type | Description |
---|---|
ndarray | Image with reduced color channels. |
Source code in albumentations/augmentations/functional.py
def _posterize_lut(bits: np.ndarray) -> np.ndarray:
    """Build the 256-entry lookup table that keeps only the `bits` highest bits."""
    lut = np.arange(0, 256, dtype=np.uint8)
    mask = ~np.uint8(2 ** (8 - bits) - 1)
    lut &= mask
    return lut


@preserve_channel_dim
def posterize(img: np.ndarray, bits: int) -> np.ndarray:
    """Reduce the number of bits for each color channel.

    Args:
        img: image to posterize. Must be uint8.
        bits: number of high bits. Must be in range [0, 8]. A sequence with one
            entry per channel is also accepted for RGB images.

    Returns:
        Image with reduced color channels.

    Raises:
        TypeError: If the image is not uint8, or if per-channel bits are given for
            a non-RGB image.
        ValueError: If any value of `bits` is outside [0, 8].

    """
    if img.dtype != np.uint8:
        msg = "Image must have uint8 channel type"
        raise TypeError(msg)

    # Validate before the uint8 cast: after the cast a negative or too-large value
    # may have wrapped around into the valid range (or raised an unrelated error).
    requested_bits = np.asarray(bits)
    if np.any((requested_bits < 0) | (requested_bits > EIGHT)):
        msg = "bits must be in range [0, 8]"
        raise ValueError(msg)

    bits_array = np.uint8(bits)

    if not bits_array.shape or len(bits_array) == 1:
        # One setting shared by every channel.
        if bits_array == 0:
            return np.zeros_like(img)
        if bits_array == EIGHT:
            return img.copy()

        return cv2.LUT(img, _posterize_lut(bits_array))

    if not is_rgb_image(img):
        msg = "If bits is iterable image must be RGB"
        raise TypeError(msg)

    result_img = np.empty_like(img)
    for i, channel_bits in enumerate(bits_array):
        if channel_bits == 0:
            result_img[..., i] = np.zeros_like(img[..., i])
        elif channel_bits == EIGHT:
            result_img[..., i] = img[..., i].copy()
        else:
            result_img[..., i] = cv2.LUT(img[..., i], _posterize_lut(channel_bits))

    return result_img
def shuffle_tiles_within_shape_groups (shape_groups, random_state=None)
[view source on GitHub]¶
Shuffles indices within each group of similar shapes and creates a list where each index points to the index of the tile it should be mapped to.
Parameters:
Name | Type | Description |
---|---|---|
shape_groups | Dict[Tuple[int, int], List[int]] | Groups of tile indices categorized by shape. |
random_state | Optional[np.random.RandomState] | Seed for the random number generator for reproducibility. |
Returns:
Type | Description |
---|---|
List[int] | A list where each index is mapped to the new index of the tile after shuffling. |
Source code in albumentations/augmentations/functional.py
def shuffle_tiles_within_shape_groups(
    shape_groups: Dict[Tuple[int, int], List[int]],
    random_state: Optional[np.random.RandomState] = None,
) -> List[int]:
    """Shuffle tile indices inside every shape group and return the resulting index mapping.

    Tiles are only exchanged with tiles of the same shape, so each group is shuffled
    independently of the others.

    Args:
        shape_groups (Dict[Tuple[int, int], List[int]]): Groups of tile indices categorized by shape.
        random_state (Optional[np.random.RandomState]): Random state forwarded to the
            shuffle for reproducibility.

    Returns:
        List[int]: A list where each index is mapped to the new index of the tile after shuffling.

    """
    # One slot per tile across all groups; -1 marks a slot that was never assigned.
    tile_count = sum(len(group) for group in shape_groups.values())
    mapping = [-1] * tile_count

    for group in shape_groups.values():
        # Shuffle a copy so the caller's group lists keep their order.
        permuted = random_utils.shuffle(group.copy(), random_state=random_state)
        for source_index, target_index in zip(group, permuted):
            mapping[source_index] = target_index

    return mapping
def solarize (img, threshold=128)
[view source on GitHub]¶
Invert all pixel values above a threshold.
Parameters:
Name | Type | Description |
---|---|---|
img | ndarray | The image to solarize. |
threshold | int | All pixels above this grayscale level are inverted. |
Returns:
Type | Description |
---|---|
ndarray | Solarized image. |
Source code in albumentations/augmentations/functional.py
def solarize(img: np.ndarray, threshold: int = 128) -> np.ndarray:
    """Invert all pixel values at or above a threshold.

    Args:
        img: The image to solarize.
        threshold: Pixels whose value is greater than or equal to this level are
            replaced by the dtype's maximum value minus the pixel value.

    Returns:
        Solarized image.

    """
    dtype = img.dtype
    max_val = MAX_VALUES_BY_DTYPE[dtype]

    if dtype != np.dtype("uint8"):
        solarized = img.copy()
        to_invert = img >= threshold
        solarized[to_invert] = max_val - solarized[to_invert]
        return solarized

    # uint8 images go through a lookup table with one entry per possible level.
    lut = [(level if level < threshold else max_val - level) for level in range(int(max_val) + 1)]

    input_ndim = len(img.shape)
    solarized = cv2.LUT(img, np.array(lut, dtype=dtype))
    # Restore the trailing axis if the lookup dropped it.
    if input_ndim != len(solarized.shape):
        solarized = np.expand_dims(solarized, -1)
    return solarized
def split_uniform_grid (image_shape, grid, random_state=None)
[view source on GitHub]¶
Splits an image shape into a uniform grid specified by the grid dimensions.
Parameters:
Name | Type | Description |
---|---|---|
image_shape | Tuple[int, int] | The shape of the image as (height, width). |
grid | Tuple[int, int] | The grid size as (rows, columns). |
Returns:
Type | Description |
---|---|
np.ndarray | An array containing the tiles' coordinates in the format (start_y, start_x, end_y, end_x). |
Source code in albumentations/augmentations/functional.py
def split_uniform_grid(
    image_shape: Tuple[int, int],
    grid: Tuple[int, int],
    random_state: Optional[np.random.RandomState] = None,
) -> np.ndarray:
    """Split an image shape into a grid of tiles with shuffled, nearly equal row and column sizes.

    Args:
        image_shape (Tuple[int, int]): The shape of the image as (height, width).
        grid (Tuple[int, int]): The grid size as (rows, columns).
        random_state (Optional[np.random.RandomState]): Random state forwarded to the
            split generation for reproducibility.

    Returns:
        np.ndarray: An array containing the tiles' coordinates in the format (start_y, start_x, end_y, end_x),
            listed row by row.

    """
    n_rows, n_cols = grid

    # Height edges are generated before width edges, both with the same random state.
    row_edges = generate_shuffled_splits(image_shape[0], n_rows, random_state)
    col_edges = generate_shuffled_splits(image_shape[1], n_cols, random_state)

    tiles = []
    for row in range(n_rows):
        top, bottom = row_edges[row], row_edges[row + 1]
        for col in range(n_cols):
            tiles.append((top, col_edges[col], bottom, col_edges[col + 1]))

    return np.array(tiles)
def swap_tiles_on_image (image, tiles, mapping=None)
[view source on GitHub]¶
Swap tiles on the image according to the new format.
Parameters:
Name | Type | Description |
---|---|---|
image | ndarray | Input image. |
tiles | ndarray | Array of tiles with each tile as [start_y, start_x, end_y, end_x]. |
mapping | Optional[List[int]] | List of new tile indices. |
Returns:
Type | Description |
---|---|
np.ndarray | Output image with tiles swapped according to the random shuffle. |
Source code in albumentations/augmentations/functional.py
def swap_tiles_on_image(image: np.ndarray, tiles: np.ndarray, mapping: Optional[List[int]] = None) -> np.ndarray:
    """Swap tiles on the image according to a tile index mapping.

    Args:
        image: Input image. It is not modified.
        tiles: Array of tiles with each tile as [start_y, start_x, end_y, end_x].
        mapping: List of new tile indices. The content of tile `i` is written to the
            location of tile `mapping[i]`.

    Returns:
        np.ndarray: Output image with tiles swapped according to the mapping. Pixels
            that no mapped tile writes to keep their original values. When there are
            no tiles or no mapping, a plain copy of the input is returned.

    """
    # If no tiles are provided, return a copy of the original image
    if tiles.size == 0 or mapping is None:
        return image.copy()

    # Start from a copy rather than an uninitialised buffer, so any area left
    # uncovered by the mapped tiles holds the original pixels instead of
    # arbitrary memory.
    new_image = image.copy()

    for num, new_index in enumerate(mapping):
        start_y, start_x, end_y, end_x = tiles[new_index]
        start_y_orig, start_x_orig, end_y_orig, end_x_orig = tiles[num]
        # Assign the corresponding tile from the original image to the new image
        new_image[start_y:end_y, start_x:end_x] = image[start_y_orig:end_y_orig, start_x_orig:end_x_orig]

    return new_image