Functional transforms (augmentations.functional)

def add_fog (img, fog_coef, alpha_coef, haze_list) [view source on GitHub]

Add fog to the image.

From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

Parameters:

img (ndarray): Image.
fog_coef (float): Fog coefficient.
alpha_coef (float): Alpha coefficient.
haze_list (List[Tuple[int, int]]): List of (x, y) points used to place the haze circles.

Returns:

ndarray: Image with fog added.

Source code in albumentations/augmentations/functional.py
Python
@preserve_channel_dim
def add_fog(img: np.ndarray, fog_coef: float, alpha_coef: float, haze_list: List[Tuple[int, int]]) -> np.ndarray:
    """Add fog to the image.

    From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

    Args:
        img: Image.
        fog_coef: Fog coefficient.
        alpha_coef: Alpha coefficient.
        haze_list: List of (x, y) points used to place the haze circles.

    Returns:
        Image.

    """
    non_rgb_warning(img)

    input_dtype = img.dtype
    needs_float = False

    if input_dtype == np.float32:
        img = from_float(img, dtype=np.dtype("uint8"))
        needs_float = True
    elif input_dtype not in (np.uint8, np.float32):
        raise ValueError(f"Unexpected dtype {input_dtype} for RandomFog augmentation")

    width = img.shape[1]

    hw = max(int(width // 3 * fog_coef), 10)

    for haze_points in haze_list:
        x, y = haze_points
        overlay = img.copy()
        output = img.copy()
        alpha = alpha_coef * fog_coef
        rad = hw // 2
        point = (x + hw // 2, y + hw // 2)
        cv2.circle(overlay, point, int(rad), (255, 255, 255), -1)
        cv2.addWeighted(overlay, alpha, output, 1 - alpha, 0, output)

        img = output.copy()

    image_rgb = cv2.blur(img, (hw // 10, hw // 10))

    if needs_float:
        image_rgb = to_float(image_rgb, max_value=255)

    return image_rgb
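
A minimal usage sketch, assuming the module path shown above; the dummy image, haze points, and coefficient values are illustrative:

Python
import numpy as np
from albumentations.augmentations.functional import add_fog

image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)  # dummy RGB image
haze_points = [(100, 100), (320, 200), (500, 60)]  # (x, y) anchors for the haze circles
foggy = add_fog(image, fog_coef=0.5, alpha_coef=0.08, haze_list=haze_points)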

def add_gravel (img, gravels) [view source on GitHub]

Add gravel to the image.

From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

Parameters:

img (numpy.ndarray): Image to add gravel to.
gravels (list): List of gravel parameters. (float, float, float, float): (top-left x, top-left y, bottom-right x, bottom-right y).

Returns:

numpy.ndarray: Image with gravel added.
Source code in albumentations/augmentations/functional.py
Python
@ensure_contiguous
@preserve_channel_dim
def add_gravel(img: np.ndarray, gravels: List[Any]) -> np.ndarray:
    """Add gravel to the image.

    From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

    Args:
        img (numpy.ndarray): image to add gravel to
        gravels (list): list of gravel parameters. (float, float, float, float):
            (top-left x, top-left y, bottom-right x, bottom-right y)

    Returns:
        numpy.ndarray: Image with gravel added.

    """
    non_rgb_warning(img)
    input_dtype = img.dtype
    needs_float = False

    if input_dtype == np.float32:
        img = from_float(img, dtype=np.dtype("uint8"))
        needs_float = True
    elif input_dtype not in (np.uint8, np.float32):
        raise ValueError(f"Unexpected dtype {input_dtype} for AddGravel augmentation")

    image_hls = cv2.cvtColor(img, cv2.COLOR_RGB2HLS)

    for gravel in gravels:
        y1, y2, x1, x2, sat = gravel
        image_hls[x1:x2, y1:y2, 1] = sat

    image_rgb = cv2.cvtColor(image_hls, cv2.COLOR_HLS2RGB)

    if needs_float:
        image_rgb = to_float(image_rgb, max_value=255)

    return image_rgb
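
A minimal usage sketch with an illustrative gravel patch; note that the loop above unpacks five values per entry (the box coordinates plus a saturation value):

Python
import numpy as np
from albumentations.augmentations.functional import add_gravel

image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)  # dummy RGB image
gravels = [(50, 120, 200, 260, 140)]  # unpacked as (y1, y2, x1, x2, saturation) by the loop above
result = add_gravel(image, gravels)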

def add_rain (img, slant, drop_length, drop_width, drop_color, blur_value, brightness_coefficient, rain_drops) [view source on GitHub]

From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

Parameters:

img (ndarray): Image.
slant (int): Horizontal offset (in pixels) applied to each drop from top to bottom.
drop_length (int): Length of each rain drop in pixels.
drop_width (int): Line thickness of each rain drop.
drop_color (Tuple[int, int, int]): Color of the rain drops.
blur_value (int): Blur kernel size; rainy views are blurry.
brightness_coefficient (float): Brightness multiplier; rainy days are usually shady.
rain_drops (List[Tuple[int, int]]): Start coordinates (x, y) of each rain drop.

Returns:

ndarray: Image with rain added.

Source code in albumentations/augmentations/functional.py
Python
@preserve_channel_dim
def add_rain(
    img: np.ndarray,
    slant: int,
    drop_length: int,
    drop_width: int,
    drop_color: Tuple[int, int, int],
    blur_value: int,
    brightness_coefficient: float,
    rain_drops: List[Tuple[int, int]],
) -> np.ndarray:
    """From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

    Args:
        img: Image.
        slant: Horizontal offset (in pixels) applied to each drop from top to bottom.
        drop_length: Length of each rain drop in pixels.
        drop_width: Line thickness of each rain drop.
        drop_color: Color of the rain drops.
        blur_value: Blur kernel size; rainy views are blurry.
        brightness_coefficient: Brightness multiplier; rainy days are usually shady.
        rain_drops: Start coordinates (x, y) of each rain drop.

    Returns:
        Image

    """
    non_rgb_warning(img)

    input_dtype = img.dtype
    needs_float = False

    if input_dtype == np.float32:
        img = from_float(img, dtype=np.dtype("uint8"))
        needs_float = True
    elif input_dtype not in (np.uint8, np.float32):
        raise ValueError(f"Unexpected dtype {input_dtype} for RandomRain augmentation")

    image = img.copy()

    for rain_drop_x0, rain_drop_y0 in rain_drops:
        rain_drop_x1 = rain_drop_x0 + slant
        rain_drop_y1 = rain_drop_y0 + drop_length

        cv2.line(
            image,
            (rain_drop_x0, rain_drop_y0),
            (rain_drop_x1, rain_drop_y1),
            drop_color,
            drop_width,
        )

    image = cv2.blur(image, (blur_value, blur_value))  # rainy views are blurry
    image_hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV).astype(np.float32)
    image_hsv[:, :, 2] *= brightness_coefficient

    image_rgb = cv2.cvtColor(image_hsv.astype(np.uint8), cv2.COLOR_HSV2RGB)

    if needs_float:
        return to_float(image_rgb, max_value=255)

    return image_rgb
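
A minimal usage sketch; the drop positions and parameter values are illustrative:

Python
import numpy as np
from albumentations.augmentations.functional import add_rain

image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)  # dummy RGB image
drops = [(50, 10), (200, 40), (400, 120)]  # (x, y) start point of each drop
rainy = add_rain(
    image,
    slant=10,
    drop_length=20,
    drop_width=1,
    drop_color=(200, 200, 200),
    blur_value=3,
    brightness_coefficient=0.7,
    rain_drops=drops,
)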

def add_shadow (img, vertices_list) [view source on GitHub]

Add shadows to the image.

Parameters:

img (numpy.ndarray): Image.
vertices_list (list[numpy.ndarray]): List of polygon vertex arrays that define the shadow regions.

Returns:

numpy.ndarray: Image with shadows added.
Source code in albumentations/augmentations/functional.py
Python
@ensure_contiguous
@preserve_channel_dim
def add_shadow(img: np.ndarray, vertices_list: List[np.ndarray]) -> np.ndarray:
    """Add shadows to the image.

    Args:
        img (numpy.ndarray): Image.
        vertices_list (list[numpy.ndarray]): List of polygon vertex arrays that define the shadow regions.

    Returns:
        numpy.ndarray: Image with shadows added.

    Reference:
        https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
    """
    non_rgb_warning(img)
    input_dtype = img.dtype
    needs_float = False

    if input_dtype == np.float32:
        img = from_float(img, dtype=np.dtype("uint8"))
        needs_float = True
    elif input_dtype not in (np.uint8, np.float32):
        raise ValueError(f"Unexpected dtype {input_dtype} for RandomShadow augmentation")

    image_hls = cv2.cvtColor(img, cv2.COLOR_RGB2HLS)
    mask = np.zeros_like(img)

    # adding all shadow polygons on empty mask, single 255 denotes only red channel
    cv2.fillPoly(mask, vertices_list, 255)

    # if red channel is hot, image's "Lightness" channel's brightness is lowered
    red_max_value_ind = mask[:, :, 0] == MAX_VALUES_BY_DTYPE[np.dtype("uint8")]
    image_hls[:, :, 1][red_max_value_ind] = image_hls[:, :, 1][red_max_value_ind] * 0.5

    image_rgb = cv2.cvtColor(image_hls, cv2.COLOR_HLS2RGB)

    if needs_float:
        return to_float(image_rgb, max_value=255)

    return image_rgb
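
A minimal usage sketch; the shadow polygon is illustrative and is given as int32 vertex arrays, as cv2.fillPoly expects:

Python
import numpy as np
from albumentations.augmentations.functional import add_shadow

image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)  # dummy RGB image
vertices = [np.array([[100, 100], [300, 120], [250, 350], [120, 300]], dtype=np.int32)]
shadowed = add_shadow(image, vertices)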

def add_snow (img, snow_point, brightness_coeff) [view source on GitHub]

Bleaches out pixels, imitating snow.

From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

Parameters:

img (ndarray): Image.
snow_point (float): Snow point; controls the lightness threshold below which pixels are brightened.
brightness_coeff (float): Brightness coefficient.

Returns:

ndarray: Image.

Source code in albumentations/augmentations/functional.py
Python
@preserve_channel_dim
def add_snow(img: np.ndarray, snow_point: float, brightness_coeff: float) -> np.ndarray:
    """Bleaches out pixels, imitation snow.

    From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

    Args:
        img: Image.
        snow_point: Snow point; controls the lightness threshold below which pixels are brightened.
        brightness_coeff: Brightness coefficient.

    Returns:
        Image.

    """
    non_rgb_warning(img)

    input_dtype = img.dtype
    needs_float = False

    snow_point *= 127.5  # = 255 / 2
    snow_point += 85  # = 255 / 3

    if input_dtype == np.float32:
        img = from_float(img, dtype=np.dtype("uint8"))
        needs_float = True
    elif input_dtype not in (np.uint8, np.float32):
        raise ValueError(f"Unexpected dtype {input_dtype} for RandomSnow augmentation")

    image_hls = cv2.cvtColor(img, cv2.COLOR_RGB2HLS)
    image_hls = np.array(image_hls, dtype=np.float32)

    image_hls[:, :, 1][image_hls[:, :, 1] < snow_point] *= brightness_coeff

    image_hls[:, :, 1] = clip(image_hls[:, :, 1], np.uint8, 255)

    image_hls = np.array(image_hls, dtype=np.uint8)

    image_rgb = cv2.cvtColor(image_hls, cv2.COLOR_HLS2RGB)

    if needs_float:
        image_rgb = to_float(image_rgb, max_value=255)

    return image_rgb
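
A minimal usage sketch; the parameter values are illustrative (a brightness_coeff above 1 bleaches the darker pixels selected by snow_point):

Python
import numpy as np
from albumentations.augmentations.functional import add_snow

image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)  # dummy RGB image
snowy = add_snow(image, snow_point=0.3, brightness_coeff=2.5)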

def add_sun_flare (img, flare_center_x, flare_center_y, src_radius, src_color, circles) [view source on GitHub]

Add sun flare.

From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

Parameters:

img (numpy.ndarray): Image.
flare_center_x (float): x coordinate of the flare center.
flare_center_y (float): y coordinate of the flare center.
src_radius (int): Radius of the flare source.
src_color (int, int, int): Color of the flare source.
circles (list): List of (alpha, (x, y), radius, (r, g, b)) tuples describing the flare circles.

Returns:

numpy.ndarray: Image with sun flare added.
Source code in albumentations/augmentations/functional.py
Python
@preserve_channel_dim
def add_sun_flare(
    img: np.ndarray,
    flare_center_x: float,
    flare_center_y: float,
    src_radius: int,
    src_color: ColorType,
    circles: List[Any],
) -> np.ndarray:
    """Add sun flare.

    From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

    Args:
        img (numpy.ndarray): Image.
        flare_center_x (float): x coordinate of the flare center.
        flare_center_y (float): y coordinate of the flare center.
        src_radius (int): Radius of the flare source.
        src_color (int, int, int): Color of the flare source.
        circles (list): List of (alpha, (x, y), radius, (r, g, b)) tuples describing the flare circles.

    Returns:
        numpy.ndarray: Image with sun flare added.

    """
    non_rgb_warning(img)

    input_dtype = img.dtype
    needs_float = False

    if input_dtype == np.float32:
        img = from_float(img, dtype=np.dtype("uint8"))
        needs_float = True
    elif input_dtype not in (np.uint8, np.float32):
        raise ValueError(f"Unexpected dtype {input_dtype} for RandomSunFlareaugmentation")

    overlay = img.copy()
    output = img.copy()

    for alpha, (x, y), rad3, (r_color, g_color, b_color) in circles:
        cv2.circle(overlay, (x, y), rad3, (r_color, g_color, b_color), -1)

        cv2.addWeighted(overlay, alpha, output, 1 - alpha, 0, output)

    point = (int(flare_center_x), int(flare_center_y))

    overlay = output.copy()
    num_times = src_radius // 10
    alpha = np.linspace(0.0, 1, num=num_times)
    rad = np.linspace(1, src_radius, num=num_times)
    for i in range(num_times):
        cv2.circle(overlay, point, int(rad[i]), src_color, -1)
        alp = alpha[num_times - i - 1] * alpha[num_times - i - 1] * alpha[num_times - i - 1]
        cv2.addWeighted(overlay, alp, output, 1 - alp, 0, output)

    image_rgb = output

    if needs_float:
        image_rgb = to_float(image_rgb, max_value=255)

    return image_rgb
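
A minimal usage sketch; the circle list and flare parameters are illustrative and follow the (alpha, (x, y), radius, (r, g, b)) layout unpacked in the loop above:

Python
import numpy as np
from albumentations.augmentations.functional import add_sun_flare

image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)  # dummy RGB image
circles = [
    (0.05, (320, 150), 12, (255, 255, 230)),
    (0.05, (360, 190), 18, (255, 255, 210)),
]
flared = add_sun_flare(
    image,
    flare_center_x=300.0,
    flare_center_y=120.0,
    src_radius=120,
    src_color=(255, 255, 255),
    circles=circles,
)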

def almost_equal_intervals (n, parts) [view source on GitHub]

Generates an array of nearly equal integer intervals that sum up to n.

This function divides the number n into the requested number of nearly equal parts. It ensures that the sum of all parts equals n, and that the difference between any two parts is at most one. This is useful for distributing a total amount into nearly equal discrete parts.

Parameters:

n (int): The total value to be split.
parts (int): The number of parts to split into.

Returns:

np.ndarray: An array of integers where each integer represents the size of a part.

Examples:

Python
>>> almost_equal_intervals(20, 3)
array([7, 7, 6])  # Splits 20 into three parts: 7, 7, and 6
>>> almost_equal_intervals(16, 4)
array([4, 4, 4, 4])  # Splits 16 into four equal parts
Source code in albumentations/augmentations/functional.py
Python
def almost_equal_intervals(n: int, parts: int) -> np.ndarray:
    """Generates an array of nearly equal integer intervals that sum up to `n`.

    This function divides the number `n` into `parts` nearly equal parts. It ensures that
    the sum of all parts equals `n`, and the difference between any two parts is at most one.
    This is useful for distributing a total amount into nearly equal discrete parts.

    Args:
        n (int): The total value to be split.
        parts (int): The number of parts to split into.

    Returns:
        np.ndarray: An array of integers where each integer represents the size of a part.

    Example:
        >>> almost_equal_intervals(20, 3)
        array([7, 7, 6])  # Splits 20 into three parts: 7, 7, and 6
        >>> almost_equal_intervals(16, 4)
        array([4, 4, 4, 4])  # Splits 16 into four equal parts
    """
    part_size, remainder = divmod(n, parts)
    # Create an array with the base part size and adjust the first `remainder` parts by adding 1
    return np.array([part_size + 1 if i < remainder else part_size for i in range(parts)])

def bbox_from_mask (mask) [view source on GitHub]

Create bounding box from binary mask (fast version)

Parameters:

mask (numpy.ndarray): Binary mask.

Returns:

tuple: A bounding box tuple (x_min, y_min, x_max, y_max).

Source code in albumentations/augmentations/functional.py
Python
def bbox_from_mask(mask: np.ndarray) -> Tuple[int, int, int, int]:
    """Create bounding box from binary mask (fast version)

    Args:
        mask (numpy.ndarray): binary mask.

    Returns:
        tuple: A bounding box tuple `(x_min, y_min, x_max, y_max)`.

    """
    rows = np.any(mask, axis=1)
    if not rows.any():
        return -1, -1, -1, -1
    cols = np.any(mask, axis=0)
    y_min, y_max = np.where(rows)[0][[0, -1]]
    x_min, x_max = np.where(cols)[0][[0, -1]]
    return x_min, y_min, x_max + 1, y_max + 1
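
A minimal usage sketch; note that x_max and y_max are exclusive (the +1 in the return statement):

Python
import numpy as np
from albumentations.augmentations.functional import bbox_from_mask

mask = np.zeros((100, 100), dtype=np.uint8)
mask[20:40, 10:50] = 1
x_min, y_min, x_max, y_max = bbox_from_mask(mask)  # 10, 20, 50, 40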

def create_shape_groups (tiles) [view source on GitHub]

Groups tiles by their shape and stores the indices for each shape.

Source code in albumentations/augmentations/functional.py
Python
def create_shape_groups(tiles: np.ndarray) -> Dict[Tuple[int, int], List[int]]:
    """Groups tiles by their shape and stores the indices for each shape."""
    shape_groups = defaultdict(list)
    for index, (start_y, start_x, end_y, end_x) in enumerate(tiles):
        shape = (end_y - start_y, end_x - start_x)
        shape_groups[shape].append(index)
    return shape_groups

def fancy_pca (img, alpha=0.1) [view source on GitHub]

Perform 'Fancy PCA' augmentation from: http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf

Parameters:

img (ndarray): Numpy array with (h, w, rgb) shape, as ints between 0-255.
alpha (float): How much to perturb/scale the eigenvectors and eigenvalues; the paper used std=0.1.

Returns:

ndarray: Numpy image-like array as uint8 in range [0, 255].

Source code in albumentations/augmentations/functional.py
Python
def fancy_pca(img: np.ndarray, alpha: float = 0.1) -> np.ndarray:
    """Perform 'Fancy PCA' augmentation from:
    http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf

    Args:
        img: numpy array with (h, w, rgb) shape, as ints between 0-255
        alpha: how much to perturb/scale the eigen vecs and vals
                the paper used std=0.1

    Returns:
        numpy image-like array as uint8 range(0, 255)

    """
    if not is_rgb_image(img) or img.dtype != np.uint8:
        msg = "Image must be RGB image in uint8 format."
        raise TypeError(msg)

    orig_img = img.astype(float).copy()

    img = img / 255.0  # rescale to 0 to 1 range

    # flatten image to columns of RGB
    img_rs = img.reshape(-1, 3)
    # img_rs shape (640000, 3)

    # center mean
    img_centered = img_rs - np.mean(img_rs, axis=0)

    # paper says 3x3 covariance matrix
    img_cov = np.cov(img_centered, rowvar=False)

    # eigen values and eigen vectors
    eig_vals, eig_vecs = np.linalg.eigh(img_cov)

    # sort values and vector
    sort_perm = eig_vals[::-1].argsort()
    eig_vals[::-1].sort()
    eig_vecs = eig_vecs[:, sort_perm]

    # > get [p1, p2, p3]
    m1 = np.column_stack(eig_vecs)

    # get 3x1 matrix of eigen values multiplied by random variable draw from normal
    # distribution with mean of 0 and standard deviation of 0.1
    m2 = np.zeros((3, 1))
    # according to the paper alpha should only be draw once per augmentation (not once per channel)
    # > alpha = np.random.normal(0, alpha_std)

    # broad cast to speed things up
    m2[:, 0] = alpha * eig_vals[:]

    # this is the vector that we're going to add to each pixel in a moment
    add_vect = np.array(m1) @ np.array(m2)

    for idx in range(3):  # RGB
        orig_img[..., idx] += add_vect[idx] * 255

    # for image processing it was found that working with float 0.0 to 1.0
    # was easier than integers between 0-255
    # > orig_img /= 255.0
    orig_img = np.clip(orig_img, 0.0, 255.0)

    # > orig_img *= 255
    return orig_img.astype(np.uint8)
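
A minimal usage sketch; the function only accepts RGB uint8 input, and the scalar alpha is used directly in place of the per-augmentation normal draw mentioned in the comments above:

Python
import numpy as np
from albumentations.augmentations.functional import fancy_pca

image = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)  # RGB uint8 only
jittered = fancy_pca(image, alpha=0.1)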

def generate_shuffled_splits (size, divisions, random_state=None) [view source on GitHub]

Generate shuffled splits for a given dimension size and number of divisions.

Parameters:

size (int): Total size of the dimension (height or width).
divisions (int): Number of divisions (rows or columns).
random_state (Optional[np.random.RandomState]): Random number generator state used for reproducible shuffling.

Returns:

np.ndarray: Cumulative edges of the shuffled intervals.

Source code in albumentations/augmentations/functional.py
Python
def generate_shuffled_splits(
    size: int,
    divisions: int,
    random_state: Optional[np.random.RandomState] = None,
) -> np.ndarray:
    """Generate shuffled splits for a given dimension size and number of divisions.

    Args:
        size (int): Total size of the dimension (height or width).
        divisions (int): Number of divisions (rows or columns).
        random_state (Optional[np.random.RandomState]): Seed for the random number generator for reproducibility.

    Returns:
        np.ndarray: Cumulative edges of the shuffled intervals.
    """
    intervals = almost_equal_intervals(size, divisions)
    intervals = random_utils.shuffle(intervals, random_state=random_state)
    return np.insert(np.cumsum(intervals), 0, 0)
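
A minimal usage sketch; the concrete interior edges depend on how the intervals are shuffled:

Python
import numpy as np
from albumentations.augmentations.functional import generate_shuffled_splits

edges = generate_shuffled_splits(20, 3, random_state=np.random.RandomState(0))
# edges has divisions + 1 entries, starting at 0 and ending at size (here 20)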

def iso_noise (image, color_shift=0.05, intensity=0.5, random_state=None, ** kwargs) [view source on GitHub]

Apply Poisson noise to the image to simulate camera sensor noise.

Parameters:

image (numpy.ndarray): Input image. Currently, only RGB, uint8 images are supported.
color_shift (float): Amount of hue noise applied to the image.
intensity (float): Multiplication factor for noise values. Values of ~0.5 produce a noticeable, yet acceptable, level of noise.
random_state (Optional[int]): Random seed for reproducible noise generation.
**kwargs (Any): Not used.

Returns:

numpy.ndarray: Noised image.

Source code in albumentations/augmentations/functional.py
Python
@clipped
def iso_noise(
    image: np.ndarray,
    color_shift: float = 0.05,
    intensity: float = 0.5,
    random_state: Optional[int] = None,
    **kwargs: Any,
) -> np.ndarray:
    """Apply poisson noise to image to simulate camera sensor noise.

    Args:
        image (numpy.ndarray): Input image. Currently, only RGB, uint8 images are supported.
        color_shift (float): Amount of hue noise applied to the image.
        intensity (float): Multiplication factor for noise values. Values of ~0.5 produce a noticeable,
            yet acceptable, level of noise.
        random_state: Random seed for reproducible noise generation.
        **kwargs: Not used.

    Returns:
        numpy.ndarray: Noised image

    """
    if image.dtype != np.uint8:
        msg = "Image must have uint8 channel type"
        raise TypeError(msg)
    if not is_rgb_image(image):
        msg = "Image must be RGB"
        raise TypeError(msg)

    one_over_255 = float(1.0 / 255.0)
    image = np.multiply(image, one_over_255, dtype=np.float32)
    hls = cv2.cvtColor(image, cv2.COLOR_RGB2HLS)
    _, stddev = cv2.meanStdDev(hls)

    luminance_noise = random_utils.poisson(stddev[1] * intensity * 255, size=hls.shape[:2], random_state=random_state)
    color_noise = random_utils.normal(0, color_shift * 360 * intensity, size=hls.shape[:2], random_state=random_state)

    hue = hls[..., 0]
    hue += color_noise
    hue %= 360

    luminance = hls[..., 1]
    luminance += (luminance_noise / 255) * (1.0 - luminance)

    image = cv2.cvtColor(hls, cv2.COLOR_HLS2RGB) * 255
    return image.astype(np.uint8)
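
A minimal usage sketch; only RGB uint8 images are accepted, and the parameter values are illustrative:

Python
import numpy as np
from albumentations.augmentations.functional import iso_noise

image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)  # RGB uint8 only
noisy = iso_noise(image, color_shift=0.05, intensity=0.5, random_state=42)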

def mask_from_bbox (img, bbox) [view source on GitHub]

Create binary mask from bounding box

Parameters:

img (ndarray): Input image.
bbox (Tuple[int, int, int, int]): A bounding box tuple (x_min, y_min, x_max, y_max).

Returns:

numpy.ndarray: Binary mask.

Source code in albumentations/augmentations/functional.py
Python
def mask_from_bbox(img: np.ndarray, bbox: Tuple[int, int, int, int]) -> np.ndarray:
    """Create binary mask from bounding box

    Args:
        img: input image
        bbox: A bounding box tuple `(x_min, y_min, x_max, y_max)`

    Returns:
        mask: binary mask

    """
    mask = np.zeros(img.shape[:2], dtype=np.uint8)
    x_min, y_min, x_max, y_max = bbox
    mask[y_min:y_max, x_min:x_max] = 1
    return mask
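
A minimal usage sketch; the box coordinates are illustrative:

Python
import numpy as np
from albumentations.augmentations.functional import mask_from_bbox

image = np.zeros((100, 100, 3), dtype=np.uint8)  # dummy image, only its spatial shape is used
mask = mask_from_bbox(image, bbox=(10, 20, 50, 40))  # ones inside rows 20:40 and columns 10:50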

def move_tone_curve (img, low_y, high_y) [view source on GitHub]

Rescales the relationship between bright and dark areas of the image by manipulating its tone curve.

Parameters:

img (ndarray): RGB or grayscale image.
low_y (float): y-position of a Bezier control point used to adjust the tone curve; must be in range [0, 1].
high_y (float): y-position of a Bezier control point used to adjust the image tone curve; must be in range [0, 1].

Source code in albumentations/augmentations/functional.py
Python
@preserve_channel_dim
def move_tone_curve(img: np.ndarray, low_y: float, high_y: float) -> np.ndarray:
    """Rescales the relationship between bright and dark areas of the image by manipulating its tone curve.

    Args:
        img: RGB or grayscale image.
        low_y: y-position of a Bezier control point used
            to adjust the tone curve, must be in range [0, 1]
        high_y: y-position of a Bezier control point used
            to adjust image tone curve, must be in range [0, 1]

    """
    input_dtype = img.dtype

    if not 0 <= low_y <= 1:
        msg = "low_shift must be in range [0, 1]"
        raise ValueError(msg)
    if not 0 <= high_y <= 1:
        msg = "high_shift must be in range [0, 1]"
        raise ValueError(msg)

    if input_dtype != np.uint8:
        raise ValueError(f"Unsupported image type {input_dtype}")

    t = np.linspace(0.0, 1.0, 256)

    # Defines response of a four-point Bezier curve
    def evaluate_bez(t: np.ndarray) -> np.ndarray:
        return 3 * (1 - t) ** 2 * t * low_y + 3 * (1 - t) * t**2 * high_y + t**3

    evaluate_bez = np.vectorize(evaluate_bez)
    remapping = np.rint(evaluate_bez(t) * 255).astype(np.uint8)

    lut_fn = _maybe_process_in_chunks(cv2.LUT, lut=remapping)
    return lut_fn(img)
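
A minimal usage sketch; only uint8 images are supported, and the control-point values are illustrative:

Python
import numpy as np
from albumentations.augmentations.functional import move_tone_curve

image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)  # uint8 only
adjusted = move_tone_curve(image, low_y=0.4, high_y=0.9)  # control points above (1/3, 2/3) brighten the image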

def multiply (img, multiplier) [view source on GitHub]

Parameters:

img (ndarray): Image.
multiplier (ndarray): Multiplier coefficient.

Returns:

ndarray: Image multiplied by multiplier coefficient.

Source code in albumentations/augmentations/functional.py
Python
def multiply(img: np.ndarray, multiplier: np.ndarray) -> np.ndarray:
    """Args:

        img: Image.
        multiplier: Multiplier coefficient.

    Returns:
        Image multiplied by `multiplier` coefficient.

    """
    if img.dtype == np.uint8:
        if len(multiplier.shape) == 1:
            return _multiply_uint8_optimized(img, multiplier)

        return _multiply_uint8(img, multiplier)

    return _multiply_non_uint8(img, multiplier)
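
A minimal usage sketch; a 1-D multiplier with one factor per channel takes the optimized uint8 path shown above:

Python
import numpy as np
from albumentations.augmentations.functional import multiply

image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
per_channel = np.array([1.2, 0.9, 1.05])  # one factor per channel
result = multiply(image, per_channel)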

def normalize_per_image (img, normalization) [view source on GitHub]

Apply per-image normalization based on the specified strategy.

Parameters:

img (np.ndarray): The image to be normalized, expected to be in HWC format.
normalization (str): The normalization strategy to apply. Options include: "image", "image_per_channel", "min_max", "min_max_per_channel".

Returns:

np.ndarray: The normalized image.

Source code in albumentations/augmentations/functional.py
Python
@preserve_channel_dim
def normalize_per_image(
    img: np.ndarray,
    normalization: Literal["image", "image_per_channel", "min_max", "min_max_per_channel"],
) -> np.ndarray:
    """Apply per-image normalization based on the specified strategy.

    Args:
        img (np.ndarray): The image to be normalized, expected to be in HWC format.
        normalization (str): The normalization strategy to apply. Options include:
                             "image", "image_per_channel", "min_max", "min_max_per_channel".

    Returns:
        np.ndarray: The normalized image.

    Reference:
        https://github.com/ChristofHenkel/kaggle-landmark-2021-1st-place/blob/main/data/ch_ds_1.py
    """
    img = img.astype(np.float32)

    if img.ndim == GRAYSCALE_SHAPE_LENGTH:
        img = np.expand_dims(img, axis=-1)  # Ensure the image is at least 3D

    if normalization == "image":
        # Normalize the whole image based on its global mean and std
        mean = img.mean()
        std = img.std() + 1e-4  # Adding a small epsilon to avoid division by zero
        normalized_img = (img - mean) / std
        normalized_img = normalized_img.clip(-20, 20)  # Clipping outliers

    elif normalization == "image_per_channel":
        # Normalize the image per channel based on each channel's mean and std
        pixel_mean = img.mean(axis=(0, 1))
        pixel_std = img.std(axis=(0, 1)) + 1e-4
        normalized_img = (img - pixel_mean[None, None, :]) / pixel_std[None, None, :]
        normalized_img = normalized_img.clip(-20, 20)

    elif normalization == "min_max":
        # Apply min-max normalization to the whole image
        img_min = img.min()
        img_max = img.max()
        normalized_img = (img - img_min) / (img_max - img_min)

    elif normalization == "min_max_per_channel":
        # Apply min-max normalization per channel
        img_min = img.min(axis=(0, 1), keepdims=True)
        img_max = img.max(axis=(0, 1), keepdims=True)
        normalized_img = (img - img_min) / (img_max - img_min)

    else:
        raise ValueError(f"Unknown normalization method: {normalization}")

    return normalized_img
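
A minimal usage sketch contrasting two of the strategies:

Python
import numpy as np
from albumentations.augmentations.functional import normalize_per_image

image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
z_scored = normalize_per_image(image, "image_per_channel")  # roughly zero mean, unit std per channel
scaled = normalize_per_image(image, "min_max")              # rescaled to [0, 1] over the whole image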

def posterize (img, bits) [view source on GitHub]

Reduce the number of bits for each color channel.

Parameters:

img (ndarray): Image to posterize.
bits (int): Number of high bits. Must be in range [0, 8].

Returns:

ndarray: Image with reduced color channels.

Source code in albumentations/augmentations/functional.py
Python
@preserve_channel_dim
def posterize(img: np.ndarray, bits: int) -> np.ndarray:
    """Reduce the number of bits for each color channel.

    Args:
        img: image to posterize.
        bits: number of high bits. Must be in range [0, 8]

    Returns:
        Image with reduced color channels.

    """
    bits_array = np.uint8(bits)

    if img.dtype != np.uint8:
        msg = "Image must have uint8 channel type"
        raise TypeError(msg)
    if np.any((bits_array < 0) | (bits_array > EIGHT)):
        msg = "bits must be in range [0, 8]"
        raise ValueError(msg)

    if not bits_array.shape or len(bits_array) == 1:
        if bits_array == 0:
            return np.zeros_like(img)
        if bits_array == EIGHT:
            return img.copy()

        lut = np.arange(0, 256, dtype=np.uint8)
        mask = ~np.uint8(2 ** (8 - bits_array) - 1)
        lut &= mask

        return cv2.LUT(img, lut)

    if not is_rgb_image(img):
        msg = "If bits is iterable image must be RGB"
        raise TypeError(msg)

    result_img = np.empty_like(img)
    for i, channel_bits in enumerate(bits_array):
        if channel_bits == 0:
            result_img[..., i] = np.zeros_like(img[..., i])
        elif channel_bits == EIGHT:
            result_img[..., i] = img[..., i].copy()
        else:
            lut = np.arange(0, 256, dtype=np.uint8)
            mask = ~np.uint8(2 ** (8 - channel_bits) - 1)
            lut &= mask

            result_img[..., i] = cv2.LUT(img[..., i], lut)

    return result_img
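
A minimal usage sketch; the second call exercises the per-channel branch shown above, which requires an RGB image:

Python
import numpy as np
from albumentations.augmentations.functional import posterize

image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)  # uint8 only
coarse = posterize(image, bits=4)               # keep the 4 high bits of every channel
per_channel = posterize(image, bits=[4, 6, 8])  # different bit depth per channel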

def shuffle_tiles_within_shape_groups (shape_groups, random_state=None) [view source on GitHub]

Shuffles indices within each group of similar shapes and creates a list where each index points to the index of the tile it should be mapped to.

Parameters:

shape_groups (Dict[Tuple[int, int], List[int]]): Groups of tile indices categorized by shape.
random_state (Optional[np.random.RandomState]): Random number generator state used for reproducible shuffling.

Returns:

List[int]: A list where each index is mapped to the new index of the tile after shuffling.

Source code in albumentations/augmentations/functional.py
Python
def shuffle_tiles_within_shape_groups(
    shape_groups: Dict[Tuple[int, int], List[int]],
    random_state: Optional[np.random.RandomState] = None,
) -> List[int]:
    """Shuffles indices within each group of similar shapes and creates a list where each
    index points to the index of the tile it should be mapped to.

    Args:
        shape_groups (Dict[Tuple[int, int], List[int]]): Groups of tile indices categorized by shape.
        random_state (Optional[np.random.RandomState]): Seed for the random number generator for reproducibility.

    Returns:
        List[int]: A list where each index is mapped to the new index of the tile after shuffling.
    """
    # Initialize the output list with the same size as the total number of tiles, filled with -1
    num_tiles = sum(len(indices) for indices in shape_groups.values())
    mapping = [-1] * num_tiles

    # Prepare the random number generator

    for indices in shape_groups.values():
        shuffled_indices = random_utils.shuffle(indices.copy(), random_state=random_state)
        for old, new in zip(indices, shuffled_indices):
            mapping[old] = new

    return mapping

def solarize (img, threshold=128) [view source on GitHub]

Invert all pixel values above a threshold.

Parameters:

img (ndarray): The image to solarize.
threshold (int): All pixels above this grayscale level are inverted.

Returns:

ndarray: Solarized image.

Source code in albumentations/augmentations/functional.py
Python
def solarize(img: np.ndarray, threshold: int = 128) -> np.ndarray:
    """Invert all pixel values above a threshold.

    Args:
        img: The image to solarize.
        threshold: All pixels above this grayscale level are inverted.

    Returns:
        Solarized image.

    """
    dtype = img.dtype
    max_val = MAX_VALUES_BY_DTYPE[dtype]

    if dtype == np.dtype("uint8"):
        lut = [(i if i < threshold else max_val - i) for i in range(int(max_val) + 1)]

        prev_shape = img.shape
        img = cv2.LUT(img, np.array(lut, dtype=dtype))

        if len(prev_shape) != len(img.shape):
            img = np.expand_dims(img, -1)
        return img

    result_img = img.copy()
    cond = img >= threshold
    result_img[cond] = max_val - result_img[cond]
    return result_img
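
A minimal usage sketch:

Python
import numpy as np
from albumentations.augmentations.functional import solarize

image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
solarized = solarize(image, threshold=128)  # pixels >= 128 are mapped to 255 - value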

def split_uniform_grid (image_shape, grid, random_state=None) [view source on GitHub]

Splits an image shape into a uniform grid specified by the grid dimensions.

Parameters:

image_shape (Tuple[int, int]): The shape of the image as (height, width).
grid (Tuple[int, int]): The grid size as (rows, columns).
random_state (Optional[np.random.RandomState]): Random number generator state used for reproducible splits.

Returns:

np.ndarray: An array containing the tiles' coordinates in the format (start_y, start_x, end_y, end_x).

Source code in albumentations/augmentations/functional.py
Python
def split_uniform_grid(
    image_shape: Tuple[int, int],
    grid: Tuple[int, int],
    random_state: Optional[np.random.RandomState] = None,
) -> np.ndarray:
    """Splits an image shape into a uniform grid specified by the grid dimensions.

    Args:
        image_shape (Tuple[int, int]): The shape of the image as (height, width).
        grid (Tuple[int, int]): The grid size as (rows, columns).

    Returns:
        np.ndarray: An array containing the tiles' coordinates in the format (start_y, start_x, end_y, end_x).
    """
    n_rows, n_cols = grid

    height_splits = generate_shuffled_splits(image_shape[0], grid[0], random_state)
    width_splits = generate_shuffled_splits(image_shape[1], grid[1], random_state)

    # Calculate tiles coordinates
    tiles = [
        (height_splits[i], width_splits[j], height_splits[i + 1], width_splits[j + 1])
        for i in range(n_rows)
        for j in range(n_cols)
    ]

    return np.array(tiles)

def swap_tiles_on_image (image, tiles, mapping=None) [view source on GitHub]

Swap tiles on the image according to the provided mapping.

Parameters:

image (ndarray): Input image.
tiles (ndarray): Array of tiles with each tile as [start_y, start_x, end_y, end_x].
mapping (Optional[List[int]]): List of new tile indices.

Returns:

np.ndarray: Output image with tiles swapped according to the random shuffle.

Source code in albumentations/augmentations/functional.py
Python
def swap_tiles_on_image(image: np.ndarray, tiles: np.ndarray, mapping: Optional[List[int]] = None) -> np.ndarray:
    """Swap tiles on the image according to the new format.

    Args:
        image: Input image.
        tiles: Array of tiles with each tile as [start_y, start_x, end_y, end_x].
        mapping: List of new tile indices.

    Returns:
        np.ndarray: Output image with tiles swapped according to the random shuffle.
    """
    # If no tiles are provided, return a copy of the original image
    if tiles.size == 0 or mapping is None:
        return image.copy()

    # Allocate an output image; each destination tile is filled from the source tile given by the mapping
    new_image = np.empty_like(image)
    for num, new_index in enumerate(mapping):
        start_y, start_x, end_y, end_x = tiles[new_index]
        start_y_orig, start_x_orig, end_y_orig, end_x_orig = tiles[num]
        # Assign the corresponding tile from the original image to the new image
        new_image[start_y:end_y, start_x:end_x] = image[start_y_orig:end_y_orig, start_x_orig:end_x_orig]

    return new_image
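
A minimal sketch chaining the tile helpers above (split_uniform_grid, create_shape_groups, shuffle_tiles_within_shape_groups, swap_tiles_on_image); grouping by shape before shuffling keeps every destination tile the same size as its source. The grid size and seed are illustrative:

Python
import numpy as np
from albumentations.augmentations.functional import (
    create_shape_groups,
    shuffle_tiles_within_shape_groups,
    split_uniform_grid,
    swap_tiles_on_image,
)

rng = np.random.RandomState(0)
image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)

tiles = split_uniform_grid((480, 640), grid=(4, 4), random_state=rng)  # (start_y, start_x, end_y, end_x) rows
shape_groups = create_shape_groups(tiles)                              # tile indices grouped by (height, width)
mapping = shuffle_tiles_within_shape_groups(shape_groups, random_state=rng)
shuffled = swap_tiles_on_image(image, tiles, mapping)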