Crop transforms (augmentations.crops.transforms)¶
class BBoxSafeRandomCrop
(erosion_rate=0.0, p=1.0, always_apply=None)
[view source on GitHub] ¶
Crop a random part of the input without loss of bboxes.
Parameters:
Name | Type | Description |
---|---|---|
erosion_rate | float | erosion rate applied on input image height before crop. |
p | float | probability of applying the transform. Default: 1. |
Targets
image, mask, bboxes, keypoints
Image types: uint8, float32
Interactive Tool Available!
Explore this transform visually and adjust parameters interactively using this tool:
Source code in albumentations/augmentations/crops/transforms.py
class BBoxSafeRandomCrop(_BaseCrop):
"""Crop a random part of the input without loss of bboxes.
Args:
erosion_rate: erosion rate applied on input image height before crop.
p: probability of applying the transform. Default: 1.
Targets:
image, mask, bboxes, keypoints
Image types:
uint8, float32
"""
_targets = (Targets.IMAGE, Targets.MASK, Targets.BBOXES, Targets.KEYPOINTS)
class InitSchema(BaseTransformInitSchema):
erosion_rate: float = Field(
default=0.0,
ge=0.0,
le=1.0,
description="Erosion rate applied on input image height before crop.",
)
p: ProbabilityType = 1
def __init__(self, erosion_rate: float = 0.0, p: float = 1.0, always_apply: bool | None = None):
super().__init__(p=p, always_apply=always_apply)
self.erosion_rate = erosion_rate
def _get_coords_no_bbox(self, image_height: int, image_width: int) -> tuple[int, int, int, int]:
erosive_h = int(image_height * (1.0 - self.erosion_rate))
crop_height = image_height if erosive_h >= image_height else random.randint(erosive_h, image_height)
crop_width = int(crop_height * image_width / image_height)
h_start = random.random()
w_start = random.random()
return fcrops.get_crop_coords(image_height, image_width, crop_height, crop_width, h_start, w_start)
def get_params_dependent_on_data(
self,
params: dict[str, Any],
data: dict[str, Any],
) -> dict[str, tuple[int, int, int, int]]:
image_height, image_width = params["shape"][:2]
if len(data["bboxes"]) == 0: # less likely, this class is for use with bboxes.
crop_coords = self._get_coords_no_bbox(image_height, image_width)
return {"crop_coords": crop_coords}
bbox_union = union_of_bboxes(bboxes=data["bboxes"], erosion_rate=self.erosion_rate)
if bbox_union is None:
crop_coords = self._get_coords_no_bbox(image_height, image_width)
return {"crop_coords": crop_coords}
x_min, y_min, x_max, y_max = bbox_union
x_min = np.clip(x_min, 0, 1)
y_min = np.clip(y_min, 0, 1)
x_max = np.clip(x_max, x_min, 1)
y_max = np.clip(y_max, y_min, 1)
crop_x_min = int(x_min * random.random() * image_width)
crop_y_min = int(y_min * random.random() * image_height)
bbox_xmax = x_max + (1 - x_max) * random.random()
bbox_ymax = y_max + (1 - y_max) * random.random()
crop_x_max = int(bbox_xmax * image_width)
crop_y_max = int(bbox_ymax * image_height)
return {"crop_coords": (crop_x_min, crop_y_min, crop_x_max, crop_y_max)}
@property
def targets_as_params(self) -> list[str]:
return ["bboxes"]
def get_transform_init_args_names(self) -> tuple[str, ...]:
return ("erosion_rate",)
targets_as_params: list[str]
property
readonly
¶
Targets used to get params dependent on targets. This is used to check input has all required targets.
get_params_dependent_on_data (self, params, data)
¶
Returns parameters dependent on input.
Source code in albumentations/augmentations/crops/transforms.py
def get_params_dependent_on_data(
self,
params: dict[str, Any],
data: dict[str, Any],
) -> dict[str, tuple[int, int, int, int]]:
image_height, image_width = params["shape"][:2]
if len(data["bboxes"]) == 0: # less likely, this class is for use with bboxes.
crop_coords = self._get_coords_no_bbox(image_height, image_width)
return {"crop_coords": crop_coords}
bbox_union = union_of_bboxes(bboxes=data["bboxes"], erosion_rate=self.erosion_rate)
if bbox_union is None:
crop_coords = self._get_coords_no_bbox(image_height, image_width)
return {"crop_coords": crop_coords}
x_min, y_min, x_max, y_max = bbox_union
x_min = np.clip(x_min, 0, 1)
y_min = np.clip(y_min, 0, 1)
x_max = np.clip(x_max, x_min, 1)
y_max = np.clip(y_max, y_min, 1)
crop_x_min = int(x_min * random.random() * image_width)
crop_y_min = int(y_min * random.random() * image_height)
bbox_xmax = x_max + (1 - x_max) * random.random()
bbox_ymax = y_max + (1 - y_max) * random.random()
crop_x_max = int(bbox_xmax * image_width)
crop_y_max = int(bbox_ymax * image_height)
return {"crop_coords": (crop_x_min, crop_y_min, crop_x_max, crop_y_max)}
get_transform_init_args_names (self)
¶
class CenterCrop
(height, width, p=1.0, always_apply=None)
[view source on GitHub] ¶
Crop the central part of the input.
Parameters:
Name | Type | Description |
---|---|---|
height | int | height of the crop. |
width | int | width of the crop. |
p | float | probability of applying the transform. Default: 1. |
Targets
image, mask, bboxes, keypoints
Image types: uint8, float32
Interactive Tool Available!
Explore this transform visually and adjust parameters interactively using this tool:
Source code in albumentations/augmentations/crops/transforms.py
class CenterCrop(_BaseCrop):
"""Crop the central part of the input.
Args:
height: height of the crop.
width: width of the crop.
p: probability of applying the transform. Default: 1.
Targets:
image, mask, bboxes, keypoints
Image types:
uint8, float32
"""
class InitSchema(CropInitSchema):
pass
def __init__(self, height: int, width: int, p: float = 1.0, always_apply: bool | None = None):
super().__init__(p, always_apply)
self.height = height
self.width = width
def get_transform_init_args_names(self) -> tuple[str, ...]:
return "height", "width"
def get_params_dependent_on_data(
self,
params: dict[str, Any],
data: dict[str, Any],
) -> dict[str, tuple[int, int, int, int]]:
image_height, image_width = params["shape"][:2]
crop_coords = fcrops.get_center_crop_coords(image_height, image_width, self.height, self.width)
return {"crop_coords": crop_coords}
get_params_dependent_on_data (self, params, data)
¶
Returns parameters dependent on input.
Source code in albumentations/augmentations/crops/transforms.py
def get_params_dependent_on_data(
self,
params: dict[str, Any],
data: dict[str, Any],
) -> dict[str, tuple[int, int, int, int]]:
image_height, image_width = params["shape"][:2]
crop_coords = fcrops.get_center_crop_coords(image_height, image_width, self.height, self.width)
return {"crop_coords": crop_coords}
get_transform_init_args_names (self)
¶
class Crop
(x_min=0, y_min=0, x_max=1024, y_max=1024, always_apply=None, p=1.0)
[view source on GitHub] ¶
Crop region from image.
Parameters:
Name | Type | Description |
---|---|---|
x_min | int | Minimum upper left x coordinate. |
y_min | int | Minimum upper left y coordinate. |
x_max | int | Maximum lower right x coordinate. |
y_max | int | Maximum lower right y coordinate. |
Targets
image, mask, bboxes, keypoints
Image types: uint8, float32
Interactive Tool Available!
Explore this transform visually and adjust parameters interactively using this tool:
Source code in albumentations/augmentations/crops/transforms.py
class Crop(_BaseCrop):
"""Crop region from image.
Args:
x_min: Minimum upper left x coordinate.
y_min: Minimum upper left y coordinate.
x_max: Maximum lower right x coordinate.
y_max: Maximum lower right y coordinate.
Targets:
image, mask, bboxes, keypoints
Image types:
uint8, float32
"""
class InitSchema(BaseTransformInitSchema):
x_min: Annotated[int, Field(ge=0, description="Minimum upper left x coordinate")]
y_min: Annotated[int, Field(ge=0, description="Minimum upper left y coordinate")]
x_max: Annotated[int, Field(gt=0, description="Maximum lower right x coordinate")]
y_max: Annotated[int, Field(gt=0, description="Maximum lower right y coordinate")]
p: ProbabilityType = 1
@model_validator(mode="after")
def validate_coordinates(self) -> Self:
if not self.x_min < self.x_max:
msg = "x_max must be greater than x_min"
raise ValueError(msg)
if not self.y_min < self.y_max:
msg = "y_max must be greater than y_min"
raise ValueError(msg)
return self
def __init__(
self,
x_min: int = 0,
y_min: int = 0,
x_max: int = 1024,
y_max: int = 1024,
always_apply: bool | None = None,
p: float = 1.0,
):
super().__init__(p=p, always_apply=always_apply)
self.x_min = x_min
self.y_min = y_min
self.x_max = x_max
self.y_max = y_max
def get_transform_init_args_names(self) -> tuple[str, ...]:
return "x_min", "y_min", "x_max", "y_max"
def get_params_dependent_on_data(
self,
params: dict[str, Any],
data: dict[str, Any],
) -> dict[str, tuple[int, int, int, int]]:
return {"crop_coords": (self.x_min, self.y_min, self.x_max, self.y_max)}
class CropAndPad
(px=None, percent=None, pad_mode=0, pad_cval=0, pad_cval_mask=0, keep_size=True, sample_independently=True, interpolation=1, always_apply=None, p=1.0)
[view source on GitHub] ¶
Crop and pad images by pixel amounts or fractions of image sizes. Cropping removes pixels at the sides (i.e., extracts a subimage from a given full image). Padding adds pixels to the sides (e.g., black pixels). This transformation will never crop images below a height or width of 1.
Note
This transformation automatically resizes images back to their original size. To deactivate this, add the parameter keep_size=False
.
Parameters:
Name | Type | Description |
---|---|---|
px | int, tuple[int, int], tuple[int, int, int, int], tuple[Union[int, tuple[int, int], list[int]], Union[int, tuple[int, int], list[int]], Union[int, tuple[int, int], list[int]], Union[int, tuple[int, int], list[int]]] | The number of pixels to crop (negative values) or pad (positive values) on each side of the image. Either this or the parameter
|
percent | float, tuple[float, float], tuple[float, float, float, float], tuple[Union[float, tuple[float, float], list[float]], Union[float, tuple[float, float], list[float]], Union[float, tuple[float, float], list[float]], Union[float, tuple[float, float], list[float]]] | The number of pixels to crop (negative values) or pad (positive values) on each side of the image given as a fraction of the image height/width. E.g. if this is set to
|
pad_mode | int | OpenCV border mode. |
pad_cval | Union[int, float, tuple[Union[int, float], Union[int, float]], list[Union[int, float]]] | The constant value to use if the pad mode is |
pad_cval_mask | Union[int, float, tuple[Union[int, float], Union[int, float]], list[Union[int, float]]] | Same as |
keep_size | bool | After cropping and padding, the resulting image will usually have a different height/width compared to the original input image. If this parameter is set to |
sample_independently | bool | If |
interpolation | int | OpenCV flag that is used to specify the interpolation algorithm for images. Should be one of: |
Targets
image, mask, bboxes, keypoints
Image types: unit8, float32
Interactive Tool Available!
Explore this transform visually and adjust parameters interactively using this tool:
Source code in albumentations/augmentations/crops/transforms.py
class CropAndPad(DualTransform):
"""Crop and pad images by pixel amounts or fractions of image sizes.
Cropping removes pixels at the sides (i.e., extracts a subimage from a given full image).
Padding adds pixels to the sides (e.g., black pixels).
This transformation will never crop images below a height or width of 1.
Note:
This transformation automatically resizes images back to their original size. To deactivate this, add the
parameter `keep_size=False`.
Args:
px (int,
tuple[int, int],
tuple[int, int, int, int],
tuple[Union[int, tuple[int, int], list[int]],
Union[int, tuple[int, int], list[int]],
Union[int, tuple[int, int], list[int]],
Union[int, tuple[int, int], list[int]]]):
The number of pixels to crop (negative values) or pad (positive values) on each side of the image.
Either this or the parameter `percent` may be set, not both at the same time.
* If `None`, then pixel-based cropping/padding will not be used.
* If `int`, then that exact number of pixels will always be cropped/padded.
* If a `tuple` of two `int`s with values `a` and `b`, then each side will be cropped/padded by a
random amount sampled uniformly per image and side from the interval `[a, b]`.
If `sample_independently` is set to `False`, only one value will be sampled per
image and used for all sides.
* If a `tuple` of four entries, then the entries represent top, right, bottom, and left.
Each entry may be:
- A single `int` (always crop/pad by exactly that value).
- A `tuple` of two `int`s `a` and `b` (crop/pad by an amount within `[a, b]`).
- A `list` of `int`s (crop/pad by a random value that is contained in the `list`).
percent (float,
tuple[float, float],
tuple[float, float, float, float],
tuple[Union[float, tuple[float, float], list[float]],
Union[float, tuple[float, float], list[float]],
Union[float, tuple[float, float], list[float]],
Union[float, tuple[float, float], list[float]]]):
The number of pixels to crop (negative values) or pad (positive values) on each side of the image given
as a *fraction* of the image height/width. E.g. if this is set to `-0.1`, the transformation will
always crop away `10%` of the image's height at both the top and the bottom (both `10%` each),
as well as `10%` of the width at the right and left. Expected value range is `(-1.0, inf)`.
Either this or the parameter `px` may be set, not both at the same time.
* If `None`, then fraction-based cropping/padding will not be used.
* If `float`, then that fraction will always be cropped/padded.
* If a `tuple` of two `float`s with values `a` and `b`, then each side will be cropped/padded by a
random fraction sampled uniformly per image and side from the interval `[a, b]`.
If `sample_independently` is set to `False`, only one value will be sampled per image and used
for all sides.
* If a `tuple` of four entries, then the entries represent top, right, bottom, and left.
Each entry may be:
- A single `float` (always crop/pad by exactly that percent value).
- A `tuple` of two `float`s `a` and `b` (crop/pad by a fraction from `[a, b]`).
- A `list` of `float`s (crop/pad by a random value that is contained in the `list`).
pad_mode (int): OpenCV border mode.
pad_cval (Union[int, float, tuple[Union[int, float], Union[int, float]], list[Union[int, float]]]):
The constant value to use if the pad mode is `BORDER_CONSTANT`.
* If `number`, then that value will be used.
* If a `tuple` of two numbers and at least one of them is a `float`, then a random number
will be uniformly sampled per image from the continuous interval `[a, b]` and used as the value.
If both numbers are `int`s, the interval is discrete.
* If a `list` of numbers, then a random value will be chosen from the elements of the `list` and
used as the value.
pad_cval_mask (Union[int, float, tuple[Union[int, float], Union[int, float]], list[Union[int, float]]]):
Same as `pad_cval` but only for masks.
keep_size (bool):
After cropping and padding, the resulting image will usually have a different height/width compared to
the original input image. If this parameter is set to `True`, then the cropped/padded image will be
resized to the input image's size, i.e., the output shape is always identical to the input shape.
sample_independently (bool):
If `False` and the values for `px`/`percent` result in exactly one probability distribution for all
image sides, only one single value will be sampled from that probability distribution and used for
all sides. I.e., the crop/pad amount then is the same for all sides. If `True`, four values
will be sampled independently, one per side.
interpolation (int):
OpenCV flag that is used to specify the interpolation algorithm for images. Should be one of:
`cv2.INTER_NEAREST`, `cv2.INTER_LINEAR`, `cv2.INTER_CUBIC`, `cv2.INTER_AREA`, `cv2.INTER_LANCZOS4`.
Default: `cv2.INTER_LINEAR`.
Targets:
image, mask, bboxes, keypoints
Image types:
unit8, float32
"""
_targets = (Targets.IMAGE, Targets.MASK, Targets.BBOXES, Targets.KEYPOINTS)
class InitSchema(BaseTransformInitSchema):
px: PxType | None = Field(
default=None,
description="Number of pixels to crop (negative) or pad (positive).",
)
percent: PercentType | None = Field(
default=None,
description="Fraction of image size to crop (negative) or pad (positive).",
)
pad_mode: BorderModeType = cv2.BORDER_CONSTANT
pad_cval: ScalarType | tuple[ScalarType, ScalarType] | list[ScalarType] = Field(
default=0,
description="Padding value if pad_mode is BORDER_CONSTANT.",
)
pad_cval_mask: ScalarType | tuple[ScalarType, ScalarType] | list[ScalarType] = Field(
default=0,
description="Padding value for masks if pad_mode is BORDER_CONSTANT.",
)
keep_size: bool = Field(
default=True,
description="Whether to resize the image back to the original size after cropping and padding.",
)
sample_independently: bool = Field(
default=True,
description="Whether to sample the crop/pad size independently for each side.",
)
interpolation: InterpolationType = cv2.INTER_LINEAR
p: ProbabilityType = 1
@model_validator(mode="after")
def check_px_percent(self) -> Self:
if self.px is None and self.percent is None:
msg = "Both px and percent parameters cannot be None simultaneously."
raise ValueError(msg)
if self.px is not None and self.percent is not None:
msg = "Only px or percent may be set!"
raise ValueError(msg)
return self
def __init__(
self,
px: int | list[int] | None = None,
percent: float | list[float] | None = None,
pad_mode: int = cv2.BORDER_CONSTANT,
pad_cval: ScalarType | tuple[ScalarType, ScalarType] | list[ScalarType] = 0,
pad_cval_mask: ScalarType | tuple[ScalarType, ScalarType] | list[ScalarType] = 0,
keep_size: bool = True,
sample_independently: bool = True,
interpolation: int = cv2.INTER_LINEAR,
always_apply: bool | None = None,
p: float = 1.0,
):
super().__init__(p=p, always_apply=always_apply)
self.px = px
self.percent = percent
self.pad_mode = pad_mode
self.pad_cval = pad_cval
self.pad_cval_mask = pad_cval_mask
self.keep_size = keep_size
self.sample_independently = sample_independently
self.interpolation = interpolation
def apply(
self,
img: np.ndarray,
crop_params: Sequence[int],
pad_params: Sequence[int],
pad_value: ColorType,
rows: int,
cols: int,
interpolation: int,
**params: Any,
) -> np.ndarray:
return fcrops.crop_and_pad(
img,
crop_params,
pad_params,
pad_value,
rows,
cols,
interpolation,
self.pad_mode,
self.keep_size,
)
def apply_to_mask(
self,
mask: np.ndarray,
crop_params: Sequence[int],
pad_params: Sequence[int],
pad_value_mask: float,
rows: int,
cols: int,
interpolation: int,
**params: Any,
) -> np.ndarray:
return fcrops.crop_and_pad(
mask,
crop_params,
pad_params,
pad_value_mask,
rows,
cols,
interpolation,
self.pad_mode,
self.keep_size,
)
def apply_to_bbox(
self,
bbox: BoxInternalType,
crop_params: Sequence[int],
pad_params: Sequence[int],
rows: int,
cols: int,
result_rows: int,
result_cols: int,
**params: Any,
) -> BoxInternalType:
return fcrops.crop_and_pad_bbox(bbox, crop_params, pad_params, rows, cols, result_rows, result_cols)
def apply_to_keypoint(
self,
keypoint: KeypointInternalType,
crop_params: Sequence[int],
pad_params: Sequence[int],
rows: int,
cols: int,
result_rows: int,
result_cols: int,
**params: Any,
) -> KeypointInternalType:
return fcrops.crop_and_pad_keypoint(
keypoint,
crop_params,
pad_params,
rows,
cols,
result_rows,
result_cols,
self.keep_size,
)
@staticmethod
def __prevent_zero(val1: int, val2: int, max_val: int) -> tuple[int, int]:
regain = abs(max_val) + 1
regain1 = regain // 2
regain2 = regain // 2
if regain1 + regain2 < regain:
regain1 += 1
if regain1 > val1:
diff = regain1 - val1
regain1 = val1
regain2 += diff
elif regain2 > val2:
diff = regain2 - val2
regain2 = val2
regain1 += diff
return val1 - regain1, val2 - regain2
@staticmethod
def _prevent_zero(crop_params: list[int], height: int, width: int) -> list[int]:
top, right, bottom, left = crop_params
remaining_height = height - (top + bottom)
remaining_width = width - (left + right)
if remaining_height < 1:
top, bottom = CropAndPad.__prevent_zero(top, bottom, height)
if remaining_width < 1:
left, right = CropAndPad.__prevent_zero(left, right, width)
return [max(top, 0), max(right, 0), max(bottom, 0), max(left, 0)]
def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
height, width = params["shape"][:2]
if self.px is not None:
new_params = self._get_px_params()
else:
percent_params = self._get_percent_params()
new_params = [
int(percent_params[0] * height),
int(percent_params[1] * width),
int(percent_params[2] * height),
int(percent_params[3] * width),
]
pad_params = [max(i, 0) for i in new_params]
crop_params = self._prevent_zero([-min(i, 0) for i in new_params], height, width)
top, right, bottom, left = crop_params
crop_params = [left, top, width - right, height - bottom]
result_rows = crop_params[3] - crop_params[1]
result_cols = crop_params[2] - crop_params[0]
if result_cols == width and result_rows == height:
crop_params = []
top, right, bottom, left = pad_params
pad_params = [top, bottom, left, right]
if any(pad_params):
result_rows += top + bottom
result_cols += left + right
else:
pad_params = []
return {
"crop_params": crop_params or None,
"pad_params": pad_params or None,
"pad_value": None if pad_params is None else self._get_pad_value(self.pad_cval),
"pad_value_mask": None if pad_params is None else self._get_pad_value(self.pad_cval_mask),
"result_rows": result_rows,
"result_cols": result_cols,
}
def _get_px_params(self) -> list[int]:
if self.px is None:
msg = "px is not set"
raise ValueError(msg)
if isinstance(self.px, int):
params = [self.px] * 4
elif len(self.px) == PAIR:
if self.sample_independently:
params = [random.randrange(*self.px) for _ in range(4)]
else:
px = random.randrange(*self.px)
params = [px] * 4
elif isinstance(self.px[0], int):
params = self.px
elif len(self.px[0]) == PAIR:
params = [random.randrange(*i) for i in self.px]
else:
params = [random.choice(i) for i in self.px]
return params
def _get_percent_params(self) -> list[float]:
if self.percent is None:
msg = "percent is not set"
raise ValueError(msg)
if isinstance(self.percent, float):
params = [self.percent] * 4
elif len(self.percent) == PAIR:
if self.sample_independently:
params = [random.uniform(*self.percent) for _ in range(4)]
else:
px = random.uniform(*self.percent)
params = [px] * 4
elif isinstance(self.percent[0], (int, float)):
params = self.percent
elif len(self.percent[0]) == PAIR:
params = [random.uniform(*i) for i in self.percent]
else:
params = [random.choice(i) for i in self.percent]
return params # params = [top, right, bottom, left]
@staticmethod
def _get_pad_value(
pad_value: ScalarType | tuple[ScalarType, ScalarType] | list[ScalarType],
) -> ScalarType:
if isinstance(pad_value, (int, float)):
return pad_value
if len(pad_value) == PAIR:
a, b = pad_value
if isinstance(a, int) and isinstance(b, int):
return random.randint(a, b)
return random.uniform(a, b)
return random.choice(pad_value)
def get_transform_init_args_names(self) -> tuple[str, ...]:
return (
"px",
"percent",
"pad_mode",
"pad_cval",
"pad_cval_mask",
"keep_size",
"sample_independently",
"interpolation",
)
apply (self, img, crop_params, pad_params, pad_value, rows, cols, interpolation, **params)
¶
Apply transform on image.
Source code in albumentations/augmentations/crops/transforms.py
def apply(
self,
img: np.ndarray,
crop_params: Sequence[int],
pad_params: Sequence[int],
pad_value: ColorType,
rows: int,
cols: int,
interpolation: int,
**params: Any,
) -> np.ndarray:
return fcrops.crop_and_pad(
img,
crop_params,
pad_params,
pad_value,
rows,
cols,
interpolation,
self.pad_mode,
self.keep_size,
)
get_params_dependent_on_data (self, params, data)
¶
Returns parameters dependent on input.
Source code in albumentations/augmentations/crops/transforms.py
def get_params_dependent_on_data(self, params: dict[str, Any], data: dict[str, Any]) -> dict[str, Any]:
height, width = params["shape"][:2]
if self.px is not None:
new_params = self._get_px_params()
else:
percent_params = self._get_percent_params()
new_params = [
int(percent_params[0] * height),
int(percent_params[1] * width),
int(percent_params[2] * height),
int(percent_params[3] * width),
]
pad_params = [max(i, 0) for i in new_params]
crop_params = self._prevent_zero([-min(i, 0) for i in new_params], height, width)
top, right, bottom, left = crop_params
crop_params = [left, top, width - right, height - bottom]
result_rows = crop_params[3] - crop_params[1]
result_cols = crop_params[2] - crop_params[0]
if result_cols == width and result_rows == height:
crop_params = []
top, right, bottom, left = pad_params
pad_params = [top, bottom, left, right]
if any(pad_params):
result_rows += top + bottom
result_cols += left + right
else:
pad_params = []
return {
"crop_params": crop_params or None,
"pad_params": pad_params or None,
"pad_value": None if pad_params is None else self._get_pad_value(self.pad_cval),
"pad_value_mask": None if pad_params is None else self._get_pad_value(self.pad_cval_mask),
"result_rows": result_rows,
"result_cols": result_cols,
}
get_transform_init_args_names (self)
¶
Returns names of arguments that are used in init method of the transform.
class CropNonEmptyMaskIfExists
(height, width, ignore_values=None, ignore_channels=None, always_apply=None, p=1.0)
[view source on GitHub] ¶
Crop area with mask if mask is non-empty, else make random crop.
Parameters:
Name | Type | Description |
---|---|---|
height | int | vertical size of crop in pixels |
width | int | horizontal size of crop in pixels |
ignore_values | list of int | values to ignore in mask, |
ignore_channels | list of int | channels to ignore in mask (e.g. if background is a first channel set |
p | float | probability of applying the transform. Default: 1.0. |
Targets
image, mask, bboxes, keypoints
Image types: uint8, float32
Interactive Tool Available!
Explore this transform visually and adjust parameters interactively using this tool:
Source code in albumentations/augmentations/crops/transforms.py
class CropNonEmptyMaskIfExists(_BaseCrop):
"""Crop area with mask if mask is non-empty, else make random crop.
Args:
height: vertical size of crop in pixels
width: horizontal size of crop in pixels
ignore_values (list of int): values to ignore in mask, `0` values are always ignored
(e.g. if background value is 5 set `ignore_values=[5]` to ignore)
ignore_channels (list of int): channels to ignore in mask
(e.g. if background is a first channel set `ignore_channels=[0]` to ignore)
p: probability of applying the transform. Default: 1.0.
Targets:
image, mask, bboxes, keypoints
Image types:
uint8, float32
"""
class InitSchema(CropInitSchema):
ignore_values: list[int] | None = Field(
default=None,
description="Values to ignore in mask, `0` values are always ignored",
)
ignore_channels: list[int] | None = Field(default=None, description="Channels to ignore in mask")
def __init__(
self,
height: int,
width: int,
ignore_values: list[int] | None = None,
ignore_channels: list[int] | None = None,
always_apply: bool | None = None,
p: float = 1.0,
):
super().__init__(p, always_apply)
self.height = height
self.width = width
self.ignore_values = ignore_values
self.ignore_channels = ignore_channels
def _preprocess_mask(self, mask: np.ndarray) -> np.ndarray:
mask_height, mask_width = mask.shape[:2]
if self.ignore_values is not None:
ignore_values_np = np.array(self.ignore_values)
mask = np.where(np.isin(mask, ignore_values_np), 0, mask)
if mask.ndim == NUM_MULTI_CHANNEL_DIMENSIONS and self.ignore_channels is not None:
target_channels = np.array([ch for ch in range(mask.shape[-1]) if ch not in self.ignore_channels])
mask = np.take(mask, target_channels, axis=-1)
if self.height > mask_height or self.width > mask_width:
raise ValueError(
f"Crop size ({self.height},{self.width}) is larger than image ({mask_height},{mask_width})",
)
return mask
def update_params(self, params: dict[str, Any], **kwargs: Any) -> dict[str, Any]:
super().update_params(params, **kwargs)
if "mask" in kwargs:
mask = self._preprocess_mask(kwargs["mask"])
elif "masks" in kwargs and len(kwargs["masks"]):
masks = kwargs["masks"]
mask = self._preprocess_mask(np.copy(masks[0])) # need copy as we perform in-place mod afterwards
for m in masks[1:]:
mask |= self._preprocess_mask(m)
else:
msg = "Can not find mask for CropNonEmptyMaskIfExists"
raise RuntimeError(msg)
mask_height, mask_width = mask.shape[:2]
if mask.any():
mask = mask.sum(axis=-1) if mask.ndim == NUM_MULTI_CHANNEL_DIMENSIONS else mask
non_zero_yx = np.argwhere(mask)
y, x = random.choice(non_zero_yx)
x_min = x - random.randint(0, self.width - 1)
y_min = y - random.randint(0, self.height - 1)
x_min = np.clip(x_min, 0, mask_width - self.width)
y_min = np.clip(y_min, 0, mask_height - self.height)
else:
x_min = random.randint(0, mask_width - self.width)
y_min = random.randint(0, mask_height - self.height)
x_max = x_min + self.width
y_max = y_min + self.height
crop_coords = x_min, y_min, x_max, y_max
params["crop_coords"] = crop_coords
return params
def get_transform_init_args_names(self) -> tuple[str, ...]:
return "height", "width", "ignore_values", "ignore_channels"
get_transform_init_args_names (self)
¶
update_params (self, params, **kwargs)
¶
Update parameters with transform specific params. This method is deprecated, use: - get_params
for transform specific params like interpolation and - update_params_shape
for data like shape.
Source code in albumentations/augmentations/crops/transforms.py
def update_params(self, params: dict[str, Any], **kwargs: Any) -> dict[str, Any]:
super().update_params(params, **kwargs)
if "mask" in kwargs:
mask = self._preprocess_mask(kwargs["mask"])
elif "masks" in kwargs and len(kwargs["masks"]):
masks = kwargs["masks"]
mask = self._preprocess_mask(np.copy(masks[0])) # need copy as we perform in-place mod afterwards
for m in masks[1:]:
mask |= self._preprocess_mask(m)
else:
msg = "Can not find mask for CropNonEmptyMaskIfExists"
raise RuntimeError(msg)
mask_height, mask_width = mask.shape[:2]
if mask.any():
mask = mask.sum(axis=-1) if mask.ndim == NUM_MULTI_CHANNEL_DIMENSIONS else mask
non_zero_yx = np.argwhere(mask)
y, x = random.choice(non_zero_yx)
x_min = x - random.randint(0, self.width - 1)
y_min = y - random.randint(0, self.height - 1)
x_min = np.clip(x_min, 0, mask_width - self.width)
y_min = np.clip(y_min, 0, mask_height - self.height)
else:
x_min = random.randint(0, mask_width - self.width)
y_min = random.randint(0, mask_height - self.height)
x_max = x_min + self.width
y_max = y_min + self.height
crop_coords = x_min, y_min, x_max, y_max
params["crop_coords"] = crop_coords
return params
class RandomCrop
(height, width, p=1.0, always_apply=None)
[view source on GitHub] ¶
Crop a random part of the input.
Parameters:
Name | Type | Description |
---|---|---|
height | int | height of the crop. |
width | int | width of the crop. |
p | float | probability of applying the transform. Default: 1. |
Targets
image, mask, bboxes, keypoints
Image types: uint8, float32
Interactive Tool Available!
Explore this transform visually and adjust parameters interactively using this tool:
Source code in albumentations/augmentations/crops/transforms.py
class RandomCrop(_BaseCrop):
"""Crop a random part of the input.
Args:
height: height of the crop.
width: width of the crop.
p: probability of applying the transform. Default: 1.
Targets:
image, mask, bboxes, keypoints
Image types:
uint8, float32
"""
class InitSchema(CropInitSchema):
pass
def __init__(self, height: int, width: int, p: float = 1.0, always_apply: bool | None = None):
super().__init__(p=p, always_apply=always_apply)
self.height = height
self.width = width
def get_params_dependent_on_data(
self,
params: dict[str, Any],
data: dict[str, Any],
) -> dict[str, tuple[int, int, int, int]]:
shape = params["shape"]
image_height, image_width = shape[:2]
if self.height > image_height or self.width > image_width:
raise CropSizeError(
f"Crop size (height, width) exceeds image dimensions (height, width):"
f" {(self.height, self.width)} vs {shape[:2]}",
)
h_start = random.random()
w_start = random.random()
crop_coords = fcrops.get_crop_coords(image_height, image_width, self.height, self.width, h_start, w_start)
return {"crop_coords": crop_coords}
def get_transform_init_args_names(self) -> tuple[str, ...]:
return "height", "width"
get_params_dependent_on_data (self, params, data)
¶
Returns parameters dependent on input.
Source code in albumentations/augmentations/crops/transforms.py
def get_params_dependent_on_data(
self,
params: dict[str, Any],
data: dict[str, Any],
) -> dict[str, tuple[int, int, int, int]]:
shape = params["shape"]
image_height, image_width = shape[:2]
if self.height > image_height or self.width > image_width:
raise CropSizeError(
f"Crop size (height, width) exceeds image dimensions (height, width):"
f" {(self.height, self.width)} vs {shape[:2]}",
)
h_start = random.random()
w_start = random.random()
crop_coords = fcrops.get_crop_coords(image_height, image_width, self.height, self.width, h_start, w_start)
return {"crop_coords": crop_coords}
get_transform_init_args_names (self)
¶
class RandomCropFromBorders
(crop_left=0.1, crop_right=0.1, crop_top=0.1, crop_bottom=0.1, always_apply=None, p=1.0)
[view source on GitHub] ¶
Randomly crops parts of the image from the borders without resizing at the end. The cropped regions are defined as fractions of the original image dimensions, specified for each side of the image (left, right, top, bottom).
Parameters:
Name | Type | Description |
---|---|---|
crop_left | float | Fraction of the width to randomly crop from the left side. Must be in the range [0.0, 1.0]. Default is 0.1. |
crop_right | float | Fraction of the width to randomly crop from the right side. Must be in the range [0.0, 1.0]. Default is 0.1. |
crop_top | float | Fraction of the height to randomly crop from the top side. Must be in the range [0.0, 1.0]. Default is 0.1. |
crop_bottom | float | Fraction of the height to randomly crop from the bottom side. Must be in the range [0.0, 1.0]. Default is 0.1. |
p | float | Probability of applying the transform. Default is 1. |
Targets
image, mask, bboxes, keypoints
Image types: uint8, float32
Interactive Tool Available!
Explore this transform visually and adjust parameters interactively using this tool:
Source code in albumentations/augmentations/crops/transforms.py
class RandomCropFromBorders(_BaseCrop):
"""Randomly crops parts of the image from the borders without resizing at the end. The cropped regions are defined
as fractions of the original image dimensions, specified for each side of the image (left, right, top, bottom).
Args:
crop_left (float): Fraction of the width to randomly crop from the left side. Must be in the range [0.0, 1.0].
Default is 0.1.
crop_right (float): Fraction of the width to randomly crop from the right side. Must be in the range [0.0, 1.0].
Default is 0.1.
crop_top (float): Fraction of the height to randomly crop from the top side. Must be in the range [0.0, 1.0].
Default is 0.1.
crop_bottom (float): Fraction of the height to randomly crop from the bottom side.
Must be in the range [0.0, 1.0]. Default is 0.1.
p (float): Probability of applying the transform. Default is 1.
Targets:
image, mask, bboxes, keypoints
Image types:
uint8, float32
"""
_targets = (Targets.IMAGE, Targets.MASK, Targets.BBOXES, Targets.KEYPOINTS)
class InitSchema(BaseTransformInitSchema):
crop_left: float = Field(
default=0.1,
ge=0.0,
le=1.0,
description="Fraction of width to randomly crop from the left side.",
)
crop_right: float = Field(
default=0.1,
ge=0.0,
le=1.0,
description="Fraction of width to randomly crop from the right side.",
)
crop_top: float = Field(
default=0.1,
ge=0.0,
le=1.0,
description="Fraction of height to randomly crop from the top side.",
)
crop_bottom: float = Field(
default=0.1,
ge=0.0,
le=1.0,
description="Fraction of height to randomly crop from the bottom side.",
)
p: ProbabilityType = 1
@model_validator(mode="after")
def validate_crop_values(self) -> Self:
if self.crop_left + self.crop_right > 1.0:
msg = "The sum of crop_left and crop_right must be <= 1."
raise ValueError(msg)
if self.crop_top + self.crop_bottom > 1.0:
msg = "The sum of crop_top and crop_bottom must be <= 1."
raise ValueError(msg)
return self
def __init__(
self,
crop_left: float = 0.1,
crop_right: float = 0.1,
crop_top: float = 0.1,
crop_bottom: float = 0.1,
always_apply: bool | None = None,
p: float = 1.0,
):
super().__init__(p, always_apply)
self.crop_left = crop_left
self.crop_right = crop_right
self.crop_top = crop_top
self.crop_bottom = crop_bottom
def get_params_dependent_on_data(
self,
params: dict[str, Any],
data: dict[str, Any],
) -> dict[str, tuple[int, int, int, int]]:
height, width = params["shape"][:2]
x_min = random.randint(0, int(self.crop_left * width))
x_max = random.randint(max(x_min + 1, int((1 - self.crop_right) * width)), width)
y_min = random.randint(0, int(self.crop_top * height))
y_max = random.randint(max(y_min + 1, int((1 - self.crop_bottom) * height)), height)
crop_coords = x_min, y_min, x_max, y_max
return {"crop_coords": crop_coords}
def get_transform_init_args_names(self) -> tuple[str, ...]:
return "crop_left", "crop_right", "crop_top", "crop_bottom"
get_params_dependent_on_data (self, params, data)
¶
Returns parameters dependent on input.
Source code in albumentations/augmentations/crops/transforms.py
def get_params_dependent_on_data(
self,
params: dict[str, Any],
data: dict[str, Any],
) -> dict[str, tuple[int, int, int, int]]:
height, width = params["shape"][:2]
x_min = random.randint(0, int(self.crop_left * width))
x_max = random.randint(max(x_min + 1, int((1 - self.crop_right) * width)), width)
y_min = random.randint(0, int(self.crop_top * height))
y_max = random.randint(max(y_min + 1, int((1 - self.crop_bottom) * height)), height)
crop_coords = x_min, y_min, x_max, y_max
return {"crop_coords": crop_coords}
get_transform_init_args_names (self)
¶
class RandomCropNearBBox
(max_part_shift=(0, 0.3), cropping_bbox_key='cropping_bbox', cropping_box_key=None, always_apply=None, p=1.0)
[view source on GitHub] ¶
Crop bbox from image with random shift by x,y coordinates
Parameters:
Name | Type | Description |
---|---|---|
max_part_shift | float, (float, float | Max shift in |
cropping_bbox_key | str | Additional target key for cropping box. Default |
cropping_box_key | str | [Deprecated] Use |
p | float | probability of applying the transform. Default: 1. |
Targets
image, mask, bboxes, keypoints
Image types: uint8, float32
Examples:
>>> aug = Compose([RandomCropNearBBox(max_part_shift=(0.1, 0.5), cropping_bbox_key='test_bbox')],
>>> bbox_params=BboxParams("pascal_voc"))
>>> result = aug(image=image, bboxes=bboxes, test_bbox=[0, 5, 10, 20])
Interactive Tool Available!
Explore this transform visually and adjust parameters interactively using this tool:
Source code in albumentations/augmentations/crops/transforms.py
class RandomCropNearBBox(_BaseCrop):
"""Crop bbox from image with random shift by x,y coordinates
Args:
max_part_shift (float, (float, float)): Max shift in `height` and `width` dimensions relative
to `cropping_bbox` dimension.
If max_part_shift is a single float, the range will be (0, max_part_shift).
Default (0, 0.3).
cropping_bbox_key (str): Additional target key for cropping box. Default `cropping_bbox`.
cropping_box_key (str): [Deprecated] Use `cropping_bbox_key` instead.
p (float): probability of applying the transform. Default: 1.
Targets:
image, mask, bboxes, keypoints
Image types:
uint8, float32
Examples:
>>> aug = Compose([RandomCropNearBBox(max_part_shift=(0.1, 0.5), cropping_bbox_key='test_bbox')],
>>> bbox_params=BboxParams("pascal_voc"))
>>> result = aug(image=image, bboxes=bboxes, test_bbox=[0, 5, 10, 20])
"""
_targets = (Targets.IMAGE, Targets.MASK, Targets.BBOXES, Targets.KEYPOINTS)
class InitSchema(BaseTransformInitSchema):
max_part_shift: ZeroOneRangeType = (0, 0.3)
cropping_bbox_key: str = Field(default="cropping_bbox", description="Additional target key for cropping box.")
p: ProbabilityType = 1
def __init__(
self,
max_part_shift: ScaleFloatType = (0, 0.3),
cropping_bbox_key: str = "cropping_bbox",
cropping_box_key: str | None = None, # Deprecated
always_apply: bool | None = None,
p: float = 1.0,
):
super().__init__(p=p, always_apply=always_apply)
# Check for deprecated parameter and issue warning
if cropping_box_key is not None:
warn(
"The parameter 'cropping_box_key' is deprecated and will be removed in future versions. "
"Use 'cropping_bbox_key' instead.",
DeprecationWarning,
stacklevel=2,
)
# Ensure the new parameter is used even if the old one is passed
cropping_bbox_key = cropping_box_key
self.max_part_shift = cast(Tuple[float, float], max_part_shift)
self.cropping_bbox_key = cropping_bbox_key
@staticmethod
def _clip_bbox(bbox: BoxInternalType, height: int, width: int) -> BoxInternalType:
x_min, y_min, x_max, y_max = bbox
x_min = np.clip(x_min, 0, width)
y_min = np.clip(y_min, 0, height)
x_max = np.clip(x_max, x_min, width)
y_max = np.clip(y_max, y_min, height)
return x_min, y_min, x_max, y_max
def get_params_dependent_on_data(
self,
params: dict[str, Any],
data: dict[str, Any],
) -> dict[str, tuple[float, ...]]:
bbox = data[self.cropping_bbox_key]
height, width = params["shape"][:2]
bbox = self._clip_bbox(bbox, height, width)
h_max_shift = round((bbox[3] - bbox[1]) * self.max_part_shift[0])
w_max_shift = round((bbox[2] - bbox[0]) * self.max_part_shift[1])
x_min = bbox[0] - random.randint(-w_max_shift, w_max_shift)
x_max = bbox[2] + random.randint(-w_max_shift, w_max_shift)
y_min = bbox[1] - random.randint(-h_max_shift, h_max_shift)
y_max = bbox[3] + random.randint(-h_max_shift, h_max_shift)
crop_coords = self._clip_bbox((x_min, y_min, x_max, y_max), height, width)
if crop_coords[0] == crop_coords[2] or crop_coords[1] == crop_coords[3]:
crop_coords = fcrops.get_center_crop_coords(height, width, bbox[3] - bbox[1], bbox[2] - bbox[0])
return {"crop_coords": crop_coords}
@property
def targets_as_params(self) -> list[str]:
return [self.cropping_bbox_key]
def get_transform_init_args_names(self) -> tuple[str, ...]:
return "max_part_shift", "cropping_bbox_key"
targets_as_params: list[str]
property
readonly
¶
Targets used to get params dependent on targets. This is used to check input has all required targets.
get_params_dependent_on_data (self, params, data)
¶
Returns parameters dependent on input.
Source code in albumentations/augmentations/crops/transforms.py
def get_params_dependent_on_data(
self,
params: dict[str, Any],
data: dict[str, Any],
) -> dict[str, tuple[float, ...]]:
bbox = data[self.cropping_bbox_key]
height, width = params["shape"][:2]
bbox = self._clip_bbox(bbox, height, width)
h_max_shift = round((bbox[3] - bbox[1]) * self.max_part_shift[0])
w_max_shift = round((bbox[2] - bbox[0]) * self.max_part_shift[1])
x_min = bbox[0] - random.randint(-w_max_shift, w_max_shift)
x_max = bbox[2] + random.randint(-w_max_shift, w_max_shift)
y_min = bbox[1] - random.randint(-h_max_shift, h_max_shift)
y_max = bbox[3] + random.randint(-h_max_shift, h_max_shift)
crop_coords = self._clip_bbox((x_min, y_min, x_max, y_max), height, width)
if crop_coords[0] == crop_coords[2] or crop_coords[1] == crop_coords[3]:
crop_coords = fcrops.get_center_crop_coords(height, width, bbox[3] - bbox[1], bbox[2] - bbox[0])
return {"crop_coords": crop_coords}
get_transform_init_args_names (self)
¶
class RandomResizedCrop
(size=None, width=None, height=None, *, scale=(0.08, 1.0), ratio=(0.75, 1.3333333333333333), interpolation=1, always_apply=None, p=1.0)
[view source on GitHub] ¶
Torchvision's variant of crop a random part of the input and rescale it to some size.
Parameters:
Name | Type | Description |
---|---|---|
size | int, int | expected output size of the crop, for each edge. If size is an int instead of sequence like (height, width), a square output size (size, size) is made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). |
scale | float, float | Specifies the lower and upper bounds for the random area of the crop, before resizing. The scale is defined with respect to the area of the original image. |
ratio | float, float | lower and upper bounds for the random aspect ratio of the crop, before resizing. |
interpolation | OpenCV flag | flag that is used to specify the interpolation algorithm. Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4. Default: cv2.INTER_LINEAR. |
p | float | probability of applying the transform. Default: 1. |
Targets
image, mask, bboxes, keypoints
Image types: uint8, float32
Interactive Tool Available!
Explore this transform visually and adjust parameters interactively using this tool:
Source code in albumentations/augmentations/crops/transforms.py
class RandomResizedCrop(_BaseRandomSizedCrop):
"""Torchvision's variant of crop a random part of the input and rescale it to some size.
Args:
size (int, int): expected output size of the crop, for each edge. If size is an int instead of sequence
like (height, width), a square output size (size, size) is made. If provided a sequence of length 1,
it will be interpreted as (size[0], size[0]).
scale ((float, float)): Specifies the lower and upper bounds for the random area of the crop, before resizing.
The scale is defined with respect to the area of the original image.
ratio ((float, float)): lower and upper bounds for the random aspect ratio of the crop, before resizing.
interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
Default: cv2.INTER_LINEAR.
p (float): probability of applying the transform. Default: 1.
Targets:
image, mask, bboxes, keypoints
Image types:
uint8, float32
"""
_targets = (Targets.IMAGE, Targets.MASK, Targets.BBOXES, Targets.KEYPOINTS)
class InitSchema(BaseTransformInitSchema):
scale: Annotated[tuple[float, float], AfterValidator(check_01)] = (0.08, 1.0)
ratio: Annotated[tuple[float, float], AfterValidator(check_0plus)] = (0.75, 1.3333333333333333)
width: int | None = Field(
None,
deprecated="Initializing with 'height' and 'width' is deprecated. Use size instead.",
)
height: int | None = Field(
None,
deprecated="Initializing with 'height' and 'width' is deprecated. Use size instead.",
)
size: ScaleIntType | None = None
p: ProbabilityType = 1
interpolation: InterpolationType = cv2.INTER_LINEAR
@model_validator(mode="after")
def process(self) -> Self:
if isinstance(self.size, int):
if isinstance(self.width, int):
self.size = (self.size, self.width)
else:
msg = "If size is an integer, width as integer must be specified."
raise TypeError(msg)
if self.size is None:
if self.height is None or self.width is None:
message = "If 'size' is not provided, both 'height' and 'width' must be specified."
raise ValueError(message)
self.size = (self.height, self.width)
return self
def __init__(
self,
# NOTE @zetyquickly: when (width, height) are deprecated, make 'size' non optional
size: ScaleIntType | None = None,
width: int | None = None,
height: int | None = None,
*,
scale: tuple[float, float] = (0.08, 1.0),
ratio: tuple[float, float] = (0.75, 1.3333333333333333),
interpolation: int = cv2.INTER_LINEAR,
always_apply: bool | None = None,
p: float = 1.0,
):
super().__init__(size=cast(Tuple[int, int], size), interpolation=interpolation, p=p, always_apply=always_apply)
self.scale = scale
self.ratio = ratio
def get_params_dependent_on_data(
self,
params: dict[str, Any],
data: dict[str, Any],
) -> dict[str, tuple[int, int, int, int]]:
image_height, image_width = params["shape"][:2]
area = image_height * image_width
for _ in range(10):
target_area = random.uniform(*self.scale) * area
log_ratio = (math.log(self.ratio[0]), math.log(self.ratio[1]))
aspect_ratio = math.exp(random.uniform(*log_ratio))
width = int(round(math.sqrt(target_area * aspect_ratio)))
height = int(round(math.sqrt(target_area / aspect_ratio)))
if 0 < width <= image_width and 0 < height <= image_height:
i = random.randint(0, image_height - height)
j = random.randint(0, image_width - width)
h_start = i * 1.0 / (image_height - height + 1e-10)
w_start = j * 1.0 / (image_width - width + 1e-10)
crop_coords = fcrops.get_crop_coords(image_height, image_width, height, width, h_start, w_start)
return {"crop_coords": crop_coords}
# Fallback to central crop
in_ratio = image_width / image_height
if in_ratio < min(self.ratio):
width = image_width
height = int(round(image_width / min(self.ratio)))
elif in_ratio > max(self.ratio):
height = image_height
width = int(round(height * max(self.ratio)))
else: # whole image
width = image_width
height = image_height
i = (image_height - height) // 2
j = (image_width - width) // 2
h_start = i * 1.0 / (image_height - height + 1e-10)
w_start = j * 1.0 / (image_width - width + 1e-10)
crop_coords = fcrops.get_crop_coords(image_height, image_width, height, width, h_start, w_start)
return {"crop_coords": crop_coords}
def get_transform_init_args_names(self) -> tuple[str, ...]:
return "size", "scale", "ratio", "interpolation"
get_params_dependent_on_data (self, params, data)
¶
Returns parameters dependent on input.
Source code in albumentations/augmentations/crops/transforms.py
def get_params_dependent_on_data(
self,
params: dict[str, Any],
data: dict[str, Any],
) -> dict[str, tuple[int, int, int, int]]:
image_height, image_width = params["shape"][:2]
area = image_height * image_width
for _ in range(10):
target_area = random.uniform(*self.scale) * area
log_ratio = (math.log(self.ratio[0]), math.log(self.ratio[1]))
aspect_ratio = math.exp(random.uniform(*log_ratio))
width = int(round(math.sqrt(target_area * aspect_ratio)))
height = int(round(math.sqrt(target_area / aspect_ratio)))
if 0 < width <= image_width and 0 < height <= image_height:
i = random.randint(0, image_height - height)
j = random.randint(0, image_width - width)
h_start = i * 1.0 / (image_height - height + 1e-10)
w_start = j * 1.0 / (image_width - width + 1e-10)
crop_coords = fcrops.get_crop_coords(image_height, image_width, height, width, h_start, w_start)
return {"crop_coords": crop_coords}
# Fallback to central crop
in_ratio = image_width / image_height
if in_ratio < min(self.ratio):
width = image_width
height = int(round(image_width / min(self.ratio)))
elif in_ratio > max(self.ratio):
height = image_height
width = int(round(height * max(self.ratio)))
else: # whole image
width = image_width
height = image_height
i = (image_height - height) // 2
j = (image_width - width) // 2
h_start = i * 1.0 / (image_height - height + 1e-10)
w_start = j * 1.0 / (image_width - width + 1e-10)
crop_coords = fcrops.get_crop_coords(image_height, image_width, height, width, h_start, w_start)
return {"crop_coords": crop_coords}
get_transform_init_args_names (self)
¶
class RandomSizedBBoxSafeCrop
(height, width, erosion_rate=0.0, interpolation=1, always_apply=None, p=1.0)
[view source on GitHub] ¶
Crop a random part of the input and rescale it to some size without loss of bboxes.
Parameters:
Name | Type | Description |
---|---|---|
height | int | height after crop and resize. |
width | int | width after crop and resize. |
erosion_rate | float | erosion rate applied on input image height before crop. |
interpolation | OpenCV flag | flag that is used to specify the interpolation algorithm. Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4. Default: cv2.INTER_LINEAR. |
p | float | probability of applying the transform. Default: 1. |
Targets
image, mask, bboxes, keypoints
Image types: uint8, float32
Interactive Tool Available!
Explore this transform visually and adjust parameters interactively using this tool:
Source code in albumentations/augmentations/crops/transforms.py
class RandomSizedBBoxSafeCrop(BBoxSafeRandomCrop):
"""Crop a random part of the input and rescale it to some size without loss of bboxes.
Args:
height: height after crop and resize.
width: width after crop and resize.
erosion_rate: erosion rate applied on input image height before crop.
interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
Default: cv2.INTER_LINEAR.
p (float): probability of applying the transform. Default: 1.
Targets:
image, mask, bboxes, keypoints
Image types:
uint8, float32
"""
_targets = (Targets.IMAGE, Targets.MASK, Targets.BBOXES, Targets.KEYPOINTS)
class InitSchema(CropInitSchema):
erosion_rate: float = Field(
default=0.0,
ge=0.0,
le=1.0,
description="Erosion rate applied on input image height before crop.",
)
interpolation: InterpolationType = cv2.INTER_LINEAR
def __init__(
self,
height: int,
width: int,
erosion_rate: float = 0.0,
interpolation: int = cv2.INTER_LINEAR,
always_apply: bool | None = None,
p: float = 1.0,
):
super().__init__(erosion_rate=erosion_rate, p=p, always_apply=always_apply)
self.height = height
self.width = width
self.interpolation = interpolation
def apply(
self,
img: np.ndarray,
crop_coords: tuple[int, int, int, int],
**params: Any,
) -> np.ndarray:
crop = fcrops.crop(img, *crop_coords)
return fgeometric.resize(crop, self.height, self.width, self.interpolation)
def apply_to_keypoint(
self,
keypoint: KeypointInternalType,
crop_coords: tuple[int, int, int, int],
**params: Any,
) -> KeypointInternalType:
keypoint = fcrops.crop_keypoint_by_coords(keypoint, crop_coords)
crop_height = crop_coords[3] - crop_coords[1]
crop_width = crop_coords[2] - crop_coords[0]
scale_y = self.height / crop_height
scale_x = self.width / crop_width
return fgeometric.keypoint_scale(keypoint, scale_x=scale_x, scale_y=scale_y)
def get_transform_init_args_names(self) -> tuple[str, ...]:
return (*super().get_transform_init_args_names(), "height", "width", "interpolation")
class RandomSizedCrop
(min_max_height, size=None, width=None, height=None, *, w2h_ratio=1.0, interpolation=1, always_apply=None, p=1.0)
[view source on GitHub] ¶
Crop a random portion of the input and rescale it to a specific size.
Parameters:
Name | Type | Description |
---|---|---|
min_max_height | int, int | crop size limits. |
size | int, int | target size for the output image, i.e. (height, width) after crop and resize |
w2h_ratio | float | aspect ratio of crop. |
interpolation | OpenCV flag | flag that is used to specify the interpolation algorithm. Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4. Default: cv2.INTER_LINEAR. |
p | float | probability of applying the transform. Default: 1. |
Targets
image, mask, bboxes, keypoints
Image types: uint8, float32
Interactive Tool Available!
Explore this transform visually and adjust parameters interactively using this tool:
Source code in albumentations/augmentations/crops/transforms.py
class RandomSizedCrop(_BaseRandomSizedCrop):
"""Crop a random portion of the input and rescale it to a specific size.
Args:
min_max_height ((int, int)): crop size limits.
size ((int, int)): target size for the output image, i.e. (height, width) after crop and resize
w2h_ratio (float): aspect ratio of crop.
interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
Default: cv2.INTER_LINEAR.
p (float): probability of applying the transform. Default: 1.
Targets:
image, mask, bboxes, keypoints
Image types:
uint8, float32
"""
_targets = (Targets.IMAGE, Targets.MASK, Targets.BBOXES, Targets.KEYPOINTS)
class InitSchema(BaseTransformInitSchema):
interpolation: InterpolationType = cv2.INTER_LINEAR
p: ProbabilityType = 1
min_max_height: OnePlusIntRangeType
w2h_ratio: Annotated[float, Field(gt=0, description="Aspect ratio of crop.")]
width: int | None = Field(
None,
deprecated=(
"Initializing with 'size' as an integer and a separate 'width' is deprecated. "
"Please use a tuple (height, width) for the 'size' argument."
),
)
height: int | None = Field(
None,
deprecated=(
"Initializing with 'height' and 'width' is deprecated. "
"Please use a tuple (height, width) for the 'size' argument."
),
)
size: ScaleIntType | None = None
@model_validator(mode="after")
def process(self) -> Self:
if isinstance(self.size, int):
if isinstance(self.width, int):
self.size = (self.size, self.width)
else:
msg = "If size is an integer, width as integer must be specified."
raise TypeError(msg)
if self.size is None:
if self.height is None or self.width is None:
message = "If 'size' is not provided, both 'height' and 'width' must be specified."
raise ValueError(message)
self.size = (self.height, self.width)
return self
def __init__(
self,
min_max_height: tuple[int, int],
# NOTE @zetyquickly: when (width, height) are deprecated, make 'size' non optional
size: ScaleIntType | None = None,
width: int | None = None,
height: int | None = None,
*,
w2h_ratio: float = 1.0,
interpolation: int = cv2.INTER_LINEAR,
always_apply: bool | None = None,
p: float = 1.0,
):
super().__init__(size=cast(Tuple[int, int], size), interpolation=interpolation, p=p, always_apply=always_apply)
self.min_max_height = min_max_height
self.w2h_ratio = w2h_ratio
def get_params_dependent_on_data(
self,
params: dict[str, Any],
data: dict[str, Any],
) -> dict[str, tuple[int, int, int, int]]:
image_height, image_width = params["shape"][:2]
crop_height = random.randint(self.min_max_height[0], self.min_max_height[1])
crop_width = int(crop_height * self.w2h_ratio)
h_start = random.random()
w_start = random.random()
crop_coords = fcrops.get_crop_coords(image_height, image_width, crop_height, crop_width, h_start, w_start)
return {"crop_coords": crop_coords}
def get_transform_init_args_names(self) -> tuple[str, ...]:
return "min_max_height", "size", "w2h_ratio", "interpolation"
get_params_dependent_on_data (self, params, data)
¶
Returns parameters dependent on input.
Source code in albumentations/augmentations/crops/transforms.py
def get_params_dependent_on_data(
self,
params: dict[str, Any],
data: dict[str, Any],
) -> dict[str, tuple[int, int, int, int]]:
image_height, image_width = params["shape"][:2]
crop_height = random.randint(self.min_max_height[0], self.min_max_height[1])
crop_width = int(crop_height * self.w2h_ratio)
h_start = random.random()
w_start = random.random()
crop_coords = fcrops.get_crop_coords(image_height, image_width, crop_height, crop_width, h_start, w_start)
return {"crop_coords": crop_coords}