Source code for caer.transforms.position

#    _____           ______  _____ 
#  / ____/    /\    |  ____ |  __ \
# | |        /  \   | |__   | |__) | Caer - Modern Computer Vision
# | |       / /\ \  |  __|  |  _  /  Languages: Python, C, C++, Cuda
# | |___   / ____ \ | |____ | | \ \  http://github.com/jasmcaus/caer
#  \_____\/_/    \_ \______ |_|  \_\

# Licensed under the MIT License <http://opensource.org/licenses/MIT>
# SPDX-License-Identifier: MIT
# Copyright (c) 2020-2021 The Caer Authors <http://github.com/jasmcaus>

import numpy as np 
import cv2 as cv 
import random 
import collections

from ..coreten import Tensor, to_tensor
from .._internal import _check_target_size
from ..globals import (
    INTER_AREA, INTER_CUBIC, INTER_NEAREST, INTER_LINEAR
)
from ..annotations import Union,Tuple,Optional

# Maps the public ``padding_mode`` names accepted by ``pad`` to the integer
# border-type code that ``pad`` forwards to ``cv.copyMakeBorder``.
# NOTE(review): the integers presumably correspond to OpenCV's
# BORDER_CONSTANT (0), BORDER_REPLICATE (1), BORDER_REFLECT_101 (4) and
# BORDER_REFLECT (2) - confirm against the installed OpenCV version.
pad_to_str = {
    "constant":  0,
    "edge":      1,
    "reflect":   4,
    "symmetric": 2
}

# Upper bound of the pixel value range for each supported image dtype.
# ``solarize`` looks the image dtype up here and inverts pixels as
# ``max_val - pixel``; a dtype missing from this table raises ``KeyError`` there.
MAX_VALUES_BY_DTYPE = {
    np.dtype("uint8"): 255,
    np.dtype("uint16"): 65535,
    np.dtype("uint32"): 4294967295,
    np.dtype("float32"): 1.0,
}

# Names exported by ``from <this module> import *``.
# NOTE(review): ``crop``, ``center_crop`` and ``rand_crop`` are defined in this
# module but are not listed here - confirm whether that omission is intentional.
__all__ = [
    "hflip",
    "vflip",
    "hvflip",
    "rand_flip",
    "transpose",
    "scale",
    "rotate",
    "translate",
    "solarize",
    "posterize",
    "equalize",
    "clip",
    "pad"
]

def _is_rgb_image(tens):
    """Report whether ``tens`` is an RGB image.

    The input is converted with ``to_tensor`` (checks overridden) and the
    answer is whatever the resulting Tensor's ``is_rgb()`` returns.
    """
    as_tensor = to_tensor(tens, override_checks=True)
    return as_tensor.is_rgb()


def _is_gray_image(tens):
    """Report whether ``tens`` is a grayscale image.

    The input is converted with ``to_tensor`` (checks overridden) and the
    answer is whatever the resulting Tensor's ``is_gray()`` returns.
    """
    as_tensor = to_tensor(tens, override_checks=True)
    return as_tensor.is_gray()


def _get_num_channels(tens):
    return tens.shape[2] if len(tens.shape) == 3 else 1


def hflip(tens: Tensor) -> Tensor:
    r"""
        Flip an image horizontally (mirror it left-to-right).

    Args:
        tens (Tensor): Image to be flipped.

    Returns:
        Flipped image (Tensor).

    """
    # Reversing the second axis yields a negative-stride view; copy it into
    # contiguous memory before wrapping it as a Tensor.
    mirrored = tens[:, ::-1, ...]
    return to_tensor(np.ascontiguousarray(mirrored), override_checks=True)


def vflip(tens: Tensor) -> Tensor:
    r"""
        Flip an image vertically (mirror it top-to-bottom).

    Args:
        tens (Tensor): Image to be flipped.

    Returns:
        Flipped image (Tensor).

    """
    # Reversing the first axis yields a negative-stride view; copy it into
    # contiguous memory before wrapping it as a Tensor.
    upside_down = tens[::-1, ...]
    return to_tensor(np.ascontiguousarray(upside_down), override_checks=True)


def hvflip(tens: Tensor) -> Tensor:
    r"""
        Flip an image along both axes: vertically first, then horizontally.

    Args:
        tens (Tensor): Image to be flipped.

    Returns:
        Flipped image (Tensor).

    """
    flipped_vertically = vflip(tens)
    return hflip(flipped_vertically)


def rand_flip(tens: Tensor) -> Tensor:
    r"""
        Flip an image either vertically or horizontally, chosen at random.

    A single draw from ``random.uniform(0, 1)`` decides the direction: values
    above 0.5 select the vertical flip, anything else the horizontal flip.

    Args:
        tens (Tensor): Image to be flipped.

    Returns:
        Flipped image (Tensor).

    """
    flip_fn = vflip if random.uniform(0, 1) > 0.5 else hflip
    return flip_fn(tens)


def transpose(tens: Tensor) -> Tensor:
    """Swap the first two axes of an image.

    Inputs with more than two dimensions are transposed with axis order
    ``(1, 0, 2)``; two-dimensional inputs with ``(1, 0)``.

    Args:
        tens (Tensor): Image to transpose.

    Returns:
        Whatever ``tens.transpose`` returns for the chosen axis order.
    """
    axis_order = (1, 0, 2) if len(tens.shape) > 2 else (1, 0)
    return tens.transpose(*axis_order)


def rotate(tens: Tensor, angle, rotPoint=None) -> Tensor:
    r"""
        Rotate an image by ``angle`` around ``rotPoint``, or around the image centre when no point is given.

    The output canvas keeps the input's width and height, so content rotated
    outside the original frame is cut off.

    Args:
        tens (Tensor): Image to rotate.
        angle: Rotation angle, forwarded unchanged to ``cv.getRotationMatrix2D``.
        rotPoint (tuple, optional): ``(x, y)`` pivot of the rotation. Defaults to
            ``(width//2, height//2)``.

    Returns:
        Rotated image (Tensor).

    """
    height, width = tens.shape[:2]

    # Fall back to the (integer) image centre when the caller gave no pivot.
    pivot = (width//2, height//2) if rotPoint is None else rotPoint

    rotation_matrix = cv.getRotationMatrix2D(pivot, angle, scale=1.0)
    rotated = cv.warpAffine(tens, rotation_matrix, (width, height))
    return to_tensor(rotated, override_checks=True)


def translate(image:Tensor, x:int, y:int) -> Tensor:
    r"""Translates a given image across the x-axis and the y-axis

    The output keeps the input's width and height, so content shifted past
    the frame is cut off.

    Args:
        image (Tensor): Image to translate.
        x (int): shifts the image right (positive) or left (negative)
        y (int): shifts the image down (positive) or up (negative)
    
    Returns:
        The translated image (Tensor)

    """
    # 2x3 affine matrix: identity rotation/scale with (x, y) as the offset column.
    transMat = np.float32([[1, 0, x], [0, 1, y]])
    shifted = cv.warpAffine(image, transMat, (image.shape[1], image.shape[0]))

    # Wrap the OpenCV result so the return type matches the annotation and the
    # other transforms in this module.
    return to_tensor(shifted, override_checks=True)


def scale(tens: Tensor, scale_factor:int, interpolation="bilinear") -> Tensor:
    r"""Resize an image by a multiplicative factor.

    Args:
        tens (Tensor): Image to resize.
        scale_factor: Positive multiplier applied to both height and width.
            The new dimensions are truncated to integers.
        interpolation: One of ``"nearest"``, ``"bilinear"``, ``"bicubic"``,
            ``"area"`` (or the aliases ``0``-``3`` / ``"0"``-``"3"``).
            Ignored when ``scale_factor > 1``: upscaling always uses bicubic.

    Returns:
        Resized image (Tensor).

    Raises:
        ValueError: If ``interpolation`` is not a recognised name or
            ``scale_factor`` is not positive.

    """
    interpolation_methods = {
        "nearest": INTER_NEAREST, "0": INTER_NEAREST, 0: INTER_NEAREST, # 0
        "bilinear": INTER_LINEAR, "1": INTER_LINEAR,  1: INTER_LINEAR,  # 1
        "bicubic": INTER_CUBIC,   "2": INTER_CUBIC,   2: INTER_CUBIC,   # 2
        "area": INTER_AREA,       "3": INTER_AREA,    3: INTER_AREA     # 3
    }
    if interpolation not in interpolation_methods:
        raise ValueError("Specify a valid interpolation type - area/nearest/bicubic/bilinear")

    if scale_factor <= 0:
        raise ValueError("scale_factor must be a positive number")

    if scale_factor > 1:
        # Neater, more precise
        interpolation = "bicubic"

    height, width = tens.shape[:2]
    new_height, new_width = int(height * scale_factor), int(width * scale_factor)

    # cv.resize expects the integer flag, not the user-facing name, so the
    # name is translated through the lookup table here.
    tens = cv.resize(tens, (new_width, new_height), interpolation=interpolation_methods[interpolation])
    return to_tensor(tens, override_checks=True)


def pad(tens: Tensor, padding:Union[int,Tuple], fill:int=0, padding_mode="constant") -> Tensor:
    r"""
        Pad the given image on all sides with specified padding mode and fill value.

    Args:
        tens (Tensor): image to be padded.
        padding (int or tuple): Padding on each border. If a single int is provided this 
            is used to pad all borders. If tuple of length 2 is provided this is the padding
            on left/right and top/bottom respectively. If a tuple of length 4 is provided
            this is the padding for the left, top, right and bottom borders
            respectively. Lists are accepted wherever tuples are.
        fill: Pixel fill value for constant fill. Default is 0. If a tuple of
            length 3, it is used to fill R, G, B channels respectively.
            This value is only used when the padding_mode is constant
        padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant.
            - constant: pads with a constant value, this value is specified with fill
            - edge: pads with the last value on the edge of the image
            - reflect: pads with reflection of image (without repeating the last value on the edge)
                       padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
                       will result in [3, 2, 1, 2, 3, 4, 3, 2]
            - symmetric: pads with reflection of image (repeating the last value on the edge)
                         padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
                         will result in [2, 1, 1, 2, 3, 4, 4, 3]
                         
    Returns:
        Padded image (Tensor).

    Raises:
        TypeError: If ``padding``, ``fill`` or ``padding_mode`` has an unsupported type.
        ValueError: If a sequence ``padding`` does not have 2 or 4 elements, or
            ``padding_mode`` is not one of the supported names.

    """
    # An int must be accepted here: the docs and the int branch below both rely on it.
    if not isinstance(padding, (int, tuple, list)):
        raise TypeError("Got inappropriate padding argument")

    # Numbers must be accepted here, otherwise the default ``fill=0`` is rejected.
    if not isinstance(fill, (int, float, str, tuple, list)):
        raise TypeError("Got inappropriate fill argument")

    if not isinstance(padding_mode, str):
        raise TypeError("Got inappropriate padding_mode argument")

    if isinstance(padding, (tuple, list)) and len(padding) not in (2, 4):
        raise ValueError(f"Padding must be an int or a 2, or 4 element tuple, not a {len(padding)} element tuple")

    # Raise instead of assert so the check survives ``python -O``.
    if padding_mode not in pad_to_str:
        raise ValueError("Padding mode should be either constant, edge, reflect or symmetric")

    if isinstance(padding, int):
        pad_left = pad_right = pad_top = pad_bottom = padding
    elif len(padding) == 2:
        pad_left = pad_right = padding[0]
        pad_top = pad_bottom = padding[1]
    else:
        pad_left, pad_top, pad_right, pad_bottom = padding

    # The border is built for every accepted padding form, not just the 4-element one.
    tens = cv.copyMakeBorder(tens,
                             top = pad_top,
                             bottom = pad_bottom,
                             left = pad_left,
                             right = pad_right,
                             borderType = pad_to_str[padding_mode],
                             value = fill)

    return to_tensor(tens, override_checks=True)

                                
def crop(tens: Tensor, x_min, y_min, x_max, y_max) -> Tensor:
    """Cut the rectangle ``[y_min:y_max, x_min:x_max]`` out of an image.

    Args:
        tens (Tensor): Image to crop.
        x_min, y_min: Top-left corner of the crop (inclusive).
        x_max, y_max: Bottom-right corner of the crop (exclusive).

    Returns:
        The cropped region (Tensor).

    Raises:
        ValueError: If the rectangle is empty or inverted, or if it extends
            outside the image.
    """
    img_h, img_w = tens.shape[:2]

    if x_max <= x_min or y_max <= y_min:
        raise ValueError(
            "We should have x_min < x_max and y_min < y_max. But we got"
            f" (x_min = {x_min}, y_min = {y_min}, x_max = {x_max}, y_max = {y_max})"
        )

    if x_min < 0 or x_max > img_w or y_min < 0 or y_max > img_h:
        raise ValueError(
            "Values for crop should be non negative and equal or smaller than image sizes"
            f"(x_min = {x_min}, y_min = {y_min}, x_max = {x_max}, y_max = {y_max}, "
            f"height = {img_h}, width = {img_w})"
        )

    region = tens[y_min:y_max, x_min:x_max]
    return to_tensor(region, override_checks=True)


def center_crop(image:Tensor, target_size:Optional[Tuple]=None) -> Tensor:
    r"""Return the central region of an image with the requested size.

    This is the public entry point; the work is delegated to
    ``_compute_centre_crop``.

    Args:
        image (Tensor): Valid image Tensor
        target_size (tuple): Size of the centre crop, given as `(width,height)`
    
    Returns:
        Cropped centre (Tensor)
    
    Examples::

        >> tens = caer.data.bear() # Standard 640x427 image
        >> cropped = caer.center_crop(tens, target_size=(200,200))
        >> cropped.shape
        (200,200,3)

    """
    centre = _compute_centre_crop(image, target_size)
    return centre


def rand_crop(tens: Tensor, crop_height, crop_width, h_start, w_start) -> Tensor:
    """Crop a ``crop_height`` x ``crop_width`` window placed by ``h_start``/``w_start``.

    The window position is computed by ``_get_random_crop_coords`` from the
    caller-supplied ``h_start`` and ``w_start`` factors; no random numbers are
    drawn inside this function.

    Args:
        tens (Tensor): Image to crop.
        crop_height: Height of the crop window.
        crop_width: Width of the crop window.
        h_start: Vertical placement factor passed to ``_get_random_crop_coords``.
        w_start: Horizontal placement factor passed to ``_get_random_crop_coords``.

    Returns:
        The cropped window (Tensor).

    Raises:
        ValueError: If the window is larger than the image in either dimension.
    """
    height, width = tens.shape[:2]

    if height < crop_height or width < crop_width:
        raise ValueError(
            f"Requested crop size ({crop_height}, {crop_width}) is "
            f"larger than the image size ({height}, {width})"
        )

    left, top, right, bottom = _get_random_crop_coords(
        height, width, crop_height, crop_width, h_start, w_start
    )
    window = tens[top:bottom, left:right]
    return to_tensor(window, override_checks=True)


def _compute_centre_crop(tens, target_size) -> Tensor:
    """Slice the centred ``target_size`` window out of ``tens``.

    Args:
        tens: Image whose first two shape entries are read as (height, width).
        target_size: ``(width, height)`` of the window; validated by
            ``_check_target_size`` before use.

    Returns:
        The centred window (Tensor).

    Raises:
        ValueError: If the window is larger than the image in either dimension.
    """
    _check_target_size(target_size)

    # Image shape is (height, width, ...) while target_size is (width, height).
    org_h, org_w = tens.shape[:2]
    target_w, target_h = target_size

    if target_h > org_h or target_w > org_w:
        raise ValueError("To compute centre crop, target size dimensions must be <= tens dimensions")

    # Split the leftover margin evenly; an odd margin leaves the extra pixel
    # on the bottom/right side.
    top = (org_h - target_h) // 2
    left = (org_w - target_w) // 2
    bottom = top + target_h
    right = left + target_w

    return to_tensor(tens[top:bottom, left:right], override_checks=True)


def _get_random_crop_coords(height, width, crop_height, crop_width, h_start, w_start):
    y1 = int((height - crop_height) * h_start)
    y2 = y1 + crop_height
    x1 = int((width - crop_width) * w_start)
    x2 = x1 + crop_width
    return x1, y1, x2, y2


def solarize(tens: Tensor, threshold:int=128) -> Tensor:
    r"""
        Invert every pixel value that is at or above a threshold.

    A pixel ``p`` with ``p >= threshold`` becomes ``max_val - p``, where
    ``max_val`` is looked up in ``MAX_VALUES_BY_DTYPE`` for the image dtype.

    Args:
        tens (Tensor): The image to solarize.
        threshold (int): Pixels at or above this level are inverted.

    Returns:
        Solarized image (Tensor)
    
    Examples::

        >> tens = caer.data.sunrise()
        >> solarized = caer.solarize(tens, threshold=128)
        >> solarized.shape
        (427,640,3)

    """
    tens = to_tensor(tens, override_checks=True)
    max_val = MAX_VALUES_BY_DTYPE[tens.dtype]

    if tens.dtype != np.dtype("uint8"):
        # Generic path: invert the selected pixels on a copy via boolean masking.
        inverted = tens.copy()
        selected = tens >= threshold
        inverted[selected] = max_val - inverted[selected]
        return to_tensor(inverted, override_checks=True)

    # uint8 path: precompute the answer for every possible value and apply it
    # through a lookup table.
    table = np.array(
        [level if level < threshold else max_val - level for level in range(max_val + 1)],
        dtype=tens.dtype,
    )

    rank_before = len(tens.shape)
    tens = cv.LUT(tens, table)

    # Restore a trailing axis if the lookup changed the number of dimensions.
    if len(tens.shape) != rank_before:
        tens = np.expand_dims(tens, -1)

    return to_tensor(tens, override_checks=True)


def _posterize_lut(n_bits: int) -> np.ndarray:
    """Build the 256-entry uint8 lookup table that keeps only the top ``n_bits`` bits."""
    keep_mask = 0xFF & ~((1 << (8 - n_bits)) - 1)
    return np.arange(0, 256, dtype=np.uint8) & np.uint8(keep_mask)


def posterize(tens: Tensor, bits: int) -> Tensor:
    r"""Reduce the number of bits for each color channel in the image.

    Args:
        tens (Tensor): Image to posterize. Must be of dtype uint8.
        bits (int): Number of high bits to keep. Must be in range [0, 8].
            A sequence with one value per channel is also accepted, in which
            case the image must be RGB.

    Returns:
        Image with reduced color channels (Tensor)

    Raises:
        TypeError: If the image is not uint8, or ``bits`` is a sequence and
            the image is not RGB.
        ValueError: If any value in ``bits`` is outside [0, 8], or a sequence
            ``bits`` does not provide exactly one value per channel.
    
    Examples::

        >> tens = caer.data.sunrise()
        >> posterized = caer.posterize(tens, bits=4)
        >> posterized.shape
        (427,640,3)

    """
    tens = to_tensor(tens, override_checks=True)

    if tens.dtype != np.uint8:
        raise TypeError("Image must have uint8 channel type")

    # Range-check BEFORE narrowing to uint8: casting first would wrap values
    # such as 256 to 0 (or overflow) and let them slip past the check.
    bits_arr = np.asarray(bits)
    if np.any((bits_arr < 0) | (bits_arr > 8)):
        raise ValueError("bits must be in range [0, 8]")
    bits_arr = bits_arr.astype(np.uint8)

    if bits_arr.size == 1:
        n_bits = int(bits_arr.reshape(-1)[0])
        if n_bits == 0:
            return to_tensor(np.zeros_like(tens), override_checks=True)
        if n_bits == 8:
            return to_tensor(tens.copy(), override_checks=True)

        return to_tensor(cv.LUT(tens, _posterize_lut(n_bits)), override_checks=True)

    if not _is_rgb_image(tens):
        raise TypeError("If `bits` is iterable, image must be RGB")

    # Every channel must get a value, otherwise part of the np.empty_like
    # buffer below would be returned uninitialised.
    if bits_arr.ndim != 1 or bits_arr.shape[0] != tens.shape[-1]:
        raise ValueError("If `bits` is iterable, it must contain exactly one value per channel")

    result_tens = np.empty_like(tens)
    for i, channel_bits in enumerate(bits_arr.tolist()):
        if channel_bits == 0:
            result_tens[..., i] = 0
        elif channel_bits == 8:
            result_tens[..., i] = tens[..., i]
        else:
            result_tens[..., i] = cv.LUT(tens[..., i], _posterize_lut(channel_bits))

    return to_tensor(result_tens, override_checks=True)


def clip(tens: Tensor, dtype: str, maxval: int) -> Tensor:
    """Clamp values to ``[0, maxval]`` and cast the result to ``dtype``.

    Args:
        tens (Tensor): Values to clamp.
        dtype: Target dtype passed to ``astype``.
        maxval: Upper bound of the clamp; the lower bound is always 0.

    Returns:
        The clamped, cast values wrapped by ``to_tensor``.
    """
    bounded = np.clip(tens, 0, maxval)
    return to_tensor(bounded.astype(dtype), override_checks=True)


def _equalize_cv(tens, mask = None) -> Tensor:
    """Equalize the histogram of a single-channel image, optionally under a mask.

    Without a mask the work is delegated to ``cv.equalizeHist``. With a mask,
    a 256-bin histogram of the masked pixels is turned into a lookup table
    from its cumulative sum and applied to the whole image with ``cv.LUT``.

    Args:
        tens: Single-channel image handed to OpenCV.
        mask: Optional mask passed to ``cv.calcHist``.

    Returns:
        Equalized image (Tensor).
    """
    if mask is None:
        return to_tensor(cv.equalizeHist(tens), override_checks=True)

    histogram = cv.calcHist([tens], [0], mask, [256], (0, 256)).ravel()

    # Index of the first non-empty bin, capped at 255 when every bin is empty.
    first = next(
        (level for level, count in enumerate(histogram) if count > 0),
        len(histogram),
    )
    first = min(first, 255)

    total = np.sum(histogram)
    if histogram[first] == total:
        # All counted pixels share one value: the output is a flat image.
        return to_tensor(np.full_like(tens, first), override_checks=True)

    # The first non-empty bin is excluded so that it maps to 0 in the table.
    lut_scale = 255.0 / (total - histogram[first])
    running = 0

    lut = np.zeros(256, dtype=np.uint8)
    for level in range(first + 1, len(histogram)):
        running += histogram[level]
        lut[level] = clip(round(running * lut_scale), np.dtype("uint8"), 255) # type: ignore[arg-type]

    return to_tensor(cv.LUT(tens, lut), override_checks=True)


def equalize(tens: Tensor, mask:Tensor=None, by_channels:bool=True) -> Tensor:
    r"""Equalize the image histogram.

    Args:
        tens (Tensor): RGB or grayscale image of dtype uint8.
        mask (Tensor): An optional mask. If given, only the pixels selected by the mask are included in the analysis. May be a 1-channel or 3-channel array.
        by_channels (bool): If True, equalize each channel separately; otherwise convert the image to YCrCb, equalize only the `Y` channel and convert back.

    Returns:
        Equalized image (Tensor)

    Raises:
        TypeError: If the image is not uint8.
        ValueError: If an RGB mask is given for a grayscale image, or a
            non-grayscale mask is given with ``by_channels=False``.
    

    Examples::

        >> tens = caer.data.beverages()   
        >> equalized = caer.equalize(tens, mask=None)  
        >> equalized.shape   
        (427,640,3)
    
    """
    if tens.dtype != np.uint8:
        raise TypeError("Image must have uint8 channel type")

    if mask is not None:
        if _is_rgb_image(mask) and _is_gray_image(tens):
            raise ValueError(f"Wrong mask shape. Image shape: {tens.shape}. Mask shape: {mask.shape}")

        if not by_channels and not _is_gray_image(mask):
            raise ValueError(f"When `by_channels=False`, only 1-channel mask is supported. Mask shape: {mask.shape}")

        mask = mask.astype(np.uint8)

    # Grayscale input: a single equalization pass is all that is needed.
    if _is_gray_image(tens):
        return to_tensor(_equalize_cv(tens, mask), override_checks=True)

    # Luma-only mode: equalize the first YCrCb plane, then convert back to RGB.
    if not by_channels:
        ycrcb = cv.cvtColor(tens, cv.COLOR_RGB2YCrCb)
        ycrcb[..., 0] = _equalize_cv(ycrcb[..., 0], mask)
        return to_tensor(cv.cvtColor(ycrcb, cv.COLOR_YCrCb2RGB), override_checks=True)

    # Per-channel mode: a single-channel mask is shared by all channels, a
    # multi-channel mask contributes its matching plane.
    equalized = np.empty_like(tens)
    for channel in range(3):
        if mask is None or _is_gray_image(mask):
            channel_mask = mask
        else:
            channel_mask = mask[..., channel]

        equalized[..., channel] = _equalize_cv(tens[..., channel], channel_mask)

    return to_tensor(equalized, override_checks=True)