# Source code for osculari.datasets.geometrical_shapes

"""
A simple generic dataset of geometrical shapes in foreground.
"""

import numpy as np
import numpy.typing as npt
import math
import random
from typing import Optional, List, Tuple, Sequence, Callable, Any, Union

import cv2
import torch
from torch.utils.data import Dataset as TorchDataset

from . import dataset_utils

# Public API of this module: only the dataset class is exported by a star-import; the helper
# functions defined below remain importable by name.
__all__ = [
    'ShapeAppearanceDataset'
]


def generate_random_polygon(num_sides: int, centre: Optional[Tuple[float, float]] = (0.0, 0.0),
                            radius: Optional[float] = 1.0) -> List[Tuple[float, float]]:
    """
    Builds the vertices of a random polygon inscribed in a circle.

    The full turn is divided into ``num_sides`` equal angular sectors and exactly one vertex is
    placed at a uniformly drawn angle inside each sector, so the vertices are returned in
    increasing angular order and all lie at distance ``radius`` from ``centre``.

    Args:
        num_sides: The number of sides (and therefore vertices) of the polygon (int).
        centre: The coordinates of the polygon's centre (tuple of floats; optional). ``None`` is
         treated as (0.0, 0.0).
        radius: The distance of every vertex from the centre (float; optional). ``None`` is
         treated as 1.0.

    Returns:
        A list of (x, y) vertex coordinates for the generated polygon (list of tuples).
    """

    # Fall back to the unit circle around the origin when the arguments are explicitly None
    radius = 1.0 if radius is None else radius
    centre = (0.0, 0.0) if centre is None else centre

    # Angular width of the sector reserved for each vertex
    sector = 2 * math.pi / num_sides

    def _vertex_at(theta: float) -> Tuple[float, float]:
        # Polar to Cartesian conversion, shifted by the requested centre
        return radius * math.cos(theta) + centre[0], radius * math.sin(theta) + centre[1]

    # One random draw per sector: sector start plus a random offset within the sector
    return [_vertex_at(k * sector + random.uniform(0, sector)) for k in range(num_sides)]


def cv2_filled_polygons(img: npt.NDArray, pts: Sequence, color: Sequence[float],
                        thickness: Optional[int] = 1) -> npt.NDArray:
    """
    Draws closed polygons on an image with OpenCV, optionally filling them.

    The closed outline is always drawn with a line width of ``abs(thickness)``. The interior is
    filled in addition only when ``thickness`` is negative.

    Args:
        img: The image to draw the polygons on (np.ndarray).
        pts: The coordinates of the polygon vertices, forwarded to OpenCV as given (sequence).
        color: The color of the polygons (sequence of floats).
        thickness: The outline width (int; optional). A negative value requests filled polygons.

    Returns:
        The image returned by the OpenCV drawing calls (np.ndarray).

    Raises:
        TypeError: If ``img`` is not a np.ndarray.
    """

    # Reject anything that is not a NumPy array before handing it over to OpenCV
    if not isinstance(img, np.ndarray):
        raise TypeError('img must be of type np.ndarray.')

    # OpenCV receives the color as a float array and the outline width as a non-negative value
    float_color = np.array(color, dtype=float)
    outline_width = abs(thickness)

    # The outline is drawn unconditionally
    canvas = cv2.polylines(
        img, pts=pts, isClosed=True, color=float_color, thickness=outline_width
    )

    # A non-negative thickness means outline only
    if not thickness < 0:
        return canvas

    return cv2.fillPoly(canvas, pts=pts, color=float_color)


def fg_shape_mask(img_size: int) -> npt.NDArray[bool]:
    """
    Creates a square boolean mask containing one random filled polygon.

    A polygon with a random number of sides (3 to 15) is generated on the unit circle, rescaled
    to a random diameter between roughly 10% and 90% of the image size, and shifted by one random
    offset that is applied to both axes, so that the shape stays inside the image.

    Args:
        img_size: The side length of the square mask in pixels (int).

    Returns:
        The mask, True at the pixels covered by the shape (np.ndarray[bool]).
    """

    # Blank single-channel canvas the shape is painted on
    canvas = np.zeros((img_size, img_size), dtype=np.uint8)

    # Random polygon on the unit circle (coordinates within [-1, 1]); upper bound 16 is exclusive
    unit_polygon = generate_random_polygon(np.random.randint(3, 16))

    # Random size of the shape, then a random shift that keeps it within the canvas
    diameter = np.random.randint(int(img_size * 0.10), int(img_size * 0.90))
    offset = np.random.randint(0, img_size - diameter)

    # Map [-1, 1] to [0, 1], scale to the diameter, shift both axes, truncate to pixel indices
    pixel_pts = (((np.array(unit_polygon) + 1) / 2) * diameter + offset).astype(int)

    # Paint the filled shape with value 255 (negative thickness requests filling)
    canvas = cv2_filled_polygons(canvas, pts=[pixel_pts], color=[255], thickness=-1)

    return canvas == 255


class ShapeAppearanceDataset(TorchDataset):
    """
    A dataset of geometrical shapes whose appearance properties can be altered.

    Samples are generated on the fly at every access; the index passed to ``__getitem__`` is not
    used to select the content.

    Args:
        num_samples: The total number of samples to generate.
        num_images: The number of foreground-background pairs to generate for each sample.
        img_size: The size of the image (int).
        background: The background type, forwarded as is to ``dataset_utils.background_img``.
        merge_fg_bg: A function to merge the foreground and background images (callable).
         This function should accept two positional arguments (the list of foreground masks and
         the list of background images). This function should return the merged images and
         corresponding ground-truth(s).
        unique_fg_shape: If true, one foreground shape is generated per sample and copied for
         all its images; otherwise each image gets its own generated shape (bool; optional).
        unique_bg: If true, one background is generated per sample and copied for all its
         images; otherwise each image gets its own generated background (bool; optional).
        transform: An optional transformation to be applied to each merged image
         (callable; optional).

    Raises:
        AssertionError: If ``merge_fg_bg`` is not callable.
    """

    def __init__(
            self,
            num_samples: int,
            num_images: int,
            img_size: int,
            background: Any,
            merge_fg_bg: Callable[[List[npt.NDArray[bool]], List[npt.NDArray]], Tuple],
            unique_fg_shape: Optional[bool] = True,
            unique_bg: Optional[bool] = True,
            transform: Optional[Callable] = None
    ) -> None:
        super().__init__()
        # Raised explicitly (instead of a bare ``assert``) so the check is not stripped when
        # Python runs with -O; the exception type is kept for backward compatibility.
        if not callable(merge_fg_bg):
            raise AssertionError('merge_fg_bg must be callable.')
        self.num_samples = num_samples
        self.num_images = num_images
        self.img_size = img_size
        self.merge_fg_bg = merge_fg_bg
        self.bg = background
        self.unique_fg_shape = unique_fg_shape
        self.unique_bg = unique_bg
        self.transform = transform

    def __len__(self) -> int:
        """
        Determines the total number of data samples.

        Returns:
            The total number of samples (int).
        """
        return self.num_samples

    def make_fg_masks(self) -> List[npt.NDArray[bool]]:
        """
        Generates foreground masks with randomly positioned shapes.

        Returns:
            A list of ``num_images`` foreground masks (List[npt.NDArray[bool]]).
        """
        if self.unique_fg_shape:
            # A single shape shared by all images; copies keep the masks independent objects
            fg_mask = fg_shape_mask(self.img_size)
            return [fg_mask.copy() for _ in range(self.num_images)]
        # A freshly generated shape for every image
        return [fg_shape_mask(self.img_size) for _ in range(self.num_images)]

    def make_bg_images(self) -> List[npt.NDArray]:
        """
        Generates background images.

        Returns:
            A list of ``num_images`` background images (List[npt.NDArray]).
        """
        if self.unique_bg:
            # A single background shared by all images; copies keep them independent objects
            bg_img = dataset_utils.background_img(self.bg, self.img_size)
            return [bg_img.copy() for _ in range(self.num_images)]
        # A freshly generated background for every image
        return [
            dataset_utils.background_img(self.bg, self.img_size)
            for _ in range(self.num_images)
        ]

    def __getitem__(self, _idx: int) -> Tuple[Any, ...]:
        """
        Retrieve a data sample.

        Args:
            _idx: The index of the sample (int). It is ignored: every call generates a new
             sample.

        Returns:
            A flat tuple made of the merged (and, if a transform is set, transformed) images
            followed by the ground truth returned by ``merge_fg_bg`` as its last element (Tuple).
        """
        fgs = self.make_fg_masks()  # foregrounds
        bgs = self.make_bg_images()  # backgrounds

        images, gt = self.merge_fg_bg(fgs, bgs)

        if self.transform:
            images = [self.transform(img) for img in images]
        return (*images, gt)