from __future__ import annotations
import itertools
import logging
import sys
from contextlib import contextmanager
from typing import TYPE_CHECKING
from typing import Iterator
from typing import Sequence
from typing import cast as type_cast
import cv2 as cv
import numpy as np
import numpy.typing as npt
from shapely import MultiPolygon
from shapely import Point
from shapely import Polygon
from shapely import STRtree
[docs]
logger = logging.getLogger(__name__)
@contextmanager
[docs]
def temporary_recursion_limit(limit: int) -> Iterator[None]:
old_limit = sys.getrecursionlimit()
try:
sys.setrecursionlimit(limit)
yield
finally:
sys.setrecursionlimit(old_limit)
[docs]
def get_multipolygon_from_binary_arr(
arr: npt.NDArray[np.int_], scale: tuple[float, float] | None = None
) -> tuple[MultiPolygon, Sequence[npt.NDArray[np.int_]], npt.NDArray[np.int_]] | None:
"""Create a Shapely Polygon from a binary array.
Parameters
----------
arr : array
Binary array where non-zero values indicate presence of tissue.
scale : tuple of two floats, optional
If specified, this is the factor by which coordinates are multiplied to recover
the coordinates at the base resolution of the whole slide image.
Returns
-------
polygon
A shapely `MultiPolygon` object representing tissue regions.
contours
A sequence of arrays representing unscaled contours of tissue.
hierarchy
An array of the hierarchy of contours.
"""
# Find contours and hierarchy
contours: Sequence[npt.NDArray]
hierarchy: npt.NDArray | None
contours, hierarchy = cv.findContours(arr, cv.RETR_CCOMP, cv.CHAIN_APPROX_SIMPLE)
if hierarchy is None:
return None
hierarchy = hierarchy.squeeze(0)
logger.info(f"Detected {len(contours)} contours")
contours_unscaled = contours
if scale is not None:
logger.info(
"Scaling contours to slide-level (level=0) coordinate space by multiplying"
f" by {scale}"
)
# Reshape to broadcast with contour coordinates.
scale_arr: npt.NDArray = np.array(scale).reshape(1, 1, 2)
contours = tuple(c * scale_arr for c in contours_unscaled)
del scale_arr
# From https://stackoverflow.com/a/75510437/5666087
def merge_polygons(polygon: MultiPolygon, idx: int, add: bool) -> MultiPolygon:
"""
polygon: Main polygon to which a new polygon is added
idx: Index of contour
add: If this contour should be added (True) or subtracted (False)
"""
# Get contour from global list of contours
contour = np.squeeze(contours[idx])
# cv2.findContours() sometimes returns a single point -> skip this case
if len(contour) > 2:
# Convert contour to shapely polygon
new_poly = Polygon(contour)
# Not all polygons are shapely-valid (self intersection, etc.)
if not new_poly.is_valid:
# Convert invalid polygon to valid
new_poly = new_poly.buffer(0)
# Merge new polygon with the main one
if add:
polygon = polygon.union(new_poly)
else:
polygon = polygon.difference(new_poly)
# Check if current polygon has a child
if hierarchy is None:
raise NotImplementedError()
child_idx = hierarchy[idx][2]
if child_idx >= 0:
# Call this function recursively, negate `add` parameter
polygon = merge_polygons(polygon, child_idx, not add)
# Check if there is some next polygon at the same hierarchy level
next_idx = hierarchy[idx][0]
if next_idx >= 0:
# Call this function recursively
polygon = merge_polygons(polygon, next_idx, add)
return polygon
temp_limit = max(sys.getrecursionlimit(), len(contours))
with temporary_recursion_limit(temp_limit):
# Call the function with an initial empty polygon and start from contour 0
polygon = merge_polygons(MultiPolygon(), 0, True)
if TYPE_CHECKING:
hierarchy = type_cast(npt.NDArray[np.int_], hierarchy)
contours_unscaled = type_cast(Sequence[npt.NDArray[np.int_]], contours_unscaled)
# Add back the axis in hierarchy because we squeezed it before.
return polygon, contours_unscaled, hierarchy[np.newaxis]
[docs]
def get_patch_coordinates_within_polygon(
slide_width: int,
slide_height: int,
patch_size: int,
half_patch_size: int,
polygon: Polygon,
overlap: float = 0.0,
) -> npt.NDArray[np.int_]:
"""Get coordinates of patches within a polygon.
Parameters
----------
slide_width : int
The width of the slide in pixels at base resolution.
slide_height : int
The height of the slide in pixels at base resolution.
patch_size : int
The size of a patch in pixels.
half_patch_size : int
Half of the length of a patch in pixels.
polygon : Polygon
A shapely Polygon representing the presence of tissue.
overlap : float
The proportion of the patch_size to overlap. A value of 0.5
would have an overlap of 50%. A value of 0.2 would have an
overlap of 20%. Negative values will add space between patches.
A value of -1 would skip every other patch. Value must be in (-inf, 1).
The default value of 0.0 produces non-overlapping patches.
Returns
-------
coordinates
Array with shape (N, 2), where N is the number of tiles. Each row in this array
contains the coordinates of the top-left of a tile: (minx, miny).
"""
if overlap >= 1:
raise ValueError(f"overlap must be in (-inf, 1) but got {overlap}")
# Handle potentially overlapping slides.
step_size = round((1 - overlap) * patch_size)
logger.info(f"Patches are {patch_size} px, with step size of {step_size} px.")
# Make an array of Nx2, where each row is (x, y) centroid of the patch.
tile_centroids_arr: npt.NDArray[np.int_] = np.array(
list(
itertools.product(
range(0 + half_patch_size, slide_width, step_size),
range(0 + half_patch_size, slide_height, step_size),
)
)
)
tile_centroids_poly = [Point(c) for c in tile_centroids_arr]
# Query which centroids are inside the polygon.
tree = STRtree(tile_centroids_poly)
centroid_indexes_in_polygon: npt.NDArray[np.int_] = tree.query(
polygon, predicate="contains"
)
# Sort so x and y are in ascending order (and y changes most rapidly).
centroid_indexes_in_polygon.sort()
tile_centroids_in_polygon = tile_centroids_arr[centroid_indexes_in_polygon]
# Transform the centroids to the upper-left point (x, y).
tile_minx_miny_in_polygon = tile_centroids_in_polygon - half_patch_size
return tile_minx_miny_in_polygon