Source code for py_sod_metrics.multiscale_iou

import numpy as np
from scipy import ndimage

from .utils import TYPE, get_adaptive_threshold, validate_and_normalize_input


class MSIoU:
    """Multi-Scale Intersection over Union (MSIoU) metric.

    This implements the MSIoU metric which evaluates segmentation quality at
    multiple scales by comparing edge maps. It addresses the limitation of
    traditional IoU which struggles with fine structures in segmentation
    results.

    ```
    @inproceedings{MSIoU,
        title = {Multiscale IOU: A Metric for Evaluation of Salient Object Detection with Fine Structures},
        author = {Ahmadzadeh, Azim and Kempton, Dustin J. and Chen, Yang and Angryk, Rafal A.},
        booktitle = ICIP,
        year = {2021},
    }
    ```
    """

    def __init__(self, with_dynamic: bool, with_adaptive: bool, *, with_binary: bool = False, num_levels=10):
        """Initialize the MSIoU evaluator.

        Args:
            with_dynamic (bool): Record dynamic results for max/avg/curve versions.
            with_adaptive (bool): Record adaptive results for adp version.
            with_binary (bool, optional): Record binary results for binary version.
                Defaults to False.
            num_levels (int, optional): Number of grid scales at which the edge
                maps are compared. Defaults to 10.

        Raises:
            ValueError: If ``num_levels`` is smaller than 1.
        """
        if num_levels < 1:
            # Zero levels would silently score every sample as 0.
            raise ValueError("num_levels must be a positive integer")

        # A result list set to None means "this variant is disabled".
        self.dynamic_results = [] if with_dynamic else None
        self.adaptive_results = [] if with_adaptive else None
        self.binary_results = [] if with_binary else None

        # The values of this collection determine the resolutions at which
        # MSIoU is computed: powers of two whose exponents are `num_levels`
        # integer-truncated samples of [0, 9] (1, 2, 4, ..., 512 by default,
        # as in the original implementation).
        self.cell_sizes = np.power(2, np.linspace(0, 9, num=num_levels, dtype=int))

    def get_edge(self, mask: np.ndarray):
        """Edge detection based on the `scipy.ndimage.sobel` function.

        :param mask: a binary mask of an object whose edges are of interest.
        :return: a mask of 1's as edges and 0's as background, in the dtype
            produced by ``np.hypot`` for the filtered input.
        """
        # NOTE(review): scipy's filters create their output in the input dtype
        # by default, so a boolean mask is filtered in boolean precision --
        # confirm this is intended for the masks passed by `step`.
        sx = ndimage.sobel(mask, axis=0, mode="constant")
        sy = ndimage.sobel(mask, axis=1, mode="constant")
        sob = np.hypot(sx, sy)
        # Any non-zero gradient magnitude counts as an edge pixel.
        return (sob > 0).astype(sob.dtype)

    def shrink_by_grid(self, image: np.ndarray, cell_size: int) -> np.ndarray:
        """Shrink the image by checking for any non-zero values within grid cells.

        Performs box-counting after applying zero padding if the image
        dimensions are not perfectly divisible by the cell size.

        :param image: The input binary image (edges).
        :param cell_size: The size of the grid cells.
        :return: A shrunk binary image where each pixel represents a grid cell,
            in the same dtype as ``image``.
        :raises ValueError: if ``cell_size`` is not positive.
        """
        if cell_size <= 0:
            raise ValueError("Cell size must be a positive integer")

        if cell_size > 1:
            # Calculate padding sizes to make dimensions divisible by cell_size
            h, w = image.shape[:2]
            pad_h = (cell_size - h % cell_size) % cell_size
            pad_w = (cell_size - w % cell_size) % cell_size

            if pad_h > 0 or pad_w > 0:
                # Padding is added to the top and left edges.
                image = np.pad(image, ((pad_h, 0), (pad_w, 0)), mode="constant", constant_values=0)

            # Split both axes into (number of cells, cell size) so that every
            # grid cell can be reduced in a single vectorised call.
            h, w = image.shape[:2]
            image = image.reshape(h // cell_size, cell_size, w // cell_size, cell_size)
            # A cell is set when it contains at least one non-zero pixel; the
            # result is cast back to the input dtype.
            return image.any(axis=(1, 3)).astype(image.dtype)

        # At cell size 1 the grid is the image itself; only binarize it.
        return (image > 0).astype(image.dtype)

    def multi_scale_iou(self, pred_edge: np.ndarray, gt_edge: np.ndarray, gt_shrunk_cache: list = None) -> list:
        """Calculate Multi-Scale IoU.

        Args:
            pred_edge (np.ndarray): edge map of pred
            gt_edge (np.ndarray): edge map of gt
            gt_shrunk_cache (list, optional): precomputed GT shrunk results, one
                entry per element of ``self.cell_sizes`` and in the same order.

        Returns:
            list: one ratio per cell size.
        """
        ratios = []
        for i, cell_size in enumerate(self.cell_sizes):
            # Shrink both prediction and ground truth edges
            shrunk_pred_edge = self.shrink_by_grid(pred_edge, cell_size=cell_size)
            if gt_shrunk_cache is not None:
                shrunk_gt_edge = gt_shrunk_cache[i]
            else:
                shrunk_gt_edge = self.shrink_by_grid(gt_edge, cell_size=cell_size)

            # Add-one smoothing prevents division by zero for empty edge maps.
            numerator = np.logical_and(shrunk_pred_edge, shrunk_gt_edge).sum() + 1
            # Only the ground truth is considered for the denominator. The
            # cells are counted with integer arithmetic: summing in the mask's
            # own dtype loses precision when that dtype is narrow (np.hypot
            # yields float16 for boolean masks, which is inexact above 2048).
            denominator = np.count_nonzero(shrunk_gt_edge) + 1
            ratios.append(numerator / denominator)
        return ratios

    def binarizing(self, pred_bin: np.ndarray, gt_edge: np.ndarray, gt_shrunk_cache: list = None) -> float:
        """Calculate the Multi-Scale IoU score of one binarized prediction.

        Args:
            pred_bin (np.ndarray): binarized pred
            gt_edge (np.ndarray): edge map of the ground truth.
            gt_shrunk_cache (list, optional): precomputed GT shrunk results.

        Returns:
            float: area under the curve of the per-scale ratios, with the
            scales spread evenly over [0, 1].
        """
        pred_edge = self.get_edge(pred_bin)
        ratios = self.multi_scale_iou(pred_edge, gt_edge, gt_shrunk_cache=gt_shrunk_cache)

        if len(ratios) == 1:
            # A single scale has no curve to integrate (dx would be 1 / 0);
            # the only ratio is the score.
            return float(ratios[0])

        # `np.trapz` is deprecated since NumPy 2.0 in favour of `np.trapezoid`;
        # fall back to the old name on releases that predate the new one.
        integrate = getattr(np, "trapezoid", None) or np.trapz
        # Calculate area under the curve using trapezoidal rule
        return integrate(y=ratios, dx=1 / (len(ratios) - 1))

    def step(self, pred: np.ndarray, gt: np.ndarray, normalize: bool = True):
        """Accumulate the Multi-Scale IoU for a single prediction-ground truth pair.

        Edges are extracted from the ground truth and from each binarized
        version of the prediction, IoU ratios are computed at the scales
        defined by ``self.cell_sizes`` and the area under the resulting curve
        is appended to every enabled result list. Nothing is returned; use
        ``get_results`` to read the aggregated scores.

        Args:
            pred (np.ndarray): Prediction, gray scale image.
            gt (np.ndarray): Ground truth, gray scale image.
            normalize (bool, optional): Whether to normalize the input data.
                Defaults to True.
        """
        # NOTE(review): presumably yields `pred` scaled to [0, 1] and a binary
        # `gt` -- confirm against `utils.validate_and_normalize_input`.
        pred, gt = validate_and_normalize_input(pred, gt, normalize)

        gt_edge = self.get_edge(gt)
        # The ground truth grids do not depend on the prediction threshold, so
        # they are built once and shared by every binarization below.
        gt_shrunk_cache = [self.shrink_by_grid(gt_edge, cell_size=cell_size) for cell_size in self.cell_sizes]

        if self.dynamic_results is not None:
            _pred = (pred * 255).astype(np.uint8)

            # `_pred >= t` differs from `_pred >= t - 1` only when some pixel
            # equals t - 1, so a new mask is needed only at "value + 1" (plus
            # both ends of the range). Widen before adding 1 so that 255 does
            # not wrap around in uint8.
            needs_new_mask = np.zeros(257, dtype=bool)
            needs_new_mask[np.unique(_pred).astype(np.int64) + 1] = True
            needs_new_mask[[0, 256]] = True

            results = []
            for threshold, is_new in zip(np.linspace(0, 256, 257), needs_new_mask):
                if is_new:
                    results.append(self.binarizing(_pred >= threshold, gt_edge, gt_shrunk_cache))
                else:
                    # Same binary mask as for the previous threshold.
                    results.append(results[-1])
            self.dynamic_results.append(results)

        if self.adaptive_results is not None:
            adaptive_threshold = get_adaptive_threshold(pred, max_value=1)
            results = self.binarizing(pred >= adaptive_threshold, gt_edge, gt_shrunk_cache)
            self.adaptive_results.append(results)

        if self.binary_results is not None:
            self.binary_results.append(self.binarizing(pred > 0.5, gt_edge, gt_shrunk_cache))

    def get_results(self) -> dict:
        """Return the results about MSIoU.

        Averages the scores stored by previous calls to ``step()`` over all
        processed samples. No check is made that any sample was processed.

        :return: Dictionary holding one entry per enabled variant:
            ``"dynamic"`` (mean score per threshold, 257 values),
            ``"adaptive"`` and ``"binary"`` (mean scores).
        """
        results = {}
        if self.dynamic_results is not None:
            # axis=0 averages over samples and keeps the threshold axis.
            results["dynamic"] = np.mean(np.array(self.dynamic_results, dtype=TYPE), axis=0)
        if self.adaptive_results is not None:
            results["adaptive"] = np.mean(np.array(self.adaptive_results, dtype=TYPE))
        if self.binary_results is not None:
            results["binary"] = np.mean(np.array(self.binary_results, dtype=TYPE))
        return results