Source code for py_sod_metrics.sod_metrics

import warnings

import cv2
import numpy as np
from scipy.ndimage import convolve
from scipy.ndimage import distance_transform_edt as bwdist
from skimage import measure, morphology

from .utils import EPS, TYPE, get_adaptive_threshold, validate_and_normalize_input


class Fmeasure:
    r"""F-measure evaluator for salient object detection.

    Accumulates, per (prediction, ground-truth) pair, the adaptive F-measure
    and the precision / recall / F-measure curves over 256 thresholds.

    ```
    @inproceedings{Fmeasure,
        title={Frequency-tuned salient region detection},
        author={Achanta, Radhakrishna and Hemami, Sheila and Estrada, Francisco and S{\"u}sstrunk, Sabine},
        booktitle=CVPR,
        number={CONF},
        pages={1597--1604},
        year={2009}
    }
    ```
    """

    def __init__(self, beta: float = 0.3):
        """Initialize the F-measure evaluator.

        Emits a warning on construction announcing the planned removal of
        this class in favour of ``FmeasureV2``.

        Args:
            beta (float): the weight of the precision
        """
        warnings.warn("This class will be removed in the future, please use FmeasureV2 instead!")
        self.beta = beta
        # One entry is appended to each list per call to ``step``.
        self.precisions = []
        self.recalls = []
        self.adaptive_fms = []
        self.changeable_fms = []

    def step(self, pred: np.ndarray, gt: np.ndarray, normalize: bool = True):
        """Accumulate the statistics for one pair of pred and gt.

        Args:
            pred (np.ndarray): Prediction, gray scale image.
            gt (np.ndarray): Ground truth, gray scale image.
            normalize (bool, optional): Whether to normalize the input data. Defaults to True.
        """
        pred, gt = validate_and_normalize_input(pred, gt, normalize)

        self.adaptive_fms.append(self.cal_adaptive_fm(pred=pred, gt=gt))

        curve_p, curve_r, curve_fm = self.cal_pr(pred=pred, gt=gt)
        self.precisions.append(curve_p)
        self.recalls.append(curve_r)
        self.changeable_fms.append(curve_fm)

    def cal_adaptive_fm(self, pred: np.ndarray, gt: np.ndarray) -> float:
        """Calculate the adaptive F-measure.

        The prediction is binarized with the threshold returned by
        ``get_adaptive_threshold(pred, max_value=1)``.

        Returns:
            float: adaptive_fm (0 when the binarized prediction and gt do not overlap)
        """
        threshold = get_adaptive_threshold(pred, max_value=1)
        binary_prediction = pred >= threshold
        area_intersection = binary_prediction[gt].sum()
        if area_intersection == 0:
            # No overlap: precision and recall are both zero.
            return 0

        precision = area_intersection / np.count_nonzero(binary_prediction)
        recall = area_intersection / np.count_nonzero(gt)
        return (1 + self.beta) * precision * recall / (self.beta * precision + recall)

    def cal_pr(self, pred: np.ndarray, gt: np.ndarray) -> tuple:
        """Calculate precision and recall while the threshold sweeps over 0..255.

        The returned curves can be used to obtain the mean F-measure, maximum
        F-measure, precision-recall curve and F-measure-threshold curve.
        ``changeable_fms`` is provided for convenience so the F-measure based
        quantities can be read off directly.

        Because the histograms are flipped before accumulation, index ``i`` of
        every returned array corresponds to the threshold ``255 - i``.

        Returns:
            tuple: (precisions, recalls, changeable_fms), each of length 256
        """
        # 1. Histogram the quantized prediction separately inside the ground
        #    truth foreground and background. The last bin is [255, 256].
        quantized = (pred * 255).astype(np.uint8)
        edges = np.linspace(0, 256, 257)
        fg_hist, _ = np.histogram(quantized[gt], bins=edges)
        bg_hist, _ = np.histogram(quantized[~gt], bins=edges)

        # 2. A cumulative sum over the reversed histogram yields, in one pass,
        #    the number of pixels that are >= each threshold.
        fg_w_thrs = np.cumsum(fg_hist[::-1], axis=0)
        bg_w_thrs = np.cumsum(bg_hist[::-1], axis=0)

        # 3. Precision and recall share the numerator (pred == 1 & gt == 1) and
        #    differ only in the denominator (pred == 1 vs. gt == 1).
        TPs = fg_w_thrs
        Ps = fg_w_thrs + bg_w_thrs
        # A zero denominator implies a zero numerator, so replacing it with 1
        # avoids the division by zero without changing the result.
        Ps[Ps == 0] = 1
        T = max(np.count_nonzero(gt), 1)

        # TODO: T == 0, or fg_w_thrs == 0 / bg_w_thrs == 0 at some threshold,
        # are all covered by the TPs[i] == 0 case, but TPs is awkward to use
        # for list handling here.
        precisions = TPs / Ps
        recalls = TPs / T

        numerator = (1 + self.beta) * precisions * recalls
        denominator = np.where(numerator == 0, 1, self.beta * precisions + recalls)
        changeable_fms = numerator / denominator
        return precisions, recalls, changeable_fms

    def get_results(self) -> dict:
        """Return the results about F-measure, averaged over all accumulated pairs.

        Returns:
            dict(fm=dict(adp=adaptive_fm, curve=changeable_fm), pr=dict(p=precision, r=recall))
        """
        adaptive_fm = np.mean(np.array(self.adaptive_fms, TYPE))
        # Stacked arrays are (N, 256); averaging over axis 0 keeps the curve.
        changeable_fm = np.mean(np.array(self.changeable_fms, dtype=TYPE), axis=0)
        precision = np.mean(np.array(self.precisions, dtype=TYPE), axis=0)
        recall = np.mean(np.array(self.recalls, dtype=TYPE), axis=0)
        return dict(fm=dict(adp=adaptive_fm, curve=changeable_fm), pr=dict(p=precision, r=recall))
class MAE:
    r"""Mean Absolute Error.

    Accumulates the mean absolute difference between predicted saliency maps
    and ground truth masks.

    ```
    @inproceedings{MAE,
        title={Saliency filters: Contrast based filtering for salient region detection},
        author={Perazzi, Federico and Kr{\"a}henb{\"u}hl, Philipp and Pritch, Yael and Hornung, Alexander},
        booktitle=CVPR,
        pages={733--740},
        year={2012}
    }
    ```
    """

    def __init__(self):
        """Initialize the MAE evaluator."""
        # One per-image MAE value is appended per call to ``step``.
        self.maes = []

    def step(self, pred: np.ndarray, gt: np.ndarray, normalize: bool = True):
        """Accumulate the statistics for one pair of pred and gt.

        Args:
            pred (np.ndarray): Prediction, gray scale image.
            gt (np.ndarray): Ground truth, gray scale image.
            normalize (bool, optional): Whether to normalize the input data. Defaults to True.
        """
        pred, gt = validate_and_normalize_input(pred, gt, normalize)
        self.maes.append(self.cal_mae(pred, gt))

    def cal_mae(self, pred: np.ndarray, gt: np.ndarray) -> np.ndarray:
        """Calculate the mean absolute error.

        Returns:
            np.ndarray: mae, the mean of ``|pred - gt|`` over all elements
        """
        absolute_error = np.abs(pred - gt)
        return np.mean(absolute_error)

    def get_results(self) -> dict:
        """Return the results about MAE, averaged over all accumulated pairs.

        Returns:
            dict(mae=mae)
        """
        per_image = np.array(self.maes, TYPE)
        return dict(mae=np.mean(per_image))
class Smeasure:
    """S-measure (Structure-measure) evaluator.

    Evaluates foreground maps by combining an object-aware and a region-aware
    structural similarity between prediction and ground truth.

    ```
    @inproceedings{Smeasure,
        title={Structure-measure: A new way to eval foreground maps},
        author={Fan, Deng-Ping and Cheng, Ming-Ming and Liu, Yun and Li, Tao and Borji, Ali},
        booktitle=ICCV,
        pages={4548--4557},
        year={2017}
    }
    ```
    """

    def __init__(self, alpha: float = 0.5):
        """Initialize S-measure (Structure-measure) evaluator.

        Args:
            alpha (float, optional): Weight for balancing the object score and the region score.
                The object score is multiplied by ``alpha`` and the region score by
                ``1 - alpha``. Defaults to 0.5 for equal weighting.
        """
        self.sms = []
        self.alpha = alpha

    def step(self, pred: np.ndarray, gt: np.ndarray, normalize: bool = True):
        """Accumulate the statistics for one pair of pred and gt.

        Args:
            pred (np.ndarray): Prediction, gray scale image.
            gt (np.ndarray): Ground truth, gray scale image.
            normalize (bool, optional): Whether to normalize the input data. Defaults to True.
        """
        pred, gt = validate_and_normalize_input(pred, gt, normalize)
        sm = self.cal_sm(pred, gt)
        self.sms.append(sm)

    def cal_sm(self, pred: np.ndarray, gt: np.ndarray) -> float:
        """Calculate the S-measure (Structure-measure) score.

        Computes a weighted combination of the object-aware and region-aware
        scores. When the ground truth is entirely background or entirely
        foreground, a simplified score based on the mean prediction is used.

        Args:
            pred (np.ndarray): Normalized prediction map with values in [0, 1].
            gt (np.ndarray): Binary ground truth mask.

        Returns:
            float: S-measure score, clamped below at 0.
        """
        y = np.mean(gt)
        if y == 0:
            sm = 1 - np.mean(pred)
        elif y == 1:
            sm = np.mean(pred)
        else:
            object_score = self.object(pred, gt) * self.alpha
            region_score = self.region(pred, gt) * (1 - self.alpha)
            # NOTE: ``max(0, nan)`` evaluates to 0, so a NaN in either score
            # would be silently reported as 0. ``s_object`` and ``ssim``
            # therefore guard the degenerate inputs that used to yield NaN.
            sm = max(0, object_score + region_score)
        return sm

    def s_object(self, x: np.ndarray) -> float:
        """Calculate object-aware score for a region.

        The score depends on the mean and the (unbiased) standard deviation of
        the values in ``x``.

        Args:
            x (np.ndarray): Input region data.

        Returns:
            float: Object-aware similarity score.
        """
        mean = np.mean(x)
        # ``np.std(..., ddof=1)`` is NaN for fewer than two samples. A region
        # made of a single pixel has no spread, so its deviation is taken as 0.
        std = np.std(x, ddof=1) if np.size(x) > 1 else 0.0
        score = 2 * mean / (np.power(mean, 2) + 1 + std + EPS)
        return score

    def object(self, pred: np.ndarray, gt: np.ndarray) -> float:
        """Calculate the object-level structural similarity score.

        Scores the foreground (``pred`` inside gt) and the background
        (``1 - pred`` outside gt) separately, then mixes them by the fraction
        of foreground pixels.

        Args:
            pred (np.ndarray): Normalized prediction map with values in [0, 1].
            gt (np.ndarray): Binary ground truth mask.

        Returns:
            float: Object-level similarity score.
        """
        gt_mean = np.mean(gt)
        fg_score = self.s_object(pred[gt]) * gt_mean
        bg_score = self.s_object((1 - pred)[~gt]) * (1 - gt_mean)
        object_score = fg_score + bg_score
        return object_score

    def region(self, pred: np.ndarray, gt: np.ndarray) -> float:
        """Calculate the region-level structural similarity score.

        Splits the image into four quadrants at the foreground centroid and
        sums the SSIM of each quadrant weighted by its share of the area.
        Quadrants may be empty when the centroid lies on the last row or
        column; ``ssim`` returns 0 for them and their weight is 0.

        Args:
            pred (np.ndarray): Normalized prediction map with values in [0, 1].
            gt (np.ndarray): Binary ground truth mask.

        Returns:
            float: Region-level similarity score.
        """
        h, w = gt.shape
        area = h * w

        # Calculate the centroid coordinate of the foreground
        if np.count_nonzero(gt) == 0:
            cy, cx = np.round(h / 2), np.round(w / 2)
        else:
            # More details can be found at: https://www.yuque.com/lart/blog/gpbigm
            cy, cx = np.argwhere(gt).mean(axis=0).round()
        # To ensure consistency with the matlab code, one is added to the centroid coordinate,
        # so there is no need to use the redundant addition operation when dividing the region
        # later, because the sequence generated by ``1:X`` in matlab will contain ``X``.
        cy, cx = int(cy) + 1, int(cx) + 1

        # Use (x,y) to divide the ``pred`` and the ``gt`` into four submatrices, respectively.
        w_lt = cx * cy / area
        w_rt = cy * (w - cx) / area
        w_lb = (h - cy) * cx / area
        w_rb = 1 - w_lt - w_rt - w_lb

        score_lt = self.ssim(pred[0:cy, 0:cx], gt[0:cy, 0:cx]) * w_lt
        score_rt = self.ssim(pred[0:cy, cx:w], gt[0:cy, cx:w]) * w_rt
        score_lb = self.ssim(pred[cy:h, 0:cx], gt[cy:h, 0:cx]) * w_lb
        score_rb = self.ssim(pred[cy:h, cx:w], gt[cy:h, cx:w]) * w_rb

        return score_lt + score_rt + score_lb + score_rb

    def ssim(self, pred: np.ndarray, gt: np.ndarray) -> float:
        """Calculate the SSIM-style score of one region.

        Combines the means, variances and covariance of ``pred`` and ``gt``.
        The score can be negative when the two are anti-correlated.

        Degenerate regions are handled explicitly:

        * an empty region returns 0 (it carries zero area weight in ``region``);
        * a single-pixel region has zero variance and covariance, which the
          formula below maps to a score of 1.

        Args:
            pred (np.ndarray): Prediction region (2D).
            gt (np.ndarray): Ground truth region (2D).

        Returns:
            float: SSIM score of the region.
        """
        h, w = pred.shape
        N = h * w
        if N == 0:
            # ``np.mean`` of an empty slice is NaN, and ``NaN * 0`` would
            # poison the weighted sum in ``region``.
            return 0.0

        x = np.mean(pred)
        y = np.mean(gt)

        # Unbiased normalization; for a single pixel all sums are exactly 0,
        # so dividing by 1 instead of 0 yields 0 rather than NaN.
        dof = max(N - 1, 1)
        sigma_x = np.sum((pred - x) ** 2) / dof
        sigma_y = np.sum((gt - y) ** 2) / dof
        sigma_xy = np.sum((pred - x) * (gt - y)) / dof

        alpha = 4 * x * y * sigma_xy
        beta = (x**2 + y**2) * (sigma_x + sigma_y)

        if alpha != 0:
            score = alpha / (beta + EPS)
        elif alpha == 0 and beta == 0:
            score = 1
        else:
            score = 0
        return score

    def get_results(self) -> dict:
        """Return the results about S-measure, averaged over all accumulated pairs.

        Returns:
            dict(sm=sm)
        """
        sm = np.mean(np.array(self.sms, dtype=TYPE))
        return dict(sm=sm)
class Emeasure:
    """E-measure (Enhanced-alignment Measure) evaluator.

    Assesses binary foreground map quality through an enhanced alignment
    matrix between the binarized prediction and the ground truth.

    ```
    @inproceedings{Emeasure,
        title="Enhanced-alignment Measure for Binary Foreground Map Evaluation",
        author="Deng-Ping {Fan} and Cheng {Gong} and Yang {Cao} and Bo {Ren} and Ming-Ming {Cheng} and Ali {Borji}",
        booktitle=IJCAI,
        pages="698--704",
        year={2018}
    }
    ```

    Note:
        More implementation details: https://www.yuque.com/lart/blog/lwgt38

        ``step`` stores ``gt_fg_numel`` and ``gt_size`` on the instance; the
        ``cal_*`` and ``generate_*`` methods read them.
    """

    def __init__(self):
        """Initialize E-measure (Enhanced-alignment Measure) evaluator."""
        self.adaptive_ems = []
        self.changeable_ems = []

    def step(self, pred: np.ndarray, gt: np.ndarray, normalize: bool = True):
        """Accumulate the statistics for one pair of pred and gt.

        Args:
            pred (np.ndarray): Prediction, gray scale image.
            gt (np.ndarray): Ground truth, gray scale image.
            normalize (bool, optional): Whether to normalize the input data. Defaults to True.
        """
        pred, gt = validate_and_normalize_input(pred, gt, normalize)

        # Cached for the helper methods called below.
        self.gt_fg_numel = np.count_nonzero(gt)
        self.gt_size = gt.shape[0] * gt.shape[1]

        self.changeable_ems.append(self.cal_changeable_em(pred, gt))
        self.adaptive_ems.append(self.cal_adaptive_em(pred, gt))

    def cal_adaptive_em(self, pred: np.ndarray, gt: np.ndarray) -> float:
        """Calculate the adaptive E-measure.

        The prediction is binarized with the threshold returned by
        ``get_adaptive_threshold(pred, max_value=1)``.

        Args:
            pred (np.ndarray): Normalized prediction map with values in [0, 1].
            gt (np.ndarray): Binary ground truth mask.

        Returns:
            float: Adaptive E-measure score.
        """
        threshold = get_adaptive_threshold(pred, max_value=1)
        return self.cal_em_with_threshold(pred, gt, threshold=threshold)

    def cal_changeable_em(self, pred: np.ndarray, gt: np.ndarray) -> np.ndarray:
        """Calculate E-measure scores across all thresholds.

        Thin wrapper around ``cal_em_with_cumsumhistogram``; the resulting
        curve can be used for the maximum / mean E-measure and the
        E-measure-threshold curve.

        Args:
            pred (np.ndarray): Normalized prediction map with values in [0, 1].
            gt (np.ndarray): Binary ground truth mask.

        Returns:
            np.ndarray: Array of 256 E-measure scores, one per histogram bin.
        """
        return self.cal_em_with_cumsumhistogram(pred, gt)

    def cal_em_with_threshold(self, pred: np.ndarray, gt: np.ndarray, threshold: float) -> float:
        """Calculate the E-measure for a specific binarization threshold.

        Args:
            pred (np.ndarray): Normalized prediction map with values in [0, 1].
            gt (np.ndarray): Binary ground truth mask.
            threshold (float): Binarization threshold value.

        Returns:
            float: E-measure score for the given threshold.

        Note:
            Variable naming convention: `[pred_attr(fg/bg)]_[gt_attr(fg/bg)]_[meaning]`
            '_' indicates don't-care attribute.
        """
        binarized_pred = pred >= threshold
        fg_fg_numel = np.count_nonzero(binarized_pred & gt)
        fg_bg_numel = np.count_nonzero(binarized_pred & ~gt)

        fg___numel = fg_fg_numel + fg_bg_numel
        bg___numel = self.gt_size - fg___numel

        if self.gt_fg_numel == 0:
            # All-background gt: only predicted background is aligned.
            enhanced_matrix_sum = bg___numel
        elif self.gt_fg_numel == self.gt_size:
            # All-foreground gt: only predicted foreground is aligned.
            enhanced_matrix_sum = fg___numel
        else:
            parts_numel, combinations = self.generate_parts_numel_combinations(
                fg_fg_numel=fg_fg_numel,
                fg_bg_numel=fg_bg_numel,
                pred_fg_numel=fg___numel,
                pred_bg_numel=bg___numel,
            )

            weighted_parts = []
            for part_numel, (pred_dev, gt_dev) in zip(parts_numel, combinations):
                align_matrix_value = 2 * (pred_dev * gt_dev) / (pred_dev**2 + gt_dev**2 + EPS)
                enhanced_matrix_value = (align_matrix_value + 1) ** 2 / 4
                weighted_parts.append(enhanced_matrix_value * part_numel)
            enhanced_matrix_sum = sum(weighted_parts)

        return enhanced_matrix_sum / (self.gt_size - 1 + EPS)

    def cal_em_with_cumsumhistogram(self, pred: np.ndarray, gt: np.ndarray) -> np.ndarray:
        """Calculate the E-measure for every threshold using cumulative histograms.

        The histograms are flipped before accumulation, so index ``i`` of the
        result corresponds to the threshold ``255 - i``.

        Variable naming rules within the function:
        `[pred attribute(foreground fg, background bg)]_[gt attribute(foreground fg, background bg)]_[meaning]`
        If only `pred` or `gt` is considered, another corresponding attribute location is replaced with '`_`'.
        """
        quantized = (pred * 255).astype(np.uint8)
        edges = np.linspace(0, 256, 257)
        fg_fg_hist, _ = np.histogram(quantized[gt], bins=edges)
        fg_bg_hist, _ = np.histogram(quantized[~gt], bins=edges)
        fg_fg_numel_w_thrs = np.cumsum(fg_fg_hist[::-1], axis=0)
        fg_bg_numel_w_thrs = np.cumsum(fg_bg_hist[::-1], axis=0)

        fg___numel_w_thrs = fg_fg_numel_w_thrs + fg_bg_numel_w_thrs
        bg___numel_w_thrs = self.gt_size - fg___numel_w_thrs

        if self.gt_fg_numel == 0:
            enhanced_matrix_sum = bg___numel_w_thrs
        elif self.gt_fg_numel == self.gt_size:
            enhanced_matrix_sum = fg___numel_w_thrs
        else:
            parts_numel_w_thrs, combinations = self.generate_parts_numel_combinations(
                fg_fg_numel=fg_fg_numel_w_thrs,
                fg_bg_numel=fg_bg_numel_w_thrs,
                pred_fg_numel=fg___numel_w_thrs,
                pred_bg_numel=bg___numel_w_thrs,
            )

            # One row per (pred, gt) part, one column per threshold.
            results_parts = np.empty(shape=(4, 256), dtype=np.float64)
            row = 0
            for part_numel, (pred_dev, gt_dev) in zip(parts_numel_w_thrs, combinations):
                align_matrix_value = 2 * (pred_dev * gt_dev) / (pred_dev**2 + gt_dev**2 + EPS)
                enhanced_matrix_value = (align_matrix_value + 1) ** 2 / 4
                results_parts[row] = enhanced_matrix_value * part_numel
                row += 1
            enhanced_matrix_sum = results_parts.sum(axis=0)

        return enhanced_matrix_sum / (self.gt_size - 1 + EPS)

    def generate_parts_numel_combinations(self, fg_fg_numel, fg_bg_numel, pred_fg_numel, pred_bg_numel):
        """Generate the pixel count and demeaned value pair of each of the four parts.

        The four parts are, in order: (pred fg, gt fg), (pred fg, gt bg),
        (pred bg, gt fg), (pred bg, gt bg). Reads ``self.gt_fg_numel`` and
        ``self.gt_size``.

        Args:
            fg_fg_numel (int): Number of predicted-foreground pixels inside the gt foreground.
            fg_bg_numel (int): Number of predicted-foreground pixels inside the gt background.
            pred_fg_numel (int): Number of predicted-foreground pixels.
            pred_bg_numel (int): Number of predicted-background pixels.

        Returns:
            tuple: ``(parts_numel, combinations)`` where ``parts_numel`` lists the pixel
            count of each part and ``combinations`` lists the matching
            ``(demeaned_pred_value, demeaned_gt_value)`` pairs.
        """
        bg_fg_numel = self.gt_fg_numel - fg_fg_numel
        bg_bg_numel = pred_bg_numel - bg_fg_numel

        mean_pred_value = pred_fg_numel / self.gt_size
        mean_gt_value = self.gt_fg_numel / self.gt_size

        # Binary maps only take the values 1 (fg) and 0 (bg), so demeaning
        # yields exactly two distinct values per map.
        pred_fg_dev, pred_bg_dev = 1 - mean_pred_value, 0 - mean_pred_value
        gt_fg_dev, gt_bg_dev = 1 - mean_gt_value, 0 - mean_gt_value

        parts_numel = [fg_fg_numel, fg_bg_numel, bg_fg_numel, bg_bg_numel]
        combinations = [
            (pred_fg_dev, gt_fg_dev),
            (pred_fg_dev, gt_bg_dev),
            (pred_bg_dev, gt_fg_dev),
            (pred_bg_dev, gt_bg_dev),
        ]
        return parts_numel, combinations

    def get_results(self) -> dict:
        """Return the results about E-measure, averaged over all accumulated pairs.

        Returns:
            dict(em=dict(adp=adaptive_em, curve=changeable_em))
        """
        adaptive_em = np.mean(np.array(self.adaptive_ems, dtype=TYPE))
        changeable_em = np.mean(np.array(self.changeable_ems, dtype=TYPE), axis=0)
        return dict(em=dict(adp=adaptive_em, curve=changeable_em))
class WeightedFmeasure:
    """Weighted F-measure evaluator.

    Weights prediction errors by pixel dependency (errors are smoothed and
    background errors are taken from the nearest foreground pixel) and by
    pixel importance (background errors are weighted by their distance from
    the foreground).

    ```
    @inproceedings{wFmeasure,
        title={How to eval foreground maps?},
        author={Margolin, Ran and Zelnik-Manor, Lihi and Tal, Ayellet},
        booktitle=CVPR,
        pages={248--255},
        year={2014}
    }
    ```
    """

    def __init__(self, beta: float = 1):
        """Initialize Weighted F-measure evaluator.

        Args:
            beta (float, optional): Weight for balancing precision and recall.
                Defaults to 1 for equal weighting (F1-score).
        """
        self.beta = beta
        self.weighted_fms = []

    def step(self, pred: np.ndarray, gt: np.ndarray, normalize: bool = True):
        """Accumulate the statistics for one pair of pred and gt.

        A ground truth without any foreground pixel scores 0.

        Args:
            pred (np.ndarray): Prediction, gray scale image.
            gt (np.ndarray): Ground truth, gray scale image.
            normalize (bool, optional): Whether to normalize the input data. Defaults to True.
        """
        pred, gt = validate_and_normalize_input(pred, gt, normalize)
        wfm = 0 if np.all(~gt) else self.cal_wfm(pred, gt)
        self.weighted_fms.append(wfm)

    def cal_wfm(self, pred: np.ndarray, gt: np.ndarray) -> float:
        """Calculate the weighted F-measure score.

        The MATLAB statements of the reference implementation are quoted in
        the comments next to the equivalent NumPy code.

        Args:
            pred (np.ndarray): Normalized prediction map with values in [0, 1].
            gt (np.ndarray): Binary ground truth mask.

        Returns:
            float: Weighted F-measure score based on weighted precision and recall.
        """
        # Explicit comparisons instead of ``Ew[gt]``: if ``gt`` were an
        # integer array, ``Ew[gt]`` would index dimension 0 instead of masking.
        bg_mask = gt == 0
        fg_mask = gt == 1

        # [Dst,IDXT] = bwdist(dGT);
        dist_to_fg, nearest_fg = bwdist(bg_mask, return_indices=True)

        # %Pixel dependency
        # E = abs(FG-dGT);
        error = np.abs(pred - gt)
        # Et = E;
        # Et(~GT)=Et(IDXT(~GT)); %To deal correctly with the edges of the foreground region
        error_dep = np.copy(error)
        error_dep[bg_mask] = error_dep[nearest_fg[0][bg_mask], nearest_fg[1][bg_mask]]

        # K = fspecial('gaussian',7,5);
        # EA = imfilter(Et,K);
        kernel = self.matlab_style_gauss2D((7, 7), sigma=5)
        error_smooth = convolve(error_dep, weights=kernel, mode="constant", cval=0)
        # MIN_E_EA = E;
        # MIN_E_EA(GT & EA<E) = EA(GT & EA<E);
        min_error = np.where(gt & (error_smooth < error), error_smooth, error)

        # %Pixel importance
        # B = ones(size(GT));
        # B(~GT) = 2-1*exp(log(1-0.5)/5.*Dst(~GT));
        # Ew = MIN_E_EA.*B;
        importance = np.where(bg_mask, 2 - np.exp(np.log(0.5) / 5 * dist_to_fg), np.ones_like(gt))
        weighted_error = min_error * importance

        # TPw = sum(dGT(:)) - sum(sum(Ew(GT)));
        # FPw = sum(sum(Ew(~GT)));
        fg_error = weighted_error[fg_mask]
        TPw = np.sum(gt) - np.sum(fg_error)
        FPw = np.sum(weighted_error[bg_mask])

        # R = 1- mean2(Ew(GT)); %Weighed Recall
        # P = TPw./(eps+TPw+FPw); %Weighted Precision
        R = 1 - np.mean(fg_error)
        P = TPw / (TPw + FPw + EPS)

        # % Q = (1+Beta^2)*(R*P)./(eps+R+(Beta.*P));
        Q = (1 + self.beta) * R * P / (R + self.beta * P + EPS)
        return Q

    def matlab_style_gauss2D(self, shape: tuple = (7, 7), sigma: int = 5) -> np.ndarray:
        """Generate a 2D Gaussian kernel compatible with MATLAB's fspecial.

        Intended to match MATLAB's `fspecial('gaussian', [shape], sigma)`.

        Args:
            shape (tuple, optional): Kernel size as (height, width). Defaults to (7, 7).
            sigma (int, optional): Standard deviation of the Gaussian. Defaults to 5.

        Returns:
            np.ndarray: 2D Gaussian kernel, normalized to sum to 1 unless all
            of its entries were truncated to zero.
        """
        half_h, half_w = ((side - 1) / 2 for side in shape)
        ys, xs = np.ogrid[-half_h : half_h + 1, -half_w : half_w + 1]
        kernel = np.exp(-(xs * xs + ys * ys) / (2 * sigma * sigma))

        # Drop entries that are negligible relative to the peak.
        negligible = kernel < np.finfo(kernel.dtype).eps * kernel.max()
        kernel[negligible] = 0

        total = kernel.sum()
        if total != 0:
            kernel /= total
        return kernel

    def get_results(self) -> dict:
        """Return the results about weighted F-measure, averaged over all accumulated pairs.

        Returns:
            dict(wfm=weighted_fm)
        """
        per_image = np.array(self.weighted_fms, dtype=TYPE)
        return dict(wfm=np.mean(per_image))
class HumanCorrectionEffortMeasure:
    """Human Correction Effort Measure for Dichotomous Image Segmentation.

    ```
    @inproceedings{HumanCorrectionEffortMeasure,
        title = {Highly Accurate Dichotomous Image Segmentation},
        author = {Xuebin Qin and Hang Dai and Xiaobin Hu and Deng-Ping Fan and Ling Shao and Luc Van Gool},
        booktitle = ECCV,
        year = {2022}
    }
    ```
    """

    def __init__(self, relax: int = 5, epsilon: float = 2.0):
        """Initialize the Human Correction Effort Measure.

        Args:
            relax (int, optional): The number of relaxations, i.e. erosion / dilation
                iterations. Defaults to 5.
            epsilon (float, optional): The tolerance passed to the polygon approximation.
                Defaults to 2.0.
        """
        self.hces = []
        self.relax = relax
        self.epsilon = epsilon
        # Structuring element shared by every erosion / dilation below.
        self.morphology_kernel = morphology.disk(1)

    def step(self, pred: np.ndarray, gt: np.ndarray, normalize: bool = True):
        """Accumulate the statistics for one pair of pred and gt.

        Args:
            pred (np.ndarray): Prediction, gray scale image.
            gt (np.ndarray): Ground truth, gray scale image.
            normalize (bool, optional): Whether to normalize the input data. Defaults to True.
        """
        pred, gt = validate_and_normalize_input(pred, gt, normalize)
        self.hces.append(self.cal_hce(pred, gt))

    def cal_hce(self, pred: np.ndarray, gt: np.ndarray) -> float:
        """Calculate the Human Correction Effort (HCE) for a pair of prediction and ground truth.

        The result is the number of polygon control points plus the number of
        independent regions, summed over the relaxed false-positive and
        false-negative areas.

        Args:
            pred (np.ndarray): Prediction, gray scale image. Binarized at 0.5.
            gt (np.ndarray): Ground truth, gray scale image.

        Returns:
            float: The HCE value.
        """
        kernel = self.morphology_kernel
        gt_skeleton = morphology.skeletonize(gt).astype(bool)
        pred = pred > 0.5

        union = np.logical_or(gt, pred)
        TP = np.logical_and(gt, pred)
        FP = np.logical_xor(pred, TP)
        FN = np.logical_xor(gt, TP)

        # relax the union of gt and pred
        eroded_union = cv2.erode(union.astype(np.uint8), kernel, iterations=self.relax)

        # get the relaxed FP regions for computing the human efforts in correcting them
        relaxed_fp = np.logical_and(FP, eroded_union)
        for _ in range(self.relax):
            grown = cv2.dilate(relaxed_fp.astype(np.uint8), kernel)
            relaxed_fp = np.logical_and(grown.astype(bool), ~gt)
        relaxed_fp = np.logical_and(FP, relaxed_fp)

        # get the relaxed FN regions for computing the human efforts in correcting them
        relaxed_fn = np.logical_and(FN, eroded_union)
        # recover the FN, where pixels are not close to the TP borders
        for _ in range(self.relax):
            grown = cv2.dilate(relaxed_fn.astype(np.uint8), kernel)
            relaxed_fn = np.logical_and(grown, ~pred)
        relaxed_fn = np.logical_and(FN, relaxed_fn)
        # preserve the structural components of FN
        missed_skeleton = np.logical_xor(gt_skeleton, np.logical_and(TP, gt_skeleton))
        relaxed_fn = np.logical_or(relaxed_fn, missed_skeleton)

        # Find exact polygon control points and independent regions.
        # find contours from the relaxed FP, then control points and independent regions
        contours_fp, _ = cv2.findContours(relaxed_fp.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
        condition_fp = np.logical_or(TP, relaxed_fn)
        boundaries_fp, independent_fp = self.filter_conditional_boundary(contours_fp, relaxed_fp, condition_fp)

        # find contours from the relaxed FN, then control points and independent regions
        contours_fn, _ = cv2.findContours(relaxed_fn.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
        condition_fn = 1 - np.logical_or(np.logical_or(TP, relaxed_fp), relaxed_fn)
        boundaries_fn, independent_fn = self.filter_conditional_boundary(contours_fn, relaxed_fn, condition_fn)

        points_fp = self.count_polygon_control_points(boundaries_fp, epsilon=self.epsilon)
        points_fn = self.count_polygon_control_points(boundaries_fn, epsilon=self.epsilon)
        return points_fp + independent_fp + points_fn + independent_fn

    def filter_conditional_boundary(self, contours: list, mask: np.ndarray, condition: np.ndarray):
        """Filter boundary segments by a condition mask and count independent regions.

        A contour point is kept when the (dilated) condition is non-zero at
        that pixel and the pixel has not been visited before. Consecutive kept
        points form a segment. A connected region of ``mask`` stays
        "independent" as long as none of its contour points is kept.

        Args:
            contours (List[np.ndarray]): List of boundary contours (OpenCV format).
            mask (np.ndarray): Binary mask representing the region of interest.
            condition (np.ndarray): Condition mask used to determine which boundary points need to be considered.

        Returns:
            Tuple[List[np.ndarray], int]:
                - boundaries (List[np.ndarray]): Filtered boundary segments, each shaped (N, 1, 2).
                - independent_count (int): Number of connected regions of ``mask`` without any kept
                  boundary point.
        """
        condition = cv2.dilate(condition.astype(np.uint8), self.morphology_kernel)

        labels = measure.label(mask)  # find the connected regions
        independent_flags = np.ones(labels.max() + 1, dtype=int)  # one flag per connected region
        independent_flags[0] = 0  # 0 indicates the background region

        visited_map = np.zeros(condition.shape[:2], dtype=int)
        boundaries = []
        for contour in contours:
            segments = []
            segment = []
            for pt in contour:
                row, col = pt[0, 1], pt[0, 0]
                keep = np.sum(condition[row, col]) != 0 and visited_map[row, col] == 0
                if keep:
                    segment.append([col, row])
                    visited_map[row, col] += 1
                    independent_flags[labels[row, col]] = 0  # mark region as requiring correction
                elif segment:
                    # the run of kept points ended: close the current segment
                    segments.append(segment)
                    segment = []
            if segment:
                segments.append(segment)

            # check if the first and the last segments are connected (8-neighbourhood).
            # if yes, invert the first segment and attach it after the last segment
            if len(segments) > 1:
                first_x, first_y = segments[0][0]
                last_x, last_y = segments[-1][-1]
                if max(abs(first_x - last_x), abs(first_y - last_y)) == 1:
                    segments[-1].extend(segments[0][::-1])
                    segments.pop(0)

            # convert to the OpenCV contour layout (N, 1, 2)
            boundaries.extend(np.array(points)[:, np.newaxis, :] for points in segments)

        return boundaries, independent_flags.sum()

    def count_polygon_control_points(self, boundaries: list, epsilon: float = 1.0) -> int:
        """Approximate each boundary as an open polyline and count all resulting vertices.

        Uses ``cv2.approxPolyDP`` (Ramer-Douglas-Peucker algorithm) with ``closed=False``.

        Args:
            boundaries (List[np.ndarray]): List of boundary contours. Each contour is an Nx1x2 numpy array (OpenCV contour format).
            epsilon (float): RDP approximation tolerance. Larger values result in fewer control points.

        Returns:
            int: The total number of control points across all approximated polygons.

        Reference:
            https://en.wikipedia.org/wiki/Ramer-Douglas-Peucker_algorithm
        """
        return sum(len(cv2.approxPolyDP(boundary, epsilon, False)) for boundary in boundaries)

    def get_results(self) -> dict:
        """Return the results about HCE, averaged over all accumulated pairs.

        Returns:
            dict(hce=hce)
        """
        per_image = np.array(self.hces, dtype=TYPE)
        return dict(hce=np.mean(per_image))