# Source code for py_sod_metrics.context_measure

import math

import cv2
import numpy as np
from skimage.color import deltaE_ciede2000, rgb2lab
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler

from .utils import EPS, TYPE, validate_and_normalize_input


class ContextMeasure:
    """Context-measure for evaluating foreground segmentation quality.

    This metric evaluates predictions by considering both forward inference
    (how well predictions align with ground truth) and reverse deduction
    (how completely ground truth is covered by predictions), using
    context-aware Gaussian kernels.

    ```
    @article{ContextMeasure,
        title={Context-measure: Contextualizing Metric for Camouflage},
        author={Wang, Chen-Yang and Ji, Gepeng and Shao, Song and Cheng, Ming-Ming and Fan, Deng-Ping},
        journal={arXiv preprint arXiv:2512.07076},
        year={2025}
    }
    ```
    """

    def __init__(self, beta2: float = 1.0, alpha: float = 6.0):
        """Initialize the Context Measure evaluator.

        Args:
            beta2 (float): Balancing factor between forward inference and
                reverse deduction. Higher values give more weight to forward
                inference. Defaults to 1.0.
            alpha (float): Scaling factor for Gaussian kernel covariance,
                controls the spatial context range. Defaults to 6.0.
        """
        self.beta2 = beta2
        self.alpha = alpha
        # e / (e - 1) rescales 1 - exp(-x) (which saturates at 1 - 1/e)
        # back onto [0, 1]; used by _reverse_deduction.
        self._exp_factor = math.e / (math.e - 1)
        self.scores = []

    def step(self, pred: np.ndarray, gt: np.ndarray, normalize: bool = True):
        """Statistics the metric for the pair of pred and gt.

        Args:
            pred (np.ndarray): Prediction, gray scale image.
            gt (np.ndarray): Ground truth, gray scale image.
            normalize (bool, optional): Whether to normalize the input data.
                Defaults to True.
        """
        pred, gt = validate_and_normalize_input(pred, gt, normalize)
        # align with the original implementation
        pred = pred.astype(TYPE)
        gt = gt.astype(TYPE)
        # The base metric uses an all-zero camouflage-degree map; the
        # camouflage-aware subclass supplies a real one.
        score = self.compute(pred, gt, cd=np.zeros_like(gt))
        self.scores.append(score)

    def compute(self, pred: np.ndarray, gt: np.ndarray, cd: np.ndarray) -> float:
        """Compute the context measure between prediction and ground truth.

        Args:
            pred (np.ndarray): Prediction map (values between 0 and 1).
            gt (np.ndarray): Ground truth map (boolean or 0/1 values).
            cd (np.ndarray): Camouflage degree map (values between 0 and 1).

        Returns:
            float: Context measure value.
        """
        cov_matrix, x_dis, y_dis = self._compute_y_params(gt)
        K = self._gaussian_kernel(x_dis, y_dis, cov_matrix)

        # Forward inference: measure prediction relevance
        forward = self._forward_inference(pred, gt, K)
        mforward = np.sum(forward * pred) / (np.sum(pred) + EPS)

        # Reverse deduction: measure ground truth completeness
        reverse = self._reverse_deduction(pred, gt, K)
        wreverse = np.sum(reverse * (gt + cd)) / (np.sum(gt) + np.sum(cd) + EPS)

        # F-measure style combination
        return (1 + self.beta2) * mforward * wreverse / (self.beta2 * mforward + wreverse + EPS)

    def _forward_inference(self, X: np.ndarray, Y: np.ndarray, kernel: np.ndarray) -> np.ndarray:
        """Calculate forward inference: how well predictions align with ground truth context."""
        x_binary = (X > 0).astype(int)
        # note: using EPS=1e-8 and this statement, the test result is the same as the original implementation
        # global_relevance_matrix = cv2.filter2D(Y, cv2.CV_32F, kernel)
        # note: this is a hack to make sure that the type of Y is compatible with more diverse data
        global_relevance_matrix = cv2.filter2D(Y.astype(np.float32), cv2.CV_32F, kernel)
        return x_binary * global_relevance_matrix

    def _reverse_deduction(self, X: np.ndarray, Y: np.ndarray, kernel: np.ndarray) -> np.ndarray:
        """Calculate reverse deduction: how completely ground truth is covered by predictions."""
        X = X.astype(float)
        non_global_completeness_matrix = np.exp(-1 * cv2.filter2D(X, -1, kernel))
        global_completeness_matrix = 1 - non_global_completeness_matrix
        # _exp_factor rescales the saturated exponential response to [0, 1].
        reverse = self._exp_factor * Y * global_completeness_matrix
        return reverse

    def _gaussian_kernel(self, x_dis: int, y_dis: int, cov_matrix: np.ndarray) -> np.ndarray:
        """Generate a 2D Gaussian kernel based on covariance matrix.

        The kernel covers ``[-x_dis, x_dis] x [-y_dis, y_dis]`` and is
        normalized so its entries sum to 1.
        """
        det_sigma = np.linalg.det(cov_matrix)
        inv_sigma = np.linalg.inv(cov_matrix)
        x, y = np.meshgrid(np.arange(-x_dis, x_dis + 1), np.arange(-y_dis, y_dis + 1), indexing="ij")
        Z = np.stack([x, y], axis=-1)
        # Quadratic form z^T * Sigma^{-1} * z evaluated at every grid offset.
        exp_term = np.einsum("...i,ij,...j->...", Z, inv_sigma, Z)
        kernel = np.exp(-0.5 * exp_term) / (2 * np.pi * np.sqrt(det_sigma))
        return kernel / np.sum(kernel)

    def _compute_y_params(self, Y: np.ndarray) -> tuple:
        """Compute Gaussian kernel parameters based on ground truth distribution.

        Returns:
            tuple: (scaled covariance matrix, x half-extent, y half-extent)
                of the kernel; a small isotropic fallback when the foreground
                has at most one pixel.
        """
        points = np.argwhere(Y > 0)
        if len(points) <= 1:
            return np.diag([0.25, 0.25]), 1, 1
        # NOTE(review): if all foreground pixels are collinear along one axis,
        # cov_matrix is singular and _gaussian_kernel's inversion will fail —
        # confirm whether upstream masks rule this out.
        cov_matrix = np.cov(points, rowvar=False)
        sigma_x = np.sqrt(cov_matrix[0, 0])
        sigma_y = np.sqrt(cov_matrix[1, 1])
        total_sigma = np.sqrt(cov_matrix[0, 0] + cov_matrix[1, 1])
        # Normalize total spread to alpha, preserving the anisotropy ratio.
        std_cov_matrix = self.alpha**2 * cov_matrix / (total_sigma**2)
        std_sigma_x = self.alpha * sigma_x / total_sigma
        std_sigma_y = self.alpha * sigma_y / total_sigma
        # 3-sigma support for the kernel window.
        x_dis = round(3 * std_sigma_x)
        y_dis = round(3 * std_sigma_y)
        return std_cov_matrix, x_dis, y_dis

    def get_results(self) -> dict:
        """Return the results about context measure.

        Returns:
            dict(cm=context_measure)
        """
        cm = np.mean(np.array(self.scores, dtype=TYPE))
        return dict(cm=cm)
class CamouflageContextMeasure(ContextMeasure):
    """Camouflage Context-measure for evaluating camouflaged object detection quality.

    This metric extends the base ContextMeasure by incorporating camouflage
    degree, which measures how well the foreground blends with its surrounding
    background. It uses patch-based nearest neighbor matching in Lab color
    space with spatial constraints to estimate camouflage difficulty.

    ```
    @article{ContextMeasure,
        title={Context-measure: Contextualizing Metric for Camouflage},
        author={Wang, Chen-Yang and Ji, Gepeng and Shao, Song and Cheng, Ming-Ming and Fan, Deng-Ping},
        journal={arXiv preprint arXiv:2512.07076},
        year={2025}
    }
    ```
    """

    def __init__(self, beta2: float = 1.2, alpha: float = 6.0, gamma: int = 8, lambda_spatial: float = 20):
        """Initialize the Camouflage Context Measure evaluator.

        Args:
            beta2 (float): Balancing factor for forward and reverse. Defaults to 1.2 for camouflage.
            alpha (float): Gaussian kernel scaling factor. Defaults to 6.0.
            gamma (int): Exponential scaling factor for camouflage degree. Defaults to 8.
            lambda_spatial (float): Weight for spatial distance in ANN search. Defaults to 20.
        """
        super().__init__(beta2=beta2, alpha=alpha)
        self.gamma = gamma
        self.lambda_spatial = lambda_spatial

    def step(self, pred: np.ndarray, gt: np.ndarray, img: np.ndarray, normalize: bool = True):
        """Statistics the metric for the pair of pred, gt, and img.

        Args:
            pred (np.ndarray): Prediction, gray scale image.
            gt (np.ndarray): Ground truth, gray scale image.
            img (np.ndarray): Original RGB image (required for camouflage degree calculation).
            normalize (bool, optional): Whether to normalize the input data. Defaults to True.
        """
        pred, gt = validate_and_normalize_input(pred, gt, normalize)
        pred = pred.astype(TYPE)
        gt = gt.astype(TYPE)
        _, cd = self._calculate_camouflage_degree(img, gt)
        score = self.compute(pred, gt, cd=cd)
        self.scores.append(score)

    def _calculate_camouflage_degree(self, img: np.ndarray, mask: np.ndarray, w: int = 7) -> tuple:
        """Compute the camouflage degree matrix using Lab+spatial ANN and RGB reconstruction.

        Args:
            img (np.ndarray): RGB image (H x W x 3).
            mask (np.ndarray): Binary mask (H x W).
            w (int): Patch size. Defaults to 7.

        Returns:
            tuple: (reconstructed_image, camouflage_degree_matrix)
        """
        mask_binary = (mask > 0).astype(np.uint8)
        fg_mask = mask_binary
        bg_mask = self._extract_surrounding_background(fg_mask, kernel_size=20)
        im_fg = fg_mask[:, :, np.newaxis] * img
        im_bg = bg_mask[:, :, np.newaxis] * img
        im_lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)

        # Step 1: Extract patches in Lab space
        im_fg_lab = im_lab * fg_mask[:, :, np.newaxis]
        im_bg_lab = im_lab * bg_mask[:, :, np.newaxis]
        fg_indices, fg_feat_lab = self._extract_patches(im_fg_lab, fg_mask, w, d=w // 2)
        bg_indices, bg_feat_lab = self._extract_patches(im_bg_lab, bg_mask, w, d=w // 2)

        # Check if we have enough patches to compute camouflage degree
        if len(fg_indices) == 0 or len(bg_indices) == 0:
            # Return zero camouflage degree when insufficient data
            img_recon = np.zeros_like(img)
            cd = np.zeros_like(mask, dtype=TYPE)
            return img_recon, cd

        # Step 2: Lab+spatial ANN query
        fg_nn = self._ann_with_spatial_faiss(bg_feat_lab, fg_feat_lab, bg_indices, fg_indices)

        # Step 3: Reconstruct foreground in RGB space
        img_recon = self._reconstruct_image(img, fg_indices, bg_indices, fg_nn, im_bg, w)

        # Step 4: Compute similarity in Lab space
        similarity_matrix = self._compute_delta_e2000_matrix(img_recon, im_fg.astype(np.uint8)).astype(TYPE)

        # Step 5: Compute camouflage degree
        cd = ((np.exp(self.gamma * similarity_matrix * mask_binary) - 1) / (np.exp(self.gamma) - 1)).astype(TYPE)
        return img_recon, cd

    def _ann_with_spatial_faiss(self, x, q, x_coords, q_coords, m=16):
        """Approximate Nearest Neighbor search with spatial constraints using sklearn.

        Note: Method name retained for compatibility, but now uses
        sklearn.neighbors.NearestNeighbors instead of FAISS for a more
        lightweight dependency.
        """
        # Scale coordinates jointly so both sets share the same normalization.
        all_coords = np.vstack([x_coords, q_coords])
        scaled_coords = StandardScaler().fit_transform(all_coords)
        x_coords_scaled = scaled_coords[: len(x_coords)]
        q_coords_scaled = scaled_coords[len(x_coords) :]

        # Augment Lab features with weighted spatial coordinates.
        x_aug = np.hstack([x, self.lambda_spatial * x_coords_scaled]).astype(np.float32)
        q_aug = np.hstack([q, self.lambda_spatial * q_coords_scaled]).astype(np.float32)

        # Use sklearn NearestNeighbors instead of FAISS for lightweight alternative
        nbrs = NearestNeighbors(n_neighbors=1, algorithm="auto", metric="euclidean")
        nbrs.fit(x_aug)
        _, indices = nbrs.kneighbors(q_aug)  # top-1
        return indices

    def _extract_surrounding_background(self, mask: np.ndarray, kernel_size: int = 20) -> np.ndarray:
        """Extract the surrounding background region around the foreground."""
        kernel = np.ones((kernel_size, kernel_size), np.uint8)
        dilated_mask = cv2.dilate(mask, kernel, iterations=1)
        # Ring between the dilated foreground and the foreground itself.
        surrounding_bg_mask = dilated_mask - mask
        return surrounding_bg_mask

    def _extract_patches(self, img: np.ndarray, mask: np.ndarray, w: int, d: int) -> tuple:
        """Extract valid patches from the image based on mask.

        A patch is valid only when every pixel inside it is covered by the mask.

        Args:
            img (np.ndarray): Source image (H x W x C).
            mask (np.ndarray): Binary mask (H x W).
            w (int): Patch size.
            d (int): Stride between patch origins.

        Returns:
            tuple: (top-left coordinates of valid patches, flattened patch features)
        """
        h, w_, c = img.shape
        # Pad so the strided patch grid tiles the image exactly.
        pad_h = (d - (h - w) % d) % d
        pad_w = (d - (w_ - w) % d) % d
        img_padded = np.pad(img, ((0, pad_h), (0, pad_w), (0, 0)), mode="reflect")
        # Constant (zero) padding keeps padded pixels invalid in the mask.
        mask_padded = np.pad(mask, ((0, pad_h), (0, pad_w)), mode="constant")
        new_h, new_w = img_padded.shape[:2]

        img_patches = np.lib.stride_tricks.sliding_window_view(img_padded, (w, w, img.shape[2]))[::d, ::d, 0, :, :, :]
        mask_patches = np.lib.stride_tricks.sliding_window_view(mask_padded, (w, w))[::d, ::d, :, :]
        img_patches = img_patches.reshape(-1, w * w * c)
        mask_patches = mask_patches.reshape(-1, w, w)

        grid_x, grid_y = np.meshgrid(np.arange(0, new_h - w + 1, d), np.arange(0, new_w - w + 1, d), indexing="ij")
        all_indices = np.column_stack((grid_x.ravel(), grid_y.ravel()))

        valid_idx = np.all(mask_patches > 0, axis=(1, 2))
        valid_indices = all_indices[valid_idx]
        valid_patches = img_patches[valid_idx]
        return valid_indices, valid_patches

    def _reconstruct_image(
        self,
        img: np.ndarray,
        fg_indices: np.ndarray,
        bg_indices: np.ndarray,
        fg_nn: np.ndarray,
        im_bg: np.ndarray,
        w: int,
    ) -> np.ndarray:
        """Reconstruct foreground using nearest neighbor background patches."""
        img_recon = np.zeros_like(img, dtype=np.int64)
        # EPS avoids division by zero for pixels no patch covers.
        counts = np.zeros(img.shape[:2]) + EPS

        fg_x, fg_y = fg_indices[:, 0], fg_indices[:, 1]
        nn_i_j = fg_nn[:, 0]
        cii, cjj = bg_indices[nn_i_j, 0], bg_indices[nn_i_j, 1]

        # Clamp patch origins so w-sized windows stay inside the image.
        fg_x = np.clip(fg_x, 0, img.shape[0] - w)
        fg_y = np.clip(fg_y, 0, img.shape[1] - w)
        cii = np.clip(cii, 0, img.shape[0] - w)
        cjj = np.clip(cjj, 0, img.shape[1] - w)

        # Accumulate matched background patches; overlaps are averaged below.
        for i in range(fg_indices.shape[0]):
            img_recon[fg_x[i] : fg_x[i] + w, fg_y[i] : fg_y[i] + w, :] += im_bg[
                cii[i] : cii[i] + w, cjj[i] : cjj[i] + w, :
            ]
            counts[fg_x[i] : fg_x[i] + w, fg_y[i] : fg_y[i] + w] += 1

        counts = np.expand_dims(counts, axis=-1)
        img_recon = np.round(img_recon / counts).astype(np.uint8)
        return img_recon

    def _compute_delta_e2000_matrix(self, img1_rgb: np.ndarray, img2_rgb: np.ndarray) -> np.ndarray:
        """Compute the perceptual color difference (ΔE 2000) between two images.

        Args:
            img1_rgb (np.ndarray): First input image (H x W x 3) in RGB format.
            img2_rgb (np.ndarray): Second input image (H x W x 3) in RGB format.

        Returns:
            np.ndarray: Similarity matrix with values in [0,1] (higher = more similar).
        """
        # Convert RGB to Lab color space
        lab1 = rgb2lab(img1_rgb)
        lab2 = rgb2lab(img2_rgb)
        # Compute ΔE 2000 color difference
        delta_e_matrix = deltaE_ciede2000(lab1, lab2)
        # Normalize ΔE 2000 values to [0,1]
        similarity_matrix = 1 - np.clip(delta_e_matrix / 100, 0, 1)
        return similarity_matrix

    def get_results(self) -> dict:
        """Return the results about camouflage context measure.

        Returns:
            dict(ccm=camouflage_context_measure)
        """
        ccm = np.mean(np.array(self.scores, dtype=TYPE))
        return dict(ccm=ccm)