From 63953e086edd6ca58926c7e0e9904b635a961bce Mon Sep 17 00:00:00 2001 From: Olivier Date: Wed, 14 May 2025 18:00:14 +0200 Subject: [PATCH 1/7] :shirt: Some improvements for the scalers --- .../post_processing/calibration/matrix_scaler.py | 8 -------- .../post_processing/calibration/scaler.py | 15 +++++++++------ .../calibration/temperature_scaler.py | 8 -------- .../post_processing/calibration/vector_scaler.py | 8 -------- 4 files changed, 9 insertions(+), 30 deletions(-) diff --git a/torch_uncertainty/post_processing/calibration/matrix_scaler.py b/torch_uncertainty/post_processing/calibration/matrix_scaler.py index 9044877c..d14cd871 100644 --- a/torch_uncertainty/post_processing/calibration/matrix_scaler.py +++ b/torch_uncertainty/post_processing/calibration/matrix_scaler.py @@ -66,14 +66,6 @@ def set_temperature(self, val_w: float, val_b: float) -> None: ) def _scale(self, logits: Tensor) -> Tensor: - """Scale the predictions with the optimal temperature. - - Args: - logits (Tensor): logits to be scaled. - - Returns: - Tensor: Scaled logits. - """ return self.temp_w @ logits + self.temp_b @property diff --git a/torch_uncertainty/post_processing/calibration/scaler.py b/torch_uncertainty/post_processing/calibration/scaler.py index ab402247..002c6e69 100644 --- a/torch_uncertainty/post_processing/calibration/scaler.py +++ b/torch_uncertainty/post_processing/calibration/scaler.py @@ -1,8 +1,10 @@ import logging +from abc import abstractmethod from typing import Literal import torch -from torch import Tensor, nn, optim +from torch import Tensor, nn +from torch.optim import LBFGS from torch.utils.data import DataLoader from tqdm import tqdm @@ -56,7 +58,7 @@ def fit( Args: dataloader (DataLoader): Dataloader with the calibration data. save_logits (bool, optional): Whether to save the logits and - labels. Defaults to False. + labels in memory. Defaults to False. progress (bool, optional): Whether to show a progress bar. Defaults to True. """ @@ -77,7 +79,7 @@ def fit( all_logits = torch.cat(all_logits).to(self.device) all_labels = torch.cat(all_labels).to(self.device) - optimizer = optim.LBFGS(self.temperature, lr=self.lr, max_iter=self.max_iter) + optimizer = LBFGS(self.temperature, lr=self.lr, max_iter=self.max_iter) def calib_eval() -> float: optimizer.zero_grad() @@ -99,6 +101,7 @@ def forward(self, inputs: Tensor) -> Tensor: ) return self._scale(self.model(inputs)) + @abstractmethod def _scale(self, logits: Tensor) -> Tensor: """Scale the logits with the optimal temperature. @@ -108,7 +111,7 @@ def _scale(self, logits: Tensor) -> Tensor: Returns: Tensor: Scaled logits. """ - raise NotImplementedError + ... def fit_predict( self, @@ -119,5 +122,5 @@ def fit_predict( return self(self.logits) @property - def temperature(self) -> list: - raise NotImplementedError + @abstractmethod + def temperature(self) -> list: ... diff --git a/torch_uncertainty/post_processing/calibration/temperature_scaler.py b/torch_uncertainty/post_processing/calibration/temperature_scaler.py index acb1da98..29e7c7a5 100644 --- a/torch_uncertainty/post_processing/calibration/temperature_scaler.py +++ b/torch_uncertainty/post_processing/calibration/temperature_scaler.py @@ -50,14 +50,6 @@ def set_temperature(self, val: float) -> None: self.temp = nn.Parameter(torch.ones(1, device=self.device) * val, requires_grad=True) def _scale(self, logits: Tensor) -> Tensor: - """Scale the prediction with the optimal temperature. - - Args: - logits (Tensor): logits to be scaled. - - Returns: - Tensor: Scaled logits. - """ return logits / self.temperature[0] @property diff --git a/torch_uncertainty/post_processing/calibration/vector_scaler.py b/torch_uncertainty/post_processing/calibration/vector_scaler.py index 3bda5d57..e27c233e 100644 --- a/torch_uncertainty/post_processing/calibration/vector_scaler.py +++ b/torch_uncertainty/post_processing/calibration/vector_scaler.py @@ -64,14 +64,6 @@ def set_temperature(self, val_w: float, val_b: float) -> None: ) def _scale(self, logits: torch.Tensor) -> torch.Tensor: - """Scale the predictions with the optimal temperature. - - Args: - logits (torch.Tensor): logits to be scaled. - - Returns: - torch.Tensor: Scaled logits. - """ return self.temp_w * logits + self.temp_b @property From 884786d226e294df6fc227f3078816b560a82a71 Mon Sep 17 00:00:00 2001 From: Olivier Date: Wed, 14 May 2025 18:20:15 +0200 Subject: [PATCH 2/7] :sparkles: Start supporting binary classification --- torch_uncertainty/post_processing/calibration/scaler.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/torch_uncertainty/post_processing/calibration/scaler.py b/torch_uncertainty/post_processing/calibration/scaler.py index 002c6e69..365f832f 100644 --- a/torch_uncertainty/post_processing/calibration/scaler.py +++ b/torch_uncertainty/post_processing/calibration/scaler.py @@ -79,6 +79,12 @@ def fit( all_logits = torch.cat(all_logits).to(self.device) all_labels = torch.cat(all_labels).to(self.device) + if all_logits.dim() == 2 and all_logits.shape[1] == 1: + all_logits = all_logits.squeeze(1) + if all_logits.dim() == 1: + confidence = torch.log(all_logits.sigmoid()) + all_logits = torch.cat([torch.log(1 - confidence), torch.log(confidence)]) + optimizer = LBFGS(self.temperature, lr=self.lr, max_iter=self.max_iter) def calib_eval() -> float: From 0df1a0d4821fa4c330bfbfe583b42182e7a8cb95 Mon Sep 17 00:00:00 2001 From: Olivier Date: Wed, 14 May 2025 18:21:21 +0200 Subject: [PATCH 3/7] :bug: Stack logits --- torch_uncertainty/post_processing/calibration/scaler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch_uncertainty/post_processing/calibration/scaler.py b/torch_uncertainty/post_processing/calibration/scaler.py index 365f832f..ce23e039 100644 --- a/torch_uncertainty/post_processing/calibration/scaler.py +++ b/torch_uncertainty/post_processing/calibration/scaler.py @@ -83,7 +83,7 @@ def fit( all_logits = all_logits.squeeze(1) if all_logits.dim() == 1: confidence = torch.log(all_logits.sigmoid()) - all_logits = torch.cat([torch.log(1 - confidence), torch.log(confidence)]) + all_logits = torch.stack([torch.log(1 - confidence), torch.log(confidence)], dim=1) optimizer = LBFGS(self.temperature, lr=self.lr, max_iter=self.max_iter) From b2da1d328eb1dafbc4eed813c2fbc2ee8610cea2 Mon Sep 17 00:00:00 2001 From: Olivier Date: Wed, 14 May 2025 18:22:22 +0200 Subject: [PATCH 4/7] :bug: Fix type --- torch_uncertainty/post_processing/calibration/scaler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/torch_uncertainty/post_processing/calibration/scaler.py b/torch_uncertainty/post_processing/calibration/scaler.py index ce23e039..6adf9a2f 100644 --- a/torch_uncertainty/post_processing/calibration/scaler.py +++ b/torch_uncertainty/post_processing/calibration/scaler.py @@ -83,6 +83,7 @@ def fit( all_logits = all_logits.squeeze(1) if all_logits.dim() == 1: confidence = torch.log(all_logits.sigmoid()) + all_labels = all_labels.to(dtype=torch.float32) all_logits = torch.stack([torch.log(1 - confidence), torch.log(confidence)], dim=1) optimizer = LBFGS(self.temperature, lr=self.lr, max_iter=self.max_iter) From 00422064059e7417bf8d9b427af15c5fda376ea3 Mon Sep 17 00:00:00 2001 From: Olivier Date: Thu, 15 May 2025 14:21:13 +0200 Subject: [PATCH 5/7] :shirt: Fix binary scaling --- torch_uncertainty/post_processing/calibration/scaler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torch_uncertainty/post_processing/calibration/scaler.py b/torch_uncertainty/post_processing/calibration/scaler.py index 6adf9a2f..34b5c240 100644 --- a/torch_uncertainty/post_processing/calibration/scaler.py +++ b/torch_uncertainty/post_processing/calibration/scaler.py @@ -4,6 +4,7 @@ import torch from torch import Tensor, nn +from torch.nn import functional as F from torch.optim import LBFGS from torch.utils.data import DataLoader from tqdm import tqdm @@ -82,9 +83,8 @@ def fit( if all_logits.dim() == 2 and all_logits.shape[1] == 1: all_logits = all_logits.squeeze(1) if all_logits.dim() == 1: - confidence = torch.log(all_logits.sigmoid()) - all_labels = all_labels.to(dtype=torch.float32) - all_logits = torch.stack([torch.log(1 - confidence), torch.log(confidence)], dim=1) + all_labels = all_labels.to(dtype=torch.long) + all_logits = torch.stack([F.logsigmoid(-all_logits), F.logsigmoid(all_logits)], dim=1) optimizer = LBFGS(self.temperature, lr=self.lr, max_iter=self.max_iter) From 3e4e4cd5868381255b5a1249bdfa9d87e3303a08 Mon Sep 17 00:00:00 2001 From: Olivier Date: Thu, 15 May 2025 15:05:06 +0200 Subject: [PATCH 6/7] :sparkles: Finish fixing --- .../post_processing/calibration/scaler.py | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/torch_uncertainty/post_processing/calibration/scaler.py b/torch_uncertainty/post_processing/calibration/scaler.py index 34b5c240..5d3389bd 100644 --- a/torch_uncertainty/post_processing/calibration/scaler.py +++ b/torch_uncertainty/post_processing/calibration/scaler.py @@ -4,7 +4,6 @@ import torch from torch import Tensor, nn -from torch.nn import functional as F from torch.optim import LBFGS from torch.utils.data import DataLoader from tqdm import tqdm @@ -21,17 +20,19 @@ def __init__( model: nn.Module | None = None, lr: float = 0.1, max_iter: int = 100, + eps: float = 1e-8, device: Literal["cpu", "cuda"] | torch.device | None = None, ) -> None: """Virtual class for scaling post-processing for calibrated probabilities. Args: - model (nn.Module): Model to calibrate. - lr (float, optional): Learning rate for the optimizer. Defaults to 0.1. + model (nn.Module | None): Model to calibrate. Defaults to ``None``. + lr (float, optional): Learning rate for the optimizer. Defaults to ``0.1``. max_iter (int, optional): Maximum number of iterations for the - optimizer. Defaults to 100. + optimizer. Defaults to ``100``. + eps (float): Small value for stability. Defaults to ``1e-8``. device (Optional[Literal["cpu", "cuda"]], optional): Device to use - for optimization. Defaults to None. + for optimization. Defaults to ``None``. Reference: Guo, C., Pleiss, G., Sun, Y., & Weinberger, K. Q. On calibration @@ -41,13 +42,17 @@ def __init__( self.device = device if lr <= 0: - raise ValueError("Learning rate must be positive.") + raise ValueError(f"Learning rate must be strictly positive. Got {lr}.") self.lr = lr if max_iter <= 0: - raise ValueError("Max iterations must be positive.") + raise ValueError(f"Max iterations must be strictly positive. Got {max_iter}.") self.max_iter = int(max_iter) + if eps <= 0: + raise ValueError(f"Eps must be strictly positive. Got {eps}.") + self.eps = int(eps) + def fit( self, dataloader: DataLoader, @@ -57,11 +62,12 @@ def fit( """Fit the temperature parameters to the calibration data. Args: - dataloader (DataLoader): Dataloader with the calibration data. + dataloader (DataLoader): Dataloader with the calibration data. If there is no model, + the dataloader should include the confidence score directly and not the logits. save_logits (bool, optional): Whether to save the logits and - labels in memory. Defaults to False. + labels in memory. Defaults to ``False``. progress (bool, optional): Whether to show a progress bar. - Defaults to True. + Defaults to ``True``. """ if self.model is None or isinstance(self.model, nn.Identity): logging.warning( @@ -78,13 +84,13 @@ def fit( all_logits.append(logits) all_labels.append(labels) all_logits = torch.cat(all_logits).to(self.device) - all_labels = torch.cat(all_labels).to(self.device) + all_labels = torch.cat(all_labels).to(dtype=torch.long).to(self.device) + all_logits = all_logits.clamp(self.eps, 1 - self.eps) if all_logits.dim() == 2 and all_logits.shape[1] == 1: all_logits = all_logits.squeeze(1) if all_logits.dim() == 1: - all_labels = all_labels.to(dtype=torch.long) - all_logits = torch.stack([F.logsigmoid(-all_logits), F.logsigmoid(all_logits)], dim=1) + all_logits = torch.stack([torch.log(1 - all_logits), torch.log(all_logits)], dim=1) optimizer = LBFGS(self.temperature, lr=self.lr, max_iter=self.max_iter) From f3df42a596f735b8d03e67fb7330cd56bedd5bdf Mon Sep 17 00:00:00 2001 From: Olivier Date: Thu, 15 May 2025 15:11:24 +0200 Subject: [PATCH 7/7] :sparkles: Final fix --- .../post_processing/calibration/matrix_scaler.py | 4 +++- torch_uncertainty/post_processing/calibration/scaler.py | 2 +- .../post_processing/calibration/temperature_scaler.py | 4 +++- .../post_processing/calibration/vector_scaler.py | 4 +++- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/torch_uncertainty/post_processing/calibration/matrix_scaler.py b/torch_uncertainty/post_processing/calibration/matrix_scaler.py index d14cd871..34605c11 100644 --- a/torch_uncertainty/post_processing/calibration/matrix_scaler.py +++ b/torch_uncertainty/post_processing/calibration/matrix_scaler.py @@ -15,6 +15,7 @@ def __init__( init_b: float = 0, lr: float = 0.1, max_iter: int = 200, + eps: float = 1e-8, device: Literal["cpu", "cuda"] | device | None = None, ) -> None: """Matrix scaling post-processing for calibrated probabilities. @@ -29,6 +30,7 @@ def __init__( lr (float, optional): Learning rate for the optimizer. Defaults to 0.1. max_iter (int, optional): Maximum number of iterations for the optimizer. Defaults to 100. + eps (float): Small value for stability. Defaults to ``1e-8``. device (Optional[Literal["cpu", "cuda"]], optional): Device to use for optimization. Defaults to None. @@ -37,7 +39,7 @@ def __init__( of modern neural networks. In ICML 2017. """ - super().__init__(model=model, lr=lr, max_iter=max_iter, device=device) + super().__init__(model=model, lr=lr, max_iter=max_iter, eps=eps, device=device) if not isinstance(num_classes, int): raise TypeError(f"num_classes must be an integer. Got {num_classes}.") diff --git a/torch_uncertainty/post_processing/calibration/scaler.py b/torch_uncertainty/post_processing/calibration/scaler.py index 5d3389bd..ee456672 100644 --- a/torch_uncertainty/post_processing/calibration/scaler.py +++ b/torch_uncertainty/post_processing/calibration/scaler.py @@ -51,7 +51,7 @@ def __init__( if eps <= 0: raise ValueError(f"Eps must be strictly positive. Got {eps}.") - self.eps = int(eps) + self.eps = eps def fit( self, diff --git a/torch_uncertainty/post_processing/calibration/temperature_scaler.py b/torch_uncertainty/post_processing/calibration/temperature_scaler.py index 29e7c7a5..159fd517 100644 --- a/torch_uncertainty/post_processing/calibration/temperature_scaler.py +++ b/torch_uncertainty/post_processing/calibration/temperature_scaler.py @@ -13,6 +13,7 @@ def __init__( init_val: float = 1, lr: float = 0.1, max_iter: int = 100, + eps: float = 1e-8, device: Literal["cpu", "cuda"] | torch.device | None = None, ) -> None: """Temperature scaling post-processing for calibrated probabilities. @@ -24,6 +25,7 @@ def __init__( lr (float, optional): Learning rate for the optimizer. Defaults to 0.1. max_iter (int, optional): Maximum number of iterations for the optimizer. Defaults to 100. + eps (float): Small value for stability. Defaults to ``1e-8``. device (Optional[Literal["cpu", "cuda"]], optional): Device to use for optimization. Defaults to None. @@ -31,7 +33,7 @@ def __init__( Guo, C., Pleiss, G., Sun, Y., & Weinberger, K. Q. On calibration of modern neural networks. In ICML 2017. """ - super().__init__(model=model, lr=lr, max_iter=max_iter, device=device) + super().__init__(model=model, lr=lr, max_iter=max_iter, eps=eps, device=device) if init_val <= 0: raise ValueError(f"Initial temperature value must be positive. Got {init_val}") diff --git a/torch_uncertainty/post_processing/calibration/vector_scaler.py b/torch_uncertainty/post_processing/calibration/vector_scaler.py index e27c233e..da6d1887 100644 --- a/torch_uncertainty/post_processing/calibration/vector_scaler.py +++ b/torch_uncertainty/post_processing/calibration/vector_scaler.py @@ -15,6 +15,7 @@ def __init__( init_b: float = 0, lr: float = 0.1, max_iter: int = 200, + eps: float = 1e-8, device: Literal["cpu", "cuda"] | torch.device | None = None, ) -> None: """Vector scaling post-processing for calibrated probabilities. @@ -29,6 +30,7 @@ def __init__( lr (float, optional): Learning rate for the optimizer. Defaults to 0.1. max_iter (int, optional): Maximum number of iterations for the optimizer. Defaults to 100. + eps (float): Small value for stability. Defaults to ``1e-8``. device (Optional[Literal["cpu", "cuda"]], optional): Device to use for optimization. Defaults to None. @@ -37,7 +39,7 @@ def __init__( of modern neural networks. In ICML 2017. """ - super().__init__(model=model, lr=lr, max_iter=max_iter, device=device) + super().__init__(model=model, lr=lr, max_iter=max_iter, eps=eps, device=device) if not isinstance(num_classes, int): raise TypeError(f"num_classes must be an integer. Got {num_classes}.")