From aafb057ddf35c18d290e4d6c5be862d47346c901 Mon Sep 17 00:00:00 2001
From: Olivier <olivier.ar.laurent@gmail.com>
Date: Thu, 22 May 2025 12:05:06 +0200
Subject: [PATCH 01/11] :bug: Fix scaling for multiclass & improve tutorial

---
 .../Post_Hoc_Methods/tutorial_scaler.py             | 13 ++++++++-----
 .../post_processing/calibration/scaler.py           |  6 ++----
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/auto_tutorial_source/Post_Hoc_Methods/tutorial_scaler.py b/auto_tutorial_source/Post_Hoc_Methods/tutorial_scaler.py
index 815f0244..a212f806 100644
--- a/auto_tutorial_source/Post_Hoc_Methods/tutorial_scaler.py
+++ b/auto_tutorial_source/Post_Hoc_Methods/tutorial_scaler.py
@@ -82,9 +82,9 @@
 
 # Split datasets
 dataset = dm.test
-cal_dataset, test_dataset, other = random_split(dataset, [1000, 1000, len(dataset) - 2000])
-test_dataloader = DataLoader(test_dataset, batch_size=32)
-calibration_dataloader = DataLoader(cal_dataset, batch_size=32)
+cal_dataset, test_dataset = random_split(dataset, [2000, len(dataset) - 2000])
+test_dataloader = DataLoader(test_dataset, batch_size=128)
+calibration_dataloader = DataLoader(cal_dataset, batch_size=128)
 
 # Initialize the ECE
 ece = CalibrationError(task="multiclass", num_classes=100)
@@ -135,11 +135,14 @@
     probs = logits.softmax(-1)
     ece.update(probs, target)
 
-print(f"ECE after scaling - {ece.compute():.3%}.")
+print(
+    f"ECE after scaling - {ece.compute():.3%} with temperature {scaled_model.temperature[0].item():.3}."
+)
 
 # %%
 # We finally compute and plot the scaled top-label calibration figure. We see
-# that the model is now better calibrated.
+# that the model is now better calibrated. If the temperature is greater than 1,
+# the final model is less confident than before.
 fig, ax = ece.plot()
 fig.show()
 
diff --git a/torch_uncertainty/post_processing/calibration/scaler.py b/torch_uncertainty/post_processing/calibration/scaler.py
index 708810f7..e6c6c364 100644
--- a/torch_uncertainty/post_processing/calibration/scaler.py
+++ b/torch_uncertainty/post_processing/calibration/scaler.py
@@ -85,12 +85,10 @@ def fit(
             all_logits = torch.cat(all_logits).to(self.device)
             all_labels = torch.cat(all_labels).to(self.device)
 
-        # Stabilize optimization
-        all_logits = all_logits.clamp(self.eps, 1 - self.eps)
-
         # Handle binary classification case
         if all_logits.dim() == 2 and all_logits.shape[1] == 1:
-            all_logits = all_logits.squeeze(1)
+            # Stabilize optimization
+            all_logits = all_logits.clamp(self.eps, 1 - self.eps).squeeze(1)
         if all_logits.dim() == 1:
             # allow labels as probabilities
             if ((all_labels != 0) * (all_labels != 1)).sum(dtype=torch.int) != 0:

From b08dc963515e4435ae1543e9bdd7360b28e8cf4c Mon Sep 17 00:00:00 2001
From: Olivier <olivier.ar.laurent@gmail.com>
Date: Thu, 22 May 2025 16:03:02 +0200
Subject: [PATCH 02/11] :bug: Continue fixing the scaler

---
 torch_uncertainty/post_processing/calibration/scaler.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/torch_uncertainty/post_processing/calibration/scaler.py b/torch_uncertainty/post_processing/calibration/scaler.py
index e6c6c364..9e913175 100644
--- a/torch_uncertainty/post_processing/calibration/scaler.py
+++ b/torch_uncertainty/post_processing/calibration/scaler.py
@@ -87,9 +87,10 @@ def fit(
 
         # Handle binary classification case
         if all_logits.dim() == 2 and all_logits.shape[1] == 1:
+            all_logits = all_logits.squeeze(1)
             # Stabilize optimization
-            all_logits = all_logits.clamp(self.eps, 1 - self.eps).squeeze(1)
         if all_logits.dim() == 1:
+            all_logits = all_logits.clamp(self.eps, 1 - self.eps).squeeze(1)
             # allow labels as probabilities
             if ((all_labels != 0) * (all_labels != 1)).sum(dtype=torch.int) != 0:
                 all_labels = torch.stack([1 - all_labels, all_labels], dim=1)

From 5af5bd468f992eaf2c9d0f61ed4d63a30c021ffb Mon Sep 17 00:00:00 2001
From: Olivier <olivier.ar.laurent@gmail.com>
Date: Thu, 22 May 2025 16:03:52 +0200
Subject: [PATCH 03/11] :bug: Drop invalid squeeze(1) on 1-D logits in the scaler

---
 torch_uncertainty/post_processing/calibration/scaler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torch_uncertainty/post_processing/calibration/scaler.py b/torch_uncertainty/post_processing/calibration/scaler.py
index 9e913175..20c14124 100644
--- a/torch_uncertainty/post_processing/calibration/scaler.py
+++ b/torch_uncertainty/post_processing/calibration/scaler.py
@@ -90,7 +90,7 @@ def fit(
             all_logits = all_logits.squeeze(1)
             # Stabilize optimization
         if all_logits.dim() == 1:
-            all_logits = all_logits.clamp(self.eps, 1 - self.eps).squeeze(1)
+            all_logits = all_logits.clamp(self.eps, 1 - self.eps)
             # allow labels as probabilities
             if ((all_labels != 0) * (all_labels != 1)).sum(dtype=torch.int) != 0:
                 all_labels = torch.stack([1 - all_labels, all_labels], dim=1)

From 620556ab0cdd3732a92a6adceede5cf836d035ab Mon Sep 17 00:00:00 2001
From: Olivier <olivier.ar.laurent@gmail.com>
Date: Thu, 22 May 2025 18:00:13 +0200
Subject: [PATCH 04/11] :hammer: Continue reworking conformal methods &
 acknowledge TorchCP

---
 tests/post_processing/test_conformal.py       |  35 +++--
 .../post_processing/conformal/__init__.py     |   6 +-
 .../post_processing/conformal/abstract.py     |  57 +++++++-
 .../post_processing/conformal/aps.py          | 109 ++++++++++++++
 .../conformal/conformal_aps.py                | 119 ---------------
 .../conformal/conformal_raps.py               | 138 ------------------
 .../conformal/conformal_thr.py                |  95 ------------
 .../post_processing/conformal/raps.py         |  96 ++++++++++++
 .../post_processing/conformal/thr.py          |  75 ++++++++++
 9 files changed, 358 insertions(+), 372 deletions(-)
 create mode 100644 torch_uncertainty/post_processing/conformal/aps.py
 delete mode 100644 torch_uncertainty/post_processing/conformal/conformal_aps.py
 delete mode 100644 torch_uncertainty/post_processing/conformal/conformal_raps.py
 delete mode 100644 torch_uncertainty/post_processing/conformal/conformal_thr.py
 create mode 100644 torch_uncertainty/post_processing/conformal/raps.py
 create mode 100644 torch_uncertainty/post_processing/conformal/thr.py

diff --git a/tests/post_processing/test_conformal.py b/tests/post_processing/test_conformal.py
index d3a61bd9..c48817eb 100644
--- a/tests/post_processing/test_conformal.py
+++ b/tests/post_processing/test_conformal.py
@@ -17,10 +17,18 @@ class TestConformal:
 
     def test_errors(self):
         Conformal.__abstractmethods__ = set()
-        conformal = Conformal(model=None)
-        assert conformal.model is None
+        conformal = Conformal(
+            model=None,
+            alpha=0.1,
+            ts_init_val=1,
+            ts_lr=1,
+            ts_max_iter=1,
+            enable_ts=True,
+            device="cpu",
+        )
+        assert conformal.model.model is None
         conformal.set_model(nn.Identity())
-        assert isinstance(conformal.model, nn.Identity)
+        assert isinstance(conformal.model.model, nn.Identity)
         conformal.fit(None)
         conformal.forward(None)
         conformal.conformal(None)
@@ -50,10 +58,7 @@ def test_fit(self):
         assert out.shape == (10, 3)
 
     def test_failures(self):
-        with pytest.raises(NotImplementedError):
-            _ = ConformalClsAPS(alpha=0.1, score_type="test")
-
-        with pytest.raises(ValueError):
+        with pytest.raises(RuntimeError):
             _ = ConformalClsAPS(
                 alpha=0.1,
             ).quantile
@@ -83,10 +88,7 @@ def test_fit(self):
         assert out.shape == (10, 3)
 
     def test_failures(self):
-        with pytest.raises(NotImplementedError):
-            ConformalClsRAPS(alpha=0.1, score_type="test")
-
-        with pytest.raises(ValueError):
+        with pytest.raises(RuntimeError):
             ConformalClsRAPS(alpha=0.1).quantile  # noqa: B018
 
 
@@ -94,14 +96,13 @@ class TestConformalClsTHR:
     """Testing the ConformalClsTHR class."""
 
     def test_main(self):
-        conformal = ConformalClsTHR(alpha=0.1, model=None, init_val=2)
+        conformal = ConformalClsTHR(alpha=0.1, model=None, ts_init_val=2)
 
         assert conformal.temperature == 2.0
 
         conformal.set_model(nn.Identity())
 
-        assert isinstance(conformal.model, nn.Identity)
-        assert isinstance(conformal.temperature_scaler.model, nn.Identity)
+        assert isinstance(conformal.model.model, nn.Identity)
 
     def test_fit(self):
         inputs = repeat(torch.tensor([0.6, 0.3, 0.1]), "c -> b c", b=10)
@@ -110,7 +111,9 @@ def test_fit(self):
         calibration_set = list(zip(inputs, labels, strict=True))
         dl = DataLoader(calibration_set, batch_size=10)
 
-        conformal = ConformalClsTHR(alpha=0.1, model=nn.Identity(), init_val=2, lr=1, max_iter=10)
+        conformal = ConformalClsTHR(
+            alpha=0.1, model=nn.Identity(), ts_init_val=2, ts_lr=1, ts_max_iter=10
+        )
         conformal.fit(dl)
         out = conformal.conformal(inputs)
         assert out.shape == (10, 3)
@@ -119,7 +122,7 @@ def test_fit(self):
         ).all()
 
     def test_failures(self):
-        with pytest.raises(ValueError):
+        with pytest.raises(RuntimeError):
             _ = ConformalClsTHR(
                 alpha=0.1,
             ).quantile
diff --git a/torch_uncertainty/post_processing/conformal/__init__.py b/torch_uncertainty/post_processing/conformal/__init__.py
index 203cb6b6..1e77e673 100644
--- a/torch_uncertainty/post_processing/conformal/__init__.py
+++ b/torch_uncertainty/post_processing/conformal/__init__.py
@@ -1,5 +1,5 @@
 # ruff: noqa: F401
 from .abstract import Conformal
-from .conformal_aps import ConformalClsAPS
-from .conformal_raps import ConformalClsRAPS
-from .conformal_thr import ConformalClsTHR
+from .aps import ConformalClsAPS
+from .raps import ConformalClsRAPS
+from .thr import ConformalClsTHR
diff --git a/torch_uncertainty/post_processing/conformal/abstract.py b/torch_uncertainty/post_processing/conformal/abstract.py
index ab58fd92..99c900fe 100644
--- a/torch_uncertainty/post_processing/conformal/abstract.py
+++ b/torch_uncertainty/post_processing/conformal/abstract.py
@@ -1,13 +1,68 @@
 from abc import abstractmethod
+from typing import Literal
 
-from torch import Tensor
+import torch
+from torch import Tensor, nn
 
+from torch_uncertainty.post_processing import TemperatureScaler
 from torch_uncertainty.post_processing.abstract import PostProcessing
 
 
 class Conformal(PostProcessing):
+    """Conformal base class."""
+
+    q_hat: float = None
+
+    def __init__(
+        self,
+        alpha: float,
+        model: nn.Module | None,
+        ts_init_val: float,
+        ts_lr: float,
+        ts_max_iter: int,
+        enable_ts: bool,
+        device: Literal["cpu", "cuda"] | torch.device | None,
+    ) -> None:
+        super().__init__(model=model)
+        self.alpha = alpha
+        self.enable_ts = enable_ts
+        if enable_ts:
+            self.model = TemperatureScaler(
+                model=model,
+                init_val=ts_init_val,
+                lr=ts_lr,
+                max_iter=ts_max_iter,
+                device=device,
+            )
+        else:
+            self.model = model
+        self.device = device or "cpu"
+
+    def set_model(self, model: nn.Module | None) -> None:
+        if self.enable_ts:
+            self.model.set_model(model=model.eval())
+        else:
+            self.model = model
+
+    def model_forward(self, inputs: Tensor) -> Tensor:
+        """Apply the model and return the softmax probabilities."""
+        self.model.eval()
+        return self.model(inputs.to(self.device)).softmax(-1)
+
     @abstractmethod
     def conformal(self, inputs: Tensor) -> Tensor: ...
 
     def forward(self, inputs: Tensor) -> Tensor:
         return self.conformal(inputs)
+
+    @property
+    def quantile(self) -> Tensor:
+        if self.q_hat is None:
+            raise RuntimeError("Quantile q_hat is not set. Run `.fit()` first.")
+        return self.q_hat
+
+    @property
+    def temperature(self) -> float:
+        if self.enable_ts:
+            return self.model.temperature[0].item()
+        raise RuntimeError("Cannot return temperature when enable_ts is False.")
diff --git a/torch_uncertainty/post_processing/conformal/aps.py b/torch_uncertainty/post_processing/conformal/aps.py
new file mode 100644
index 00000000..62b5df55
--- /dev/null
+++ b/torch_uncertainty/post_processing/conformal/aps.py
@@ -0,0 +1,109 @@
+from typing import Literal
+
+import torch
+from torch import Tensor, nn
+from torch.utils.data import DataLoader
+
+from .abstract import Conformal
+
+
+class ConformalClsAPS(Conformal):
+    def __init__(
+        self,
+        alpha: float,
+        model: nn.Module | None = None,
+        randomized: bool = True,
+        ts_init_val: float = 1,
+        ts_lr: float = 0.1,
+        ts_max_iter: int = 100,
+        enable_ts: bool = True,
+        device: Literal["cpu", "cuda"] | torch.device | None = None,
+    ) -> None:
+        r"""Conformal prediction with APS scores.
+
+        Args:
+            alpha (float): The allowed error rate, i.e., the target coverage is :math:`1-\alpha`.
+            model (nn.Module): Trained classification model. Defaults to ``None``.
+            randomized (bool): Whether to randomize the APS scores. Defaults to ``True``.
+            ts_init_val (float, optional): Initial value for the temperature.
+                Defaults to ``1.0``.
+            ts_lr (float, optional): Learning rate for the optimizer. Defaults to ``0.1``.
+            ts_max_iter (int, optional): Maximum number of iterations for the
+                optimizer. Defaults to ``100``.
+            enable_ts (bool): Whether to scale the logits. Defaults to ``True``.
+            device (Literal["cpu", "cuda"] | torch.device | None, optional): device.
+                Defaults to ``None``.
+
+        Reference:
+            - TODO:
+
+        Code inspired by TorchCP.
+        """
+        super().__init__(
+            alpha=alpha,
+            model=model,
+            ts_init_val=ts_init_val,
+            ts_lr=ts_lr,
+            ts_max_iter=ts_max_iter,
+            enable_ts=enable_ts,
+            device=device,
+        )
+        self.randomized = randomized
+
+    def model_forward(self, inputs: Tensor) -> Tensor:
+        """Apply the model and return the softmax probabilities."""
+        self.model.eval()
+        return self.model(inputs.to(self.device)).softmax(-1)
+
+    def _sort_sum(self, probs: Tensor) -> tuple[Tensor, Tensor, Tensor]:
+        """Sort probabilities and compute cumulative sums."""
+        ordered, indices = torch.sort(probs, dim=-1, descending=True)
+        cumsum = torch.cumsum(ordered, dim=-1)
+        return indices, ordered, cumsum
+
+    def _calculate_all_labels(self, probs):
+        """Calculate APS scores for all labels."""
+        if probs.dim() == 1 or probs.dim() > 2:
+            raise ValueError("Input probabilities must be 2D.")
+        indices, ordered, cumsum = self._sort_sum(probs)
+        if self.randomized:
+            noise = torch.rand(probs.shape, device=probs.device)
+        else:
+            noise = torch.zeros_like(probs)
+
+        ordered_scores = cumsum - ordered * noise
+        _, sorted_indices = torch.sort(indices, descending=False, dim=-1)
+        return ordered_scores.gather(dim=-1, index=sorted_indices)
+
+    def _calculate_single_label(self, probs, label):
+        """Calculate APS score for a single label."""
+        indices, ordered, cumsum = self._sort_sum(probs)
+        if self.randomized:
+            noise = torch.rand(indices.shape[0], device=probs.device)
+        else:
+            noise = torch.zeros(indices.shape[0], device=probs.device)
+
+        idx = torch.where(indices == label.view(-1, 1))
+        return cumsum[idx] - noise * ordered[idx]
+
+    @torch.no_grad()
+    def fit(self, dataloader: DataLoader) -> None:
+        """Calibrate the APS threshold q_hat on a calibration set."""
+        if self.enable_ts:
+            self.model.fit(dataloader=dataloader)
+
+        aps_scores = []
+        for images, labels in dataloader:
+            images, labels = images.to(self.device), labels.to(self.device)
+            probs = self.model_forward(images)
+            scores = self._calculate_single_label(probs, labels)
+            aps_scores.append(scores)
+
+        self.q_hat = torch.quantile(torch.cat(aps_scores), 1 - self.alpha).item()
+
+    @torch.no_grad()
+    def conformal(self, inputs: Tensor) -> Tensor:
+        """Compute the prediction set for each input."""
+        probs = self.model_forward(inputs)
+        pred_set = self._calculate_all_labels(probs) <= self.quantile
+        return pred_set.float() / pred_set.sum(dim=1, keepdim=True)
diff --git a/torch_uncertainty/post_processing/conformal/conformal_aps.py b/torch_uncertainty/post_processing/conformal/conformal_aps.py
deleted file mode 100644
index 19fe2dd9..00000000
--- a/torch_uncertainty/post_processing/conformal/conformal_aps.py
+++ /dev/null
@@ -1,119 +0,0 @@
-import logging
-from functools import partial
-from typing import Literal
-
-import torch
-from torch import Tensor, nn
-from torch.utils.data import DataLoader
-
-from .abstract import Conformal
-
-
-class ConformalClsAPS(Conformal):
-    def __init__(
-        self,
-        alpha: float,
-        model: nn.Module | None = None,
-        score_type: str = "softmax",
-        randomized: bool = True,
-        device: Literal["cpu", "cuda"] | torch.device | None = None,
-    ) -> None:
-        r"""Conformal prediction with APS scores.
-
-        Args:
-            alpha (float): The confidence level meaning we allow :math:`1-\alpha` error.
-            model (nn.Module): Trained classification model. Defaults to ``None``.
-            score_type (str): Type of score transformation. Only ``"softmax"`` is supported for now.
-                Defaults to ``"softmax"``.
-            randomized (bool): Whether to use randomized smoothing in APS. Defaults to ``True``.
-            device (Literal["cpu", "cuda"] | torch.device | None, optional): device.
-                Defaults to ``None``.
-
-        Reference:
-            - TODO:
-        """
-        super().__init__(model=model)
-        self.randomized = randomized
-        self.alpha = alpha
-        self.device = device or "cpu"
-        self.q_hat = None
-
-        if score_type == "softmax":
-            self.transform = partial(torch.softmax, dim=-1)
-        else:
-            raise NotImplementedError("Only softmax is supported for now.")
-
-    def model_forward(self, inputs: Tensor) -> Tensor:
-        """Apply the model and return transformed scores (softmax)."""
-        if self.model is None or isinstance(self.model, nn.Identity):
-            logging.warning(
-                "model is None. Fitting post_processing method on the dataloader's data directly."
-            )
-            self.model = nn.Identity()
-        logits = self.model(inputs.to(self.device))
-        return self.transform(logits)
-
-    def _sort_sum(self, probs: Tensor):
-        """Sort probabilities and compute cumulative sums."""
-        ordered, indices = torch.sort(probs, dim=-1, descending=True)
-        cumsum = torch.cumsum(ordered, dim=-1)
-        return indices, ordered, cumsum
-
-    def _calculate_single_label(self, probs: Tensor, labels: Tensor):
-        """Compute APS score for the true label."""
-        indices, ordered, cumsum = self._sort_sum(probs)
-        if self.randomized:
-            u = torch.rand(indices.shape[0], device=probs.device)
-        else:
-            u = torch.zeros(indices.shape[0], device=probs.device)
-
-        scores = torch.zeros(probs.shape[0], device=probs.device)
-        for i in range(probs.shape[0]):
-            pos = (indices[i] == labels[i]).nonzero(as_tuple=False)
-            if pos.numel() == 0:
-                raise ValueError("True label not found.")
-            pos = pos[0].item()
-            scores[i] = cumsum[i, pos] - u[i] * ordered[i, pos]
-        return scores
-
-    def _calculate_all_labels(self, probs: Tensor):
-        """Compute APS scores for all labels."""
-        indices, ordered, cumsum = self._sort_sum(probs)
-        if self.randomized:
-            u = torch.rand(probs.shape, device=probs.device)
-        else:
-            u = torch.zeros_like(probs, device=probs.device)
-        ordered_scores = cumsum - ordered * u
-        sorted_indices = torch.sort(indices, descending=False, dim=-1).indices
-        return ordered_scores.gather(dim=-1, index=sorted_indices)
-
-    @torch.no_grad()
-    def fit(self, dataloader: DataLoader) -> None:
-        """Calibrate the APS threshold q_hat on a calibration set."""
-        self.model.eval()
-        aps_scores = []
-
-        for images, labels in dataloader:
-            images, labels = images.to(self.device), labels.to(self.device)
-            probs = self.model_forward(images)
-            scores = self._calculate_single_label(probs, labels)
-            aps_scores.append(scores)
-
-        aps_scores = torch.cat(aps_scores)
-        self.q_hat = torch.quantile(aps_scores, 1 - self.alpha)
-
-    @torch.no_grad()
-    def conformal(self, inputs: Tensor) -> Tensor:
-        """Compute the prediction set for each input."""
-        self.model.eval()
-        probs = self.model_forward(inputs.to(self.device))
-        all_scores = self._calculate_all_labels(probs)
-        pred_set = all_scores <= self.quantile
-        confidence_score = 1 / pred_set.sum(dim=1, keepdim=True)
-        return pred_set.float() * confidence_score
-
-    @property
-    def quantile(self) -> Tensor:
-        if self.q_hat is None:
-            raise ValueError("Quantile q_hat is not set. Run `.fit()` first.")
-        return self.q_hat.detach()
diff --git a/torch_uncertainty/post_processing/conformal/conformal_raps.py b/torch_uncertainty/post_processing/conformal/conformal_raps.py
deleted file mode 100644
index 32749a09..00000000
--- a/torch_uncertainty/post_processing/conformal/conformal_raps.py
+++ /dev/null
@@ -1,138 +0,0 @@
-import logging
-from functools import partial
-from typing import Literal
-
-import torch
-from torch import Tensor, nn
-from torch.utils.data import DataLoader
-
-from .abstract import Conformal
-
-
-class ConformalClsRAPS(Conformal):
-    def __init__(
-        self,
-        alpha: float,
-        model: nn.Module | None = None,
-        score_type: str = "softmax",
-        randomized: bool = True,
-        penalty: float = 0.1,
-        k_reg: int = 1,
-        device: Literal["cpu", "cuda"] | torch.device | None = None,
-    ) -> None:
-        r"""Conformal prediction with RAPS scores.
-
-        Args:
-            alpha (float): The confidence level meaning we allow :math:`1-\alpha` error.
-            model (nn.Module): Trained classification model. Defaults to ``None``.
-            score_type (str): Type of score transformation. Only ``"softmax"`` is supported for now.
-                Defaults to ``"softmax"``.
-            randomized (bool): Whether to use randomized smoothing in RAPS. Defaults to ``True``.
-            penalty (float): Regularization weight. Defaults to ``0.1``.
-            k_reg (int): Rank threshold for regularization. Defaults to ``1``.
-            device (Literal["cpu", "cuda"] | torch.device | None, optional): device.
-                Defaults to ``None``.
-
-        Reference:
-            - TODO:
-        """
-        super().__init__(model=model)
-        self.alpha = alpha
-        self.score_type = score_type
-        self.randomized = randomized
-        self.penalty = penalty
-        self.k_reg = k_reg
-        self.q_hat = None
-        self.device = device or "cpu"
-
-        if self.score_type == "softmax":
-            self.transform = partial(torch.softmax, dim=-1)
-        else:
-            raise NotImplementedError("Only softmax is supported for now.")
-
-    def model_forward(self, inputs: Tensor) -> Tensor:
-        """Apply the model and return transformed scores (softmax)."""
-        if self.model is None or isinstance(self.model, nn.Identity):
-            logging.warning(
-                "model is None. Fitting post_processing method on the dataloader's data directly."
-            )
-            self.model = nn.Identity()
-        logits = self.model(inputs)
-        return self.transform(logits)
-
-    def _sort_sum(self, probs: Tensor):
-        """Sort probabilities and compute cumulative sums."""
-        ordered, indices = torch.sort(probs, dim=-1, descending=True)
-        cumsum = torch.cumsum(ordered, dim=-1)
-        return indices, ordered, cumsum
-
-    def _calculate_single_label(self, probs: Tensor, labels: Tensor) -> Tensor:
-        """Compute RAPS score for the true label."""
-        indices, ordered, cumsum = self._sort_sum(probs)
-        batch_size = probs.shape[0]
-
-        if self.randomized:
-            noise = torch.rand(batch_size, device=probs.device)
-        else:
-            noise = torch.zeros(batch_size, device=probs.device)
-
-        scores = torch.zeros(batch_size, device=probs.device)
-        for i in range(batch_size):
-            pos_tensor = (indices[i] == labels[i]).nonzero(as_tuple=False)
-            if pos_tensor.numel() == 0:
-                raise ValueError("True label not found.")
-            pos = pos_tensor[0].item()
-
-            reg = max(self.penalty * ((pos + 1) - self.k_reg), 0)
-            scores[i] = cumsum[i, pos] - ordered[i, pos] * noise[i] + reg
-        return scores
-
-    def _calculate_all_labels(self, probs: Tensor) -> Tensor:
-        """Compute RAPS scores for all labels."""
-        indices, ordered, cumsum = self._sort_sum(probs)
-        num_classes = probs.shape[1]
-        noise = torch.rand_like(probs) if self.randomized else torch.zeros_like(probs)
-        ranks = torch.arange(1, num_classes + 1, device=probs.device, dtype=torch.float)
-        penalty_vector = self.penalty * (ranks - self.k_reg)
-        penalty_vector = torch.clamp(penalty_vector, min=0)
-        penalty_matrix = penalty_vector.unsqueeze(0).expand_as(ordered)
-        modified_scores = cumsum - ordered * noise + penalty_matrix
-
-        # Reorder scores back to original label order
-        reordered_scores = torch.empty_like(modified_scores)
-        reordered_scores.scatter_(dim=-1, index=indices, src=modified_scores)
-        return reordered_scores
-
-    @torch.no_grad()
-    def fit(self, dataloader: DataLoader) -> None:
-        """Calibrate the RAPS threshold q_hat on a calibration set."""
-        self.model.eval()
-        raps_scores = []
-
-        for images, labels in dataloader:
-            images, labels = images.to(self.device), labels.to(self.device)
-            probs = self.model_forward(images)
-            scores = self._calculate_single_label(probs, labels)
-            raps_scores.append(scores)
-
-        raps_scores = torch.cat(raps_scores)
-        self.q_hat = torch.quantile(raps_scores, 1 - self.alpha)
-
-    @torch.no_grad()
-    def conformal(self, inputs: Tensor) -> Tensor:
-        """Compute the prediction set for each input.
-
-        Returns:
-            Tensor: Uniform prediction over the predicted set size (B, C).
-        """
-        self.model.eval()
-        probs = self.model_forward(inputs.to(self.device))
-        pred_set = self._calculate_all_labels(probs) <= self.quantile
-        confidence_score = 1 / pred_set.sum(dim=1, keepdim=True)
-        return pred_set.float() * confidence_score
-
-    @property
-    def quantile(self) -> Tensor:
-        if self.q_hat is None:
-            raise ValueError("Quantile q_hat is not set. Run `.fit()` first.")
-        return self.q_hat.detach()
diff --git a/torch_uncertainty/post_processing/conformal/conformal_thr.py b/torch_uncertainty/post_processing/conformal/conformal_thr.py
deleted file mode 100644
index 07ffed40..00000000
--- a/torch_uncertainty/post_processing/conformal/conformal_thr.py
+++ /dev/null
@@ -1,95 +0,0 @@
-from typing import Literal
-
-import torch
-from torch import Tensor, nn
-from torch.utils.data import DataLoader
-
-from torch_uncertainty.post_processing import TemperatureScaler
-
-from .abstract import Conformal
-
-
-class ConformalClsTHR(Conformal):
-    def __init__(
-        self,
-        alpha: float,
-        model: nn.Module | None = None,
-        init_val: float = 1,
-        lr: float = 0.1,
-        max_iter: int = 100,
-        device: Literal["cpu", "cuda"] | torch.device | None = None,
-    ) -> None:
-        r"""Conformal prediction post-processing for calibrated models.
-
-        Args:
-            alpha (float): The confidence level, meaning we allow :math:`1-\alpha` error.
-            model (nn.Module, optional): Model to be calibrated. Defaults to ``None``.
-            init_val (float, optional): Initial value for the temperature.
-                Defaults to ``1``.
-            lr (float, optional): Learning rate for the optimizer. Defaults to ``0.1``.
-            max_iter (int, optional): Maximum number of iterations for the
-                optimizer. Defaults to ``100``.
-            device (Literal["cpu", "cuda"] | torch.device | None, optional): device.
-                Defaults to ``None``.
-
-        Reference:
-            - `Least ambiguous set-valued classifiers with bounded error levels, Sadinle, M. et al., (2016) <https://arxiv.org/abs/1609.00451>`_.
-        """
-        super().__init__(model=model)
-        self.alpha = alpha
-        self.temperature_scaler = TemperatureScaler(
-            model=model,
-            init_val=init_val,
-            lr=lr,
-            max_iter=max_iter,
-            device=device,
-        )
-        self.device = device or "cpu"
-        self.q_hat = None  # Will be set after calibration
-
-    def set_model(self, model: nn.Module) -> None:
-        self.model = model.eval()
-        self.temperature_scaler.set_model(model=model)
-
-    def model_forward(self, inputs: Tensor) -> Tensor:
-        """Apply temperature scaling."""
-        return self.temperature_scaler(inputs.to(self.device))
-
-    def fit(self, dataloader: DataLoader) -> None:
-        self.temperature_scaler.fit(dataloader=dataloader)
-        logits_list = []
-        labels_list = []
-        with torch.no_grad():
-            for images, labels in dataloader:
-                images, labels = images.to(self.device), labels.to(self.device)
-                scaled_logits = self.model_forward(images)
-                logits_list.append(scaled_logits)
-                labels_list.append(labels)
-
-        probs = torch.cat(logits_list).softmax(-1)
-        labels = torch.cat(labels_list).long()
-        true_class_probs = probs.gather(1, labels.unsqueeze(1)).squeeze(1)
-        scores = 1.0 - true_class_probs  # scores are (1 - true prob)
-        self.q_hat = torch.quantile(scores, 1.0 - self.alpha)
-
-    @torch.no_grad()
-    def conformal(self, inputs: Tensor) -> Tensor:
-        """Perform conformal prediction on the test set."""
-        self.model.eval()
-        probs = self.model_forward(inputs.to(self.device)).softmax(-1)
-        pred_set = probs >= 1.0 - self.quantile
-        top1 = torch.argmax(probs, dim=1, keepdim=True)
-        pred_set.scatter_(1, top1, True)  # Always include top-1 class
-        confidence_score = 1 / pred_set.sum(dim=1, keepdim=True)
-        return pred_set.float() * confidence_score
-
-    @property
-    def quantile(self) -> Tensor:
-        if self.q_hat is None:
-            raise ValueError("Quantile q_hat is not set. Run `.fit()` first.")
-        return self.q_hat
-
-    @property
-    def temperature(self) -> Tensor:
-        """Get the temperature parameter."""
-        return self.temperature_scaler.temperature[0].detach()
diff --git a/torch_uncertainty/post_processing/conformal/raps.py b/torch_uncertainty/post_processing/conformal/raps.py
new file mode 100644
index 00000000..065791e1
--- /dev/null
+++ b/torch_uncertainty/post_processing/conformal/raps.py
@@ -0,0 +1,96 @@
+from typing import Literal
+
+import torch
+from torch import Tensor, nn
+
+from .aps import ConformalClsAPS
+
+
+class ConformalClsRAPS(ConformalClsAPS):
+    def __init__(
+        self,
+        alpha: float,
+        model: nn.Module | None = None,
+        randomized: bool = True,
+        penalty: float = 0.1,
+        regularization_rank: int = 1,
+        ts_init_val: float = 1.0,
+        ts_lr: float = 0.1,
+        ts_max_iter: int = 100,
+        enable_ts: bool = True,
+        device: Literal["cpu", "cuda"] | torch.device | None = None,
+    ) -> None:
+        r"""Conformal prediction with Regularized Adaptive Prediction Sets (RAPS) scores.
+
+        Args:
+            alpha (float): The miscoverage level, meaning we allow :math:`\alpha` error.
+            model (nn.Module): Trained classification model. Defaults to ``None``.
+            randomized (bool): Whether to randomize the scores. Defaults to ``True``.
+            penalty (float): Regularization weight, must be non-negative. Defaults to ``0.1``.
+            regularization_rank (int): Non-negative rank above which classes are penalized.
+                Defaults to ``1``.
+            ts_init_val (float, optional): Initial value for the temperature. Defaults to ``1.0``.
+            ts_lr (float, optional): Learning rate for the optimizer. Defaults to ``0.1``.
+            ts_max_iter (int, optional): Maximum number of iterations for the
+                optimizer. Defaults to ``100``.
+            enable_ts (bool): Whether to scale the logits. Defaults to ``True``.
+            device (Literal["cpu", "cuda"] | torch.device | None, optional): device.
+                Defaults to ``None``.
+
+        Reference:
+            - `Uncertainty Sets for Image Classifiers using Conformal Prediction, Angelopoulos, A. N. et al., (2021) <https://arxiv.org/abs/2009.14193>`_.
+
+        Code inspired by TorchCP.
+        """
+        super().__init__(
+            alpha=alpha,
+            model=model,
+            randomized=randomized,
+            ts_init_val=ts_init_val,
+            ts_lr=ts_lr,
+            ts_max_iter=ts_max_iter,
+            enable_ts=enable_ts,
+            device=device,
+        )
+        if penalty < 0:
+            raise ValueError(f"penalty should be non-negative. Got {penalty}.")
+
+        if not isinstance(regularization_rank, int):
+            raise TypeError(f"regularization_rank should be an integer. Got {regularization_rank}.")
+
+        if regularization_rank < 0:
+            raise ValueError(
+                f"regularization_rank should be non-negative. Got {regularization_rank}."
+            )
+
+        self.penalty = penalty
+        self.regularization_rank = regularization_rank
+
+    def _calculate_all_labels(self, probs: Tensor) -> Tensor:
+        indices, ordered, cumsum = self._sort_sum(probs)
+        if self.randomized:
+            noise = torch.rand(probs.shape, device=probs.device)
+        else:
+            noise = torch.zeros_like(probs)
+        reg = torch.maximum(  # penalty * max(rank - regularization_rank, 0), rank is 1-based
+            self.penalty
+            * (
+                torch.arange(1, probs.shape[-1] + 1, device=probs.device) - self.regularization_rank
+            ),
+            torch.tensor(0, device=probs.device),
+        )
+        ordered_scores = cumsum - ordered * noise + reg
+        _, sorted_indices = torch.sort(indices, descending=False, dim=-1)  # argsort of indices
+        return ordered_scores.gather(dim=-1, index=sorted_indices)  # presumably class order
+
+    def _calculate_single_label(self, probs: Tensor, label: Tensor) -> Tensor:
+        indices, ordered, cumsum = self._sort_sum(probs)
+        if self.randomized:
+            noise = torch.rand(indices.shape[0], device=probs.device)
+        else:
+            noise = torch.zeros(indices.shape[0], device=probs.device)
+        idx = torch.where(indices == label.view(-1, 1))  # (row, col) of each label in `indices`
+        reg = torch.maximum(
+            self.penalty * (idx[1] + 1 - self.regularization_rank), torch.tensor(0).to(probs.device)
+        )
+        return cumsum[idx] - noise * ordered[idx] + reg
diff --git a/torch_uncertainty/post_processing/conformal/thr.py b/torch_uncertainty/post_processing/conformal/thr.py
new file mode 100644
index 00000000..abee746b
--- /dev/null
+++ b/torch_uncertainty/post_processing/conformal/thr.py
@@ -0,0 +1,75 @@
+from typing import Literal
+
+import torch
+from torch import Tensor, nn
+from torch.utils.data import DataLoader
+
+from .abstract import Conformal
+
+
+class ConformalClsTHR(Conformal):
+    def __init__(
+        self,
+        alpha: float,
+        model: nn.Module | None = None,
+        ts_init_val: float = 1.0,
+        ts_lr: float = 0.1,
+        ts_max_iter: int = 100,
+        enable_ts: bool = True,
+        device: Literal["cpu", "cuda"] | torch.device | None = None,
+    ) -> None:
+        r"""Conformal prediction post-processing for calibrated models.
+
+        Args:
+            alpha (float): The miscoverage level, meaning we allow :math:`\alpha` error.
+            model (nn.Module, optional): Model to be calibrated. Defaults to ``None``.
+            ts_init_val (float, optional): Initial value for the temperature.
+                Defaults to ``1.0``.
+            ts_lr (float, optional): Learning rate for the optimizer. Defaults to ``0.1``.
+            ts_max_iter (int, optional): Maximum number of iterations for the
+                optimizer. Defaults to ``100``.
+            enable_ts (bool): Whether to scale the logits. Defaults to ``True``.
+            device (Literal["cpu", "cuda"] | torch.device | None, optional): device.
+                Defaults to ``None``.
+
+        Reference:
+            - `Least ambiguous set-valued classifiers with bounded error levels, Sadinle, M. et al., (2016) <https://arxiv.org/abs/1609.00451>`_.
+
+        Code inspired by TorchCP.
+        """
+        super().__init__(
+            alpha=alpha,
+            model=model,
+            ts_init_val=ts_init_val,
+            ts_lr=ts_lr,
+            ts_max_iter=ts_max_iter,
+            enable_ts=enable_ts,
+            device=device,
+        )
+
+    def fit(self, dataloader: DataLoader) -> None:  # sets `self.q_hat` from `dataloader`
+        if self.enable_ts:
+            self.model.fit(dataloader=dataloader)  # presumably fits the temperature scaling
+
+        logit_list = []
+        label_list = []
+        with torch.no_grad():
+            for images, labels in dataloader:
+                images, labels = images.to(self.device), labels.to(self.device)
+                logit_list.append(self.model_forward(images))
+                label_list.append(labels)
+
+        probs = torch.cat(logit_list)  # treated as probabilities; verify `model_forward` output
+        labels = torch.cat(label_list).long()
+        true_class_probs = probs.gather(1, labels.unsqueeze(1)).squeeze(1)
+        scores = 1.0 - true_class_probs  # NOTE(review): no finite-sample correction below
+        self.q_hat = torch.quantile(scores, 1.0 - self.alpha).item()
+
+    @torch.no_grad()
+    def conformal(self, inputs: Tensor) -> Tensor:
+        """Return prediction sets as uniform scores over the retained classes of each input."""
+        probs = self.model_forward(inputs)
+        pred_set = probs >= 1.0 - self.quantile  # i.e. keep classes with 1 - prob <= quantile
+        top1 = torch.argmax(probs, dim=1, keepdim=True)
+        pred_set.scatter_(1, top1, True)  # Always include top-1 class
+        return pred_set.float() / pred_set.sum(dim=1, keepdim=True)

From 9307ec04b818d28c4915991442c4ea94255c8c12 Mon Sep 17 00:00:00 2001
From: Olivier <olivier.ar.laurent@gmail.com>
Date: Thu, 22 May 2025 18:00:50 +0200
Subject: [PATCH 05/11] :books: Slightly improve tutorials

---
 .../Bayesian_Methods/tutorial_bayesian.py     |  84 +++++----
 .../Bayesian_Methods/tutorial_muad_mc_drop.py |   2 +-
 .../Classification/tutorial_bayesian.py       | 174 ------------------
 .../tutorial_distribution_shift.py            |   4 +-
 .../tutorial_evidential_classification.py     |   2 +-
 .../Classification/tutorial_ood_detection.py  |   4 +-
 .../tutorial_from_de_to_pe.py                 |   2 +-
 .../Post_Hoc_Methods/tutorial_conformal.py    |  50 ++---
 .../Post_Hoc_Methods/tutorial_scaler.py       |   6 +-
 .../Regression/tutorial_der_cubic.py          |   2 +
 .../Segmentation/tutorial_muad_deep_en.py     |   6 +-
 .../Segmentation/tutorial_muad_packed.py      |   6 +-
 .../Segmentation/tutorial_muad_seg.py         |   8 +-
 13 files changed, 101 insertions(+), 249 deletions(-)
 delete mode 100644 auto_tutorial_source/Classification/tutorial_bayesian.py

diff --git a/auto_tutorial_source/Bayesian_Methods/tutorial_bayesian.py b/auto_tutorial_source/Bayesian_Methods/tutorial_bayesian.py
index d2cce3ae..5ded08e2 100644
--- a/auto_tutorial_source/Bayesian_Methods/tutorial_bayesian.py
+++ b/auto_tutorial_source/Bayesian_Methods/tutorial_bayesian.py
@@ -1,36 +1,37 @@
 # ruff: noqa: E402, E703, D212, D415, T201
 """
-Train a Bayesian Neural Network in Three Minutes
+Training a Bayesian Neural Network in 20 seconds
 ================================================
 
-In this tutorial, we will train a variational inference Bayesian Neural Network (BNN) LeNet classifier on the MNIST dataset.
+In this tutorial, we will train a variational inference Bayesian Neural Network (viBNN) LeNet classifier on the MNIST dataset.
 
 Foreword on Bayesian Neural Networks
 ------------------------------------
 
 Bayesian Neural Networks (BNNs) are a class of neural networks that estimate the uncertainty on their predictions via uncertainty
 on their weights. This is achieved by considering the weights of the neural network as random variables, and by learning their
-posterior distribution. This is in contrast to standard neural networks, which only learn a single set of weights, which can be
-seen as Dirac distributions on the weights.
+posterior distribution. This is in contrast to standard neural networks, which only learn a single set of weights (this can be
+seen as Dirac distributions on the weights).
 
-For more information on Bayesian Neural Networks, we refer the reader to the following resources:
+For more information on Bayesian Neural Networks, we refer to the following resources:
 
 - Weight Uncertainty in Neural Networks `ICML2015 <https://arxiv.org/pdf/1505.05424.pdf>`_
-- Hands-on Bayesian Neural Networks - a Tutorial for Deep Learning Users `IEEE Computational Intelligence Magazine <https://arxiv.org/pdf/2007.06823.pdf>`_
+- Hands-on Bayesian Neural Networks - a Tutorial for Deep Learning Users `IEEE Computational Intelligence Magazine
+    <https://arxiv.org/pdf/2007.06823.pdf>`_
 
 Training a Bayesian LeNet using TorchUncertainty models and Lightning
 ---------------------------------------------------------------------
 
-In this part, we train a Bayesian LeNet, based on the model and routines already implemented in TU.
+In this first part, we train a Bayesian LeNet, based on the model and routines already implemented in TU.
 
 1. Loading the utilities
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
 To train a BNN using TorchUncertainty, we have to load the following modules:
 
-- our TUTrainer
-- the model: bayesian_lenet, which lies in the torch_uncertainty.model
-- the classification training routine from torch_uncertainty.routines
+- our TUTrainer to improve the display of our metrics
+- the model: bayesian_lenet, which lies in the torch_uncertainty.models.classification.lenet module
+- the classification training routine from torch_uncertainty.routines module
 - the Bayesian objective: the ELBOLoss, which lies in the torch_uncertainty.losses file
 - the datamodule that handles dataloaders: MNISTDataModule from torch_uncertainty.datamodules
 
@@ -46,23 +47,28 @@
 from torch_uncertainty import TUTrainer
 from torch_uncertainty.datamodules import MNISTDataModule
 from torch_uncertainty.losses import ELBOLoss
-from torch_uncertainty.models.classification import bayesian_lenet
+from torch_uncertainty.models.classification.lenet import bayesian_lenet
 from torch_uncertainty.routines import ClassificationRoutine
 
+# We also define the main hyperparameters, with just two epochs for the sake of time
+BATCH_SIZE = 512
+MAX_EPOCHS = 2
+
 # %%
 # 2. Creating the necessary variables
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #
 # In the following, we instantiate our trainer, define the root of the datasets and the logs.
 # We also create the datamodule that handles the MNIST dataset, dataloaders and transforms.
-# Please note that the datamodules can also handle OOD detection by setting the eval_ood
-# parameter to True. Finally, we create the model using the blueprint from torch_uncertainty.models.
+# Please note that the datamodules can also handle OOD detection by setting the `eval_ood`
+# parameter to True, as well as distribution shift with `eval_shift`.
+# Finally, we create the model using the blueprint from torch_uncertainty.models.
 
-trainer = TUTrainer(accelerator="gpu", devices=1, enable_progress_bar=False, max_epochs=1)
+trainer = TUTrainer(accelerator="gpu", devices=1, enable_progress_bar=False, max_epochs=MAX_EPOCHS)
 
 # datamodule
 root = Path("data")
-datamodule = MNISTDataModule(root=root, batch_size=128, eval_ood=False)
+datamodule = MNISTDataModule(root=root, batch_size=BATCH_SIZE, num_workers=8)
 
 # model
 model = bayesian_lenet(datamodule.num_channels, datamodule.num_classes)
@@ -70,15 +76,14 @@
 # %%
 # 3. The Loss and the Training Routine
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-# Then, we just have to define the loss to be used during training. To do this,
-# we redefine the default parameters from the ELBO loss using the partial
-# function from functools. We use the hyperparameters proposed in the blitz
-# library. As we are train a classification model, we use the CrossEntropyLoss
-# as the likelihood.
-# We then define the training routine using the classification training routine
-# from torch_uncertainty.classification. We provide the model, the ELBO
+#
+# Then, we just define the loss to be used during training, which is a bit special and called
+# the evidence lower bound. We use the hyperparameters proposed in the blitz
+# library. As we are training a classification model, we use the CrossEntropyLoss
+# as the negative log likelihood. We then define the training routine using the classification
+# training routine from torch_uncertainty.routines. We provide the model, the ELBO
 # loss and the optimizer to the routine.
-# We will use the Adam optimizer with the default learning rate of 0.001.
+# We use an Adam optimizer with a learning rate of 0.02.
 
 loss = ELBOLoss(
     model=model,
@@ -91,10 +96,7 @@
     model=model,
     num_classes=datamodule.num_classes,
     loss=loss,
-    optim_recipe=optim.Adam(
-        model.parameters(),
-        lr=1e-3,
-    ),
+    optim_recipe=optim.Adam(model.parameters(), lr=2e-2),
     is_ensemble=True,
 )
 
@@ -105,25 +107,26 @@
 # Now that we have prepared all of this, we just have to gather everything in
 # the main function and to train the model using our wrapper of Lightning Trainer.
 # Specifically, it needs the routine, that includes the model as well as the
-# training/eval logic and the datamodule
+# training/eval logic and the datamodule.
 # The dataset will be downloaded automatically in the root/data folder, and the
 # logs will be saved in the root/logs folder.
 
 trainer.fit(model=routine, datamodule=datamodule)
 trainer.test(model=routine, datamodule=datamodule)
-
 # %%
 # 5. Testing the Model
 # ~~~~~~~~~~~~~~~~~~~~
 #
 # Now that the model is trained, let's test it on MNIST.
 # Please note that we apply a reshape to the logits to determine the dimension corresponding to the ensemble
-# and to the batch. As for TorchUncertainty 0.2.0, the ensemble dimension is merged with the batch dimension
+# and to the batch. As of TorchUncertainty 0.5.0, the ensemble dimension is merged with the batch dimension
 # in this order (num_estimator x batch, classes).
+
 import matplotlib.pyplot as plt
 import numpy as np
 import torch
 import torchvision
+from einops import rearrange
 
 
 def imshow(img) -> None:
@@ -134,32 +137,33 @@ def imshow(img) -> None:
     plt.show()
 
 
-dataiter = iter(datamodule.val_dataloader())
-images, labels = next(dataiter)
+images, labels = next(iter(datamodule.val_dataloader()))
 
 # print images
 imshow(torchvision.utils.make_grid(images[:4, ...]))
 print("Ground truth: ", " ".join(f"{labels[j]}" for j in range(4)))
 
 # Put the model in eval mode to use several samples
-model = model.eval()
-logits = model(images).reshape(16, 128, 10)  # num_estimators, batch_size, num_classes
+model = routine.eval()
+logits = routine(images[:4, ...])
+print("Output logit shape (Num predictions x Batch) x Classes: ", logits.shape)
+logits = rearrange(logits, "(m b) c -> b m c", b=4)  # batch_size, num_estimators, num_classes
 
-# We apply the softmax on the classes and average over the estimators
+# We apply the softmax on the classes then average over the estimators
 probs = torch.nn.functional.softmax(logits, dim=-1)
-avg_probs = probs.mean(dim=0)
-var_probs = probs.std(dim=0)
+avg_probs = probs.mean(dim=1)
+var_probs = probs.std(dim=1)
 
-_, predicted = torch.max(avg_probs, 1)
+predicted = torch.argmax(avg_probs, -1)
 
 print("Predicted digits: ", " ".join(f"{predicted[j]}" for j in range(4)))
 print(
     "Std. dev. of the scores over the posterior samples",
-    " ".join(f"{var_probs[j][predicted[j]]:.3}" for j in range(4)),
+    " ".join(f"{var_probs[j][predicted[j]]:.3f}" for j in range(4)),
 )
 # %%
 # Here, we show the variance of the top prediction. This is a non-standard but intuitive way to show the diversity of the predictions
-# of the ensemble. Ideally, the variance should be high when the average top prediction is incorrect.
+# of the ensemble. Ideally, the variance should be high when the prediction is incorrect.
 #
 # References
 # ----------
diff --git a/auto_tutorial_source/Bayesian_Methods/tutorial_muad_mc_drop.py b/auto_tutorial_source/Bayesian_Methods/tutorial_muad_mc_drop.py
index 89109f60..89da40ab 100644
--- a/auto_tutorial_source/Bayesian_Methods/tutorial_muad_mc_drop.py
+++ b/auto_tutorial_source/Bayesian_Methods/tutorial_muad_mc_drop.py
@@ -212,7 +212,7 @@ def enet_weighing(dataloader, num_classes, c=1.02):
     optim_recipe={"optimizer": optimizer, "lr_scheduler": lr_updater},
 )
 
-trainer = TUTrainer(accelerator="gpu", devices=1, max_epochs=NB_EPOCHS, enable_progress_bar=True)
+trainer = TUTrainer(accelerator="gpu", devices=1, max_epochs=NB_EPOCHS, enable_progress_bar=False)
 # %%
 # 6. Training the model
 # ~~~~~~~~~~~~~~~~~~~~~
diff --git a/auto_tutorial_source/Classification/tutorial_bayesian.py b/auto_tutorial_source/Classification/tutorial_bayesian.py
deleted file mode 100644
index be5bbdda..00000000
--- a/auto_tutorial_source/Classification/tutorial_bayesian.py
+++ /dev/null
@@ -1,174 +0,0 @@
-# ruff: noqa: E402, E703, D212, D415, T201
-"""
-Train a Bayesian Neural Network in 20 seconds
-=============================================
-
-In this tutorial, we will train a variational inference Bayesian Neural Network (viBNN) LeNet classifier on the MNIST dataset.
-
-Foreword on Bayesian Neural Networks
-------------------------------------
-
-Bayesian Neural Networks (BNNs) are a class of neural networks that estimate the uncertainty on their predictions via uncertainty
-on their weights. This is achieved by considering the weights of the neural network as random variables, and by learning their
-posterior distribution. This is in contrast to standard neural networks, which only learn a single set of weights (this can be
-seen as Dirac distributions on the weights).
-
-For more information on Bayesian Neural Networks, we refer to the following resources:
-
-- Weight Uncertainty in Neural Networks `ICML2015 <https://arxiv.org/pdf/1505.05424.pdf>`_
-- Hands-on Bayesian Neural Networks - a Tutorial for Deep Learning Users `IEEE Computational Intelligence Magazine
-    <https://arxiv.org/pdf/2007.06823.pdf>`_
-
-Training a Bayesian LeNet using TorchUncertainty models and Lightning
----------------------------------------------------------------------
-
-In this first part, we train a Bayesian LeNet, based on the model and routines already implemented in TU.
-
-1. Loading the utilities
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-To train a BNN using TorchUncertainty, we have to load the following modules:
-
-- our TUTrainer
-- the model: bayesian_lenet, which lies in the torch_uncertainty.model.classification.lenet module
-- the classification training routine from torch_uncertainty.routines module
-- the Bayesian objective: the ELBOLoss, which lies in the torch_uncertainty.losses file
-- the datamodule that handles dataloaders: MNISTDataModule from torch_uncertainty.datamodules
-
-We will also need to define an optimizer using torch.optim and Pytorch's
-neural network utils from torch.nn.
-"""
-
-# %%
-from pathlib import Path
-
-from torch import nn, optim
-
-from torch_uncertainty import TUTrainer
-from torch_uncertainty.datamodules import MNISTDataModule
-from torch_uncertainty.losses import ELBOLoss
-from torch_uncertainty.models.classification.lenet import bayesian_lenet
-from torch_uncertainty.routines import ClassificationRoutine
-
-# We also define the main hyperparameters, with just one epoch for the sake of time
-BATCH_SIZE = 512
-MAX_EPOCHS = 1
-
-# %%
-# 2. Creating the necessary variables
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-#
-# In the following, we instantiate our trainer, define the root of the datasets and the logs.
-# We also create the datamodule that handles the MNIST dataset, dataloaders and transforms.
-# Please note that the datamodules can also handle OOD detection by setting the `eval_ood`
-# parameter to True, as well as distribution shift with `eval_shift`.
-# Finally, we create the model using the blueprint from torch_uncertainty.models.
-
-trainer = TUTrainer(accelerator="gpu", devices=1, enable_progress_bar=False, max_epochs=MAX_EPOCHS)
-
-# datamodule
-root = Path("data")
-datamodule = MNISTDataModule(root=root, batch_size=BATCH_SIZE, eval_ood=False, num_workers=8)
-
-# model
-model = bayesian_lenet(datamodule.num_channels, datamodule.num_classes)
-
-# %%
-# 3. The Loss and the Training Routine
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-#
-# Then, we just define the loss to be used during training, which is a bit special and called
-# the evidence lower bound. We use the hyperparameters proposed in the blitz
-# library. As we are training a classification model, we use the CrossEntropyLoss
-# as the negative log likelihood. We then define the training routine using the classification
-# training routine from torch_uncertainty.classification. We provide the model, the ELBO
-# loss and the optimizer to the routine.
-# We use an Adam optimizer with a learning rate of 0.02.
-
-loss = ELBOLoss(
-    model=model,
-    inner_loss=nn.CrossEntropyLoss(),
-    kl_weight=1 / 10000,
-    num_samples=3,
-)
-
-routine = ClassificationRoutine(
-    model=model,
-    num_classes=datamodule.num_classes,
-    loss=loss,
-    optim_recipe=optim.Adam(model.parameters(), lr=2e-2),
-    is_ensemble=True,
-)
-
-# %%
-# 4. Gathering Everything and Training the Model
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-#
-# Now that we have prepared all of this, we just have to gather everything in
-# the main function and to train the model using our wrapper of Lightning Trainer.
-# Specifically, it needs the routine, that includes the model as well as the
-# training/eval logic and the datamodule.
-# The dataset will be downloaded automatically in the root/data folder, and the
-# logs will be saved in the root/logs folder.
-
-trainer.fit(model=routine, datamodule=datamodule)
-trainer.test(model=routine, datamodule=datamodule)
-# %%
-# 5. Testing the Model
-# ~~~~~~~~~~~~~~~~~~~~
-#
-# Now that the model is trained, let's test it on MNIST.
-# Please note that we apply a reshape to the logits to determine the dimension corresponding to the ensemble
-# and to the batch. As for TorchUncertainty 0.5.0, the ensemble dimension is merged with the batch dimension
-# in this order (num_estimator x batch, classes).
-
-import matplotlib.pyplot as plt
-import numpy as np
-import torch
-import torchvision
-from einops import rearrange
-
-
-def imshow(img) -> None:
-    npimg = img.numpy()
-    plt.imshow(np.transpose(npimg, (1, 2, 0)))
-    plt.axis("off")
-    plt.tight_layout()
-    plt.show()
-
-
-images, labels = next(iter(datamodule.val_dataloader()))
-
-# print images
-imshow(torchvision.utils.make_grid(images[:4, ...]))
-print("Ground truth: ", " ".join(f"{labels[j]}" for j in range(4)))
-
-# Put the model in eval mode to use several samples
-model = routine.eval()
-logits = routine(images[:4, ...])
-print("Output logit shape (Num predictions x Batch) Classes: ", logits.shape)
-logits = rearrange(logits, "(m b) c -> b m c", b=4)  # batch_size, num_estimators, num_classes
-
-# We apply the softmax on the classes then average over the estimators
-probs = torch.nn.functional.softmax(logits, dim=-1)
-avg_probs = probs.mean(dim=1)
-var_probs = probs.std(dim=1)
-
-predicted = torch.argmax(avg_probs, -1)
-
-print("Predicted digits: ", " ".join(f"{predicted[j]}" for j in range(4)))
-print(
-    "Std. dev. of the scores over the posterior samples",
-    " ".join(f"{var_probs[j][predicted[j]]:.3f}" for j in range(4)),
-)
-# %%
-# Here, we show the variance of the top prediction. This is a non-standard but intuitive way to show the diversity of the predictions
-# of the ensemble. Ideally, the variance should be high when the prediction is incorrect.
-#
-# References
-# ----------
-#
-# - **LeNet & MNIST:** LeCun, Y., Bottou, L., Bengio, Y., & Haffner, P. (1998). Gradient-based learning applied to document recognition. `Proceedings of the IEEE <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
-# - **Bayesian Neural Networks:** Blundell, C., Cornebise, J., Kavukcuoglu, K., & Wierstra, D. (2015). Weight Uncertainty in Neural Networks. `ICML 2015 <https://arxiv.org/pdf/1505.05424.pdf>`_.
-# - **The Adam optimizer:** Kingma, D. P., & Ba, J. (2014). "Adam: A method for stochastic optimization." `ICLR 2015 <https://arxiv.org/pdf/1412.6980.pdf>`_.
-# - **The Blitz** `library <https://github.com/piEsposito/blitz-bayesian-deep-learning>`_ (for the hyperparameters).
diff --git a/auto_tutorial_source/Classification/tutorial_distribution_shift.py b/auto_tutorial_source/Classification/tutorial_distribution_shift.py
index d99667c8..4421ee0f 100644
--- a/auto_tutorial_source/Classification/tutorial_distribution_shift.py
+++ b/auto_tutorial_source/Classification/tutorial_distribution_shift.py
@@ -107,7 +107,9 @@
 # We specify the maximum number of epochs, the precision and the device to be used.
 
 # Initialize the TUTrainer with a maximum of 10 epochs and the specified device
-trainer = TUTrainer(max_epochs=10, precision="16-mixed", accelerator="cuda", devices=1)
+trainer = TUTrainer(
+    max_epochs=10, precision="16-mixed", accelerator="cuda", devices=1, enable_progress_bar=False
+)
 
 # Begin training the model using the CIFAR-10 DataModule
 trainer.fit(routine, datamodule=datamodule)
diff --git a/auto_tutorial_source/Classification/tutorial_evidential_classification.py b/auto_tutorial_source/Classification/tutorial_evidential_classification.py
index 909ece08..372ced49 100644
--- a/auto_tutorial_source/Classification/tutorial_evidential_classification.py
+++ b/auto_tutorial_source/Classification/tutorial_evidential_classification.py
@@ -35,7 +35,7 @@
 from pathlib import Path
 
 import torch
-from torch import nn, optim
+from torch import optim
 
 from torch_uncertainty import TUTrainer
 from torch_uncertainty.datamodules import MNISTDataModule
diff --git a/auto_tutorial_source/Classification/tutorial_ood_detection.py b/auto_tutorial_source/Classification/tutorial_ood_detection.py
index ca3f750c..939fba4c 100644
--- a/auto_tutorial_source/Classification/tutorial_ood_detection.py
+++ b/auto_tutorial_source/Classification/tutorial_ood_detection.py
@@ -91,7 +91,9 @@
 # we will train for a single epoch and load a model from `TorchUncertainty's HuggingFace <https://huggingface.co/torch-uncertainty>`_.
 
 # Initialize the TUTrainer
-trainer = TUTrainer(max_epochs=1, precision="16-mixed", accelerator="cuda", devices=1)
+trainer = TUTrainer(
+    max_epochs=1, precision="16-mixed", accelerator="cuda", devices=1, enable_progress_bar=False
+)
 
 # Train the model for 1 epoch using the CIFAR-10 DataModule
 trainer.fit(routine, datamodule=datamodule)
diff --git a/auto_tutorial_source/Ensemble_Methods/tutorial_from_de_to_pe.py b/auto_tutorial_source/Ensemble_Methods/tutorial_from_de_to_pe.py
index 9f1219d5..8b433212 100644
--- a/auto_tutorial_source/Ensemble_Methods/tutorial_from_de_to_pe.py
+++ b/auto_tutorial_source/Ensemble_Methods/tutorial_from_de_to_pe.py
@@ -152,7 +152,7 @@ def optim_recipe(model, lr_mult: float = 1.0):
 from torch_uncertainty.routines import ClassificationRoutine
 
 # Create the trainer that will handle the training
-trainer = TUTrainer(accelerator="gpu", devices=1, max_epochs=MAX_EPOCHS)
+trainer = TUTrainer(accelerator="gpu", devices=1, max_epochs=MAX_EPOCHS, enable_progress_bar=False)
 
 # The routine is a wrapper of the model that contains the training logic with the metrics, etc
 routine = ClassificationRoutine(
diff --git a/auto_tutorial_source/Post_Hoc_Methods/tutorial_conformal.py b/auto_tutorial_source/Post_Hoc_Methods/tutorial_conformal.py
index 4b347885..748f7ccc 100644
--- a/auto_tutorial_source/Post_Hoc_Methods/tutorial_conformal.py
+++ b/auto_tutorial_source/Post_Hoc_Methods/tutorial_conformal.py
@@ -3,11 +3,9 @@
 Conformal Prediction on CIFAR-10 with TorchUncertainty
 ======================================================
 
-
 We evaluate the model's performance both before and after applying different conformal predictors (THR, APS, RAPS), and visualize how conformal prediction estimates the prediction sets.
 
-We use the pretrained ResNet models provided on Hugging Face.
-
+We use the pretrained ResNet models we provide on Hugging Face.
 """
 
 # %%
@@ -27,6 +25,7 @@
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #
 # We use a ResNet18 model trained on CIFAR-10, provided by the TorchUncertainty team
+
 ckpt_path = hf_hub_download(repo_id="torch-uncertainty/resnet18_c10", filename="resnet18_c10.ckpt")
 model = resnet(in_channels=3, num_classes=10, arch=18, conv_bias=False, style="cifar")
 ckpt = torch.load(ckpt_path, weights_only=True)
@@ -34,12 +33,13 @@
 model = model.cuda().eval()
 
 # %%
-# 2. Load CIFAR-10 dataset & define dataloaders
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# 2. Load CIFAR-10 Dataset & Define Dataloaders
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #
 # We set eval_ood to True to evaluate the performance of Conformal scores for detecting out-of-distribution
-# samples. We also use a validation split taken from the training set with 10% of the training images to fit
-# the conformal methods.
+# samples. In this case, since we use a model trained on the full training set, we use the test set both as the
+# calibration set for the Conformal methods and for their evaluation. This is not a proper way to evaluate the coverage.
+
 BATCH_SIZE = 128
 
 datamodule = CIFAR10DataModule(
@@ -47,7 +47,7 @@
     batch_size=BATCH_SIZE,
     num_workers=8,
     eval_ood=True,
-    val_split=0.1,
+    postprocess_set="test",
 )
 datamodule.prepare_data()
 datamodule.setup()
@@ -56,12 +56,15 @@
 # %%
 # 3. Define the Lightning Trainer
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-trainer = TUTrainer(accelerator="gpu", devices=1, max_epochs=5)
+
+trainer = TUTrainer(accelerator="gpu", devices=1, max_epochs=5, enable_progress_bar=False)
 
 
 # %%
-# 4. Define a function to visualize the prediction sets
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# 4. Function to Visualize the Prediction Sets
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
 def visualize_prediction_sets(inputs, labels, confidence_scores, classes, num_examples=5):
     _, axs = plt.subplots(2, num_examples, figsize=(15, 5))
     for i in range(num_examples):
@@ -82,8 +85,11 @@ def visualize_prediction_sets(inputs, labels, confidence_scores, classes, num_ex
 
 
 # %%
-# 5. Estimate prediction sets with ConformalClsTHR
+# 5. Estimate Prediction Sets with ConformalClsTHR
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# Using alpha=0.01, we aim for a 1% error rate.
+
 print("[Phase 2]: ConformalClsTHR calibration")
 conformal_model = ConformalClsTHR(alpha=0.01, device="cuda")
 
@@ -100,6 +106,7 @@ def visualize_prediction_sets(inputs, labels, confidence_scores, classes, num_ex
 # %%
 # 6. Visualization of ConformalClsTHR prediction sets
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 inputs, labels = next(iter(datamodule.test_dataloader()[0]))
 
 conformal_model.cuda()
@@ -110,13 +117,11 @@ def visualize_prediction_sets(inputs, labels, confidence_scores, classes, num_ex
 visualize_prediction_sets(inputs, labels, confidence_scores[:5].cpu(), classes)
 
 # %%
-# 7. Estimate prediction sets with ConformalClsAPS
+# 7. Estimate Prediction Sets with ConformalClsAPS
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 print("[Phase 3]: ConformalClsAPS calibration")
-conformal_model = ConformalClsAPS(
-    alpha=0.01,
-    device="cuda",
-)
+conformal_model = ConformalClsAPS(alpha=0.01, device="cuda", enable_ts=False)
 
 routine_aps = ClassificationRoutine(
     num_classes=10,
@@ -132,13 +137,12 @@ def visualize_prediction_sets(inputs, labels, confidence_scores, classes, num_ex
 visualize_prediction_sets(inputs, labels, confidence_scores[:5].cpu(), classes)
 
 # %%
-# 8. Estimate prediction sets with ConformalClsRAPS
+# 8. Estimate Prediction Sets with ConformalClsRAPS
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 print("[Phase 4]: ConformalClsRAPS calibration")
 conformal_model = ConformalClsRAPS(
-    alpha=0.01,
-    model=model,
-    device="cuda",
+    alpha=0.01, regularization_rank=3, penalty=0.002, model=model, device="cuda", enable_ts=False
 )
 
 routine_raps = ClassificationRoutine(
@@ -156,9 +160,9 @@ def visualize_prediction_sets(inputs, labels, confidence_scores, classes, num_ex
 
 # %%
 # Summary
-# ~~~~~~~
+# -------
+#
 # In this tutorial, we explored how to apply conformal prediction to a pretrained ResNet on CIFAR-10.
 # We evaluated three methods: Thresholding (THR), Adaptive Prediction Sets (APS), and Regularized APS (RAPS).
 # For each, we calibrated on a validation set, evaluated OOD performance, and visualized prediction sets.
-
 # You can explore further by adjusting `alpha`, changing the model, or testing on other datasets.
diff --git a/auto_tutorial_source/Post_Hoc_Methods/tutorial_scaler.py b/auto_tutorial_source/Post_Hoc_Methods/tutorial_scaler.py
index a212f806..8c4f8b4a 100644
--- a/auto_tutorial_source/Post_Hoc_Methods/tutorial_scaler.py
+++ b/auto_tutorial_source/Post_Hoc_Methods/tutorial_scaler.py
@@ -49,7 +49,6 @@
 
 # Load the weights in the pre-built model
 model.load_state_dict(weights)
-
 # %%
 # 3. Setting up the Datamodule and Dataloaders
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -155,10 +154,9 @@
 # Temperature scaling is very efficient when the calibration set is representative of the test set.
 # In this case, we say that the calibration and test set are drawn from the same distribution.
 # However, this may not hold true in real-world cases where dataset shift could happen.
-
-# %%
+#
 # References
-# ~~~~~~~~~~
+# ----------
 #
 # - **Expected Calibration Error:** Naeini, M. P., Cooper, G. F., & Hauskrecht, M. (2015). Obtaining Well Calibrated Probabilities Using Bayesian Binning. In `AAAI 2015 <https://arxiv.org/pdf/1411.0160.pdf>`_.
 # - **Temperature Scaling:** Guo, C., Pleiss, G., Sun, Y., & Weinberger, K. Q. (2017). On calibration of modern neural networks. In `ICML 2017 <https://arxiv.org/pdf/1706.04599.pdf>`_.
diff --git a/auto_tutorial_source/Regression/tutorial_der_cubic.py b/auto_tutorial_source/Regression/tutorial_der_cubic.py
index 510b65b6..41158eba 100644
--- a/auto_tutorial_source/Regression/tutorial_der_cubic.py
+++ b/auto_tutorial_source/Regression/tutorial_der_cubic.py
@@ -96,6 +96,7 @@ def optim_regression(
 # %%
 # 4. The Loss and the Training Routine
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
 # Next, we need to define the loss to be used during training. To do this, we
 # set the weight of the regularizer of the DER Loss. After that, we define the
 # training routine using the probabilistic regression training routine from
@@ -115,6 +116,7 @@ def optim_regression(
 # %%
 # 5. Gathering Everything and Training the Model
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
 # Finally, we train the model using the trainer and the regression routine. We also
 # test the model using the same trainer
 
diff --git a/auto_tutorial_source/Segmentation/tutorial_muad_deep_en.py b/auto_tutorial_source/Segmentation/tutorial_muad_deep_en.py
index ef717c27..8f8764fd 100644
--- a/auto_tutorial_source/Segmentation/tutorial_muad_deep_en.py
+++ b/auto_tutorial_source/Segmentation/tutorial_muad_deep_en.py
@@ -220,7 +220,11 @@ def enet_weighing(dataloader, num_classes, c=1.02):
 )
 
 trainer = TUTrainer(
-    accelerator="gpu", devices=1, max_epochs=NB_EPOCHS, enable_progress_bar=True, precision=16
+    accelerator="gpu",
+    devices=1,
+    max_epochs=NB_EPOCHS,
+    enable_progress_bar=False,
+    precision="16-mixed",
 )
 
 # %%
diff --git a/auto_tutorial_source/Segmentation/tutorial_muad_packed.py b/auto_tutorial_source/Segmentation/tutorial_muad_packed.py
index 0907a57a..edb11635 100644
--- a/auto_tutorial_source/Segmentation/tutorial_muad_packed.py
+++ b/auto_tutorial_source/Segmentation/tutorial_muad_packed.py
@@ -213,7 +213,11 @@ def enet_weighing(dataloader, num_classes, c=1.02):
 )
 
 trainer = TUTrainer(
-    accelerator="gpu", devices=1, max_epochs=NB_EPOCHS, enable_progress_bar=True, precision=16
+    accelerator="gpu",
+    devices=1,
+    max_epochs=NB_EPOCHS,
+    enable_progress_bar=False,
+    precision="16-mixed",
 )
 
 # %%
diff --git a/auto_tutorial_source/Segmentation/tutorial_muad_seg.py b/auto_tutorial_source/Segmentation/tutorial_muad_seg.py
index f55c7973..3244766f 100644
--- a/auto_tutorial_source/Segmentation/tutorial_muad_seg.py
+++ b/auto_tutorial_source/Segmentation/tutorial_muad_seg.py
@@ -210,7 +210,13 @@ def enet_weighing(dataloader, num_classes, c=1.02):
     optim_recipe={"optimizer": optimizer, "lr_scheduler": lr_updater},
 )
 
-trainer = TUTrainer(accelerator="gpu", devices=1, max_epochs=NB_EPOCHS, enable_progress_bar=True)
+trainer = TUTrainer(
+    accelerator="gpu",
+    devices=1,
+    max_epochs=NB_EPOCHS,
+    enable_progress_bar=False,
+    precision="16-mixed",
+)
 # %%
 # 6. Training the model
 # ~~~~~~~~~~~~~~~~~~~~~

From 8d29c6b71aaf35aac7fd76c33bb8f1bfd612dcc9 Mon Sep 17 00:00:00 2001
From: Olivier <olivier.ar.laurent@gmail.com>
Date: Thu, 22 May 2025 23:07:05 +0200
Subject: [PATCH 06/11] :zap: Update version

---
 auto_tutorial_source/Bayesian_Methods/tutorial_bayesian.py | 2 +-
 docs/source/conf.py                                        | 2 +-
 pyproject.toml                                             | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/auto_tutorial_source/Bayesian_Methods/tutorial_bayesian.py b/auto_tutorial_source/Bayesian_Methods/tutorial_bayesian.py
index 5ded08e2..ffb52c4e 100644
--- a/auto_tutorial_source/Bayesian_Methods/tutorial_bayesian.py
+++ b/auto_tutorial_source/Bayesian_Methods/tutorial_bayesian.py
@@ -119,7 +119,7 @@
 #
 # Now that the model is trained, let's test it on MNIST.
 # Please note that we apply a reshape to the logits to determine the dimension corresponding to the ensemble
-# and to the batch. As for TorchUncertainty 0.5.0, the ensemble dimension is merged with the batch dimension
+# and to the batch. As of TorchUncertainty 0.5.1, the ensemble dimension is merged with the batch dimension
 # in this order (num_estimator x batch, classes).
 
 import matplotlib.pyplot as plt
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 10d481ad..bf3a55bc 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -20,7 +20,7 @@
     f"{datetime.now().year!s}, Adrien Lafage and Olivier Laurent"
 )
 author = "Adrien Lafage and Olivier Laurent"
-release = "0.5.0"
+release = "0.5.1"
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
diff --git a/pyproject.toml b/pyproject.toml
index aa5e6822..5abc280b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi"
 
 [project]
 name = "torch_uncertainty"
-version = "0.5.0"
+version = "0.5.1"
 authors = [
     { name = "ENSTA U2IS", email = "olivier.laurent@ensta-paris.fr" },
     { name = "Adrien Lafage", email = "adrienlafage@outlook.com" },

From e95435a0c1121e1c7380dc178c2adff3a530bbc7 Mon Sep 17 00:00:00 2001
From: Olivier <olivier.ar.laurent@gmail.com>
Date: Thu, 22 May 2025 23:22:44 +0200
Subject: [PATCH 07/11] :hammer: loss is None by default in routines

---
 torch_uncertainty/routines/classification.py   | 11 ++++++++---
 torch_uncertainty/routines/pixel_regression.py | 11 ++++++++---
 torch_uncertainty/routines/regression.py       | 11 ++++++++---
 torch_uncertainty/routines/segmentation.py     | 11 ++++++++---
 4 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/torch_uncertainty/routines/classification.py b/torch_uncertainty/routines/classification.py
index 6be28a37..ac18a26e 100644
--- a/torch_uncertainty/routines/classification.py
+++ b/torch_uncertainty/routines/classification.py
@@ -70,7 +70,7 @@ def __init__(
         self,
         model: nn.Module,
         num_classes: int,
-        loss: nn.Module,
+        loss: nn.Module | None = None,
         is_ensemble: bool = False,
         num_tta: int = 1,
         format_batch_fn: nn.Module | None = None,
@@ -92,6 +92,7 @@ def __init__(
             model (torch.nn.Module): Model to train.
             num_classes (int): Number of classes.
             loss (torch.nn.Module): Loss function to optimize the :attr:`model`.
+                Defaults to ``None``.
             is_ensemble (bool, optional): Indicates whether the model is an
                 ensemble at test time or not. Defaults to ``False``.
             num_tta (int): Number of test-time augmentations (TTA). If ``1``: no TTA.
@@ -371,9 +372,13 @@ def _apply_mixup(self, batch: tuple[Tensor, Tensor]) -> tuple[Tensor, Tensor]:
     def configure_optimizers(self) -> Optimizer | dict:
         return self.optim_recipe
 
-    def on_train_start(self) -> None:
+    def on_train_start(self) -> None:  # coverage: ignore
         """Put the hyperparameters in tensorboard."""
-        if self.logger is not None:  # coverage: ignore
+        if self.loss is None:
+            raise ValueError(
+                "To train a model, you must specify the `loss` argument in the routine. Got None."
+            )
+        if self.logger is not None:
             self.logger.log_hyperparams(
                 self.hparams,
             )
diff --git a/torch_uncertainty/routines/pixel_regression.py b/torch_uncertainty/routines/pixel_regression.py
index 48da3713..2862adee 100644
--- a/torch_uncertainty/routines/pixel_regression.py
+++ b/torch_uncertainty/routines/pixel_regression.py
@@ -51,7 +51,7 @@ def __init__(
         self,
         model: nn.Module,
         output_dim: int,
-        loss: nn.Module,
+        loss: nn.Module | None = None,
         dist_family: str | None = None,
         dist_estimate: str = "mean",
         is_ensemble: bool = False,
@@ -69,6 +69,7 @@ def __init__(
             model (nn.Module): Model to train.
             output_dim (int): Number of outputs of the model.
             loss (nn.Module): Loss function to optimize the :attr:`model`.
+                Defaults to ``None``.
             dist_family (str, optional): The distribution family to use for
                 probabilistic pixel regression. If ``None`` then point-wise regression.
                 Defaults to ``None``.
@@ -151,9 +152,13 @@ def _init_metrics(self) -> None:
     def configure_optimizers(self) -> Optimizer | dict:
         return self.optim_recipe
 
-    def on_train_start(self) -> None:
+    def on_train_start(self) -> None:  # coverage: ignore
         """Put the hyperparameters in tensorboard."""
-        if self.logger is not None:  # coverage: ignore
+        if self.loss is None:
+            raise ValueError(
+                "To train a model, you must specify the `loss` argument in the routine. Got None."
+            )
+        if self.logger is not None:
             self.logger.log_hyperparams(
                 self.hparams,
             )
diff --git a/torch_uncertainty/routines/regression.py b/torch_uncertainty/routines/regression.py
index fc5ac13c..31dea1bc 100644
--- a/torch_uncertainty/routines/regression.py
+++ b/torch_uncertainty/routines/regression.py
@@ -34,7 +34,7 @@ def __init__(
         self,
         model: nn.Module,
         output_dim: int,
-        loss: nn.Module,
+        loss: nn.Module | None = None,
         dist_family: str | None = None,
         dist_estimate: str = "mean",
         is_ensemble: bool = False,
@@ -50,6 +50,7 @@ def __init__(
             model (torch.nn.Module): Model to train.
             output_dim (int): Number of outputs of the model.
             loss (torch.nn.Module): Loss function to optimize the :attr:`model`.
+                Defaults to ``None``.
             dist_family (str, optional): The distribution family to use for probabilistic regression. If ``None`` then point-wise regression. Defaults to ``None``.
             dist_estimate (str, optional): The estimate to use when computing the point-wise metrics. Defaults to ``"mean"``.
             is_ensemble (bool, optional): Whether the model is an ensemble. Defaults to ``False``.
@@ -122,9 +123,13 @@ def _init_metrics(self) -> None:
     def configure_optimizers(self) -> Optimizer | dict:
         return self.optim_recipe
 
-    def on_train_start(self) -> None:
+    def on_train_start(self) -> None:  # coverage: ignore
         """Put the hyperparameters in tensorboard."""
-        if self.logger is not None:  # coverage: ignore
+        if self.loss is None:
+            raise ValueError(
+                "To train a model, you must specify the `loss` argument in the routine. Got None."
+            )
+        if self.logger is not None:
             self.logger.log_hyperparams(
                 self.hparams,
             )
diff --git a/torch_uncertainty/routines/segmentation.py b/torch_uncertainty/routines/segmentation.py
index ae96baf5..8d407778 100644
--- a/torch_uncertainty/routines/segmentation.py
+++ b/torch_uncertainty/routines/segmentation.py
@@ -45,7 +45,7 @@ def __init__(
         self,
         model: nn.Module,
         num_classes: int,
-        loss: nn.Module,
+        loss: nn.Module | None = None,
         optim_recipe: dict | Optimizer | None = None,
         eval_shift: bool = False,
         format_batch_fn: nn.Module | None = None,
@@ -65,6 +65,7 @@ def __init__(
             model (torch.nn.Module): Model to train.
             num_classes (int): Number of classes in the segmentation task.
             loss (torch.nn.Module): Loss function to optimize the :attr:`model`.
+                Defaults to ``None``.
             optim_recipe (dict or Optimizer, optional): The optimizer and
                 optionally the scheduler to use. Defaults to ``None``.
             eval_shift (bool, optional): Indicates whether to evaluate the Distribution
@@ -209,8 +210,12 @@ def forward(self, inputs: Tensor) -> Tensor:
         """
         return self.model(inputs)
 
-    def on_train_start(self) -> None:
-        if self.logger is not None:  # coverage: ignore
+    def on_train_start(self) -> None:  # coverage: ignore
+        if self.loss is None:
+            raise ValueError(
+                "To train a model, you must specify the `loss` argument in the routine. Got None."
+            )
+        if self.logger is not None:
             self.logger.log_hyperparams(self.hparams)
 
     def on_validation_start(self) -> None:

From 7afe0f71a33e16e0ce4130dcc796f8d3f2a94118 Mon Sep 17 00:00:00 2001
From: Olivier <olivier.ar.laurent@gmail.com>
Date: Thu, 22 May 2025 23:23:26 +0200
Subject: [PATCH 08/11] :white_check_mark: Add some tests for conformal methods

---
 tests/post_processing/test_conformal.py       | 34 +++++++++++++++++--
 .../post_processing/conformal/aps.py          |  2 +-
 .../post_processing/conformal/raps.py         |  4 +--
 3 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/tests/post_processing/test_conformal.py b/tests/post_processing/test_conformal.py
index c48817eb..a1d668de 100644
--- a/tests/post_processing/test_conformal.py
+++ b/tests/post_processing/test_conformal.py
@@ -52,7 +52,7 @@ def test_fit(self):
             out == repeat(torch.tensor([True, True, False]), "c -> b c", b=10).float() / 2
         ).all()
 
-        conformal = ConformalClsAPS(alpha=0.1, model=nn.Identity(), randomized=True)
+        conformal = ConformalClsAPS(alpha=0.1, model=nn.Identity(), randomized=True, enable_ts=True)
         conformal.fit(dl)
         out = conformal.conformal(inputs)
         assert out.shape == (10, 3)
@@ -82,7 +82,9 @@ def test_fit(self):
             out == repeat(torch.tensor([True, True, False]), "c -> b c", b=10).float() / 2
         ).all()
 
-        conformal = ConformalClsRAPS(alpha=0.1, model=nn.Identity(), randomized=True)
+        conformal = ConformalClsRAPS(
+            alpha=0.1, model=nn.Identity(), randomized=True, enable_ts=True
+        )
         conformal.fit(dl)
         out = conformal.conformal(inputs)
         assert out.shape == (10, 3)
@@ -91,6 +93,29 @@ def test_failures(self):
         with pytest.raises(RuntimeError):
             ConformalClsRAPS(alpha=0.1).quantile  # noqa: B018
 
+        with pytest.raises(ValueError, match="penalty should be non-negative. Got "):
+            _ = ConformalClsRAPS(
+                alpha=0.1,
+                penalty=-0.1,
+            )
+
+        with pytest.raises(TypeError, match="regularization_rank should be an integer. Got"):
+            _ = ConformalClsRAPS(
+                alpha=0.1,
+                regularization_rank=0.1,
+            )
+
+        with pytest.raises(ValueError, match="regularization_rank should be non-negative. Got "):
+            _ = ConformalClsRAPS(
+                alpha=0.1,
+                regularization_rank=-1,
+            )
+        conformal = ConformalClsRAPS(alpha=0.1, model=nn.Identity(), randomized=True)
+        with pytest.raises(
+            RuntimeError, match="Cannot return temperature when enable_ts is False."
+        ):
+            _ = conformal.temperature
+
 
 class TestConformalClsTHR:
     """Testing the ConformalClsTHR class."""
@@ -121,6 +146,11 @@ def test_fit(self):
             out == repeat(torch.tensor([True, True, False]), "c -> b c", b=10).float() / 2
         ).all()
 
+        conformal = ConformalClsTHR(
+            alpha=0.1, model=nn.Identity(), ts_init_val=2, ts_lr=1, ts_max_iter=10, enable_ts=False
+        )
+        conformal.fit(dl)
+
     def test_failures(self):
         with pytest.raises(RuntimeError):
             _ = ConformalClsTHR(
diff --git a/torch_uncertainty/post_processing/conformal/aps.py b/torch_uncertainty/post_processing/conformal/aps.py
index 62b5df55..1b4db62a 100644
--- a/torch_uncertainty/post_processing/conformal/aps.py
+++ b/torch_uncertainty/post_processing/conformal/aps.py
@@ -30,7 +30,7 @@ def __init__(
             ts_lr (float, optional): Learning rate for the optimizer. Defaults to ``0.1``.
             ts_max_iter (int, optional): Maximum number of iterations for the
                 optimizer. Defaults to ``100``.
-            enable_ts (bool): Whether to scale the logits. Defaults to ``True``.
+            enable_ts (bool): Whether to scale the logits. Defaults to ``False``.
             device (Literal["cpu", "cuda"] | torch.device | None, optional): device.
                 Defaults to ``None``.
 
diff --git a/torch_uncertainty/post_processing/conformal/raps.py b/torch_uncertainty/post_processing/conformal/raps.py
index 065791e1..4f179ada 100644
--- a/torch_uncertainty/post_processing/conformal/raps.py
+++ b/torch_uncertainty/post_processing/conformal/raps.py
@@ -17,7 +17,7 @@ def __init__(
         ts_init_val: float = 1.0,
         ts_lr: float = 0.1,
         ts_max_iter: int = 100,
-        enable_ts: bool = True,
+        enable_ts: bool = False,
         device: Literal["cpu", "cuda"] | torch.device | None = None,
     ) -> None:
         r"""Conformal prediction with RAPS scores.
@@ -33,7 +33,7 @@ def __init__(
             ts_lr (float, optional): Learning rate for the optimizer. Defaults to ``0.1``.
             ts_max_iter (int, optional): Maximum number of iterations for the
                 optimizer. Defaults to ``100``.
-            enable_ts (bool): Whether to scale the logits. Defaults to ``True``.
+            enable_ts (bool): Whether to scale the logits. Defaults to ``False``.
             device (Literal["cpu", "cuda"] | torch.device | None, optional): device.
                 Defaults to ``None``.
 

From 4ac93f7001362d19a83a9d990636ba24443032d4 Mon Sep 17 00:00:00 2001
From: Olivier <olivier.ar.laurent@gmail.com>
Date: Thu, 22 May 2025 23:30:00 +0200
Subject: [PATCH 09/11] :white_check_mark: Improve coverage

---
 tests/post_processing/test_conformal.py            | 4 +---
 torch_uncertainty/post_processing/conformal/aps.py | 2 --
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/tests/post_processing/test_conformal.py b/tests/post_processing/test_conformal.py
index a1d668de..7fc44ffb 100644
--- a/tests/post_processing/test_conformal.py
+++ b/tests/post_processing/test_conformal.py
@@ -75,6 +75,7 @@ def test_fit(self):
         dl = DataLoader(calibration_set, batch_size=10)
 
         conformal = ConformalClsRAPS(alpha=0.1, model=nn.Identity(), randomized=False)
+        conformal.set_model(nn.Identity())
         conformal.fit(dl)
         out = conformal.conformal(inputs)
         assert out.shape == (10, 3)
@@ -122,11 +123,8 @@ class TestConformalClsTHR:
 
     def test_main(self):
         conformal = ConformalClsTHR(alpha=0.1, model=None, ts_init_val=2)
-
         assert conformal.temperature == 2.0
-
         conformal.set_model(nn.Identity())
-
         assert isinstance(conformal.model.model, nn.Identity)
 
     def test_fit(self):
diff --git a/torch_uncertainty/post_processing/conformal/aps.py b/torch_uncertainty/post_processing/conformal/aps.py
index 1b4db62a..28027209 100644
--- a/torch_uncertainty/post_processing/conformal/aps.py
+++ b/torch_uncertainty/post_processing/conformal/aps.py
@@ -63,8 +63,6 @@ def _sort_sum(self, probs: Tensor) -> tuple[Tensor, Tensor, Tensor]:
 
     def _calculate_all_labels(self, probs):
         """Calculate APS scores for all labels."""
-        if probs.dim() == 1 or probs.dim() > 2:
-            raise ValueError("Input probabilities must be 2D.")
         indices, ordered, cumsum = self._sort_sum(probs)
         if self.randomized:
             noise = torch.rand(probs.shape, device=probs.device)

From 3a85dcf333548dff2d40855b840360fa20b03175 Mon Sep 17 00:00:00 2001
From: Olivier <olivier.ar.laurent@gmail.com>
Date: Fri, 23 May 2025 00:58:30 +0200
Subject: [PATCH 10/11] :shirt: Misc

---
 .../muad/configs/muad/deeplab/deeplabv3+.yaml             | 4 +++-
 torch_uncertainty/routines/pixel_regression.py            | 8 ++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/experiments/segmentation/muad/configs/muad/deeplab/deeplabv3+.yaml b/experiments/segmentation/muad/configs/muad/deeplab/deeplabv3+.yaml
index bb763b84..e1b451c5 100644
--- a/experiments/segmentation/muad/configs/muad/deeplab/deeplabv3+.yaml
+++ b/experiments/segmentation/muad/configs/muad/deeplab/deeplabv3+.yaml
@@ -18,10 +18,11 @@ trainer:
         logging_interval: step
 model:
   model:
-    class_path: torch_uncertainty.models.segmentation.deep_lab_v3_resnet50
+    class_path: torch_uncertainty.models.segmentation.deep_lab_v3_resnet
     init_args:
       num_classes: 15
       style: v3+
+      arch: 50
   num_classes: 15
   loss:
     class_path: torch.nn.CrossEntropyLoss
@@ -45,6 +46,7 @@ model:
             - 5.2729
             - 17.8703
             - 50.4984
+  log_plots: true
 data:
   root: ./data
   batch_size: 12
diff --git a/torch_uncertainty/routines/pixel_regression.py b/torch_uncertainty/routines/pixel_regression.py
index 2862adee..3230a301 100644
--- a/torch_uncertainty/routines/pixel_regression.py
+++ b/torch_uncertainty/routines/pixel_regression.py
@@ -280,7 +280,7 @@ def validation_step(self, batch: tuple[Tensor, Tensor], batch_idx: int) -> None:
         preds, dist = self.evaluation_forward(inputs)
 
         if batch_idx == 0 and self.log_plots:
-            self._plot_depth(
+            self._plot_pixel_regression(
                 inputs[: self.num_image_plot, ...],
                 preds[: self.num_image_plot, ...],
                 targets[: self.num_image_plot, ...],
@@ -320,7 +320,7 @@ def test_step(
 
         if batch_idx == 0 and self.log_plots:
             num_images = min(inputs.size(0), self.num_image_plot)
-            self._plot_depth(
+            self._plot_pixel_regression(
                 inputs[:num_images, ...],
                 preds[:num_images, ...],
                 targets[:num_images, ...],
@@ -370,7 +370,7 @@ def on_test_epoch_end(self) -> None:
                 result_dict,
             )
 
-    def _plot_depth(
+    def _plot_pixel_regression(
         self,
         inputs: Tensor,
         preds: Tensor,
@@ -401,7 +401,7 @@ def colorize(
     vmin: float | None = None,
     vmax: float | None = None,
     cmap: str = "magma",
-):
+) -> Tensor:
     """Colorize a tensor of depth values.
 
     Args:

From 61791aa2447afc8c7e9b622734c0e44a98cb820b Mon Sep 17 00:00:00 2001
From: Olivier <olivier.ar.laurent@gmail.com>
Date: Fri, 23 May 2025 01:05:38 +0200
Subject: [PATCH 11/11] :shirt: Add some typing

---
 .../Post_Hoc_Methods/tutorial_conformal.py       |  2 +-
 tests/datamodules/test_depth.py                  |  2 +-
 tests/models/test_bts.py                         |  2 +-
 tests/models/test_deeplab.py                     |  2 +-
 .../models/wrappers/test_checkpoint_collector.py |  4 ++--
 tests/models/wrappers/test_ema.py                |  2 +-
 tests/models/wrappers/test_zero.py               |  4 ++--
 tests/post_processing/test_conformal.py          | 16 ++++++++--------
 tests/routines/test_classification.py            |  4 ++--
 tests/test_utils.py                              |  2 +-
 torch_uncertainty/callbacks/checkpoint.py        |  6 +++---
 .../callbacks/compound_checkpoint.py             |  2 +-
 torch_uncertainty/datasets/utils.py              |  2 +-
 .../metrics/classification/coverage_rate.py      |  2 +-
 .../metrics/classification/set_size.py           |  2 +-
 .../metrics/segmentation/seg_binary_auroc.py     |  2 +-
 .../segmentation/seg_binary_average_precision.py |  2 +-
 .../metrics/segmentation/seg_fpr95.py            |  2 +-
 torch_uncertainty/models/depth/bts.py            |  4 ++--
 .../models/segmentation/segformer.py             |  2 +-
 .../models/segmentation/unet/mimo.py             |  2 +-
 .../models/segmentation/unet/packed.py           | 14 +++++++-------
 .../models/segmentation/unet/standard.py         | 10 ++++++----
 .../models/wrappers/deep_ensembles.py            |  2 +-
 torch_uncertainty/models/wrappers/swa.py         |  2 +-
 .../post_processing/conformal/aps.py             |  4 ++--
 .../post_processing/conformal/raps.py            |  2 +-
 torch_uncertainty/transforms/mixup.py            |  2 +-
 torch_uncertainty/utils/cli.py                   |  2 +-
 29 files changed, 54 insertions(+), 52 deletions(-)

diff --git a/auto_tutorial_source/Post_Hoc_Methods/tutorial_conformal.py b/auto_tutorial_source/Post_Hoc_Methods/tutorial_conformal.py
index 748f7ccc..b0d67865 100644
--- a/auto_tutorial_source/Post_Hoc_Methods/tutorial_conformal.py
+++ b/auto_tutorial_source/Post_Hoc_Methods/tutorial_conformal.py
@@ -65,7 +65,7 @@
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 
-def visualize_prediction_sets(inputs, labels, confidence_scores, classes, num_examples=5):
+def visualize_prediction_sets(inputs, labels, confidence_scores, classes, num_examples=5) -> None:
     _, axs = plt.subplots(2, num_examples, figsize=(15, 5))
     for i in range(num_examples):
         ax = axs[0, i]
diff --git a/tests/datamodules/test_depth.py b/tests/datamodules/test_depth.py
index 21d218e8..08537c8a 100644
--- a/tests/datamodules/test_depth.py
+++ b/tests/datamodules/test_depth.py
@@ -27,7 +27,7 @@ def test_depth_dm(self) -> None:
         assert isinstance(dm.train_transform, nn.Identity)
         assert isinstance(dm.test_transform, nn.Identity)
 
-    def test_depth_dm_failures(self):
+    def test_depth_dm_failures(self) -> None:
         with pytest.raises(ValueError):
             DepthDataModule(
                 dataset=DummPixelRegressionDataset,
diff --git a/tests/models/test_bts.py b/tests/models/test_bts.py
index a2b67ae8..474e0610 100644
--- a/tests/models/test_bts.py
+++ b/tests/models/test_bts.py
@@ -7,7 +7,7 @@ class TestBTS:
     """Testing the BTS model class."""
 
     @torch.no_grad()
-    def test_main(self):
+    def test_main(self) -> None:
         model = bts_resnet(50, 1).eval()
         model(torch.randn(1, 3, 32, 32))
         model = bts_resnet(50, 1, dist_family="normal").eval()
diff --git a/tests/models/test_deeplab.py b/tests/models/test_deeplab.py
index c9eaa6f1..a16bc6a3 100644
--- a/tests/models/test_deeplab.py
+++ b/tests/models/test_deeplab.py
@@ -8,7 +8,7 @@ class TestDeeplab:
     """Testing the Deeplab class."""
 
     @torch.no_grad()
-    def test_main(self):
+    def test_main(self) -> None:
         model = deep_lab_v3_resnet(10, 50, "v3", 16, True, False).eval()
         model(torch.randn(1, 3, 32, 32))
         model = deep_lab_v3_resnet(10, 50, "v3", 16, False, False).eval()
diff --git a/tests/models/wrappers/test_checkpoint_collector.py b/tests/models/wrappers/test_checkpoint_collector.py
index d0a0d97e..6ee21a0e 100644
--- a/tests/models/wrappers/test_checkpoint_collector.py
+++ b/tests/models/wrappers/test_checkpoint_collector.py
@@ -8,7 +8,7 @@
 class TestCheckpointCollector:
     """Testing the CheckpointCollector class."""
 
-    def test_training(self):
+    def test_training(self) -> None:
         device = "cuda" if torch.cuda.is_available() else "cpu"
 
         ens = CheckpointCollector(dummy_model(1, 10), store_on_cpu=True)
@@ -49,7 +49,7 @@ def test_training(self):
         ens.eval()
         ens(torch.randn(1, 1, device=device))
 
-    def test_failures(self):
+    def test_failures(self) -> None:
         with pytest.raises(ValueError):
             CheckpointCollector(dummy_model(1, 10), cycle_start=0)
 
diff --git a/tests/models/wrappers/test_ema.py b/tests/models/wrappers/test_ema.py
index 2e96e1eb..62e01ee7 100644
--- a/tests/models/wrappers/test_ema.py
+++ b/tests/models/wrappers/test_ema.py
@@ -16,6 +16,6 @@ def test_training(self) -> None:
         ema.train()
         ema.update_wrapper(0)
 
-    def test_failures(self):
+    def test_failures(self) -> None:
         with pytest.raises(ValueError, match="must be in"):
             EMA(nn.Module(), momentum=-1)
diff --git a/tests/models/wrappers/test_zero.py b/tests/models/wrappers/test_zero.py
index d2ec611b..3b52d8c1 100644
--- a/tests/models/wrappers/test_zero.py
+++ b/tests/models/wrappers/test_zero.py
@@ -9,7 +9,7 @@ class TestZero:
     """Testing the Zero wrapper class."""
 
     @torch.no_grad()
-    def test_main(self):
+    def test_main(self) -> None:
         model = Zero(nn.Identity(), num_tta=12, filter_views=0.5)
         out = model(torch.randn(2, 10))
         assert out.shape == (2, 10)
@@ -18,7 +18,7 @@ def test_main(self):
         out = model.eval()(torch.randn(24, 3))
         assert out.shape == (2, 3)
 
-    def test_failures(self):
+    def test_failures(self) -> None:
         with pytest.raises(ValueError, match="must be in the range"):
             Zero(nn.Identity(), num_tta=12, filter_views=2.1)
         with pytest.raises(
diff --git a/tests/post_processing/test_conformal.py b/tests/post_processing/test_conformal.py
index 7fc44ffb..d621f38a 100644
--- a/tests/post_processing/test_conformal.py
+++ b/tests/post_processing/test_conformal.py
@@ -15,7 +15,7 @@
 class TestConformal:
     """Testing the Conformal class."""
 
-    def test_errors(self):
+    def test_errors(self) -> None:
         Conformal.__abstractmethods__ = set()
         conformal = Conformal(
             model=None,
@@ -37,7 +37,7 @@ def test_errors(self):
 class TestConformalClsAPS:
     """Testing the ConformalClsRAPS class."""
 
-    def test_fit(self):
+    def test_fit(self) -> None:
         inputs = repeat(torch.tensor([0.6, 0.3, 0.1]), "c -> b c", b=10)
         labels = torch.tensor([0, 2] + [1] * 8)
 
@@ -57,7 +57,7 @@ def test_fit(self):
         out = conformal.conformal(inputs)
         assert out.shape == (10, 3)
 
-    def test_failures(self):
+    def test_failures(self) -> None:
         with pytest.raises(RuntimeError):
             _ = ConformalClsAPS(
                 alpha=0.1,
@@ -67,7 +67,7 @@ def test_failures(self):
 class TestConformalClsRAPS:
     """Testing the ConformalClsRAPS class."""
 
-    def test_fit(self):
+    def test_fit(self) -> None:
         inputs = repeat(torch.tensor([6.0, 4.0, 1.0]), "c -> b c", b=10)
         labels = torch.tensor([0, 2] + [1] * 8)
 
@@ -90,7 +90,7 @@ def test_fit(self):
         out = conformal.conformal(inputs)
         assert out.shape == (10, 3)
 
-    def test_failures(self):
+    def test_failures(self) -> None:
         with pytest.raises(RuntimeError):
             ConformalClsRAPS(alpha=0.1).quantile  # noqa: B018
 
@@ -121,13 +121,13 @@ def test_failures(self):
 class TestConformalClsTHR:
     """Testing the ConformalClsTHR class."""
 
-    def test_main(self):
+    def test_main(self) -> None:
         conformal = ConformalClsTHR(alpha=0.1, model=None, ts_init_val=2)
         assert conformal.temperature == 2.0
         conformal.set_model(nn.Identity())
         assert isinstance(conformal.model.model, nn.Identity)
 
-    def test_fit(self):
+    def test_fit(self) -> None:
         inputs = repeat(torch.tensor([0.6, 0.3, 0.1]), "c -> b c", b=10)
         labels = torch.tensor([0, 2] + [1] * 8)
 
@@ -149,7 +149,7 @@ def test_fit(self):
         )
         conformal.fit(dl)
 
-    def test_failures(self):
+    def test_failures(self) -> None:
         with pytest.raises(RuntimeError):
             _ = ConformalClsTHR(
                 alpha=0.1,
diff --git a/tests/routines/test_classification.py b/tests/routines/test_classification.py
index 6ff376e1..74d9123d 100644
--- a/tests/routines/test_classification.py
+++ b/tests/routines/test_classification.py
@@ -335,7 +335,7 @@ def test_two_estimator_two_classes_elbo_vr_logs(self) -> None:
         trainer.test(model, dm)
         model(dm.get_test_set()[0][0])
 
-    def test_one_estimator_conformal(self):
+    def test_one_estimator_conformal(self) -> None:
         trainer = TUTrainer(accelerator="cpu", fast_dev_run=True)
 
         dm = DummyClassificationDataModule(
@@ -377,7 +377,7 @@ def test_one_estimator_conformal(self):
         )
         trainer.test(routine, dm)
 
-    def test_classification_failures(self):
+    def test_classification_failures(self) -> None:
         # num_classes
         with pytest.raises(ValueError):
             ClassificationRoutine(num_classes=0, model=nn.Module(), loss=None)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 1771f840..8f291b87 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -39,7 +39,7 @@ def test_hub_exists(self) -> None:
         hub.load_hf("test", version=1)
         hub.load_hf("test", version=2)
 
-    def test_hub_notexists(self):
+    def test_hub_notexists(self) -> None:
         with (
             contextlib.suppress(ValueError),
             pytest.raises((RepositoryNotFoundError, HfHubHTTPError)),
diff --git a/torch_uncertainty/callbacks/checkpoint.py b/torch_uncertainty/callbacks/checkpoint.py
index d6075a43..dd7d6451 100644
--- a/torch_uncertainty/callbacks/checkpoint.py
+++ b/torch_uncertainty/callbacks/checkpoint.py
@@ -60,7 +60,7 @@ def best_model_path(self) -> str: ...
 
 
 class TUClsCheckpoint(TUCheckpoint):
-    def __init__(self):
+    def __init__(self) -> None:
         """Keep multiple checkpoints corresponding to the best classification metric values."""
         super().__init__()
         self.callbacks = {
@@ -96,7 +96,7 @@ def best_model_path(self) -> str:
 
 
 class TUSegCheckpoint(TUCheckpoint):
-    def __init__(self):
+    def __init__(self) -> None:
         """Keep multiple checkpoints corresponding to the best segmentation metric values."""
         super().__init__()
         self.callbacks = {
@@ -132,7 +132,7 @@ def best_model_path(self) -> str:
 
 
 class TURegCheckpoint(TUCheckpoint):
-    def __init__(self, probabilistic: bool = False):
+    def __init__(self, probabilistic: bool = False) -> None:
         """Keep multiple checkpoints corresponding to the best regression metric values."""
         super().__init__()
         self.callbacks = {
diff --git a/torch_uncertainty/callbacks/compound_checkpoint.py b/torch_uncertainty/callbacks/compound_checkpoint.py
index 077cbcbd..f5e8f49c 100644
--- a/torch_uncertainty/callbacks/compound_checkpoint.py
+++ b/torch_uncertainty/callbacks/compound_checkpoint.py
@@ -23,7 +23,7 @@ def __init__(
         every_n_epochs: int | None = None,
         save_on_train_epoch_end: bool | None = None,
         enable_version_counter: bool = True,
-    ):
+    ) -> None:
         """Save the checkpoints maximizing or minimizing a given linear form on the metric values."""
         self.compound_metric_dict = compound_metric_dict
         super().__init__(
diff --git a/torch_uncertainty/datasets/utils.py b/torch_uncertainty/datasets/utils.py
index c24a280c..ffae6dcb 100644
--- a/torch_uncertainty/datasets/utils.py
+++ b/torch_uncertainty/datasets/utils.py
@@ -41,7 +41,7 @@ def __init__(self, dataset: Dataset, num_augmentations: int) -> None:
         self.dataset = dataset
         self.num_augmentations = num_augmentations
 
-    def __len__(self):
+    def __len__(self) -> int:
         """Get the virtual length of the dataset."""
         return len(self.dataset) * self.num_augmentations
 
diff --git a/torch_uncertainty/metrics/classification/coverage_rate.py b/torch_uncertainty/metrics/classification/coverage_rate.py
index c9d324e6..4882596f 100644
--- a/torch_uncertainty/metrics/classification/coverage_rate.py
+++ b/torch_uncertainty/metrics/classification/coverage_rate.py
@@ -16,7 +16,7 @@ def __init__(
         average: str = "micro",
         validate_args: bool = True,
         **kwargs,
-    ):
+    ) -> None:
         """Empirical coverage rate metric.
 
         Args:
diff --git a/torch_uncertainty/metrics/classification/set_size.py b/torch_uncertainty/metrics/classification/set_size.py
index c601a627..4b546b58 100644
--- a/torch_uncertainty/metrics/classification/set_size.py
+++ b/torch_uncertainty/metrics/classification/set_size.py
@@ -16,7 +16,7 @@ def __init__(
         self,
         reduction: Literal["mean", "sum", "none", None] = "mean",
         **kwargs,
-    ):
+    ) -> None:
         """Set size to compute the efficiency of conformal prediction methods.
 
         Args:
diff --git a/torch_uncertainty/metrics/segmentation/seg_binary_auroc.py b/torch_uncertainty/metrics/segmentation/seg_binary_auroc.py
index a2a2a89b..c79f01f1 100644
--- a/torch_uncertainty/metrics/segmentation/seg_binary_auroc.py
+++ b/torch_uncertainty/metrics/segmentation/seg_binary_auroc.py
@@ -18,7 +18,7 @@ def __init__(
         ignore_index: int | None = None,
         validate_args: bool = True,
         **kwargs: Any,
-    ):
+    ) -> None:
         """SegmentationBinaryAUROC computes the Area Under the Receiver Operating Characteristic Curve (AUROC)
         for binary segmentation tasks. It aggregates the AUROC across batches and computes the average AUROC
         over all batches processed.
diff --git a/torch_uncertainty/metrics/segmentation/seg_binary_average_precision.py b/torch_uncertainty/metrics/segmentation/seg_binary_average_precision.py
index c68dd325..e14f42cb 100644
--- a/torch_uncertainty/metrics/segmentation/seg_binary_average_precision.py
+++ b/torch_uncertainty/metrics/segmentation/seg_binary_average_precision.py
@@ -17,7 +17,7 @@ def __init__(
         ignore_index: int | None = None,
         validate_args: bool = True,
         **kwargs: Any,
-    ):
+    ) -> None:
         """SegmentationBinaryAveragePrecision computes the Average Precision (AP) for binary segmentation tasks.
         It aggregates the mean AP across batches and computes the average AP over all batches processed.
         """
diff --git a/torch_uncertainty/metrics/segmentation/seg_fpr95.py b/torch_uncertainty/metrics/segmentation/seg_fpr95.py
index 56ec58fb..ad3b5ccb 100644
--- a/torch_uncertainty/metrics/segmentation/seg_fpr95.py
+++ b/torch_uncertainty/metrics/segmentation/seg_fpr95.py
@@ -10,7 +10,7 @@ class SegmentationFPR95(Metric):
     higher_is_better = False
     full_state_update = False
 
-    def __init__(self, pos_label: int, **kwargs):
+    def __init__(self, pos_label: int, **kwargs) -> None:
         """FPR95 metric for segmentation tasks.
         Compute the mean FPR95 per batch across all batches.
 
diff --git a/torch_uncertainty/models/depth/bts.py b/torch_uncertainty/models/depth/bts.py
index 29c2371c..f16bcc14 100644
--- a/torch_uncertainty/models/depth/bts.py
+++ b/torch_uncertainty/models/depth/bts.py
@@ -98,7 +98,7 @@ def __init__(
         in_channels: int,
         out_channels: int,
         ratio: int = 2,
-        **factory_kwargs,
+        **factory_kwargs: dict,
     ) -> None:
         """Upsampling convolution.
 
@@ -132,7 +132,7 @@ def __init__(
         num_out_filters: int,
         max_depth: float,
         is_final: bool = False,
-        **factory_kwargs,
+        **factory_kwargs: dict,
     ) -> None:
         super().__init__()
         self.max_depth = max_depth
diff --git a/torch_uncertainty/models/segmentation/segformer.py b/torch_uncertainty/models/segmentation/segformer.py
index e17f2674..3f87165c 100644
--- a/torch_uncertainty/models/segmentation/segformer.py
+++ b/torch_uncertainty/models/segmentation/segformer.py
@@ -484,7 +484,7 @@ def forward(self, inputs: Tensor) -> Tensor:
 def resize(
     inputs: Tensor,
     size: torch.Size | None = None,
-    scale_factor=None,
+    scale_factor: float | tuple[float] | None = None,
     mode: str = "nearest",
     align_corners: bool | None = None,
     warning: bool = True,
diff --git a/torch_uncertainty/models/segmentation/unet/mimo.py b/torch_uncertainty/models/segmentation/unet/mimo.py
index 86f47044..b69ec97d 100644
--- a/torch_uncertainty/models/segmentation/unet/mimo.py
+++ b/torch_uncertainty/models/segmentation/unet/mimo.py
@@ -13,7 +13,7 @@ def __init__(
         num_estimators: int,
         bilinear: bool = False,
         dropout_rate: float = 0.0,
-    ):
+    ) -> None:
         super().__init__(
             in_channels=in_channels * num_estimators,
             num_classes=num_classes * num_estimators,
diff --git a/torch_uncertainty/models/segmentation/unet/packed.py b/torch_uncertainty/models/segmentation/unet/packed.py
index e15abc07..4d86db9d 100644
--- a/torch_uncertainty/models/segmentation/unet/packed.py
+++ b/torch_uncertainty/models/segmentation/unet/packed.py
@@ -1,6 +1,6 @@
 import torch
 from einops import rearrange
-from torch import nn
+from torch import Tensor, nn
 from torchvision.transforms import functional as F
 
 from torch_uncertainty.layers.packed import (
@@ -52,7 +52,7 @@ def __init__(
             nn.ReLU(inplace=True),
         )
 
-    def forward(self, x):
+    def forward(self, x: Tensor) -> Tensor:
         return self.conv(x)
 
 
@@ -70,7 +70,7 @@ def __init__(
             first=True,
         )
 
-    def forward(self, x):
+    def forward(self, x: Tensor) -> Tensor:
         return self.conv(x)
 
 
@@ -90,7 +90,7 @@ def __init__(
             ),
         )
 
-    def forward(self, x):
+    def forward(self, x: Tensor) -> Tensor:
         return self.mpconv(x)
 
 
@@ -135,7 +135,7 @@ def __init__(
                 gamma=gamma,
             )
 
-    def forward(self, x1, x2):
+    def forward(self, x1: Tensor, x2: Tensor) -> Tensor:
         x1 = self.up(x1)
         diff_y = x2.size(2) - x1.size(2)
         diff_x = x2.size(3) - x1.size(3)
@@ -162,7 +162,7 @@ def __init__(
             last=True,
         )
 
-    def forward(self, x):
+    def forward(self, x: Tensor) -> Tensor:
         return self.conv(x)
 
 
@@ -215,7 +215,7 @@ def __init__(
         # Final output
         self.outc = _PackedOutconv(num_blocks[0], num_classes, alpha, num_estimators, gamma)
 
-    def forward(self, x):
+    def forward(self, x: Tensor) -> Tensor:
         # Downsampling
         x1 = self.inc(x)
         x2 = self.down1(x1)
diff --git a/torch_uncertainty/models/segmentation/unet/standard.py b/torch_uncertainty/models/segmentation/unet/standard.py
index 7326e5f3..3cd261ac 100644
--- a/torch_uncertainty/models/segmentation/unet/standard.py
+++ b/torch_uncertainty/models/segmentation/unet/standard.py
@@ -40,7 +40,9 @@ def check_unet_parameters(
 class _DoubleConv(nn.Module):
     """(convolution => [BN] => ReLU) * 2."""
 
-    def __init__(self, in_channels: int, out_channels: int, mid_channels: int | None = None):
+    def __init__(
+        self, in_channels: int, out_channels: int, mid_channels: int | None = None
+    ) -> None:
         super().__init__()
         if mid_channels is None:
             mid_channels = out_channels
@@ -65,7 +67,7 @@ def __init__(self, in_channels: int, out_channels: int, mid_channels: int | None
             nn.ReLU(inplace=True),
         )
 
-    def forward(self, x):
+    def forward(self, x: Tensor) -> Tensor:
         return self.double_conv(x)
 
 
@@ -81,7 +83,7 @@ def forward(self, x: Tensor) -> Tensor:
 class _Up(nn.Module):
     """Upscaling then double conv."""
 
-    def __init__(self, in_channels, out_channels, bilinear=True):
+    def __init__(self, in_channels: int, out_channels: int, bilinear: bool = True) -> None:
         super().__init__()
 
         # if bilinear, use the normal convolutions to reduce the number of channels
@@ -92,7 +94,7 @@ def __init__(self, in_channels, out_channels, bilinear=True):
             self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
             self.conv = _DoubleConv(in_channels, out_channels)
 
-    def forward(self, x1, x2):
+    def forward(self, x1: Tensor, x2: Tensor) -> Tensor:
         x1 = self.up(x1)
         # input is CHW
         diff_y = x2.size()[2] - x1.size()[2]
diff --git a/torch_uncertainty/models/wrappers/deep_ensembles.py b/torch_uncertainty/models/wrappers/deep_ensembles.py
index 946c4a47..9a280b4e 100644
--- a/torch_uncertainty/models/wrappers/deep_ensembles.py
+++ b/torch_uncertainty/models/wrappers/deep_ensembles.py
@@ -39,7 +39,7 @@ def forward(self, x: Tensor) -> Tensor:
             preds = [model.forward(x) for model in self.core_models]
         return torch.cat(preds, dim=0)
 
-    def to(self, *args, **kwargs):
+    def to(self, *args, **kwargs: dict):
         device, dtype, non_blocking = torch._C._nn._parse_to(*args, **kwargs)[:3]
 
         if self.store_on_cpu:
diff --git a/torch_uncertainty/models/wrappers/swa.py b/torch_uncertainty/models/wrappers/swa.py
index 3eceb6e3..4bb9e21a 100644
--- a/torch_uncertainty/models/wrappers/swa.py
+++ b/torch_uncertainty/models/wrappers/swa.py
@@ -65,7 +65,7 @@ def forward(self, x: Tensor) -> Tensor:
             return self.core_model.forward(x)
         return self.eval_forward(x)
 
-    def bn_update(self, loader: DataLoader, device) -> None:
+    def bn_update(self, loader: DataLoader, device: str | torch.device | None) -> None:
         if self.need_bn_update and self.swa_model is not None:
             torch.optim.swa_utils.update_bn(loader, self.swa_model, device=device)
             self.need_bn_update = False
diff --git a/torch_uncertainty/post_processing/conformal/aps.py b/torch_uncertainty/post_processing/conformal/aps.py
index 28027209..634a66c6 100644
--- a/torch_uncertainty/post_processing/conformal/aps.py
+++ b/torch_uncertainty/post_processing/conformal/aps.py
@@ -61,7 +61,7 @@ def _sort_sum(self, probs: Tensor) -> tuple[Tensor, Tensor, Tensor]:
         cumsum = torch.cumsum(ordered, dim=-1)
         return indices, ordered, cumsum
 
-    def _calculate_all_labels(self, probs):
+    def _calculate_all_labels(self, probs: Tensor) -> Tensor:
         """Calculate APS scores for all labels."""
         indices, ordered, cumsum = self._sort_sum(probs)
         if self.randomized:
@@ -73,7 +73,7 @@ def _calculate_all_labels(self, probs):
         _, sorted_indices = torch.sort(indices, descending=False, dim=-1)
         return ordered_scores.gather(dim=-1, index=sorted_indices)
 
-    def _calculate_single_label(self, probs, label):
+    def _calculate_single_label(self, probs: Tensor, label: Tensor) -> Tensor:
         """Calculate APS score for a single label."""
         indices, ordered, cumsum = self._sort_sum(probs)
         if self.randomized:
diff --git a/torch_uncertainty/post_processing/conformal/raps.py b/torch_uncertainty/post_processing/conformal/raps.py
index 4f179ada..d14515cb 100644
--- a/torch_uncertainty/post_processing/conformal/raps.py
+++ b/torch_uncertainty/post_processing/conformal/raps.py
@@ -66,7 +66,7 @@ def __init__(
         self.penalty = penalty
         self.regularization_rank = regularization_rank
 
-    def _calculate_all_labels(self, probs):
+    def _calculate_all_labels(self, probs: Tensor) -> Tensor:
         indices, ordered, cumsum = self._sort_sum(probs)
         if self.randomized:
             noise = torch.rand(probs.shape, device=probs.device)
diff --git a/torch_uncertainty/transforms/mixup.py b/torch_uncertainty/transforms/mixup.py
index c1c46c9e..e582f037 100644
--- a/torch_uncertainty/transforms/mixup.py
+++ b/torch_uncertainty/transforms/mixup.py
@@ -94,7 +94,7 @@ def __init__(self, alpha: float = 1.0, mode: str = "batch", num_classes: int = 1
         self.num_classes = num_classes
         self.mode = mode
 
-    def _get_params(self, batch_size: int, device: torch.device):
+    def _get_params(self, batch_size: int, device: torch.device) -> tuple[float, Tensor]:
         if self.mode == "batch":
             lam = self.rng.beta(a=self.alpha, b=self.alpha)
         else:
diff --git a/torch_uncertainty/utils/cli.py b/torch_uncertainty/utils/cli.py
index 53703139..12c68717 100644
--- a/torch_uncertainty/utils/cli.py
+++ b/torch_uncertainty/utils/cli.py
@@ -76,7 +76,7 @@ def __init__(
         run: bool = True,
         auto_configure_optimizers: bool = True,
         eval_after_fit_default: bool = False,
-        **kwargs: Any,
+        **kwargs: dict,
     ) -> None:
         """Custom LightningCLI for torch-uncertainty.