
✨ Refactor wrappers & PP, Add Checkpoint Ensembles, EMA, SWA, & SWAG, Add LaplaceApprox & ABNN #98


Merged: 61 commits, merged on Jun 26, 2024

Changes from 44 commits

Commits (61)
870af02
:sparkles: Add LeNet experiment on MNIST
o-laurent May 29, 2024
ec61536
:bug: Fix notMNIST
o-laurent May 30, 2024
d268911
:bug: Fix MNIST datamodule OODs
o-laurent May 31, 2024
e137610
Merge branch 'main' of github.com:ENSTA-U2IS-AI/torch-uncertainty int…
o-laurent May 31, 2024
19fafbd
:sparkles: Add Laplace wrapper
o-laurent Jun 2, 2024
acf90eb
:books: Add Laplace to the references
o-laurent Jun 3, 2024
63fb874
:hammer: Refactor Mixup params
o-laurent Jun 5, 2024
acbd582
:bug: Fix #99 error in calibration plots
o-laurent Jun 6, 2024
8c2de92
:shirt: Slightly improve dropout
o-laurent Jun 7, 2024
8655bec
:bug: Fix MC Dropout test
o-laurent Jun 7, 2024
91cf1c0
:book: Remove Packed-Ensembles mentioned twice
o-laurent Jun 12, 2024
988a89b
:sparkles: Add Trajectory Ensemble
o-laurent Jun 12, 2024
8e7c188
:sparkles: Add EMA & SWA & Reformat models
o-laurent Jun 12, 2024
8b0a02a
:book: Add SWA to docs
o-laurent Jun 12, 2024
3c231e2
:hammer: Refactor EMA, SWA, & Checkpoint Ens.
o-laurent Jun 12, 2024
1f72ead
:book: Fix conf error
o-laurent Jun 12, 2024
1c7059d
:sparkles: Merge pull request #96 from ENSTA-U2IS-AI/laplace
o-laurent Jun 13, 2024
c091c9a
:shirt: Small changes
o-laurent Jun 13, 2024
4bfd351
:hammer: Refactor the post processing methods
o-laurent Jun 13, 2024
2a15dce
Merge branch 'trajectory' of github.com:ENSTA-U2IS-AI/torch-uncertain…
o-laurent Jun 13, 2024
be85bf8
:hammer: Refactor the AbstractDatamodule
o-laurent Jun 13, 2024
951ff09
:bug: Fix test of abstract methods
o-laurent Jun 13, 2024
83c4cce
:hammer: Refactor pp methods
o-laurent Jun 16, 2024
e48368d
:sparkles: Add first version of SWAG
o-laurent Jun 16, 2024
df5330e
:hammer: Refactor wrappers
o-laurent Jun 16, 2024
1d0c595
:hammer: Refactor the classification routine
o-laurent Jun 16, 2024
6e989f6
:book: Add links to the conf. in ReadMe
o-laurent Jun 16, 2024
fbc8c55
:white_check_mark: Update tests
o-laurent Jun 16, 2024
27bb610
:sparkles: Improve SWAG code
o-laurent Jun 17, 2024
0010d95
:shirt: Minor fix
o-laurent Jun 17, 2024
2c63b3b
:wrench: Fix online install
o-laurent Jun 17, 2024
914599b
:sparkles: Add a full scheduler for SWA & SWAG & update config
o-laurent Jun 17, 2024
a3443e3
:bug: Improve SWA & SWAG
o-laurent Jun 17, 2024
4a4eeac
:hammer: Refactor stochastic models
o-laurent Jun 17, 2024
737d862
:bug: Fix Stochastic MLP error
o-laurent Jun 17, 2024
6f57332
:books: Update documentation
o-laurent Jun 17, 2024
00eb701
:books: Fix bugs in docs
o-laurent Jun 17, 2024
4e1e8af
:white_check_mark: Add first battery of tests
o-laurent Jun 17, 2024
501b5d4
:heavy_check_mark: Fix tests
o-laurent Jun 17, 2024
d135612
:white_check_mark: Improve SWAG tests
o-laurent Jun 17, 2024
f1b6546
:white_check_mark: Improve Stochastic tests
o-laurent Jun 17, 2024
edbb88e
:shirt: Minor changes
o-laurent Jun 17, 2024
fdbaf76
:white_check_mark: Finetune tests
o-laurent Jun 17, 2024
63e874a
Merge pull request #101 from ENSTA-U2IS-AI/trajectory
o-laurent Jun 17, 2024
a601ff9
:ok_hand: Take review comments into account
o-laurent Jun 18, 2024
3ea90e9
:books: Improve documentation & tutorials
o-laurent Jun 18, 2024
84fb04f
:book: Add a tutorial on Packed-Ensembles
o-laurent Jun 18, 2024
f6fb41c
:white_check_mark: Improve tests
o-laurent Jun 18, 2024
676f272
:shirt: Improve ReadMe
o-laurent Jun 18, 2024
80aaaa8
:bug: Fix SWAG
o-laurent Jun 18, 2024
06b990f
:sparkles: Propagate changes to the other routines & update tests
o-laurent Jun 18, 2024
725bd9c
:hammer: rename inference_size to eval_size
o-laurent Jun 18, 2024
fcbfeaa
:bug: Fix regression routines
o-laurent Jun 18, 2024
c9e0404
:sparkles: Add first version for ABNN
o-laurent Jun 18, 2024
4fe4aec
:white_check_mark: Improve coverage
o-laurent Jun 18, 2024
7a57586
:wrench: Lock plt version
o-laurent Jun 18, 2024
0a3a5a7
:bug: Minor changes
o-laurent Jun 19, 2024
41f2f80
:fire: Remove webdataset
o-laurent Jun 19, 2024
17c3071
:books: Improve API Page
o-laurent Jun 21, 2024
f56866a
:white_check_mark: Slightly improve tests
o-laurent Jun 21, 2024
2d84fe6
:ok_hand: Make review modifications before merging
o-laurent Jun 26, 2024
2 changes: 1 addition & 1 deletion .github/workflows/run-tests.yml
@@ -65,7 +65,7 @@ jobs:
if: steps.changed-files-specific.outputs.only_changed != 'true'
run: |
python3 -m pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu
python3 -m pip install .[image,dev,docs]
python3 -m pip install .[all]

- name: Check style & format
if: steps.changed-files-specific.outputs.only_changed != 'true'
5 changes: 4 additions & 1 deletion README.md
@@ -18,7 +18,7 @@ _TorchUncertainty_ is a package designed to help you leverage [uncertainty quant

:books: Our webpage and documentation is available here: [torch-uncertainty.github.io](https://torch-uncertainty.github.io). :books:

TorchUncertainty contains the *official implementations* of multiple papers from *major machine-learning and computer vision conferences* and was/will be featured in tutorials at **WACV 2024** and **ECCV 2024**.
TorchUncertainty contains the *official implementations* of multiple papers from *major machine-learning and computer vision conferences* and was/will be featured in tutorials at **[WACV](https://wacv2024.thecvf.com/) 2024**, **[HAICON](https://haicon24.de/) 2024** and **[ECCV](https://eccv.ecva.net/) 2024**.

---

@@ -69,6 +69,8 @@ To date, the following deep learning baselines have been implemented:
- MIMO
- Packed-Ensembles (see [Blog post](https://medium.com/@adrien.lafage/make-your-neural-networks-more-reliable-with-packed-ensembles-7ad0b737a873)) - [Tutorial](https://torch-uncertainty.github.io/auto_tutorials/tutorial_pe_cifar10.html)
- Bayesian Neural Networks :construction: Work in progress :construction: - [Tutorial](https://torch-uncertainty.github.io/auto_tutorials/tutorial_bayesian.html)
- Checkpoint Ensembles & Snapshot Ensembles
- Stochastic Weight Averaging & Stochastic Weight Averaging Gaussian
- Regression with Beta Gaussian NLL Loss
- Deep Evidential Classification & Regression - [Tutorial](https://torch-uncertainty.github.io/auto_tutorials/tutorial_evidential_classification.html)

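The Checkpoint/Snapshot Ensembles and SWA/SWAG entries above all build an ensemble or a weight average from a single training trajectory. As a rough illustration of the idea only, here is a minimal SWA sketch using PyTorch's built-in `torch.optim.swa_utils` rather than the TorchUncertainty wrappers added in this PR; the model, data, and hyperparameters are placeholders.

```python
# Minimal SWA sketch with plain PyTorch utilities. This illustrates the idea behind the
# SWA/SWAG wrappers added in this PR; it is NOT the TorchUncertainty API.
import torch
from torch import nn, optim
from torch.optim.swa_utils import AveragedModel, SWALR, update_bn
from torch.utils.data import DataLoader, TensorDataset

# Placeholder data and model, stand-ins for the real experiment setup.
train_loader = DataLoader(
    TensorDataset(torch.randn(512, 28 * 28), torch.randint(0, 10, (512,))),
    batch_size=128,
)
model = nn.Sequential(nn.Linear(28 * 28, 64), nn.ReLU(), nn.Linear(64, 10))
optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.9)
criterion = nn.CrossEntropyLoss()

swa_model = AveragedModel(model)        # running average of the weights
swa_scheduler = SWALR(optimizer, swa_lr=0.01)
swa_start = 20                          # epoch at which averaging begins

for epoch in range(30):
    for x, y in train_loader:
        optimizer.zero_grad()
        criterion(model(x), y).backward()
        optimizer.step()
    if epoch >= swa_start:
        swa_model.update_parameters(model)  # accumulate the weight average
        swa_scheduler.step()

update_bn(train_loader, swa_model)      # refresh BatchNorm statistics of the averaged model
```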
@@ -84,6 +86,7 @@ To date, the following post-processing methods have been implemented:

- Temperature, Vector, & Matrix scaling - [Tutorial](https://torch-uncertainty.github.io/auto_tutorials/tutorial_scaler.html)
- Monte Carlo Batch Normalization - [Tutorial](https://torch-uncertainty.github.io/auto_tutorials/tutorial_mc_batch_norm.html)
- A wrapper for Laplace approximation using the [Laplace library](https://github.com/aleximmer/Laplace)
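The Laplace entry above delegates to the external [Laplace library](https://github.com/aleximmer/Laplace). Below is a minimal sketch of the underlying library call; the exact signature of TorchUncertainty's `LaplaceApprox` wrapper is not reproduced here, and the tiny model and synthetic data are placeholders.

```python
# Sketch of post-hoc Laplace approximation with the external `laplace` library,
# which the new LaplaceApprox wrapper builds upon. Model and data are placeholders.
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from laplace import Laplace  # pip install laplace-torch

model = nn.Sequential(nn.Linear(20, 10))  # stands in for an already-trained classifier
train_loader = DataLoader(
    TensorDataset(torch.randn(256, 20), torch.randint(0, 10, (256,))), batch_size=64
)

la = Laplace(
    model,
    likelihood="classification",
    subset_of_weights="last_layer",
    hessian_structure="kron",
)
la.fit(train_loader)           # fit the Hessian approximation on the training data
la.optimize_prior_precision()  # tune the prior precision (marginal likelihood by default)

x = torch.randn(4, 20)
probs = la(x)                  # predictive probabilities, marginalized over the posterior
```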

## Tutorials

27 changes: 19 additions & 8 deletions auto_tutorials_source/tutorial_bayesian.py
@@ -55,12 +55,12 @@
# We will use the Adam optimizer with the default learning rate of 0.001.


def optim_lenet(model: nn.Module) -> dict:
def optim_lenet(model: nn.Module):
optimizer = optim.Adam(
model.parameters(),
lr=1e-3,
)
return {"optimizer": optimizer}
return optimizer


# %%
@@ -75,7 +75,7 @@ def optim_lenet(model: nn.Module) -> dict:
trainer = Trainer(accelerator="cpu", enable_progress_bar=False, max_epochs=1)

# datamodule
root = Path("") / "data"
root = Path("data")
datamodule = MNISTDataModule(root=root, batch_size=128, eval_ood=False)

# model
@@ -105,6 +105,7 @@ def optim_lenet(model: nn.Module) -> dict:
num_classes=datamodule.num_classes,
loss=loss,
optim_recipe=optim_lenet(model),
is_ensemble=True
)

# %%
@@ -125,8 +126,10 @@ def optim_lenet(model: nn.Module) -> dict:
# 6. Testing the Model
# ~~~~~~~~~~~~~~~~~~~~
#
# Now that the model is trained, let's test it on MNIST

# Now that the model is trained, let's test it on MNIST.
# Please note that we apply a reshape to the logits to determine the dimension corresponding to the ensemble
# and to the batch. As of TorchUncertainty 2.0, the ensemble dimension is merged with the batch dimension
# in this order (num_estimator x batch, classes).
import matplotlib.pyplot as plt
import numpy as np
import torch
@@ -148,14 +151,22 @@ def imshow(img):
imshow(torchvision.utils.make_grid(images[:4, ...]))
print("Ground truth: ", " ".join(f"{labels[j]}" for j in range(4)))

logits = model(images)
# Put the model in eval mode to use several samples
model = model.eval()
logits = model(images).reshape(16, 128, 10) # num_estimators, batch_size, num_classes

# We apply the softmax on the classes and average over the estimators
probs = torch.nn.functional.softmax(logits, dim=-1)
avg_probs = probs.mean(dim=0)
var_probs = probs.std(dim=0)

_, predicted = torch.max(probs, 1)
_, predicted = torch.max(avg_probs, 1)

print("Predicted digits: ", " ".join(f"{predicted[j]}" for j in range(4)))

print("Std. dev. of the scores over the posterior samples", " ".join(f"{var_probs[j][predicted[j]]:.3}" for j in range(4)))
# %%
# The scores should be quite certain.
#
# References
# ----------
#
5 changes: 4 additions & 1 deletion auto_tutorials_source/tutorial_mc_batch_norm.py
@@ -102,6 +102,9 @@
# .eval() to enable Monte Carlo batch normalization at inference.
# In this tutorial, we plot the most uncertain images, i.e. the images for which
# the variance of the predictions is the highest.
# Please note that we apply a reshape to the logits to determine the dimension corresponding to the ensemble
# and to the batch. As of TorchUncertainty 2.0, the ensemble dimension is merged with the batch dimension
# in this order (num_estimator x batch, classes).

import matplotlib.pyplot as plt
import numpy as np
@@ -121,7 +124,7 @@ def imshow(img):
images, labels = next(dataiter)

routine.eval()
logits = routine(images).reshape(8, 128, 10)
logits = routine(images).reshape(8, 128, 10) # num_estimators, batch_size, num_classes

probs = torch.nn.functional.softmax(logits, dim=-1)
most_uncertain = sorted(probs.var(0).sum(-1).topk(4).indices)
13 changes: 7 additions & 6 deletions auto_tutorials_source/tutorial_mc_dropout.py
@@ -51,7 +51,8 @@
# dataloaders and transforms. We create the model using the
# blueprint from torch_uncertainty.models and we wrap it into mc_dropout.
#
# It is important to specify the arguments,``num_estimators`` and the ``dropout_rate``
# It is important to specify the arguments, ``num_estimators``
# and the ``dropout_rate``
# to use Monte Carlo dropout.

trainer = Trainer(accelerator="cpu", max_epochs=2, enable_progress_bar=False)
@@ -64,7 +65,7 @@
model = lenet(
in_channels=datamodule.num_channels,
num_classes=datamodule.num_classes,
dropout_rate=0.4,
dropout_rate=0.5,
)

mc_model = mc_dropout(model, num_estimators=16, last_layer=False)
@@ -118,22 +119,22 @@ def imshow(img):
images, labels = next(dataiter)

# print images
imshow(torchvision.utils.make_grid(images[:4, ...]))
print("Ground truth: ", " ".join(f"{labels[j]}" for j in range(4)))
imshow(torchvision.utils.make_grid(images[:6, ...], padding=0))
print("Ground truth labels: ", " ".join(f"{labels[j]}" for j in range(6)))

routine.eval()
logits = routine(images).reshape(16, 128, 10)

probs = torch.nn.functional.softmax(logits, dim=-1)


for j in range(4):
for j in range(6):
values, predicted = torch.max(probs[:, j], 1)
print(
f"Predicted digits for the image {j+1}: ",
" ".join([str(image_id.item()) for image_id in predicted]),
)

# %%
# We see that there is some disagreement between the samples of the dropout
# Most of the time, we see that there is some disagreement between the samples of the dropout
# approximation of the posterior distribution.
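# %%
# As a rough add-on sketch (not part of the original tutorial), one way to quantify
# this disagreement is the fraction of sampled predictions that match the majority vote:
votes = probs.argmax(-1)                               # (num_estimators, batch)
agreement = (votes == votes.mode(0).values).float().mean(0)
print("Agreement rate: ", " ".join(f"{agreement[j]:.2f}" for j in range(6)))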
29 changes: 18 additions & 11 deletions docs/source/api.rst
@@ -153,23 +153,21 @@ Models

.. currentmodule:: torch_uncertainty.models

Deep Ensembles
^^^^^^^^^^^^^^
Wrappers
^^^^^^^^

.. autosummary::
:toctree: generated/
:nosignatures:
:template: class.rst

deep_ensembles

Monte Carlo Dropout

.. autosummary::
:toctree: generated/
:nosignatures:
:template: class.rst

CheckpointEnsemble
EMA
StochasticModel
SWA
SWAG
MCDropout
mc_dropout

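The wrappers listed above share a common usage pattern: wrap a base model, put it in eval mode, and reshape the flattened ensemble output of shape (num_estimators × batch, classes). A minimal sketch with `mc_dropout`, whose arguments appear in the tutorials of this PR; the import paths follow those tutorials, and the other wrappers' constructor arguments are not shown here.

```python
# Minimal sketch of the shared wrapper usage pattern, using mc_dropout as the example.
# Import paths follow the tutorials in this PR; the other wrappers (EMA, SWA, SWAG,
# CheckpointEnsemble) wrap a model similarly but take different constructor arguments.
import torch

from torch_uncertainty.models import mc_dropout
from torch_uncertainty.models.lenet import lenet

model = lenet(in_channels=1, num_classes=10, dropout_rate=0.5)
wrapped = mc_dropout(model, num_estimators=16, last_layer=False)

wrapped.eval()                                    # sampling stays active at evaluation
x = torch.rand(128, 1, 28, 28)                    # dummy MNIST-sized batch
logits = wrapped(x)                               # flattened: (num_estimators * batch, classes)
probs = logits.reshape(16, 128, 10).softmax(-1)   # back to (num_estimators, batch, classes)
mean_probs = probs.mean(dim=0)                    # average over ensemble members
```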
Metrics
@@ -242,6 +240,16 @@ Post-Processing Methods

.. currentmodule:: torch_uncertainty.post_processing

.. autosummary::
:toctree: generated/
:nosignatures:
:template: class_inherited.rst
MCBatchNorm
LaplaceApprox

Scaling Methods
^^^^^^^^^^^^^^^

.. autosummary::
:toctree: generated/
:nosignatures:
@@ -250,7 +258,6 @@
TemperatureScaler
VectorScaler
MatrixScaler
MCBatchNorm

Datamodules
-----------
10 changes: 1 addition & 9 deletions docs/source/index.rst
@@ -32,7 +32,7 @@ To install TorchUncertainty with contribution in mind, check the
-----

Official Implementations
^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^

TorchUncertainty also houses multiple official implementations of papers from major conferences & journals.

@@ -56,14 +56,6 @@ TorchUncertainty also houses multiple official implementations of papers from ma
* Authors: *Gianni Franchi, Xuanlong Yu, Andrei Bursuc, Angel Tena, Rémi Kazmierczak, Séverine Dubuisson, Emanuel Aldea, David Filliat*
* Paper: `BMVC 2022 <https://arxiv.org/abs/2203.01437>`_.

Packed-Ensembles
^^^^^^^^^^^^^^^^

**Packed-Ensembles for Efficient Uncertainty Estimation**

* Authors: *Olivier Laurent, Adrien Lafage, Enzo Tartaglione, Geoffrey Daniel, Jean-Marc Martinez, Andrei Bursuc, and Gianni Franchi*
* Paper: `here <https://arxiv.org/abs/2210.09184>`_.

.. toctree::
:maxdepth: 2
:caption: Contents:
46 changes: 43 additions & 3 deletions docs/source/references.rst
@@ -41,10 +41,10 @@ For Deep Evidential Regression, consider citing:
* Paper: `NeurIPS 2020 <https://arxiv.org/pdf/1910.02600>`__.


Bayesian Neural Networks
^^^^^^^^^^^^^^^^^^^^^^^^
Variational Inference Bayesian Neural Networks
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

For Bayesian Neural Networks, consider citing:
For Variational Inference Bayesian Neural Networks, consider citing:

**Weight Uncertainty in Neural Networks**

@@ -73,6 +73,36 @@ For Monte-Carlo Dropout, consider citing:
* Authors: *Yarin Gal and Zoubin Ghahramani*
* Paper: `ICML 2016 <https://arxiv.org/pdf/1506.02142.pdf>`__.

Stochastic Weight Averaging
^^^^^^^^^^^^^^^^^^^^^^^^^^^

For Stochastic Weight Averaging, consider citing:

**Averaging Weights Leads to Wider Optima and Better Generalization**

* Authors: *Pavel Izmailov, Dmitrii Podoprikhin, Timur Garipov, Dmitry Vetrov, Andrew Gordon Wilson*
* Paper: `UAI 2018 <https://arxiv.org/pdf/1803.05407.pdf>`__.

Stochastic Weight Averaging Gaussian
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

For Stochastic Weight Averaging Gaussian, consider citing:

**A simple baseline for Bayesian uncertainty in deep learning**

* Authors: *Wesley Maddox, Timur Garipov, Pavel Izmailov, Dmitry Vetrov, Andrew Gordon Wilson*
* Paper: `NeurIPS 2019 <https://arxiv.org/pdf/1902.02476.pdf>`__.


CheckpointEnsemble
^^^^^^^^^^^^^^^^^^

For CheckpointEnsemble, consider citing:

**Checkpoint Ensembles: Ensemble Methods from a Single Training Process**

* Authors: *Hugh Chen, Scott Lundberg, Su-In Lee*
* Paper: `ArXiv <https://arxiv.org/pdf/1710.03282>`__.

BatchEnsemble
^^^^^^^^^^^^^
@@ -193,6 +223,16 @@ For Monte-Carlo Batch Normalization, consider citing:
* Authors: *Mathias Teye, Hossein Azizpour, and Kevin Smith*
* Paper: `ICML 2018 <https://arxiv.org/pdf/1802.06455.pdf>`__.

Laplace Approximation
^^^^^^^^^^^^^^^^^^^^^

For Laplace Approximation, consider citing:

**Laplace Redux - Effortless Bayesian Deep Learning**

* Authors: *Erik Daxberger, Agustinus Kristiadi, Alexander Immer, Runa Eschenhagen, Matthias Bauer, Philipp Hennig*
* Paper: `NeurIPS 2021 <https://arxiv.org/abs/2106.14806>`__.

Metrics
-------

59 changes: 59 additions & 0 deletions experiments/classification/mnist/configs/lenet.yaml
@@ -0,0 +1,59 @@
# lightning.pytorch==2.1.3
seed_everything: false
eval_after_fit: true
trainer:
accelerator: gpu
devices: 1
precision: 16-mixed
max_epochs: 75
logger:
class_path: lightning.pytorch.loggers.TensorBoardLogger
init_args:
save_dir: logs/lenet
name: standard
default_hp_metric: false
callbacks:
- class_path: lightning.pytorch.callbacks.ModelCheckpoint
init_args:
monitor: val/cls/Acc
mode: max
save_last: true
- class_path: lightning.pytorch.callbacks.LearningRateMonitor
init_args:
logging_interval: step
- class_path: lightning.pytorch.callbacks.EarlyStopping
init_args:
monitor: val/cls/Acc
patience: 1000
check_finite: true
model:
model:
class_path: torch_uncertainty.models.lenet._LeNet
init_args:
in_channels: 1
num_classes: 10
linear_layer: torch.nn.Linear
conv2d_layer: torch.nn.Conv2d
activation: torch.nn.ReLU
norm: torch.nn.Identity
groups: 1
dropout_rate: 0
last_layer_dropout: false
layer_args: {}
num_classes: 10
loss: CrossEntropyLoss
data:
root: ./data
batch_size: 128
optimizer:
lr: 0.05
momentum: 0.9
weight_decay: 5e-4
nesterov: true
lr_scheduler:
class_path: torch.optim.lr_scheduler.MultiStepLR
init_args:
milestones:
- 25
- 50
gamma: 0.1
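A config like this one is meant to be driven by a Lightning CLI entry point. Below is a hedged sketch of such an entry point, assuming the `ClassificationRoutine` and `MNISTDataModule` classes referenced elsewhere in this PR; the actual experiment script shipped in the repository may differ.

```python
# Hedged sketch of a LightningCLI entry point that could consume the config above.
# Typical invocation:  python lenet_exp.py fit --config lenet.yaml
from lightning.pytorch.cli import LightningCLI

from torch_uncertainty.datamodules import MNISTDataModule
from torch_uncertainty.routines import ClassificationRoutine


def cli_main() -> None:
    # The "model:" section of the YAML configures the routine, "data:" the datamodule,
    # and "trainer:" the Lightning Trainer.
    LightningCLI(ClassificationRoutine, MNISTDataModule)


if __name__ == "__main__":
    cli_main()
```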