diff --git a/experiments/regression/uci_datasets/configs/boston/mlp/batch_ensemble.yaml b/experiments/regression/uci_datasets/configs/boston/mlp/batch_ensemble.yaml new file mode 100644 index 00000000..d61d83da --- /dev/null +++ b/experiments/regression/uci_datasets/configs/boston/mlp/batch_ensemble.yaml @@ -0,0 +1,53 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 40 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/boston/mlp/normal + name: batch_ensembles + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TURegCheckpoint + init_args: + probabilistic: true + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/reg/MSE + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.mlp.batched_mlp + init_args: + in_features: 13 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal + num_estimators: 5 + output_dim: 1 + loss: torch_uncertainty.losses.DistributionNLLLoss + dist_family: normal + save_in_csv: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 5 +data: + root: ./data + batch_size: 128 + dataset_name: boston +optimizer: + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/boston/mlp/bayesian.yaml b/experiments/regression/uci_datasets/configs/boston/mlp/bayesian.yaml new file mode 100644 index 00000000..5bf28c99 --- /dev/null +++ b/experiments/regression/uci_datasets/configs/boston/mlp/bayesian.yaml @@ -0,0 +1,55 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 40 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/boston/mlp/normal + name: bayesian + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TURegCheckpoint + init_args: + probabilistic: true + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/reg/MSE + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.mlp.bayesian_mlp + init_args: + in_features: 13 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal + num_samples: 10 + output_dim: 1 + loss: + class_path: torch_uncertainty.losses.ELBOLoss + init_args: + kl_weight: 0.00002 + inner_loss: torch_uncertainty.losses.DistributionNLLLoss + num_samples: 3 + dist_family: normal + dist_family: normal + save_in_csv: true +data: + root: ./data + batch_size: 128 + dataset_name: boston +optimizer: + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/boston/mlp/cauchy.yaml b/experiments/regression/uci_datasets/configs/boston/mlp/cauchy.yaml new file mode 100644 index 00000000..3ae62149 --- /dev/null +++ b/experiments/regression/uci_datasets/configs/boston/mlp/cauchy.yaml @@ -0,0 +1,49 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 40 + logger: + class_path: 
lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/boston/mlp/cauchy + name: standard + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TURegCheckpoint + init_args: + probabilistic: true + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/reg/MSE + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 13 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: cauchy + output_dim: 1 + loss: torch_uncertainty.losses.DistributionNLLLoss + dist_family: cauchy + dist_estimate: mode + save_in_csv: true +data: + root: ./data + batch_size: 128 + dataset_name: boston +optimizer: + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/boston/mlp/deep_ensembles.yaml b/experiments/regression/uci_datasets/configs/boston/mlp/deep_ensembles.yaml new file mode 100644 index 00000000..2d09a4d9 --- /dev/null +++ b/experiments/regression/uci_datasets/configs/boston/mlp/deep_ensembles.yaml @@ -0,0 +1,58 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 40 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/boston/mlp/normal + name: deep_ensembles + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TURegCheckpoint + init_args: + probabilistic: true + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/reg/MSE + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.deep_ensembles + init_args: + models: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 13 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal + num_estimators: 5 + task: regression + probabilistic: true + output_dim: 1 + loss: torch_uncertainty.losses.DistributionNLLLoss + dist_family: normal + save_in_csv: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 5 +data: + root: ./data + batch_size: 128 + dataset_name: boston +optimizer: + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/boston/mlp/laplace.yaml b/experiments/regression/uci_datasets/configs/boston/mlp/laplace.yaml index 32d25086..6d67f52c 100644 --- a/experiments/regression/uci_datasets/configs/boston/mlp/laplace.yaml +++ b/experiments/regression/uci_datasets/configs/boston/mlp/laplace.yaml @@ -21,21 +21,28 @@ trainer: logging_interval: step - class_path: lightning.pytorch.callbacks.EarlyStopping init_args: - monitor: val/reg/NLL + monitor: val/reg/MSE patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 13 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: laplace output_dim: 1 - in_features: 13 - hidden_dims: - - 50 loss: torch_uncertainty.losses.DistributionNLLLoss - version: std - distribution: laplace + dist_family: laplace + save_in_csv: true data: root: ./data batch_size: 128 dataset_name: boston optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: 
torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/boston/mlp/masksemble.yaml b/experiments/regression/uci_datasets/configs/boston/mlp/masksemble.yaml new file mode 100644 index 00000000..30c57545 --- /dev/null +++ b/experiments/regression/uci_datasets/configs/boston/mlp/masksemble.yaml @@ -0,0 +1,54 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 40 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/boston/mlp/normal + name: masksemble + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TURegCheckpoint + init_args: + probabilistic: true + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/reg/MSE + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.mlp.masked_mlp + init_args: + in_features: 13 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal + num_estimators: 5 + scale: 2.0 + output_dim: 1 + loss: torch_uncertainty.losses.DistributionNLLLoss + dist_family: normal + save_in_csv: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 5 +data: + root: ./data + batch_size: 128 + dataset_name: boston +optimizer: + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/boston/mlp/mc_dropout.yaml b/experiments/regression/uci_datasets/configs/boston/mlp/mc_dropout.yaml new file mode 100644 index 00000000..5d31aeec --- /dev/null +++ b/experiments/regression/uci_datasets/configs/boston/mlp/mc_dropout.yaml @@ -0,0 +1,56 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 40 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/boston/mlp/normal + name: mc_dropout + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TURegCheckpoint + init_args: + probabilistic: true + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/reg/MSE + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.mc_dropout + init_args: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 13 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal + dropout_rate: 0.1 + num_estimators: 10 + on_batch: false + task: regression + probabilistic: true + output_dim: 1 + loss: torch_uncertainty.losses.DistributionNLLLoss + dist_family: normal + save_in_csv: true +data: + root: ./data + batch_size: 128 + dataset_name: boston +optimizer: + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/boston/mlp/mimo.yaml b/experiments/regression/uci_datasets/configs/boston/mlp/mimo.yaml new file mode 100644 index 00000000..b794362b --- /dev/null +++ b/experiments/regression/uci_datasets/configs/boston/mlp/mimo.yaml @@ -0,0 +1,55 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 
+ precision: 16-mixed + max_epochs: 40 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/boston/mlp/normal + name: mimo + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TURegCheckpoint + init_args: + probabilistic: true + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/reg/MSE + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.mlp.mimo_mlp + init_args: + in_features: 13 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal + num_estimators: 5 + output_dim: 1 + loss: torch_uncertainty.losses.DistributionNLLLoss + dist_family: normal + save_in_csv: true + format_batch_fn: + class_path: torch_uncertainty.transforms.MIMOBatchFormat + init_args: + num_estimators: 5 + rho: 0.5 + batch_repeat: 5 +data: + root: ./data + batch_size: 128 + dataset_name: boston +optimizer: + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/boston/mlp/normal.yaml b/experiments/regression/uci_datasets/configs/boston/mlp/normal.yaml index a7563f94..5c9653ac 100644 --- a/experiments/regression/uci_datasets/configs/boston/mlp/normal.yaml +++ b/experiments/regression/uci_datasets/configs/boston/mlp/normal.yaml @@ -21,21 +21,28 @@ trainer: logging_interval: step - class_path: lightning.pytorch.callbacks.EarlyStopping init_args: - monitor: val/reg/NLL + monitor: val/reg/MSE patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 13 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal output_dim: 1 - in_features: 13 - hidden_dims: - - 50 loss: torch_uncertainty.losses.DistributionNLLLoss - version: std - distribution: normal + dist_family: normal + save_in_csv: true data: root: ./data batch_size: 128 dataset_name: boston optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/boston/mlp/packed_ensembles.yaml b/experiments/regression/uci_datasets/configs/boston/mlp/packed_ensembles.yaml new file mode 100644 index 00000000..b9d86b9b --- /dev/null +++ b/experiments/regression/uci_datasets/configs/boston/mlp/packed_ensembles.yaml @@ -0,0 +1,54 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 40 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/boston/mlp/normal + name: packed_ensembles + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TURegCheckpoint + init_args: + probabilistic: true + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/reg/MSE + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.mlp.packed_mlp + init_args: + in_features: 13 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal + num_estimators: 5 + alpha: 4 + output_dim: 1 + loss: torch_uncertainty.losses.DistributionNLLLoss + dist_family: normal + save_in_csv: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 
5 +data: + root: ./data + batch_size: 128 + dataset_name: boston +optimizer: + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/boston/mlp/point_wise.yaml b/experiments/regression/uci_datasets/configs/boston/mlp/point_wise.yaml index ff8111fd..3fc763cc 100644 --- a/experiments/regression/uci_datasets/configs/boston/mlp/point_wise.yaml +++ b/experiments/regression/uci_datasets/configs/boston/mlp/point_wise.yaml @@ -23,16 +23,22 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 13 + num_outputs: 1 + hidden_dims: + - 50 output_dim: 1 - in_features: 13 - hidden_dims: - - 50 loss: MSELoss - version: std + save_in_csv: true data: root: ./data batch_size: 128 dataset_name: boston optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/boston/mlp/student.yaml b/experiments/regression/uci_datasets/configs/boston/mlp/student.yaml new file mode 100644 index 00000000..d321d4c9 --- /dev/null +++ b/experiments/regression/uci_datasets/configs/boston/mlp/student.yaml @@ -0,0 +1,48 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 40 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/boston/mlp/student + name: standard + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TURegCheckpoint + init_args: + probabilistic: true + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/reg/MSE + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 13 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: student + output_dim: 1 + loss: torch_uncertainty.losses.DistributionNLLLoss + dist_family: student + save_in_csv: true +data: + root: ./data + batch_size: 128 + dataset_name: boston +optimizer: + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/concrete/mlp/batch_ensemble.yaml b/experiments/regression/uci_datasets/configs/concrete/mlp/batch_ensemble.yaml new file mode 100644 index 00000000..a29727d4 --- /dev/null +++ b/experiments/regression/uci_datasets/configs/concrete/mlp/batch_ensemble.yaml @@ -0,0 +1,53 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 40 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/concrete/mlp/normal + name: batch_ensembles + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TURegCheckpoint + init_args: + probabilistic: true + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/reg/MSE + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.mlp.batched_mlp + init_args: + in_features: 8 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal + num_estimators: 5 + output_dim: 1 + loss: 
torch_uncertainty.losses.DistributionNLLLoss + dist_family: normal + save_in_csv: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 5 +data: + root: ./data + batch_size: 128 + dataset_name: concrete +optimizer: + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/concrete/mlp/bayesian.yaml b/experiments/regression/uci_datasets/configs/concrete/mlp/bayesian.yaml new file mode 100644 index 00000000..17059dc1 --- /dev/null +++ b/experiments/regression/uci_datasets/configs/concrete/mlp/bayesian.yaml @@ -0,0 +1,55 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 40 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/concrete/mlp/normal + name: bayesian + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TURegCheckpoint + init_args: + probabilistic: true + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/reg/MSE + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.mlp.bayesian_mlp + init_args: + in_features: 8 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal + num_samples: 10 + output_dim: 1 + loss: + class_path: torch_uncertainty.losses.ELBOLoss + init_args: + kl_weight: 0.00002 + inner_loss: torch_uncertainty.losses.DistributionNLLLoss + num_samples: 3 + dist_family: normal + dist_family: normal + save_in_csv: true +data: + root: ./data + batch_size: 128 + dataset_name: concrete +optimizer: + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/concrete/mlp/cauchy.yaml b/experiments/regression/uci_datasets/configs/concrete/mlp/cauchy.yaml new file mode 100644 index 00000000..b0006b07 --- /dev/null +++ b/experiments/regression/uci_datasets/configs/concrete/mlp/cauchy.yaml @@ -0,0 +1,47 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 40 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/concrete/mlp/cauchy + name: standard + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TURegCheckpoint + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/reg/MSE + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 8 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: cauchy + output_dim: 1 + loss: torch_uncertainty.losses.DistributionNLLLoss + dist_family: cauchy + dist_estimate: mode + save_in_csv: true +data: + root: ./data + batch_size: 128 + dataset_name: concrete +optimizer: + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/concrete/mlp/deep_ensembles.yaml b/experiments/regression/uci_datasets/configs/concrete/mlp/deep_ensembles.yaml new file mode 100644 index 00000000..3e87af5e --- /dev/null +++ 
b/experiments/regression/uci_datasets/configs/concrete/mlp/deep_ensembles.yaml @@ -0,0 +1,58 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 40 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/concrete/mlp/normal + name: deep_ensembles + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TURegCheckpoint + init_args: + probabilistic: true + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/reg/MSE + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.deep_ensembles + init_args: + models: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 8 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal + num_estimators: 5 + task: regression + probabilistic: true + output_dim: 1 + loss: torch_uncertainty.losses.DistributionNLLLoss + dist_family: normal + save_in_csv: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 5 +data: + root: ./data + batch_size: 128 + dataset_name: concrete +optimizer: + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/concrete/mlp/laplace.yaml b/experiments/regression/uci_datasets/configs/concrete/mlp/laplace.yaml index 9bdfa9f2..6c575a67 100644 --- a/experiments/regression/uci_datasets/configs/concrete/mlp/laplace.yaml +++ b/experiments/regression/uci_datasets/configs/concrete/mlp/laplace.yaml @@ -14,28 +14,33 @@ trainer: default_hp_metric: false callbacks: - class_path: torch_uncertainty.callbacks.TURegCheckpoint - init_args: - probabilistic: true - class_path: lightning.pytorch.callbacks.LearningRateMonitor init_args: logging_interval: step - class_path: lightning.pytorch.callbacks.EarlyStopping init_args: - monitor: val/reg/NLL + monitor: val/reg/MSE patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 8 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: laplace output_dim: 1 - in_features: 8 - hidden_dims: - - 50 loss: torch_uncertainty.losses.DistributionNLLLoss - version: std - distribution: laplace + dist_family: laplace + save_in_csv: true data: root: ./data batch_size: 128 dataset_name: concrete optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/concrete/mlp/mc_dropout.yaml b/experiments/regression/uci_datasets/configs/concrete/mlp/mc_dropout.yaml new file mode 100644 index 00000000..2fb02790 --- /dev/null +++ b/experiments/regression/uci_datasets/configs/concrete/mlp/mc_dropout.yaml @@ -0,0 +1,56 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 40 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/concrete/mlp/normal + name: mc_dropout + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TURegCheckpoint + init_args: + probabilistic: true + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: 
lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/reg/MSE + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.mc_dropout + init_args: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 8 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal + dropout_rate: 0.1 + num_estimators: 10 + on_batch: false + task: regression + probabilistic: true + output_dim: 1 + loss: torch_uncertainty.losses.DistributionNLLLoss + dist_family: normal + save_in_csv: true +data: + root: ./data + batch_size: 128 + dataset_name: concrete +optimizer: + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/concrete/mlp/mimo.yaml b/experiments/regression/uci_datasets/configs/concrete/mlp/mimo.yaml new file mode 100644 index 00000000..8ffe9954 --- /dev/null +++ b/experiments/regression/uci_datasets/configs/concrete/mlp/mimo.yaml @@ -0,0 +1,55 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 40 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/concrete/mlp/normal + name: mimo + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TURegCheckpoint + init_args: + probabilistic: true + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/reg/MSE + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.mlp.mimo_mlp + init_args: + in_features: 8 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal + num_estimators: 5 + output_dim: 1 + loss: torch_uncertainty.losses.DistributionNLLLoss + dist_family: normal + save_in_csv: true + format_batch_fn: + class_path: torch_uncertainty.transforms.MIMOBatchFormat + init_args: + num_estimators: 5 + rho: 0.5 + batch_repeat: 5 +data: + root: ./data + batch_size: 128 + dataset_name: concrete +optimizer: + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/concrete/mlp/normal.yaml b/experiments/regression/uci_datasets/configs/concrete/mlp/normal.yaml index bcc61343..436f9ffa 100644 --- a/experiments/regression/uci_datasets/configs/concrete/mlp/normal.yaml +++ b/experiments/regression/uci_datasets/configs/concrete/mlp/normal.yaml @@ -14,28 +14,33 @@ trainer: default_hp_metric: false callbacks: - class_path: torch_uncertainty.callbacks.TURegCheckpoint - init_args: - probabilistic: true - class_path: lightning.pytorch.callbacks.LearningRateMonitor init_args: logging_interval: step - class_path: lightning.pytorch.callbacks.EarlyStopping init_args: - monitor: val/reg/NLL + monitor: val/reg/MSE patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 8 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal output_dim: 1 - in_features: 8 - hidden_dims: - - 50 loss: torch_uncertainty.losses.DistributionNLLLoss - version: std - distribution: normal + dist_family: normal + save_in_csv: true data: root: ./data batch_size: 128 dataset_name: concrete optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git 
a/experiments/regression/uci_datasets/configs/concrete/mlp/packed_ensembles.yaml b/experiments/regression/uci_datasets/configs/concrete/mlp/packed_ensembles.yaml new file mode 100644 index 00000000..206c488c --- /dev/null +++ b/experiments/regression/uci_datasets/configs/concrete/mlp/packed_ensembles.yaml @@ -0,0 +1,54 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 40 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/concrete/mlp/normal + name: packed_ensembles + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TURegCheckpoint + init_args: + probabilistic: true + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/reg/MSE + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.mlp.packed_mlp + init_args: + in_features: 8 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal + num_estimators: 5 + alpha: 4 + output_dim: 1 + loss: torch_uncertainty.losses.DistributionNLLLoss + dist_family: normal + save_in_csv: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 5 +data: + root: ./data + batch_size: 128 + dataset_name: concrete +optimizer: + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/concrete/mlp/point_wise.yaml b/experiments/regression/uci_datasets/configs/concrete/mlp/point_wise.yaml index 05cfd417..0fe33248 100644 --- a/experiments/regression/uci_datasets/configs/concrete/mlp/point_wise.yaml +++ b/experiments/regression/uci_datasets/configs/concrete/mlp/point_wise.yaml @@ -23,16 +23,22 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 8 + num_outputs: 1 + hidden_dims: + - 50 output_dim: 1 - in_features: 8 - hidden_dims: - - 50 loss: MSELoss - version: std + save_in_csv: true data: root: ./data batch_size: 128 dataset_name: concrete optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/concrete/mlp/student.yaml b/experiments/regression/uci_datasets/configs/concrete/mlp/student.yaml new file mode 100644 index 00000000..730ca51b --- /dev/null +++ b/experiments/regression/uci_datasets/configs/concrete/mlp/student.yaml @@ -0,0 +1,46 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 40 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/concrete/mlp/student + name: standard + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TURegCheckpoint + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/reg/MSE + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 8 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: student + output_dim: 1 + loss: torch_uncertainty.losses.DistributionNLLLoss + dist_family: student + 
save_in_csv: true
+data:
+  root: ./data
+  batch_size: 128
+  dataset_name: concrete
+optimizer:
+  class_path: torch.optim.Adam
+  init_args:
+    lr: 5e-3
+    weight_decay: 0
diff --git a/experiments/regression/uci_datasets/main.py b/experiments/regression/uci_datasets/main.py
new file mode 100644
index 00000000..281764d6
--- /dev/null
+++ b/experiments/regression/uci_datasets/main.py
@@ -0,0 +1,20 @@
+import torch
+
+from torch_uncertainty import TULightningCLI
+from torch_uncertainty.datamodules import UCIRegressionDataModule
+from torch_uncertainty.routines import RegressionRoutine
+
+
+def cli_main() -> TULightningCLI:
+    return TULightningCLI(RegressionRoutine, UCIRegressionDataModule)
+
+
+if __name__ == "__main__":
+    torch.set_float32_matmul_precision("medium")
+    cli = cli_main()
+    if (
+        (not cli.trainer.fast_dev_run)
+        and cli.subcommand == "fit"
+        and cli._get(cli.config, "eval_after_fit")
+    ):
+        cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best")
diff --git a/pyproject.toml b/pyproject.toml
index aa5e6822..a9412237 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,6 +43,7 @@ dependencies = [
 experiments = ["tensorboard", "huggingface-hub>=0.31", "safetensors"]
 image = ["kornia", "h5py", "opencv-python"]
 tabular = ["pandas"]
+distribution = ["scipy"]
 dev = [
     "torch_uncertainty[experiments,image]",
     "ruff==0.11.10",
@@ -61,10 +62,9 @@ docs = [
     "sphinxcontrib-sass",
 ]
 all = [
-    "torch_uncertainty[dev,docs,tabular]",
+    "torch_uncertainty[dev,docs,tabular,distribution]",
     "scikit-learn",
     "laplace-torch",
-    "scipy",
     "glest==0.0.1a1",
 ]
 
diff --git a/tests/models/wrappers/test_mc_dropout.py b/tests/models/wrappers/test_mc_dropout.py
index b9670718..c48da927 100644
--- a/tests/models/wrappers/test_mc_dropout.py
+++ b/tests/models/wrappers/test_mc_dropout.py
@@ -2,7 +2,7 @@
 import torch
 
 from tests._dummies.model import dummy_model
-from torch_uncertainty.models import MCDropout, mc_dropout
+from torch_uncertainty.models import mc_dropout
 
 
 class TestMCDropout:
@@ -36,7 +36,7 @@ def test_mc_dropout_errors(self) -> None:
         model = dummy_model(10, 5, 0.1)
 
         with pytest.raises(ValueError, match="`num_estimators` must be strictly positive"):
-            MCDropout(model=model, num_estimators=-1, last_layer=True, on_batch=True)
+            mc_dropout(model=model, num_estimators=-1, last_layer=True, on_batch=True)
 
         dropout_model = mc_dropout(model, 5)
         with pytest.raises(TypeError, match="Training mode is expected to be boolean"):
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 1771f840..215e353e 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -15,6 +15,7 @@
     hub,
     plot_hist,
 )
+from torch_uncertainty.utils.distributions import TUStudentT
 
 
 class TestUtils:
@@ -91,7 +92,7 @@ def test_get_dist_class(self) -> None:
         dist = distributions.get_dist_class("cauchy")
         assert dist == torch.distributions.Cauchy
         dist = distributions.get_dist_class("student")
-        assert dist == torch.distributions.StudentT
+        assert dist == TUStudentT
 
     def test_get_dist_estimate(self) -> None:
         dist = torch.distributions.Normal(0.0, 1.0)
diff --git a/torch_uncertainty/baselines/regression/mlp.py b/torch_uncertainty/baselines/regression/mlp.py
index c677466a..82cb8e9a 100644
--- a/torch_uncertainty/baselines/regression/mlp.py
+++ b/torch_uncertainty/baselines/regression/mlp.py
@@ -60,5 +60,6 @@ def __init__(
             dist_family=dist_family,
             is_ensemble=version in ENSEMBLE_METHODS,
             format_batch_fn=format_batch_fn,
+            save_in_csv=True,
         )
         self.save_hyperparameters(ignore=["loss"])
diff --git a/torch_uncertainty/callbacks/checkpoint.py b/torch_uncertainty/callbacks/checkpoint.py
index d6075a43..3ff7df30 100644
--- a/torch_uncertainty/callbacks/checkpoint.py
+++ b/torch_uncertainty/callbacks/checkpoint.py
@@ -151,6 +151,12 @@ def __init__(self, probabilistic: bool = False):
             mode="min",
             auto_insert_metric_name=False,
         )
+        self.callbacks["qce"] = ModelCheckpoint(
+            filename="epoch={epoch}-step={step}-val_qce={val/cal/QCE:.3f}",
+            monitor="val/cal/QCE",
+            mode="min",
+            auto_insert_metric_name=False,
+        )
 
     @property
     def best_model_path(self) -> str:
diff --git a/torch_uncertainty/datasets/regression/uci_regression.py b/torch_uncertainty/datasets/regression/uci_regression.py
index 840b5aa3..ba94db53 100644
--- a/torch_uncertainty/datasets/regression/uci_regression.py
+++ b/torch_uncertainty/datasets/regression/uci_regression.py
@@ -233,7 +233,7 @@ def _make_dataset(self) -> None:
                 names=boston_column_names,
                 header=None,
                 sep=r"\s+",
-            )
+            ).to_numpy()
         elif self.dataset_name == "concrete":
             array = pd.read_excel(path / "Concrete_Data.xls").to_numpy()
         elif self.dataset_name == "energy-efficiency":
diff --git a/torch_uncertainty/metrics/__init__.py b/torch_uncertainty/metrics/__init__.py
index 92cd7252..2086d999 100644
--- a/torch_uncertainty/metrics/__init__.py
+++ b/torch_uncertainty/metrics/__init__.py
@@ -27,6 +27,7 @@
     MeanGTRelativeSquaredError,
     MeanSquaredErrorInverse,
     MeanSquaredLogError,
+    QuantileCalibrationError,
     SILog,
     ThresholdAccuracy,
 )
diff --git a/torch_uncertainty/metrics/regression/__init__.py b/torch_uncertainty/metrics/regression/__init__.py
index 262641f9..b7253686 100644
--- a/torch_uncertainty/metrics/regression/__init__.py
+++ b/torch_uncertainty/metrics/regression/__init__.py
@@ -3,6 +3,7 @@
 from .log10 import Log10
 from .mse_log import MeanSquaredLogError
 from .nll import DistributionNLL
+from .quantile_calibration import QuantileCalibrationError
 from .relative_error import (
     MeanGTRelativeAbsoluteError,
     MeanGTRelativeSquaredError,
diff --git a/torch_uncertainty/metrics/regression/quantile_calibration.py b/torch_uncertainty/metrics/regression/quantile_calibration.py
new file mode 100644
index 00000000..1e38d4ad
--- /dev/null
+++ b/torch_uncertainty/metrics/regression/quantile_calibration.py
@@ -0,0 +1,103 @@
+import warnings
+
+import torch
+from torch import Tensor
+from torch.distributions import Distribution, Independent
+from torchmetrics.classification import BinaryCalibrationError
+from torchmetrics.functional.classification.calibration_error import (
+    _binning_bucketize,
+)
+from torchmetrics.utilities.data import dim_zero_cat
+from torchmetrics.utilities.plot import _PLOT_OUT_TYPE
+
+from torch_uncertainty.metrics.classification.calibration_error import reliability_chart
+
+
+class QuantileCalibrationError(BinaryCalibrationError):
+    def __init__(self, num_bins=15, norm="l1", ignore_index=None, validate_args=True, **kwargs):
+        super().__init__(num_bins, norm, ignore_index, validate_args, **kwargs)
+        self.conf_intervals = torch.linspace(0.05, 0.95, self.n_bins + 1)
+
+        self.not_implemented_error = False
+
+    def update(
+        self,
+        dist: Distribution,
+        target: Tensor,
+        padding_mask: Tensor | None = None,
+    ) -> None:
+        reduce_last_dim = False
+        if isinstance(dist, Independent):
+            iid_dist = dist.base_dist
+            reduce_last_dim = True
+        else:
+            iid_dist = dist
+
+        try:
+            iid_dist.icdf((1 - self.conf_intervals[0]) / 2)
+
+        except NotImplementedError:
+            warnings.warn(
+                "The distribution does not support the `icdf()` method. "
+                "This metric will therefore return `nan` values. "
+                "Please use a distribution that implements `icdf()`.",
+                UserWarning,
+                stacklevel=2,
+            )
+            self.not_implemented_error = True
+            return
+
+        confidences = self.conf_intervals.expand(*dist.batch_shape, -1)
+        corrects = torch.empty_like(confidences)
+
+        for i, conf in enumerate(self.conf_intervals):
+            b_min = iid_dist.icdf((1 - conf) / 2)
+            bound_log_prob = iid_dist.log_prob(b_min)
+            target_log_prob = dist.log_prob(target)
+            if reduce_last_dim:
+                bound_log_prob = bound_log_prob.sum(dim=-1)
+                corrects[..., i] = bound_log_prob <= target_log_prob
+            else:
+                corrects[..., i] = (bound_log_prob <= target_log_prob).prod(dim=-1)
+
+        if padding_mask is not None:
+            confidences = confidences[~padding_mask]
+            corrects = corrects[~padding_mask]
+
+        super().update(confidences.flatten(), corrects.flatten())
+
+    def compute(self) -> Tensor:
+        if self.not_implemented_error:
+            return torch.tensor(float("nan"))
+        return super().compute()
+
+    def plot(self) -> _PLOT_OUT_TYPE:
+        confidences = dim_zero_cat(self.confidences)
+        accuracies = dim_zero_cat(self.accuracies)
+
+        bin_boundaries = torch.linspace(
+            0,
+            1,
+            self.n_bins + 1,
+            dtype=torch.float,
+            device=confidences.device,
+        )
+
+        with torch.no_grad():
+            acc_bin, conf_bin, prop_bin = _binning_bucketize(
+                confidences, accuracies, bin_boundaries
+            )
+
+        np_acc_bin = acc_bin.cpu().numpy()
+        np_conf_bin = conf_bin.cpu().numpy()
+        np_prop_bin = prop_bin.cpu().numpy()
+        np_bin_boundaries = bin_boundaries.cpu().numpy()
+
+        return reliability_chart(
+            accuracies=accuracies.cpu().numpy(),
+            confidences=confidences.cpu().numpy(),
+            bin_accuracies=np_acc_bin,
+            bin_confidences=np_conf_bin,
+            bin_sizes=np_prop_bin,
+            bins=np_bin_boundaries,
+        )
diff --git a/torch_uncertainty/models/__init__.py b/torch_uncertainty/models/__init__.py
index 688fe11d..2b0659bd 100644
--- a/torch_uncertainty/models/__init__.py
+++ b/torch_uncertainty/models/__init__.py
@@ -27,7 +27,6 @@
     SWAG,
     BatchEnsemble,
     CheckpointCollector,
-    MCDropout,
     StochasticModel,
     Zero,
     batch_ensemble,
diff --git a/torch_uncertainty/models/mlp.py b/torch_uncertainty/models/mlp.py
index 2f67a570..740c5655 100644
--- a/torch_uncertainty/models/mlp.py
+++ b/torch_uncertainty/models/mlp.py
@@ -1,11 +1,11 @@
 from collections.abc import Callable
 
 import torch.nn.functional as F
+from einops import rearrange, repeat
 from torch import Tensor, nn
 
-from torch_uncertainty.layers.bayesian import BayesLinear
+from torch_uncertainty.layers import BatchLinear, BayesLinear, MaskedLinear, PackedLinear
 from torch_uncertainty.layers.distributions import get_dist_linear_layer
-from torch_uncertainty.layers.packed import PackedLinear
 from torch_uncertainty.models import StochasticModel
 
 __all__ = ["bayesian_mlp", "mlp", "packed_mlp"]
@@ -84,11 +84,16 @@ def __init__(
             in_features=hidden_dims[-1], out_features=num_outputs, **layer_args
         )
 
+        self.layer_type = layer
+        self.layer_args = layer_args
+        self.probabilistic = dist_family is not None
         self.layers = layers
         self.fc_dropout = nn.Dropout(p=dropout_rate)
         self.last_fc_dropout = nn.Dropout(p=dropout_rate)
 
     def forward(self, x: Tensor) -> Tensor | dict[str, Tensor]:
+        if self.layer_type == BatchLinear or self.layer_type == MaskedLinear:
+            x = repeat(x, "b ...
-> (m b) ...", m=self.layer_args["num_estimators"]) x = x.flatten(self.flatten_start_dim) for i, layer in enumerate(self.layers): dropout = self.fc_dropout if i < len(self.layers) - 1 else self.last_fc_dropout @@ -97,6 +102,57 @@ def forward(self, x: Tensor) -> Tensor | dict[str, Tensor]: return self.final_layer(x) +class _MIMOMLP(_MLP): + def __init__( + self, + in_features: int, + num_outputs: int, + hidden_dims: list[int], + num_estimators: int, + layer: type[nn.Module], + activation: Callable, + layer_args: dict, + dropout_rate: float, + dist_family: str | None, + dist_args: dict, + flatten_start_dim: int, + ) -> None: + super().__init__( + in_features=in_features * num_estimators, + num_outputs=num_outputs * num_estimators, + hidden_dims=hidden_dims, + layer=layer, + activation=activation, + layer_args=layer_args, + dropout_rate=dropout_rate, + dist_family=dist_family, + dist_args=dist_args, + flatten_start_dim=flatten_start_dim, + ) + self.num_estimators = num_estimators + + def forward(self, x: Tensor) -> Tensor | dict[str, Tensor]: + if not self.training: + x = repeat(x, "b ... -> (m b) ...", m=self.num_estimators) + x = rearrange(x, "(m b) ... c -> b ... (m c)", m=self.num_estimators) + out = super().forward(x) + if self.probabilistic: + if not self.training: + out = { + k: rearrange(v, "b ... (m c) -> b m ... c", m=self.num_estimators).mean(1) + for k, v in out.items() + } + else: + out = { + k: rearrange(v, "b ... (m c) -> (m b) ... c", m=self.num_estimators) + for k, v in out.items() + } + + return out + + return rearrange(out, "b ... (m c) -> (m b) ... c", m=self.num_estimators) + + def _mlp( stochastic: bool, in_features: int, @@ -124,7 +180,9 @@ def _mlp( flatten_start_dim=flatten_start_dim, ) if stochastic: - return StochasticModel(model, num_samples) + return StochasticModel( + model=model, num_samples=num_samples, probabilistic=dist_family is not None + ) return model @@ -175,7 +233,7 @@ def packed_mlp( hidden_dims: list[int], num_estimators: int = 4, alpha: float = 2, - gamma: float = 1, + gamma: int = 1, activation: Callable = F.relu, dropout_rate: float = 0.0, dist_family: str | None = None, @@ -202,6 +260,35 @@ def packed_mlp( ) +def batched_mlp( + in_features: int, + num_outputs: int, + hidden_dims: list[int], + num_estimators: int = 4, + activation: Callable = F.relu, + dropout_rate: float = 0.0, + dist_family: str | None = None, + dist_args: dict | None = None, + flatten_start_dim: int = -1, +) -> _MLP: + layer_args = { + "num_estimators": num_estimators, + } + return _mlp( + stochastic=False, + in_features=in_features, + num_outputs=num_outputs, + hidden_dims=hidden_dims, + layer=BatchLinear, + activation=activation, + layer_args=layer_args, + dropout_rate=dropout_rate, + dist_family=dist_family, + dist_args=dist_args, + flatten_start_dim=flatten_start_dim, + ) + + def bayesian_mlp( in_features: int, num_outputs: int, @@ -226,3 +313,29 @@ def bayesian_mlp( dist_args=dist_args, flatten_start_dim=flatten_start_dim, ) + + +def mimo_mlp( + in_features: int, + num_outputs: int, + hidden_dims: list[int], + num_estimators: int, + activation: Callable = F.relu, + dropout_rate: float = 0.0, + dist_family: str | None = None, + dist_args: dict | None = None, + flatten_start_dim: int = -1, +) -> _MIMOMLP: + return _MIMOMLP( + in_features=in_features, + num_outputs=num_outputs, + hidden_dims=hidden_dims, + num_estimators=num_estimators, + layer=nn.Linear, + activation=activation, + layer_args={}, + dropout_rate=dropout_rate, + dist_family=dist_family, + dist_args=dist_args or 
{}, + flatten_start_dim=flatten_start_dim, + ) diff --git a/torch_uncertainty/models/segmentation/unet/mimo.py b/torch_uncertainty/models/segmentation/unet/mimo.py index 86f47044..57697e06 100644 --- a/torch_uncertainty/models/segmentation/unet/mimo.py +++ b/torch_uncertainty/models/segmentation/unet/mimo.py @@ -26,8 +26,8 @@ def __init__( def forward(self, x: Tensor) -> Tensor: if not self.training: x = repeat(x, "b ... -> (m b) ...", m=self.num_estimators) - out = rearrange(x, "(m b) c ... -> b (m c) ...", m=self.num_estimators) - out = super().forward(out) + x = rearrange(x, "(m b) c ... -> b (m c) ...", m=self.num_estimators) + out = super().forward(x) return rearrange(out, "b (m c) ... -> (m b) c ...", m=self.num_estimators) diff --git a/torch_uncertainty/models/wrappers/__init__.py b/torch_uncertainty/models/wrappers/__init__.py index 53aee470..e86210db 100644 --- a/torch_uncertainty/models/wrappers/__init__.py +++ b/torch_uncertainty/models/wrappers/__init__.py @@ -5,7 +5,7 @@ ) from .deep_ensembles import deep_ensembles from .ema import EMA -from .mc_dropout import MCDropout, mc_dropout +from .mc_dropout import mc_dropout from .stochastic import StochasticModel from .swa import SWA from .swag import SWAG diff --git a/torch_uncertainty/models/wrappers/mc_dropout.py b/torch_uncertainty/models/wrappers/mc_dropout.py index 0bbe1377..4ce8bb07 100644 --- a/torch_uncertainty/models/wrappers/mc_dropout.py +++ b/torch_uncertainty/models/wrappers/mc_dropout.py @@ -1,10 +1,12 @@ +from typing import Literal + import torch from einops import repeat from torch import Tensor, nn from torch.nn.modules.dropout import _DropoutNd -class MCDropout(nn.Module): +class _MCDropout(nn.Module): def __init__( self, model: nn.Module, @@ -90,12 +92,63 @@ def forward( return torch.cat([self.core_model(x) for _ in range(self.num_estimators)], dim=0) +class _RegMCDropout(_MCDropout): + def __init__( + self, + model: nn.Module, + num_estimators: int, + last_layer: bool, + on_batch: bool, + probabilistic: bool, + ): + super().__init__( + model=model, num_estimators=num_estimators, last_layer=last_layer, on_batch=on_batch + ) + self.probabilistic = probabilistic + + def forward( + self, + x: Tensor, + ) -> Tensor: + """Forward pass of the model. + + During training, the forward pass is the same as of the core model. + During evaluation, the forward pass is repeated `num_estimators` times + either on the batch size or in a for loop depending on + :attr:`last_layer`. + + Args: + x (Tensor): input tensor of shape (B, ...) + + Returns: + Tensor: output tensor of shape (:attr:`num_estimators` * B, ...) + """ + if self.training: + return self.core_model(x) + + if self.on_batch: + x = repeat(x, "b ... -> (m b) ...", m=self.num_estimators) + return self.core_model(x) + + out = [self.core_model(x) for _ in range(self.num_estimators)] + if self.probabilistic: + key_set = {tuple(o.keys()) for o in out} + if len(key_set) != 1: + raise ValueError("The output of the models must have the same keys.") + return {k: torch.cat([o[k] for o in out], dim=0) for k in key_set.pop()} + return torch.cat(out, dim=0) + + def mc_dropout( model: nn.Module, num_estimators: int, last_layer: bool = False, on_batch: bool = True, -) -> MCDropout: + task: Literal[ + "classification", "regression", "segmentation", "pixel_regression" + ] = "classification", + probabilistic: bool | None = None, +) -> _MCDropout: """MC Dropout wrapper for a model. 
Args: @@ -103,16 +156,35 @@ def mc_dropout( num_estimators (int): number of estimators to use last_layer (bool, optional): whether to apply dropout to the last layer only. Defaults to ``False``. on_batch (bool): Increase the batch_size to perform MC-Dropout. Otherwise in a for loop to reduce memory footprint. Defaults to ``True``. last_layer (bool, optional): whether to apply dropout to the last layer only. Defaults to ``False``. + task (Literal[``"classification"``, ``"regression"``, ``"segmentation"``, ``"pixel_regression"``]): The model task. Defaults to ``"classification"``. + probabilistic (bool): Whether the regression model is probabilistic. Warning: Beware that :attr:`on_batch==True` can raise weird errors if not enough memory is available. """ - return MCDropout( - model=model, - num_estimators=num_estimators, - last_layer=last_layer, - on_batch=on_batch, - ) + match task: + case "classification" | "segmentation": + return _MCDropout( + model=model, + num_estimators=num_estimators, + last_layer=last_layer, + on_batch=on_batch, + ) + case "regression" | "pixel_regression": + if probabilistic is None: + raise ValueError("`probabilistic` must be set for regression tasks.") + return _RegMCDropout( + model=model, + num_estimators=num_estimators, + last_layer=last_layer, + on_batch=on_batch, + probabilistic=probabilistic, + ) + case _: + raise ValueError( + f"Task {task} not supported. Supported tasks are: " + "`classification`, `regression`, `segmentation`, `pixel_regression`." + ) def _dropout_checks(filtered_modules: list[nn.Module], num_estimators: int) -> None: diff --git a/torch_uncertainty/models/wrappers/stochastic.py b/torch_uncertainty/models/wrappers/stochastic.py index 64ab8371..d8c7f0df 100644 --- a/torch_uncertainty/models/wrappers/stochastic.py +++ b/torch_uncertainty/models/wrappers/stochastic.py @@ -5,13 +5,25 @@ class StochasticModel(nn.Module): - def __init__(self, model: nn.Module, num_samples: int) -> None: + def __init__( + self, + model: nn.Module, + num_samples: int, + probabilistic: bool = False, + ) -> None: super().__init__() self.core_model = model self.num_samples = num_samples + self.probabilistic = probabilistic def eval_forward(self, x: Tensor) -> Tensor: - return torch.cat([self.core_model.forward(x) for _ in range(self.num_samples)], dim=0) + out = [self.core_model(x) for _ in range(self.num_samples)] + if self.probabilistic: + key_set = {tuple(o.keys()) for o in out} + if len(key_set) != 1: + raise ValueError("The output of the models must have the same keys.") + return {k: torch.cat([o[k] for o in out], dim=0) for k in key_set.pop()} + return torch.cat(out, dim=0) def forward(self, x: Tensor) -> Tensor: if self.training: diff --git a/torch_uncertainty/routines/regression.py b/torch_uncertainty/routines/regression.py index fc5ac13c..620eaa62 100644 --- a/torch_uncertainty/routines/regression.py +++ b/torch_uncertainty/routines/regression.py @@ -1,15 +1,13 @@ from pathlib import Path -import torch from einops import rearrange from lightning.pytorch import LightningModule +from lightning.pytorch.loggers import Logger from lightning.pytorch.utilities.types import STEP_OUTPUT from torch import Tensor, nn from torch.distributions import ( - Categorical, Distribution, Independent, - MixtureSameFamily, ) from torch.optim import Optimizer from torchmetrics import MeanAbsoluteError, MeanSquaredError, MetricCollection @@ -17,6 +15,7 @@ from torch_uncertainty.losses import ELBOLoss from torch_uncertainty.metrics import ( DistributionNLL, + 
QuantileCalibrationError, ) from torch_uncertainty.models import ( EPOCH_UPDATE_MODEL, @@ -41,6 +40,8 @@ def __init__( optim_recipe: dict | Optimizer | None = None, eval_shift: bool = False, format_batch_fn: nn.Module | None = None, + log_plots: bool = False, + num_bins_cal_err: int = 15, save_in_csv: bool = False, csv_filename: str = "results.csv", ) -> None: @@ -56,6 +57,10 @@ def __init__( optim_recipe (dict or torch.optim.Optimizer, optional): The optimizer and optionally the scheduler to use. Defaults to ``None``. eval_shift (bool, optional): Indicates whether to evaluate the Distribution shift performance. Defaults to ``False``. format_batch_fn (torch.nn.Module, optional): The function to format the batch. Defaults to ``None``. + log_plots (bool, optional): Indicates whether to log figures in the logger. + Defaults to ``False``. + num_bins_cal_err (int, optional): Number of bins to compute calibration + error metrics. Defaults to ``15``. save_in_csv (bool, optional): Save the results in csv. Defaults to ``False``. csv_filename (str, optional): Name of the csv file. Defaults to ``"results.csv"``. Note that this is only used if :attr:`save_in_csv` is ``True``. @@ -87,14 +92,20 @@ def __init__( self.output_dim = output_dim self.loss = loss self.is_ensemble = is_ensemble + self.log_plots = log_plots self.save_in_csv = save_in_csv self.csv_filename = csv_filename self.needs_epoch_update = isinstance(model, EPOCH_UPDATE_MODEL) self.needs_step_update = isinstance(model, STEP_UPDATE_MODEL) + self.num_bins_cal_err = num_bins_cal_err if format_batch_fn is None: format_batch_fn = nn.Identity() + self.is_elbo = isinstance(self.loss, ELBOLoss) + if self.is_elbo: + self.loss.set_model(self.model) + self.optim_recipe = optim_recipe self.format_batch_fn = format_batch_fn self.one_dim_regression = output_dim == 1 @@ -108,14 +119,21 @@ def _init_metrics(self) -> None: "reg/MSE": MeanSquaredError(squared=True), "reg/RMSE": MeanSquaredError(squared=False), }, - compute_groups=True, + compute_groups=[["reg/MAE"], ["reg/MSE", "reg/RMSE"]], ) self.val_metrics = reg_metrics.clone(prefix="val/") self.test_metrics = reg_metrics.clone(prefix="test/") if self.probabilistic: - reg_prob_metrics = MetricCollection({"reg/NLL": DistributionNLL(reduction="mean")}) + reg_prob_metrics = MetricCollection( + { + "reg/NLL": DistributionNLL(reduction="mean"), + "cal/QCE": QuantileCalibrationError( + num_bins=self.num_bins_cal_err, + ), + } + ) self.val_prob_metrics = reg_prob_metrics.clone(prefix="val/") self.test_prob_metrics = reg_prob_metrics.clone(prefix="test/") @@ -226,14 +244,10 @@ def evaluation_forward(self, inputs: Tensor) -> tuple[Tensor, Distribution | Non if self.probabilistic: dist_params = { - k: rearrange(v, "(m b) c -> b m c", b=batch_size) for k, v in preds.items() + k: rearrange(v, "(m b) c -> b m c", b=batch_size).mean(1) for k, v in preds.items() } - # Adding the Independent wrapper to the distribution to create a MixtureSameFamily. - # As required by the torch.distributions API, the last dimension is the event dimension. 
- comp = Independent(get_dist_class(self.dist_family)(**dist_params), 1) - mix = Categorical(torch.ones(comp.batch_shape, device=self.device)) - dist = MixtureSameFamily(mix, comp) - preds = get_dist_estimate(comp, self.dist_estimate).mean(1) + dist = Independent(get_dist_class(self.dist_family)(**dist_params), 1) + preds = get_dist_estimate(dist, self.dist_estimate) return preds, dist preds = rearrange(preds, "(m b) c -> b m c", b=batch_size) @@ -310,6 +324,12 @@ def on_test_epoch_end(self) -> None: if self.probabilistic: result_dict |= self.test_prob_metrics.compute() + if isinstance(self.logger, Logger) and self.log_plots: + self.logger.experiment.add_figure( + "Calibration/Reliability diagram", + self.test_prob_metrics["cal/QCE"].plot()[0], + ) + self.log_dict(result_dict, sync_dist=True) self.test_metrics.reset() diff --git a/torch_uncertainty/utils/distributions.py b/torch_uncertainty/utils/distributions.py index 860b96d5..4e561d56 100644 --- a/torch_uncertainty/utils/distributions.py +++ b/torch_uncertainty/utils/distributions.py @@ -1,3 +1,4 @@ +from importlib import util from numbers import Number import torch @@ -12,6 +13,13 @@ ) from torch.distributions.utils import broadcast_all +if util.find_spec("scipy"): + from scipy.special import stdtr, stdtrit + + scipy_installed = True +else: # coverage: ignore + scipy_installed = False + def get_dist_class(dist_family: str) -> type[Distribution]: """Get the distribution class from a string. @@ -31,7 +39,7 @@ def get_dist_class(dist_family: str) -> type[Distribution]: if dist_family == "cauchy": return Cauchy if dist_family == "student": - return StudentT + return TUStudentT raise NotImplementedError( f"{dist_family} distribution is not supported. Raise an issue if needed." ) @@ -56,6 +64,37 @@ def get_dist_estimate(dist: Distribution, dist_estimate: str) -> Tensor: ) +class TUStudentT(StudentT): + def cdf(self, value: Tensor) -> Tensor: + if not scipy_installed: # coverage: ignore + raise ImportError( + "Please install torch_uncertainty with the distribution option:" + """pip install -U "torch_uncertainty[distribution]".""" + ) + if self._validate_args: + self._validate_sample(value) + + x = ((value - self.loc) / self.scale).detach().cpu().numpy() + df = self.df.detach().cpu().numpy() + + return torch.tensor(stdtr(df, x), device=self.loc.device) + + def icdf(self, value: Tensor) -> Tensor: + if not scipy_installed: # coverage: ignore + raise ImportError( + "Please install torch_uncertainty with the distribution option:" + """pip install -U "torch_uncertainty[distribution]".""" + ) + + if self._validate_args: + self._validate_sample(value) + + p = value.detach().cpu().numpy() + df = self.df.detach().cpu().numpy() + + return torch.tensor(stdtrit(df, p), device=self.loc.device) * self.scale + self.loc + + class NormalInverseGamma(Distribution): arg_constraints = { "loc": constraints.real,