 """

 from __future__ import annotations
-
-import warnings
-from typing import Optional
-
+from typing import Type, Any
 import torch
 from botorch.acquisition import AcquisitionFunction
 from botorch_community.models.np_regression import NeuralProcessModel
 from torch import Tensor
-
-import torch
-#reference: https://arxiv.org/abs/2106.02770
+# reference: https://arxiv.org/abs/2106.02770

 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

+
 class LatentInformationGain(AcquisitionFunction):
     def __init__(
-        self,
-        context_x: torch.Tensor,
-        context_y: torch.Tensor,
-        model: NeuralProcessModel,
+        self,
+        model: Type[Any] = NeuralProcessModel,
         num_samples: int = 10,
         min_std: float = 0.01,
-        scaler: float = 0.5
+        scaler: float = 0.5,
     ) -> None:
         """
-        Latent Information Gain (LIG) Acquisition Function, designed for the
-        NeuralProcessModel. This is a subclass of AcquisitionFunction.
+        Latent Information Gain (LIG) Acquisition Function.
+        Uses the model's built-in posterior function to generalize KL computation.

         Args:
-            model: Trained NeuralProcessModel.
-            context_x: Context input points, as a Tensor.
-            context_y: Context target points, as a Tensor.
+            model: The model class to be used, defaults to NeuralProcessModel.
             num_samples (int): Number of samples for calculation, defaults to 10.
-            min_std: Float representing the minimum possible standardized std, defaults to 0.1.
-            scaler: Float scaling the std, defaults to 0.9.
+            min_std: Float representing the minimum possible standardized std,
+                defaults to 0.01.
+            scaler: Float scaling the std, defaults to 0.5.
         """
         super().__init__(model=model)
-        self.model = model.to(device)
+        self.model = model
         self.num_samples = num_samples
         self.min_std = min_std
         self.scaler = scaler
-        self.context_x = context_x.to(device)
-        self.context_y = context_y.to(device)

     def forward(self, candidate_x: Tensor) -> Tensor:
         """
-        Conduct the Latent Information Gain acquisition function for the inputs.
+        Conduct the Latent Information Gain acquisition function using the model's
+        posterior.

         Args:
-            candidate_x: Candidate input points, as a Tensor. Ideally in the shape (N, q, D), and assumes N = 1 if the given dimensions are 2D.
+            candidate_x: Candidate input points, as a Tensor. Ideally in the shape
+                (N, q, D).

         Returns:
             torch.Tensor: The LIG scores of computed KLDs, in the shape (N, q).
         """
         candidate_x = candidate_x.to(device)
         if candidate_x.dim() == 2:
-            candidate_x = candidate_x.unsqueeze(0)
+            candidate_x = candidate_x.unsqueeze(0)  # Ensure (N, q, D) format
         N, q, D = candidate_x.shape
-        # Encoding and Scaling the context data
-        z_mu_context, z_logvar_context = self.model.data_to_z_params(self.context_x, self.context_y)
+
         kl = torch.zeros(N, q, device=device)
-        for _ in range(self.num_samples):
-            # Taking Samples/Predictions
-            samples = self.model.sample_z(z_mu_context, z_logvar_context)
-            y_pred = self.model.decoder(candidate_x.view(-1, D), samples)
-            # Combining the data
-            combined_x = torch.cat([self.context_x, candidate_x.view(-1, D)], dim=0).to(device)
-            combined_y = torch.cat([self.context_y, y_pred], dim=0).to(device)
-            # Computing posterior variables
-            z_mu_posterior, z_logvar_posterior = self.model.data_to_z_params(combined_x, combined_y)
-            std_prior = self.min_std + self.scaler * torch.sigmoid(z_logvar_context)
-            std_posterior = self.min_std + self.scaler * torch.sigmoid(z_logvar_posterior)
-            p = torch.distributions.Normal(z_mu_posterior, std_posterior)
-            q = torch.distributions.Normal(z_mu_context, std_prior)
-            kl_divergence = torch.distributions.kl_divergence(p, q).sum()
-            kl += kl_divergence
+
+        if isinstance(self.model, NeuralProcessModel):
+            # Use the data stored on the model as the context set; this assumes the
+            # NeuralProcessModel exposes train_X / train_Y, as the generic branch
+            # below already does (the constructor no longer receives context_x /
+            # context_y).
+            context_x = self.model.train_X.to(device)
+            context_y = self.model.train_Y.to(device)
+            z_mu_context, z_logvar_context = self.model.data_to_z_params(
+                context_x, context_y
+            )
+            for _ in range(self.num_samples):
+                # Taking Samples/Predictions
+                samples = self.model.sample_z(z_mu_context, z_logvar_context)
+                y_pred = self.model.decoder(candidate_x.view(-1, D), samples)
+                # Combining the data
+                combined_x = torch.cat(
+                    [context_x, candidate_x.view(-1, D)], dim=0
+                ).to(device)
+                combined_y = torch.cat([context_y, y_pred], dim=0).to(device)
+                # Computing posterior variables
+                z_mu_posterior, z_logvar_posterior = self.model.data_to_z_params(
+                    combined_x, combined_y
+                )
+                std_prior = self.min_std + self.scaler * torch.sigmoid(z_logvar_context)
+                std_posterior = self.min_std + self.scaler * torch.sigmoid(
+                    z_logvar_posterior
+                )
+                p = torch.distributions.Normal(z_mu_posterior, std_posterior)
+                q = torch.distributions.Normal(z_mu_context, std_prior)
+                kl_divergence = torch.distributions.kl_divergence(p, q).sum(dim=-1)
+                kl += kl_divergence
+        else:
+            for _ in range(self.num_samples):
+                posterior_prior = self.model.posterior(self.model.train_X)
+                posterior_candidate = self.model.posterior(candidate_x.view(-1, D))
+
+                kl_divergence = torch.distributions.kl_divergence(
+                    posterior_candidate.mvn, posterior_prior.mvn
+                ).sum(dim=-1)
+                kl += kl_divergence
+
         return kl / self.num_samples
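
For orientation, here is a minimal usage sketch of the class defined above as it would be used after this change. It is an illustration rather than part of the commit: np_model stands in for an already-fitted NeuralProcessModel instance (its construction and training are not shown here), and the candidate tensor follows the (N, q, D) layout described in the forward docstring.

import torch

# np_model is assumed to be a fitted NeuralProcessModel instance.
lig = LatentInformationGain(model=np_model, num_samples=10, min_std=0.01, scaler=0.5)

# Five candidate batches, each holding q=1 point in D=3 dimensions -> shape (N, q, D).
candidate_x = torch.rand(5, 1, 3)
lig_scores = lig(candidate_x)  # expected shape (N, q), per the Returns section above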