@@ -2,6 +2,7 @@
 from typing import Dict, List, Optional, Tuple, Union
 
 import torch
+import torch.nn.functional as F
 from compressed_tensors.quantization import (
     disable_quantization,
     find_name_or_class_matches,
@@ -593,9 +594,9 @@ def _compute_best_scale(
         x_mean = x_mean.view(-1).to(device)
         w_mean = w_mean.view(-1).to(device)
 
-        for ratio in range(n_grid):
+        for grid_idx in range(n_grid):
             # create new scales
-            ratio = ratio / n_grid
+            ratio = grid_idx / n_grid
 
             # NOTE: s^-1 * x is fused here, according to paper
             if self.duo_scaling:
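The rename above keeps the integer loop index (`grid_idx`) distinct from the fraction in [0, 1) derived from it (`ratio`); the old code reassigned `ratio` to itself, which was easy to misread. A minimal sketch of how each ratio becomes a candidate scale vector, following the AWQ-style duo-scaling formula; the clamp and normalization steps are assumptions taken from the AWQ reference implementation and do not appear in this diff:

    import torch

    def candidate_scales(x_mean, w_mean, n_grid=20, duo_scaling=True):
        # x_mean / w_mean: per-channel activation / weight magnitude means
        for grid_idx in range(n_grid):
            ratio = grid_idx / n_grid  # sweeps 0.0, 0.05, ..., 0.95 for n_grid=20
            if duo_scaling:
                # blend activation and weight statistics (AWQ duo scaling)
                scales = (x_mean.pow(ratio) * w_mean.pow(1 - ratio)).clamp(min=1e-4)
            else:
                scales = x_mean.pow(ratio).clamp(min=1e-4)
            # normalize so the extremes have geometric mean 1 (assumption)
            scales = scales / (scales.max() * scales.min()).sqrt()
            yield ratio, scales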
@@ -632,7 +633,7 @@ def _compute_best_scale(
             int_w_output = self._get_flattened_output(parent_module)
 
             # compute mean squared error (L2 norm)
-            loss = _compute_loss(fp16_output, int_w_output)
+            loss = F.mse_loss(int_w_output, fp16_output).item()
 
             history.append(loss)
             if loss < best_error:
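`F.mse_loss` with its default `reduction="mean"` computes the same quantity as the `_compute_loss` helper deleted in the last hunk below, the mean of elementwise squared differences, and the added `.item()` stores a plain Python float in `history` instead of a 0-dim tensor. One subtlety: the old helper upcast to float32 before squaring, while `F.mse_loss` works in the inputs' dtype, so half-precision outputs may round slightly differently. A quick equivalence check:

    import torch
    import torch.nn.functional as F

    fp16_output = torch.randn(4, 128)
    int_w_output = torch.randn(4, 128)

    old = (fp16_output - int_w_output).view(-1).float().pow(2).mean()
    new = F.mse_loss(int_w_output, fp16_output)  # reduction="mean" by default
    assert torch.allclose(old, new)  # same value; squared error is symmetric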
@@ -642,8 +643,9 @@ def _compute_best_scale(
         parent_module.load_state_dict(org_sd)
 
         if best_ratio == -1:
-            logger.debug(history)
-            raise Exception
+            raise Exception(
+                f"Loss during best scale computation never less than inf: {history}"
+            )
 
         assert (
             torch.isnan(best_scales).sum() == 0
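On the `best_ratio == -1` branch: the sentinel survives the whole sweep only if no loss ever compared less than `best_error`, and since NaN loses every `<` comparison, an all-NaN history is the typical culprit; the new message puts `history` in the exception itself rather than behind debug logging. A small illustration, assuming `best_error` starts at infinity as the new message implies:

    # NaN never wins a "<" comparison, so the sentinel is left untouched
    best_error, best_ratio = float("inf"), -1
    for grid_idx, loss in enumerate([float("nan")] * 20):
        if loss < best_error:  # False for every NaN loss
            best_error, best_ratio = loss, grid_idx
    assert best_ratio == -1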
@@ -660,18 +662,6 @@ def _assert_all_activations_consumed(self):
             raise RuntimeError("Some cached activations were not used")
 
 
-@torch.no_grad()
-@torch.compile()
-def _compute_loss(
-    fp16_output: torch.Tensor,
-    int_w_output: torch.Tensor,
-) -> torch.Tensor:
-    """
-    Compute MSE loss over the flattened output of all batches
-    """
-    return (fp16_output - int_w_output).view(-1).float().pow(2).mean()
-
-
 @torch.compile()
 def _pseudo_quantize_tensor(
     w: torch.Tensor, symmetric: bool = False, bit_width: int = 8, group_size: int = -1
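For context, `_pseudo_quantize_tensor` (untouched here except for losing its deleted neighbor) is the usual quantize-then-dequantize round trip that simulates low-bit weights in full precision. A hedged sketch of the asymmetric, per-tensor case; the real function's body, `group_size` handling, and symmetric path are not shown in this diff:

    import torch

    def pseudo_quantize(w: torch.Tensor, bit_width: int = 8) -> torch.Tensor:
        # asymmetric: map [w_min, w_max] onto the integer grid [0, 2^b - 1]
        qmax = 2**bit_width - 1
        w_min, w_max = w.min(), w.max()
        scale = (w_max - w_min).clamp(min=1e-5) / qmax
        zero_point = (-w_min / scale).round()
        q = (w / scale + zero_point).round().clamp(0, qmax)
        # dequantize: same shape as w, values snapped to the quantization grid
        return (q - zero_point) * scale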