[PT2E] Fix per-tensor observer issue with varying shape & rank #2177

Open · wants to merge 3 commits into base: main
6 changes: 5 additions & 1 deletion torchao/quantization/pt2e/observer.py
@@ -1793,7 +1793,7 @@ def get_block_size(
        "Please provide an instance of Granularity, not subclass of it"
    )
    if isinstance(granularity, PerTensor):
-       return input_shape
+       return (-1,) * len(input_shape)
    elif isinstance(granularity, PerAxis):
        block_size = list(input_shape)
        block_size[granularity.axis] = 1
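The change above can be sketched as a standalone snippet. This is a simplified stand-in, not torchao's actual code: `"per_tensor"` and `("per_axis", axis)` strings replace the real `PerTensor`/`PerAxis` granularity objects. The point of the PR is that for per-tensor granularity, `block_size` no longer bakes in concrete dimension sizes; each `-1` means "the full dimension", so the tuple stays valid when the sizes of a dimension vary between inputs.

```python
def get_block_size(input_shape, granularity):
    """Return a block_size tuple with the same rank as input_shape.

    granularity: "per_tensor" or ("per_axis", axis) -- simplified stand-ins
    for torchao's PerTensor / PerAxis granularity objects.
    """
    if granularity == "per_tensor":
        # One block covering the whole tensor; -1 encodes "full dimension",
        # so concrete sizes like 224 are no longer baked into block_size.
        return (-1,) * len(input_shape)
    kind, axis = granularity
    if kind == "per_axis":
        block_size = list(input_shape)
        block_size[axis] = 1  # one block per slice along `axis`
        return tuple(block_size)
    raise ValueError(f"unsupported granularity: {granularity!r}")
```

Note that the result still depends on the *rank* of `input_shape`, which is why the second hunk below recomputes it per input.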
@@ -1891,6 +1891,10 @@ def convert(self, model: torch.fx.GraphModule, observer_node: Node):
        assert self.original_dtype is not None, (
            "Expecting original_dtype to be populated"
        )
+       # The input shape and rank may change between inputs (e.g. in ResNet18),
+       # so block_size must be recomputed here for each input
+       self.block_size = get_block_size(
jerryzh168 (Contributor) commented on May 6, 2025:

when does this happen? can you give an example? I thought using -1 for dynamic dimensions will be enough?

Author (Collaborator) replied:

To reproduce the issue, you can run the code here: #2094 (comment). It occurs if you use -1 for block_size without this update of self.block_size.

jerryzh168 (Contributor) replied:

are you saying the rank / number of dimensions changes for the input as well? can we use a single -1 to represent this case?

Author (Collaborator) replied:

> are you saying the rank / number of dimensions changes for the input as well?

Yes.

> can we use a single -1 to represent this case?

I think it's doable, but there are checks that guard len(self.block_size) == len(input.shape). We would need to handle the special case for per-tensor quant at those locations. Is that ok?

+           observer_node.args[0].meta["tensor_meta"].shape, self.granularity
+       )
if hasattr(self, "is_dynamic") and self.is_dynamic:
choose_qparams_affine = model.graph.call_function(
torch.ops.torchao.choose_qparams_affine,
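The discussion above can be illustrated with a hypothetical sketch. `check_block_size` is an invented stand-in for the internal rank guard mentioned in the thread (`len(self.block_size) == len(input.shape)`), and the shapes are illustrative ResNet18 activations: rank-4 conv outputs versus the rank-2 input to the final linear layer after flattening.

```python
def check_block_size(block_size, input_shape):
    # Stand-in for the guard discussed in the review thread:
    # block_size must have the same rank as the input.
    return len(block_size) == len(input_shape)

conv_shape = (1, 512, 7, 7)  # rank-4 conv activation
fc_shape = (1, 512)          # rank-2 input to the final linear layer

# block_size computed once (from a rank-4 input) and cached:
cached = (-1,) * len(conv_shape)
assert check_block_size(cached, conv_shape)
assert not check_block_size(cached, fc_shape)  # stale rank -> guard fails

# The fix in this PR: recompute block_size from each input's shape at
# convert time, so the rank always matches.
recomputed = (-1,) * len(fc_shape)
assert check_block_size(recomputed, fc_shape)
```

This is why a single cached tuple of `-1`s is not enough on its own: the tuple's length still encodes a rank, so it must be refreshed per input (or the guards special-cased, as the thread considers).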