Commit 0b62f3f

Replace the "quantization_annotation" string with a constant variable (#2525)
Summary: Create a constant `Q_ANNOTATION_KEY` to avoid manually typing the `"quantization_annotation"` string, which is error-prone.

Differential Revision: D78133734
1 parent c663e30 commit 0b62f3f
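
For illustration, a minimal sketch of the pattern this commit establishes (the toy module and traced graph below are hypothetical; only `Q_ANNOTATION_KEY`, `QuantizationAnnotation`, and the import path come from this change):

import torch

from torchao.quantization.pt2e.quantizer.quantizer import (
    Q_ANNOTATION_KEY,
    QuantizationAnnotation,
)


class ToyModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(4, 4)

    def forward(self, x):
        return self.linear(x)


gm = torch.fx.symbolic_trace(ToyModel())

for node in gm.graph.nodes:
    if node.op == "call_module":
        # Before: call sites spelled out the raw string, so a typo such as
        # "quantization_anotation" would silently skip the node:
        #     node.meta["quantization_annotation"] = QuantizationAnnotation()
        # After: the shared constant is used, and a misspelled name fails
        # loudly at import/name-lookup time instead.
        node.meta[Q_ANNOTATION_KEY] = QuantizationAnnotation()

annotated = [n for n in gm.graph.nodes if Q_ANNOTATION_KEY in n.meta]
assert len(annotated) == 1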

File tree

7 files changed: +35 -46 lines changed


torchao/quantization/pt2e/prepare.py

Lines changed: 7 additions & 13 deletions
@@ -13,10 +13,7 @@
 from torch._subclasses import FakeTensor
 from torch.ao.quantization import QConfigMapping
 from torch.ao.quantization.fx.custom_config import PrepareCustomConfig
-from torch.ao.quantization.fx.prepare import (
-    _insert_obs_or_fq,
-    _save_state,
-)
+from torch.ao.quantization.fx.prepare import _insert_obs_or_fq, _save_state
 from torch.ao.quantization.qconfig import QConfigAny
 from torch.fx import Graph, GraphModule, Node
 from torch.fx.node import Argument
@@ -26,9 +23,7 @@
     DerivedObserverOrFakeQuantize,
     ObserverOrFakeQuantize,
 )
-from torchao.quantization.pt2e.fake_quantize import (
-    FixedQParamsFakeQuantize,
-)
+from torchao.quantization.pt2e.fake_quantize import FixedQParamsFakeQuantize
 from torchao.quantization.pt2e.observer import (
     FixedQParamsObserver,
     PartialWrapper,
@@ -42,6 +37,7 @@
     QuantizationSpecBase,
     SharedQuantizationSpec,
 )
+from torchao.quantization.pt2e.quantizer.quantizer import Q_ANNOTATION_KEY
 from torchao.utils import TORCH_VERSION_AT_LEAST_2_6

 # TODO: make pt2e folder private?
@@ -208,8 +204,8 @@ def _get_edge_or_node_to_qspec(
     """Get a map from EdgeOrNode to quantization spec based on annotations on the nodes"""
     edge_or_node_to_qspec: dict[EdgeOrNode, QuantizationSpecBase] = {}
     for n in model.graph.nodes:
-        if hasattr(n, "meta") and "quantization_annotation" in n.meta:
-            qa = n.meta["quantization_annotation"]
+        if hasattr(n, "meta") and Q_ANNOTATION_KEY in n.meta:
+            qa = n.meta[Q_ANNOTATION_KEY]
             for input_to_n, qspec in qa.input_qspec_map.items():
                 input_edge = (input_to_n, n)
                 edge_or_node_to_qspec[input_edge] = qspec
@@ -324,7 +320,7 @@ def _get_edge_or_node_to_group_id(

         assert isinstance(input_edge, tuple)
         arg, n = input_edge
-        if n.meta["quantization_annotation"].allow_implicit_sharing:
+        if n.meta[Q_ANNOTATION_KEY].allow_implicit_sharing:
             # NOTE: the order is important here, we first share with other users and then share with previous
             # output because the reverse order could cause circular dependency
             # e.g node1 -> node2
@@ -571,9 +567,7 @@ def _maybe_insert_input_and_output_observers_for_node(
     is_qat: bool,
 ):
     this_node_quantization_annotation = (
-        node.meta["quantization_annotation"]
-        if "quantization_annotation" in node.meta
-        else None
+        node.meta[Q_ANNOTATION_KEY] if Q_ANNOTATION_KEY in node.meta else None
     )
     if this_node_quantization_annotation is None:
         return

torchao/quantization/pt2e/quantizer/composable_quantizer.py

Lines changed: 5 additions & 4 deletions
@@ -8,6 +8,8 @@

 from typing import TYPE_CHECKING

+from torchao.quantization.pt2e.quantizer.quantizer import Q_ANNOTATION_KEY
+
 from .quantizer import QuantizationAnnotation, Quantizer

 if TYPE_CHECKING:
@@ -48,18 +50,17 @@ def _record_and_validate_annotations(
         self, gm: torch.fx.GraphModule, quantizer: Quantizer
     ) -> None:
         for n in gm.graph.nodes:
-            if "quantization_annotation" in n.meta:
+            if Q_ANNOTATION_KEY in n.meta:
                 # check if the annotation has been changed by
                 # comparing QuantizationAnnotation object id
                 if n in self._graph_annotations and (
-                    id(self._graph_annotations[n])
-                    != id(n.meta["quantization_annotation"])
+                    id(self._graph_annotations[n]) != id(n.meta[Q_ANNOTATION_KEY])
                 ):
                     raise RuntimeError(
                         f"Quantizer {quantizer.__class__.__name__} has changed annotations on node {n}"
                     )
                 else:
-                    self._graph_annotations[n] = n.meta["quantization_annotation"]
+                    self._graph_annotations[n] = n.meta[Q_ANNOTATION_KEY]
             else:
                 if n in self._graph_annotations:
                     raise RuntimeError(

torchao/quantization/pt2e/quantizer/duplicate_dq_pass.py

Lines changed: 4 additions & 7 deletions
@@ -12,13 +12,10 @@
 from torch.fx.node import map_arg
 from torch.fx.passes.infra.pass_base import PassBase, PassResult

-from torchao.quantization.pt2e.utils import (
-    _filter_sym_size_users,
-)
+from torchao.quantization.pt2e.quantizer.quantizer import Q_ANNOTATION_KEY
+from torchao.quantization.pt2e.utils import _filter_sym_size_users

-from .utils import (
-    is_valid_annotation,
-)
+from .utils import is_valid_annotation

 logger = logging.getLogger(__name__)
 logger.setLevel(logging.WARNING)
@@ -41,7 +38,7 @@
 def _maybe_duplicate_dq(
     gm: torch.fx.GraphModule, dq_node: torch.fx.Node, user: torch.fx.Node
 ):
-    annotation = user.meta.get("quantization_annotation", None)
+    annotation = user.meta.get(Q_ANNOTATION_KEY, None)
     if not is_valid_annotation(annotation):
         return
     with gm.graph.inserting_after(dq_node):

torchao/quantization/pt2e/quantizer/embedding_quantizer.py

Lines changed: 2 additions & 1 deletion
@@ -21,6 +21,7 @@
     QuantizationSpec,
     Quantizer,
 )
+from torchao.quantization.pt2e.quantizer.quantizer import Q_ANNOTATION_KEY

 __all__ = [
     "get_embedding_operators_config",
@@ -87,7 +88,7 @@ def _annotate_embedding_ops(self, graph: torch.fx.Graph) -> None:
                     raise ValueError(
                         "Embedding config must have a valid weight quantization spec."
                     )
-                node.meta["quantization_annotation"] = QuantizationAnnotation(
+                node.meta[Q_ANNOTATION_KEY] = QuantizationAnnotation(
                     input_qspec_map={
                         node.args[0]: embedding_config.config.weight,
                     }

torchao/quantization/pt2e/quantizer/port_metadata_pass.py

Lines changed: 8 additions & 13 deletions
@@ -12,18 +12,13 @@
 from torch._export.error import InternalError
 from torch.fx.passes.infra.pass_base import PassBase, PassResult

-from torchao.quantization.pt2e.utils import (
-    _filter_sym_size_users,
-)
+from torchao.quantization.pt2e.quantizer.quantizer import Q_ANNOTATION_KEY
+from torchao.quantization.pt2e.utils import _filter_sym_size_users
 from torchao.quantization.quant_primitives import quant_lib  # noqa: F401
 from torchao.utils import TORCH_VERSION_AT_LEAST_2_5

-from .quantizer import (
-    QuantizationSpecBase,
-)
-from .utils import (
-    is_valid_annotation,
-)
+from .quantizer import QuantizationSpecBase
+from .utils import is_valid_annotation

 logger = logging.getLogger(__name__)
 logger.setLevel(logging.ERROR)
@@ -68,7 +63,7 @@ def _add_metadata(to_node: torch.fx.Node, from_node: torch.fx.Node) -> None:


 def _has_quant_annotation(node: torch.fx.Node) -> bool:
-    return "quantization_annotation" in node.meta
+    return Q_ANNOTATION_KEY in node.meta


 def _find_choose_qparams_node(node: torch.fx.Node) -> Optional[torch.fx.Node]:
@@ -281,10 +276,10 @@ class PortNodeMetaForQDQ(PassBase):

     def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
         for node in graph_module.graph.nodes:
-            annotation = node.meta.get("quantization_annotation", None)
+            annotation = node.meta.get(Q_ANNOTATION_KEY, None)
             if is_valid_annotation(annotation):
-                input_qspec_map = node.meta["quantization_annotation"].input_qspec_map
-                output_qspec = node.meta["quantization_annotation"].output_qspec
+                input_qspec_map = node.meta[Q_ANNOTATION_KEY].input_qspec_map
+                output_qspec = node.meta[Q_ANNOTATION_KEY].output_qspec
                 for input_node, qspec in input_qspec_map.items():
                     _port_metadata_for_input_quant_nodes(input_node, node, qspec)
                 _port_metadata_for_output_quant_nodes(node, output_qspec)

torchao/quantization/pt2e/quantizer/quantizer.py

Lines changed: 3 additions & 0 deletions
@@ -30,6 +30,9 @@
 ]


+Q_ANNOTATION_KEY = "quantization_annotation"
+
+
 class QuantizationSpecBase(ABC):  # noqa: B024
     """Base class for different types of quantization specs that allows users to
     specify how to quantize a Tensor (input/output of a Node) in the model

torchao/quantization/pt2e/quantizer/utils.py

Lines changed: 6 additions & 8 deletions
@@ -13,6 +13,8 @@
 import torch
 from torch.fx import Node

+from torchao.quantization.pt2e.quantizer.quantizer import Q_ANNOTATION_KEY
+
 from .quantizer import QuantizationAnnotation, QuantizationSpec


@@ -103,21 +105,17 @@ def get_bias_qspec(quantization_config: Optional[QuantizationConfig]):


 def annotate_input_qspec_map(node: Node, input_node: Node, qspec):
-    quantization_annotation = node.meta.get(
-        "quantization_annotation", QuantizationAnnotation()
-    )
+    quantization_annotation = node.meta.get(Q_ANNOTATION_KEY, QuantizationAnnotation())
     if quantization_annotation.input_qspec_map is None:
         quantization_annotation.input_qspec_map = {}
     quantization_annotation.input_qspec_map[input_node] = qspec
-    node.meta["quantization_annotation"] = quantization_annotation
+    node.meta[Q_ANNOTATION_KEY] = quantization_annotation


 def annotate_output_qspec(node: Node, qspec):
-    quantization_annotation = node.meta.get(
-        "quantization_annotation", QuantizationAnnotation()
-    )
+    quantization_annotation = node.meta.get(Q_ANNOTATION_KEY, QuantizationAnnotation())
     quantization_annotation.output_qspec = qspec
-    node.meta["quantization_annotation"] = quantization_annotation
+    node.meta[Q_ANNOTATION_KEY] = quantization_annotation


 def get_module_name_filter(module_name: str):
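
For reference, a minimal sketch of how the two helpers above are meant to be called after this change (the `annotate_node` wrapper and its `qspec` argument are hypothetical; the helper names and their behavior come from the hunk above, and the module path is assumed from the file header):

from torch.fx import Node

from torchao.quantization.pt2e.quantizer.quantizer import Q_ANNOTATION_KEY
from torchao.quantization.pt2e.quantizer.utils import (
    annotate_input_qspec_map,
    annotate_output_qspec,
)


def annotate_node(node: Node, input_node: Node, qspec) -> None:
    # Both helpers lazily create a QuantizationAnnotation and store it under
    # Q_ANNOTATION_KEY, so callers never spell out the raw string themselves.
    annotate_input_qspec_map(node, input_node, qspec)
    annotate_output_qspec(node, qspec)
    assert Q_ANNOTATION_KEY in node.meta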
