Commit 19237e3
Update QAT README and API docstrings (#2465)
Previously they pointed to the 0.7.0 code. Now they point to the corresponding API page on our docs. Also move the docstring from the private function to the public `IntXQuantizationAwareTrainingConfig` so it shows up on the doc page.
1 parent 44d1dd3 · commit 19237e3

2 files changed: +23 −25 lines

torchao/quantization/qat/README.md

Lines changed: 6 additions & 6 deletions
@@ -71,9 +71,9 @@ def train_loop(m: torch.nn.Module):
 
 The recommended way to run QAT in torchao is through the `quantize_` API:
 1. **Prepare:** specify how weights and/or activations are to be quantized through
-[`FakeQuantizeConfig`](https://github.com/pytorch/ao/blob/v0.7.0/torchao/quantization/qat/api.py#L29) and passing these to [`IntXQuantizationAwareTrainingConfig`](https://github.com/pytorch/ao/blob/cedadc741954f47a9e9efac2aa584701f125bc73/torchao/quantization/qat/api.py#L242)
+[`FakeQuantizeConfig`](https://docs.pytorch.org/ao/main/generated/torchao.quantization.qat.FakeQuantizeConfig.html#torchao.quantization.qat.FakeQuantizeConfig) and passing these to [`IntXQuantizationAwareTrainingConfig`](https://docs.pytorch.org/ao/main/generated/torchao.quantization.qat.IntXQuantizationAwareTrainingConfig.html#torchao.quantization.qat.IntXQuantizationAwareTrainingConfig)
 2. **Convert:** quantize the model using the standard post-training quantization (PTQ)
-functions such as [`Int8DynamicActivationInt4WeightConfig`](https://github.com/pytorch/ao/blob/v0.7.0/torchao/quantization/quant_api.py#L606)
+functions such as [`Int8DynamicActivationInt4WeightConfig`](https://docs.pytorch.org/ao/main/generated/torchao.quantization.Int8DynamicActivationInt4WeightConfig.html#torchao.quantization.Int8DynamicActivationInt4WeightConfig)
 
 For example:
 
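The README's example body is outside this hunk's context. For reference, a minimal sketch of the two-step prepare/convert flow described above, using the config names from the linked docs pages (the `FakeQuantizeConfig` arguments, the model, and the group size are illustrative assumptions, not part of this commit):

```python
import torch

from torchao.quantization import Int8DynamicActivationInt4WeightConfig, quantize_
from torchao.quantization.qat import (
    FakeQuantizeConfig,
    FromIntXQuantizationAwareTrainingConfig,
    IntXQuantizationAwareTrainingConfig,
)

model = torch.nn.Sequential(torch.nn.Linear(512, 512))

# Prepare: insert fake quantization ops simulating int8 per-token dynamic
# activations and int4 per-group symmetric weights during training
activation_config = FakeQuantizeConfig(torch.int8, "per_token", is_symmetric=False)
weight_config = FakeQuantizeConfig(torch.int4, group_size=32)
quantize_(model, IntXQuantizationAwareTrainingConfig(activation_config, weight_config))

# ... training loop runs on the fake-quantized model ...

# Convert: strip the fake quantization ops, then apply the real PTQ config
quantize_(model, FromIntXQuantizationAwareTrainingConfig())
quantize_(model, Int8DynamicActivationInt4WeightConfig(group_size=32))
```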

@@ -137,9 +137,9 @@ quantize_(
 
 Alternatively, torchao provides a few hardcoded quantization settings through
 the following Quantizers:
-- [Int8DynActInt4QATQuantizer](https://github.com/pytorch/ao/blob/v0.7.0/torchao/quantization/qat/linear.py#L126) (linear), targeting int8 per-token dynamic asymmetric activation + int4 per-group symmetric weight
-- [Int4WeightOnlyQATQuantizer](https://github.com/pytorch/ao/blob/v0.7.0/torchao/quantization/qat/linear.py#L308) (linear), targeting int4 per-group asymmetric weight using the efficient [int4 tinygemm kernel](https://github.com/pytorch/pytorch/blob/a672f6c84e318bbf455f13dfdd3fd7c68a388bf5/aten/src/ATen/native/cuda/int4mm.cu#L1097) after training)
-- [Int4WeightOnlyEmbeddingQATQuantizer](https://github.com/pytorch/ao/blob/v0.7.0/torchao/quantization/qat/embedding.py#L94) (embedding), targeting int4 per-group symmetric weight
+- [Int8DynActInt4QATQuantizer](https://docs.pytorch.org/ao/main/generated/torchao.quantization.qat.Int8DynActInt4WeightQATQuantizer.html#torchao.quantization.qat.Int8DynActInt4WeightQATQuantizer) (linear), targeting int8 per-token dynamic asymmetric activation + int4 per-group symmetric weight
+- [Int4WeightOnlyQATQuantizer](https://docs.pytorch.org/ao/main/generated/torchao.quantization.qat.Int4WeightOnlyQATQuantizer.html#torchao.quantization.qat.Int4WeightOnlyQATQuantizer) (linear), targeting int4 per-group asymmetric weight using the efficient [int4 tinygemm kernel](https://github.com/pytorch/pytorch/blob/a672f6c84e318bbf455f13dfdd3fd7c68a388bf5/aten/src/ATen/native/cuda/int4mm.cu#L1097) after training)
+- [Int4WeightOnlyEmbeddingQATQuantizer](https://docs.pytorch.org/ao/main/generated/torchao.quantization.qat.Int4WeightOnlyEmbeddingQATQuantizer.html#torchao.quantization.qat.Int4WeightOnlyEmbeddingQATQuantizer) (embedding), targeting int4 per-group symmetric weight
 
 For example:
 ```python
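The fenced example body is elided from this diff. A hedged sketch of the Quantizer flow it illustrates (note the first link's text says `Int8DynActInt4QATQuantizer` while the docs page it targets is `Int8DynActInt4WeightQATQuantizer`; the constructor argument and model here are assumptions):

```python
import torch

from torchao.quantization.qat import Int8DynActInt4WeightQATQuantizer

model = torch.nn.Sequential(torch.nn.Linear(512, 512))

# Prepare: swap nn.Linear modules for fake-quantized equivalents
qat_quantizer = Int8DynActInt4WeightQATQuantizer(groupsize=32)
model = qat_quantizer.prepare(model)

# ... training loop ...

# Convert: replace fake quantization with actual quantized weights and kernels
model = qat_quantizer.convert(model)
```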
@@ -162,7 +162,7 @@ model = qat_quantizer.convert(model)
 ```
 
 To use multiple Quantizers in the same model for different layer types,
-users can also leverage the [ComposableQATQuantizer](https://github.com/pytorch/ao/blob/v0.7.0/torchao/quantization/qat/api.py#L242)
+users can also leverage the [ComposableQATQuantizer](https://docs.pytorch.org/ao/main/generated/torchao.quantization.qat.ComposableQATQuantizer.html#torchao.quantization.qat.ComposableQATQuantizer)
 as follows:
 
 ```python
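This example body is likewise outside the diff context. A sketch of composing Quantizers per layer type, assuming default constructor arguments and a toy model:

```python
import torch

from torchao.quantization.qat import (
    ComposableQATQuantizer,
    Int4WeightOnlyEmbeddingQATQuantizer,
    Int8DynActInt4WeightQATQuantizer,
)

class TinyModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.embed = torch.nn.Embedding(1000, 512)
        self.linear = torch.nn.Linear(512, 512)

    def forward(self, x):
        return self.linear(self.embed(x))

model = TinyModel()

# Each child Quantizer only transforms the layer type it targets:
# linears get int8 dynamic activation + int4 weight fake quantization,
# embeddings get int4 weight-only fake quantization
composable_quantizer = ComposableQATQuantizer([
    Int8DynActInt4WeightQATQuantizer(),
    Int4WeightOnlyEmbeddingQATQuantizer(),
])
model = composable_quantizer.prepare(model)
# ... training loop ...
model = composable_quantizer.convert(model)
```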

torchao/quantization/qat/api.py

Lines changed: 17 additions & 19 deletions
@@ -255,24 +255,8 @@ def __setattr__(self, name: str, value: Any):
 
 @dataclass
 class IntXQuantizationAwareTrainingConfig(AOBaseConfig):
-    activation_config: Optional[FakeQuantizeConfig] = None
-    weight_config: Optional[FakeQuantizeConfig] = None
-
-
-# for BC
-intx_quantization_aware_training = IntXQuantizationAwareTrainingConfig
-
-
-@register_quantize_module_handler(IntXQuantizationAwareTrainingConfig)
-def _intx_quantization_aware_training_transform(
-    module: torch.nn.Module,
-    config: IntXQuantizationAwareTrainingConfig,
-) -> torch.nn.Module:
     """
-    THIS IS NOT A PUBLIC API - any usage of this outside of torchao
-    can break at any time.
-
-    Apply fake quantization to a `torch.nn.Module`.
+    Config for applying fake quantization to a `torch.nn.Module`.
     to be used with :func:`~torchao.quantization.quant_api.quantize_`.
 
     Example usage::
@@ -290,11 +274,25 @@ def _intx_quantization_aware_training_transform(
             IntXQuantizationAwareTrainingConfig(activation_config, weight_config),
         )
 
-    Note: If the returned function is applied on a module that is not
+    Note: If the config is applied on a module that is not
     `torch.nn.Linear` or `torch.nn.Embedding`, or it is applied on
     `torch.nn.Embedding` with an activation config, then we will raise
     ValueError as these are not supported.
     """
+
+    activation_config: Optional[FakeQuantizeConfig] = None
+    weight_config: Optional[FakeQuantizeConfig] = None
+
+
+# for BC
+intx_quantization_aware_training = IntXQuantizationAwareTrainingConfig
+
+
+@register_quantize_module_handler(IntXQuantizationAwareTrainingConfig)
+def _intx_quantization_aware_training_transform(
+    module: torch.nn.Module,
+    config: IntXQuantizationAwareTrainingConfig,
+) -> torch.nn.Module:
     from .embedding import FakeQuantizedEmbedding
     from .linear import FakeQuantizedLinear
 
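The moved docstring documents behavior that still lives in the private transform registered above. A simplified, hypothetical sketch of what that handler does, per the Note in this hunk (the `from_linear`/`from_embedding` constructors and their signatures are assumptions inferred from the imports shown here):

```python
import torch

from torchao.quantization.qat.embedding import FakeQuantizedEmbedding
from torchao.quantization.qat.linear import FakeQuantizedLinear

def transform_sketch(module, config):
    # Hypothetical simplification of _intx_quantization_aware_training_transform:
    # swap supported module types for fake-quantized versions, reject the rest
    act, weight = config.activation_config, config.weight_config
    if isinstance(module, torch.nn.Linear):
        return FakeQuantizedLinear.from_linear(module, act, weight)
    if isinstance(module, torch.nn.Embedding):
        if act is not None:
            # embeddings have no activations to fake-quantize
            raise ValueError("Activation fake quantization is not supported for embeddings")
        return FakeQuantizedEmbedding.from_embedding(module, weight)
    raise ValueError(f"Module of type {type(module)} is not supported")
```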

@@ -320,7 +318,7 @@ def _intx_quantization_aware_training_transform(
 
 class FromIntXQuantizationAwareTrainingConfig(AOBaseConfig):
     """
-    Object that knows how to convert a model with fake quantized modules,
+    Config for converting a model with fake quantized modules,
     such as :func:`~torchao.quantization.qat.linear.FakeQuantizedLinear`
     and :func:`~torchao.quantization.qat.linear.FakeQuantizedEmbedding`,
     back to model with the original, corresponding modules without
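Rounding out the pair: a minimal sketch of using this config to restore the original module types after QAT, assuming usage mirrors the prepare step (the model is a stand-in for one already prepared and trained):

```python
import torch

from torchao.quantization import quantize_
from torchao.quantization.qat import FromIntXQuantizationAwareTrainingConfig

model = torch.nn.Sequential(torch.nn.Linear(512, 512))  # stand-in for a QAT-prepared model

# Swap FakeQuantizedLinear/FakeQuantizedEmbedding back to plain
# nn.Linear/nn.Embedding modules, keeping the trained weights
quantize_(model, FromIntXQuantizationAwareTrainingConfig())
```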
