
Commit ee38153

Fix docstrings for quantization API docs (#2471)
1 parent 19c009d


2 files changed (+18 −2 lines)


torchao/quantization/linear_activation_quantized_tensor.py

Lines changed: 1 addition & 1 deletion
@@ -288,7 +288,7 @@ def _(func, types, args, kwargs):
     )
 
 
-to_linear_activation_quantized = LinearActivationQuantizedTensor.from_float
+to_linear_activation_quantized = LinearActivationQuantizedTensor.from_float  # Converts a float tensor to LinearActivationQuantizedTensor for dynamic activation quantization
 
 if TORCH_VERSION_AT_LEAST_2_5:
     # Allow a model with LinearActivationQuantizedTensor weights to be loaded with `weights_only=True`
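
For readers of the docs, the aliased constructor is typically handed a float weight plus a callable that quantizes incoming activations at matmul time. A minimal sketch of that pattern, assuming `from_float(weight, input_quant_func)` as the signature; the quantizer below is a stand-in for illustration, not one of torchao's internal helpers:

import torch
from torchao.quantization.linear_activation_quantized_tensor import (
    to_linear_activation_quantized,
)

def fake_int8_activation_quant(x: torch.Tensor) -> torch.Tensor:
    # Stand-in per-token fake quantization: scale, round, clamp, dequantize.
    scale = x.abs().amax(dim=-1, keepdim=True).clamp(min=1e-5) / 127.0
    return (x / scale).round().clamp(-128, 127) * scale

weight = torch.randn(64, 32)
# Wrap the float weight; linear ops against it quantize the activations first.
qweight = to_linear_activation_quantized(weight, fake_int8_activation_quant)
out = torch.nn.functional.linear(torch.randn(8, 32), qweight)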

torchao/quantization/quant_api.py

Lines changed: 17 additions & 1 deletion
@@ -1214,6 +1214,12 @@ def _int4_weight_only_transform(
 class Int8WeightOnlyConfig(AOBaseConfig):
     """
     Configuration for applying int8 weight-only symmetric per-channel quantization to linear layers.
+
+    Args:
+        group_size: Optional[int] = None - Controls the granularity of quantization. If None, applies per-channel quantization.
+        Otherwise, applies per-group quantization with the specified group size.
+        set_inductor_config: bool = True - If True, adjusts `torchinductor` settings to recommended values
+        for better performance with this quantization scheme.
     """
 
     group_size: Optional[int] = None
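
As a usage note, this config is meant to be passed to `quantize_`. A minimal sketch, assuming both names are importable from `torchao.quantization` (the model and shapes here are arbitrary):

import torch
from torchao.quantization import quantize_, Int8WeightOnlyConfig

model = torch.nn.Sequential(torch.nn.Linear(1024, 1024)).eval()

# Default: per-channel int8 weight-only quantization (group_size=None).
# Pass e.g. Int8WeightOnlyConfig(group_size=128) for per-group quantization.
quantize_(model, Int8WeightOnlyConfig())

out = model(torch.randn(8, 1024))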
@@ -1357,7 +1363,17 @@ def _float8_cutlass_quant_sparse(
 class Int8DynamicActivationInt8WeightConfig(AOBaseConfig):
     """
     Configuration for applying int8 dynamic symmetric per-token activation and int8 per-channel weight
-    quantization to linear layers
+    quantization to linear layers.
+
+    Args:
+        layout: Optional[Layout] = PlainLayout() - Tensor layout for the quantized weights. Controls how the
+        quantized data is stored and accessed.
+        act_mapping_type: Optional[MappingType] = MappingType.SYMMETRIC - Mapping type for activation quantization.
+        SYMMETRIC uses symmetric quantization around zero.
+        weight_only_decode: bool = False - If True, only quantizes weights during forward pass and keeps activations
+        in original precision during decode operations.
+        set_inductor_config: bool = True - If True, adjusts `torchinductor` settings to recommended values
+        for better performance with this quantization scheme.
     """
 
     layout: Optional[Layout] = PlainLayout()
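
Similarly, a minimal sketch of applying this config with `quantize_`, again assuming the import path from `torchao.quantization`:

import torch
from torchao.quantization import quantize_, Int8DynamicActivationInt8WeightConfig

model = torch.nn.Sequential(torch.nn.Linear(1024, 1024)).eval()

# Defaults documented above: PlainLayout weights, symmetric per-token dynamic
# activation quantization, and inductor settings tuned automatically.
quantize_(model, Int8DynamicActivationInt8WeightConfig())

out = model(torch.randn(8, 1024))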
