
Commit ee38153

Fix docstrings for quantization API docs (#2471)
1 parent 19c009d


2 files changed (+18 −2 lines)


torchao/quantization/linear_activation_quantized_tensor.py

Lines changed: 1 addition & 1 deletion
@@ -288,7 +288,7 @@ def _(func, types, args, kwargs):
     )
 
 
-to_linear_activation_quantized = LinearActivationQuantizedTensor.from_float
+to_linear_activation_quantized = LinearActivationQuantizedTensor.from_float  # Converts a float tensor to LinearActivationQuantizedTensor for dynamic activation quantization
 
 if TORCH_VERSION_AT_LEAST_2_5:
     # Allow a model with LinearActivationQuantizedTensor weights to be loaded with `weights_only=True`
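
For readers of the docs, the aliased constructor is typically handed a float weight plus a callable that quantizes incoming activations at matmul time. A minimal sketch of that pattern, assuming `from_float(weight, input_quant_func)` as the signature; the quantizer below is a stand-in for illustration, not one of torchao's internal helpers:

import torch
from torchao.quantization.linear_activation_quantized_tensor import (
    to_linear_activation_quantized,
)

def fake_int8_activation_quant(x: torch.Tensor) -> torch.Tensor:
    # Stand-in per-token fake quantization: scale, round, clamp, dequantize.
    scale = x.abs().amax(dim=-1, keepdim=True).clamp(min=1e-5) / 127.0
    return (x / scale).round().clamp(-128, 127) * scale

weight = torch.randn(64, 32)
# Wrap the float weight; linear ops against it quantize the activations first.
qweight = to_linear_activation_quantized(weight, fake_int8_activation_quant)
out = torch.nn.functional.linear(torch.randn(8, 32), qweight)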

torchao/quantization/quant_api.py

Lines changed: 17 additions & 1 deletion
@@ -1214,6 +1214,12 @@ def _int4_weight_only_transform(
 class Int8WeightOnlyConfig(AOBaseConfig):
     """
     Configuration for applying int8 weight-only symmetric per-channel quantization to linear layers.
+
+    Args:
+        group_size: Optional[int] = None - Controls the granularity of quantization. If None, applies per-channel quantization.
+        Otherwise, applies per-group quantization with the specified group size.
+        set_inductor_config: bool = True - If True, adjusts `torchinductor` settings to recommended values
+        for better performance with this quantization scheme.
     """
 
     group_size: Optional[int] = None
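
As a usage note, this config is meant to be passed to `quantize_`. A minimal sketch, assuming both names are importable from `torchao.quantization` (the model and shapes here are arbitrary):

import torch
from torchao.quantization import quantize_, Int8WeightOnlyConfig

model = torch.nn.Sequential(torch.nn.Linear(1024, 1024)).eval()

# Default: per-channel int8 weight-only quantization (group_size=None).
# Pass e.g. Int8WeightOnlyConfig(group_size=128) for per-group quantization.
quantize_(model, Int8WeightOnlyConfig())

out = model(torch.randn(8, 1024))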
@@ -1357,7 +1363,17 @@ def _float8_cutlass_quant_sparse(
 class Int8DynamicActivationInt8WeightConfig(AOBaseConfig):
     """
     Configuration for applying int8 dynamic symmetric per-token activation and int8 per-channel weight
-    quantization to linear layers
+    quantization to linear layers.
+
+    Args:
+        layout: Optional[Layout] = PlainLayout() - Tensor layout for the quantized weights. Controls how the
+        quantized data is stored and accessed.
+        act_mapping_type: Optional[MappingType] = MappingType.SYMMETRIC - Mapping type for activation quantization.
+        SYMMETRIC uses symmetric quantization around zero.
+        weight_only_decode: bool = False - If True, only quantizes weights during forward pass and keeps activations
+        in original precision during decode operations.
+        set_inductor_config: bool = True - If True, adjusts `torchinductor` settings to recommended values
+        for better performance with this quantization scheme.
     """
 
     layout: Optional[Layout] = PlainLayout()
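
Similarly, a minimal sketch of applying this config with `quantize_`, again assuming the import path from `torchao.quantization`:

import torch
from torchao.quantization import quantize_, Int8DynamicActivationInt8WeightConfig

model = torch.nn.Sequential(torch.nn.Linear(1024, 1024)).eval()

# Defaults documented above: PlainLayout weights, symmetric per-token dynamic
# activation quantization, and inductor settings tuned automatically.
quantize_(model, Int8DynamicActivationInt8WeightConfig())

out = model(torch.randn(8, 1024))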
