@@ -697,6 +697,9 @@ class Int8DynamicActivationIntxWeightConfig(AOBaseConfig):
     Weights are quantized with scales and optionally zeros (controlled by weight_zero_point_domain) in a groupwise or
     channelwise manner using the number of bits specified by weight_dtype.
 
+    This layout is identical to Int8DynamicActivationInt4WeightConfig when weight_dtype = torch.int4 and other args
+    are the same. However, this layout is more general and supports other weight dtypes.
+
     args:
         weight_dtype: The dtype to use for weight quantization. Must be torch.intx, where 1 <= x <= 8.
             torch.intx with x < 8 requires TORCH_VERSION_AT_LEAST_2_6
@@ -796,6 +799,9 @@ def _int8_dynamic_activation_intx_weight_transform(
 
     # We quantize with QDQLayout, and then construct the packed weight tensor later
     has_weight_zeros = weight_zero_point_domain == ZeroPointDomain.INT
+    preserve_zero = (weight_mapping_type == MappingType.SYMMETRIC) or (
+        weight_zero_point_domain == ZeroPointDomain.NONE
+    )
     weight = to_affine_quantized_intx(
         input_float=weight,
         mapping_type=weight_mapping_type,
@@ -806,8 +812,7 @@ def _int8_dynamic_activation_intx_weight_transform(
         eps=torch.finfo(torch.float32).eps,
         scale_dtype=weight_scale_dtype,
         zero_point_dtype=torch.int8 if has_weight_zeros else None,
-        preserve_zero=has_weight_zeros
-        or (weight_mapping_type == MappingType.SYMMETRIC),
+        preserve_zero=preserve_zero,
         zero_point_domain=weight_zero_point_domain,
         _layout=QDQLayout(),
     )
0 commit comments