@@ -743,10 +743,10 @@ def _add_quantized_conv_matmul_add_ops(
     weight_quantize_node: NodeProto,
     input_quantize_params: QuantizationParams,
     weight_quantize_params: QuantizationParams,
-    bias_initializer: onnx.TensorProto,
-    bias_add_name: str,
     target_output: str,
     transpose_weight: bool,
+    bias_add_name: str,
+    bias_initializer: Optional[onnx.TensorProto] = None,
     output_quantize_node: Optional[NodeProto] = None,
     output_dequantize_node: Optional[NodeProto] = None,
 ):
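Note on the reordered signature: once `bias_initializer` gains a `None` default it must sit with the other defaulted parameters, since Python forbids a defaulted parameter before a required one. The call sites updated later in this diff all pass these arguments by keyword, so the reorder is compatible for them. A toy sketch of the rule (made-up names, not the real signature):

```python
# Toy illustration only: defaulted parameters must follow required ones.
def convert(target_output: str, bias_add_name: str, bias_initializer=None):
    # keyword callers are unaffected by parameter order changes
    return (target_output, bias_add_name, bias_initializer)


convert(target_output="out", bias_add_name="add", bias_initializer=None)
```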
@@ -806,65 +806,62 @@ def _add_quantized_conv_matmul_add_ops(
     )
     model.graph.node.append(integer_op_node)
 
+    output_scale = input_quantize_params.scale * weight_quantize_params.scale
+    output_scale_name = "{}_output.scale".format(node.name)
+    model.graph.initializer.append(
+        numpy_helper.from_array(numpy.asarray(output_scale), name=output_scale_name)
+    )
+
+    last_output = integer_op_output
+
     # Add bias + zero point correction
     # quantize bias
-    bias_initializer = numpy_helper.to_array(bias_initializer)
-    bias_scale = input_quantize_params.scale * weight_quantize_params.scale
-    bias_zero_point = 0
-    quantized_bias = _quantize_array(
-        bias_initializer, bias_scale, bias_zero_point, dtype=numpy.int32
-    )
-    if node.op_type == "Conv" and len(quantized_bias.shape) == 1:
-        # reshape for bias add broadcasting
-        quantized_bias = quantized_bias.reshape(1, quantized_bias.shape[0], 1, 1)
+    if bias_initializer is not None:
+        bias_initializer = numpy_helper.to_array(bias_initializer)
 
-    quantized_bias_name = "{}.bias_quantized".format(bias_add_name)
-    quantized_bias_initializer = numpy_helper.from_array(
-        quantized_bias, name=quantized_bias_name
-    )
-    model.graph.initializer.append(quantized_bias_initializer)
-    quantized_bias_scale_name = "{}.scale".format(quantized_bias_name)
-    model.graph.initializer.append(
-        numpy_helper.from_array(
-            numpy.asarray(bias_scale), name=quantized_bias_scale_name
+        bias_zero_point = 0
+        quantized_bias = _quantize_array(
+            bias_initializer, output_scale, bias_zero_point, dtype=numpy.int32
         )
-    )
-    quantized_bias_zero_point_name = "{}.zero_point".format(quantized_bias_name)
-    model.graph.initializer.append(
-        numpy_helper.from_array(
-            numpy.asarray(bias_zero_point, dtype=numpy.uint8),
-            name=quantized_bias_zero_point_name,
+        if node.op_type == "Conv" and len(quantized_bias.shape) == 1:
+            # reshape for bias add broadcasting
+            quantized_bias = quantized_bias.reshape(1, quantized_bias.shape[0], 1, 1)
+
+        quantized_bias_name = "{}.bias_quantized".format(bias_add_name)
+        quantized_bias_initializer = numpy_helper.from_array(
+            quantized_bias, name=quantized_bias_name
         )
-    )
+        model.graph.initializer.append(quantized_bias_initializer)
 
-    # get INT32 Add inputs and outputs
-    quant_add_inputs = [
-        integer_op_output,  # MatMul/Conv integer outputs (INT32)
-        quantized_bias_name,  # Quantized bias (INT32)
-    ]
+        # get INT32 Add inputs and outputs
+        quant_add_inputs = [
+            last_output,  # MatMul/Conv integer outputs (INT32)
+            quantized_bias_name,  # Quantized bias (INT32)
+        ]
 
-    quant_add_name = "{}_bias_add_quant".format(node.name)
-    quant_add_output = (
-        output_quantize_node.output[0]
-        if output_quantize_node
-        else f"{quant_add_name}_output"
-    )
+        quant_add_name = "{}_bias_add_quant".format(node.name)
+        quant_add_output = (
+            output_quantize_node.output[0]
+            if output_quantize_node
+            else f"{quant_add_name}_output"
+        )
 
-    # create Add node and add it to graph
-    qadd_node = onnx.helper.make_node(
-        "Add",
-        quant_add_inputs,
-        [quant_add_output],
-        quant_add_name,
-    )
-    model.graph.node.append(qadd_node)
+        # create Add node and add it to graph
+        qadd_node = onnx.helper.make_node(
+            "Add",
+            quant_add_inputs,
+            [quant_add_output],
+            quant_add_name,
+        )
+        model.graph.node.append(qadd_node)
+        last_output = quant_add_output
 
     # create Cast node and add it to graph
-    cast_node_name = "{}_cast".format(quant_add_name)
-    cast_node_output = "{}_cast".format(quant_add_output)
+    cast_node_name = "{}_cast".format(node.name)
+    cast_node_output = "{}_output".format(cast_node_name)
     cast_node = onnx.helper.make_node(
         "Cast",
-        [quant_add_output],
+        [last_output],
         [cast_node_output],
         cast_node_name,
         to=getattr(onnx.TensorProto, "FLOAT"),  # get Float32 enum id
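For reference, `_quantize_array` is called above but not shown in this diff. A minimal sketch of what such a helper typically does, assuming standard affine quantization (the real helper in this file may differ):

```python
import numpy


def _quantize_array(array, scale, zero_point, dtype=numpy.uint8):
    # q = round(x / scale) + zero_point, saturated to the dtype's range
    dtype_info = numpy.iinfo(dtype)
    quantized = numpy.round(numpy.asarray(array) / scale) + zero_point
    return numpy.clip(quantized, dtype_info.min, dtype_info.max).astype(dtype)
```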
@@ -874,9 +871,9 @@ def _add_quantized_conv_matmul_add_ops(
     # create Mul node for rescale
     mul_node_inputs = [
         cast_node_output,  # a
-        quantized_bias_scale_name,  # b -> rescale factor
+        output_scale_name,  # b -> rescale factor
     ]
-    mul_node_name = "{}_rescale_mul".format(quant_add_name)
+    mul_node_name = "{}_rescale_mul".format(cast_node_name)
     mul_node = onnx.helper.make_node(
         "Mul",
         mul_node_inputs,
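The Cast -> Mul pair works because the MatMulInteger/ConvInteger accumulator differs from the FP32 result only by the factor `input_scale * weight_scale` (ignoring zero-point terms, which the bias add corrects for). A quick numeric check of that identity, with zero points of 0 for simplicity:

```python
import numpy

numpy.random.seed(0)
input_scale, weight_scale = 0.01, 0.02
x = numpy.random.uniform(-1, 1, (4, 8)).astype(numpy.float32)
w = numpy.random.uniform(-1, 1, (8, 3)).astype(numpy.float32)

# quantize with zero_point = 0, accumulate in INT32 as MatMulInteger would
x_q = numpy.round(x / input_scale).astype(numpy.int32)
w_q = numpy.round(w / weight_scale).astype(numpy.int32)
int32_accum = x_q @ w_q

# Cast (INT32 -> FP32) then Mul by the combined scale recovers the result
rescaled = int32_accum.astype(numpy.float32) * (input_scale * weight_scale)
assert numpy.allclose(rescaled, x @ w, atol=0.2)
```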
@@ -979,10 +976,10 @@ def _convert_quantizable_gemm_no_activations(model: ModelProto):
             weight_quantize_node=weight_quantize_node,
             input_quantize_params=input_quantize_params,
             weight_quantize_params=weight_quantize_params,
-            bias_initializer=bias_initializer,
-            bias_add_name="{}_bias_add".format(gemm_node.name),
             target_output=gemm_node.output[0],
             transpose_weight=transpose_weight,
+            bias_add_name="{}_bias_add".format(gemm_node.name),
+            bias_initializer=bias_initializer,
         )
 
         # Cleanup
@@ -1108,14 +1105,14 @@ def _convert_quantizable_matmul_and_add(model: ModelProto):
             weight_quantize_node=weight_quantize_node,
             input_quantize_params=input_quantize_params,
             weight_quantize_params=weight_quantize_params,
-            bias_initializer=bias_initializer,
-            bias_add_name=bias_add_node.name,
             target_output=(
                 output_dequantize_node.output[0]
                 if output_dequantize_node
                 else bias_add_node.output[0]
             ),
             transpose_weight=True,
+            bias_add_name=bias_add_node.name,
+            bias_initializer=bias_initializer,
             output_quantize_node=output_quantize_node,
             output_dequantize_node=output_dequantize_node,
         )
@@ -1164,7 +1161,7 @@ def _convert_quantizable_conv_integer(model: ModelProto):
     |     |                          |
     |     DequantizeLinear           |
     |     |                          |
-    |     Conv (with bias)
+    |     Conv (with optional bias)
     |         |
     |       OUTPUT
     | We end up converting to:
@@ -1174,7 +1171,7 @@ def _convert_quantizable_conv_integer(model: ModelProto):
     |     |
     |     ConvInteger (with constant uint8 kernel)
     |     |
-    |     Add (constant bias + zero point correction)
+    |     Add (optional, constant bias + zero point correction)
     |     |
     |     Cast (INT32 -> FP32)
     |     |
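Context for the optional-bias handling below: in the ONNX spec, `Conv` takes its bias `B` as an optional third input, so a node's input count distinguishes the two cases. An illustrative snippet (tensor names are made up):

```python
import onnx

conv_with_bias = onnx.helper.make_node(
    "Conv", inputs=["x", "w", "b"], outputs=["y"], name="conv_with_bias"
)
conv_without_bias = onnx.helper.make_node(
    "Conv", inputs=["x", "w"], outputs=["y"], name="conv_without_bias"
)
assert len(conv_with_bias.input) == 3 and len(conv_without_bias.input) == 2
```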
@@ -1187,10 +1184,10 @@ def _convert_quantizable_conv_integer(model: ModelProto):
     conv_nodes = [n for n in model.graph.node if n.op_type in ["Conv"]]
     orig_conv_weight_name_to_node_ids = defaultdict(list)
     for conv_node in conv_nodes:
-        if len(conv_node.input) != 3:
-            # this function currently only converts Conv nodes with bias param
-            # (i.e. from folded batch norm value)
-            continue
+        # if len(conv_node.input) != 3:
+        #     # this function currently only converts Conv nodes with bias param
+        #     # (i.e. from folded batch norm value)
+        #     continue
 
         graph = ONNXGraph(model)
 
@@ -1226,12 +1223,15 @@ def _convert_quantizable_conv_integer(model: ModelProto):
         if input_quantize_node.op_type != "DequantizeLinear":
             continue
 
-        bias_initializer = graph.get_init_by_name(conv_node.input[2])
-        if bias_initializer is None:
-            _LOGGER.debug(f"Unable to find bias initializer: {conv_node.input[2]}")
-            continue
+        if len(conv_node.input) == 3:
+            bias_initializer = graph.get_init_by_name(conv_node.input[2])
+        else:
+            bias_initializer = None
 
-        _LOGGER.debug(f"Matched quantizable Conv weight and bias: {conv_node.name}")
+        if bias_initializer is None:
+            _LOGGER.debug(f"Matched quantizable Conv weight: {conv_node.name}")
+        else:
+            _LOGGER.debug(f"Matched quantizable Conv weight and bias: {conv_node.name}")
 
         # Conversion
         _add_quantized_conv_matmul_add_ops(
@@ -1241,10 +1241,10 @@ def _convert_quantizable_conv_integer(model: ModelProto):
             weight_quantize_node=weight_quantize_node,
             input_quantize_params=input_quantize_params,
             weight_quantize_params=weight_quantize_params,
-            bias_initializer=bias_initializer,
-            bias_add_name="{}_bias_add".format(conv_node.name),
             target_output=conv_node.output[0],
             transpose_weight=False,
+            bias_add_name="{}_bias_add".format(conv_node.name),
+            bias_initializer=bias_initializer,
         )
         orig_conv_weight_name_to_node_ids[input_quantize_node.input[0]].append(
             "{}_quant".format(conv_node.output[0])