Commit cca21bb

Enable transpose_b=true in new matmul API (#1129)
1 parent 61c72eb commit cca21bb

5 files changed: 24 additions & 22 deletions

neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_requantize.py

Lines changed: 8 additions & 2 deletions
@@ -276,7 +276,6 @@ def __init__(self, model, device='cpu'):
 
     def do_transformation(self):
        fuse_pattern = [["_QuantizedMatMul"], ['Requantize'], ['Dequantize'], ('Softmax',)]
-
        target_nodes = self.graph_analyzer.query_fusion_pattern_nodes(fuse_pattern)
        for i in target_nodes:
            quantized_node_name = i[0]
@@ -301,6 +300,10 @@ def do_transformation(self):
                new_node.attr["T1"].CopyFrom(quantized_node.attr['T1'])
            if 'T2' in quantized_node.attr:
                new_node.attr["T2"].CopyFrom(quantized_node.attr['T2'])
+           if 'transpose_b' in quantized_node.attr:
+               new_node.attr["transpose_b"].CopyFrom(quantized_node.attr['transpose_b'])
+           if 'transpose_a' in quantized_node.attr:
+               new_node.attr["transpose_a"].CopyFrom(quantized_node.attr['transpose_a'])
            if 'Tbias' in quantized_node.attr:
                new_node.attr["Tbias"].CopyFrom(quantized_node.attr['Tbias'])
            if 'fused_ops' in quantized_node.attr:
@@ -358,7 +361,6 @@ def do_transformation(self):
                [["_QuantizedMatMul"], ['Requantize']])
            if len(target_nodes) == 0:
                break
-
            i = target_nodes[0]
            quantized_node_name = i[0]
            quantized_node = self.graph_info[quantized_node_name].node
@@ -381,6 +383,10 @@ def do_transformation(self):
            new_node.input.append(requested_output_min_name)
            new_node.input.append(requested_output_max_name)
 
+           if 'transpose_b' in quantized_node.attr:
+               new_node.attr["transpose_b"].CopyFrom(quantized_node.attr['transpose_b'])
+           if 'transpose_a' in quantized_node.attr:
+               new_node.attr["transpose_a"].CopyFrom(quantized_node.attr['transpose_a'])
            if 'T1' in quantized_node.attr:
                new_node.attr["T1"].CopyFrom(quantized_node.attr['T1'])
            if 'T2' in quantized_node.attr:
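Both hunks use the same guarded NodeDef attribute forwarding. A minimal standalone sketch of that CopyFrom idiom (node names and values here are illustrative, not from the repo):

from tensorflow.core.framework import attr_value_pb2, node_def_pb2

# Source node carrying a transpose flag, as the matched _QuantizedMatMul would.
src = node_def_pb2.NodeDef(name="quantized_mm", op="_QuantizedMatMul")
src.attr["transpose_b"].CopyFrom(attr_value_pb2.AttrValue(b=True))

# Forward the attribute onto the rewritten node only when it is present,
# mirroring the guarded CopyFrom calls added in do_transformation.
new_node = node_def_pb2.NodeDef(name="fused_mm", op="_QuantizedMatMul")
if "transpose_b" in src.attr:
    new_node.attr["transpose_b"].CopyFrom(src.attr["transpose_b"])

print(new_node.attr["transpose_b"].b)  # True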

neural_compressor/adaptor/tf_utils/graph_rewriter/qdq/insert_qdq_pattern.py

Lines changed: 2 additions & 3 deletions
@@ -423,10 +423,9 @@ def _ignore_insert_qdq_pattern(self, matched_node_name):
            return True
 
        #TODO Remove below two lines once the TF enabled the QuantizedMatMul while
-       # transpose_a/transpose_a could be set to True.
+       # transpose_a could be set to True.
        if self.graph_info[matched_node_name].node.op == "MatMul":
-           if self.graph_info[matched_node_name].node.attr["transpose_a"].b == True or \
-               self.graph_info[matched_node_name].node.attr["transpose_b"].b == True:
+           if self.graph_info[matched_node_name].node.attr["transpose_a"].b == True:
                return True
 
        return False
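The effect is that only transpose_a=True still blocks Q/DQ insertion; transpose_b=True is now handled by the new _QuantizedMatMul. A hedged, self-contained restatement of the narrowed predicate (the helper name is illustrative):

from tensorflow.core.framework import attr_value_pb2, node_def_pb2

def skips_qdq_insertion(node):
    # After this commit only transpose_a=True is rejected for Q/DQ insertion;
    # a missing attr reads back as the default False.
    return node.op == "MatMul" and node.attr["transpose_a"].b

mm = node_def_pb2.NodeDef(name="mm", op="MatMul")
mm.attr["transpose_b"].CopyFrom(attr_value_pb2.AttrValue(b=True))
print(skips_qdq_insertion(mm))  # False: transpose_b alone no longer disables it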

neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_matmul.py

Lines changed: 4 additions & 7 deletions
@@ -187,17 +187,15 @@ def apply_matmul_biasadd_fusion(self, match_node_name):
        weights_max_name = weights_name[2]
 
        weight_node = self.node_name_mapping[helper.node_name_from_input(weights_name[0])].node
-
        # FIXME We only quantize the MatMul op which second input node type is const. This is a
        # workaround for RNN model like LTSM.
        if weight_node.op != 'Const':
            self.output_graph = self.input_graph
            return []
 
        #TODO Remove below two lines once the TF enabled the QuantizedMatMul while
-       # transpose_a/transpose_a could be set to True.
-       if matched_node.node.attr["transpose_a"].b == True or \
-           matched_node.node.attr["transpose_b"].b == True:
+       # transpose_a could be set to True.
+       if matched_node.node.attr["transpose_a"].b == True:
            self.output_graph = self.input_graph
            return []
 
@@ -582,9 +580,8 @@ def _is_match_matmul(self, patterns, qdq_inserted=False):
 
            if cur_node.op == "MatMul":
                #TODO Remove below two lines once the TF enabled the QuantizedMatMul while
-               # transpose_a/transpose_a could be set to True.
-               if cur_node.attr["transpose_a"].b == True or \
-                   cur_node.attr["transpose_b"].b == True:
+               # transpose_a could be set to True.
+               if cur_node.attr["transpose_a"].b == True:
                    continue
 
            weights_content = tensor_util.MakeNdarray(weight_node.attr['value'].tensor)
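As a reminder of what the fusion must now preserve (plain NumPy, not repo code): transpose_b=True transposes the second operand before the product, so the quantized kernel reads the constant weight matrix with its axes swapped.

import numpy as np

# Same operands as the updated unit test below.
x = np.array([[0.1, 0.2], [0.2, 0.3]])
y = np.array([[1.0, 2.0], [3.0, 4.0]])

# tf.matmul(x, y, transpose_b=True) computes x @ y.T.
print(np.matmul(x, y.T))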

neural_compressor/adaptor/tf_utils/quantize_graph/quantize_graph_matmul.py

Lines changed: 3 additions & 3 deletions
@@ -150,10 +150,10 @@ def apply_matmul_biasadd_fusion(self, match_node_name):
            self.output_graph = self.input_graph
            return []
 
-       #TODO Remove below two lines once the TF enabled the QuantizedMatMul while
-       # transpose_a/transpose_a could be set to True.
+       #TODO Remove below two lines once the TF enabled the old QuantizedMatMul while
+       # transpose_a/transpose_b could be set to True.
        if matched_node.node.attr["transpose_a"].b == True or \
-           matched_node.node.attr["transpose_b"].b == True:
+               matched_node.node.attr["transpose_b"].b == True:
            self.output_graph = self.input_graph
            return []
 
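Here only the comment and continuation-line indentation change: the legacy (non-new-API) quantizer still rejects either transpose flag. A standalone restatement of the retained predicate (the function name is illustrative):

from tensorflow.core.framework import attr_value_pb2, node_def_pb2

def old_api_blocks_quantization(node):
    # The legacy QuantizedMatMul path still skips a MatMul when either flag
    # is set; only the new-API rewriters above accept transpose_b=True.
    return node.attr["transpose_a"].b or node.attr["transpose_b"].b

mm = node_def_pb2.NodeDef(name="mm", op="MatMul")
mm.attr["transpose_b"].CopyFrom(attr_value_pb2.AttrValue(b=True))
print(old_api_blocks_quantization(mm))  # True: still skipped on the old path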

test/tfnewapi/test_tensorflow_graph_qdq_matmul_fusion.py

Lines changed: 7 additions & 7 deletions
@@ -176,15 +176,15 @@ def test_matmul_biasadd_requantize_dequantize_last_fusion(self):
        self.assertEqual(found_quantized_matmul, True)
 
    @disable_random()
-   def test_disable_matmul_fusion(self):
+   def test_matmul_fusion_with_transpose_b_true(self):
        g = tf.Graph()
        with g.as_default():
 
            x_data = np.array([[0.1, 0.2], [0.2, 0.3]])
            y_data = np.array([[1, 2], [3, 4]], dtype=np.float)
            x = tf.placeholder(tf.float32, shape=[2, 2], name='x')
            y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2])
-           z = tf.matmul(x, y, name='no_quant_matmul')
+           z = tf.matmul(x, y, name='no_quant_matmul', transpose_b=True)
            z = tf.nn.relu6(z, name='op_to_store')
            found_quantized_matmul = False
 
@@ -201,21 +201,21 @@ def test_disable_matmul_fusion(self):
            output_graph = quantizer.fit()
 
            for i in output_graph.graph_def.node:
-               if i.op == '_QuantizedMatMul' and i.name == 'op_to_store':
+               if i.op == '_QuantizedMatMul':
                    found_quantized_matmul = True
                    break
-           self.assertEqual(found_quantized_matmul, False)
-
+           self.assertEqual(found_quantized_matmul, True)
+
    @disable_random()
-   def test_disable_matmul_fusion_with_transpose_b_true(self):
+   def test_disable_matmul_fusion(self):
        g = tf.Graph()
        with g.as_default():
 
            x_data = np.array([[0.1, 0.2], [0.2, 0.3]])
            y_data = np.array([[1, 2], [3, 4]], dtype=np.float)
            x = tf.placeholder(tf.float32, shape=[2, 2], name='x')
            y = tf.constant(y_data, dtype=tf.float32, shape=[2, 2])
-           z = tf.matmul(x, y, name='no_quant_matmul', transpose_b=True)
+           z = tf.matmul(x, y, name='no_quant_matmul')
            z = tf.nn.relu6(z, name='op_to_store')
            found_quantized_matmul = False
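The renamed test now asserts that fusion happens by scanning the produced GraphDef for any _QuantizedMatMul node, since requantize/dequantize fusion may fold away the original 'op_to_store' name. A minimal sketch of that scan (the quantizer setup is omitted; the GraphDef here is built by hand for illustration):

from tensorflow.core.framework import graph_pb2, node_def_pb2

def contains_quantized_matmul(graph_def):
    # Mirrors the loosened assertion: any _QuantizedMatMul node counts.
    return any(node.op == "_QuantizedMatMul" for node in graph_def.node)

gdef = graph_pb2.GraphDef()
gdef.node.append(node_def_pb2.NodeDef(name="mm_quantized", op="_QuantizedMatMul"))
print(contains_quantized_matmul(gdef))  # True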
