Commit c5d9106

Fix densenet-121 throughput issue (#1113)
1 parent 0439adc commit c5d9106

File tree

1 file changed: +5 −2 lines

1 file changed

+5
-2
lines changed

neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py

Lines changed: 5 additions & 2 deletions
@@ -108,6 +108,11 @@ def get_optimized_model(self, itex_mode=False):
 
         self._tmp_graph_def = SplitSharedInputOptimizer(self._tmp_graph_def).do_transformation()
 
+        # Run FuseDecomposedBNOptimizer before GraphFoldConstantOptimizer:
+        # constant folding turns the 'Sub' op inside the decomposed BN subgraph into a Const,
+        # after which FuseDecomposedBNOptimizer can no longer fuse those small ops into BN.
+        self._tmp_graph_def = FuseDecomposedBNOptimizer(self._tmp_graph_def).do_transformation()
+
         # disable fold constant for itex qdq mode
         if not itex_mode:
             self._tmp_graph_def = GraphFoldConstantOptimizer(self._tmp_graph_def).do_transformation()
@@ -121,8 +126,6 @@
 
         self._tmp_graph_def = GraphCseOptimizer(self._tmp_graph_def).do_transformation()
 
-        self._tmp_graph_def = FuseDecomposedBNOptimizer(self._tmp_graph_def).do_transformation()
-
         self._tmp_graph_def = FoldBatchNormNodesOptimizer(
             self._tmp_graph_def).do_transformation()
 

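Why the ordering matters: FuseDecomposedBNOptimizer pattern-matches the full subgraph of small ops that make up a decomposed BatchNorm, while GraphFoldConstantOptimizer rewrites ops with constant inputs (such as the BN's 'Sub') into Const nodes. If folding runs first, the pattern is destroyed and the fusion never fires. Below is a minimal, self-contained Python sketch of that interaction; the toy op list and the fold_constants/fuse_decomposed_bn helpers are hypothetical stand-ins, not Neural Compressor APIs.

    # Toy op sequence standing in for a decomposed BatchNorm subgraph.
    DECOMPOSED_BN = ["Rsqrt", "Mul", "Mul", "Sub", "Add"]

    def fold_constants(ops):
        # Stand-in for GraphFoldConstantOptimizer: the BN's 'Sub' has
        # constant inputs, so folding rewrites it into a 'Const'.
        return ["Const" if op == "Sub" else op for op in ops]

    def fuse_decomposed_bn(ops):
        # Stand-in for FuseDecomposedBNOptimizer: fuses only while the
        # full decomposed pattern is still intact.
        return ["FusedBatchNorm"] if ops == DECOMPOSED_BN else ops

    # Old pass order: folding destroys the pattern, so fusion never fires.
    print(fuse_decomposed_bn(fold_constants(DECOMPOSED_BN)))
    # -> ['Rsqrt', 'Mul', 'Mul', 'Const', 'Add']

    # New pass order (this commit): fuse first, then fold what remains.
    print(fold_constants(fuse_decomposed_bn(DECOMPOSED_BN)))
    # -> ['FusedBatchNorm']

With the fused pattern in place, FoldBatchNormNodesOptimizer and the later passes see a single BN node instead of the broken leftovers, which is what restores the densenet-121 throughput.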