Skip to content

Commit a9b0305

Browse files
committed
Merge pull request opencv#17295 from dkurt:dnn_fusion_ftz
2 parents d4a9c73 + 68d59a2 commit a9b0305

File tree

1 file changed

+11
-0
lines changed

1 file changed

+11
-0
lines changed

modules/dnn/src/layers/convolution_layer.cpp

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1230,6 +1230,13 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
1230 1230
CV_TRACE_FUNCTION();
1231 1231
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
1232 1232

1233+
#if CV_TRY_SSE
1234+
uint32_t ftzMode = _MM_GET_FLUSH_ZERO_MODE();
1235+
uint32_t dazMode = _MM_GET_DENORMALS_ZERO_MODE();
1236+
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
1237+
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
1238+
#endif
1239+
1233 1240
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
1234 1241
forward_ocl(inputs_arr, outputs_arr, internals_arr))
1235 1242

@@ -1312,6 +1319,10 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
1312 1319
ParallelConv::run(inputs[0], outputs[0], weightsMat, biasvec, reluslope,
1313 1320
kernel_size, strides, pads_begin, pads_end, dilations, activ.get(), ngroups, nstripes);
1314 1321
}
1322+
#if CV_TRY_SSE
1323+
_MM_SET_FLUSH_ZERO_MODE(ftzMode);
1324+
_MM_SET_DENORMALS_ZERO_MODE(dazMode);
1325+
#endif
1315 1326
}
1316 1327

1317 1328
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,

0 commit comments

Comments
 (0)