Skip to content

Commit 0e1c7ed

Browse files
committed
Merge pull request opencv#17092 from alalek:imgproc_ipp_parallel_gaussuanBlur
2 parents 2b2bcc9 + a3b109e commit 0e1c7ed

File tree

4 files changed

+63
-12
lines changed

4 files changed

+63
-12
lines changed

modules/core/include/opencv2/core/private.hpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -208,8 +208,6 @@ T* allocSingletonNew() { return new(allocSingletonNewBuffer(sizeof(T))) T(); }
208208
#define IPP_DISABLE_HOUGH 1 // improper integration/results
209209
#define IPP_DISABLE_FILTER2D_BIG_MASK 1 // different results on masks > 7x7
210210

211-
#define IPP_DISABLE_GAUSSIANBLUR_PARALLEL 1 // not supported (2017u2 / 2017u3)
212-
213211
// Temporarily disabled named IPP regions. Performance
214212
#define IPP_DISABLE_PERF_COPYMAKE 1 // performance variations
215213
#define IPP_DISABLE_PERF_LUT 1 // there are no performance benefits (PR #2653)

modules/imgproc/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,9 @@ ocv_add_dispatched_file(smooth SSE2 SSE4_1 AVX2)
1212
ocv_add_dispatched_file(sumpixels SSE2 AVX2 AVX512_SKX)
1313
ocv_add_dispatched_file(undistort SSE2 AVX2)
1414
ocv_define_module(imgproc opencv_core WRAP java python js)
15+
16+
ocv_check_environment_variables(OPENCV_IPP_GAUSSIAN_BLUR)
17+
option(OPENCV_IPP_GAUSSIAN_BLUR "Enable IPP optimizations for GaussianBlur (+8Mb in binary size)" OFF)
18+
if(OPENCV_IPP_GAUSSIAN_BLUR)
19+
ocv_append_source_file_compile_definitions(${CMAKE_CURRENT_SOURCE_DIR}/src/smooth.dispatch.cpp "ENABLE_IPP_GAUSSIAN_BLUR=1")
20+
endif()

modules/imgproc/perf/opencl/perf_filters.cpp

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -238,15 +238,13 @@ OCL_PERF_TEST_P(ScharrFixture, Scharr,
238238

239239
///////////// GaussianBlur ////////////////////////
240240

241-
typedef FilterFixture GaussianBlurFixture;
241+
typedef FilterFixture OCL_GaussianBlurFixture;
242242

243-
OCL_PERF_TEST_P(GaussianBlurFixture, GaussianBlur,
244-
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES, OCL_PERF_ENUM(3, 5, 7)))
243+
PERF_TEST_P_(OCL_GaussianBlurFixture, GaussianBlur)
245244
{
246-
const FilterParams params = GetParam();
245+
const FilterParams& params = GetParam();
247246
const Size srcSize = get<0>(params);
248247
const int type = get<1>(params), ksize = get<2>(params);
249-
const double eps = CV_MAT_DEPTH(type) <= CV_32S ? 2 + DBL_EPSILON : 3e-4;
250248

251249
checkDeviceMaxMemoryAllocSize(srcSize, type);
252250

@@ -255,9 +253,42 @@ OCL_PERF_TEST_P(GaussianBlurFixture, GaussianBlur,
255253

256254
OCL_TEST_CYCLE() cv::GaussianBlur(src, dst, Size(ksize, ksize), 1, 1, cv::BORDER_CONSTANT);
257255

258-
SANITY_CHECK(dst, eps);
256+
SANITY_CHECK_NOTHING();
259257
}
260258

259+
INSTANTIATE_TEST_CASE_P(/*nothing*/, OCL_GaussianBlurFixture,
260+
::testing::Combine(
261+
OCL_TEST_SIZES,
262+
OCL_TEST_TYPES,
263+
OCL_PERF_ENUM(3, 5, 7)
264+
)
265+
);
266+
267+
INSTANTIATE_TEST_CASE_P(SIFT, OCL_GaussianBlurFixture,
268+
::testing::Combine(
269+
::testing::Values(sz1080p),
270+
::testing::Values(CV_32FC1),
271+
OCL_PERF_ENUM(11, 13, 17, 21, 27)
272+
)
273+
);
274+
275+
INSTANTIATE_TEST_CASE_P(DISABLED_FULL, OCL_GaussianBlurFixture,
276+
::testing::Combine(
277+
::testing::Values(sz1080p),
278+
::testing::Values(
279+
CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4,
280+
CV_8SC1, CV_8SC2, CV_8SC3, CV_8SC4,
281+
CV_16UC1, CV_16UC2, CV_16UC3, CV_16UC4,
282+
CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4,
283+
CV_32SC1, CV_32SC2, CV_32SC3, CV_32SC4,
284+
CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4,
285+
CV_64FC1, CV_64FC2, CV_64FC3, CV_64FC4
286+
),
287+
OCL_PERF_ENUM(3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29)
288+
)
289+
);
290+
291+
261292
///////////// Filter2D ////////////////////////
262293

263294
typedef FilterFixture Filter2DFixture;

modules/imgproc/src/smooth.dispatch.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -470,9 +470,14 @@ static bool openvx_gaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
470470

471471
#endif
472472

473-
#if 0 //defined HAVE_IPP
473+
#if defined ENABLE_IPP_GAUSSIAN_BLUR // see CMake's OPENCV_IPP_GAUSSIAN_BLUR option
474+
475+
#define IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH 1
476+
#define IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH 1
477+
#define IPP_DISABLE_GAUSSIAN_BLUR_32FC4_1TH 1
478+
474479
// IW 2017u2 has a bug that prevents the use of partial inMem with tiling
475-
#if IPP_DISABLE_GAUSSIANBLUR_PARALLEL
480+
#if IPP_VERSION_X100 < 201900
476481
#define IPP_GAUSSIANBLUR_PARALLEL 0
477482
#else
478483
#define IPP_GAUSSIANBLUR_PARALLEL 1
@@ -555,6 +560,14 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
555560
return false;
556561

557562
const int threads = ippiSuggestThreadsNum(iwDst, 2);
563+
564+
if (IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH && (threads == 1 && ksize.width > 25))
565+
return false;
566+
if (IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH && (threads == 1 && src.type() == CV_16SC4))
567+
return false;
568+
if (IPP_DISABLE_GAUSSIAN_BLUR_32FC4_1TH && (threads == 1 && src.type() == CV_32FC4))
569+
return false;
570+
558571
if(IPP_GAUSSIANBLUR_PARALLEL && threads > 1) {
559572
bool ok;
560573
ipp_gaussianBlurParallel invoker(iwSrc, iwDst, ksize.width, (float) sigma1, ippBorder, &ok);
@@ -655,8 +668,6 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
655668
CV_OVX_RUN(true,
656669
openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType))
657670

658-
//CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
659-
660671
if(sdepth == CV_8U && ((borderType & BORDER_ISOLATED) || !_src.getMat().isSubmatrix()))
661672
{
662673
std::vector<ufixedpoint16> fkx, fky;
@@ -681,6 +692,11 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
681692
}
682693
}
683694

695+
#if defined ENABLE_IPP_GAUSSIAN_BLUR
696+
// IPP is not bit-exact with the OpenCV implementation
697+
CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
698+
#endif
699+
684700
sepFilter2D(src, dst, sdepth, kx, ky, Point(-1, -1), 0, borderType);
685701
}
686702

0 commit comments

Comments
 (0)