Skip to content

Commit 698b2bf

Browse files
authored
Merge pull request opencv#18167 from Yosshi999:bit-exact-gaussian
Bit exact gaussian blur for 16bit unsigned int

* bit-exact gaussian kernel for CV_16U
* SIMD optimization
* template GaussianBlurFixedPoint
* remove template specialization
* simd support for h3N121 uint16
* test for u16 gaussian blur
* remove unnecessary comments
* fix return type of raw()
* add typedef of native internal type in fixedpoint
* update return type of raw()
1 parent 1d42560 commit 698b2bf

File tree

4 files changed

+266
-123
lines changed

4 files changed

+266
-123
lines changed

modules/imgproc/src/fixedpoint.inl.hpp

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,14 @@ namespace {
1414
class fixedpoint64
1515
{
1616
private:
17-
static const int fixedShift = 32;
18-
1917
int64_t val;
2018
fixedpoint64(int64_t _val) : val(_val) {}
2119
static CV_ALWAYS_INLINE uint64_t fixedround(const uint64_t& _val) { return (_val + ((1LL << fixedShift) >> 1)); }
2220
public:
21+
static const int fixedShift = 32;
22+
2323
typedef fixedpoint64 WT;
24+
typedef int64_t raw_t;
2425
CV_ALWAYS_INLINE fixedpoint64() { val = 0; }
2526
CV_ALWAYS_INLINE fixedpoint64(const fixedpoint64& v) { val = v.val; }
2627
CV_ALWAYS_INLINE fixedpoint64(const int8_t& _val) { val = ((int64_t)_val) << fixedShift; }
@@ -97,13 +98,14 @@ class fixedpoint64
9798
class ufixedpoint64
9899
{
99100
private:
100-
static const int fixedShift = 32;
101-
102101
uint64_t val;
103102
ufixedpoint64(uint64_t _val) : val(_val) {}
104103
static CV_ALWAYS_INLINE uint64_t fixedround(const uint64_t& _val) { return (_val + ((1LL << fixedShift) >> 1)); }
105104
public:
105+
static const int fixedShift = 32;
106+
106107
typedef ufixedpoint64 WT;
108+
typedef uint64_t raw_t;
107109
CV_ALWAYS_INLINE ufixedpoint64() { val = 0; }
108110
CV_ALWAYS_INLINE ufixedpoint64(const ufixedpoint64& v) { val = v.val; }
109111
CV_ALWAYS_INLINE ufixedpoint64(const uint8_t& _val) { val = ((uint64_t)_val) << fixedShift; }
@@ -157,20 +159,24 @@ class ufixedpoint64
157159
CV_ALWAYS_INLINE bool isZero() { return val == 0; }
158160
static CV_ALWAYS_INLINE ufixedpoint64 zero() { return ufixedpoint64(); }
159161
static CV_ALWAYS_INLINE ufixedpoint64 one() { return ufixedpoint64((uint64_t)(1ULL << fixedShift)); }
162+
163+
static CV_ALWAYS_INLINE ufixedpoint64 fromRaw(uint64_t v) { return ufixedpoint64(v); }
164+
CV_ALWAYS_INLINE uint64_t raw() { return val; }
160165
CV_ALWAYS_INLINE uint32_t cvFloor() const { return cv::saturate_cast<uint32_t>(val >> fixedShift); }
161166
friend class ufixedpoint32;
162167
};
163168

164169
class fixedpoint32
165170
{
166171
private:
167-
static const int fixedShift = 16;
168-
169172
int32_t val;
170173
fixedpoint32(int32_t _val) : val(_val) {}
171174
static CV_ALWAYS_INLINE uint32_t fixedround(const uint32_t& _val) { return (_val + ((1 << fixedShift) >> 1)); }
172175
public:
176+
static const int fixedShift = 16;
177+
173178
typedef fixedpoint64 WT;
179+
typedef int32_t raw_t;
174180
CV_ALWAYS_INLINE fixedpoint32() { val = 0; }
175181
CV_ALWAYS_INLINE fixedpoint32(const fixedpoint32& v) { val = v.val; }
176182
CV_ALWAYS_INLINE fixedpoint32(const int8_t& _val) { val = ((int32_t)_val) << fixedShift; }
@@ -218,13 +224,14 @@ class fixedpoint32
218224
class ufixedpoint32
219225
{
220226
private:
221-
static const int fixedShift = 16;
222-
223227
uint32_t val;
224228
ufixedpoint32(uint32_t _val) : val(_val) {}
225229
static CV_ALWAYS_INLINE uint32_t fixedround(const uint32_t& _val) { return (_val + ((1 << fixedShift) >> 1)); }
226230
public:
231+
static const int fixedShift = 16;
232+
227233
typedef ufixedpoint64 WT;
234+
typedef uint32_t raw_t;
228235
CV_ALWAYS_INLINE ufixedpoint32() { val = 0; }
229236
CV_ALWAYS_INLINE ufixedpoint32(const ufixedpoint32& v) { val = v.val; }
230237
CV_ALWAYS_INLINE ufixedpoint32(const uint8_t& _val) { val = ((uint32_t)_val) << fixedShift; }
@@ -262,19 +269,23 @@ class ufixedpoint32
262269
CV_ALWAYS_INLINE bool isZero() { return val == 0; }
263270
static CV_ALWAYS_INLINE ufixedpoint32 zero() { return ufixedpoint32(); }
264271
static CV_ALWAYS_INLINE ufixedpoint32 one() { return ufixedpoint32((1U << fixedShift)); }
272+
273+
static CV_ALWAYS_INLINE ufixedpoint32 fromRaw(uint32_t v) { return ufixedpoint32(v); }
274+
CV_ALWAYS_INLINE uint32_t raw() { return val; }
265275
friend class ufixedpoint16;
266276
};
267277

268278
class fixedpoint16
269279
{
270280
private:
271-
static const int fixedShift = 8;
272-
273281
int16_t val;
274282
fixedpoint16(int16_t _val) : val(_val) {}
275283
static CV_ALWAYS_INLINE uint16_t fixedround(const uint16_t& _val) { return (_val + ((1 << fixedShift) >> 1)); }
276284
public:
285+
static const int fixedShift = 8;
286+
277287
typedef fixedpoint32 WT;
288+
typedef int16_t raw_t;
278289
CV_ALWAYS_INLINE fixedpoint16() { val = 0; }
279290
CV_ALWAYS_INLINE fixedpoint16(const fixedpoint16& v) { val = v.val; }
280291
CV_ALWAYS_INLINE fixedpoint16(const int8_t& _val) { val = ((int16_t)_val) << fixedShift; }
@@ -315,13 +326,14 @@ class fixedpoint16
315326
class ufixedpoint16
316327
{
317328
private:
318-
static const int fixedShift = 8;
319-
320329
uint16_t val;
321330
ufixedpoint16(uint16_t _val) : val(_val) {}
322331
static CV_ALWAYS_INLINE uint16_t fixedround(const uint16_t& _val) { return (_val + ((1 << fixedShift) >> 1)); }
323332
public:
333+
static const int fixedShift = 8;
334+
324335
typedef ufixedpoint32 WT;
336+
typedef uint16_t raw_t;
325337
CV_ALWAYS_INLINE ufixedpoint16() { val = 0; }
326338
CV_ALWAYS_INLINE ufixedpoint16(const ufixedpoint16& v) { val = v.val; }
327339
CV_ALWAYS_INLINE ufixedpoint16(const uint8_t& _val) { val = ((uint16_t)_val) << fixedShift; }
@@ -358,7 +370,7 @@ class ufixedpoint16
358370
static CV_ALWAYS_INLINE ufixedpoint16 one() { return ufixedpoint16((uint16_t)(1 << fixedShift)); }
359371

360372
static CV_ALWAYS_INLINE ufixedpoint16 fromRaw(uint16_t v) { return ufixedpoint16(v); }
361-
CV_ALWAYS_INLINE ufixedpoint16 raw() { return val; }
373+
CV_ALWAYS_INLINE uint16_t raw() { return val; }
362374
};
363375

364376
}

modules/imgproc/src/smooth.dispatch.cpp

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -258,23 +258,20 @@ softdouble getGaussianKernelFixedPoint_ED(CV_OUT std::vector<int64_t>& result, c
258258
}
259259

260260
static void getGaussianKernel(int n, double sigma, int ktype, Mat& res) { res = getGaussianKernel(n, sigma, ktype); }
261-
template <typename T> static void getGaussianKernel(int n, double sigma, int, std::vector<T>& res);
262-
//{ res = getFixedpointGaussianKernel<T>(n, sigma); }
263-
264-
template<> void getGaussianKernel<ufixedpoint16>(int n, double sigma, int, std::vector<ufixedpoint16>& res)
261+
template <typename FT> static void getGaussianKernel(int n, double sigma, int, std::vector<FT>& res)
265262
{
266263
std::vector<softdouble> res_sd;
267264
softdouble s0 = getGaussianKernelBitExact(res_sd, n, sigma);
268265
CV_UNUSED(s0);
269266

270267
std::vector<int64_t> fixed_256;
271-
softdouble approx_err = getGaussianKernelFixedPoint_ED(fixed_256, res_sd, 8);
268+
softdouble approx_err = getGaussianKernelFixedPoint_ED(fixed_256, res_sd, FT::fixedShift);
272269
CV_UNUSED(approx_err);
273270

274271
res.resize(n);
275272
for (int i = 0; i < n; i++)
276273
{
277-
res[i] = ufixedpoint16::fromRaw((uint16_t)fixed_256[i]);
274+
res[i] = FT::fromRaw((typename FT::raw_t)fixed_256[i]);
278275
//printf("%03d: %d\n", i, res[i].raw());
279276
}
280277
}
@@ -688,6 +685,43 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
688685
return;
689686
}
690687
}
688+
if(sdepth == CV_16U && ((borderType & BORDER_ISOLATED) || !_src.isSubmatrix()))
689+
{
690+
CV_LOG_INFO(NULL, "GaussianBlur: running bit-exact version...");
691+
692+
std::vector<ufixedpoint32> fkx, fky;
693+
createGaussianKernels(fkx, fky, type, ksize, sigma1, sigma2);
694+
695+
static bool param_check_gaussian_blur_bitexact_kernels = utils::getConfigurationParameterBool("OPENCV_GAUSSIANBLUR_CHECK_BITEXACT_KERNELS", false);
696+
if (param_check_gaussian_blur_bitexact_kernels && !validateGaussianBlurKernel(fkx))
697+
{
698+
CV_LOG_INFO(NULL, "GaussianBlur: bit-exact fx kernel can't be applied: ksize=" << ksize << " sigma=" << Size2d(sigma1, sigma2));
699+
}
700+
else if (param_check_gaussian_blur_bitexact_kernels && !validateGaussianBlurKernel(fky))
701+
{
702+
CV_LOG_INFO(NULL, "GaussianBlur: bit-exact fy kernel can't be applied: ksize=" << ksize << " sigma=" << Size2d(sigma1, sigma2));
703+
}
704+
else
705+
{
706+
// TODO: implement ocl_sepFilter2D_BitExact -- how to deal with bdepth?
707+
// CV_OCL_RUN(useOpenCL,
708+
// ocl_sepFilter2D_BitExact(_src, _dst, sdepth,
709+
// ksize,
710+
// (const uint32_t*)&fkx[0], (const uint32_t*)&fky[0],
711+
// Point(-1, -1), 0, borderType,
712+
// 16/*shift_bits*/)
713+
// );
714+
715+
Mat src = _src.getMat();
716+
Mat dst = _dst.getMat();
717+
718+
if (src.data == dst.data)
719+
src = src.clone();
720+
CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint32_t*)&fkx[0], (int)fkx.size(), (const uint32_t*)&fky[0], (int)fky.size(), borderType),
721+
CV_CPU_DISPATCH_MODES_ALL);
722+
return;
723+
}
724+
}
691725

692726
#ifdef HAVE_OPENCL
693727
if (useOpenCL)

0 commit comments

Comments
 (0)