Skip to content

Commit 783fe72

Browse files
WanliZhong and vpisarev authored
Resolve Compilation Error for v_func Function in SIMD Emulator (opencv#25891)
* use 2 parms for now to identify the error
* Revert "use 2 parms for now to identify the error"
  This reverts commit 86faf99.
* replace += with =
* add v_log ref
* refactor intrin_math code
* Add include guard to `intrin_math.hpp` to prevent multiple inclusions
* rename VX to V; make fp64 impl in neon be optional
* add v_setall, v_setzero for all backends; rewrite the intrin_math
* fix error on rvv_scalable
* let v_erf use v_exp_default_32f function
* 1. replaced 'v_setzero(VecType dummy)' with 'v_setzero_<VecType>()'
  2. replaced 'v_setall(LaneType x, VecType dummy)' with 'v_setall_<VecType>(LaneType x)'
  3. added tests for the new v_setzero_<> and v_setall_<>.
* gcc does not seem to like static_assert in functions even when they are not used
* trying to fix compile errors in Debug mode on Linux

---------

Co-authored-by: Vadim Pisarevsky <vadim.pisarevsky@gmail.com>
1 parent 73b3b24 commit 783fe72

15 files changed

+584
-424
lines changed

modules/core/include/opencv2/core/hal/intrin.hpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,19 @@ CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double)
191191
#endif // CV_CPU_OPTIMIZATION_HAL_NAMESPACE
192192

193193
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
194+
195+
// Generic initialisers selected by vector type: v_setzero_<VecType>() and
// v_setall_<VecType>(lane_value). Only the primary templates are declared here
// (no bodies); the backend headers changed in this commit supply explicit
// specialisations (`template <> inline _Tpvec v_setzero_()` / `v_setall_(_Tp)`)
// that forward to the backend's own setzero/setall functions.
// v_setall_ has one overload per lane type (uchar, schar, ushort, short,
// unsigned, int, uint64, int64, float, double).
template <typename _VecTp> inline _VecTp v_setzero_();
196+
template <typename _VecTp> inline _VecTp v_setall_(uchar);
197+
template <typename _VecTp> inline _VecTp v_setall_(schar);
198+
template <typename _VecTp> inline _VecTp v_setall_(ushort);
199+
template <typename _VecTp> inline _VecTp v_setall_(short);
200+
template <typename _VecTp> inline _VecTp v_setall_(unsigned);
201+
template <typename _VecTp> inline _VecTp v_setall_(int);
202+
template <typename _VecTp> inline _VecTp v_setall_(uint64);
203+
template <typename _VecTp> inline _VecTp v_setall_(int64);
204+
template <typename _VecTp> inline _VecTp v_setall_(float);
205+
template <typename _VecTp> inline _VecTp v_setall_(double);
206+
194207
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
195208
using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
196209
#endif
@@ -958,7 +971,6 @@ namespace CV__SIMD_NAMESPACE {
958971
#define CV_SIMD 0
959972
#endif
960973

961-
#include "intrin_math.hpp"
962974
#include "simd_utils.impl.hpp"
963975

964976
#ifndef CV_DOXYGEN

modules/core/include/opencv2/core/hal/intrin_avx.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,10 @@ OPENCV_HAL_IMPL_AVX_LOADSTORE_FLT(v_float64x4, double, pd, __m128d)
447447
{ return _Tpvec(_mm256_setzero_si256()); } \
448448
inline _Tpvec v256_setall_##suffix(_Tp v) \
449449
{ return _Tpvec(_mm256_set1_##ssuffix((ctype_s)v)); } \
450+
template <> inline _Tpvec v_setzero_() \
451+
{ return v256_setzero_##suffix(); } \
452+
template <> inline _Tpvec v_setall_(_Tp v) \
453+
{ return v256_setall_##suffix(v); } \
450454
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint8x32, suffix, OPENCV_HAL_NOP) \
451455
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int8x32, suffix, OPENCV_HAL_NOP) \
452456
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint16x16, suffix, OPENCV_HAL_NOP) \
@@ -472,6 +476,10 @@ OPENCV_HAL_IMPL_AVX_INIT(v_int64x4, int64, s64, epi64x, int64)
472476
{ return _Tpvec(_mm256_setzero_##zsuffix()); } \
473477
inline _Tpvec v256_setall_##suffix(_Tp v) \
474478
{ return _Tpvec(_mm256_set1_##zsuffix(v)); } \
479+
template <> inline _Tpvec v_setzero_() \
480+
{ return v256_setzero_##suffix(); } \
481+
template <> inline _Tpvec v_setall_(_Tp v) \
482+
{ return v256_setall_##suffix(v); } \
475483
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint8x32, suffix, cast) \
476484
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_int8x32, suffix, cast) \
477485
OPENCV_HAL_IMPL_AVX_CAST(_Tpvec, v_uint16x16, suffix, cast) \
@@ -3158,6 +3166,14 @@ inline void v_pack_store(hfloat* ptr, const v_float32x8& a)
31583166

31593167
// AVX backend cleanup hook: executes the _mm256_zeroall() intrinsic.
inline void v256_cleanup() { _mm256_zeroall(); }
31603168

3169+
#include "intrin_math.hpp"
3170+
// AVX overloads of the math functions. Each one forwards to a generic
// v_*_default_32f / v_*_default_64f template (presumably defined in
// intrin_math.hpp, included just above — confirm), instantiated with the
// float vector type and the matching integer vector type
// (v_float32x8 + v_int32x8, v_float64x4 + v_int64x4).
// v_erf is provided for the 32-bit float vector only.
inline v_float32x8 v_exp(v_float32x8 x) { return v_exp_default_32f<v_float32x8, v_int32x8>(x); }
3171+
inline v_float32x8 v_log(v_float32x8 x) { return v_log_default_32f<v_float32x8, v_int32x8>(x); }
3172+
inline v_float32x8 v_erf(v_float32x8 x) { return v_erf_default_32f<v_float32x8, v_int32x8>(x); }
3173+
3174+
inline v_float64x4 v_exp(v_float64x4 x) { return v_exp_default_64f<v_float64x4, v_int64x4>(x); }
3175+
inline v_float64x4 v_log(v_float64x4 x) { return v_log_default_64f<v_float64x4, v_int64x4>(x); }
3176+
31613177
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
31623178

31633179
//! @endcond

modules/core/include/opencv2/core/hal/intrin_avx512.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,10 @@ OPENCV_HAL_IMPL_AVX512_LOADSTORE_FLT(v_float64x8, double, pd, __m256d)
458458
{ return _Tpvec(_mm512_setzero_si512()); } \
459459
inline _Tpvec v512_setall_##suffix(_Tp v) \
460460
{ return _Tpvec(_mm512_set1_##ssuffix((ctype_s)v)); } \
461+
template <> inline _Tpvec v_setzero_() \
462+
{ return v512_setzero_##suffix(); } \
463+
template <> inline _Tpvec v_setall_(_Tp v) \
464+
{ return v512_setall_##suffix(v); } \
461465
OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint8x64, suffix, OPENCV_HAL_NOP) \
462466
OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int8x64, suffix, OPENCV_HAL_NOP) \
463467
OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint16x32, suffix, OPENCV_HAL_NOP) \
@@ -483,6 +487,10 @@ OPENCV_HAL_IMPL_AVX512_INIT(v_int64x8, int64, s64, epi64, int64)
483487
{ return _Tpvec(_mm512_setzero_##zsuffix()); } \
484488
inline _Tpvec v512_setall_##suffix(_Tp v) \
485489
{ return _Tpvec(_mm512_set1_##zsuffix(v)); } \
490+
template <> inline _Tpvec v_setzero_() \
491+
{ return v512_setzero_##suffix(); } \
492+
template <> inline _Tpvec v_setall_(_Tp v) \
493+
{ return v512_setall_##suffix(v); } \
486494
OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint8x64, suffix, cast) \
487495
OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_int8x64, suffix, cast) \
488496
OPENCV_HAL_IMPL_AVX512_CAST(_Tpvec, v_uint16x32, suffix, cast) \
@@ -3070,6 +3078,14 @@ inline int v_scan_forward(const v_float64x8& a) { return trailingZeros32(v_signm
30703078

30713079
// AVX-512 backend cleanup hook: executes the _mm256_zeroall() intrinsic.
// NOTE(review): this is the 256-bit intrinsic used inside the 512-bit backend —
// presumably intentional; confirm.
inline void v512_cleanup() { _mm256_zeroall(); }
30723080

3081+
#include "intrin_math.hpp"
3082+
// AVX-512 overloads of the math functions. Each one forwards to a generic
// v_*_default_32f / v_*_default_64f template (presumably defined in
// intrin_math.hpp, included just above — confirm), instantiated with the
// float vector type and the matching integer vector type
// (v_float32x16 + v_int32x16, v_float64x8 + v_int64x8).
// v_erf is provided for the 32-bit float vector only.
inline v_float32x16 v_exp(v_float32x16 x) { return v_exp_default_32f<v_float32x16, v_int32x16>(x); }
3083+
inline v_float32x16 v_log(v_float32x16 x) { return v_log_default_32f<v_float32x16, v_int32x16>(x); }
3084+
inline v_float32x16 v_erf(v_float32x16 x) { return v_erf_default_32f<v_float32x16, v_int32x16>(x); }
3085+
3086+
inline v_float64x8 v_exp(v_float64x8 x) { return v_exp_default_64f<v_float64x8, v_int64x8>(x); }
3087+
inline v_float64x8 v_log(v_float64x8 x) { return v_log_default_64f<v_float64x8, v_int64x8>(x); }
3088+
30733089
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
30743090

30753091
//! @endcond

modules/core/include/opencv2/core/hal/intrin_cpp.hpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ Most of these operations return only one value.
263263
264264
### Other math
265265
266-
- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude, @ref v_exp,
266+
- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude, @ref v_exp, @ref v_log,
267267
@ref v_erf
268268
- Absolute values: @ref v_abs, @ref v_absdiff, @ref v_absdiffs
269269
@@ -2801,7 +2801,8 @@ inline void v_transpose4x4( v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1,
28012801
//! @brief Helper macro
28022802
//! @ingroup core_hal_intrin_impl
28032803
#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, prefix, suffix) \
2804-
inline _Tpvec prefix##_setzero_##suffix() { return _Tpvec::zero(); }
2804+
inline _Tpvec prefix##_setzero_##suffix() { return _Tpvec::zero(); } \
2805+
template <> inline _Tpvec v_setzero_() { return _Tpvec::zero(); }
28052806

28062807
//! @name Init with zero
28072808
//! @{
@@ -2847,7 +2848,8 @@ OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x8, v512, s64)
28472848
//! @brief Helper macro
28482849
//! @ingroup core_hal_intrin_impl
28492850
#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, prefix, suffix) \
2850-
inline _Tpvec prefix##_setall_##suffix(_Tp val) { return _Tpvec::all(val); }
2851+
inline _Tpvec prefix##_setall_##suffix(_Tp val) { return _Tpvec::all(val); } \
2852+
template <> inline _Tpvec v_setall_(_Tp val) { return _Tpvec::all(val); }
28512853

28522854
//! @name Init with value
28532855
//! @{

modules/core/include/opencv2/core/hal/intrin_lasx.hpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,10 @@ inline __m256i _lasx_256_castpd_si256(const __m256d& v)
557557
{ return _Tpvec(__lasx_xvreplgr2vr_d(0)); } \
558558
inline _Tpvec v256_setall_##suffix(_Tp v) \
559559
{ return _Tpvec(__lasx_xvreplgr2vr_##ssuffix((ctype_s)v)); } \
560+
template <> inline _Tpvec v_setzero_() \
561+
{ return v256_setzero_##suffix(); } \
562+
template <> inline _Tpvec v_setall_(_Tp v) \
563+
{ return v256_setall_##suffix(v); } \
560564
OPENCV_HAL_IMPL_LASX_CAST(_Tpvec, v_uint8x32, suffix, OPENCV_HAL_NOP) \
561565
OPENCV_HAL_IMPL_LASX_CAST(_Tpvec, v_int8x32, suffix, OPENCV_HAL_NOP) \
562566
OPENCV_HAL_IMPL_LASX_CAST(_Tpvec, v_uint16x16, suffix, OPENCV_HAL_NOP) \
@@ -588,7 +592,11 @@ inline __m256d _lasx_256_castsi256_pd(const __m256i &v)
588592
inline _Tpvec v256_setzero_##suffix() \
589593
{ return _Tpvec(__lasx_xvreplgr2vr_d(0)); } \
590594
inline _Tpvec v256_setall_##suffix(_Tp v) \
591-
{ return _Tpvec(_v256_setall_##zsuffix(v)); } \
595+
{ return _Tpvec(_v256_setall_##zsuffix(v)); } \
596+
template <> inline _Tpvec v_setzero_() \
597+
{ return v256_setzero_##suffix(); } \
598+
template <> inline _Tpvec v_setall_(_Tp v) \
599+
{ return v256_setall_##suffix(v); } \
592600
OPENCV_HAL_IMPL_LASX_CAST(_Tpvec, v_uint8x32, suffix, cast) \
593601
OPENCV_HAL_IMPL_LASX_CAST(_Tpvec, v_int8x32, suffix, cast) \
594602
OPENCV_HAL_IMPL_LASX_CAST(_Tpvec, v_uint16x16, suffix, cast) \
@@ -3005,6 +3013,14 @@ inline void v_pack_store(hfloat* ptr, const v_float32x8& a)
30053013

30063014
// LASX backend cleanup hook: intentionally empty, nothing is executed.
inline void v256_cleanup() {}
30073015

3016+
#include "intrin_math.hpp"
3017+
// LASX overloads of the math functions. Each one forwards to a generic
// v_*_default_32f / v_*_default_64f template (presumably defined in
// intrin_math.hpp, included just above — confirm), instantiated with the
// float vector type and the matching integer vector type
// (v_float32x8 + v_int32x8, v_float64x4 + v_int64x4).
// v_erf is provided for the 32-bit float vector only.
inline v_float32x8 v_exp(v_float32x8 x) { return v_exp_default_32f<v_float32x8, v_int32x8>(x); }
3018+
inline v_float32x8 v_log(v_float32x8 x) { return v_log_default_32f<v_float32x8, v_int32x8>(x); }
3019+
inline v_float32x8 v_erf(v_float32x8 x) { return v_erf_default_32f<v_float32x8, v_int32x8>(x); }
3020+
3021+
inline v_float64x4 v_exp(v_float64x4 x) { return v_exp_default_64f<v_float64x4, v_int64x4>(x); }
3022+
inline v_float64x4 v_log(v_float64x4 x) { return v_log_default_64f<v_float64x4, v_int64x4>(x); }
3023+
30083024
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
30093025

30103026
//! @endcond

modules/core/include/opencv2/core/hal/intrin_lsx.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,10 @@ inline __m128i _lsx_128_castpd_si128(const __m128d& v)
417417
{ return _Tpvec(__lsx_vldi(0)); } \
418418
inline _Tpvec v_setall_##suffix(_Tp v) \
419419
{ return _Tpvec(__lsx_vreplgr2vr_##ssuffix((ctype_s)v)); } \
420+
template <> inline _Tpvec v_setzero_() \
421+
{ return v_setzero_##suffix(); } \
422+
template <> inline _Tpvec v_setall_(_Tp v) \
423+
{ return v_setall_##suffix(v); } \
420424
OPENCV_HAL_IMPL_LSX_CAST(_Tpvec, v_uint8x16, suffix, OPENCV_HAL_NOP) \
421425
OPENCV_HAL_IMPL_LSX_CAST(_Tpvec, v_int8x16, suffix, OPENCV_HAL_NOP) \
422426
OPENCV_HAL_IMPL_LSX_CAST(_Tpvec, v_uint16x8, suffix, OPENCV_HAL_NOP) \
@@ -448,6 +452,10 @@ inline __m128d _lsx_128_castsi128_pd(const __m128i &v)
448452
{ return _Tpvec(__lsx_vldi(0)); } \
449453
inline _Tpvec v_setall_##suffix(_Tp v) \
450454
{ return _Tpvec(_v128_setall_##zsuffix(v)); } \
455+
template <> inline _Tpvec v_setzero_() \
456+
{ return v_setzero_##suffix(); } \
457+
template <> inline _Tpvec v_setall_(_Tp v) \
458+
{ return v_setall_##suffix(v); } \
451459
OPENCV_HAL_IMPL_LSX_CAST(_Tpvec, v_uint8x16, suffix, cast) \
452460
OPENCV_HAL_IMPL_LSX_CAST(_Tpvec, v_int8x16, suffix, cast) \
453461
OPENCV_HAL_IMPL_LSX_CAST(_Tpvec, v_uint16x8, suffix, cast) \
@@ -2515,6 +2523,14 @@ inline void v_pack_store(hfloat* ptr, const v_float32x4& a)
25152523

25162524
// LSX backend cleanup hook: intentionally empty, nothing is executed.
inline void v_cleanup() {}
25172525

2526+
#include "intrin_math.hpp"
2527+
// LSX overloads of the math functions. Each one forwards to a generic
// v_*_default_32f / v_*_default_64f template (presumably defined in
// intrin_math.hpp, included just above — confirm), instantiated with the
// float vector type and the matching integer vector type
// (v_float32x4 + v_int32x4, v_float64x2 + v_int64x2).
// v_erf is provided for the 32-bit float vector only.
inline v_float32x4 v_exp(v_float32x4 x) { return v_exp_default_32f<v_float32x4, v_int32x4>(x); }
2528+
inline v_float32x4 v_log(v_float32x4 x) { return v_log_default_32f<v_float32x4, v_int32x4>(x); }
2529+
inline v_float32x4 v_erf(v_float32x4 x) { return v_erf_default_32f<v_float32x4, v_int32x4>(x); }
2530+
2531+
inline v_float64x2 v_exp(v_float64x2 x) { return v_exp_default_64f<v_float64x2, v_int64x2>(x); }
2532+
inline v_float64x2 v_log(v_float64x2 x) { return v_log_default_64f<v_float64x2, v_int64x2>(x); }
2533+
25182534
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
25192535

25202536
//! @endcond

0 commit comments

Comments
 (0)