Skip to content

Commit b2e118e

Browse files
authored
Merge pull request opencv#26166 from mshabunin:fix-intrin-ops
build: fix AVX2/AVX512 build failures caused by intrinsics operator usage
2 parents f503890 + 6ef357f commit b2e118e

File tree

2 files changed

+13
-13
lines changed

2 files changed

+13
-13
lines changed

modules/gapi/src/backends/fluid/gfluidimgproc_simd_avx2.hpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -85,15 +85,15 @@ CV_ALWAYS_INLINE void calcRowLinear32FC1Impl(float *dst[],
8585
v_deinterleave(low1, high1, s00, s01);
8686

8787
// v_float32 res0 = s00*alpha0 + s01*alpha1;
88-
v_float32x8 res0 = v_fma(s00 - s01, alpha0, s01);
88+
v_float32x8 res0 = v_fma(v_sub(s00, s01), alpha0, s01);
8989

9090
v_gather_pairs(src1[line], &mapsx[x], low2, high2);
9191
v_deinterleave(low2, high2, s10, s11);
9292

9393
// v_float32 res1 = s10*alpha0 + s11*alpha1;
94-
v_float32x8 res1 = v_fma(s10 - s11, alpha0, s11);
94+
v_float32x8 res1 = v_fma(v_sub(s10, s11), alpha0, s11);
9595
// v_float32 d = res0*beta0 + res1*beta1;
96-
v_float32x8 d = v_fma(res0 - res1, v_beta0, res1);
96+
v_float32x8 d = v_fma(v_sub(res0, res1), v_beta0, res1);
9797

9898
v_store(&dst[line][x], d);
9999
}
@@ -126,7 +126,7 @@ CV_ALWAYS_INLINE void calcRowLinear32FC1Impl(float *dst[],
126126
v_deinterleave(low, high, s00, s01);
127127

128128
// v_float32 d = s00*alpha0 + s01*alpha1;
129-
v_float32x8 d = v_fma(s00 - s01, alpha0, s01);
129+
v_float32x8 d = v_fma(v_sub(s00, s01), alpha0, s01);
130130

131131
v_store(&dst[line][x], d);
132132
}
@@ -157,7 +157,7 @@ CV_ALWAYS_INLINE void calcRowLinear32FC1Impl(float *dst[],
157157
v_float32x8 s1 = v256_load(&src1[line][x]);
158158

159159
// v_float32 d = s0*beta0 + s1*beta1;
160-
v_float32x8 d = v_fma(s0 - s1, v_beta0, s1);
160+
v_float32x8 d = v_fma(v_sub(s0, s1), v_beta0, s1);
161161

162162
v_store(&dst[line][x], d);
163163
}

modules/imgproc/src/resize.cpp

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2536,7 +2536,7 @@ class ResizeAreaFastVec_SIMD_8u
25362536
#elif CV_SIMD_WIDTH == 64
25372537
v_zip(t0, t3, s0, s1); v_zip(t1, t4, s2, s3); v_zip(t2, t5, s4, s5);
25382538
v_zip(s0, s3, t0, t1); v_zip(s1, s4, t2, t3); v_zip(s2, s5, t4, t5);
2539-
bl = t0 + t3; gl = t1 + t4; rl = t2 + t5;
2539+
bl = v_add(t0, t3); gl = v_add(t1, t4); rl = v_add(t2, t5);
25402540
#endif
25412541
s0 = v_add(vx_load_expand(S0 + 6 * VTraits<v_uint16>::vlanes()), vx_load_expand(S1 + 6 * VTraits<v_uint16>::vlanes()));
25422542
s1 = v_add(vx_load_expand(S0 + 7 * VTraits<v_uint16>::vlanes()), vx_load_expand(S1 + 7 * VTraits<v_uint16>::vlanes()));
@@ -2556,7 +2556,7 @@ class ResizeAreaFastVec_SIMD_8u
25562556
#elif CV_SIMD_WIDTH == 64
25572557
v_zip(t0, t3, s0, s1); v_zip(t1, t4, s2, s3); v_zip(t2, t5, s4, s5);
25582558
v_zip(s0, s3, t0, t1); v_zip(s1, s4, t2, t3); v_zip(s2, s5, t4, t5);
2559-
bh = t0 + t3; gh = t1 + t4; rh = t2 + t5;
2559+
bh = v_add(t0, t3); gh = v_add(t1, t4); rh = v_add(t2, t5);
25602560
#endif
25612561
v_store_interleave(D, v_rshr_pack<2>(bl, bh), v_rshr_pack<2>(gl, gh), v_rshr_pack<2>(rl, rh));
25622562
}
@@ -2643,7 +2643,7 @@ class ResizeAreaFastVec_SIMD_16u
26432643
bl = v_add(t0, t3); gl = v_add(t1, t4); rl = v_add(t2, t5);
26442644
#else //CV_SIMD_WIDTH == 64
26452645
v_zip(t0, t3, s0, s1); v_zip(t1, t4, s2, s3); v_zip(t2, t5, s4, s5);
2646-
bl = s0 + s3; gl = s1 + s4; rl = s2 + s5;
2646+
bl = v_add(s0, s3); gl = v_add(s1, s4); rl = v_add(s2, s5);
26472647
#endif
26482648
s0 = v_add(vx_load_expand(S0 + 6 * VTraits<v_uint32>::vlanes()), vx_load_expand(S1 + 6 * VTraits<v_uint32>::vlanes()));
26492649
s1 = v_add(vx_load_expand(S0 + 7 * VTraits<v_uint32>::vlanes()), vx_load_expand(S1 + 7 * VTraits<v_uint32>::vlanes()));
@@ -2659,7 +2659,7 @@ class ResizeAreaFastVec_SIMD_16u
26592659
bh = v_add(t0, t3); gh = v_add(t1, t4); rh = v_add(t2, t5);
26602660
#else //CV_SIMD_WIDTH == 64
26612661
v_zip(t0, t3, s0, s1); v_zip(t1, t4, s2, s3); v_zip(t2, t5, s4, s5);
2662-
bh = s0 + s3; gh = s1 + s4; rh = s2 + s5;
2662+
bh = v_add(s0, s3); gh = v_add(s1, s4); rh = v_add(s2, s5);
26632663
#endif
26642664
v_store_interleave(D, v_rshr_pack<2>(bl, bh), v_rshr_pack<2>(gl, gh), v_rshr_pack<2>(rl, rh));
26652665
}
@@ -2697,7 +2697,7 @@ class ResizeAreaFastVec_SIMD_16u
26972697
v_expand(v_reinterpret_as_u16(r01), r01l, r01h);
26982698
v_expand(v_reinterpret_as_u16(r10), r10l, r10h);
26992699
v_expand(v_reinterpret_as_u16(r11), r11l, r11h);
2700-
v_store(D, v_rshr_pack<2>(r00l + r01l + r10l + r11l, r00h + r01h + r10h + r11h));
2700+
v_store(D, v_rshr_pack<2>(v_add(r00l, r01l, r10l, r11l), v_add(r00h, r01h, r10h, r11h)));
27012701
}
27022702
#else
27032703
for ( ; dx <= w - VTraits<v_uint32>::vlanes(); dx += VTraits<v_uint32>::vlanes(), S0 += VTraits<v_uint16>::vlanes(), S1 += VTraits<v_uint16>::vlanes(), D += VTraits<v_uint32>::vlanes())
@@ -2773,7 +2773,7 @@ class ResizeAreaFastVec_SIMD_16s
27732773
bl = v_add(t0, t3); gl = v_add(t1, t4); rl = v_add(t2, t5);
27742774
#else //CV_SIMD_WIDTH == 64
27752775
v_zip(t0, t3, s0, s1); v_zip(t1, t4, s2, s3); v_zip(t2, t5, s4, s5);
2776-
bl = s0 + s3; gl = s1 + s4; rl = s2 + s5;
2776+
bl = v_add(s0, s3); gl = v_add(s1, s4); rl = v_add(s2, s5);
27772777
#endif
27782778
s0 = v_add(vx_load_expand(S0 + 6 * VTraits<v_int32>::vlanes()), vx_load_expand(S1 + 6 * VTraits<v_int32>::vlanes()));
27792779
s1 = v_add(vx_load_expand(S0 + 7 * VTraits<v_int32>::vlanes()), vx_load_expand(S1 + 7 * VTraits<v_int32>::vlanes()));
@@ -2789,7 +2789,7 @@ class ResizeAreaFastVec_SIMD_16s
27892789
bh = v_add(t0, t3); gh = v_add(t1, t4); rh = v_add(t2, t5);
27902790
#else //CV_SIMD_WIDTH == 64
27912791
v_zip(t0, t3, s0, s1); v_zip(t1, t4, s2, s3); v_zip(t2, t5, s4, s5);
2792-
bh = s0 + s3; gh = s1 + s4; rh = s2 + s5;
2792+
bh = v_add(s0, s3); gh = v_add(s1, s4); rh = v_add(s2, s5);
27932793
#endif
27942794
v_store_interleave(D, v_rshr_pack<2>(bl, bh), v_rshr_pack<2>(gl, gh), v_rshr_pack<2>(rl, rh));
27952795
}
@@ -2826,7 +2826,7 @@ class ResizeAreaFastVec_SIMD_16s
28262826
v_expand(v_reinterpret_as_s16(r01), r01l, r01h);
28272827
v_expand(v_reinterpret_as_s16(r10), r10l, r10h);
28282828
v_expand(v_reinterpret_as_s16(r11), r11l, r11h);
2829-
v_store(D, v_rshr_pack<2>(r00l + r01l + r10l + r11l, r00h + r01h + r10h + r11h));
2829+
v_store(D, v_rshr_pack<2>(v_add(r00l, r01l, r10l, r11l), v_add(r00h, r01h, r10h, r11h)));
28302830
#else
28312831
v_int32 r0, r1, r2, r3;
28322832
r0 = v_add(vx_load_expand(S0), vx_load_expand(S1));

0 commit comments

Comments (0)