Skip to content

Commit f0bb67d

Browse files
authored
Merge pull request #3572 from hanliutong:clean-up
Use new Universal Intrinsic API to fix compilation.
2 parents 82c2b70 + b9a460b commit f0bb67d

16 files changed

+549
-558
lines changed

modules/optflow/src/rlof/berlof_invoker.hpp

Lines changed: 112 additions & 112 deletions
Large diffs are not rendered by default.

modules/optflow/src/rlof/plk_invoker.hpp

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -229,25 +229,25 @@ class TrackerInvoker : public cv::ParallelLoopBody
229229
v_int16x8 v01 = v_reinterpret_as_s16(v_load_expand(Jptr + x + cn));
230230
v_int16x8 v10 = v_reinterpret_as_s16(v_load_expand(Jptr1 + x));
231231
v_int16x8 v11 = v_reinterpret_as_s16(v_load_expand(Jptr1 + x + cn));
232-
v_int16x8 vmask = v_reinterpret_as_s16(v_load_expand(maskPtr + x)) * vmax_val_16;
232+
v_int16x8 vmask = v_mul(v_reinterpret_as_s16(v_load_expand(maskPtr + x)), vmax_val_16);
233233

234234
v_int32x4 t0, t1;
235235
v_int16x8 t00, t01, t10, t11;
236236
v_zip(v00, v01, t00, t01);
237237
v_zip(v10, v11, t10, t11);
238238

239239
//subpixel interpolation
240-
t0 = v_dotprod(t00, vqw0, vdelta) + v_dotprod(t10, vqw1);
241-
t1 = v_dotprod(t01, vqw0, vdelta) + v_dotprod(t11, vqw1);
242-
t0 = t0 >> (W_BITS - 5);
243-
t1 = t1 >> (W_BITS - 5);
240+
t0 = v_add(v_dotprod(t00, vqw0, vdelta), v_dotprod(t10, vqw1));
241+
t1 = v_add(v_dotprod(t01, vqw0, vdelta), v_dotprod(t11, vqw1));
242+
t0 = v_shr(t0, W_BITS - 5);
243+
t1 = v_shr(t1, W_BITS - 5);
244244

245245
// diff = J - I
246-
diff0 = v_pack(t0, t1) - vI;
246+
diff0 = v_sub(v_pack(t0, t1), vI);
247247
// I*gain.x + gain.x
248248
v_mul_expand(vI, vgain_value, t0, t1);
249-
diff0 = diff0 + v_pack(t0 >> bitShift, t1 >> bitShift) + vconst_value;
250-
diff0 = diff0 & vmask;
249+
diff0 = v_add(v_add(diff0, v_pack(v_shr(t0, bitShift), v_shr(t1, bitShift))), vconst_value);
250+
diff0 = v_and(diff0, vmask);
251251
v_zip(diff0, diff0, diff2, diff1);
252252

253253
v_int32x4 diff0_0;
@@ -259,16 +259,16 @@ class TrackerInvoker : public cv::ParallelLoopBody
259259
v_zip(vIxy_0, vIxy_1, v10, v11);
260260
v_zip(diff2, diff1, v00, v01);
261261

262-
vqb0 += v_cvt_f32(v_dotprod(v00, v10));
263-
vqb1 += v_cvt_f32(v_dotprod(v01, v11));
262+
vqb0 = v_add(vqb0, v_cvt_f32(v_dotprod(v00, v10)));
263+
vqb1 = v_add(vqb1, v_cvt_f32(v_dotprod(v01, v11)));
264264

265265
v_int32x4 vI0, vI1;
266266
v_expand(vI, vI0, vI1);
267-
vqb2 += v_cvt_f32(diff0_0 * vI0);
268-
vqb2 += v_cvt_f32(diff0_1 * vI1);
267+
vqb2 = v_add(vqb2, v_cvt_f32(v_mul(diff0_0, vI0)));
268+
vqb2 = v_add(vqb2, v_cvt_f32(v_mul(diff0_1, vI1)));
269269

270-
vqb3 += v_cvt_f32(diff0_0);
271-
vqb3 += v_cvt_f32(diff0_1);
270+
vqb3 = v_add(vqb3, v_cvt_f32(diff0_0));
271+
vqb3 = v_add(vqb3, v_cvt_f32(diff0_1));
272272

273273
if (j == 0)
274274
{
@@ -285,17 +285,17 @@ class TrackerInvoker : public cv::ParallelLoopBody
285285
vAxx = v_muladd(fx, fx, vAxx);
286286

287287
// sumIx und sumIy
288-
vsumIx += fx;
289-
vsumIy += fy;
288+
vsumIx = v_add(vsumIx, fx);
289+
vsumIy = v_add(vsumIy, fy);
290290

291-
vsumW1 += vI_ps * fx;
292-
vsumW2 += vI_ps * fy;
291+
vsumW1 = v_add(vsumW1, v_mul(vI_ps, fx));
292+
vsumW2 = v_add(vsumW2, v_mul(vI_ps, fy));
293293

294294
// sumI
295-
vsumI += vI_ps;
295+
vsumI = v_add(vsumI, vI_ps);
296296

297297
// sumDI
298-
vsumDI += vI_ps * vI_ps;
298+
vsumDI = v_add(vsumDI, v_mul(vI_ps, vI_ps));
299299

300300
v01 = v_reinterpret_as_s16(v_interleave_pairs(v_reinterpret_as_s32(v_interleave_pairs(vIxy_1))));
301301
v_expand(v01, t1, t0);
@@ -309,17 +309,17 @@ class TrackerInvoker : public cv::ParallelLoopBody
309309
vAxx = v_muladd(fx, fx, vAxx);
310310

311311
// sumIx und sumIy
312-
vsumIx += fx;
313-
vsumIy += fy;
312+
vsumIx = v_add(vsumIx, fx);
313+
vsumIy = v_add(vsumIy, fy);
314314

315-
vsumW1 += vI_ps * fx;
316-
vsumW2 += vI_ps * fy;
315+
vsumW1 = v_add(vsumW1, v_mul(vI_ps, fx));
316+
vsumW2 = v_add(vsumW2, v_mul(vI_ps, fy));
317317

318318
// sumI
319-
vsumI += vI_ps;
319+
vsumI = v_add(vsumI, vI_ps);
320320

321321
// sumDI
322-
vsumDI += vI_ps * vI_ps;
322+
vsumDI = v_add(vsumDI, v_mul(vI_ps, vI_ps));
323323
}
324324
}
325325
#else
@@ -388,7 +388,7 @@ class TrackerInvoker : public cv::ParallelLoopBody
388388

389389
#if CV_SIMD128
390390
float CV_DECL_ALIGNED(16) bbuf[4];
391-
v_store_aligned(bbuf, vqb0 + vqb1);
391+
v_store_aligned(bbuf, v_add(vqb0, vqb1));
392392
b1 = bbuf[0] + bbuf[2];
393393
b2 = bbuf[1] + bbuf[3];
394394
b3 = v_reduce_sum(vqb2);
@@ -696,19 +696,19 @@ class TrackerInvoker : public cv::ParallelLoopBody
696696
v_int16x8 v01 = v_reinterpret_as_s16(v_load_expand(Jptr + x + cn));
697697
v_int16x8 v10 = v_reinterpret_as_s16(v_load_expand(Jptr1 + x));
698698
v_int16x8 v11 = v_reinterpret_as_s16(v_load_expand(Jptr1 + x + cn));
699-
v_int16x8 vmask = v_reinterpret_as_s16(v_load_expand(maskPtr + x)) * vmax_val_16;
699+
v_int16x8 vmask = v_mul(v_reinterpret_as_s16(v_load_expand(maskPtr + x)), vmax_val_16);
700700

701701
v_int32x4 t0, t1;
702702
v_int16x8 t00, t01, t10, t11;
703703
v_zip(v00, v01, t00, t01);
704704
v_zip(v10, v11, t10, t11);
705705

706-
t0 = v_dotprod(t00, vqw0, vdelta) + v_dotprod(t10, vqw1);
707-
t1 = v_dotprod(t01, vqw0, vdelta) + v_dotprod(t11, vqw1);
708-
t0 = t0 >> (W_BITS - 5);
709-
t1 = t1 >> (W_BITS - 5);
710-
diff0 = v_pack(t0, t1) - diff0;
711-
diff0 = diff0 & vmask;
706+
t0 = v_add(v_dotprod(t00, vqw0, vdelta), v_dotprod(t10, vqw1));
707+
t1 = v_add(v_dotprod(t01, vqw0, vdelta), v_dotprod(t11, vqw1));
708+
t0 = v_shr(t0, W_BITS - 5);
709+
t1 = v_shr(t1, W_BITS - 5);
710+
diff0 = v_sub(v_pack(t0, t1), diff0);
711+
diff0 = v_and(diff0, vmask);
712712

713713
v_zip(diff0, diff0, diff2, diff1); // It0 It0 It1 It1 ...
714714

@@ -717,8 +717,8 @@ class TrackerInvoker : public cv::ParallelLoopBody
717717
v_zip(vIxy_0, vIxy_1, v10, v11);
718718
v_zip(diff2, diff1, v00, v01);
719719

720-
vqb0 += v_cvt_f32(v_dotprod(v00, v10));
721-
vqb1 += v_cvt_f32(v_dotprod(v01, v11));
720+
vqb0 = v_add(vqb0, v_cvt_f32(v_dotprod(v00, v10)));
721+
vqb1 = v_add(vqb1, v_cvt_f32(v_dotprod(v01, v11)));
722722
}
723723
#else
724724
for( ; x < winSize.width*cn; x++, dIptr += 2 )
@@ -737,7 +737,7 @@ class TrackerInvoker : public cv::ParallelLoopBody
737737

738738
#if CV_SIMD128
739739
float CV_DECL_ALIGNED(16) bbuf[4];
740-
v_store_aligned(bbuf, vqb0 + vqb1);
740+
v_store_aligned(bbuf, v_add(vqb0, vqb1));
741741
b1 = bbuf[0] + bbuf[2];
742742
b2 = bbuf[1] + bbuf[3];
743743
#endif

0 commit comments

Comments
 (0)