@@ -262,26 +262,26 @@ void LearningBasedWBImpl::getAverageAndBrightestColorChromaticity(Vec2f &average
262
262
uint sumB = 0 , sumG = 0 , sumR = 0 ;
263
263
uchar *src_ptr = src.ptr <uchar>();
264
264
#if CV_SIMD128
265
- v_uint8x16 v_inB, v_inG, v_inR, v_mask;
266
- v_uint16x8 v_sR1, v_sR2, v_sG1, v_sG2, v_sB1, v_sB2, v_sum;
267
- v_uint16x8 v_max_sum = v_setall_u16 (0 ), v_max_mask, v_brightestR, v_brightestG, v_brightestB;
268
- v_uint32x4 v_uint1, v_uint2, v_SB = v_setzero_u32 (), v_SG = v_setzero_u32 (), v_SR = v_setzero_u32 ();
265
+ v_uint16x8 v_max_sum = v_setall_u16 (0 ), v_brightestR = v_setall_u16 (0 ), v_brightestG = v_setall_u16 (0 ), v_brightestB = v_setall_u16 (0 );
266
+ v_uint32x4 v_SB = v_setzero_u32 (), v_SG = v_setzero_u32 (), v_SR = v_setzero_u32 ();
269
267
for (; i < src_len - 15 ; i += 16 )
270
268
{
269
+ v_uint8x16 v_inB, v_inG, v_inR;
271
270
v_load_deinterleave (src_ptr + 3 * i, v_inB, v_inG, v_inR);
272
- v_mask = v_load (mask_ptr + i);
271
+ v_uint8x16 v_mask = v_load (mask_ptr + i);
273
272
274
273
v_inB &= v_mask;
275
274
v_inG &= v_mask;
276
275
v_inR &= v_mask;
277
276
277
+ v_uint16x8 v_sR1, v_sR2, v_sG1, v_sG2, v_sB1, v_sB2;
278
278
v_expand (v_inB, v_sB1, v_sB2);
279
279
v_expand (v_inG, v_sG1, v_sG2);
280
280
v_expand (v_inR, v_sR1, v_sR2);
281
281
282
282
// update the brightest (R,G,B) tuple (process left half):
283
- v_sum = v_sB1 + v_sG1 + v_sR1;
284
- v_max_mask = (v_sum > v_max_sum);
283
+ v_uint16x8 v_sum = v_sB1 + v_sG1 + v_sR1;
284
+ v_uint16x8 v_max_mask = (v_sum > v_max_sum);
285
285
v_max_sum = v_max (v_sum, v_max_sum);
286
286
v_brightestB = (v_sB1 & v_max_mask) + (v_brightestB & (~v_max_mask));
287
287
v_brightestG = (v_sG1 & v_max_mask) + (v_brightestG & (~v_max_mask));
@@ -299,6 +299,8 @@ void LearningBasedWBImpl::getAverageAndBrightestColorChromaticity(Vec2f &average
299
299
v_sB1 = v_sB1 + v_sB2;
300
300
v_sG1 = v_sG1 + v_sG2;
301
301
v_sR1 = v_sR1 + v_sR2;
302
+
303
+ v_uint32x4 v_uint1, v_uint2;
302
304
v_expand (v_sB1, v_uint1, v_uint2);
303
305
v_SB += v_uint1 + v_uint2;
304
306
v_expand (v_sG1, v_uint1, v_uint2);
@@ -351,27 +353,28 @@ void LearningBasedWBImpl::getAverageAndBrightestColorChromaticity(Vec2f &average
351
353
uint64 sumB = 0 , sumG = 0 , sumR = 0 ;
352
354
ushort *src_ptr = src.ptr <ushort>();
353
355
#if CV_SIMD128
354
- v_uint16x8 v_inB, v_inG, v_inR, v_mask, v_mask_lower = v_setall_u16 (255 );
355
- v_uint32x4 v_iR1, v_iR2, v_iG1, v_iG2, v_iB1, v_iB2, v_sum;
356
- v_uint32x4 v_max_sum = v_setall_u32 (0 ), v_max_mask, v_brightestR, v_brightestG, v_brightestB;
357
- v_uint64x2 v_uint64_1, v_uint64_2, v_SB = v_setzero_u64 (), v_SG = v_setzero_u64 (), v_SR = v_setzero_u64 ();
356
+ const v_uint16x8 v_mask_lower = v_setall_u16 (255 );
357
+ v_uint32x4 v_max_sum = v_setall_u32 (0 ), v_brightestR = v_setall_u32 (0 ), v_brightestG = v_setall_u32 (0 ), v_brightestB = v_setall_u32 (0 );
358
+ v_uint64x2 v_SB = v_setzero_u64 (), v_SG = v_setzero_u64 (), v_SR = v_setzero_u64 ();
358
359
for (; i < src_len - 7 ; i += 8 )
359
360
{
361
+ v_uint16x8 v_inB, v_inG, v_inR;
360
362
v_load_deinterleave (src_ptr + 3 * i, v_inB, v_inG, v_inR);
361
- v_mask = v_load_expand (mask_ptr + i);
363
+ v_uint16x8 v_mask = v_load_expand (mask_ptr + i);
362
364
v_mask = v_mask | ((v_mask & v_mask_lower) << 8 );
363
365
364
366
v_inB &= v_mask;
365
367
v_inG &= v_mask;
366
368
v_inR &= v_mask;
367
369
370
+ v_uint32x4 v_iR1, v_iR2, v_iG1, v_iG2, v_iB1, v_iB2;
368
371
v_expand (v_inB, v_iB1, v_iB2);
369
372
v_expand (v_inG, v_iG1, v_iG2);
370
373
v_expand (v_inR, v_iR1, v_iR2);
371
374
372
375
// update the brightest (R,G,B) tuple (process left half):
373
- v_sum = v_iB1 + v_iG1 + v_iR1;
374
- v_max_mask = (v_sum > v_max_sum);
376
+ v_uint32x4 v_sum = v_iB1 + v_iG1 + v_iR1;
377
+ v_uint32x4 v_max_mask = (v_sum > v_max_sum);
375
378
v_max_sum = v_max (v_sum, v_max_sum);
376
379
v_brightestB = (v_iB1 & v_max_mask) + (v_brightestB & (~v_max_mask));
377
380
v_brightestG = (v_iG1 & v_max_mask) + (v_brightestG & (~v_max_mask));
@@ -389,6 +392,7 @@ void LearningBasedWBImpl::getAverageAndBrightestColorChromaticity(Vec2f &average
389
392
v_iB1 = v_iB1 + v_iB2;
390
393
v_iG1 = v_iG1 + v_iG2;
391
394
v_iR1 = v_iR1 + v_iR2;
395
+ v_uint64x2 v_uint64_1, v_uint64_2;
392
396
v_expand (v_iB1, v_uint64_1, v_uint64_2);
393
397
v_SB += v_uint64_1 + v_uint64_2;
394
398
v_expand (v_iG1, v_uint64_1, v_uint64_2);
0 commit comments