@@ -150,7 +150,7 @@ void findScaleSpaceExtrema(
150
150
151
151
void calcSIFTDescriptor (
152
152
const Mat& img, Point2f ptf, float ori, float scl,
153
- int d, int n, Mat& dst, int row
153
+ const int d, const int n, Mat& dst, int row
154
154
);
155
155
156
156
@@ -708,7 +708,7 @@ void findScaleSpaceExtrema(
708
708
709
709
void calcSIFTDescriptor (
710
710
const Mat& img, Point2f ptf, float ori, float scl,
711
- int d, int n, Mat& dstMat, int row
711
+ const int d, const int n, Mat& dstMat, int row
712
712
)
713
713
{
714
714
CV_TRACE_FUNCTION ();
@@ -725,7 +725,10 @@ void calcSIFTDescriptor(
725
725
cos_t /= hist_width;
726
726
sin_t /= hist_width;
727
727
728
- int i, j, k, len = (radius*2 +1 )*(radius*2 +1 ), histlen = (d+2 )*(d+2 )*(n+2 );
728
+ int i, j, k;
729
+ const int len = (radius*2 +1 )*(radius*2 +1 );
730
+ const int len_hist = (d+2 )*(d+2 )*(n+2 );
731
+ const int len_ddn = d * d * n;
729
732
int rows = img.rows , cols = img.cols ;
730
733
731
734
cv::utils::BufferArea area;
@@ -736,8 +739,8 @@ void calcSIFTDescriptor(
736
739
area.allocate (W, len, CV_SIMD_WIDTH);
737
740
area.allocate (RBin, len, CV_SIMD_WIDTH);
738
741
area.allocate (CBin, len, CV_SIMD_WIDTH);
739
- area.allocate (hist, histlen , CV_SIMD_WIDTH);
740
- area.allocate (rawDst, len , CV_SIMD_WIDTH);
742
+ area.allocate (hist, len_hist , CV_SIMD_WIDTH);
743
+ area.allocate (rawDst, len_ddn , CV_SIMD_WIDTH);
741
744
area.commit ();
742
745
Mag = Y;
743
746
@@ -771,10 +774,10 @@ void calcSIFTDescriptor(
771
774
}
772
775
}
773
776
774
- len = k;
775
- cv::hal::fastAtan2 (Y, X, Ori, len , true );
776
- cv::hal::magnitude32f (X, Y, Mag, len );
777
- cv::hal::exp32f (W, W, len );
777
+ const int len_left = k;
778
+ cv::hal::fastAtan2 (Y, X, Ori, len_left , true );
779
+ cv::hal::magnitude32f (X, Y, Mag, len_left );
780
+ cv::hal::exp32f (W, W, len_left );
778
781
779
782
k = 0 ;
780
783
#if (CV_SIMD || CV_SIMD_SCALABLE)
@@ -788,7 +791,7 @@ void calcSIFTDescriptor(
788
791
const v_int32 __1 = vx_setall_s32 (1 );
789
792
const v_int32 __d_plus_2 = vx_setall_s32 (d+2 );
790
793
const v_int32 __n_plus_2 = vx_setall_s32 (n+2 );
791
- for ( ; k <= len - vecsize; k += vecsize )
794
+ for ( ; k <= len_left - vecsize; k += vecsize )
792
795
{
793
796
v_float32 rbin = vx_load_aligned (RBin + k);
794
797
v_float32 cbin = vx_load_aligned (CBin + k);
@@ -839,7 +842,7 @@ void calcSIFTDescriptor(
839
842
}
840
843
}
841
844
#endif
842
- for ( ; k < len ; k++ )
845
+ for ( ; k < len_left ; k++ )
843
846
{
844
847
float rbin = RBin[k], cbin = CBin[k];
845
848
float obin = (Ori[k] - ori)*bins_per_rad;
@@ -892,24 +895,23 @@ void calcSIFTDescriptor(
892
895
// and scale the result, so that it can be easily converted
893
896
// to byte array
894
897
float nrm2 = 0 ;
895
- len = d*d*n;
896
898
k = 0 ;
897
899
#if (CV_SIMD || CV_SIMD_SCALABLE)
898
900
{
899
901
v_float32 __nrm2 = vx_setzero_f32 ();
900
902
v_float32 __rawDst;
901
- for ( ; k <= len - VTraits<v_float32>::vlanes (); k += VTraits<v_float32>::vlanes () )
903
+ for ( ; k <= len_ddn - VTraits<v_float32>::vlanes (); k += VTraits<v_float32>::vlanes () )
902
904
{
903
905
__rawDst = vx_load_aligned (rawDst + k);
904
906
__nrm2 = v_fma (__rawDst, __rawDst, __nrm2);
905
907
}
906
908
nrm2 = (float )v_reduce_sum (__nrm2);
907
909
}
908
910
#endif
909
- for ( ; k < len ; k++ )
911
+ for ( ; k < len_ddn ; k++ )
910
912
nrm2 += rawDst[k]*rawDst[k];
911
913
912
- float thr = std::sqrt (nrm2)*SIFT_DESCR_MAG_THR;
914
+ const float thr = std::sqrt (nrm2)*SIFT_DESCR_MAG_THR;
913
915
914
916
i = 0 , nrm2 = 0 ;
915
917
#if 0 //CV_AVX2
@@ -920,7 +922,7 @@ void calcSIFTDescriptor(
920
922
__m256 __dst;
921
923
__m256 __nrm2 = _mm256_setzero_ps();
922
924
__m256 __thr = _mm256_set1_ps(thr);
923
- for( ; i <= len - 8; i += 8 )
925
+ for( ; i <= len_ddn - 8; i += 8 )
924
926
{
925
927
__dst = _mm256_loadu_ps(&rawDst[i]);
926
928
__dst = _mm256_min_ps(__dst, __thr);
@@ -936,7 +938,7 @@ void calcSIFTDescriptor(
936
938
nrm2_buf[4] + nrm2_buf[5] + nrm2_buf[6] + nrm2_buf[7];
937
939
}
938
940
#endif
939
- for ( ; i < len ; i++ )
941
+ for ( ; i < len_ddn ; i++ )
940
942
{
941
943
float val = std::min (rawDst[i], thr);
942
944
rawDst[i] = val;
@@ -954,7 +956,7 @@ if( dstMat.type() == CV_32F )
954
956
v_float32 __min = vx_setzero_f32 ();
955
957
v_float32 __max = vx_setall_f32 (255 .0f ); // max of uchar
956
958
v_float32 __nrm2 = vx_setall_f32 (nrm2);
957
- for ( k = 0 ; k <= len - VTraits<v_float32>::vlanes (); k += VTraits<v_float32>::vlanes () )
959
+ for ( k = 0 ; k <= len_ddn - VTraits<v_float32>::vlanes (); k += VTraits<v_float32>::vlanes () )
958
960
{
959
961
__dst = vx_load_aligned (rawDst + k);
960
962
__dst = v_min (v_max (v_cvt_f32 (v_round (v_mul (__dst, __nrm2))), __min), __max);
@@ -965,7 +967,7 @@ if( dstMat.type() == CV_32F )
965
967
#pragma GCC diagnostic push
966
968
#pragma GCC diagnostic ignored "-Waggressive-loop-optimizations" // iteration XX invokes undefined behavior
967
969
#endif
968
- for ( ; k < len ; k++ )
970
+ for ( ; k < len_ddn ; k++ )
969
971
{
970
972
dst[k] = saturate_cast<uchar>(rawDst[k]*nrm2);
971
973
}
@@ -980,7 +982,7 @@ else // CV_8U
980
982
v_float32 __dst0, __dst1;
981
983
v_uint16 __pack01;
982
984
v_float32 __nrm2 = vx_setall_f32 (nrm2);
983
- for ( k = 0 ; k <= len - VTraits<v_float32>::vlanes () * 2 ; k += VTraits<v_float32>::vlanes () * 2 )
985
+ for ( k = 0 ; k <= len_ddn - VTraits<v_float32>::vlanes () * 2 ; k += VTraits<v_float32>::vlanes () * 2 )
984
986
{
985
987
__dst0 = vx_load_aligned (rawDst + k);
986
988
__dst1 = vx_load_aligned (rawDst + k + VTraits<v_float32>::vlanes ());
@@ -994,7 +996,7 @@ else // CV_8U
994
996
#pragma GCC diagnostic push
995
997
#pragma GCC diagnostic ignored "-Waggressive-loop-optimizations" // iteration XX invokes undefined behavior
996
998
#endif
997
- for ( ; k < len ; k++ )
999
+ for ( ; k < len_ddn ; k++ )
998
1000
{
999
1001
dst[k] = saturate_cast<uchar>(rawDst[k]*nrm2);
1000
1002
}
@@ -1004,7 +1006,7 @@ else // CV_8U
1004
1006
}
1005
1007
#else
1006
1008
float nrm1 = 0;
1007
- for( k = 0; k < len ; k++ )
1009
+ for( k = 0; k < len_ddn ; k++ )
1008
1010
{
1009
1011
rawDst[k] *= nrm2;
1010
1012
nrm1 += rawDst[k];
@@ -1013,15 +1015,15 @@ else // CV_8U
1013
1015
if( dstMat.type() == CV_32F )
1014
1016
{
1015
1017
float *dst = dstMat.ptr<float>(row);
1016
- for( k = 0; k < len ; k++ )
1018
+ for( k = 0; k < len_ddn ; k++ )
1017
1019
{
1018
1020
dst[k] = std::sqrt(rawDst[k] * nrm1);
1019
1021
}
1020
1022
}
1021
1023
else // CV_8U
1022
1024
{
1023
1025
uint8_t *dst = dstMat.ptr<uint8_t>(row);
1024
- for( k = 0; k < len ; k++ )
1026
+ for( k = 0; k < len_ddn ; k++ )
1025
1027
{
1026
1028
dst[k] = saturate_cast<uchar>(std::sqrt(rawDst[k] * nrm1)*SIFT_INT_DESCR_FCTR);
1027
1029
}
0 commit comments