features2d: fixed out of bounds access in SIFT

mshabunin · mshabunin · commit 6308739638ab · 2024-09-13T14:30:27.000+03:00
diff --git a/modules/features2d/src/sift.simd.hpp b/modules/features2d/src/sift.simd.hpp
@@ -150,7 +150,7 @@ void findScaleSpaceExtrema(
 
 void calcSIFTDescriptor(
         const Mat& img, Point2f ptf, float ori, float scl,
-        int d, int n, Mat& dst, int row
+        const int d, const int n, Mat& dst, int row
 );
 
 
@@ -708,7 +708,7 @@ void findScaleSpaceExtrema(
 
 void calcSIFTDescriptor(
         const Mat& img, Point2f ptf, float ori, float scl,
-        int d, int n, Mat& dstMat, int row
+        const int d, const int n, Mat& dstMat, int row
 )
 {
     CV_TRACE_FUNCTION();
@@ -725,7 +725,10 @@ void calcSIFTDescriptor(
     cos_t /= hist_width;
     sin_t /= hist_width;
 
-    int i, j, k, len = (radius*2+1)*(radius*2+1), histlen = (d+2)*(d+2)*(n+2);
+    int i, j, k;
+    const int len = (radius*2+1)*(radius*2+1);
+    const int len_hist = (d+2)*(d+2)*(n+2);
+    const int len_ddn = d * d * n;
     int rows = img.rows, cols = img.cols;
 
     cv::utils::BufferArea area;
@@ -736,8 +739,8 @@ void calcSIFTDescriptor(
     area.allocate(W, len, CV_SIMD_WIDTH);
     area.allocate(RBin, len, CV_SIMD_WIDTH);
     area.allocate(CBin, len, CV_SIMD_WIDTH);
-    area.allocate(hist, histlen, CV_SIMD_WIDTH);
-    area.allocate(rawDst, len, CV_SIMD_WIDTH);
+    area.allocate(hist, len_hist, CV_SIMD_WIDTH);
+    area.allocate(rawDst, len_ddn, CV_SIMD_WIDTH);
     area.commit();
     Mag = Y;
 
@@ -771,10 +774,10 @@ void calcSIFTDescriptor(
             }
         }
 
-    len = k;
-    cv::hal::fastAtan2(Y, X, Ori, len, true);
-    cv::hal::magnitude32f(X, Y, Mag, len);
-    cv::hal::exp32f(W, W, len);
+    const int len_left = k;
+    cv::hal::fastAtan2(Y, X, Ori, len_left, true);
+    cv::hal::magnitude32f(X, Y, Mag, len_left);
+    cv::hal::exp32f(W, W, len_left);
 
     k = 0;
 #if (CV_SIMD || CV_SIMD_SCALABLE)
@@ -788,7 +791,7 @@ void calcSIFTDescriptor(
         const v_int32 __1 = vx_setall_s32(1);
         const v_int32 __d_plus_2 = vx_setall_s32(d+2);
         const v_int32 __n_plus_2 = vx_setall_s32(n+2);
-        for( ; k <= len - vecsize; k += vecsize )
+        for( ; k <= len_left - vecsize; k += vecsize )
         {
             v_float32 rbin = vx_load_aligned(RBin + k);
             v_float32 cbin = vx_load_aligned(CBin + k);
@@ -839,7 +842,7 @@ void calcSIFTDescriptor(
         }
     }
 #endif
-    for( ; k < len; k++ )
+    for( ; k < len_left; k++ )
     {
         float rbin = RBin[k], cbin = CBin[k];
         float obin = (Ori[k] - ori)*bins_per_rad;
@@ -892,24 +895,23 @@ void calcSIFTDescriptor(
     // and scale the result, so that it can be easily converted
     // to byte array
     float nrm2 = 0;
-    len = d*d*n;
     k = 0;
 #if (CV_SIMD || CV_SIMD_SCALABLE)
     {
         v_float32 __nrm2 = vx_setzero_f32();
         v_float32 __rawDst;
-        for( ; k <= len - VTraits<v_float32>::vlanes(); k += VTraits<v_float32>::vlanes() )
+        for( ; k <= len_ddn - VTraits<v_float32>::vlanes(); k += VTraits<v_float32>::vlanes() )
         {
             __rawDst = vx_load_aligned(rawDst + k);
             __nrm2 = v_fma(__rawDst, __rawDst, __nrm2);
         }
         nrm2 = (float)v_reduce_sum(__nrm2);
     }
 #endif
-    for( ; k < len; k++ )
+    for( ; k < len_ddn; k++ )
         nrm2 += rawDst[k]*rawDst[k];
 
-    float thr = std::sqrt(nrm2)*SIFT_DESCR_MAG_THR;
+    const float thr = std::sqrt(nrm2)*SIFT_DESCR_MAG_THR;
 
     i = 0, nrm2 = 0;
 #if 0 //CV_AVX2
@@ -920,7 +922,7 @@ void calcSIFTDescriptor(
         __m256 __dst;
         __m256 __nrm2 = _mm256_setzero_ps();
         __m256 __thr = _mm256_set1_ps(thr);
-        for( ; i <= len - 8; i += 8 )
+        for( ; i <= len_ddn - 8; i += 8 )
         {
             __dst = _mm256_loadu_ps(&rawDst[i]);
             __dst = _mm256_min_ps(__dst, __thr);
@@ -936,7 +938,7 @@ void calcSIFTDescriptor(
                nrm2_buf[4] + nrm2_buf[5] + nrm2_buf[6] + nrm2_buf[7];
     }
 #endif
-    for( ; i < len; i++ )
+    for( ; i < len_ddn; i++ )
     {
         float val = std::min(rawDst[i], thr);
         rawDst[i] = val;
@@ -954,7 +956,7 @@ if( dstMat.type() == CV_32F )
     v_float32 __min = vx_setzero_f32();
     v_float32 __max = vx_setall_f32(255.0f); // max of uchar
     v_float32 __nrm2 = vx_setall_f32(nrm2);
-    for( k = 0; k <= len - VTraits<v_float32>::vlanes(); k += VTraits<v_float32>::vlanes() )
+    for( k = 0; k <= len_ddn - VTraits<v_float32>::vlanes(); k += VTraits<v_float32>::vlanes() )
     {
         __dst = vx_load_aligned(rawDst + k);
         __dst = v_min(v_max(v_cvt_f32(v_round(v_mul(__dst, __nrm2))), __min), __max);
@@ -965,7 +967,7 @@ if( dstMat.type() == CV_32F )
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Waggressive-loop-optimizations"  // iteration XX invokes undefined behavior
 #endif
-    for( ; k < len; k++ )
+    for( ; k < len_ddn; k++ )
     {
         dst[k] = saturate_cast<uchar>(rawDst[k]*nrm2);
     }
@@ -980,7 +982,7 @@ else // CV_8U
     v_float32 __dst0, __dst1;
     v_uint16 __pack01;
     v_float32 __nrm2 = vx_setall_f32(nrm2);
-    for( k = 0; k <= len - VTraits<v_float32>::vlanes() * 2; k += VTraits<v_float32>::vlanes() * 2 )
+    for( k = 0; k <= len_ddn - VTraits<v_float32>::vlanes() * 2; k += VTraits<v_float32>::vlanes() * 2 )
     {
         __dst0 = vx_load_aligned(rawDst + k);
         __dst1 = vx_load_aligned(rawDst + k + VTraits<v_float32>::vlanes());
@@ -994,7 +996,7 @@ else // CV_8U
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Waggressive-loop-optimizations"  // iteration XX invokes undefined behavior
 #endif
-    for( ; k < len; k++ )
+    for( ; k < len_ddn; k++ )
     {
         dst[k] = saturate_cast<uchar>(rawDst[k]*nrm2);
     }
@@ -1004,7 +1006,7 @@ else // CV_8U
 }
 #else
     float nrm1 = 0;
-    for( k = 0; k < len; k++ )
+    for( k = 0; k < len_ddn; k++ )
     {
         rawDst[k] *= nrm2;
         nrm1 += rawDst[k];
@@ -1013,15 +1015,15 @@ else // CV_8U
     if( dstMat.type() == CV_32F )
     {
         float *dst = dstMat.ptr<float>(row);
-        for( k = 0; k < len; k++ )
+        for( k = 0; k < len_ddn; k++ )
         {
             dst[k] = std::sqrt(rawDst[k] * nrm1);
         }
     }
     else // CV_8U
     {
         uint8_t *dst = dstMat.ptr<uint8_t>(row);
-        for( k = 0; k < len; k++ )
+        for( k = 0; k < len_ddn; k++ )
         {
             dst[k] = saturate_cast<uchar>(std::sqrt(rawDst[k] * nrm1)*SIFT_INT_DESCR_FCTR);
         }
diff --git a/modules/features2d/test/test_sift.cpp b/modules/features2d/test/test_sift.cpp
@@ -30,5 +30,17 @@ TEST(Features2d_SIFT, descriptor_type)
     ASSERT_EQ(countNonZero(diff), 0) << "descriptors are not identical";
 }
 
+TEST(Features2d_SIFT, regression_26139)
+{
+    auto extractor = cv::SIFT::create();
+    cv::Mat1b image{cv::Size{300, 300}, 0};
+    std::vector<cv::KeyPoint> kps {
+        cv::KeyPoint(154.076813f, 136.160904f, 111.078636f, 216.195618f, 0.00000899323549f, 7)
+    };
+    cv::Mat descriptors;
+    extractor->compute(image, kps, descriptors); // we expect no memory corruption
+    ASSERT_EQ(descriptors.size(), Size(128, 1));
+}
+
 
 }} // namespace