Merge pull request opencv#18001 from Yosshi999:sift-8bit-descr

Yosshi999 · web-flow · commit 1834eed8098a · 2020-08-17T10:28:44.000Z
* 8-bit SIFT descriptors

* use clearer parameter

* update docs

* propagate type info

* overload function for avoiding ABI-break

* bugfix: some values are undefined when CV_SIMD is absent
diff --git a/modules/features2d/include/opencv2/features2d.hpp b/modules/features2d/include/opencv2/features2d.hpp
@@ -301,6 +301,33 @@ class CV_EXPORTS_W SIFT : public Feature2D
         double contrastThreshold = 0.04, double edgeThreshold = 10,
         double sigma = 1.6);
 
+    /** @brief Create SIFT with specified descriptorType.
+    @param nfeatures The number of best features to retain. The features are ranked by their scores
+    (measured in SIFT algorithm as the local contrast)
+
+    @param nOctaveLayers The number of layers in each octave. 3 is the value used in D. Lowe paper. The
+    number of octaves is computed automatically from the image resolution.
+
+    @param contrastThreshold The contrast threshold used to filter out weak features in semi-uniform
+    (low-contrast) regions. The larger the threshold, the less features are produced by the detector.
+
+    @note The contrast threshold will be divided by nOctaveLayers when the filtering is applied. When
+    nOctaveLayers is set to default and if you want to use the value used in D. Lowe paper, 0.03, set
+    this argument to 0.09.
+
+    @param edgeThreshold The threshold used to filter out edge-like features. Note that the its meaning
+    is different from the contrastThreshold, i.e. the larger the edgeThreshold, the less features are
+    filtered out (more features are retained).
+
+    @param sigma The sigma of the Gaussian applied to the input image at the octave \#0. If your image
+    is captured with a weak camera with soft lenses, you might want to reduce the number.
+
+    @param descriptorType The type of descriptors. Only CV_32F and CV_8U are supported.
+    */
+    CV_WRAP static Ptr<SIFT> create(int nfeatures, int nOctaveLayers,
+        double contrastThreshold, double edgeThreshold,
+        double sigma, int descriptorType);
+
     CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
 };
 
diff --git a/modules/features2d/src/sift.dispatch.cpp b/modules/features2d/src/sift.dispatch.cpp
@@ -88,7 +88,7 @@ class SIFT_Impl : public SIFT
 public:
     explicit SIFT_Impl( int nfeatures = 0, int nOctaveLayers = 3,
                           double contrastThreshold = 0.04, double edgeThreshold = 10,
-                          double sigma = 1.6);
+                          double sigma = 1.6, int descriptorType = CV_32F );
 
     //! returns the descriptor size in floats (128)
     int descriptorSize() const CV_OVERRIDE;
@@ -117,13 +117,25 @@ class SIFT_Impl : public SIFT
     CV_PROP_RW double contrastThreshold;
     CV_PROP_RW double edgeThreshold;
     CV_PROP_RW double sigma;
+    CV_PROP_RW int descriptor_type;
 };
 
 Ptr<SIFT> SIFT::create( int _nfeatures, int _nOctaveLayers,
                      double _contrastThreshold, double _edgeThreshold, double _sigma )
 {
     CV_TRACE_FUNCTION();
-    return makePtr<SIFT_Impl>(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma);
+
+    return makePtr<SIFT_Impl>(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma, CV_32F);
+}
+
+Ptr<SIFT> SIFT::create( int _nfeatures, int _nOctaveLayers,
+                     double _contrastThreshold, double _edgeThreshold, double _sigma, int _descriptorType )
+{
+    CV_TRACE_FUNCTION();
+
+    // SIFT descriptor supports 32bit floating point and 8bit unsigned int.
+    CV_Assert(_descriptorType == CV_32F || _descriptorType == CV_8U);
+    return makePtr<SIFT_Impl>(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma, _descriptorType);
 }
 
 String SIFT::getDefaultName() const
@@ -362,12 +374,12 @@ void SIFT_Impl::findScaleSpaceExtrema( const std::vector<Mat>& gauss_pyr, const
 static
 void calcSIFTDescriptor(
         const Mat& img, Point2f ptf, float ori, float scl,
-        int d, int n, float* dst
+        int d, int n, Mat& dst, int row
 )
 {
     CV_TRACE_FUNCTION();
 
-    CV_CPU_DISPATCH(calcSIFTDescriptor, (img, ptf, ori, scl, d, n, dst),
+    CV_CPU_DISPATCH(calcSIFTDescriptor, (img, ptf, ori, scl, d, n, dst, row),
         CV_CPU_DISPATCH_MODES_ALL);
 }
 
@@ -408,7 +420,7 @@ class calcDescriptorsComputer : public ParallelLoopBody
             float angle = 360.f - kpt.angle;
             if(std::abs(angle - 360.f) < FLT_EPSILON)
                 angle = 0.f;
-            calcSIFTDescriptor(img, ptf, angle, size*0.5f, d, n, descriptors.ptr<float>((int)i));
+            calcSIFTDescriptor(img, ptf, angle, size*0.5f, d, n, descriptors, i);
         }
     }
 private:
@@ -429,9 +441,9 @@ static void calcDescriptors(const std::vector<Mat>& gpyr, const std::vector<KeyP
 //////////////////////////////////////////////////////////////////////////////////////////
 
 SIFT_Impl::SIFT_Impl( int _nfeatures, int _nOctaveLayers,
-           double _contrastThreshold, double _edgeThreshold, double _sigma )
+           double _contrastThreshold, double _edgeThreshold, double _sigma, int _descriptorType )
     : nfeatures(_nfeatures), nOctaveLayers(_nOctaveLayers),
-    contrastThreshold(_contrastThreshold), edgeThreshold(_edgeThreshold), sigma(_sigma)
+    contrastThreshold(_contrastThreshold), edgeThreshold(_edgeThreshold), sigma(_sigma), descriptor_type(_descriptorType)
 {
 }
 
@@ -442,7 +454,7 @@ int SIFT_Impl::descriptorSize() const
 
 int SIFT_Impl::descriptorType() const
 {
-    return CV_32F;
+    return descriptor_type;
 }
 
 int SIFT_Impl::defaultNorm() const
@@ -533,9 +545,9 @@ void SIFT_Impl::detectAndCompute(InputArray _image, InputArray _mask,
     {
         //t = (double)getTickCount();
         int dsize = descriptorSize();
-        _descriptors.create((int)keypoints.size(), dsize, CV_32F);
-        Mat descriptors = _descriptors.getMat();
+        _descriptors.create((int)keypoints.size(), dsize, descriptor_type);
 
+        Mat descriptors = _descriptors.getMat();
         calcDescriptors(gpyr, keypoints, descriptors, nOctaveLayers, firstOctave);
         //t = (double)getTickCount() - t;
         //printf("descriptor extraction time: %g\n", t*1000./tf);
diff --git a/modules/features2d/src/sift.simd.hpp b/modules/features2d/src/sift.simd.hpp
@@ -150,7 +150,7 @@ void findScaleSpaceExtrema(
 
 void calcSIFTDescriptor(
         const Mat& img, Point2f ptf, float ori, float scl,
-        int d, int n, float* dst
+        int d, int n, Mat& dst, int row
 );
 
 
@@ -555,7 +555,7 @@ void findScaleSpaceExtrema(
 
 void calcSIFTDescriptor(
         const Mat& img, Point2f ptf, float ori, float scl,
-        int d, int n, float* dst
+        int d, int n, Mat& dstMat, int row
 )
 {
     CV_TRACE_FUNCTION();
@@ -575,9 +575,18 @@ void calcSIFTDescriptor(
     int i, j, k, len = (radius*2+1)*(radius*2+1), histlen = (d+2)*(d+2)*(n+2);
     int rows = img.rows, cols = img.cols;
 
-    AutoBuffer<float> buf(len*6 + histlen);
-    float *X = buf.data(), *Y = X + len, *Mag = Y, *Ori = Mag + len, *W = Ori + len;
-    float *RBin = W + len, *CBin = RBin + len, *hist = CBin + len;
+    cv::utils::BufferArea area;
+    float *X = 0, *Y = 0, *Mag, *Ori = 0, *W = 0, *RBin = 0, *CBin = 0, *hist = 0, *rawDst = 0;
+    area.allocate(X, len, CV_SIMD_WIDTH);
+    area.allocate(Y, len, CV_SIMD_WIDTH);
+    area.allocate(Ori, len, CV_SIMD_WIDTH);
+    area.allocate(W, len, CV_SIMD_WIDTH);
+    area.allocate(RBin, len, CV_SIMD_WIDTH);
+    area.allocate(CBin, len, CV_SIMD_WIDTH);
+    area.allocate(hist, histlen, CV_SIMD_WIDTH);
+    area.allocate(rawDst, len, CV_SIMD_WIDTH);
+    area.commit();
+    Mag = Y;
 
     for( i = 0; i < d+2; i++ )
     {
@@ -628,10 +637,10 @@ void calcSIFTDescriptor(
         const v_int32 __n_plus_2 = vx_setall_s32(n+2);
         for( ; k <= len - vecsize; k += vecsize )
         {
-            v_float32 rbin = vx_load(RBin + k);
-            v_float32 cbin = vx_load(CBin + k);
-            v_float32 obin = (vx_load(Ori + k) - __ori) * __bins_per_rad;
-            v_float32 mag = vx_load(Mag + k) * vx_load(W + k);
+            v_float32 rbin = vx_load_aligned(RBin + k);
+            v_float32 cbin = vx_load_aligned(CBin + k);
+            v_float32 obin = (vx_load_aligned(Ori + k) - __ori) * __bins_per_rad;
+            v_float32 mag = vx_load_aligned(Mag + k) * vx_load_aligned(W + k);
 
             v_int32 r0 = v_floor(rbin);
             v_int32 c0 = v_floor(cbin);
@@ -723,7 +732,7 @@ void calcSIFTDescriptor(
             hist[idx] += hist[idx+n];
             hist[idx+1] += hist[idx+n+1];
             for( k = 0; k < n; k++ )
-                dst[(i*d + j)*n + k] = hist[idx+k];
+                rawDst[(i*d + j)*n + k] = hist[idx+k];
         }
     // copy histogram to the descriptor,
     // apply hysteresis thresholding
@@ -735,17 +744,17 @@ void calcSIFTDescriptor(
 #if CV_SIMD
     {
         v_float32 __nrm2 = vx_setzero_f32();
-        v_float32 __dst;
+        v_float32 __rawDst;
         for( ; k <= len - v_float32::nlanes; k += v_float32::nlanes )
         {
-            __dst = vx_load(dst + k);
-            __nrm2 = v_fma(__dst, __dst, __nrm2);
+            __rawDst = vx_load_aligned(rawDst + k);
+            __nrm2 = v_fma(__rawDst, __rawDst, __nrm2);
         }
         nrm2 = (float)v_reduce_sum(__nrm2);
     }
 #endif
     for( ; k < len; k++ )
-        nrm2 += dst[k]*dst[k];
+        nrm2 += rawDst[k]*rawDst[k];
 
     float thr = std::sqrt(nrm2)*SIFT_DESCR_MAG_THR;
 
@@ -760,9 +769,9 @@ void calcSIFTDescriptor(
         __m256 __thr = _mm256_set1_ps(thr);
         for( ; i <= len - 8; i += 8 )
         {
-            __dst = _mm256_loadu_ps(&dst[i]);
+            __dst = _mm256_loadu_ps(&rawDst[i]);
             __dst = _mm256_min_ps(__dst, __thr);
-            _mm256_storeu_ps(&dst[i], __dst);
+            _mm256_storeu_ps(&rawDst[i], __dst);
 #if CV_FMA3
             __nrm2 = _mm256_fmadd_ps(__dst, __dst, __nrm2);
 #else
@@ -776,44 +785,78 @@ void calcSIFTDescriptor(
 #endif
     for( ; i < len; i++ )
     {
-        float val = std::min(dst[i], thr);
-        dst[i] = val;
+        float val = std::min(rawDst[i], thr);
+        rawDst[i] = val;
         nrm2 += val*val;
     }
     nrm2 = SIFT_INT_DESCR_FCTR/std::max(std::sqrt(nrm2), FLT_EPSILON);
 
 #if 1
     k = 0;
+if( dstMat.type() == CV_32F )
+{
+    float* dst = dstMat.ptr<float>(row);
 #if CV_SIMD
+    v_float32 __dst;
+    v_float32 __min = vx_setzero_f32();
+    v_float32 __max = vx_setall_f32(255.0f); // max of uchar
+    v_float32 __nrm2 = vx_setall_f32(nrm2);
+    for( k = 0; k <= len - v_float32::nlanes; k += v_float32::nlanes )
     {
-        v_float32 __dst;
-        v_float32 __min = vx_setzero_f32();
-        v_float32 __max = vx_setall_f32(255.0f); // max of uchar
-        v_float32 __nrm2 = vx_setall_f32(nrm2);
-        for( k = 0; k <= len - v_float32::nlanes; k += v_float32::nlanes )
-        {
-            __dst = vx_load(dst + k);
-            __dst = v_min(v_max(v_cvt_f32(v_round(__dst * __nrm2)), __min), __max);
-            v_store(dst + k, __dst);
-        }
+        __dst = vx_load_aligned(rawDst + k);
+        __dst = v_min(v_max(v_cvt_f32(v_round(__dst * __nrm2)), __min), __max);
+        v_store(dst + k, __dst);
     }
 #endif
     for( ; k < len; k++ )
     {
-        dst[k] = saturate_cast<uchar>(dst[k]*nrm2);
+        dst[k] = saturate_cast<uchar>(rawDst[k]*nrm2);
     }
+}
+else // CV_8U
+{
+    uint8_t* dst = dstMat.ptr<uint8_t>(row);
+#if CV_SIMD
+    v_float32 __dst0, __dst1;
+    v_uint16 __pack01;
+    v_float32 __nrm2 = vx_setall_f32(nrm2);
+    for( k = 0; k <= len - v_float32::nlanes * 2; k += v_float32::nlanes * 2 )
+    {
+        __dst0 = vx_load_aligned(rawDst + k);
+        __dst1 = vx_load_aligned(rawDst + k + v_float32::nlanes);
+
+        __pack01 = v_pack_u(v_round(__dst0 * __nrm2), v_round(__dst1 * __nrm2));
+        v_pack_store(dst + k, __pack01);
+    }
+#endif
+    for( ; k < len; k++ )
+    {
+        dst[k] = saturate_cast<uchar>(rawDst[k]*nrm2);
+    }
+}
 #else
+    float* dst = dstMat.ptr<float>(row);
     float nrm1 = 0;
     for( k = 0; k < len; k++ )
     {
-        dst[k] *= nrm2;
-        nrm1 += dst[k];
+        rawDst[k] *= nrm2;
+        nrm1 += rawDst[k];
     }
     nrm1 = 1.f/std::max(nrm1, FLT_EPSILON);
+if( dstMat.type() == CV_32F )
+{
     for( k = 0; k < len; k++ )
     {
-        dst[k] = std::sqrt(dst[k] * nrm1);//saturate_cast<uchar>(std::sqrt(dst[k] * nrm1)*SIFT_INT_DESCR_FCTR);
+        dst[k] = std::sqrt(rawDst[k] * nrm1);
     }
+}
+else // CV_8U
+{
+    for( k = 0; k < len; k++ )
+    {
+        dst[k] = saturate_cast<uchar>(std::sqrt(rawDst[k] * nrm1)*SIFT_INT_DESCR_FCTR);
+    }
+}
 #endif
 }
 
diff --git a/modules/features2d/test/test_sift.cpp b/modules/features2d/test/test_sift.cpp
@@ -0,0 +1,34 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+#include "test_precomp.hpp"
+
+namespace opencv_test { namespace {
+
+TEST(Features2d_SIFT, descriptor_type)
+{
+    Mat image = imread(cvtest::findDataFile("features2d/tsukuba.png"));
+    ASSERT_FALSE(image.empty());
+
+    Mat gray;
+    cvtColor(image, gray, COLOR_BGR2GRAY);
+
+    vector<KeyPoint> keypoints;
+    Mat descriptorsFloat, descriptorsUchar;
+    Ptr<SIFT> siftFloat = cv::SIFT::create(0, 3, 0.04, 10, 1.6, CV_32F);
+    siftFloat->detectAndCompute(gray, Mat(), keypoints, descriptorsFloat, false);
+    ASSERT_EQ(descriptorsFloat.type(), CV_32F) << "type mismatch";
+
+    Ptr<SIFT> siftUchar = cv::SIFT::create(0, 3, 0.04, 10, 1.6, CV_8U);
+    siftUchar->detectAndCompute(gray, Mat(), keypoints, descriptorsUchar, false);
+    ASSERT_EQ(descriptorsUchar.type(), CV_8U) << "type mismatch";
+
+    Mat descriptorsFloat2;
+    descriptorsUchar.assignTo(descriptorsFloat2, CV_32F);
+    Mat diff = descriptorsFloat != descriptorsFloat2;
+    ASSERT_EQ(countNonZero(diff), 0) << "descriptors are not identical";
+}
+
+
+}} // namespace