Skip to content

Commit 1834eed

Browse files
authored
Merge pull request opencv#18001 from Yosshi999:sift-8bit-descr
* 8-bit SIFT descriptors * use clearer parameter * update docs * propagate type info * overload function for avoiding ABI-break * bugfix: some values are undefined when CV_SIMD is absent
1 parent b34234a commit 1834eed

File tree

4 files changed

+158
-42
lines changed

4 files changed

+158
-42
lines changed

modules/features2d/include/opencv2/features2d.hpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,33 @@ class CV_EXPORTS_W SIFT : public Feature2D
301301
double contrastThreshold = 0.04, double edgeThreshold = 10,
302302
double sigma = 1.6);
303303

304+
/** @brief Create SIFT with specified descriptorType.
305+
@param nfeatures The number of best features to retain. The features are ranked by their scores
306+
(measured in SIFT algorithm as the local contrast)
307+
308+
@param nOctaveLayers The number of layers in each octave. 3 is the value used in D. Lowe paper. The
309+
number of octaves is computed automatically from the image resolution.
310+
311+
@param contrastThreshold The contrast threshold used to filter out weak features in semi-uniform
312+
(low-contrast) regions. The larger the threshold, the fewer features are produced by the detector.
313+
314+
@note The contrast threshold will be divided by nOctaveLayers when the filtering is applied. When
315+
nOctaveLayers is set to default and if you want to use the value used in D. Lowe paper, 0.03, set
316+
this argument to 0.09.
317+
318+
@param edgeThreshold The threshold used to filter out edge-like features. Note that its meaning
319+
is different from the contrastThreshold, i.e. the larger the edgeThreshold, the fewer features are
320+
filtered out (more features are retained).
321+
322+
@param sigma The sigma of the Gaussian applied to the input image at the octave \#0. If your image
323+
is captured with a weak camera with soft lenses, you might want to reduce the number.
324+
325+
@param descriptorType The type of descriptors. Only CV_32F and CV_8U are supported.
326+
*/
327+
CV_WRAP static Ptr<SIFT> create(int nfeatures, int nOctaveLayers,
328+
double contrastThreshold, double edgeThreshold,
329+
double sigma, int descriptorType);
330+
304331
CV_WRAP virtual String getDefaultName() const CV_OVERRIDE;
305332
};
306333

modules/features2d/src/sift.dispatch.cpp

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ class SIFT_Impl : public SIFT
8888
public:
8989
explicit SIFT_Impl( int nfeatures = 0, int nOctaveLayers = 3,
9090
double contrastThreshold = 0.04, double edgeThreshold = 10,
91-
double sigma = 1.6);
91+
double sigma = 1.6, int descriptorType = CV_32F );
9292

9393
//! returns the descriptor size in elements (128); the element type is given by descriptorType()
9494
int descriptorSize() const CV_OVERRIDE;
@@ -117,13 +117,25 @@ class SIFT_Impl : public SIFT
117117
CV_PROP_RW double contrastThreshold;
118118
CV_PROP_RW double edgeThreshold;
119119
CV_PROP_RW double sigma;
120+
CV_PROP_RW int descriptor_type;
120121
};
121122

122123
Ptr<SIFT> SIFT::create( int _nfeatures, int _nOctaveLayers,
123124
double _contrastThreshold, double _edgeThreshold, double _sigma )
124125
{
125126
CV_TRACE_FUNCTION();
126-
return makePtr<SIFT_Impl>(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma);
127+
128+
return makePtr<SIFT_Impl>(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma, CV_32F);
129+
}
130+
131+
Ptr<SIFT> SIFT::create( int _nfeatures, int _nOctaveLayers,
132+
double _contrastThreshold, double _edgeThreshold, double _sigma, int _descriptorType )
133+
{
134+
CV_TRACE_FUNCTION();
135+
136+
// SIFT descriptor supports 32bit floating point and 8bit unsigned int.
137+
CV_Assert(_descriptorType == CV_32F || _descriptorType == CV_8U);
138+
return makePtr<SIFT_Impl>(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma, _descriptorType);
127139
}
128140

129141
String SIFT::getDefaultName() const
@@ -362,12 +374,12 @@ void SIFT_Impl::findScaleSpaceExtrema( const std::vector<Mat>& gauss_pyr, const
362374
static
363375
void calcSIFTDescriptor(
364376
const Mat& img, Point2f ptf, float ori, float scl,
365-
int d, int n, float* dst
377+
int d, int n, Mat& dst, int row
366378
)
367379
{
368380
CV_TRACE_FUNCTION();
369381

370-
CV_CPU_DISPATCH(calcSIFTDescriptor, (img, ptf, ori, scl, d, n, dst),
382+
CV_CPU_DISPATCH(calcSIFTDescriptor, (img, ptf, ori, scl, d, n, dst, row),
371383
CV_CPU_DISPATCH_MODES_ALL);
372384
}
373385

@@ -408,7 +420,7 @@ class calcDescriptorsComputer : public ParallelLoopBody
408420
float angle = 360.f - kpt.angle;
409421
if(std::abs(angle - 360.f) < FLT_EPSILON)
410422
angle = 0.f;
411-
calcSIFTDescriptor(img, ptf, angle, size*0.5f, d, n, descriptors.ptr<float>((int)i));
423+
calcSIFTDescriptor(img, ptf, angle, size*0.5f, d, n, descriptors, i);
412424
}
413425
}
414426
private:
@@ -429,9 +441,9 @@ static void calcDescriptors(const std::vector<Mat>& gpyr, const std::vector<KeyP
429441
//////////////////////////////////////////////////////////////////////////////////////////
430442

431443
SIFT_Impl::SIFT_Impl( int _nfeatures, int _nOctaveLayers,
432-
double _contrastThreshold, double _edgeThreshold, double _sigma )
444+
double _contrastThreshold, double _edgeThreshold, double _sigma, int _descriptorType )
433445
: nfeatures(_nfeatures), nOctaveLayers(_nOctaveLayers),
434-
contrastThreshold(_contrastThreshold), edgeThreshold(_edgeThreshold), sigma(_sigma)
446+
contrastThreshold(_contrastThreshold), edgeThreshold(_edgeThreshold), sigma(_sigma), descriptor_type(_descriptorType)
435447
{
436448
}
437449

@@ -442,7 +454,7 @@ int SIFT_Impl::descriptorSize() const
442454

443455
int SIFT_Impl::descriptorType() const
444456
{
445-
return CV_32F;
457+
return descriptor_type;
446458
}
447459

448460
int SIFT_Impl::defaultNorm() const
@@ -533,9 +545,9 @@ void SIFT_Impl::detectAndCompute(InputArray _image, InputArray _mask,
533545
{
534546
//t = (double)getTickCount();
535547
int dsize = descriptorSize();
536-
_descriptors.create((int)keypoints.size(), dsize, CV_32F);
537-
Mat descriptors = _descriptors.getMat();
548+
_descriptors.create((int)keypoints.size(), dsize, descriptor_type);
538549

550+
Mat descriptors = _descriptors.getMat();
539551
calcDescriptors(gpyr, keypoints, descriptors, nOctaveLayers, firstOctave);
540552
//t = (double)getTickCount() - t;
541553
//printf("descriptor extraction time: %g\n", t*1000./tf);

modules/features2d/src/sift.simd.hpp

Lines changed: 75 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ void findScaleSpaceExtrema(
150150

151151
void calcSIFTDescriptor(
152152
const Mat& img, Point2f ptf, float ori, float scl,
153-
int d, int n, float* dst
153+
int d, int n, Mat& dst, int row
154154
);
155155

156156

@@ -555,7 +555,7 @@ void findScaleSpaceExtrema(
555555

556556
void calcSIFTDescriptor(
557557
const Mat& img, Point2f ptf, float ori, float scl,
558-
int d, int n, float* dst
558+
int d, int n, Mat& dstMat, int row
559559
)
560560
{
561561
CV_TRACE_FUNCTION();
@@ -575,9 +575,18 @@ void calcSIFTDescriptor(
575575
int i, j, k, len = (radius*2+1)*(radius*2+1), histlen = (d+2)*(d+2)*(n+2);
576576
int rows = img.rows, cols = img.cols;
577577

578-
AutoBuffer<float> buf(len*6 + histlen);
579-
float *X = buf.data(), *Y = X + len, *Mag = Y, *Ori = Mag + len, *W = Ori + len;
580-
float *RBin = W + len, *CBin = RBin + len, *hist = CBin + len;
578+
cv::utils::BufferArea area;
579+
float *X = 0, *Y = 0, *Mag, *Ori = 0, *W = 0, *RBin = 0, *CBin = 0, *hist = 0, *rawDst = 0;
580+
area.allocate(X, len, CV_SIMD_WIDTH);
581+
area.allocate(Y, len, CV_SIMD_WIDTH);
582+
area.allocate(Ori, len, CV_SIMD_WIDTH);
583+
area.allocate(W, len, CV_SIMD_WIDTH);
584+
area.allocate(RBin, len, CV_SIMD_WIDTH);
585+
area.allocate(CBin, len, CV_SIMD_WIDTH);
586+
area.allocate(hist, histlen, CV_SIMD_WIDTH);
587+
area.allocate(rawDst, len, CV_SIMD_WIDTH);
588+
area.commit();
589+
Mag = Y;
581590

582591
for( i = 0; i < d+2; i++ )
583592
{
@@ -628,10 +637,10 @@ void calcSIFTDescriptor(
628637
const v_int32 __n_plus_2 = vx_setall_s32(n+2);
629638
for( ; k <= len - vecsize; k += vecsize )
630639
{
631-
v_float32 rbin = vx_load(RBin + k);
632-
v_float32 cbin = vx_load(CBin + k);
633-
v_float32 obin = (vx_load(Ori + k) - __ori) * __bins_per_rad;
634-
v_float32 mag = vx_load(Mag + k) * vx_load(W + k);
640+
v_float32 rbin = vx_load_aligned(RBin + k);
641+
v_float32 cbin = vx_load_aligned(CBin + k);
642+
v_float32 obin = (vx_load_aligned(Ori + k) - __ori) * __bins_per_rad;
643+
v_float32 mag = vx_load_aligned(Mag + k) * vx_load_aligned(W + k);
635644

636645
v_int32 r0 = v_floor(rbin);
637646
v_int32 c0 = v_floor(cbin);
@@ -723,7 +732,7 @@ void calcSIFTDescriptor(
723732
hist[idx] += hist[idx+n];
724733
hist[idx+1] += hist[idx+n+1];
725734
for( k = 0; k < n; k++ )
726-
dst[(i*d + j)*n + k] = hist[idx+k];
735+
rawDst[(i*d + j)*n + k] = hist[idx+k];
727736
}
728737
// copy histogram to the descriptor,
729738
// apply hysteresis thresholding
@@ -735,17 +744,17 @@ void calcSIFTDescriptor(
735744
#if CV_SIMD
736745
{
737746
v_float32 __nrm2 = vx_setzero_f32();
738-
v_float32 __dst;
747+
v_float32 __rawDst;
739748
for( ; k <= len - v_float32::nlanes; k += v_float32::nlanes )
740749
{
741-
__dst = vx_load(dst + k);
742-
__nrm2 = v_fma(__dst, __dst, __nrm2);
750+
__rawDst = vx_load_aligned(rawDst + k);
751+
__nrm2 = v_fma(__rawDst, __rawDst, __nrm2);
743752
}
744753
nrm2 = (float)v_reduce_sum(__nrm2);
745754
}
746755
#endif
747756
for( ; k < len; k++ )
748-
nrm2 += dst[k]*dst[k];
757+
nrm2 += rawDst[k]*rawDst[k];
749758

750759
float thr = std::sqrt(nrm2)*SIFT_DESCR_MAG_THR;
751760

@@ -760,9 +769,9 @@ void calcSIFTDescriptor(
760769
__m256 __thr = _mm256_set1_ps(thr);
761770
for( ; i <= len - 8; i += 8 )
762771
{
763-
__dst = _mm256_loadu_ps(&dst[i]);
772+
__dst = _mm256_loadu_ps(&rawDst[i]);
764773
__dst = _mm256_min_ps(__dst, __thr);
765-
_mm256_storeu_ps(&dst[i], __dst);
774+
_mm256_storeu_ps(&rawDst[i], __dst);
766775
#if CV_FMA3
767776
__nrm2 = _mm256_fmadd_ps(__dst, __dst, __nrm2);
768777
#else
@@ -776,44 +785,78 @@ void calcSIFTDescriptor(
776785
#endif
777786
for( ; i < len; i++ )
778787
{
779-
float val = std::min(dst[i], thr);
780-
dst[i] = val;
788+
float val = std::min(rawDst[i], thr);
789+
rawDst[i] = val;
781790
nrm2 += val*val;
782791
}
783792
nrm2 = SIFT_INT_DESCR_FCTR/std::max(std::sqrt(nrm2), FLT_EPSILON);
784793

785794
#if 1
786795
k = 0;
796+
if( dstMat.type() == CV_32F )
797+
{
798+
float* dst = dstMat.ptr<float>(row);
787799
#if CV_SIMD
800+
v_float32 __dst;
801+
v_float32 __min = vx_setzero_f32();
802+
v_float32 __max = vx_setall_f32(255.0f); // max of uchar
803+
v_float32 __nrm2 = vx_setall_f32(nrm2);
804+
for( k = 0; k <= len - v_float32::nlanes; k += v_float32::nlanes )
788805
{
789-
v_float32 __dst;
790-
v_float32 __min = vx_setzero_f32();
791-
v_float32 __max = vx_setall_f32(255.0f); // max of uchar
792-
v_float32 __nrm2 = vx_setall_f32(nrm2);
793-
for( k = 0; k <= len - v_float32::nlanes; k += v_float32::nlanes )
794-
{
795-
__dst = vx_load(dst + k);
796-
__dst = v_min(v_max(v_cvt_f32(v_round(__dst * __nrm2)), __min), __max);
797-
v_store(dst + k, __dst);
798-
}
806+
__dst = vx_load_aligned(rawDst + k);
807+
__dst = v_min(v_max(v_cvt_f32(v_round(__dst * __nrm2)), __min), __max);
808+
v_store(dst + k, __dst);
799809
}
800810
#endif
801811
for( ; k < len; k++ )
802812
{
803-
dst[k] = saturate_cast<uchar>(dst[k]*nrm2);
813+
dst[k] = saturate_cast<uchar>(rawDst[k]*nrm2);
804814
}
815+
}
816+
else // CV_8U
817+
{
818+
uint8_t* dst = dstMat.ptr<uint8_t>(row);
819+
#if CV_SIMD
820+
v_float32 __dst0, __dst1;
821+
v_uint16 __pack01;
822+
v_float32 __nrm2 = vx_setall_f32(nrm2);
823+
for( k = 0; k <= len - v_float32::nlanes * 2; k += v_float32::nlanes * 2 )
824+
{
825+
__dst0 = vx_load_aligned(rawDst + k);
826+
__dst1 = vx_load_aligned(rawDst + k + v_float32::nlanes);
827+
828+
__pack01 = v_pack_u(v_round(__dst0 * __nrm2), v_round(__dst1 * __nrm2));
829+
v_pack_store(dst + k, __pack01);
830+
}
831+
#endif
832+
for( ; k < len; k++ )
833+
{
834+
dst[k] = saturate_cast<uchar>(rawDst[k]*nrm2);
835+
}
836+
}
805837
#else
838+
float* dst = dstMat.ptr<float>(row);
806839
float nrm1 = 0;
807840
for( k = 0; k < len; k++ )
808841
{
809-
dst[k] *= nrm2;
810-
nrm1 += dst[k];
842+
rawDst[k] *= nrm2;
843+
nrm1 += rawDst[k];
811844
}
812845
nrm1 = 1.f/std::max(nrm1, FLT_EPSILON);
846+
if( dstMat.type() == CV_32F )
847+
{
813848
for( k = 0; k < len; k++ )
814849
{
815-
dst[k] = std::sqrt(dst[k] * nrm1);//saturate_cast<uchar>(std::sqrt(dst[k] * nrm1)*SIFT_INT_DESCR_FCTR);
850+
dst[k] = std::sqrt(rawDst[k] * nrm1);
816851
}
852+
}
853+
else // CV_8U
854+
{
855+
for( k = 0; k < len; k++ )
856+
{
857+
dst[k] = saturate_cast<uchar>(std::sqrt(rawDst[k] * nrm1)*SIFT_INT_DESCR_FCTR);
858+
}
859+
}
817860
#endif
818861
}
819862

modules/features2d/test/test_sift.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// This file is part of OpenCV project.
2+
// It is subject to the license terms in the LICENSE file found in the top-level directory
3+
// of this distribution and at http://opencv.org/license.html
4+
5+
#include "test_precomp.hpp"
6+
7+
namespace opencv_test { namespace {
8+
9+
TEST(Features2d_SIFT, descriptor_type)
10+
{
11+
Mat image = imread(cvtest::findDataFile("features2d/tsukuba.png"));
12+
ASSERT_FALSE(image.empty());
13+
14+
Mat gray;
15+
cvtColor(image, gray, COLOR_BGR2GRAY);
16+
17+
vector<KeyPoint> keypoints;
18+
Mat descriptorsFloat, descriptorsUchar;
19+
Ptr<SIFT> siftFloat = cv::SIFT::create(0, 3, 0.04, 10, 1.6, CV_32F);
20+
siftFloat->detectAndCompute(gray, Mat(), keypoints, descriptorsFloat, false);
21+
ASSERT_EQ(descriptorsFloat.type(), CV_32F) << "type mismatch";
22+
23+
Ptr<SIFT> siftUchar = cv::SIFT::create(0, 3, 0.04, 10, 1.6, CV_8U);
24+
siftUchar->detectAndCompute(gray, Mat(), keypoints, descriptorsUchar, false);
25+
ASSERT_EQ(descriptorsUchar.type(), CV_8U) << "type mismatch";
26+
27+
Mat descriptorsFloat2;
28+
descriptorsUchar.assignTo(descriptorsFloat2, CV_32F);
29+
Mat diff = descriptorsFloat != descriptorsFloat2;
30+
ASSERT_EQ(countNonZero(diff), 0) << "descriptors are not identical";
31+
}
32+
33+
34+
}} // namespace

0 commit comments

Comments
 (0)