Skip to content

Commit 4f051f0

Browse files
committed
Merge pull request #1371 from sovrasov:ocl_surf
2 parents 1e35407 + bac7c26 commit 4f051f0

File tree

7 files changed

+167
-84
lines changed

7 files changed

+167
-84
lines changed

modules/xfeatures2d/perf/perf_surf.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ PERF_TEST_P(surf, extract, testing::Values(SURF_IMAGES))
4040

4141
Ptr<SURF> detector = SURF::create();
4242
vector<KeyPoint> points;
43-
vector<float> descriptors;
43+
Mat descriptors;
4444
detector->detect(frame, points, mask);
4545

4646
TEST_CYCLE() detector->compute(frame, points, descriptors);
@@ -58,7 +58,7 @@ PERF_TEST_P(surf, full, testing::Values(SURF_IMAGES))
5858
declare.in(frame).time(90);
5959
Ptr<SURF> detector = SURF::create();
6060
vector<KeyPoint> points;
61-
vector<float> descriptors;
61+
Mat descriptors;
6262

6363
TEST_CYCLE() detector->detectAndCompute(frame, mask, points, descriptors, false);
6464

modules/xfeatures2d/src/opencl/surf.cl

Lines changed: 76 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -875,9 +875,6 @@ inline float linearFilter(
875875
float centerX, float centerY, float win_offset,
876876
float cos_dir, float sin_dir, float y, float x )
877877
{
878-
x -= 0.5f;
879-
y -= 0.5f;
880-
881878
float out = 0.0f;
882879

883880
const int x1 = round(x);
@@ -900,6 +897,60 @@ inline float linearFilter(
900897
return out;
901898
}
902899

900+
// Area-weighted average of samples over the square window
// [x*s, x*s + s) x [y*s, y*s + s) of the sampling grid.
//   centerX, centerY, win_offset, cos_dir, sin_dir - forwarded unchanged to readerGet
//   x, y - integer-valued cell indices (passed as float) of the output sample
//   s    - window side length, in units of the grid that readerGet indexes
// Returns the sum of readerGet(...) values, each weighted by the fraction of
// the cell covered by the window and by 1/(s*s).
// NOTE(review): readerGet is defined outside this view; the image parameters
// (__PARAM_imgTex__, img_rows, img_cols) are not referenced by name here and
// are presumably consumed inside readerGet - confirm.
// NOTE(review): for s < 1 the rounded-up start can exceed the rounded-down end,
// so both partial-cell branches may fire for the same cell; the call site shown
// in this diff only uses this filter when s > 1.
inline float areaFilter( __PARAM_imgTex__, int img_rows, int img_cols,
901+
float centerX, float centerY, float win_offset,
902+
float cos_dir, float sin_dir, float x, float y, float s)
903+
{
904+
// Horizontal extent of the window as real-valued coordinates.
float fsx1 = x * s;
905+
float fsx2 = fsx1 + s;
906+
907+
// First fully covered column: start rounded toward +infinity (_rtp).
int sx1 = convert_int_rtp(fsx1);
908+
// One past the last fully covered column: end rounded toward -infinity (_rtn).
int sx2 = convert_int_rtn(fsx2);
909+
910+
// Same decomposition for the vertical extent.
float fsy1 = y * s;
911+
float fsy2 = fsy1 + s;
912+
913+
int sy1 = convert_int_rtp(fsy1);
914+
int sy2 = convert_int_rtn(fsy2);
915+
916+
// Normalisation by the window area.
float scale = 1.f / (s * s);
917+
float out = 0.f;
918+
919+
// Rows that lie entirely inside the window.
for (int dy = sy1; dy < sy2; ++dy)
920+
{
921+
// Fully covered cells: weight 1 (times scale). Note the (row, column) argument order.
for (int dx = sx1; dx < sx2; ++dx)
922+
out = out + readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, dy, dx) * scale;
923+
924+
// Partially covered cell left of the full columns, weighted by its covered width.
if (sx1 > fsx1)
925+
out = out + readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, dy, (sx1 -1)) * ((sx1 - fsx1) * scale);
926+
927+
// Partially covered cell right of the full columns.
if (sx2 < fsx2)
928+
out = out + readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, dy, sx2) * ((fsx2 -sx2) * scale);
929+
}
930+
931+
// Partially covered row above the full rows (full columns only), weighted by its covered height.
if (sy1 > fsy1)
932+
for (int dx = sx1; dx < sx2; ++dx)
933+
out = out + readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, (sy1 - 1) , dx) * ((sy1 -fsy1) * scale);
934+
935+
// Partially covered row below the full rows (full columns only).
if (sy2 < fsy2)
936+
for (int dx = sx1; dx < sx2; ++dx)
937+
out = out + readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, sy2, dx) * ((fsy2 -sy2) * scale);
938+
939+
// Four corner cells: weight is the product of the covered height and covered width.
// Top-left corner.
if ((sy1 > fsy1) && (sx1 > fsx1))
940+
out = out + readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, (sy1 - 1) , (sx1 - 1)) * ((sy1 -fsy1) * (sx1 -fsx1) * scale);
941+
942+
// Top-right corner.
if ((sy1 > fsy1) && (sx2 < fsx2))
943+
out = out + readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, (sy1 - 1) , sx2) * ((sy1 -fsy1) * (fsx2 -sx2) * scale);
944+
945+
// Bottom-right corner.
if ((sy2 < fsy2) && (sx2 < fsx2))
946+
out = out + readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, sy2, sx2) * ((fsy2 -sy2) * (fsx2 -sx2) * scale);
947+
948+
// Bottom-left corner.
if ((sy2 < fsy2) && (sx1 > fsx1))
949+
out = out + readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, sy2, (sx1 - 1)) * ((fsy2 -sy2) * (sx1 -fsx1) * scale);
950+
951+
return out;
952+
}
953+
903954
void calc_dx_dy(
904955
__PARAM_imgTex__,
905956
int img_rows, int img_cols,
@@ -946,9 +997,18 @@ void calc_dx_dy(
946997
const float icoo = ((float)yIndex / (PATCH_SZ + 1)) * win_size;
947998
const float jcoo = ((float)xIndex / (PATCH_SZ + 1)) * win_size;
948999

949-
s_PATCH[get_local_id(1) * 6 + get_local_id(0)] =
950-
linearFilter(__PASS_imgTex__, img_rows, img_cols, centerX, centerY,
951-
win_offset, cos_dir, sin_dir, icoo, jcoo);
1000+
if (s > 1)
1001+
{
1002+
s_PATCH[get_local_id(1) * 6 + get_local_id(0)] =
1003+
areaFilter(__PASS_imgTex__, img_rows, img_cols, centerX, centerY,
1004+
win_offset, cos_dir, sin_dir, xIndex, yIndex, s);
1005+
}
1006+
else
1007+
{
1008+
s_PATCH[get_local_id(1) * 6 + get_local_id(0)] =
1009+
linearFilter(__PASS_imgTex__, img_rows, img_cols, centerX, centerY,
1010+
win_offset, cos_dir, sin_dir, icoo, jcoo);
1011+
}
9521012

9531013
barrier(CLK_LOCAL_MEM_FENCE);
9541014

@@ -1075,18 +1135,16 @@ void SURF_computeDescriptors64(
10751135
reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid);
10761136

10771137
barrier(CLK_LOCAL_MEM_FENCE);
1078-
if (tid < 25)
1138+
if (tid == 0)
10791139
{
10801140
__global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2);
10811141

10821142
// write dx, dy, |dx|, |dy|
1083-
if (tid == 0)
1084-
{
1085-
descriptors_block[0] = sdx[0];
1086-
descriptors_block[1] = sdy[0];
1087-
descriptors_block[2] = sdxabs[0];
1088-
descriptors_block[3] = sdyabs[0];
1089-
}
1143+
1144+
descriptors_block[0] = sdx[0];
1145+
descriptors_block[1] = sdy[0];
1146+
descriptors_block[2] = sdxabs[0];
1147+
descriptors_block[3] = sdyabs[0];
10901148
}
10911149
}
10921150

@@ -1102,10 +1160,10 @@ void SURF_computeDescriptors128(
11021160
descriptors_step /= sizeof(*descriptors);
11031161
keypoints_step /= sizeof(*keypoints);
11041162

1105-
__global float * featureX = keypoints + X_ROW * keypoints_step;
1106-
__global float * featureY = keypoints + Y_ROW * keypoints_step;
1107-
__global float* featureSize = keypoints + SIZE_ROW * keypoints_step;
1108-
__global float* featureDir = keypoints + ANGLE_ROW * keypoints_step;
1163+
__global const float * featureX = keypoints + X_ROW * keypoints_step;
1164+
__global const float * featureY = keypoints + Y_ROW * keypoints_step;
1165+
__global const float* featureSize = keypoints + SIZE_ROW * keypoints_step;
1166+
__global const float* featureDir = keypoints + ANGLE_ROW * keypoints_step;
11091167

11101168
// 2 floats (dx,dy) for each thread (5x5 sample points in each sub-region)
11111169
volatile __local float sdx[25];

modules/xfeatures2d/src/surf.ocl.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,13 @@ bool SURF_OCL::init(const SURF_Impl* p)
9191
if(ocl::haveOpenCL())
9292
{
9393
const ocl::Device& dev = ocl::Device::getDefault();
94-
if( dev.type() == ocl::Device::TYPE_CPU || dev.doubleFPConfig() == 0 )
94+
if( dev.type() == ocl::Device::TYPE_CPU )
9595
return false;
96-
haveImageSupport = false;//dev.imageSupport();
97-
kerOpts = haveImageSupport ? "-D HAVE_IMAGE2D -D DOUBLE_SUPPORT" : "";
98-
// status = 1;
96+
haveImageSupport = dev.imageSupport();
97+
kerOpts = format("%s%s",
98+
haveImageSupport ? "-D HAVE_IMAGE2D" : "",
99+
dev.doubleFPConfig() > 0? " -D DOUBLE_SUPPORT": "");
100+
status = 1;
99101
}
100102
}
101103
return status > 0;
@@ -243,7 +245,7 @@ bool SURF_OCL::computeDescriptors(const UMat &keypoints, OutputArray _descriptor
243245
}
244246

245247
size_t localThreads[] = {6, 6};
246-
size_t globalThreads[] = {nFeatures*localThreads[0], localThreads[1]};
248+
size_t globalThreads[] = {nFeatures*localThreads[0], 16 * localThreads[1]};
247249

248250
if(haveImageSupport)
249251
{
@@ -420,7 +422,7 @@ bool SURF_OCL::findMaximaInLayer(int counterOffset, int octave,
420422
ocl::KernelArg::PtrReadWrite(maxPosBuffer),
421423
ocl::KernelArg::PtrReadWrite(counters),
422424
counterOffset, img_rows, img_cols,
423-
octave, nOctaveLayers,
425+
nOctaveLayers, octave,
424426
layer_rows, layer_cols,
425427
maxCandidates,
426428
(float)params->hessianThreshold).run(2, globalThreads, localThreads, true);

modules/xfeatures2d/test/test_features2d.cpp

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -357,9 +357,9 @@ class CV_DescriptorExtractorTest : public cvtest::BaseTest
357357
}
358358

359359
if(imgLoadMode == IMREAD_GRAYSCALE)
360-
image.create( 50, 50, CV_8UC1 );
360+
image.create( 256, 256, CV_8UC1 );
361361
else
362-
image.create( 50, 50, CV_8UC3 );
362+
image.create( 256, 256, CV_8UC3 );
363363
try
364364
{
365365
dextractor->compute( image, keypoints, descriptors );
@@ -1027,10 +1027,34 @@ TEST( Features2d_DescriptorExtractor_SIFT, regression )
10271027

10281028
// Regression test for the SURF descriptor extractor with OpenCL switched off.
// When HAVE_OPENCL is defined, the current ocl::useOpenCL() setting is saved,
// OpenCL use is disabled for the duration of the run, and the saved setting is
// restored afterwards.
TEST( Features2d_DescriptorExtractor_SURF, regression )
10291029
{
1030+
#ifdef HAVE_OPENCL
1031+
// Remember the caller's setting so it can be restored at the end of the test.
bool useOCL = ocl::useOpenCL();
1032+
ocl::setUseOpenCL(false);
1033+
#endif
1034+
10301035
// NOTE(review): 0.05f is presumably the maximum allowed descriptor distance - confirm against CV_DescriptorExtractorTest.
CV_DescriptorExtractorTest<L2<float> > test( "descriptor-surf", 0.05f,
10311036
SURF::create() );
10321037
test.safe_run();
1038+
1039+
#ifdef HAVE_OPENCL
1040+
// Restore the setting saved above.
ocl::setUseOpenCL(useOCL);
1041+
#endif
1042+
}
1043+
1044+
#ifdef HAVE_OPENCL
1045+
// Regression test for the SURF descriptor extractor with OpenCL requested.
// The test body runs only if ocl::useOpenCL() reports true after enabling it;
// otherwise the test does nothing and passes. The previous OpenCL setting is
// restored in both cases.
TEST( Features2d_DescriptorExtractor_SURF_OCL, regression )
1045+
{
1046+
// Remember the caller's setting so it can be restored at the end of the test.
bool useOCL = ocl::useOpenCL();
1047+
ocl::setUseOpenCL(true);
1048+
// Re-query the setting: the enable request above may not have taken effect.
if(ocl::useOpenCL())
1049+
{
1050+
// Uses the separate test name "descriptor-surf_ocl" rather than "descriptor-surf".
CV_DescriptorExtractorTest<L2<float> > test( "descriptor-surf_ocl", 0.05f,
1051+
SURF::create() );
1052+
test.safe_run();
1053+
}
1054+
ocl::setUseOpenCL(useOCL);
10331056
}
1057+
#endif
10341058

10351059
TEST( Features2d_DescriptorExtractor_DAISY, regression )
10361060
{
@@ -1187,7 +1211,7 @@ TEST(Features2d_BruteForceDescriptorMatcher_knnMatch, regression)
11871211
Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("BruteForce");
11881212
ASSERT_TRUE(matcher != NULL);
11891213

1190-
Mat imgT(sz, sz, CV_8U, Scalar(255));
1214+
Mat imgT(256, 256, CV_8U, Scalar(255));
11911215
line(imgT, Point(20, sz/2), Point(sz-21, sz/2), Scalar(100), 2);
11921216
line(imgT, Point(sz/2, 20), Point(sz/2, sz-21), Scalar(100), 2);
11931217
vector<KeyPoint> kpT;
@@ -1196,7 +1220,7 @@ TEST(Features2d_BruteForceDescriptorMatcher_knnMatch, regression)
11961220
Mat descT;
11971221
ext->compute(imgT, kpT, descT);
11981222

1199-
Mat imgQ(sz, sz, CV_8U, Scalar(255));
1223+
Mat imgQ(256, 256, CV_8U, Scalar(255));
12001224
line(imgQ, Point(30, sz/2), Point(sz-31, sz/2), Scalar(100), 3);
12011225
line(imgQ, Point(sz/2, 30), Point(sz/2, sz-31), Scalar(100), 3);
12021226
vector<KeyPoint> kpQ;

modules/xfeatures2d/test/test_precomp.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121
#include "opencv2/opencv_modules.hpp"
2222
#include "cvconfig.h"
2323

24-
#ifdef HAVE_OPENCV_OCL
25-
# include "opencv2/ocl.hpp"
24+
#ifdef HAVE_OPENCL
25+
# include "opencv2/core/ocl.hpp"
2626
#endif
2727

2828
#ifdef HAVE_CUDA

modules/xfeatures2d/test/test_rotation_and_scale_invariance.cpp

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -168,9 +168,6 @@ void matchKeyPoints(const vector<KeyPoint>& keypoints0, const Mat& H,
168168
const float r0 = 0.5f * keypoints0[i0].size;
169169
for(size_t i1 = 0; i1 < keypoints1.size(); i1++)
170170
{
171-
if(nearestPointIndex >= 0 && usedMask[i1])
172-
continue;
173-
174171
float r1 = 0.5f * keypoints1[i1].size;
175172
float intersectRatio = calcIntersectRatio(points0t.at<Point2f>(i0), r0,
176173
keypoints1[i1].pt, r1);
@@ -619,7 +616,7 @@ class DescriptorScaleInvarianceTest : public cvtest::BaseTest
619616
TEST(Features2d_RotationInvariance_Detector_SURF, regression)
620617
{
621618
DetectorRotationInvarianceTest test(SURF::create(),
622-
0.44f,
619+
0.65f,
623620
0.76f);
624621
test.safe_run();
625622
}
@@ -859,10 +856,21 @@ TEST(Features2d_RotationInvariance2_Detector_SURF, regression)
859856
vector<KeyPoint> keypoints;
860857
surf->detect(cross, keypoints);
861858

859+
// Expect 5 keypoints. One keypoint has coordinates (50.0, 50.0).
860+
// The other 4 keypoints should have the same response.
861+
// The order of the keypoints is indeterminate.
862862
ASSERT_EQ(keypoints.size(), (vector<KeyPoint>::size_type) 5);
863-
ASSERT_LT( fabs(keypoints[1].response - keypoints[2].response), 1e-6);
864-
ASSERT_LT( fabs(keypoints[1].response - keypoints[3].response), 1e-6);
865-
ASSERT_LT( fabs(keypoints[1].response - keypoints[4].response), 1e-6);
863+
864+
int i1 = -1;
865+
for(int i = 0; i < 5; i++)
866+
{
867+
if(keypoints[i].pt.x == 50.0f)
868+
;
869+
else if(i1 == -1)
870+
i1 = i;
871+
else
872+
ASSERT_LT(fabs(keypoints[i1].response - keypoints[i].response) / keypoints[i1].response, 1e-6);
873+
}
866874
}
867875

868876
TEST(DISABLED_Features2d_ScaleInvariance_Descriptor_DAISY, regression)
@@ -942,7 +950,7 @@ TEST(Features2d_ScaleInvariance_Descriptor_BoostDesc_LBGM, regression)
942950
DescriptorScaleInvarianceTest test(SURF::create(),
943951
BoostDesc::create(BoostDesc::LBGM, true, 6.25f),
944952
NORM_L1,
945-
0.98f);
953+
0.95f);
946954
test.safe_run();
947955
}
948956

0 commit comments

Comments
 (0)