Skip to content

Commit 5eaa25c

Browse files
savuor authored and alalek committed
Merge pull request #2106 from savuor:kinfu_fix_ocl
KinFu: try to fix OCL fails (#2106)
* some TODOs closed
* temporary turn on NaNs patching
* reverted
* temporary debug code added
* more debug code
* inf check; more debug code
* debug code in kernel
* debug code, try to disable some optimizations
* -cl-fast-relaxed-math option removed, debug code removed
1 parent 8049208 commit 5eaa25c

File tree

4 files changed

+17
-21
lines changed

4 files changed

+17
-21
lines changed

modules/rgbd/src/fast_icp.cpp

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -545,7 +545,7 @@ void ICPImpl::getAb<UMat>(const UMat& oldPts, const UMat& oldNrm, const UMat& ne
545545

546546
cv::String errorStr;
547547
ocl::ProgramSource source = ocl::rgbd::icp_oclsrc;
548-
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
548+
cv::String options = "-cl-mad-enable";
549549
ocl::Kernel k;
550550
k.create("getAb", source, options, &errorStr);
551551

@@ -594,8 +594,7 @@ void ICPImpl::getAb<UMat>(const UMat& oldPts, const UMat& oldNrm, const UMat& ne
594594
fxy.val, cxy.val,
595595
distanceThreshold*distanceThreshold,
596596
cos(angleThreshold),
597-
//TODO: replace by KernelArg::Local(lsz)
598-
ocl::KernelArg(ocl::KernelArg::LOCAL, 0, 1, 1, 0, lsz),
597+
ocl::KernelArg::Local(lsz),
599598
ocl::KernelArg::WriteOnlyNoSize(groupedSumGpu)
600599
);
601600

modules/rgbd/src/kinfu_frame.cpp

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -335,7 +335,7 @@ bool computePointsNormalsGpu(const Intr intr, float depthFactor, const UMat& dep
335335
cv::String errorStr;
336336
cv::String name = "computePointsNormals";
337337
ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
338-
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
338+
cv::String options = "-cl-mad-enable";
339339
ocl::Kernel k;
340340
k.create(name.c_str(), source, options, &errorStr);
341341

@@ -368,7 +368,7 @@ bool pyrDownBilateralGpu(const UMat& depth, UMat& depthDown, float sigma)
368368
cv::String errorStr;
369369
cv::String name = "pyrDownBilateral";
370370
ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
371-
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
371+
cv::String options = "-cl-mad-enable";
372372
ocl::Kernel k;
373373
k.create(name.c_str(), source, options, &errorStr);
374374

@@ -402,7 +402,7 @@ bool customBilateralFilterGpu(const UMat src /* udepth */, UMat& dst /* smooth *
402402
cv::String errorStr;
403403
cv::String name = "customBilateral";
404404
ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
405-
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
405+
cv::String options = "-cl-mad-enable";
406406
ocl::Kernel k;
407407
k.create(name.c_str(), source, options, &errorStr);
408408

@@ -431,7 +431,7 @@ bool pyrDownPointsNormalsGpu(const UMat p, const UMat n, UMat &pdown, UMat &ndow
431431
cv::String errorStr;
432432
cv::String name = "pyrDownPointsNormals";
433433
ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
434-
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
434+
cv::String options = "-cl-mad-enable";
435435
ocl::Kernel k;
436436
k.create(name.c_str(), source, options, &errorStr);
437437

@@ -462,7 +462,7 @@ static bool ocl_renderPointsNormals(const UMat points, const UMat normals,
462462
cv::String errorStr;
463463
cv::String name = "render";
464464
ocl::ProgramSource source = ocl::rgbd::kinfu_frame_oclsrc;
465-
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
465+
cv::String options = "-cl-mad-enable";
466466
ocl::Kernel k;
467467
k.create(name.c_str(), source, options, &errorStr);
468468

@@ -616,9 +616,6 @@ void makeFrameFromDepth(InputArray _depth,
616616
// looks like OpenCV's bilateral filter works the same as KinFu's
617617
Depth smooth;
618618

619-
//TODO: remove it when OpenCV's bilateral works properly
620-
patchNaNs(depth);
621-
622619
bilateralFilter(depth, smooth, kernelSize, sigmaDepth*depthFactor, sigmaSpatial);
623620

624621
// depth truncation is not used by default

modules/rgbd/src/opencl/icp.cl

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,8 @@ inline void calcAb7(__global const char * oldPointsptr,
5656
float3 newP = newPtsRow[x].xyz;
5757
float3 newN = newNrmRow[x].xyz;
5858

59-
if(any(isnan(newP)) || any(isnan(newN)))
59+
if( any(isnan(newP)) || any(isnan(newN)) ||
60+
any(isinf(newP)) || any(isinf(newN)) )
6061
return;
6162

6263
//transform to old coord system
@@ -114,7 +115,8 @@ inline void calcAb7(__global const char * oldPointsptr,
114115
float3 n1 = mix(n10, n11, t.x);
115116
oldN = mix(n0, n1, t.y);
116117

117-
if(any(isnan(oldP)) || any(isnan(oldN)))
118+
if( any(isnan(oldP)) || any(isnan(oldN)) ||
119+
any(isinf(oldP)) || any(isinf(oldN)) )
118120
return;
119121

120122
//filter by distance

modules/rgbd/src/tsdf.cpp

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1204,7 +1204,7 @@ void TSDFVolumeGPU::integrate(InputArray _depth, float depthFactor,
12041204
cv::String errorStr;
12051205
cv::String name = "integrate";
12061206
ocl::ProgramSource source = ocl::rgbd::tsdf_oclsrc;
1207-
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
1207+
cv::String options = "-cl-mad-enable";
12081208
ocl::Kernel k;
12091209
k.create(name.c_str(), source, options, &errorStr);
12101210

@@ -1250,7 +1250,7 @@ void TSDFVolumeGPU::raycast(cv::Affine3f cameraPose, Intr intrinsics, Size frame
12501250
cv::String errorStr;
12511251
cv::String name = "raycast";
12521252
ocl::ProgramSource source = ocl::rgbd::tsdf_oclsrc;
1253-
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
1253+
cv::String options = "-cl-mad-enable";
12541254
ocl::Kernel k;
12551255
k.create(name.c_str(), source, options, &errorStr);
12561256

@@ -1318,7 +1318,7 @@ void TSDFVolumeGPU::fetchNormals(InputArray _points, OutputArray _normals) const
13181318
cv::String errorStr;
13191319
cv::String name = "getNormals";
13201320
ocl::ProgramSource source = ocl::rgbd::tsdf_oclsrc;
1321-
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
1321+
cv::String options = "-cl-mad-enable";
13221322
ocl::Kernel k;
13231323
k.create(name.c_str(), source, options, &errorStr);
13241324

@@ -1365,7 +1365,7 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals)
13651365

13661366
cv::String errorStr;
13671367
ocl::ProgramSource source = ocl::rgbd::tsdf_oclsrc;
1368-
cv::String options = "-cl-fast-relaxed-math -cl-mad-enable";
1368+
cv::String options = "-cl-mad-enable";
13691369

13701370
kscan.create("scanSize", source, options, &errorStr);
13711371

@@ -1409,8 +1409,7 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals)
14091409
ocl::KernelArg::PtrReadOnly(volPoseGpu),
14101410
voxelSize,
14111411
voxelSizeInv,
1412-
//TODO: replace by KernelArg::Local(lsz)
1413-
ocl::KernelArg(ocl::KernelArg::LOCAL, 0, 1, 1, 0, lsz),
1412+
ocl::KernelArg::Local(lsz),
14141413
ocl::KernelArg::WriteOnlyNoSize(groupedSum));
14151414

14161415
if(!kscan.run(3, globalSize, localSize, true))
@@ -1455,8 +1454,7 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals)
14551454
voxelSize,
14561455
voxelSizeInv,
14571456
((int)needNormals),
1458-
//TODO: replace by ::Local(lsz)
1459-
ocl::KernelArg(ocl::KernelArg::LOCAL, 0, 1, 1, 0, lsz),
1457+
ocl::KernelArg::Local(lsz),
14601458
ocl::KernelArg::PtrReadWrite(atomicCtr),
14611459
ocl::KernelArg::ReadOnlyNoSize(groupedSum),
14621460
ocl::KernelArg::WriteOnlyNoSize(pts),

0 commit comments

Comments
 (0)