Skip to content

Commit c5f4dff

Browse files
authored
Merge pull request #3324 from kevinchristensen1:gftt-cuda-fix
Fix CUDA memory leak in GFTT and move cudaMalloc out of the critical path
2 parents 8eaa8ac + 1d1dbe3 commit c5f4dff

File tree

2 files changed: +19 / -12 lines changed

modules/cudaimgproc/src/cuda/gftt.cu

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -87,25 +87,22 @@ namespace cv { namespace cuda { namespace device
8787
}
8888
}
8989

90-
int findCorners_gpu(const cudaTextureObject_t &eigTex, const int &rows, const int &cols, float threshold, PtrStepSzb mask, float2* corners, int max_count, cudaStream_t stream)
90+
int findCorners_gpu(const cudaTextureObject_t &eigTex, const int &rows, const int &cols, float threshold, PtrStepSzb mask, float2* corners, int max_count, int* counterPtr, cudaStream_t stream)
9191
{
92-
int* counter_ptr;
93-
cudaSafeCall( cudaMalloc(&counter_ptr, sizeof(int)) );
94-
95-
cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(int), stream) );
92+
cudaSafeCall( cudaMemsetAsync(counterPtr, 0, sizeof(int), stream) );
9693

9794
dim3 block(16, 16);
9895
dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
9996

10097
if (mask.data)
101-
findCorners<<<grid, block, 0, stream>>>(threshold, SingleMask(mask), corners, max_count, rows, cols, eigTex, counter_ptr);
98+
findCorners<<<grid, block, 0, stream>>>(threshold, SingleMask(mask), corners, max_count, rows, cols, eigTex, counterPtr);
10299
else
103-
findCorners<<<grid, block, 0, stream>>>(threshold, WithOutMask(), corners, max_count, rows, cols, eigTex, counter_ptr);
100+
findCorners<<<grid, block, 0, stream>>>(threshold, WithOutMask(), corners, max_count, rows, cols, eigTex, counterPtr);
104101

105102
cudaSafeCall( cudaGetLastError() );
106103

107104
int count;
108-
cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost, stream) );
105+
cudaSafeCall( cudaMemcpyAsync(&count, counterPtr, sizeof(int), cudaMemcpyDeviceToHost, stream) );
109106
if (stream)
110107
cudaSafeCall(cudaStreamSynchronize(stream));
111108
else

modules/cudaimgproc/src/gftt.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ namespace cv { namespace cuda { namespace device
5555
{
5656
namespace gfft
5757
{
58-
int findCorners_gpu(const cudaTextureObject_t &eigTex_, const int &rows, const int &cols, float threshold, PtrStepSzb mask, float2* corners, int max_count, cudaStream_t stream);
58+
int findCorners_gpu(const cudaTextureObject_t &eigTex_, const int &rows, const int &cols, float threshold, PtrStepSzb mask, float2* corners, int max_count, int* counterPtr, cudaStream_t stream);
5959
void sortCorners_gpu(const cudaTextureObject_t &eigTex_, float2* corners, int count, cudaStream_t stream);
6060
}
6161
}}}
@@ -67,7 +67,7 @@ namespace
6767
public:
6868
GoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance,
6969
int blockSize, bool useHarrisDetector, double harrisK);
70-
70+
~GoodFeaturesToTrackDetector();
7171
void detect(InputArray image, OutputArray corners, InputArray mask, Stream& stream);
7272

7373
private:
@@ -82,6 +82,8 @@ namespace
8282
GpuMat buf_;
8383
GpuMat eig_;
8484
GpuMat tmpCorners_;
85+
86+
int* counterPtr_;
8587
};
8688

8789
GoodFeaturesToTrackDetector::GoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance,
@@ -93,6 +95,12 @@ namespace
9395
cornerCriteria_ = useHarrisDetector ?
9496
cuda::createHarrisCorner(srcType, blockSize, 3, harrisK) :
9597
cuda::createMinEigenValCorner(srcType, blockSize, 3);
98+
cudaSafeCall(cudaMalloc(&counterPtr_, sizeof(int)));
99+
}
100+
101+
GoodFeaturesToTrackDetector::~GoodFeaturesToTrackDetector()
102+
{
103+
cudaSafeCall(cudaFree(counterPtr_));
96104
}
97105

98106
void GoodFeaturesToTrackDetector::detect(InputArray _image, OutputArray _corners, InputArray _mask, Stream& stream)
@@ -125,17 +133,19 @@ namespace
125133
PtrStepSzf eig = eig_;
126134
cv::cuda::device::createTextureObjectPitch2D<float>(&eigTex_, eig, texDesc);
127135

128-
int total = findCorners_gpu(eigTex_, eig_.rows, eig_.cols, static_cast<float>(maxVal * qualityLevel_), mask, tmpCorners_.ptr<float2>(), tmpCorners_.cols, stream_);
129-
136+
int total = findCorners_gpu(eigTex_, eig_.rows, eig_.cols, static_cast<float>(maxVal * qualityLevel_), mask, tmpCorners_.ptr<float2>(), tmpCorners_.cols, counterPtr_, stream_);
130137

131138
if (total == 0)
132139
{
133140
_corners.release();
141+
cudaSafeCall( cudaDestroyTextureObject(eigTex_) );
134142
return;
135143
}
136144

137145
sortCorners_gpu(eigTex_, tmpCorners_.ptr<float2>(), total, stream_);
138146

147+
cudaSafeCall( cudaDestroyTextureObject(eigTex_) );
148+
139149
if (minDistance_ < 1)
140150
{
141151
tmpCorners_.colRange(0, maxCorners_ > 0 ? std::min(maxCorners_, total) : total).copyTo(_corners, stream);

0 commit comments

Comments
 (0)