Skip to content

Commit 5bf7569

Browse files
committed
cudaoptflow: fix FarnebackOpticalFlow internal stream synchronization when used with an external CUDA stream
1 parent e8ec6ac commit 5bf7569

File tree

2 files changed

+66
-0
lines changed

2 files changed

+66
-0
lines changed

modules/cudaoptflow/src/farneback.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ namespace
140140
int polyN_;
141141
double polySigma_;
142142
int flags_;
143+
Event sourceStreamComplete;
143144

144145
private:
145146
void prepareGaussian(
@@ -317,7 +318,10 @@ namespace
317318

318319
Stream streams[5];
319320
if (stream)
321+
{
320322
streams[0] = stream;
323+
sourceStreamComplete.record();
324+
}
321325

322326
Size size = frame0.size();
323327
GpuMat prevFlowX, prevFlowY, curFlowX, curFlowY;
@@ -336,6 +340,8 @@ namespace
336340
}
337341

338342
frame0.convertTo(frames_[0], CV_32F, streams[0]);
343+
if (stream)
344+
streams[1].waitEvent(sourceStreamComplete);
339345
frame1.convertTo(frames_[1], CV_32F, streams[1]);
340346

341347
if (fastPyramids_)

modules/cudaoptflow/test/test_optflow.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,66 @@ INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, FarnebackOpticalFlow, testing::Combine(
355355
testing::Values(FarnebackOptFlowFlags(0), FarnebackOptFlowFlags(cv::OPTFLOW_FARNEBACK_GAUSSIAN)),
356356
testing::Values(UseInitFlow(false), UseInitFlow(true))));
357357

358+
359+
// Fixture for the asynchronous (external CUDA stream) Farneback test.
// Parameters, in order: target device, pyramid scale, polynomial
// neighbourhood size and algorithm flags.
PARAM_TEST_CASE(FarnebackOpticalFlowAsync, cv::cuda::DeviceInfo, PyrScale, PolyN, FarnebackOptFlowFlags)
{
    cv::cuda::DeviceInfo devInfo; // device the test case runs on
    double pyrScale;              // value forwarded to setPyrScale()
    int polyN;                    // value forwarded to setPolyN()
    int flags;                    // value forwarded to setFlags()

    // Unpacks the test parameters into the members above and makes the
    // requested device current.
    virtual void SetUp()
    {
        flags = GET_PARAM(3);
        polyN = GET_PARAM(2);
        pyrScale = GET_PARAM(1);
        devInfo = GET_PARAM(0);

        const int deviceId = devInfo.deviceID();
        cv::cuda::setDevice(deviceId);
    }
};
376+
377+
// Regression test for https://github.com/opencv/opencv/issues/24540.
// Runs the CUDA Farneback implementation on a caller-supplied stream whose
// input uploads are still pending, and compares the result with the CPU
// implementation. The comparison is only expected to pass when the algorithm
// orders its internal work after the uploads queued in the caller's stream.
CUDA_TEST_P(FarnebackOpticalFlowAsync, Accuracy)
{
    cv::Mat frame0Mat = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame0Mat.empty());

    cv::Mat frame1Mat = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(frame1Mat.empty());

    cv::Ptr<cv::cuda::FarnebackOpticalFlow> farn = cv::cuda::FarnebackOpticalFlow::create();
    farn->setPyrScale(pyrScale);
    farn->setPolyN(polyN);
    farn->setPolySigma(1.1);
    farn->setFlags(flags);

    Stream sourceStream;

    // Host-side staging buffers: the two input frames plus a large dummy
    // buffer used only to keep sourceStream busy.
    HostMem dummyHost(4000, 4000, CV_8UC3);
    HostMem frame0(frame0Mat);
    HostMem frame1(frame1Mat);

    // Device-side destinations matching the host buffers, and the output flow.
    GpuMat d_flow;
    GpuMat dummyDevice(dummyHost.size(), dummyHost.type());
    GpuMat frame0Device(frame0.size(), frame0.type());
    GpuMat frame1Device(frame1.size(), frame1.type());

    // initialize and warm up CUDA kernels to ensure this doesn't occur during the test
    farn->calc(loadMat(frame0Mat), loadMat(frame1Mat), d_flow);
    // Overwrite the warm-up output before the real run (presumably so a stale
    // result cannot satisfy the comparison below).
    d_flow.setTo(0);

    frame0Device.upload(frame0, sourceStream);
    // place extra work in sourceStream to test internal stream synchronization by delaying the
    // upload of frame1 in that stream, see https://github.com/opencv/opencv/issues/24540
    dummyDevice.upload(dummyHost, sourceStream);
    frame1Device.upload(frame1, sourceStream);
    farn->calc(frame0Device, frame1Device, d_flow, sourceStream);

    // CPU reference computed with the same parameters as the CUDA instance.
    Mat flow;
    cv::calcOpticalFlowFarneback(
        frame0, frame1, flow, farn->getPyrScale(), farn->getNumLevels(), farn->getWinSize(),
        farn->getNumIters(), farn->getPolyN(), farn->getPolySigma(), farn->getFlags());

    // Work submitted through sourceStream may still be in flight; wait for it
    // explicitly so the comparison does not depend on an implicit
    // synchronization inside the helper that reads d_flow back.
    sourceStream.waitForCompletion();

    EXPECT_MAT_SIMILAR(flow, d_flow, 1e-4);
}
411+
412+
// A single parameter combination per device: the asynchronous test targets
// stream ordering, not parameter coverage.
INSTANTIATE_TEST_CASE_P(
    CUDA_OptFlow,
    FarnebackOpticalFlowAsync,
    testing::Combine(
        ALL_DEVICES,
        testing::Values(PyrScale(0.3)),
        testing::Values(PolyN(5)),
        testing::Values(FarnebackOptFlowFlags(0))));
417+
358418
//////////////////////////////////////////////////////
359419
// OpticalFlowDual_TVL1
360420

0 commit comments

Comments
 (0)