Skip to content

Commit 5e0783e

Browse files
committed
NVIDIA Optical Flow Integration in OpenCV
1 parent f0d30f2 commit 5e0783e

File tree

7 files changed

+1337
-0
lines changed

7 files changed

+1337
-0
lines changed

modules/cudaoptflow/CMakeLists.txt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,22 @@ set(the_description "CUDA-accelerated Optical Flow")
77
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow)
88

99
ocv_define_module(cudaoptflow opencv_video opencv_optflow opencv_cudaarithm opencv_cudawarping opencv_cudaimgproc OPTIONAL opencv_cudalegacy WRAP python)
10+
11+
set(NVIDIA_OPTICAL_FLOW_1_0_HEADERS_COMMIT "79c6cee80a2df9a196f20afd6b598a9810964c32")
12+
set(NVIDIA_OPTICAL_FLOW_1_0_HEADERS_MD5 "ca5acedee6cb45d0ec610a6732de5c15")
13+
set(NVIDIA_OPTICAL_FLOW_1_0_HEADERS_PATH "${OpenCV_BINARY_DIR}/3rdparty/NVIDIAOpticalFlowSDK_1_0_Headers")
14+
ocv_download(FILENAME "${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_COMMIT}.zip"
15+
HASH ${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_MD5}
16+
URL
17+
"https://github.com/NVIDIA/NVIDIAOpticalFlowSDK/archive/"
18+
DESTINATION_DIR "${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_PATH}"
19+
STATUS NVIDIA_OPTICAL_FLOW_1_0_HEADERS_DOWNLOAD_SUCCESS
20+
ID "NVIDIA_OPTICAL_FLOW"
21+
UNPACK RELATIVE_URL)
22+
23+
if(NOT NVIDIA_OPTICAL_FLOW_1_0_HEADERS_DOWNLOAD_SUCCESS)
24+
message(STATUS "Failed to download NVIDIA_Optical_Flow_1_0 Headers")
25+
else()
26+
add_definitions(-DHAVE_NVIDIA_OPTFLOW=1)
27+
ocv_include_directories(SYSTEM "${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_PATH}/NVIDIAOpticalFlowSDK-${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_COMMIT}")
28+
endif()

modules/cudaoptflow/include/opencv2/cudaoptflow.hpp

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,47 @@ class CV_EXPORTS_W SparseOpticalFlow : public Algorithm
102102
OutputArray err = cv::noArray(),
103103
Stream& stream = Stream::Null()) = 0;
104104
};
105+
/** @brief Base Interface for optical flow algorithms using NVIDIA Optical Flow SDK.
106+
*/
107+
class CV_EXPORTS_W NvidiaHWOpticalFlow : public Algorithm
108+
{
109+
public:
110+
/** @brief Calculates Optical Flow using NVIDIA Optical Flow SDK.
111+
112+
* NVIDIA GPUs starting with Turing contain a dedicated hardware accelerator for computing optical flow vectors between pairs of images.
113+
* The optical flow hardware accelerator generates block-based optical flow vectors.
114+
* The size of the block depends on hardware in use, and can be queried using the function getGridSize().
115+
* The block-based flow vectors generated by the hardware can be converted to dense representation (i.e. per-pixel flow vectors) using upSampler() helper function, if needed.
116+
* The flow vectors are stored in CV_16SC2 format with x and y components of each flow vector in 16-bit signed fixed point representation S10.5.
117+
118+
@param inputImage Input image.
119+
@param referenceImage Reference image of the same size and the same type as input image.
120+
@param flow A buffer consisting of inputImage.size() / getGridSize() flow vectors in CV_16SC2 format.
121+
@param stream Stream for the asynchronous version.
122+
@param hint Hint buffer if client provides external hints. Must have same size as flow buffer.
123+
Caller can provide flow vectors as hints for optical flow calculation.
124+
@param cost Cost buffer contains numbers indicating the confidence associated with each of the generated flow vectors.
125+
The higher the cost, the lower the confidence. Cost buffer is of type CV_32SC1.
126+
127+
@note
128+
- Client must use critical sections around each calc() function if calling it from multiple threads.
129+
*/
130+
CV_WRAP virtual void calc(
131+
InputArray inputImage,
132+
InputArray referenceImage,
133+
InputOutputArray flow,
134+
Stream& stream = Stream::Null(),
135+
InputArray hint = cv::noArray(),
136+
OutputArray cost = cv::noArray()) = 0;
137+
138+
/** @brief Releases all buffers, contexts and device pointers.
139+
*/
140+
CV_WRAP virtual void collectGarbage() = 0;
141+
142+
/** @brief Returns grid size of output buffer as per the hardware's capability.
143+
*/
144+
CV_WRAP virtual int getGridSize() const = 0;
145+
};
105146

106147
//
107148
// BroxOpticalFlow
@@ -342,6 +383,70 @@ class CV_EXPORTS_W OpticalFlowDual_TVL1 : public DenseOpticalFlow
342383
bool useInitialFlow = false);
343384
};
344385

386+
//
387+
// NvidiaOpticalFlow
388+
//
389+
390+
/** @brief Class for computing the optical flow vectors between two images using NVIDIA Optical Flow hardware and Optical Flow SDK 1.0.
391+
@note
392+
- A sample application demonstrating the use of NVIDIA Optical Flow can be found at
393+
opencv_source_code/samples/gpu/nvidia_optical_flow.cpp
394+
- An example application comparing accuracy and performance of NVIDIA Optical Flow with other optical flow algorithms in OpenCV can be found at
395+
opencv_source_code/samples/gpu/optical_flow.cpp
396+
*/
397+
398+
class CV_EXPORTS_W NvidiaOpticalFlow_1_0 : public NvidiaHWOpticalFlow
399+
{
400+
public:
401+
/**
402+
* Supported optical flow performance levels.
403+
*/
404+
enum NVIDIA_OF_PERF_LEVEL
405+
{
406+
NV_OF_PERF_LEVEL_UNDEFINED,
407+
NV_OF_PERF_LEVEL_SLOW = 5, /**< Slow perf level results in lowest performance and best quality */
408+
NV_OF_PERF_LEVEL_MEDIUM = 10, /**< Medium perf level results in low performance and medium quality */
409+
NV_OF_PERF_LEVEL_FAST = 20, /**< Fast perf level results in high performance and low quality */
410+
NV_OF_PERF_LEVEL_MAX
411+
};
412+
413+
/** @brief The NVIDIA optical flow hardware generates flow vectors at granularity gridSize, which can be queried via function getGridSize().
414+
* The upSampler() helper function converts the hardware-generated flow vectors to dense representation (1 flow vector for each pixel)
415+
* using nearest neighbour upsampling method.
416+
417+
@param flow Buffer of type CV_16SC2 containing flow vectors generated by calc().
418+
@param width Width of the input image in pixels for which these flow vectors were generated.
419+
@param height Height of the input image in pixels for which these flow vectors were generated.
420+
@param gridSize Granularity of the optical flow vectors returned by calc() function. Can be queried using getGridSize().
421+
@param upsampledFlow Buffer of type CV_32FC2, containing upsampled flow vectors, each flow vector for 1 pixel, in the pitch-linear layout.
422+
*/
423+
CV_WRAP virtual void upSampler(InputArray flow, int width, int height,
424+
int gridSize, InputOutputArray upsampledFlow) = 0;
425+
426+
/** @brief Instantiate NVIDIA Optical Flow
427+
428+
@param width Width of input image in pixels.
429+
@param height Height of input image in pixels.
430+
@param perfPreset Optional parameter. Refer to the [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about presets.
431+
Defaults to NV_OF_PERF_LEVEL_SLOW.
432+
@param enableTemporalHints Optional parameter. Flag to enable temporal hints. When set to true, the hardware uses the flow vectors
433+
generated in previous call to calc() as internal hints for the current call to calc().
434+
Useful when computing flow vectors between successive video frames. Defaults to false.
435+
@param enableExternalHints Optional Parameter. Flag to enable passing external hints buffer to calc(). Defaults to false.
436+
@param enableCostBuffer Optional Parameter. Flag to enable cost buffer output from calc(). Defaults to false.
437+
@param gpuId Optional parameter to select the GPU ID on which the optical flow should be computed. Useful in multi-GPU systems. Defaults to 0.
438+
*/
439+
CV_WRAP static Ptr<NvidiaOpticalFlow_1_0> create(
440+
int width,
441+
int height,
442+
cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL perfPreset
443+
= cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW,
444+
bool enableTemporalHints = false,
445+
bool enableExternalHints = false,
446+
bool enableCostBuffer = false,
447+
int gpuId = 0);
448+
};
449+
345450
//! @}
346451

347452
}} // namespace cv { namespace cuda {

modules/cudaoptflow/perf/perf_optflow.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,4 +326,57 @@ PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1,
326326
}
327327
}
328328

329+
//////////////////////////////////////////////////////
330+
// NvidiaOpticalFlow_1_0
331+
332+
PERF_TEST_P(ImagePair, NvidiaOpticalFlow_1_0,
333+
Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
334+
{
335+
declare.time(10);
336+
337+
const cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
338+
ASSERT_FALSE(frame0.empty());
339+
340+
const cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
341+
ASSERT_FALSE(frame1.empty());
342+
343+
const int width = frame0.size().width;
344+
const int height = frame0.size().height;
345+
const bool enableTemporalHints = false;
346+
const bool enableExternalHints = false;
347+
const bool enableCostBuffer = false;
348+
const int gpuid = 0;
349+
350+
if (PERF_RUN_CUDA())
351+
{
352+
const cv::cuda::GpuMat d_frame0(frame0);
353+
const cv::cuda::GpuMat d_frame1(frame1);
354+
cv::cuda::GpuMat d_flow;
355+
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> d_nvof;
356+
try
357+
{
358+
d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(width, height,
359+
cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST,
360+
enableTemporalHints, enableExternalHints, enableCostBuffer, gpuid);
361+
}
362+
catch (const cv::Exception& e)
363+
{
364+
if(e.code == Error::StsBadFunc || e.code == Error::StsBadArg || e.code == Error::StsNullPtr)
365+
throw SkipTestException("Current configuration is not supported");
366+
throw;
367+
}
368+
369+
TEST_CYCLE() d_nvof->calc(d_frame0, d_frame1, d_flow);
370+
371+
cv::cuda::GpuMat flow[2];
372+
cv::cuda::split(d_flow, flow);
373+
374+
cv::cuda::GpuMat u = flow[0];
375+
cv::cuda::GpuMat v = flow[1];
376+
377+
CUDA_SANITY_CHECK(u, 1e-10);
378+
CUDA_SANITY_CHECK(v, 1e-10);
379+
}
380+
}
381+
329382
}} // namespace

0 commit comments

Comments
 (0)