Skip to content

Commit 7b97851

Browse files
authored
Merge pull request #3542 from cudawarped:cudacodec_videoreader_seek
cudacodec::VideoReader: allow frame seek on initialization #3542 Allow seeking of the video source on initialization of `cudacodec::VideoReader` when the new variable `VideoReaderInitParams::firstFrameIdx` != 0. Dependent on opencv/opencv#24012. Fixes #3541. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
1 parent d51add3 commit 7b97851

File tree

7 files changed

+120
-46
lines changed

7 files changed

+120
-46
lines changed

modules/cudacodec/include/opencv2/cudacodec.hpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,14 @@ class CV_EXPORTS_W RawVideoSource
544544
@return `true` unless the property is unset or not supported.
545545
*/
546546
virtual bool get(const int propertyId, double& propertyVal) const = 0;
547+
548+
/** @brief Retrieve the index of the first frame that will be returned after construction.
549+
550+
@return index of the first frame that will be returned after construction.
551+
552+
@note To reduce the decoding overhead when initializing VideoReader to start its decoding from frame N, RawVideoSource should seek to the first valid key frame less than or equal to N and return that index here.
553+
*/
554+
virtual int getFirstFrameIdx() const = 0;
547555
};
548556

549557
/** @brief VideoReader initialization parameters
@@ -561,9 +569,10 @@ but it cannot go below the number determined by NVDEC.
561569
@param targetRoi Region of interest (x/width should be multiples of 4 and y/height multiples of 2) within the output frame to copy and resize the decoded frame to,
562570
defaults to the full frame.
563571
@param enableHistogram Request output of decoded luma histogram \a hist from VideoReader::nextFrame(GpuMat& frame, GpuMat& hist, Stream& stream), if hardware supported.
572+
@param firstFrameIdx Index of the first frame to seek to on initialization of the VideoReader.
564573
*/
565574
struct CV_EXPORTS_W_SIMPLE VideoReaderInitParams {
566-
CV_WRAP VideoReaderInitParams() : udpSource(false), allowFrameDrop(false), minNumDecodeSurfaces(0), rawMode(0), enableHistogram(false){};
575+
CV_WRAP VideoReaderInitParams() : udpSource(false), allowFrameDrop(false), minNumDecodeSurfaces(0), rawMode(0), enableHistogram(false), firstFrameIdx(0){};
567576
CV_PROP_RW bool udpSource;
568577
CV_PROP_RW bool allowFrameDrop;
569578
CV_PROP_RW int minNumDecodeSurfaces;
@@ -572,6 +581,7 @@ struct CV_EXPORTS_W_SIMPLE VideoReaderInitParams {
572581
CV_PROP_RW cv::Rect srcRoi;
573582
CV_PROP_RW cv::Rect targetRoi;
574583
CV_PROP_RW bool enableHistogram;
584+
CV_PROP_RW int firstFrameIdx;
575585
};
576586

577587
/** @brief Creates video reader.

modules/cudacodec/src/ffmpeg_video_source.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -169,19 +169,21 @@ bool ParamSetsExist(unsigned char* parameterSets, const int szParameterSets, uns
169169
return paramSetStartCodeLen != 0 && packetStartCodeLen != 0 && parameterSets[paramSetStartCodeLen] == data[packetStartCodeLen];
170170
}
171171

172-
cv::cudacodec::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname, const std::vector<int>& _videoCaptureParams)
172+
cv::cudacodec::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname, const std::vector<int>& _videoCaptureParams, const int iMaxStartFrame)
173173
: videoCaptureParams(_videoCaptureParams)
174174
{
175175
if (!videoio_registry::hasBackend(CAP_FFMPEG))
176176
CV_Error(Error::StsNotImplemented, "FFmpeg backend not found");
177177

178-
cap.open(fname, CAP_FFMPEG, videoCaptureParams);
179-
if (!cap.isOpened())
178+
videoCaptureParams.push_back(CAP_PROP_FORMAT);
179+
videoCaptureParams.push_back(-1);
180+
if (!cap.open(fname, CAP_FFMPEG, videoCaptureParams))
180181
CV_Error(Error::StsUnsupportedFormat, "Unsupported video source");
181-
182-
if (!cap.set(CAP_PROP_FORMAT, -1)) // turn off video decoder (extract stream)
183-
CV_Error(Error::StsUnsupportedFormat, "Fetching of RAW video streams is not supported");
184182
CV_Assert(cap.get(CAP_PROP_FORMAT) == -1);
183+
if (iMaxStartFrame) {
184+
CV_Assert(cap.set(CAP_PROP_POS_FRAMES, iMaxStartFrame));
185+
firstFrameIdx = static_cast<int>(cap.get(CAP_PROP_POS_FRAMES));
186+
}
185187

186188
const int codecExtradataIndex = static_cast<int>(cap.get(CAP_PROP_CODEC_EXTRADATA_INDEX));
187189
Mat tmpExtraData;

modules/cudacodec/src/ffmpeg_video_source.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ namespace cv { namespace cudacodec { namespace detail {
5151
class FFmpegVideoSource : public RawVideoSource
5252
{
5353
public:
54-
FFmpegVideoSource(const String& fname, const std::vector<int>& params);
54+
FFmpegVideoSource(const String& fname, const std::vector<int>& params, const int iMaxStartFrame);
5555
~FFmpegVideoSource();
5656

5757
bool getNextPacket(unsigned char** data, size_t* size) CV_OVERRIDE;
@@ -66,12 +66,15 @@ class FFmpegVideoSource : public RawVideoSource
6666

6767
bool get(const int propertyId, double& propertyVal) const;
6868

69+
int getFirstFrameIdx() const { return firstFrameIdx; }
70+
6971
private:
7072
FormatInfo format_;
7173
VideoCapture cap;
7274
Mat rawFrame, extraData, dataWithHeader;
7375
int iFrame = 0;
7476
std::vector<int> videoCaptureParams;
77+
int firstFrameIdx = 0;
7578
};
7679

7780
}}}

modules/cudacodec/src/video_reader.cpp

Lines changed: 50 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ namespace
112112
{
113113
public:
114114
explicit VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop = false , const bool udpSource = false,
115-
const Size targetSz = Size(), const Rect srcRoi = Rect(), const Rect targetRoi = Rect(), const bool enableHistogram = false);
115+
const Size targetSz = Size(), const Rect srcRoi = Rect(), const Rect targetRoi = Rect(), const bool enableHistogram = false, const int firstFrameIdx = 0);
116116
~VideoReaderImpl();
117117

118118
bool nextFrame(GpuMat& frame, Stream& stream) CV_OVERRIDE;
@@ -135,6 +135,9 @@ namespace
135135
bool get(const int propertyId, double& propertyVal) const CV_OVERRIDE;
136136

137137
private:
138+
bool skipFrame();
139+
bool aquireFrameInfo(std::pair<CUVIDPARSERDISPINFO, CUVIDPROCPARAMS>& frameInfo, Stream& stream = Stream::Null());
140+
void releaseFrameInfo(const std::pair<CUVIDPARSERDISPINFO, CUVIDPROCPARAMS>& frameInfo);
138141
bool internalGrab(GpuMat & frame, GpuMat & histogram, Stream & stream);
139142
void waitForDecoderInit();
140143

@@ -154,6 +157,7 @@ namespace
154157
static const int rawPacketsBaseIdx = 2;
155158
ColorFormat colorFormat = ColorFormat::BGRA;
156159
static const String errorMsg;
160+
int iFrame = 0;
157161
};
158162

159163
const String VideoReaderImpl::errorMsg = "Parsing/Decoding video source failed, check GPU memory is available and GPU supports requested functionality.";
@@ -173,7 +177,7 @@ namespace
173177
}
174178

175179
VideoReaderImpl::VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop, const bool udpSource,
176-
const Size targetSz, const Rect srcRoi, const Rect targetRoi, const bool enableHistogram) :
180+
const Size targetSz, const Rect srcRoi, const Rect targetRoi, const bool enableHistogram, const int firstFrameIdx) :
177181
videoSource_(source),
178182
lock_(0)
179183
{
@@ -190,6 +194,8 @@ namespace
190194
videoSource_->setVideoParser(videoParser_);
191195
videoSource_->start();
192196
waitForDecoderInit();
197+
for(iFrame = videoSource_->getFirstFrameIdx(); iFrame < firstFrameIdx; iFrame++)
198+
CV_Assert(skipFrame());
193199
videoSource_->updateFormat(videoDecoder_->format());
194200
}
195201

@@ -209,10 +215,7 @@ namespace
209215
CUvideoctxlock m_lock;
210216
};
211217

212-
bool VideoReaderImpl::internalGrab(GpuMat& frame, GpuMat& histogram, Stream& stream) {
213-
if (videoParser_->hasError())
214-
CV_Error(Error::StsError, errorMsg);
215-
cudacodec::FormatInfo fmt;
218+
bool VideoReaderImpl::aquireFrameInfo(std::pair<CUVIDPARSERDISPINFO, CUVIDPROCPARAMS>& frameInfo, Stream& stream) {
216219
if (frames_.empty())
217220
{
218221
CUVIDPARSERDISPINFO displayInfo;
@@ -234,34 +237,53 @@ namespace
234237

235238
bool isProgressive = displayInfo.progressive_frame != 0;
236239
const int num_fields = isProgressive ? 1 : 2 + displayInfo.repeat_first_field;
237-
fmt = videoDecoder_->format();
238-
videoSource_->updateFormat(fmt);
239240

240241
for (int active_field = 0; active_field < num_fields; ++active_field)
241242
{
242243
CUVIDPROCPARAMS videoProcParams;
243244
std::memset(&videoProcParams, 0, sizeof(CUVIDPROCPARAMS));
244245

245246
videoProcParams.progressive_frame = displayInfo.progressive_frame;
246-
videoProcParams.second_field = active_field;
247-
videoProcParams.top_field_first = displayInfo.top_field_first;
248-
videoProcParams.unpaired_field = (num_fields == 1);
247+
videoProcParams.second_field = active_field;
248+
videoProcParams.top_field_first = displayInfo.top_field_first;
249+
videoProcParams.unpaired_field = (num_fields == 1);
249250
videoProcParams.output_stream = StreamAccessor::getStream(stream);
250251

251252
frames_.push_back(std::make_pair(displayInfo, videoProcParams));
252253
}
253254
}
255+
else {
256+
for (auto& frame : frames_)
257+
frame.second.output_stream = StreamAccessor::getStream(stream);
258+
}
254259

255260
if (frames_.empty())
256261
return false;
257262

258-
std::pair<CUVIDPARSERDISPINFO, CUVIDPROCPARAMS> frameInfo = frames_.front();
263+
frameInfo = frames_.front();
259264
frames_.pop_front();
265+
return true;
266+
}
267+
268+
// Hands the display info of a previously acquired frame back to frameQueue_.
// The release is only performed when frames_ is empty.
// NOTE(review): aquireFrameInfo() queues one entry per field, all sharing the same display
// info, so the emptiness check presumably defers the release until the last field of an
// interlaced frame has been consumed - confirm.
void VideoReaderImpl::releaseFrameInfo(const std::pair<CUVIDPARSERDISPINFO, CUVIDPROCPARAMS>& frameInfo) {
269+
// release the frame, so it can be re-used in decoder
270+
if (frames_.empty())
271+
frameQueue_->releaseFrame(frameInfo.first);
272+
}
273+
274+
bool VideoReaderImpl::internalGrab(GpuMat& frame, GpuMat& histogram, Stream& stream) {
275+
if (videoParser_->hasError())
276+
CV_Error(Error::StsError, errorMsg);
277+
278+
std::pair<CUVIDPARSERDISPINFO, CUVIDPROCPARAMS> frameInfo;
279+
if (!aquireFrameInfo(frameInfo, stream))
280+
return false;
260281

261282
{
262283
VideoCtxAutoLock autoLock(lock_);
263284

264285
unsigned long long cuHistogramPtr = 0;
286+
const cudacodec::FormatInfo fmt = videoDecoder_->format();
265287
if (fmt.enableHistogram)
266288
frameInfo.second.histogram_dptr = &cuHistogramPtr;
267289

@@ -281,10 +303,16 @@ namespace
281303
videoDecoder_->unmapFrame(decodedFrame);
282304
}
283305

284-
// release the frame, so it can be re-used in decoder
285-
if (frames_.empty())
286-
frameQueue_->releaseFrame(frameInfo.first);
306+
releaseFrameInfo(frameInfo);
307+
iFrame++;
308+
return true;
309+
}
287310

311+
// Discards the next queued frame without producing any output: acquires its frame info
// (using the default stream argument of aquireFrameInfo()) and releases it straight away.
// Does not modify iFrame; the seek loop in the constructor advances the counter itself.
// @return false if no frame info could be acquired, true otherwise.
bool VideoReaderImpl::skipFrame() {
312+
std::pair<CUVIDPARSERDISPINFO, CUVIDPROCPARAMS> frameInfo;
313+
if (!aquireFrameInfo(frameInfo))
314+
return false;
315+
releaseFrameInfo(frameInfo);
288316
return true;
289317
}
290318

@@ -399,6 +427,10 @@ namespace
399427
}
400428

401429
// Retrieves the value of a property.
// CAP_PROP_POS_FRAMES is answered locally from iFrame, the counter that internalGrab()
// increments after each frame it returns; every other property is forwarded to videoSource_.
// @param propertyId  property to query.
// @param propertyVal receives the value on success.
// @return true for CAP_PROP_POS_FRAMES, otherwise the result of videoSource_->get().
bool VideoReaderImpl::get(const int propertyId, double& propertyVal) const {
430+
if (propertyId == cv::VideoCaptureProperties::CAP_PROP_POS_FRAMES) {
431+
propertyVal = static_cast<double>(iFrame);
432+
return true;
433+
}
402434
return videoSource_->get(propertyId, propertyVal);
403435
}
404436

@@ -421,28 +453,26 @@ Ptr<VideoReader> cv::cudacodec::createVideoReader(const String& filename, const
421453
CV_Assert(!filename.empty());
422454

423455
Ptr<VideoSource> videoSource;
424-
425456
try
426457
{
427458
// prefer ffmpeg to cuvidGetSourceVideoFormat() which doesn't always return the correct raw pixel format
428-
Ptr<RawVideoSource> source(new FFmpegVideoSource(filename, sourceParams));
459+
Ptr<RawVideoSource> source(new FFmpegVideoSource(filename, sourceParams, params.firstFrameIdx));
429460
videoSource.reset(new RawVideoSourceWrapper(source, params.rawMode));
430461
}
431462
catch (...)
432463
{
433464
if (sourceParams.size()) throw;
434465
videoSource.reset(new CuvidVideoSource(filename));
435466
}
436-
437467
return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource, params.targetSz,
438-
params.srcRoi, params.targetRoi, params.enableHistogram);
468+
params.srcRoi, params.targetRoi, params.enableHistogram, params.firstFrameIdx);
439469
}
440470

441471
Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>& source, const VideoReaderInitParams params)
442472
{
443473
Ptr<VideoSource> videoSource(new RawVideoSourceWrapper(source, params.rawMode));
444474
return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource, params.targetSz,
445-
params.srcRoi, params.targetRoi, params.enableHistogram);
475+
params.srcRoi, params.targetRoi, params.enableHistogram, params.firstFrameIdx);
446476
}
447477

448478
void cv::cudacodec::MapHist(const GpuMat& hist, Mat& histFull) {

modules/cudacodec/src/video_source.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ bool cv::cudacodec::detail::RawVideoSourceWrapper::get(const int propertyId, dou
7676
return source_->get(propertyId, propertyVal);
7777
}
7878

79+
// Returns the first frame index reported by the wrapped source_ (pure forwarding call).
int cv::cudacodec::detail::RawVideoSourceWrapper::getFirstFrameIdx() const {
80+
return source_->getFirstFrameIdx();
81+
}
82+
7983
void cv::cudacodec::detail::RawVideoSourceWrapper::start()
8084
{
8185
stop_ = false;

modules/cudacodec/src/video_source.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ class VideoSource
5858
virtual FormatInfo format() const = 0;
5959
virtual void updateFormat(const FormatInfo& videoFormat) = 0;
6060
virtual bool get(const int propertyId, double& propertyVal) const { return false; }
61+
virtual int getFirstFrameIdx() const { return 0; }
6162
virtual void start() = 0;
6263
virtual void stop() = 0;
6364
virtual bool isStarted() const = 0;
@@ -91,6 +92,7 @@ class RawVideoSourceWrapper : public VideoSource
9192
FormatInfo format() const CV_OVERRIDE;
9293
void updateFormat(const FormatInfo& videoFormat) CV_OVERRIDE;
9394
bool get(const int propertyId, double& propertyVal) const CV_OVERRIDE;
95+
int getFirstFrameIdx() const CV_OVERRIDE;
9496
void start() CV_OVERRIDE;
9597
void stop() CV_OVERRIDE;
9698
bool isStarted() const CV_OVERRIDE;

modules/cudacodec/test/test_video.cpp

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,10 @@ struct CheckParams : SetDevice
113113
{
114114
};
115115

116+
// Fixture for the VideoReader seek test (CUDA_TEST_P(Seek, Reader)); adds nothing to SetDevice.
struct Seek : SetDevice
117+
{
118+
};
119+
116120
#if defined(HAVE_NVCUVID)
117121
//////////////////////////////////////////////////////
118122
// VideoReader
@@ -542,36 +546,22 @@ CUDA_TEST_P(CheckParams, Reader)
542546
ASSERT_TRUE(reader->get(cv::VideoCaptureProperties::CAP_PROP_OPEN_TIMEOUT_MSEC, msActual));
543547
ASSERT_EQ(msActual, msReference);
544548
}
545-
546-
{
547-
std::vector<bool> exceptionsThrown = { false,true };
548-
std::vector<int> capPropFormats = { -1,0 };
549-
for (int i = 0; i < capPropFormats.size(); i++) {
550-
bool exceptionThrown = false;
551-
try {
552-
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile, {
553-
cv::VideoCaptureProperties::CAP_PROP_FORMAT, capPropFormats.at(i) });
554-
}
555-
catch (cv::Exception &ex) {
556-
if (ex.code == Error::StsUnsupportedFormat)
557-
exceptionThrown = true;
558-
}
559-
ASSERT_EQ(exceptionThrown, exceptionsThrown.at(i));
560-
}
561-
}
562549
}
563550

564551
CUDA_TEST_P(CheckParams, CaptureProps)
565552
{
566553
std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../highgui/video/big_buck_bunny.mp4";
567554
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile);
568-
double width, height, fps;
555+
double width, height, fps, iFrame;
569556
ASSERT_TRUE(reader->get(cv::VideoCaptureProperties::CAP_PROP_FRAME_WIDTH, width));
570557
ASSERT_EQ(672, width);
571558
ASSERT_TRUE(reader->get(cv::VideoCaptureProperties::CAP_PROP_FRAME_HEIGHT, height));
572559
ASSERT_EQ(384, height);
573560
ASSERT_TRUE(reader->get(cv::VideoCaptureProperties::CAP_PROP_FPS, fps));
574561
ASSERT_EQ(24, fps);
562+
ASSERT_TRUE(reader->grab());
563+
ASSERT_TRUE(reader->get(cv::VideoCaptureProperties::CAP_PROP_POS_FRAMES, iFrame));
564+
ASSERT_EQ(iFrame, 1.);
575565
}
576566

577567
CUDA_TEST_P(CheckDecodeSurfaces, Reader)
@@ -619,6 +609,37 @@ CUDA_TEST_P(CheckInitParams, Reader)
619609
ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_RAW_MODE, rawMode) && static_cast<bool>(rawMode) == params.rawMode);
620610
}
621611

612+
// Checks that a VideoReader created with VideoReaderInitParams::firstFrameIdx set starts at
// that frame: the first frame it returns must be identical to the frame obtained by decoding
// sequentially from the start of the file, and CAP_PROP_POS_FRAMES must report
// firstFrameIdx before, and firstFrameIdx + 1 after, that frame has been read.
CUDA_TEST_P(Seek, Reader)
613+
{
614+
#if defined(WIN32)
615+
throw SkipTestException("Test disabled on Windows until the FFMpeg wrapper is updated to include PR24012.");
616+
#endif
617+
std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../highgui/video/big_buck_bunny.mp4";
618+
// seek to a non key frame
619+
const int firstFrameIdx = 18;
620+
621+
// Reference: read frames 0..firstFrameIdx with a default reader; frameGs ends up holding
// the last one read. The reader is scoped so it is destroyed before the seeking reader is created.
GpuMat frameGs;
622+
{
623+
cv::Ptr<cv::cudacodec::VideoReader> readerGs = cv::cudacodec::createVideoReader(inputFile);
624+
ASSERT_TRUE(readerGs->set(cudacodec::ColorFormat::GRAY));
625+
for (int i = 0; i <= firstFrameIdx; i++)
626+
ASSERT_TRUE(readerGs->nextFrame(frameGs));
627+
}
628+
629+
// Reader under test: ask it to start at firstFrameIdx on construction.
cudacodec::VideoReaderInitParams params;
630+
params.firstFrameIdx = firstFrameIdx;
631+
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile, {}, params);
632+
double iFrame = 0.;
633+
// Before any frame is read, the reported position must already be firstFrameIdx.
ASSERT_TRUE(reader->get(cv::VideoCaptureProperties::CAP_PROP_POS_FRAMES, iFrame));
634+
ASSERT_EQ(iFrame, static_cast<double>(firstFrameIdx));
635+
ASSERT_TRUE(reader->set(cudacodec::ColorFormat::GRAY));
636+
GpuMat frame;
637+
ASSERT_TRUE(reader->nextFrame(frame));
638+
// NORM_INF of 0 means no pixel differs between the reference frame and the seeked frame.
ASSERT_EQ(cuda::norm(frameGs, frame, NORM_INF), 0.0);
639+
// Reading one frame must advance the reported position by one.
ASSERT_TRUE(reader->get(cv::VideoCaptureProperties::CAP_PROP_POS_FRAMES, iFrame));
640+
ASSERT_EQ(iFrame, static_cast<double>(firstFrameIdx+1));
641+
}
642+
622643
#endif // HAVE_NVCUVID
623644

624645
#if defined(HAVE_NVCUVID) && defined(HAVE_NVCUVENC)
@@ -958,5 +979,7 @@ INSTANTIATE_TEST_CASE_P(CUDA_Codec, CheckInitParams, testing::Combine(
958979
testing::Values("highgui/video/big_buck_bunny.mp4"),
959980
testing::Values(true,false), testing::Values(true,false), testing::Values(true,false)));
960981

982+
INSTANTIATE_TEST_CASE_P(CUDA_Codec, Seek, ALL_DEVICES);
983+
961984
#endif // HAVE_NVCUVID || HAVE_NVCUVENC
962985
}} // namespace

0 commit comments

Comments
 (0)