Skip to content

Commit 54528c5

Browse files
CarlosLeeGit authored and
pymumu committed
avoid gpu decode stuck
1 parent 6e93132 commit 54528c5

File tree

4 files changed

+89
-15
lines changed

4 files changed

+89
-15
lines changed

src/drivers/devices/cuda/flowunit/video_decoder/nvcodec_video_decoder.cc

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,39 @@
2323

2424
#define MIN_ALLOWABLE_DECODE_SURFACE_NUM 1
2525

26+
// Returns the single, shared limiter object.
//
// The object is a function-local static: it is constructed the first time this
// function runs and the same address is handed back on every later call, so
// the returned pointer is never null.
NvcodecConcurrencyLimiter *NvcodecConcurrencyLimiter::GetInstance() {
  static NvcodecConcurrencyLimiter instance;
  return &instance;
}
30+
31+
void NvcodecConcurrencyLimiter::Init(uint32_t limit) {
32+
if (limit == 0) {
33+
limited_ = false;
34+
}
35+
36+
count_ = limit;
37+
}
38+
39+
void NvcodecConcurrencyLimiter::Acquire() {
40+
if (!limited_) {
41+
return;
42+
}
43+
44+
std::unique_lock<std::mutex> lock(count_lock_);
45+
count_cv_.wait(lock, [=] { return count_ > 0; });
46+
--count_;
47+
}
48+
49+
void NvcodecConcurrencyLimiter::Release() {
50+
if (!limited_) {
51+
return;
52+
}
53+
54+
std::unique_lock<std::mutex> lock(count_lock_);
55+
++count_;
56+
count_cv_.notify_one();
57+
}
58+
2659
#define NVDEC_THROW_ERROR(err_str, err_code) \
2760
throw NVDECException::MakeNVDECException(err_str, err_code, __FUNCTION__, \
2861
__FILE__, __LINE__);
@@ -87,7 +120,7 @@ NvcodecVideoDecoder::~NvcodecVideoDecoder() {
87120
modelbox::Status NvcodecVideoDecoder::Init(const std::string &device_id,
88121
AVCodecID codec_id,
89122
std::string &file_url,
90-
bool skip_err_frame) {
123+
bool skip_err_frame, bool no_delay) {
91124
gpu_id_ = std::stoi(device_id);
92125
MBLOG_INFO << "Init decode in gpu " << gpu_id_;
93126
// Use cuda runtime CUContext on same device in whole modelbox process to
@@ -122,8 +155,8 @@ modelbox::Status NvcodecVideoDecoder::Init(const std::string &device_id,
122155
videoParserParams.CodecType = codec_id_;
123156
videoParserParams.ulMaxNumDecodeSurfaces = 1;
124157
videoParserParams.ulMaxDisplayDelay =
125-
2; // setting ulMaxDisplayDelay to 2 achieves max decoding rate, based on
126-
// several tests.
158+
no_delay ? 0 : 2; // setting ulMaxDisplayDelay to 2 achieves max decoding
159+
// rate, based on several tests.
127160
videoParserParams.pUserData = (void *)this;
128161
videoParserParams.pfnSequenceCallback = HandleVideoSequenceProc;
129162
videoParserParams.pfnDecodePicture = HandlePictureDecodeProc;
@@ -170,7 +203,14 @@ modelbox::Status NvcodecVideoDecoder::Decode(
170203
return modelbox::STATUS_FAULT;
171204
}
172205

206+
NvcodecConcurrencyLimiter::GetInstance()->Acquire();
207+
is_limiter_released_ = false;
173208
CUDA_API_CALL(cuvidParseVideoData(video_parser_, &packet));
209+
if (!is_limiter_released_) {
210+
// might release when handle display
211+
NvcodecConcurrencyLimiter::GetInstance()->Release();
212+
is_limiter_released_ = true;
213+
}
174214

175215
for (size_t i = 0; i < frame_count_in_one_decode_; ++i) {
176216
auto frame = std::make_shared<NvcodecFrame>();
@@ -389,6 +429,9 @@ int32_t NvcodecVideoDecoder::HandlePictureDisplay(
389429
return 1;
390430
}
391431

432+
NvcodecConcurrencyLimiter::GetInstance()->Release();
433+
is_limiter_released_ = true;
434+
392435
++frame_count_in_one_decode_;
393436
SaveFrame(src_frame_ptr, src_pitch);
394437
SaveTimestamp(display_info->timestamp);

src/drivers/devices/cuda/flowunit/video_decoder/nvcodec_video_decoder.h

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,15 @@
1414
* limitations under the License.
1515
*/
1616

17-
1817
#ifndef MODELBOX_FLOWUNIT_NVCODEC_VIDEO_DECODER_H_
1918
#define MODELBOX_FLOWUNIT_NVCODEC_VIDEO_DECODER_H_
2019

21-
#include <modelbox/base/status.h>
2220
#include <cuda.h>
2321
#include <libavformat/avformat.h>
22+
#include <modelbox/base/status.h>
2423
#include <nvcuvid.h>
2524

25+
#include <condition_variable>
2626
#include <iostream>
2727
#include <map>
2828
#include <memory>
@@ -31,6 +31,25 @@
3131
#include <utility>
3232
#include <vector>
3333

34+
// Counting gate shared by every decoder in the process.
//
// A caller takes a permit with Acquire() and gives it back with Release().
// The only way to reach the object is GetInstance(); the constructor is
// private.
class NvcodecConcurrencyLimiter {
 public:
  // Returns the shared limiter object.
  static NvcodecConcurrencyLimiter *GetInstance();

  // Stores `limit` as the number of available permits. A limit of 0 marks the
  // limiter as not limited.
  void Init(uint32_t limit);

  // Takes a permit, waiting until one is available. Returns immediately when
  // the limiter is not limited.
  void Acquire();

  // Returns a permit and wakes one waiter. Returns immediately when the
  // limiter is not limited.
  void Release();

 private:
  NvcodecConcurrencyLimiter() = default;

  std::mutex count_lock_;             // guards count_ in Acquire()/Release()
  std::condition_variable count_cv_;  // signalled when a permit is returned
  uint32_t count_ = 0;                // permits currently available
  bool limited_ = false;              // false: Acquire()/Release() are no-ops
};
52+
3453
class NVDECException : public std::exception {
3554
public:
3655
NVDECException(std::string err_str, const CUresult err_code)
@@ -98,7 +117,8 @@ class NvcodecVideoDecoder {
98117
virtual ~NvcodecVideoDecoder();
99118

100119
modelbox::Status Init(const std::string &device_id, AVCodecID codec_id,
101-
std::string &file_url, bool skip_err_frame);
120+
std::string &file_url, bool skip_err_frame,
121+
bool no_delay);
102122

103123
modelbox::Status Decode(
104124
const std::shared_ptr<NvcodecPacket> &pkt,
@@ -115,7 +135,7 @@ class NvcodecVideoDecoder {
115135
modelbox::Status InitCuCtx(const std::string &device_id);
116136

117137
modelbox::Status GetCudaVideoCodec(AVCodecID codec_id,
118-
cudaVideoCodec &cuda_codec_id);
138+
cudaVideoCodec &cuda_codec_id);
119139

120140
std::string GetVideoCodecString(cudaVideoCodec cuda_codec_id);
121141

@@ -193,6 +213,8 @@ class NvcodecVideoDecoder {
193213
bool skip_err_frame_{false};
194214
int64_t latest_pts_{0};
195215
int32_t gpu_id_{0};
216+
217+
bool is_limiter_released_{false};
196218
};
197219

198-
#endif // MODELBOX_FLOWUNIT_NVCODEC_VIDEO_DECODER_H_
220+
#endif // MODELBOX_FLOWUNIT_NVCODEC_VIDEO_DECODER_H_

src/drivers/devices/cuda/flowunit/video_decoder/video_decoder_flowunit.cc

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ modelbox::Status VideoDecoderFlowUnit::Open(
3535
}
3636

3737
skip_err_frame_ = opts->GetBool("skip_error_frame", false);
38+
concurrency_limit_ = opts->GetUint32("concurrency_limit", 0);
39+
NvcodecConcurrencyLimiter::GetInstance()->Init(concurrency_limit_);
3840
return modelbox::STATUS_OK;
3941
}
4042

@@ -146,11 +148,11 @@ modelbox::Status VideoDecoderFlowUnit::WriteData(
146148
std::shared_ptr<modelbox::DataContext> &data_ctx,
147149
std::vector<std::shared_ptr<NvcodecFrame>> &frame_list, bool eos,
148150
const std::string &file_url) {
149-
auto last_frame =
150-
std::static_pointer_cast<modelbox::Buffer>(data_ctx->GetPrivate(LAST_FRAME));
151+
auto last_frame = std::static_pointer_cast<modelbox::Buffer>(
152+
data_ctx->GetPrivate(LAST_FRAME));
151153
data_ctx->SetPrivate(LAST_FRAME, nullptr);
152-
auto color_cvt =
153-
std::static_pointer_cast<NppiColorConverter>(data_ctx->GetPrivate(CVT_CTX));
154+
auto color_cvt = std::static_pointer_cast<NppiColorConverter>(
155+
data_ctx->GetPrivate(CVT_CTX));
154156
auto frame_buff_list = data_ctx->Output(FRAME_INFO_OUTPUT);
155157
if (last_frame != nullptr) {
156158
frame_buff_list->PushBack(last_frame); // Send last frame in cache
@@ -263,8 +265,10 @@ modelbox::Status VideoDecoderFlowUnit::DataPre(
263265
}
264266

265267
auto video_decoder = std::make_shared<NvcodecVideoDecoder>();
268+
// when concurrency limit set, no delay must be true to avoid gpu cache
269+
auto no_delay = concurrency_limit_ != 0;
266270
auto ret = video_decoder->Init(GetBindDevice()->GetDeviceID(), *codec_id,
267-
*source_url, skip_err_frame_);
271+
*source_url, skip_err_frame_, no_delay);
268272
if (ret != modelbox::STATUS_SUCCESS) {
269273
MBLOG_ERROR << "Video decoder init failed";
270274
return modelbox::STATUS_FAULT;
@@ -311,6 +315,9 @@ MODELBOX_FLOWUNIT(VideoDecoderFlowUnit, desc) {
311315
desc.AddFlowUnitOption(modelbox::FlowUnitOption(
312316
"skip_error_frame", "bool", true, "false",
313317
"whether the video decoder skip the error frame"));
318+
desc.AddFlowUnitOption(modelbox::FlowUnitOption(
319+
"concurrency_limit", "int", false, "0",
320+
"limit gpu decode concurrency to avoid decode stuck"));
314321
}
315322

316323
MODELBOX_DRIVER_FLOWUNIT(desc) {

src/drivers/devices/cuda/flowunit/video_decoder/video_decoder_flowunit.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
* limitations under the License.
1515
*/
1616

17-
1817
#ifndef MODELBOX_FLOWUNIT_VIDEO_DECODER_CPU_H_
1918
#define MODELBOX_FLOWUNIT_VIDEO_DECODER_CPU_H_
2019

@@ -58,7 +57,8 @@ constexpr const char *FLOWUNIT_DESC =
5857
"\t\tField Name: shape, Type: vector<size_t>\n"
5958
"\t\tField Name: type, Type: ModelBoxDataType::MODELBOX_UINT8\n"
6059
"\t@Constraint: The flowuint 'video_decoder' must be used pair "
61-
"with 'video_demuxer. the output buffer meta fields 'pix_fmt' is 'brg_packed' or 'rgb_packed', 'layout' is 'hcw'.";
60+
"with 'video_demuxer. the output buffer meta fields 'pix_fmt' is "
61+
"'brg_packed' or 'rgb_packed', 'layout' is 'hcw'.";
6262
constexpr const char *CODEC_META = "codec_meta";
6363
constexpr const char *DECODER_CTX = "decoder_ctx";
6464
constexpr const char *CVT_CTX = "converter_ctx";
@@ -113,6 +113,8 @@ class VideoDecoderFlowUnit : public modelbox::FlowUnit {
113113
std::string out_pix_fmt_str_;
114114
bool skip_err_frame_{false};
115115
std::string device_id_;
116+
// limit decode concurrency to avoid decoder stuck bug in gpu driver
117+
uint32_t concurrency_limit_{0};
116118
};
117119

118120
#endif // MODELBOX_FLOWUNIT_VIDEO_DECODER_CPU_H_

0 commit comments

Comments
 (0)