Skip to content

Commit 656b20a

Browse files
author
Maxim Pashchenkov
authored
Merge pull request opencv#19070 from mpashchenkov:mp/onnx-gframe
G-API: Support GFrame for ONNX infer. Summary of changes: added GFrame support to the ONNX backend; trimmed a test; removed an IE mention from an assert; addressed review comments (const/bot-restart fixes); used a MediaFrame::View instead of a unique_ptr in the function signature; added an extractMat function, so that ONNXCompiled now holds exMat — a cv::Mat with the unprocessed input data; added a meta check for inferList2.
1 parent 50bb344 commit 656b20a

File tree

2 files changed

+395
-28
lines changed

2 files changed

+395
-28
lines changed

modules/gapi/src/backends/onnx/gonnxbackend.cpp

Lines changed: 130 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,14 @@
1313
#include <ade/util/zip_range.hpp>
1414
#include <opencv2/gapi/infer.hpp>
1515
#include <opencv2/gapi/own/convert.hpp>
16+
#include <opencv2/gapi/gframe.hpp>
1617

1718
#include "api/gbackend_priv.hpp" // FIXME: Make it part of Backend SDK!
1819

20+
namespace {
21+
struct ONNXCallContext;
22+
}
23+
1924
namespace cv {
2025
namespace gimpl {
2126
namespace onnx {
@@ -64,6 +69,8 @@ struct TensorInfo {
6469
cv::util::optional<MeanStdev> mstd;
6570
};
6671

72+
using Views = std::vector<std::unique_ptr<cv::MediaFrame::View>>;
73+
6774
class ONNXCompiled {
6875
// ONNX Resources
6976
// NOTE: Env must live with the session, otherwise segfaults.
@@ -98,9 +105,12 @@ class ONNXCompiled {
98105
std::size_t numInputs() const { return params.num_in; }
99106
std::size_t numOutputs() const { return params.num_out; }
100107
void setInput(int i, const cv::Mat &m);
101-
void setOutput(int i, cv::Mat &m);
108+
void setOutput(int idx, cv::Mat &m);
102109
cv::Mat allocOutput(int i) const;
103-
110+
// Gets exMat from input
111+
void extractMat(ONNXCallContext &ctx, const size_t in_idx, Views &views);
112+
// Extracted cv::Mat from input cv::Mat/cv::MediaFrame
113+
cv::Mat exMat;
104114
// Run with the assigned inputs/outputs
105115
void run();
106116
};
@@ -256,6 +266,26 @@ inline void preprocess(const cv::Mat& src,
256266
}
257267
}
258268

269+
// Wraps (or converts) a cv::MediaFrame view into a cv::Mat.
//
// BGR frames are wrapped without copying: dst aliases the view's memory,
// so the view must outlive dst for as long as dst is used.
// NV12 frames are color-converted with cvtColorTwoPlane, which writes a
// freshly allocated BGR buffer into dst.
// Any other media format triggers an assertion failure.
void preprocess(const cv::MediaFrame::View& view,
                const cv::GFrameDesc& desc,
                cv::Mat& dst) {
    if (desc.fmt == cv::MediaFormat::BGR) {
        // Zero-copy wrap around the interleaved BGR plane
        dst = cv::Mat(desc.size, CV_8UC3, view.ptr[0], view.stride[0]);
    } else if (desc.fmt == cv::MediaFormat::NV12) {
        // Y plane is full-resolution, the interleaved UV plane is half-size
        const auto y_plane  = cv::Mat(desc.size,     CV_8UC1, view.ptr[0], view.stride[0]);
        const auto uv_plane = cv::Mat(desc.size / 2, CV_8UC2, view.ptr[1], view.stride[1]);
        cvtColorTwoPlane(y_plane, uv_plane, dst, cv::COLOR_YUV2BGR_NV12);
    } else {
        GAPI_Assert(false && "Unsupported media format for ONNX backend");
    }
}
288+
259289
template <typename T>
260290
inline Ort::Value createTensor(const Ort::MemoryInfo& memory_info,
261291
const cv::gimpl::onnx::TensorInfo& tensor_params,
@@ -297,7 +327,7 @@ struct ONNXUnit {
297327
struct ONNXCallContext {
298328
// Input parameters passed to an inference operation.
299329
std::vector<cv::GArg> args;
300-
330+
cv::GShapes in_shapes;
301331
//FIXME: avoid conversion of arguments from internal representation to OpenCV one on each call
302332
//to OCV kernel. (This can be achieved by a two single time conversions in GCPUExecutable::run,
303333
//once on enter for input and output arguments, and once before return for output arguments only
@@ -312,6 +342,11 @@ struct ONNXCallContext {
312342
const cv::Mat& inMat(std::size_t input) {
313343
return inArg<cv::Mat>(input);
314344
}
345+
346+
const cv::MediaFrame& inFrame(std::size_t input) {
347+
return inArg<cv::MediaFrame>(input);
348+
}
349+
315350
cv::Mat& outMatR(std::size_t output) {
316351
return *cv::util::get<cv::Mat*>(results.at(output));
317352
}
@@ -403,7 +438,8 @@ cv::GArg cv::gimpl::onnx::GONNXExecutable::packArg(const cv::GArg &arg) {
403438
GAPI_Assert( arg.kind != cv::detail::ArgKind::GMAT
404439
&& arg.kind != cv::detail::ArgKind::GSCALAR
405440
&& arg.kind != cv::detail::ArgKind::GARRAY
406-
&& arg.kind != cv::detail::ArgKind::GOPAQUE);
441+
&& arg.kind != cv::detail::ArgKind::GOPAQUE
442+
&& arg.kind != cv::detail::ArgKind::GFRAME);
407443

408444
if (arg.kind != cv::detail::ArgKind::GOBJREF) {
409445
util::throw_error(std::logic_error("Inference supports G-types ONLY!"));
@@ -425,6 +461,8 @@ cv::GArg cv::gimpl::onnx::GONNXExecutable::packArg(const cv::GArg &arg) {
425461
// (and constructed by either bindIn/Out or resetInternal)
426462
case GShape::GOPAQUE: return GArg(m_res.slot<cv::detail::OpaqueRef>().at(ref.id));
427463

464+
case GShape::GFRAME: return GArg(m_res.slot<cv::MediaFrame>().at(ref.id));
465+
428466
default:
429467
util::throw_error(std::logic_error("Unsupported GShape type"));
430468
break;
@@ -451,8 +489,16 @@ void cv::gimpl::onnx::GONNXExecutable::run(std::vector<InObj> &&input_objs,
451489
context.args.reserve(op.args.size());
452490
using namespace std::placeholders;
453491
ade::util::transform(op.args,
454-
std::back_inserter(context.args),
455-
std::bind(&GONNXExecutable::packArg, this, _1));
492+
std::back_inserter(context.args),
493+
std::bind(&GONNXExecutable::packArg, this, _1));
494+
495+
// NB: Need to store inputs shape to recognize GFrame/GMat
496+
context.in_shapes.reserve(op.args.size());
497+
ade::util::transform(op.args,
498+
std::back_inserter(context.in_shapes),
499+
[](const cv::GArg& arg) {
500+
return arg.get<cv::gimpl::RcDesc>().shape;
501+
});
456502

457503
// - Output parameters.
458504
for (const auto &out_it : ade::util::indexed(op.outs)) {
@@ -590,13 +636,32 @@ cv::GMatDesc ONNXCompiled::outMeta(int idx) const {
590636
toCV(out_tensor_info[ort_idx].dims));
591637
}
592638

593-
void ONNXCompiled::setInput(int i, const cv::Mat &m) {
594-
const auto in_idx = i;
639+
// Feeds the in_idx-th network input: resolves the ONNX tensor slot from the
// configured input name and preprocesses `m` into the staging input buffer.
void ONNXCompiled::setInput(int in_idx, const cv::Mat &m) {
    GAPI_Assert(!m.empty() && "Input data can't be empty!");
    const auto &in_name = params.input_names[in_idx];
    const auto  ort_idx = getIdxByName(in_tensor_info, in_name);
    preprocess(m, in_tensor_info[ort_idx], in_data[in_idx]);
}
599645

646+
// Extracts a cv::Mat from the in_idx-th input (either a GMat or a GFrame)
// into the exMat member. For GFrame inputs a read-only view of the
// MediaFrame is appended to `views`; the caller must keep `views` alive for
// as long as exMat is used, since exMat may alias the view's memory
// (see the MediaFrame overload of preprocess).
void ONNXCompiled::extractMat(ONNXCallContext &ctx, const size_t in_idx, Views &views) {
    switch (ctx.in_shapes[in_idx]) {
        case cv::GShape::GFRAME: {
            const cv::MediaFrame& frame = ctx.inFrame(in_idx);
            views.emplace_back(new cv::MediaFrame::View(frame.access(cv::MediaFrame::Access::R)));
            // At most one view per network input may be held at a time
            GAPI_Assert(views.size() <= numInputs());
            preprocess(*views.back(), frame.desc(), exMat);
            break;
        }
        case cv::GShape::GMAT: {
            exMat = ctx.inMat(in_idx);
            break;
        }
        default: {
            // FIX: the assert must actually fail here. The previous
            // GAPI_Assert("...") passed a string literal — a non-null pointer
            // that converts to true — so the assertion could never fire.
            GAPI_Assert(false && "Unsupported input shape for ONNX backend");
        }
    }
}
664+
600665
void ONNXCompiled::setOutput(int i, cv::Mat &m) {
601666
// FIXME: No need in double-indexing?
602667
out_data[i] = m;
@@ -678,6 +743,23 @@ void ONNXCompiled::run() {
678743
Run(in_data, out_data);
679744
}
680745

746+
static void checkInputMeta(const cv::GMetaArg mm) {
747+
switch (mm.index()) {
748+
case cv::GMetaArg::index_of<cv::GMatDesc>(): break;
749+
case cv::GMetaArg::index_of<cv::GFrameDesc>(): {
750+
const auto &meta = util::get<cv::GFrameDesc>(mm);
751+
switch (meta.fmt) {
752+
case cv::MediaFormat::NV12: break;
753+
case cv::MediaFormat::BGR: break;
754+
default:
755+
GAPI_Assert(false && "Unsupported media format for ONNX backend");
756+
} break;
757+
} break;
758+
default:
759+
util::throw_error(std::runtime_error("Unsupported input meta for ONNX backend"));
760+
}
761+
}
762+
681763
struct Infer: public cv::detail::KernelTag {
682764
using API = cv::GInferBase;
683765
static cv::gapi::GBackend backend() { return cv::gapi::onnx::backend(); }
@@ -695,8 +777,7 @@ struct Infer: public cv::detail::KernelTag {
695777
GAPI_Assert(uu.oc->numInputs() == in_metas.size()
696778
&& "Known input layers count doesn't match input meta count");
697779
for (auto &&mm : in_metas) {
698-
GAPI_Assert(util::holds_alternative<cv::GMatDesc>(mm)
699-
&& "Non-GMat inputs are not supported");
780+
checkInputMeta(mm);
700781
}
701782
for (auto &&idx : ade::util::iota(uu.oc->numOutputs())) {
702783
result.emplace_back(uu.oc->outMeta(idx));
@@ -705,8 +786,10 @@ struct Infer: public cv::detail::KernelTag {
705786
}
706787

707788
static void run(const ONNXUnit &uu, ONNXCallContext &ctx) {
789+
Views views;
708790
for (auto &&idx : ade::util::iota(uu.oc->numInputs())) {
709-
uu.oc->setInput(idx, ctx.inMat(idx));
791+
uu.oc->extractMat(ctx, idx, views);
792+
uu.oc->setInput(idx, uu.oc->exMat);
710793
}
711794
for (auto &&idx : ade::util::iota(uu.oc->numOutputs())) {
712795
uu.oc->setOutput(idx, ctx.outMatR(idx));
@@ -730,20 +813,20 @@ struct InferROI: public cv::detail::KernelTag {
730813
const auto &uu = gm.metadata(nh).get<ONNXUnit>();
731814
GAPI_Assert(1u == uu.oc->numInputs());
732815
GAPI_Assert(2u == in_metas.size());
733-
816+
checkInputMeta(in_metas.at(1));
734817
for (auto &&idx : ade::util::iota(uu.oc->numOutputs())) {
735818
result.emplace_back(uu.oc->outMeta(idx));
736819
}
737820
return result;
738821
}
739822

740823
static void run(const ONNXUnit &uu, ONNXCallContext &ctx) {
824+
Views views;
741825
// non-generic version for now, per the InferROI's definition
742826
GAPI_Assert(uu.oc->numInputs() == 1u);
743827
const auto& this_roi = ctx.inArg<cv::detail::OpaqueRef>(0).rref<cv::Rect>();
744-
const auto this_mat = ctx.inMat(1);
745-
746-
uu.oc->setInput(0, this_mat(this_roi));
828+
uu.oc->extractMat(ctx, 1, views);
829+
uu.oc->setInput(0, uu.oc->exMat(this_roi));
747830
for (auto &&idx : ade::util::iota(uu.oc->numOutputs())) {
748831
uu.oc->setOutput(idx, ctx.outMatR(idx));
749832
}
@@ -769,10 +852,8 @@ struct InferList: public cv::detail::KernelTag {
769852
&& "Known input layers count doesn't match input meta count");
770853

771854
for (auto i : ade::util::iota(uu.oc->numInputs())) {
772-
const auto & mm = in_metas[i + 1];
773-
774-
GAPI_Assert(util::holds_alternative<cv::GMatDesc>(mm)
775-
&& "Non-GMat inputs are not supported");
855+
const auto &mm = in_metas[i + 1];
856+
checkInputMeta(mm);
776857
}
777858

778859
// roi-list version is much easier at the moment.
@@ -784,19 +865,20 @@ struct InferList: public cv::detail::KernelTag {
784865
}
785866

786867
static void run(const ONNXUnit &uu, ONNXCallContext &ctx) {
868+
Views views;
787869
// non-generic version for now:
788870
// - assumes input 0 is always ROI list
789871
// - assumes all inputs/outputs are always Mats
790872
GAPI_Assert(uu.oc->numInputs() == 1); // roi list is not counted in net's inputs
791873

792874
const auto& in_roi_vec = ctx.inArg<cv::detail::VectorRef>(0u).rref<cv::Rect>();
793-
const cv::Mat this_mat = ctx.inMat(1u);
794875

795876
for (auto i : ade::util::iota(uu.oc->numOutputs())) {
796877
ctx.outVecR<cv::Mat>(i).clear();
797878
}
879+
uu.oc->extractMat(ctx, 1, views);
798880
for (const auto &rc : in_roi_vec) {
799-
uu.oc->setInput(0, this_mat(rc));
881+
uu.oc->setInput(0, uu.oc->exMat(rc));
800882
std::vector<cv::Mat> out_mats(uu.oc->numOutputs());
801883
for (auto i : ade::util::iota(uu.oc->numOutputs())) {
802884
out_mats[i] = uu.oc->allocOutput(i);
@@ -837,10 +919,30 @@ struct InferList2: public cv::detail::KernelTag {
837919
// FIXME: this is filtering not done, actually! GArrayDesc has
838920
// no hint for type!
839921
const auto &mm_0 = in_metas[0u];
840-
const auto &meta_0 = util::get<cv::GMatDesc>(mm_0);
841-
GAPI_Assert( !meta_0.isND()
842-
&& !meta_0.planar
843-
&& "Only images are supported as the 0th argument");
922+
switch (in_metas[0u].index()) {
923+
case cv::GMetaArg::index_of<cv::GMatDesc>(): {
924+
const auto &meta_0 = util::get<cv::GMatDesc>(mm_0);
925+
GAPI_Assert( !meta_0.isND()
926+
&& !meta_0.planar
927+
&& "Only images are supported as the 0th argument");
928+
break;
929+
}
930+
case cv::GMetaArg::index_of<cv::GFrameDesc>(): {
931+
const auto &meta_0 = util::get<cv::GFrameDesc>(mm_0);
932+
GAPI_Assert( (meta_0.fmt == cv::MediaFormat::BGR)
933+
|| (meta_0.fmt == cv::MediaFormat::NV12));
934+
GAPI_Assert((meta_0.size.height !=0) && (meta_0.size.width !=0));
935+
break;
936+
}
937+
default:
938+
util::throw_error(std::runtime_error("Unsupported input meta for ONNX backend"));
939+
}
940+
if (util::holds_alternative<cv::GMatDesc>(mm_0)) {
941+
const auto &meta_0 = util::get<cv::GMatDesc>(mm_0);
942+
GAPI_Assert( !meta_0.isND()
943+
&& !meta_0.planar
944+
&& "Only images are supported as the 0th argument");
945+
}
844946
for (auto i : ade::util::iota(uu.oc->numInputs())) {
845947
const auto &mm = in_metas[i + 1];
846948
GAPI_Assert(util::holds_alternative<cv::GArrayDesc>(mm)
@@ -856,11 +958,11 @@ struct InferList2: public cv::detail::KernelTag {
856958
}
857959

858960
static void run(const ONNXUnit &uu, ONNXCallContext &ctx) {
961+
Views views;
859962
GAPI_Assert(ctx.args.size() > 1u
860963
&& "This operation must have at least two arguments");
861-
964+
uu.oc->extractMat(ctx, 0, views);
862965
// Since we do a ROI list inference, always assume our input buffer is image
863-
const cv::Mat mat_0 = ctx.inMat(0u);
864966
// Take the next argument, which must be vector (of any kind).
865967
// Use this only to obtain the ROI list size (sizes of all
866968
// other vectors must be equal to this one)
@@ -885,7 +987,7 @@ struct InferList2: public cv::detail::KernelTag {
885987
if (this_vec.holds<cv::Rect>()) {
886988
// ROI case - create an ROI blob
887989
const auto &vec = this_vec.rref<cv::Rect>();
888-
uu.oc->setInput(in_idx, mat_0(vec[list_idx]));
990+
uu.oc->setInput(in_idx, uu.oc->exMat(vec[list_idx]));
889991
} else if (this_vec.holds<cv::Mat>()) {
890992
// Mat case - create a regular blob
891993
// FIXME: NOW Assume Mats are always BLOBS (not

0 commit comments

Comments
 (0)