13
13
#include < ade/util/zip_range.hpp>
14
14
#include < opencv2/gapi/infer.hpp>
15
15
#include < opencv2/gapi/own/convert.hpp>
16
+ #include < opencv2/gapi/gframe.hpp>
16
17
17
18
#include " api/gbackend_priv.hpp" // FIXME: Make it part of Backend SDK!
18
19
20
+ namespace {
21
+ struct ONNXCallContext ;
22
+ }
23
+
19
24
namespace cv {
20
25
namespace gimpl {
21
26
namespace onnx {
@@ -64,6 +69,8 @@ struct TensorInfo {
64
69
cv::util::optional<MeanStdev> mstd;
65
70
};
66
71
72
+ using Views = std::vector<std::unique_ptr<cv::MediaFrame::View>>;
73
+
67
74
class ONNXCompiled {
68
75
// ONNX Resources
69
76
// NOTE: Env must live with the session, otherwise segfaults.
@@ -98,9 +105,12 @@ class ONNXCompiled {
98
105
std::size_t numInputs () const { return params.num_in ; }
99
106
std::size_t numOutputs () const { return params.num_out ; }
100
107
void setInput (int i, const cv::Mat &m);
101
- void setOutput (int i , cv::Mat &m);
108
+ void setOutput (int idx , cv::Mat &m);
102
109
cv::Mat allocOutput (int i) const ;
103
-
110
+ // Fills exMat from input in_idx of ctx (cv::Mat or cv::MediaFrame); a frame's view is appended to views
111
+ void extractMat (ONNXCallContext &ctx, const size_t in_idx, Views &views);
112
+ // Extracted cv::Mat from input cv::Mat/cv::MediaFrame
113
+ cv::Mat exMat;
104
114
// Run with the assigned inputs/outputs
105
115
void run ();
106
116
};
@@ -256,6 +266,26 @@ inline void preprocess(const cv::Mat& src,
256
266
}
257
267
}
258
268
269
+ void preprocess (const cv::MediaFrame::View& view,
270
+ const cv::GFrameDesc& desc,
271
+ cv::Mat& dst) {
272
+ // This overload constructs cv::Mat from cv::MediaFrame
273
+ switch (desc.fmt ) {
274
+ case cv::MediaFormat::BGR: {
275
+ dst = cv::Mat (desc.size , CV_8UC3, view.ptr [0 ], view.stride [0 ]);
276
+ break ;
277
+ }
278
+ case cv::MediaFormat::NV12: {
279
+ const auto y_plane = cv::Mat (desc.size , CV_8UC1, view.ptr [0 ], view.stride [0 ]);
280
+ const auto uv_plane = cv::Mat (desc.size / 2 , CV_8UC2, view.ptr [1 ], view.stride [1 ]);
281
+ cvtColorTwoPlane (y_plane, uv_plane, dst, cv::COLOR_YUV2BGR_NV12);
282
+ break ;
283
+ }
284
+ default :
285
+ GAPI_Assert (false && " Unsupported media format for ONNX backend" );
286
+ }
287
+ }
288
+
259
289
template <typename T>
260
290
inline Ort::Value createTensor (const Ort::MemoryInfo& memory_info,
261
291
const cv::gimpl::onnx::TensorInfo& tensor_params,
@@ -297,7 +327,7 @@ struct ONNXUnit {
297
327
struct ONNXCallContext {
298
328
// Input parameters passed to an inference operation.
299
329
std::vector<cv::GArg> args;
300
-
330
+ cv::GShapes in_shapes;
301
331
// FIXME: avoid conversion of arguments from internal representation to OpenCV one on each call
302
332
// to OCV kernel. (This can be achieved by a two single time conversions in GCPUExecutable::run,
303
333
// once on enter for input and output arguments, and once before return for output arguments only
@@ -312,6 +342,11 @@ struct ONNXCallContext {
312
342
// Input argument number `input`, read as a cv::Mat (forwards to inArg<cv::Mat>).
const cv::Mat& inMat(std::size_t input) {
    const cv::Mat& mat = inArg<cv::Mat>(input);
    return mat;
}
345
+
346
+ const cv::MediaFrame& inFrame (std::size_t input) {
347
+ return inArg<cv::MediaFrame>(input);
348
+ }
349
+
315
350
// Output number `output` as a writable cv::Mat: results.at(output) is expected to hold
// a cv::Mat*, which is dereferenced here. results.at() performs the index lookup.
cv::Mat& outMatR(std::size_t output) {
    cv::Mat* const out_ptr = cv::util::get<cv::Mat*>(results.at(output));
    return *out_ptr;
}
@@ -403,7 +438,8 @@ cv::GArg cv::gimpl::onnx::GONNXExecutable::packArg(const cv::GArg &arg) {
403
438
GAPI_Assert ( arg.kind != cv::detail::ArgKind::GMAT
404
439
&& arg.kind != cv::detail::ArgKind::GSCALAR
405
440
&& arg.kind != cv::detail::ArgKind::GARRAY
406
- && arg.kind != cv::detail::ArgKind::GOPAQUE);
441
+ && arg.kind != cv::detail::ArgKind::GOPAQUE
442
+ && arg.kind != cv::detail::ArgKind::GFRAME);
407
443
408
444
if (arg.kind != cv::detail::ArgKind::GOBJREF) {
409
445
util::throw_error (std::logic_error (" Inference supports G-types ONLY!" ));
@@ -425,6 +461,8 @@ cv::GArg cv::gimpl::onnx::GONNXExecutable::packArg(const cv::GArg &arg) {
425
461
// (and constructed by either bindIn/Out or resetInternal)
426
462
case GShape::GOPAQUE: return GArg (m_res.slot <cv::detail::OpaqueRef>().at (ref.id ));
427
463
464
+ case GShape::GFRAME: return GArg (m_res.slot <cv::MediaFrame>().at (ref.id ));
465
+
428
466
default :
429
467
util::throw_error (std::logic_error (" Unsupported GShape type" ));
430
468
break ;
@@ -451,8 +489,16 @@ void cv::gimpl::onnx::GONNXExecutable::run(std::vector<InObj> &&input_objs,
451
489
context.args .reserve (op.args .size ());
452
490
using namespace std ::placeholders;
453
491
ade::util::transform (op.args ,
454
- std::back_inserter (context.args ),
455
- std::bind (&GONNXExecutable::packArg, this , _1));
492
+ std::back_inserter (context.args ),
493
+ std::bind (&GONNXExecutable::packArg, this , _1));
494
+
495
+ // NB: Need to store inputs shape to recognize GFrame/GMat
496
+ context.in_shapes .reserve (op.args .size ());
497
+ ade::util::transform (op.args ,
498
+ std::back_inserter (context.in_shapes ),
499
+ [](const cv::GArg& arg) {
500
+ return arg.get <cv::gimpl::RcDesc>().shape ;
501
+ });
456
502
457
503
// - Output parameters.
458
504
for (const auto &out_it : ade::util::indexed (op.outs )) {
@@ -590,13 +636,32 @@ cv::GMatDesc ONNXCompiled::outMeta(int idx) const {
590
636
toCV (out_tensor_info[ort_idx].dims ));
591
637
}
592
638
593
- void ONNXCompiled::setInput (int i , const cv::Mat &m) {
594
- const auto in_idx = i ;
639
+ void ONNXCompiled::setInput (int in_idx , const cv::Mat &m) {
640
+ GAPI_Assert (!m. empty () && " Input data can't be empty! " ) ;
595
641
const auto in_name = params.input_names [in_idx];
596
642
const auto ort_idx = getIdxByName (in_tensor_info, in_name);
597
643
preprocess (m, in_tensor_info[ort_idx], in_data[in_idx]);
598
644
}
599
645
646
+ void ONNXCompiled::extractMat (ONNXCallContext &ctx, const size_t in_idx, Views& views) {
647
+ switch (ctx.in_shapes [in_idx]) {
648
+ case cv::GShape::GFRAME: {
649
+ const cv::MediaFrame& frame = ctx.inFrame (in_idx);
650
+ views.emplace_back (new cv::MediaFrame::View (frame.access (cv::MediaFrame::Access::R)));
651
+ GAPI_Assert (views.size () <= numInputs ());
652
+ preprocess (*views.back (), frame.desc (), exMat);
653
+ break ;
654
+ }
655
+ case cv::GShape::GMAT: {
656
+ exMat = ctx.inMat (in_idx);
657
+ break ;
658
+ }
659
+ default : {
660
+ GAPI_Assert (" Unsupported input shape for ONNX backend" );
661
+ }
662
+ }
663
+ }
664
+
600
665
void ONNXCompiled::setOutput (int i, cv::Mat &m) {
601
666
// FIXME: No need in double-indexing?
602
667
out_data[i] = m;
@@ -678,6 +743,23 @@ void ONNXCompiled::run() {
678
743
Run (in_data, out_data);
679
744
}
680
745
746
+ static void checkInputMeta (const cv::GMetaArg mm) {
747
+ switch (mm.index ()) {
748
+ case cv::GMetaArg::index_of<cv::GMatDesc>(): break ;
749
+ case cv::GMetaArg::index_of<cv::GFrameDesc>(): {
750
+ const auto &meta = util::get<cv::GFrameDesc>(mm);
751
+ switch (meta.fmt ) {
752
+ case cv::MediaFormat::NV12: break ;
753
+ case cv::MediaFormat::BGR: break ;
754
+ default :
755
+ GAPI_Assert (false && " Unsupported media format for ONNX backend" );
756
+ } break ;
757
+ } break ;
758
+ default :
759
+ util::throw_error (std::runtime_error (" Unsupported input meta for ONNX backend" ));
760
+ }
761
+ }
762
+
681
763
struct Infer : public cv ::detail::KernelTag {
682
764
using API = cv::GInferBase;
683
765
static cv::gapi::GBackend backend () { return cv::gapi::onnx::backend (); }
@@ -695,8 +777,7 @@ struct Infer: public cv::detail::KernelTag {
695
777
GAPI_Assert (uu.oc ->numInputs () == in_metas.size ()
696
778
&& " Known input layers count doesn't match input meta count" );
697
779
for (auto &&mm : in_metas) {
698
- GAPI_Assert (util::holds_alternative<cv::GMatDesc>(mm)
699
- && " Non-GMat inputs are not supported" );
780
+ checkInputMeta (mm);
700
781
}
701
782
for (auto &&idx : ade::util::iota (uu.oc ->numOutputs ())) {
702
783
result.emplace_back (uu.oc ->outMeta (idx));
@@ -705,8 +786,10 @@ struct Infer: public cv::detail::KernelTag {
705
786
}
706
787
707
788
static void run (const ONNXUnit &uu, ONNXCallContext &ctx) {
789
+ Views views;
708
790
for (auto &&idx : ade::util::iota (uu.oc ->numInputs ())) {
709
- uu.oc ->setInput (idx, ctx.inMat (idx));
791
+ uu.oc ->extractMat (ctx, idx, views);
792
+ uu.oc ->setInput (idx, uu.oc ->exMat );
710
793
}
711
794
for (auto &&idx : ade::util::iota (uu.oc ->numOutputs ())) {
712
795
uu.oc ->setOutput (idx, ctx.outMatR (idx));
@@ -730,20 +813,20 @@ struct InferROI: public cv::detail::KernelTag {
730
813
const auto &uu = gm.metadata (nh).get <ONNXUnit>();
731
814
GAPI_Assert (1u == uu.oc ->numInputs ());
732
815
GAPI_Assert (2u == in_metas.size ());
733
-
816
+ checkInputMeta (in_metas. at ( 1 ));
734
817
for (auto &&idx : ade::util::iota (uu.oc ->numOutputs ())) {
735
818
result.emplace_back (uu.oc ->outMeta (idx));
736
819
}
737
820
return result;
738
821
}
739
822
740
823
static void run (const ONNXUnit &uu, ONNXCallContext &ctx) {
824
+ Views views;
741
825
// non-generic version for now, per the InferROI's definition
742
826
GAPI_Assert (uu.oc ->numInputs () == 1u );
743
827
const auto & this_roi = ctx.inArg <cv::detail::OpaqueRef>(0 ).rref <cv::Rect>();
744
- const auto this_mat = ctx.inMat (1 );
745
-
746
- uu.oc ->setInput (0 , this_mat (this_roi));
828
+ uu.oc ->extractMat (ctx, 1 , views);
829
+ uu.oc ->setInput (0 , uu.oc ->exMat (this_roi));
747
830
for (auto &&idx : ade::util::iota (uu.oc ->numOutputs ())) {
748
831
uu.oc ->setOutput (idx, ctx.outMatR (idx));
749
832
}
@@ -769,10 +852,8 @@ struct InferList: public cv::detail::KernelTag {
769
852
&& " Known input layers count doesn't match input meta count" );
770
853
771
854
for (auto i : ade::util::iota (uu.oc ->numInputs ())) {
772
- const auto & mm = in_metas[i + 1 ];
773
-
774
- GAPI_Assert (util::holds_alternative<cv::GMatDesc>(mm)
775
- && " Non-GMat inputs are not supported" );
855
+ const auto &mm = in_metas[i + 1 ];
856
+ checkInputMeta (mm);
776
857
}
777
858
778
859
// roi-list version is much easier at the moment.
@@ -784,19 +865,20 @@ struct InferList: public cv::detail::KernelTag {
784
865
}
785
866
786
867
static void run (const ONNXUnit &uu, ONNXCallContext &ctx) {
868
+ Views views;
787
869
// non-generic version for now:
788
870
// - assumes input 0 is always ROI list
789
871
// - assumes input 1 is a Mat or a MediaFrame; outputs are always Mats
790
872
GAPI_Assert (uu.oc ->numInputs () == 1 ); // roi list is not counted in net's inputs
791
873
792
874
const auto & in_roi_vec = ctx.inArg <cv::detail::VectorRef>(0u ).rref <cv::Rect>();
793
- const cv::Mat this_mat = ctx.inMat (1u );
794
875
795
876
for (auto i : ade::util::iota (uu.oc ->numOutputs ())) {
796
877
ctx.outVecR <cv::Mat>(i).clear ();
797
878
}
879
+ uu.oc ->extractMat (ctx, 1 , views);
798
880
for (const auto &rc : in_roi_vec) {
799
- uu.oc ->setInput (0 , this_mat (rc));
881
+ uu.oc ->setInput (0 , uu. oc -> exMat (rc));
800
882
std::vector<cv::Mat> out_mats (uu.oc ->numOutputs ());
801
883
for (auto i : ade::util::iota (uu.oc ->numOutputs ())) {
802
884
out_mats[i] = uu.oc ->allocOutput (i);
@@ -837,10 +919,30 @@ struct InferList2: public cv::detail::KernelTag {
837
919
// FIXME: this is filtering not done, actually! GArrayDesc has
838
920
// no hint for type!
839
921
const auto &mm_0 = in_metas[0u ];
840
- const auto &meta_0 = util::get<cv::GMatDesc>(mm_0);
841
- GAPI_Assert ( !meta_0.isND ()
842
- && !meta_0.planar
843
- && " Only images are supported as the 0th argument" );
922
+ switch (in_metas[0u ].index ()) {
923
+ case cv::GMetaArg::index_of<cv::GMatDesc>(): {
924
+ const auto &meta_0 = util::get<cv::GMatDesc>(mm_0);
925
+ GAPI_Assert ( !meta_0.isND ()
926
+ && !meta_0.planar
927
+ && " Only images are supported as the 0th argument" );
928
+ break ;
929
+ }
930
+ case cv::GMetaArg::index_of<cv::GFrameDesc>(): {
931
+ const auto &meta_0 = util::get<cv::GFrameDesc>(mm_0);
932
+ GAPI_Assert ( (meta_0.fmt == cv::MediaFormat::BGR)
933
+ || (meta_0.fmt == cv::MediaFormat::NV12));
934
+ GAPI_Assert ((meta_0.size .height !=0 ) && (meta_0.size .width !=0 ));
935
+ break ;
936
+ }
937
+ default :
938
+ util::throw_error (std::runtime_error (" Unsupported input meta for ONNX backend" ));
939
+ }
940
+ if (util::holds_alternative<cv::GMatDesc>(mm_0)) {
941
+ const auto &meta_0 = util::get<cv::GMatDesc>(mm_0);
942
+ GAPI_Assert ( !meta_0.isND ()
943
+ && !meta_0.planar
944
+ && " Only images are supported as the 0th argument" );
945
+ }
844
946
for (auto i : ade::util::iota (uu.oc ->numInputs ())) {
845
947
const auto &mm = in_metas[i + 1 ];
846
948
GAPI_Assert (util::holds_alternative<cv::GArrayDesc>(mm)
@@ -856,11 +958,11 @@ struct InferList2: public cv::detail::KernelTag {
856
958
}
857
959
858
960
static void run (const ONNXUnit &uu, ONNXCallContext &ctx) {
961
+ Views views;
859
962
GAPI_Assert (ctx.args .size () > 1u
860
963
&& " This operation must have at least two arguments" );
861
-
964
+ uu. oc -> extractMat (ctx, 0 , views);
862
965
// Since we do a ROI list inference, always assume our input buffer is image
863
- const cv::Mat mat_0 = ctx.inMat (0u );
864
966
// Take the next argument, which must be vector (of any kind).
865
967
// Use this only to obtain the ROI list size (sizes of all
866
968
// other vectors must be equal to this one)
@@ -885,7 +987,7 @@ struct InferList2: public cv::detail::KernelTag {
885
987
if (this_vec.holds <cv::Rect>()) {
886
988
// ROI case - create an ROI blob
887
989
const auto &vec = this_vec.rref <cv::Rect>();
888
- uu.oc ->setInput (in_idx, mat_0 (vec[list_idx]));
990
+ uu.oc ->setInput (in_idx, uu. oc -> exMat (vec[list_idx]));
889
991
} else if (this_vec.holds <cv::Mat>()) {
890
992
// Mat case - create a regular blob
891
993
// FIXME: NOW Assume Mats are always BLOBS (not
0 commit comments