artoolkitx
diff --git a/‎modules/gapi/src/backends/onnx/gonnxbackend.cpp
Lines changed: 130 additions & 28 deletions b/‎modules/gapi/src/backends/onnx/gonnxbackend.cpp
Lines changed: 130 additions & 28 deletions
@@ -13,9 +13,14 @@
 #include <ade/util/zip_range.hpp>
 #include <opencv2/gapi/infer.hpp>
 #include <opencv2/gapi/own/convert.hpp>
+#include <opencv2/gapi/gframe.hpp>
 
 #include "api/gbackend_priv.hpp" // FIXME: Make it part of Backend SDK!
 
+namespace {
+struct ONNXCallContext;
+}
+
 namespace cv {
 namespace gimpl {
 namespace onnx {
@@ -64,6 +69,8 @@ struct TensorInfo {
     cv::util::optional<MeanStdev> mstd;
 };
 
+using Views = std::vector<std::unique_ptr<cv::MediaFrame::View>>;
+
 class ONNXCompiled {
     // ONNX Resources
     // NOTE: Env must live with the session, otherwise segfaults.
@@ -98,9 +105,12 @@ class ONNXCompiled {
     std::size_t numInputs() const { return params.num_in; }
     std::size_t numOutputs() const { return params.num_out; }
     void setInput(int i, const cv::Mat &m);
-    void setOutput(int i, cv::Mat &m);
+    void setOutput(int idx, cv::Mat &m);
     cv::Mat allocOutput(int i) const;
-
+    // Gets exMat from input
+    void extractMat(ONNXCallContext &ctx, const size_t in_idx, Views &views);
+    // Extracted cv::Mat from input cv::Mat/cv::MediaFrame
+    cv::Mat exMat;
     // Run with the assigned inputs/outputs
     void run();
 };
@@ -256,6 +266,26 @@ inline void preprocess(const cv::Mat& src,
     }
 }
 
+void preprocess(const cv::MediaFrame::View& view,
+                const cv::GFrameDesc& desc,
+                      cv::Mat& dst) {
+    // This overload constructs cv::Mat from cv::MediaFrame
+    switch (desc.fmt) {
+        case cv::MediaFormat::BGR: {
+            dst = cv::Mat(desc.size, CV_8UC3, view.ptr[0], view.stride[0]);
+            break;
+        }
+        case cv::MediaFormat::NV12: {
+            const auto y_plane  = cv::Mat(desc.size, CV_8UC1, view.ptr[0], view.stride[0]);
+            const auto uv_plane = cv::Mat(desc.size / 2, CV_8UC2, view.ptr[1], view.stride[1]);
+            cvtColorTwoPlane(y_plane, uv_plane, dst, cv::COLOR_YUV2BGR_NV12);
+            break;
+        }
+        default:
+            GAPI_Assert(false && "Unsupported media format for ONNX backend");
+    }
+}
+
 template <typename T>
 inline Ort::Value createTensor(const Ort::MemoryInfo& memory_info,
                                const cv::gimpl::onnx::TensorInfo& tensor_params,
@@ -297,7 +327,7 @@ struct ONNXUnit {
 struct ONNXCallContext {
     // Input parameters passed to an inference operation.
     std::vector<cv::GArg> args;
-
+    cv::GShapes in_shapes;
     //FIXME: avoid conversion of arguments from internal representation to OpenCV one on each call
     //to OCV kernel. (This can be achieved by a two single time conversions in GCPUExecutable::run,
     //once on enter for input and output arguments, and once before return for output arguments only
@@ -312,6 +342,11 @@ struct ONNXCallContext {
     const cv::Mat&   inMat(std::size_t input) {
         return inArg<cv::Mat>(input);
     }
+
+    const cv::MediaFrame& inFrame(std::size_t input) {
+        return inArg<cv::MediaFrame>(input);
+    }
+
     cv::Mat&         outMatR(std::size_t output) {
         return *cv::util::get<cv::Mat*>(results.at(output));
     }
@@ -403,7 +438,8 @@ cv::GArg cv::gimpl::onnx::GONNXExecutable::packArg(const cv::GArg &arg) {
     GAPI_Assert(   arg.kind != cv::detail::ArgKind::GMAT
                 && arg.kind != cv::detail::ArgKind::GSCALAR
                 && arg.kind != cv::detail::ArgKind::GARRAY
-                && arg.kind != cv::detail::ArgKind::GOPAQUE);
+                && arg.kind != cv::detail::ArgKind::GOPAQUE
+                && arg.kind != cv::detail::ArgKind::GFRAME);
 
     if (arg.kind != cv::detail::ArgKind::GOBJREF) {
         util::throw_error(std::logic_error("Inference supports G-types ONLY!"));
@@ -425,6 +461,8 @@ cv::GArg cv::gimpl::onnx::GONNXExecutable::packArg(const cv::GArg &arg) {
     //   (and constructed by either bindIn/Out or resetInternal)
     case GShape::GOPAQUE:  return GArg(m_res.slot<cv::detail::OpaqueRef>().at(ref.id));
 
+    case GShape::GFRAME:   return GArg(m_res.slot<cv::MediaFrame>().at(ref.id));
+
     default:
         util::throw_error(std::logic_error("Unsupported GShape type"));
         break;
@@ -451,8 +489,16 @@ void cv::gimpl::onnx::GONNXExecutable::run(std::vector<InObj>  &&input_objs,
     context.args.reserve(op.args.size());
     using namespace std::placeholders;
     ade::util::transform(op.args,
-                          std::back_inserter(context.args),
-                          std::bind(&GONNXExecutable::packArg, this, _1));
+                         std::back_inserter(context.args),
+                         std::bind(&GONNXExecutable::packArg, this, _1));
+
+    // NB: Need to store inputs shape to recognize GFrame/GMat
+    context.in_shapes.reserve(op.args.size());
+    ade::util::transform(op.args,
+                         std::back_inserter(context.in_shapes),
+                         [](const cv::GArg& arg) {
+                             return arg.get<cv::gimpl::RcDesc>().shape;
+                         });
 
     // - Output parameters.
     for (const auto &out_it : ade::util::indexed(op.outs)) {
@@ -590,13 +636,32 @@ cv::GMatDesc ONNXCompiled::outMeta(int idx) const {
                         toCV(out_tensor_info[ort_idx].dims));
 }
 
-void ONNXCompiled::setInput(int i, const cv::Mat &m) {
-    const auto in_idx  = i;
+void ONNXCompiled::setInput(int in_idx, const cv::Mat &m) {
+    GAPI_Assert(!m.empty() && "Input data can't be empty!");
     const auto in_name = params.input_names[in_idx];
     const auto ort_idx = getIdxByName(in_tensor_info, in_name);
     preprocess(m, in_tensor_info[ort_idx], in_data[in_idx]);
 }
 
+void ONNXCompiled::extractMat(ONNXCallContext &ctx, const size_t in_idx, Views& views) {
+    switch (ctx.in_shapes[in_idx]) {
+        case cv::GShape::GFRAME: {
+            const cv::MediaFrame& frame = ctx.inFrame(in_idx);
+            views.emplace_back(new cv::MediaFrame::View(frame.access(cv::MediaFrame::Access::R)));
+            GAPI_Assert(views.size() <= numInputs());
+            preprocess(*views.back(), frame.desc(), exMat);
+            break;
+        }
+        case cv::GShape::GMAT: {
+            exMat = ctx.inMat(in_idx);
+            break;
+        }
+        default: {
+            GAPI_Assert("Unsupported input shape for ONNX backend");
+        }
+    }
+}
+
 void ONNXCompiled::setOutput(int i, cv::Mat &m) {
     // FIXME: No need in double-indexing?
     out_data[i] = m;
@@ -678,6 +743,23 @@ void ONNXCompiled::run() {
     Run(in_data, out_data);
 }
 
+static void checkInputMeta(const cv::GMetaArg mm) {
+    switch (mm.index()) {
+        case cv::GMetaArg::index_of<cv::GMatDesc>(): break;
+        case cv::GMetaArg::index_of<cv::GFrameDesc>(): {
+            const auto &meta = util::get<cv::GFrameDesc>(mm);
+            switch (meta.fmt) {
+                case cv::MediaFormat::NV12: break;
+                case cv::MediaFormat::BGR:  break;
+                default:
+                    GAPI_Assert(false && "Unsupported media format for ONNX backend");
+            } break;
+        } break;
+        default:
+            util::throw_error(std::runtime_error("Unsupported input meta for ONNX backend"));
+    }
+}
+
 struct Infer: public cv::detail::KernelTag {
     using API = cv::GInferBase;
     static cv::gapi::GBackend backend()  { return cv::gapi::onnx::backend(); }
@@ -695,8 +777,7 @@ struct Infer: public cv::detail::KernelTag {
         GAPI_Assert(uu.oc->numInputs() == in_metas.size()
                     && "Known input layers count doesn't match input meta count");
         for (auto &&mm : in_metas) {
-            GAPI_Assert(util::holds_alternative<cv::GMatDesc>(mm)
-                        && "Non-GMat inputs are not supported");
+            checkInputMeta(mm);
         }
         for (auto &&idx : ade::util::iota(uu.oc->numOutputs())) {
             result.emplace_back(uu.oc->outMeta(idx));
@@ -705,8 +786,10 @@ struct Infer: public cv::detail::KernelTag {
     }
 
     static void run(const ONNXUnit &uu, ONNXCallContext &ctx) {
+        Views views;
         for (auto &&idx : ade::util::iota(uu.oc->numInputs())) {
-            uu.oc->setInput(idx, ctx.inMat(idx));
+            uu.oc->extractMat(ctx, idx, views);
+            uu.oc->setInput(idx, uu.oc->exMat);
         }
         for (auto &&idx : ade::util::iota(uu.oc->numOutputs())) {
             uu.oc->setOutput(idx, ctx.outMatR(idx));
@@ -730,20 +813,20 @@ struct InferROI: public cv::detail::KernelTag {
         const auto &uu = gm.metadata(nh).get<ONNXUnit>();
         GAPI_Assert(1u == uu.oc->numInputs());
         GAPI_Assert(2u == in_metas.size());
-
+        checkInputMeta(in_metas.at(1));
         for (auto &&idx : ade::util::iota(uu.oc->numOutputs())) {
             result.emplace_back(uu.oc->outMeta(idx));
         }
         return result;
     }
 
     static void run(const ONNXUnit &uu, ONNXCallContext &ctx) {
+        Views views;
         // non-generic version for now, per the InferROI's definition
         GAPI_Assert(uu.oc->numInputs() == 1u);
         const auto& this_roi = ctx.inArg<cv::detail::OpaqueRef>(0).rref<cv::Rect>();
-        const auto  this_mat = ctx.inMat(1);
-
-        uu.oc->setInput(0, this_mat(this_roi));
+        uu.oc->extractMat(ctx, 1, views);
+        uu.oc->setInput(0, uu.oc->exMat(this_roi));
         for (auto &&idx : ade::util::iota(uu.oc->numOutputs())) {
             uu.oc->setOutput(idx, ctx.outMatR(idx));
         }
@@ -769,10 +852,8 @@ struct InferList: public cv::detail::KernelTag {
                     && "Known input layers count doesn't match input meta count");
 
         for (auto i : ade::util::iota(uu.oc->numInputs())) {
-            const auto & mm = in_metas[i + 1];
-
-            GAPI_Assert(util::holds_alternative<cv::GMatDesc>(mm)
-                        && "Non-GMat inputs are not supported");
+            const auto &mm = in_metas[i + 1];
+            checkInputMeta(mm);
         }
 
         // roi-list version is much easier at the moment.
@@ -784,19 +865,20 @@ struct InferList: public cv::detail::KernelTag {
     }
 
     static void run(const ONNXUnit &uu, ONNXCallContext &ctx) {
+        Views views;
         // non-generic version for now:
         // - assumes input 0 is always ROI list
         // - assumes all inputs/outputs are always Mats
         GAPI_Assert(uu.oc->numInputs() == 1); // roi list is not counted in net's inputs
 
         const auto& in_roi_vec = ctx.inArg<cv::detail::VectorRef>(0u).rref<cv::Rect>();
-        const cv::Mat this_mat = ctx.inMat(1u);
 
         for (auto i : ade::util::iota(uu.oc->numOutputs())) {
             ctx.outVecR<cv::Mat>(i).clear();
         }
+        uu.oc->extractMat(ctx, 1, views);
         for (const auto &rc : in_roi_vec) {
-            uu.oc->setInput(0, this_mat(rc));
+            uu.oc->setInput(0, uu.oc->exMat(rc));
             std::vector<cv::Mat> out_mats(uu.oc->numOutputs());
             for (auto i : ade::util::iota(uu.oc->numOutputs())) {
                 out_mats[i] = uu.oc->allocOutput(i);
@@ -837,10 +919,30 @@ struct InferList2: public cv::detail::KernelTag {
         // FIXME: this is filtering not done, actually! GArrayDesc has
         // no hint for type!
         const auto &mm_0   = in_metas[0u];
-        const auto &meta_0 = util::get<cv::GMatDesc>(mm_0);
-        GAPI_Assert(   !meta_0.isND()
-                    && !meta_0.planar
-                    && "Only images are supported as the 0th argument");
+        switch (in_metas[0u].index()) {
+            case cv::GMetaArg::index_of<cv::GMatDesc>(): {
+                const auto &meta_0 = util::get<cv::GMatDesc>(mm_0);
+                GAPI_Assert(   !meta_0.isND()
+                            && !meta_0.planar
+                            && "Only images are supported as the 0th argument");
+                break;
+            }
+            case cv::GMetaArg::index_of<cv::GFrameDesc>(): {
+                const auto &meta_0 = util::get<cv::GFrameDesc>(mm_0);
+                GAPI_Assert(   (meta_0.fmt == cv::MediaFormat::BGR)
+                            || (meta_0.fmt == cv::MediaFormat::NV12));
+                GAPI_Assert((meta_0.size.height !=0) && (meta_0.size.width !=0));
+                break;
+            }
+            default:
+                util::throw_error(std::runtime_error("Unsupported input meta for ONNX backend"));
+        }
+        if (util::holds_alternative<cv::GMatDesc>(mm_0)) {
+            const auto &meta_0 = util::get<cv::GMatDesc>(mm_0);
+            GAPI_Assert(   !meta_0.isND()
+                        && !meta_0.planar
+                        && "Only images are supported as the 0th argument");
+        }
         for (auto i : ade::util::iota(uu.oc->numInputs())) {
             const auto &mm = in_metas[i + 1];
             GAPI_Assert(util::holds_alternative<cv::GArrayDesc>(mm)
@@ -856,11 +958,11 @@ struct InferList2: public cv::detail::KernelTag {
     }
 
     static void run(const ONNXUnit &uu, ONNXCallContext &ctx) {
+        Views views;
         GAPI_Assert(ctx.args.size() > 1u
                     && "This operation must have at least two arguments");
-
+        uu.oc->extractMat(ctx, 0, views);
         // Since we do a ROI list inference, always assume our input buffer is image
-        const cv::Mat mat_0  = ctx.inMat(0u);
         // Take the next argument, which must be vector (of any kind).
         // Use this only to obtain the ROI list size (sizes of all
         // other vectors must be equal to this one)
@@ -885,7 +987,7 @@ struct InferList2: public cv::detail::KernelTag {
                 if (this_vec.holds<cv::Rect>()) {
                     // ROI case - create an ROI blob
                     const auto &vec = this_vec.rref<cv::Rect>();
-                    uu.oc->setInput(in_idx, mat_0(vec[list_idx]));
+                    uu.oc->setInput(in_idx, uu.oc->exMat(vec[list_idx]));
                 } else if (this_vec.holds<cv::Mat>()) {
                     // Mat case - create a regular blob
                     // FIXME: NOW Assume Mats are always BLOBS (not