diff --git a/openvino_bindings/src/BUILD b/openvino_bindings/src/BUILD index 5bd46cf0..857ff225 100644 --- a/openvino_bindings/src/BUILD +++ b/openvino_bindings/src/BUILD @@ -9,7 +9,6 @@ cc_library( "//src/utils:input_devices", "//src/utils:status", "//src/utils:utils", - "//src/image:image_inference", "//src/sentence_transformer:sentence_transformer_pipeline", "//src/llm:llm_inference", "//src/audio:speech_to_text", diff --git a/openvino_bindings/src/bindings.cc b/openvino_bindings/src/bindings.cc index c69559dd..ffbc76ae 100644 --- a/openvino_bindings/src/bindings.cc +++ b/openvino_bindings/src/bindings.cc @@ -12,7 +12,6 @@ #include #include "src/audio/speech_to_text.h" -#include "src/image/image_inference.h" #include "src/mediapipe/graph_runner.h" #include "src/mediapipe/serialization/serialization_calculators.h" #include "src/llm/llm_inference.h" @@ -49,11 +48,6 @@ void freeStatusOrInt(StatusOrInt *status) { // delete status; //} -void freeStatusOrImageInference(StatusOrString *status) { - //std::cout << "Freeing StatusOrImageInference" << std::endl; - delete status; -} - void freeStatusOrModelResponse(StatusOrModelResponse *status) { //std::cout << "Freeing StatusOrImageInference" << std::endl; delete status; @@ -91,125 +85,6 @@ void freeStatusOrCameraDevices(StatusOrCameraDevices *status) { delete status; } -StatusOrImageInference* imageInferenceOpen(const char* model_path, const char* task, const char* device, const char* label_definitions_json) { - try { - auto instance = new ImageInference(model_path, get_task_type(task), device); - instance->project_labels = nlohmann::json::parse(label_definitions_json); - return new StatusOrImageInference{OkStatus, "", instance}; - } catch (...) { - auto except = handle_exceptions(); - return new StatusOrImageInference{except->status, except->message}; - } -} - -Status* imageInferenceClose(CImageInference instance) { - auto inference = reinterpret_cast(instance); - inference->close(); - delete inference; - return new Status{OkStatus}; -} - -StatusOrString* imageInferenceInfer(CImageInference instance, unsigned char* image_data, const size_t data_length, bool json, bool csv, bool overlay) { - try { - if(!(json || csv || overlay)){ - return new StatusOrString{OverlayNoOutputSelected}; - } - auto image_inference = reinterpret_cast(instance); - std::vector image_vector(image_data, image_data + data_length); - auto image = cv::imdecode(image_vector, 1); - auto inference_result = image_inference->infer(image); - auto result = image_inference->serialize(inference_result, image, json, csv, overlay).dump(); - return new StatusOrString{OkStatus, "", strdup(result.c_str())}; - } catch (...) { - auto except = handle_exceptions(); - return new StatusOrString{except->status, except->message}; - } -} - -StatusOrString* imageInferenceInferRoi(CImageInference instance, unsigned char* image_data, const size_t data_length, int x, int y, int width, int height, bool json, bool csv, bool overlay) { - try { - if(!(json || csv || overlay)){ - return new StatusOrString{OverlayNoOutputSelected}; - } - - auto image_inference = reinterpret_cast(instance); - std::vector image_vector(image_data, image_data + data_length); - auto image = cv::imdecode(image_vector, 1); - cv::cvtColor(image, image, cv::COLOR_BGR2RGB); - auto rect = cv::Rect(x, y, width, height); - auto roi = image(rect).clone(); - auto inference_result = image_inference->infer(roi); - auto result = image_inference->serialize(inference_result, roi, json, csv, overlay).dump(); - return new StatusOrString{OkStatus, "", strdup(result.c_str())}; - } catch (...) { - auto except = handle_exceptions(); - return new StatusOrString{except->status, except->message}; - } -} - -Status* imageInferenceInferAsync(CImageInference instance, const char* id, unsigned char* image_data, const size_t data_length, bool json, bool csv, bool overlay) { - try { - auto image_inference = reinterpret_cast(instance); - std::vector image_vector(image_data, image_data + data_length); - auto image = cv::imdecode(image_vector, 1); - image_inference->inferAsync(image, id, json, csv, overlay); - return new Status{OkStatus, ""}; - } catch (...) { - return handle_exceptions(); - } -} - -Status* imageInferenceSetListener(CImageInference instance, ImageInferenceCallbackFunction callback) { - try { - auto lambda_callback = [callback](StatusEnum status, const std::string& error_message, const std::string& response) { - callback(new StatusOrString{status, strdup(error_message.c_str()), strdup(response.c_str())}); - }; - auto image_inference = reinterpret_cast(instance); - image_inference->set_listener(lambda_callback); - return new Status{OkStatus, ""}; - } catch (...) { - return handle_exceptions(); - } -} - -Status* imageInferenceSerializeModel(const char* model_path, const char* output_path) { - try { - ImageInference::serialize_model(model_path, output_path); - return new Status{OkStatus, ""}; - } catch (...) { - return handle_exceptions(); - } -} - -Status* imageInferenceOpenCamera(CImageInference instance, int device) { - try { - auto image_inference = reinterpret_cast(instance); - image_inference->open_camera(device); - return new Status{OkStatus, ""}; - } catch (...) { - return handle_exceptions(); - } -} - -Status* imageInferenceStopCamera(CImageInference instance) { - try { - auto image_inference = reinterpret_cast(instance); - image_inference->stop_camera(); - return new Status{OkStatus, ""}; - } catch (...) { - return handle_exceptions(); - } -} - -Status* load_font(const char* font_path) { - try { - ImageInference::load_font(font_path); - return new Status{OkStatus}; - } catch (...) { - return handle_exceptions(); - } -} - StatusOrLLMInference* llmInferenceOpen(const char* model_path, const char* device) { try { auto instance = new LLMInference(model_path, device); diff --git a/openvino_bindings/src/bindings.h b/openvino_bindings/src/bindings.h index 4ad487fd..f3cfad04 100644 --- a/openvino_bindings/src/bindings.h +++ b/openvino_bindings/src/bindings.h @@ -71,12 +71,6 @@ typedef struct { int value; } StatusOrInt; -typedef struct { - enum StatusEnum status; - const char* message; - CImageInference value; -} StatusOrImageInference; - typedef struct { enum StatusEnum status; const char* message; @@ -171,7 +165,6 @@ typedef void (*VLMInferenceCallbackFunction)(StatusOrString*); EXPORT void freeStatus(Status *status); EXPORT void freeStatusOrString(StatusOrString *status); EXPORT void freeStatusOrInt(StatusOrInt *status); -EXPORT void freeStatusOrImageInference(StatusOrImageInference *status); EXPORT void freeStatusOrLLMInference(StatusOrLLMInference *status); EXPORT void freeStatusOrSpeechToText(StatusOrSpeechToText *status); EXPORT void freeStatusOrModelResponse(StatusOrModelResponse *status); @@ -180,17 +173,6 @@ EXPORT void freeStatusOrDevices(StatusOrDevices *status); EXPORT void freeStatusOrEmbeddings(StatusOrEmbeddings *status); EXPORT void freeStatusOrCameraDevices(StatusOrCameraDevices *status); -EXPORT StatusOrImageInference* imageInferenceOpen(const char* model_path, const char* task, const char* device, const char* label_definitions_json); -EXPORT StatusOrString* imageInferenceInfer(CImageInference instance, unsigned char* image_data, const size_t data_length, bool json, bool csv, bool overlay); -EXPORT StatusOrString* imageInferenceInferRoi(CImageInference instance, unsigned char* image_data, const size_t data_length, int x, int y, int width, int height, bool json, bool csv, bool overlay); -EXPORT Status* imageInferenceInferAsync(CImageInference instance, const char* id, unsigned char* image_data, const size_t data_length, bool json, bool csv, bool overlay); -EXPORT Status* imageInferenceSetListener(CImageInference instance, ImageInferenceCallbackFunction callback); -EXPORT Status* imageInferenceOpenCamera(CImageInference instance, int device); -EXPORT Status* imageInferenceStopCamera(CImageInference instance); -EXPORT Status* imageInferenceClose(CImageInference instance); -EXPORT Status* imageInferenceSerializeModel(const char* model_path, const char* output_path); -EXPORT Status* load_font(const char* font_path); - EXPORT StatusOrLLMInference* llmInferenceOpen(const char* model_path, const char* device); EXPORT Status* llmInferenceSetListener(CLLMInference instance, LLMInferenceCallbackFunction callback); EXPORT StatusOrModelResponse* llmInferencePrompt(CLLMInference instance, const char* message, bool apply_template, float temperature, float top_p); diff --git a/openvino_bindings/src/image/BUILD b/openvino_bindings/src/image/BUILD index d8bb89b0..0221bb48 100644 --- a/openvino_bindings/src/image/BUILD +++ b/openvino_bindings/src/image/BUILD @@ -1,27 +1,3 @@ -cc_library( - name = "image_inference", - hdrs = [ - "image_inference.h", - ], - srcs = [ - "image_inference.cc", - ], - deps = [ - "//third_party:blend2d", - "//third_party:opencv", - "//third_party:model_api", - "@nlohmann_json//:json", - "//src/utils:status", - "//src/utils:errors", - ":data_structures", - ":post_processing", - ":utils", - ":serialization", - ":overlay", - ], - visibility = ["//visibility:public"], -) - cc_library( name = "utils", hdrs = [ @@ -70,20 +46,6 @@ cc_library( visibility = ["//visibility:public"], ) -cc_library( - name = "post_processing", - hdrs = [ - "post_processing.h", - ], - deps = [ - "//third_party:openvino", - "//third_party:opencv", - "//third_party:model_api", - ":contourer", - ":data_structures", - ], -) - cc_library( name = "serialization", hdrs = [ diff --git a/openvino_bindings/src/image/image_inference.cc b/openvino_bindings/src/image/image_inference.cc deleted file mode 100644 index ed43067b..00000000 --- a/openvino_bindings/src/image/image_inference.cc +++ /dev/null @@ -1,291 +0,0 @@ -/* - * Copyright (c) 2024 Intel Corporation - * - * SPDX-License-Identifier: Apache-2.0 - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "image_inference.h" -#include "src/image/csv_serialization.h" -#include "src/image/json_serialization.h" -#include "src/image/post_processing.h" -#include "src/image/utils.h" -#include "src/utils/errors.h" -#include "src/utils/status.h" -#include "third_party/cpp-base64/base64.h" - - -ModelType get_model_type(const std::string& name) { - if (name == "ssd" || name == "Detection") { - return ModelType::Detection; - } else if (name == "Classification") { - return ModelType::Classification; - } else if (name == "MaskRCNN") { - return ModelType::MaskRCNN; - } else if (name == "Segmentation") { - return ModelType::Segmentation; - } else if (name == "AnomalyDetection") { - return ModelType::Anomaly; - } else { - throw api_error(ModelTypeNotSupported, name); - } -} - -TaskType get_task_type(const std::string& name) { - if (name == "detection") { - return TaskType::Detection; - } else if (name == "classification") { - return TaskType::Classification; - } else if (name == "rotated_detection") { - return TaskType::RotatedDetection; - } else if (name == "instance_segmentation") { - return TaskType::InstanceSegmentation; - } else if (name == "segmentation") { - return TaskType::Segmentation; - } else if (name == "anomaly") { - return TaskType::Anomaly; - } else if (name == "anomaly_classification") { - return TaskType::Anomaly; - } else { - throw api_error(TaskTypeNotSupported, name); - } -} - - -inline void output_model_config(std::shared_ptr ia) { - auto config = ia->getModelConfig(); - for (auto& prop: config) { - std::cout << prop.first << ": " << prop.second.as() << std::endl; - } -} - -BLFontFace ImageInference::face; - -ImageInference::ImageInference(std::string model_path, TaskType task, std::string device): task(task) { - auto core = ov::Core(); - auto ov_model = core.read_model(model_path); - ia = std::make_shared(); - ov::AnyMap tput{{ov::hint::performance_mode.name(), ov::hint::PerformanceMode::THROUGHPUT}}; - ia->loadModel(ov_model, core, device, tput); - auto config = ia->getModelConfig(); - - labels = geti::get_labels_from_configuration(config); - - auto model_type_iter = config.find("model_type"); - if (model_type_iter == config.end()) { - throw api_error(StatusEnum::ModelTypeNotSupplied); - } - model_type = get_model_type(model_type_iter->second.as()); - - switch(model_type) { - case ModelType::Detection: - model = DetectionModel::create_model(ia); - break; - case ModelType::Classification: - model = ClassificationModel::create_model(ia); - break; - case ModelType::MaskRCNN: - { - auto maskrcnn = MaskRCNNModel::create_model(model_path, {}, true, device); - // post processing for rotated detection via model api - maskrcnn->postprocess_semantic_masks = task == TaskType::RotatedDetection; - model = std::unique_ptr(maskrcnn.release()); - } - break; - case ModelType::Segmentation: - model = SegmentationModel::create_model(ia); - //model->postprocess_semantic_masks = false; - break; - case ModelType::Anomaly: - model = AnomalyModel::create_model(ia); - break; - default: - throw std::runtime_error("Model type loading not implemented"); - } -} - -geti::InferenceResult ImageInference::infer(cv::Mat image) { - const ImageInputData& input_data = image; - - geti::InferenceResult obj = post_process(model->infer(input_data), image); - if (empty_label.has_value()) { - size_t n_predictions = obj.polygons.size() + - obj.rectangles.size() + - obj.circles.size() + - obj.rotated_rectangles.size(); - - if (n_predictions == 0) { - obj.rectangles.push_back({{geti::LabelResult{0.0f, empty_label.value()}}, obj.roi}); - } - } - return obj; -} - -geti::InferenceResult ImageInference::post_process(std::unique_ptr result, cv::Mat image) { - switch(task) { - case TaskType::Detection: - return geti::detection_post_processing(std::move(result), labels, image); - case TaskType::Classification: - return geti::classification_post_processing(std::move(result), labels, image); - case TaskType::RotatedDetection: - return geti::rotated_detection_post_processing(std::move(result), labels, image); - case TaskType::Anomaly: - return geti::anomaly_post_processing(std::move(result), labels, image); - case TaskType::InstanceSegmentation: - return geti::instance_segmentation_post_processing(std::move(result), labels, image); - case TaskType::Segmentation: - auto m = dynamic_cast(model.get()); - auto inference_result = std::unique_ptr(static_cast(result.release())); - auto contours = m->getContours(*inference_result.get()); //uhh.. - return geti::segmentation_post_processing(std::move(inference_result), contours, labels, image); - } - throw std::runtime_error("Model type loading not implemented"); -} - -void ImageInference::inferAsync(cv::Mat image, const std::string& id, bool json, bool csv, bool overlay) { - const ImageInputData& input_data = image; - model->inferAsync(input_data, {{"image", image}, {"id", id}, {"json", json}, {"csv", csv}, {"overlay", overlay}}); -} - - -void ImageInference::set_listener(const std::function callback) { - auto lambda_callback = [callback, this](std::unique_ptr result, const ov::AnyMap& args) { - try { - cv::Mat image = args.find("image")->second.as(); - bool csv = args.find("csv")->second.as(); - bool json = args.find("json")->second.as(); - bool overlay = args.find("overlay")->second.as(); - auto inference_result = post_process(std::move(result), image); - auto response = serialize(inference_result, image, json, csv, overlay); - response["id"] = args.find("id")->second.as(); - callback(OkStatus, "", response.dump()); - } catch(const api_error& re) { - callback(re.status, re.additional_info, ""); - } catch(const std::exception& ex) { - callback(ErrorStatus, strdup(ex.what()), ""); - } catch (...) { - callback(ErrorStatus, "", ""); - } - }; - model->setCallback(lambda_callback); -} - -nlohmann::json ImageInference::serialize(const geti::InferenceResult& inference_result, cv::Mat image, bool json, bool csv, bool overlay) { - nlohmann::json output = {}; - if (json) { - output["json"] = inference_result; - } - if (csv) { - output["csv"] = geti::csv_serialize(inference_result); - } - if (overlay) { - auto overlay = geti::draw_overlay(image, inference_result, draw_options, project_labels, ImageInference::face); - cv::Mat overlay_rgb; - cv::cvtColor(overlay, overlay_rgb, cv::COLOR_RGB2BGRA); - output["overlay"] = geti::base64_encode_mat(overlay_rgb); - } - - return output; -} - -void ImageInference::close() { - //nothing to clean up yet... - stop_camera(); -} - -bool ImageInference::model_loaded() { - return model != nullptr; -} - -void ImageInference::open_camera(int device) { - camera_get_frame = true; - camera_thread = std::thread(&ImageInference::start_camera, this, device); -} - -void ImageInference::stop_camera() { - camera_get_frame = false; - if (camera_thread.joinable()) { - camera_thread.join(); - } -} - -void ImageInference::start_camera(int device) { - cv::VideoCapture cap; - std::cout << device << std::endl; - cap.open(device); - if (!cap.isOpened()) { - throw api_error(CameraNotOpenend); - } - - cv::Mat frame; - int i = 0; - while(camera_get_frame) { - std::cout << "input..." << std::endl; - cap.read(frame); - std::cout << frame.rows << std::endl; - if (frame.empty()) { - std::cout << "empty frame" << std::endl; - continue; - } - inferAsync(frame, "frame_" + std::to_string(i), false, false, true); - model->awaitAll(); - i++; - } -} - - void ImageInference::load_font(const char* font_path) { - auto font_success = ImageInference::face.createFromFile(font_path); - if (font_success != BL_SUCCESS) { - throw api_error(FontLoadError); - } - -} - -void ImageInference::serialize_model(const std::string& model_path, const std::string& output_path) { - std::unique_ptr model; - std::string device = "CPU"; //Loading is faster on CPU, and serialization is a small task - - std::string model_type = ""; - { - auto core = ov::Core(); - auto ov_model = core.read_model(model_path); - - auto config = ov_model->get_rt_info("model_info"); - auto model_type_iter = config.find("model_type"); - if (model_type_iter == config.end()) { - throw api_error(StatusEnum::ModelTypeNotSupplied); - } - model_type = model_type_iter->second.as(); - } - - switch(get_model_type(model_type)) { - case ModelType::Detection: - model = DetectionModel::create_model(model_path, {}, "", true, device); - break; - case ModelType::Classification: - model = ClassificationModel::create_model(model_path, {}, true, device); - break; - case ModelType::Segmentation: - model = SegmentationModel::create_model(model_path, {}, true, device); - break; - case ModelType::MaskRCNN: - model = MaskRCNNModel::create_model(model_path, {}, true, device); - break; - case ModelType::Anomaly: - model = AnomalyModel::create_model(model_path, {}, true, device); - break; - default: - throw std::runtime_error("Model type serialization not implemented"); - } - ov::serialize(model->getModel(), output_path); -} diff --git a/openvino_bindings/src/image/image_inference.h b/openvino_bindings/src/image/image_inference.h deleted file mode 100644 index 816e0e2f..00000000 --- a/openvino_bindings/src/image/image_inference.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2024 Intel Corporation - * - * SPDX-License-Identifier: Apache-2.0 - */ - -#ifndef IMAGE_INFERENCE_H_ -#define IMAGE_INFERENCE_H_ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "data_structures.h" -#include "src/image/overlay.h" -#include "src/utils/status.h" - - -enum class ModelType { - Detection, - Classification, - MaskRCNN, - Segmentation, - Anomaly, -}; - -ModelType get_model_type(const std::string& name); - -enum class TaskType { - Detection, - Classification, - RotatedDetection, - InstanceSegmentation, - Segmentation, - Anomaly, -}; - -TaskType get_task_type(const std::string& name); - -class ImageInference { -public: - std::optional empty_label; - TaskType task; - std::vector project_labels; // Labels with color coding etc. - static BLFontFace face; - - ImageInference(std::string model_path, TaskType task, std::string device); - - bool model_loaded(); - geti::InferenceResult infer(cv::Mat image); - void inferAsync(cv::Mat image, const std::string& id, bool json, bool csv, bool overlay); - geti::InferenceResult post_process(std::unique_ptr result, cv::Mat image); - void set_listener(const std::function callback); - nlohmann::json serialize(const geti::InferenceResult& inference_result, cv::Mat image, bool json, bool csv, bool overlay); - - void open_camera(int device); - void stop_camera(); - bool camera_get_frame = false; - - static void load_font(const char* font_path); - static void serialize_model(const std::string& model_path, const std::string& output_path); - - void close(); -private: - void start_camera(int device); - - ModelType model_type; - std::shared_ptr ia; - std::unique_ptr model; - std::vector labels; - geti::DrawOptions draw_options{2, 0.4, 1.0}; - std::thread camera_thread; - -}; - - -#endif // IMAGE_INFERENCE_H_ diff --git a/openvino_bindings/src/image/post_processing.h b/openvino_bindings/src/image/post_processing.h deleted file mode 100644 index ed7ab4a1..00000000 --- a/openvino_bindings/src/image/post_processing.h +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2024 Intel Corporation - * - * SPDX-License-Identifier: Apache-2.0 - */ - -#ifndef POST_PROCESSING_H_ -#define POST_PROCESSING_H_ - -#include "src/image/contourer.h" -#include "src/image/data_structures.h" -#include "src/utils/errors.h" -#include - -namespace geti { - -inline InferenceResult detection_post_processing(std::unique_ptr result_base, const std::vector