openvino: major inference performance improvements by using the async API (ov.AsyncInferQueue)

koush · koush · commit 7ea849d357c9 · 2024-12-29T23:20:17.000-08:00
diff --git a/plugins/openvino/package-lock.json b/plugins/openvino/package-lock.json
diff --git a/plugins/openvino/package.json b/plugins/openvino/package.json
@@ -48,5 +48,5 @@
    "devDependencies": {
       "@scrypted/sdk": "file:../../sdk"
    },
-   "version": "0.1.147"
+   "version": "0.1.148"
 }
diff --git a/plugins/openvino/src/ov/__init__.py b/plugins/openvino/src/ov/__init__.py
@@ -25,8 +25,8 @@
 except:
     OpenVINOTextRecognition = None
 
-predictExecutor = concurrent.futures.ThreadPoolExecutor(1, "OpenVINO-Predict")
-prepareExecutor = concurrent.futures.ThreadPoolExecutor(1, "OpenVINO-Prepare")
+predictExecutor = concurrent.futures.ThreadPoolExecutor(thread_name_prefix="OpenVINO-Predict")
+prepareExecutor = concurrent.futures.ThreadPoolExecutor(thread_name_prefix="OpenVINO-Prepare")
 
 availableModels = [
     "Default",
@@ -48,10 +48,6 @@
     "scrypted_yolov9s_320",
     "scrypted_yolov9t_320",
     "scrypted_yolov8n_320",
-    "ssd_mobilenet_v1_coco",
-    "ssdlite_mobilenet_v2",
-    "yolo-v3-tiny-tf",
-    "yolo-v4-tiny-tf",
 ]
 
 
@@ -245,6 +241,15 @@ def __init__(self, nativeId: str | None = None, forked: bool = False):
                     self.storage.removeItem("precision")
                     self.requestRestart()
 
+        self.infer_queue = ov.AsyncInferQueue(self.compiled_model)
+        def callback(infer_request, future: asyncio.Future):
+            try:
+                output = infer_request.get_output_tensor(0)
+                self.loop.call_soon_threadsafe(future.set_result, output)
+            except Exception as e:
+                self.loop.call_soon_threadsafe(future.set_exception, e)
+        self.infer_queue.set_callback(callback)
+
         print(
             "EXECUTION_DEVICES",
             self.compiled_model.get_property("EXECUTION_DEVICES"),
@@ -318,68 +323,38 @@ def get_input_format(self):
         return super().get_input_format()
 
     async def detect_once(self, input: Image.Image, settings: Any, src_size, cvss):
-        def predict(input_tensor):
-            infer_request = self.compiled_model.create_infer_request()
-            infer_request.set_input_tensor(input_tensor)
-            output_tensors = infer_request.infer()
+        async def predict(input_tensor):
+            f = asyncio.Future(loop = self.loop)
+            self.infer_queue.start_async(input_tensor, f)
 
-            objs = []
+            output_tensors = await f
 
-            if self.scrypted_yolo:
-                if self.scrypted_yolov10:
-                    return yolo.parse_yolov10(output_tensors[0][0])
-                if self.scrypted_yolo_nas:
-                    return yolo.parse_yolo_nas([output_tensors[1], output_tensors[0]])
-                return yolo.parse_yolov9(output_tensors[0][0])
-
-            if self.yolo:
-                # index 2 will always either be 13 or 26
-                # index 1 may be 13/26 or 255 depending on yolo 3 vs 4
-                if infer_request.outputs[0].data.shape[2] == 13:
-                    out_blob = infer_request.outputs[0]
-                else:
-                    out_blob = infer_request.outputs[1]
-
-                # 13 13
-                objects = yolo.parse_yolo_region(
-                    out_blob.data,
-                    (input.width, input.height),
-                    (81, 82, 135, 169, 344, 319),
-                    self.sigmoid,
-                )
+            if not self.yolo:
+                output = output_tensors
+                for values in output.data[0][0]:
+                    valid, index, confidence, l, t, r, b = values
+                    if valid == -1:
+                        break
 
-                for r in objects:
-                    obj = Prediction(
-                        r["classId"],
-                        r["confidence"],
-                        Rectangle(r["xmin"], r["ymin"], r["xmax"], r["ymax"]),
-                    )
-                    objs.append(obj)
+                    def torelative(value: float):
+                        return value * self.model_dim
 
-                # what about output[1]?
-                # 26 26
-                # objects = yolo.parse_yolo_region(out_blob, (input.width, input.height), (,27, 37,58, 81,82))
+                    l = torelative(l)
+                    t = torelative(t)
+                    r = torelative(r)
+                    b = torelative(b)
 
-                return objs
-
-            output = infer_request.get_output_tensor(0)
-            for values in output.data[0][0]:
-                valid, index, confidence, l, t, r, b = values
-                if valid == -1:
-                    break
-
-                def torelative(value: float):
-                    return value * self.model_dim
-
-                l = torelative(l)
-                t = torelative(t)
-                r = torelative(r)
-                b = torelative(b)
+                    obj = Prediction(index - 1, confidence, Rectangle(l, t, r, b))
+                    objs.append(obj)
 
-                obj = Prediction(index - 1, confidence, Rectangle(l, t, r, b))
-                objs.append(obj)
+                return objs
 
-            return objs
+            output = output_tensors.data
+            if self.scrypted_yolov10:
+                return yolo.parse_yolov10(output[0])
+            if self.scrypted_yolo_nas:
+                return yolo.parse_yolo_nas([output[1], output[0]])
+            return yolo.parse_yolov9(output[0])
 
         def prepare():
             # the input_tensor can be created with the shared_memory=True parameter,
@@ -414,9 +389,7 @@ def prepare():
             input_tensor = await asyncio.get_event_loop().run_in_executor(
                 prepareExecutor, lambda: prepare()
             )
-            objs = await asyncio.get_event_loop().run_in_executor(
-                predictExecutor, lambda: predict(input_tensor)
-            )
+            objs = await predict(input_tensor)
 
         except:
             traceback.print_exc()

Original file line number	Diff line number	Diff line change
`@@ -48,5 +48,5 @@`
`48`	`48`	`"devDependencies": {`
`49`	`49`	`"@scrypted/sdk": "file:../../sdk"`
`50`	`50`	`},`
`51`		`- "version": "0.1.147"`
	`51`	`+ "version": "0.1.148"`
`52`	`52`	`}`