[PYTHON API] update InferQueue #8513

Merged
59 commits
2df7b7d
Bind exec core ov (#50)
bszmelcz Oct 21, 2021
5998235
Output const node python tests (#52)
bszmelcz Oct 26, 2021
74ca084
rename ie_version to version
akuporos Oct 29, 2021
dec4cb0
Pszmel/bind infer request (#51)
pszmel Nov 3, 2021
c6c7d9b
fix get_version
akuporos Nov 3, 2021
dd6dbc9
fix opaque issue
akuporos Nov 3, 2021
573d367
some cosmetic changes
akuporos Nov 3, 2021
9c06d10
fix codestyle in tests
akuporos Nov 3, 2021
5b2b9a0
make tests green
akuporos Nov 3, 2021
a610fac
Merge branch 'develop/new-python-api' of https://github.com/akuporos/…
Nov 4, 2021
5fe62ee
Extend python InferRequest
Nov 5, 2021
6998cbd
Extend python Function
Nov 5, 2021
c541d2d
Change return value of infer call
Nov 5, 2021
d83f73f
Fix missing precisions conversions in CPU plugin
Nov 5, 2021
3ddd264
Rework of runtime for new tests
Nov 5, 2021
0c6a519
Fixed onnx reading in python tests
Nov 5, 2021
127f0b9
Edit compatibility tests
Nov 5, 2021
695104e
Edit tests
Nov 5, 2021
7f457ed
Add FLOAT_LIKE xfails
Nov 5, 2021
7456077
Merge remote-tracking branch 'upstream/master' into develop/new-pytho…
Nov 5, 2021
a6e21e4
Merge remote-tracking branch 'upstream/master' into develop/new-pytho…
akuporos Nov 5, 2021
9cebdc7
[Python API] bind ProfilingInfo (#55)
Nov 8, 2021
f2d8e88
fix codestyle
akuporos Nov 8, 2021
a55c939
Merge remote-tracking branch 'upstream/master' into develop/new-pytho…
akuporos Nov 8, 2021
e62512f
Infer new request method (#56)
bszmelcz Nov 8, 2021
3621e39
add add_extension from path
akuporos Nov 8, 2021
efd844d
codestyle
akuporos Nov 8, 2021
ab9c678
Merge remote-tracking branch 'upstream/master' into develop/new-pytho…
akuporos Nov 9, 2021
1c208ba
fix win build
akuporos Nov 9, 2021
b5d8303
add inputs-outputs to function
akuporos Nov 9, 2021
a9a0e7f
update infer queue
Nov 10, 2021
adf1844
Merge remote-tracking branch 'upstream/master' into develop/new-pytho…
akuporos Nov 10, 2021
08c5719
Merge remote-tracking branch 'akuporos/develop/new-python-api' into a…
Nov 10, 2021
90b49f9
fix code style
Nov 10, 2021
22b0a75
Hot-fix CPU plugin with precision
Nov 10, 2021
1cbf144
fix start_async
akuporos Nov 10, 2021
530d164
add performance hint to time infer (#8480)
Nov 10, 2021
ae72199
Updated common migration pipeline (#8176)
ilyachur Nov 10, 2021
f55e3b2
Avoid redundant graph nodes scans (#8415)
t-jankowski Nov 10, 2021
372a424
Refactor work with env variables (#8208)
Nov 10, 2021
3c31285
[IE Sample Scripts] Use cmake to build samples (#8442)
dpigasin Nov 10, 2021
f069af7
Remove opset8 from compatibility ngraph python API (#8452)
Nov 10, 2021
4051fdc
[GPU] OneDNN gpu submodule update to version 2.5 (#8449)
lznamens Nov 10, 2021
1319384
Install rules for static libraries case (#8384)
ilya-lavrenov Nov 10, 2021
010106b
Merge remote-tracking branch 'upstream/master' into develop/new-pytho…
akuporos Nov 11, 2021
28d83b4
fix codestyle
akuporos Nov 11, 2021
3525a0b
rename all methods in this class to snake_case
Nov 11, 2021
84fda21
resolve conflicts
Nov 11, 2021
37b048f
some updates
Nov 11, 2021
ee4a8cf
code style
Nov 11, 2021
26ef61b
fix code style in tests
Nov 11, 2021
976ab13
Merge remote-tracking branch 'upstream/master' into develop/new-pytho…
akuporos Nov 11, 2021
3a8bf18
Merge branch 'develop/new-python-api' of https://github.com/akuporos/…
akuporos Nov 11, 2021
a5f8dee
Merge remote-tracking branch 'akuporos/develop/new-python-api' into a…
Nov 11, 2021
fd9f0b0
compute latency in callback
Nov 13, 2021
0547e21
Fix get_idle_request
Nov 13, 2021
2d027da
resolve conflicts
Nov 13, 2021
fa5bdea
fix latency
Nov 13, 2021
6f7187b
Fix code style
Nov 13, 2021
6 changes: 3 additions & 3 deletions runtime/bindings/python/src/openvino/__init__.py
@@ -36,7 +36,7 @@
from openvino.pyopenvino import DataPtr
from openvino.pyopenvino import TensorDesc
from openvino.pyopenvino import get_version
- #from openvino.pyopenvino import InferQueue
+ from openvino.pyopenvino import AsyncInferQueue
from openvino.pyopenvino import InferRequest # TODO: move to ie_api?
from openvino.pyopenvino import Blob
from openvino.pyopenvino import PreProcessInfo
@@ -83,5 +83,5 @@
# Patching InferRequest
InferRequest.infer = infer
InferRequest.start_async = start_async
- # Patching InferQueue
- #InferQueue.async_infer = async_infer
+ # Patching AsyncInferQueue
+ AsyncInferQueue.start_async = start_async
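
With both classes patched, the queue can be driven straight from the openvino package. A minimal sketch of the intended call pattern, where exec_net (an already-created ExecutableNetwork) and data (an input array/Tensor) are placeholders rather than code from this PR:

from openvino import AsyncInferQueue

queue = AsyncInferQueue(exec_net, jobs=4)
queue.start_async({0: data}, userdata=0)  # patched helper from ie_api.py
queue.wait_all()
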
5 changes: 3 additions & 2 deletions runtime/bindings/python/src/openvino/ie_api.py
@@ -3,7 +3,7 @@

import numpy as np
import copy
- from typing import List
+ from typing import List, Union

from openvino.pyopenvino import TBlobFloat32
from openvino.pyopenvino import TBlobFloat64
@@ -17,6 +17,7 @@
from openvino.pyopenvino import TBlobUint8
from openvino.pyopenvino import TensorDesc
from openvino.pyopenvino import InferRequest
from openvino.pyopenvino import AsyncInferQueue
from openvino.pyopenvino import ExecutableNetwork
from openvino.pyopenvino import Tensor

@@ -57,7 +58,7 @@ def infer_new_request(exec_net: ExecutableNetwork, inputs: dict = None) -> List[
return [copy.deepcopy(tensor.data) for tensor in res]

# flake8: noqa: D102
- def start_async(request: InferRequest, inputs: dict = {}, userdata: dict = None) -> None: # type: ignore
+ def start_async(request: Union[InferRequest, AsyncInferQueue], inputs: dict = {}, userdata: dict = None) -> None: # type: ignore
request._start_async(inputs=normalize_inputs(inputs), userdata=userdata)

# flake8: noqa: C901
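Because both InferRequest and AsyncInferQueue expose _start_async on the C++ side, the same wrapper now serves either object. A rough illustration, assuming request, queue and img are pre-existing placeholders:

start_async(request, inputs={"data": img})             # single asynchronous request
start_async(queue, inputs={"data": img}, userdata=7)   # next idle request in the queue
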
205 changes: 205 additions & 0 deletions runtime/bindings/python/src/pyopenvino/core/async_infer_queue.cpp
@@ -0,0 +1,205 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "pyopenvino/core/async_infer_queue.hpp"

#include <ie_common.h>
#include <pybind11/functional.h>
#include <pybind11/stl.h>

#include <chrono>
#include <condition_variable>
#include <mutex>
#include <queue>
#include <string>
#include <vector>

#include "pyopenvino/core/common.hpp"
#include "pyopenvino/core/infer_request.hpp"

namespace py = pybind11;

class AsyncInferQueue {
public:
AsyncInferQueue(std::vector<InferRequestWrapper> requests,
std::queue<size_t> idle_handles,
std::vector<py::object> user_ids)
: _requests(requests),
_idle_handles(idle_handles),
_user_ids(user_ids) {
this->set_default_callbacks();
}

~AsyncInferQueue() {
_requests.clear();
}

bool _is_ready() {
py::gil_scoped_release release;
std::unique_lock<std::mutex> lock(_mutex);
_cv.wait(lock, [this] {
return !(_idle_handles.empty());
});

return !(_idle_handles.empty());
}

size_t get_idle_request_id() {
// Wait for any of _idle_handles
py::gil_scoped_release release;
std::unique_lock<std::mutex> lock(_mutex);
_cv.wait(lock, [this] {
return !(_idle_handles.empty());
});

return _idle_handles.front();
}

void wait_all() {
// Wait for all requests to finish and run their callbacks, which returns
// every handle to _idle_handles so its size matches the number of requests
py::gil_scoped_release release;
std::unique_lock<std::mutex> lock(_mutex);
_cv.wait(lock, [this] {
return _idle_handles.size() == _requests.size();
});
}

void set_default_callbacks() {
for (size_t handle = 0; handle < _requests.size(); handle++) {
_requests[handle]._request.set_callback([this, handle /* ... */](std::exception_ptr exception_ptr) {
_requests[handle]._end_time = Time::now();
// Add idle handle to queue
_idle_handles.push(handle);
// Notify threads waiting in get_idle_request_id() or wait_all()
_cv.notify_one();
});
}
}

void set_custom_callbacks(py::function f_callback) {
for (size_t handle = 0; handle < _requests.size(); handle++) {
_requests[handle]._request.set_callback([this, f_callback, handle](std::exception_ptr exception_ptr) {
_requests[handle]._end_time = Time::now();
try {
if (exception_ptr) {
std::rethrow_exception(exception_ptr);
}
} catch (const std::exception& e) {
throw ov::Exception(e.what());
}
// Acquire GIL, execute Python function
py::gil_scoped_acquire acquire;
f_callback(_requests[handle], _user_ids[handle]);
// Add idle handle to queue
_idle_handles.push(handle);
// Notify threads waiting in get_idle_request_id() or wait_all()
_cv.notify_one();
});
}
}

std::vector<InferRequestWrapper> _requests;
std::queue<size_t> _idle_handles;
std::vector<py::object> _user_ids; // user ID can be any Python object
std::mutex _mutex;
std::condition_variable _cv;
};
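// Design note: the idle-handle queue plus the condition variable is what lets
// get_idle_request_id() and wait_all() block (with the GIL released) until a
// request becomes free; every completion callback pushes its handle back onto
// _idle_handles and notifies one waiting thread.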

void regclass_AsyncInferQueue(py::module m) {
py::class_<AsyncInferQueue, std::shared_ptr<AsyncInferQueue>> cls(m, "AsyncInferQueue");

cls.def(py::init([](ov::runtime::ExecutableNetwork& net, size_t jobs) {
if (jobs == 0) {
jobs = (size_t)Common::get_optimal_number_of_requests(net);
}

std::vector<InferRequestWrapper> requests;
std::queue<size_t> idle_handles;
std::vector<py::object> user_ids(jobs);

for (size_t handle = 0; handle < jobs; handle++) {
auto request = InferRequestWrapper(net.create_infer_request());
// Get Inputs and Outputs info from executable network
request._inputs = net.inputs();
request._outputs = net.outputs();

requests.push_back(request);
idle_handles.push(handle);
}

return new AsyncInferQueue(requests, idle_handles, user_ids);
}),
py::arg("network"),
py::arg("jobs") = 0);

cls.def(
"_start_async",
[](AsyncInferQueue& self, const py::dict inputs, py::object userdata) {
// get_idle_request_id() blocks the queue until there is at least one
// idle (free to use) InferRequest available
auto handle = self.get_idle_request_id();
self._idle_handles.pop();
// Store the user-provided label/id for this job
self._user_ids[handle] = userdata;
// Update inputs if there are any
if (!inputs.empty()) {
if (py::isinstance<std::string>(inputs.begin()->first)) {
auto inputs_map = Common::cast_to_tensor_name_map(inputs);
for (auto&& input : inputs_map) {
self._requests[handle]._request.set_tensor(input.first, input.second);
}
} else if (py::isinstance<int>(inputs.begin()->first)) {
auto inputs_map = Common::cast_to_tensor_index_map(inputs);
for (auto&& input : inputs_map) {
self._requests[handle]._request.set_input_tensor(input.first, input.second);
}
}
}
// Now GIL can be released - we are NOT working with Python objects in this block
{
py::gil_scoped_release release;
self._requests[handle]._start_time = Time::now();
// Start InferRequest in asynchronous mode
self._requests[handle]._request.start_async();
}
},
py::arg("inputs"),
py::arg("userdata"));

cls.def("is_ready", [](AsyncInferQueue& self) {
return self._is_ready();
});

cls.def("wait_all", [](AsyncInferQueue& self) {
return self.wait_all();
});

cls.def("get_idle_request_id", [](AsyncInferQueue& self) {
return self.get_idle_request_id();
});

cls.def("set_callback", [](AsyncInferQueue& self, py::function f_callback) {
self.set_custom_callbacks(f_callback);
});

cls.def("__len__", [](AsyncInferQueue& self) {
return self._requests.size();
});

cls.def(
"__iter__",
[](AsyncInferQueue& self) {
return py::make_iterator(self._requests.begin(), self._requests.end());
},
py::keep_alive<0, 1>()); /* Keep AsyncInferQueue alive while iterator is used */

cls.def("__getitem__", [](AsyncInferQueue& self, size_t i) {
return self._requests[i];
});

cls.def_property_readonly("userdata", [](AsyncInferQueue& self) {
return self._user_ids;
});
}
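
Taken together, these bindings form a small asynchronous request pool. A hedged end-to-end sketch of how they are meant to be used from Python; exec_net, batches and the callback body are assumed placeholders, not code from this PR:

from openvino import AsyncInferQueue

def on_done(request, userdata):
    # called under the GIL from the C++ callback; userdata is whatever
    # object was passed to start_async for this job
    print(f"job {userdata} finished")

queue = AsyncInferQueue(exec_net, jobs=0)  # jobs=0 -> optimal number of requests
queue.set_callback(on_done)
for i, batch in enumerate(batches):
    queue.start_async({0: batch}, userdata=i)  # blocks while no request is idle
queue.wait_all()
print(len(queue), queue.userdata)
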
@@ -7,4 +7,4 @@

namespace py = pybind11;

- void regclass_InferQueue(py::module m);
+ void regclass_AsyncInferQueue(py::module m);
6 changes: 3 additions & 3 deletions runtime/bindings/python/src/pyopenvino/core/common.cpp
@@ -321,13 +321,13 @@ void set_request_blobs(InferenceEngine::InferRequest& request, const py::dict& d
}
}

- uint32_t get_optimal_number_of_requests(const InferenceEngine::ExecutableNetwork& actual) {
+ uint32_t get_optimal_number_of_requests(const ov::runtime::ExecutableNetwork& actual) {
try {
- auto parameter_value = actual.GetMetric(METRIC_KEY(SUPPORTED_METRICS));
+ auto parameter_value = actual.get_metric(METRIC_KEY(SUPPORTED_METRICS));
auto supported_metrics = parameter_value.as<std::vector<std::string>>();
const std::string key = METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS);
if (std::find(supported_metrics.begin(), supported_metrics.end(), key) != supported_metrics.end()) {
- parameter_value = actual.GetMetric(key);
+ parameter_value = actual.get_metric(key);
if (parameter_value.is<unsigned int>())
return parameter_value.as<unsigned int>();
else
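This helper is what the AsyncInferQueue constructor falls back to when jobs is left at its default of 0; a short sketch of that path, with exec_net again standing in for a real ExecutableNetwork:

from openvino import AsyncInferQueue

queue = AsyncInferQueue(exec_net)  # jobs defaults to 0
# jobs == 0 -> get_optimal_number_of_requests(network) picks the pool size
print(len(queue))                  # number of infer requests actually created
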
3 changes: 2 additions & 1 deletion runtime/bindings/python/src/pyopenvino/core/common.hpp
@@ -15,6 +15,7 @@
#include "Python.h"
#include "ie_common.h"
#include "openvino/runtime/tensor.hpp"
#include "openvino/runtime/executable_network.hpp"
#include "pyopenvino/core/containers.hpp"

namespace py = pybind11;
@@ -60,5 +61,5 @@ namespace Common

void set_request_blobs(InferenceEngine::InferRequest& request, const py::dict& dictionary);

- uint32_t get_optimal_number_of_requests(const InferenceEngine::ExecutableNetwork& actual);
+ uint32_t get_optimal_number_of_requests(const ov::runtime::ExecutableNetwork& actual);
}; // namespace Common