Commit 5b3fd9d
[Backend] Support Intel GPU with heterogeneous mode (#701)
* Add some comments for Python API
* Support OpenVINO GPU
* Add CPU operators
* Add interface to specify hetero operators
* Remove useless dir
* Format code
* Remove debug code
* Support GPU for ONNX
1 parent ad5c9c0 commit 5b3fd9d

File tree

7 files changed: +156 -44 lines changed

.devcontainer/devcontainer.json

Lines changed: 0 additions & 8 deletions
This file was deleted.

fastdeploy/backends/openvino/ov_backend.cc

Lines changed: 92 additions & 18 deletions
@@ -32,6 +32,14 @@ std::vector<int64_t> PartialShapeToVec(const ov::PartialShape& shape) {
   return res;
 }
 
+ov::PartialShape VecToPartialShape(const std::vector<int64_t>& shape) {
+  std::vector<ov::Dimension> dims;
+  for (size_t i = 0; i < shape.size(); ++i) {
+    dims.emplace_back(ov::Dimension(shape[i]));
+  }
+  return ov::PartialShape(dims);
+}
+
 FDDataType OpenVINODataTypeToFD(const ov::element::Type& type) {
   if (type == ov::element::f32) {
     return FDDataType::FP32;

@@ -100,6 +108,26 @@ bool OpenVINOBackend::InitFromPaddle(const std::string& model_file,
   option_ = option;
 
   std::shared_ptr<ov::Model> model = core_.read_model(model_file, params_file);
+  if (option_.shape_infos.size() > 0) {
+    std::map<std::string, ov::PartialShape> shape_infos;
+    for (const auto& item : option_.shape_infos) {
+      shape_infos[item.first] = VecToPartialShape(item.second);
+    }
+    model->reshape(shape_infos);
+  }
+
+  if (option_.device.find("HETERO") != std::string::npos) {
+    auto supported_ops = core_.query_model(model, option_.device);
+    for (auto&& op : model->get_ops()) {
+      auto& affinity = supported_ops[op->get_friendly_name()];
+      if (option_.cpu_operators.find(op->description()) !=
+          option_.cpu_operators.end()) {
+        op->get_rt_info()["affinity"] = "CPU";
+      } else {
+        op->get_rt_info()["affinity"] = affinity;
+      }
+    }
+  }
 
   // Get inputs/outputs information from loaded model
   const std::vector<ov::Output<ov::Node>> inputs = model->inputs();

@@ -151,14 +179,25 @@ bool OpenVINOBackend::InitFromPaddle(const std::string& model_file,
   if (option_.cpu_thread_num > 0) {
     properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num;
   }
-  if (option_.ov_num_streams == -1) {
-    properties["NUM_STREAMS"] = ov::streams::AUTO;
-  } else if (option_.ov_num_streams == -2) {
-    properties["NUM_STREAMS"] = ov::streams::NUMA;
-  } else if (option_.ov_num_streams > 0) {
-    properties["NUM_STREAMS"] = option_.ov_num_streams;
+  if (option_.device == "CPU") {
+    if (option_.num_streams == -1) {
+      properties["NUM_STREAMS"] = ov::streams::AUTO;
+    } else if (option_.num_streams == -2) {
+      properties["NUM_STREAMS"] = ov::streams::NUMA;
+    } else if (option_.num_streams > 0) {
+      properties["NUM_STREAMS"] = option_.num_streams;
+    }
+  } else {
+    if (option_.num_streams != 0) {
+      FDWARNING << "NUM_STREAMS only available on device CPU, currently the "
+                   "device is set as "
+                << option_.device << ", the NUM_STREAMS will be ignored."
+                << std::endl;
+    }
   }
-  FDINFO << "Compile OpenVINO model on device_name:" << option.device << "." << std::endl;
+
+  FDINFO << "Compile OpenVINO model on device_name:" << option.device << "."
+         << std::endl;
   compiled_model_ = core_.compile_model(model, option.device, properties);
 
   request_ = compiled_model_.create_infer_request();

@@ -199,6 +238,27 @@ bool OpenVINOBackend::InitFromOnnx(const std::string& model_file,
 
   std::shared_ptr<ov::Model> model = core_.read_model(model_file);
 
+  if (option_.shape_infos.size() > 0) {
+    std::map<std::string, ov::PartialShape> shape_infos;
+    for (const auto& item : option_.shape_infos) {
+      shape_infos[item.first] = VecToPartialShape(item.second);
+    }
+    model->reshape(shape_infos);
+  }
+
+  if (option_.device.find("HETERO") != std::string::npos) {
+    auto supported_ops = core_.query_model(model, option_.device);
+    for (auto&& op : model->get_ops()) {
+      auto& affinity = supported_ops[op->get_friendly_name()];
+      if (option_.cpu_operators.find(op->description()) !=
+          option_.cpu_operators.end()) {
+        op->get_rt_info()["affinity"] = "CPU";
+      } else {
+        op->get_rt_info()["affinity"] = affinity;
+      }
+    }
+  }
+
   // Get inputs/outputs information from loaded model
   const std::vector<ov::Output<ov::Node>> inputs = model->inputs();
   std::map<std::string, TensorInfo> input_infos;

@@ -249,18 +309,29 @@ bool OpenVINOBackend::InitFromOnnx(const std::string& model_file,
   if (option_.cpu_thread_num > 0) {
     properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num;
   }
-  if (option_.ov_num_streams == -1) {
-    properties["NUM_STREAMS"] = ov::streams::AUTO;
-  } else if (option_.ov_num_streams == -2) {
-    properties["NUM_STREAMS"] = ov::streams::NUMA;
-  } else if (option_.ov_num_streams > 0) {
-    properties["NUM_STREAMS"] = option_.ov_num_streams;
+  if (option_.device == "CPU") {
+    if (option_.num_streams == -1) {
+      properties["NUM_STREAMS"] = ov::streams::AUTO;
+    } else if (option_.num_streams == -2) {
+      properties["NUM_STREAMS"] = ov::streams::NUMA;
+    } else if (option_.num_streams > 0) {
+      properties["NUM_STREAMS"] = option_.num_streams;
+    }
+  } else {
+    if (option_.num_streams != 0) {
+      FDWARNING << "NUM_STREAMS only available on device CPU, currently the "
+                   "device is set as "
+                << option_.device << ", the NUM_STREAMS will be ignored."
+                << std::endl;
+    }
   }
-  FDINFO << "Compile OpenVINO model on device_name:" << option.device << "." << std::endl;
+
+  FDINFO << "Compile OpenVINO model on device_name:" << option.device << "."
+         << std::endl;
   compiled_model_ = core_.compile_model(model, option.device, properties);
 
   request_ = compiled_model_.create_infer_request();
-
+
   initialized_ = true;
   return true;
 }

@@ -302,13 +373,16 @@ bool OpenVINOBackend::Infer(std::vector<FDTensor>& inputs,
   return true;
 }
 
-std::unique_ptr<BaseBackend> OpenVINOBackend::Clone(void *stream, int device_id) {
-  std::unique_ptr<BaseBackend> new_backend = utils::make_unique<OpenVINOBackend>();
+std::unique_ptr<BaseBackend> OpenVINOBackend::Clone(void* stream,
+                                                    int device_id) {
+  std::unique_ptr<BaseBackend> new_backend =
+      utils::make_unique<OpenVINOBackend>();
   auto casted_backend = dynamic_cast<OpenVINOBackend*>(new_backend.get());
   casted_backend->option_ = option_;
   casted_backend->request_ = compiled_model_.create_infer_request();
   casted_backend->input_infos_.assign(input_infos_.begin(), input_infos_.end());
-  casted_backend->output_infos_.assign(output_infos_.begin(), output_infos_.end());
+  casted_backend->output_infos_.assign(output_infos_.begin(),
+                                       output_infos_.end());
   return new_backend;
 }
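The HETERO branch above is the heart of the change: core_.query_model asks OpenVINO which device each operator would land on under the given device string (e.g. "HETERO:GPU,CPU"), and the backend then overrides the affinity to "CPU" for any operator type listed in cpu_operators. A rough standalone sketch of the same idea in OpenVINO's Python API (model path, device string, and operator set are illustrative, not from this commit):

import openvino.runtime as ov

core = ov.Core()
model = core.read_model("model.onnx")  # illustrative path

device = "HETERO:GPU,CPU"
cpu_operators = {"MulticlassNms"}  # operator types to pin to CPU

# Ask the HETERO plugin where each operator would run by default
supported_ops = core.query_model(model, device)
for op in model.get_ops():
    if op.get_type_name() in cpu_operators:
        # Force operators the GPU plugin handles poorly onto the CPU
        op.get_rt_info()["affinity"] = "CPU"
    else:
        # Keep the plugin's default placement
        op.get_rt_info()["affinity"] = supported_ops[op.get_friendly_name()]

compiled_model = core.compile_model(model, device)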

fastdeploy/backends/openvino/ov_backend.h

Lines changed: 10 additions & 8 deletions
@@ -28,8 +28,9 @@ namespace fastdeploy {
 struct OpenVINOBackendOption {
   std::string device = "CPU";
   int cpu_thread_num = -1;
-  int ov_num_streams = 1;
+  int num_streams = 0;
   std::map<std::string, std::vector<int64_t>> shape_infos;
+  std::set<std::string> cpu_operators{"MulticlassNms"};
 };
 
 class OpenVINOBackend : public BaseBackend {

@@ -38,13 +39,13 @@ class OpenVINOBackend : public BaseBackend {
   OpenVINOBackend() {}
   virtual ~OpenVINOBackend() = default;
 
-  bool InitFromPaddle(
-      const std::string& model_file, const std::string& params_file,
-      const OpenVINOBackendOption& option = OpenVINOBackendOption());
+  bool
+  InitFromPaddle(const std::string& model_file, const std::string& params_file,
+                 const OpenVINOBackendOption& option = OpenVINOBackendOption());
 
-  bool InitFromOnnx(
-      const std::string& model_file,
-      const OpenVINOBackendOption& option = OpenVINOBackendOption());
+  bool
+  InitFromOnnx(const std::string& model_file,
+               const OpenVINOBackendOption& option = OpenVINOBackendOption());
 
   bool Infer(std::vector<FDTensor>& inputs,
              std::vector<FDTensor>* outputs) override;

@@ -58,7 +59,7 @@ class OpenVINOBackend : public BaseBackend {
   std::vector<TensorInfo> GetInputInfos() override;
   std::vector<TensorInfo> GetOutputInfos() override;
 
-  std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
+  std::unique_ptr<BaseBackend> Clone(void* stream = nullptr,
                                      int device_id = -1) override;
 
 private:

@@ -71,4 +72,5 @@ class OpenVINOBackend : public BaseBackend {
   std::vector<TensorInfo> input_infos_;
   std::vector<TensorInfo> output_infos_;
 };
+
 }  // namespace fastdeploy

fastdeploy/pybind/runtime.cc

Lines changed: 2 additions & 0 deletions
@@ -34,6 +34,8 @@ void BindRuntime(pybind11::module& m) {
       .def("use_lite_backend", &RuntimeOption::UseLiteBackend)
       .def("set_paddle_mkldnn", &RuntimeOption::SetPaddleMKLDNN)
       .def("set_openvino_device", &RuntimeOption::SetOpenVINODevice)
+      .def("set_openvino_shape_info", &RuntimeOption::SetOpenVINOShapeInfo)
+      .def("set_openvino_cpu_operators", &RuntimeOption::SetOpenVINOCpuOperators)
      .def("enable_paddle_log_info", &RuntimeOption::EnablePaddleLogInfo)
      .def("disable_paddle_log_info", &RuntimeOption::DisablePaddleLogInfo)
      .def("set_paddle_mkldnn_cache_size",

fastdeploy/runtime.cc

Lines changed: 5 additions & 1 deletion
@@ -646,7 +646,11 @@ void Runtime::CreateOpenVINOBackend() {
   auto ov_option = OpenVINOBackendOption();
   ov_option.cpu_thread_num = option.cpu_thread_num;
   ov_option.device = option.openvino_device;
-  ov_option.ov_num_streams = option.ov_num_streams;
+  ov_option.shape_infos = option.ov_shape_infos;
+  ov_option.num_streams = option.ov_num_streams;
+  for (const auto& op : option.ov_cpu_operators) {
+    ov_option.cpu_operators.insert(op);
+  }
   FDASSERT(option.model_format == ModelFormat::PADDLE ||
                option.model_format == ModelFormat::ONNX,
            "OpenVINOBackend only support model format of ModelFormat::PADDLE / "

fastdeploy/runtime.h

Lines changed: 20 additions & 5 deletions
@@ -171,7 +171,22 @@ struct FASTDEPLOY_DECL RuntimeOption {
   /**
    * @brief Set device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1'....
    */
-  void SetOpenVINODevice(const std::string& name = "CPU");
+  void SetOpenVINODevice(const std::string& name = "CPU");
+
+  /**
+   * @brief Set shape info for OpenVINO
+   */
+  void SetOpenVINOShapeInfo(
+      const std::map<std::string, std::vector<int64_t>>& shape_info) {
+    ov_shape_infos = shape_info;
+  }
+
+  /**
+   * @brief While use OpenVINO backend with intel GPU, use this interface to specify operators run on CPU
+   */
+  void SetOpenVINOCpuOperators(const std::vector<std::string>& operators) {
+    ov_cpu_operators = operators;
+  }
 
   /**
    * @brief Set optimzed model dir for Paddle Lite backend.

@@ -349,9 +364,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
   size_t trt_max_batch_size = 32;
   size_t trt_max_workspace_size = 1 << 30;
 
-  // ======Only for OpenVINO Backend======
-  std::string openvino_device = "CPU";
-
   // ======Only for Poros Backend=======
   bool is_dynamic = false;
   bool long_to_int = true;

@@ -360,7 +372,10 @@ struct FASTDEPLOY_DECL RuntimeOption {
   std::string poros_file = "";
 
   // ======Only for OpenVINO Backend=======
-  int ov_num_streams = 1;
+  int ov_num_streams = 0;
+  std::string openvino_device = "CPU";
+  std::map<std::string, std::vector<int64_t>> ov_shape_infos;
+  std::vector<std::string> ov_cpu_operators;
 
   // ======Only for RKNPU2 Backend=======
   fastdeploy::rknpu2::CpuName rknpu2_cpu_name_
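SetOpenVINOShapeInfo ultimately feeds model->reshape(...) in ov_backend.cc above; pinning dynamic dimensions to static values is what lets the GPU plugin compile the model. For reference, the equivalent call in OpenVINO's Python API looks roughly like this (the path and input names are illustrative):

import openvino.runtime as ov

core = ov.Core()
model = core.read_model("model.onnx")  # illustrative path
# Fix every dynamic input dimension to a static value before compiling
model.reshape({"image": ov.PartialShape([1, 3, 640, 640]),
               "scale_factor": ov.PartialShape([1, 2])})
compiled_model = core.compile_model(model, "GPU")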

python/fastdeploy/runtime.py

Lines changed: 27 additions & 4 deletions
@@ -35,7 +35,7 @@ def __init__(self, runtime_option):
             self.runtime_option._option), "Initialize Runtime Failed!"
 
     def forward(self, *inputs):
-        """Inference with input data for poros
+        """[Only for Poros backend] Inference with input data for poros
 
         :param data: (list[str : numpy.ndarray])The input data list
         :return list of numpy.ndarray

@@ -60,7 +60,7 @@ def infer(self, data):
         return self._runtime.infer(data)
 
     def compile(self, warm_datas):
-        """compile with prewarm data for poros
+        """[Only for Poros backend] compile with prewarm data for poros
 
         :param data: (list[str : numpy.ndarray])The prewarm data list
         :return TorchScript Model

@@ -122,6 +122,9 @@ class RuntimeOption:
     """
 
     def __init__(self):
+        """Initialize a FastDeploy RuntimeOption object.
+        """
+
         self._option = C.RuntimeOption()
 
     @property

@@ -210,8 +213,6 @@ def use_cpu(self):
     def use_rknpu2(self,
                    rknpu2_name=rknpu2.CpuName.RK3588,
                    rknpu2_core=rknpu2.CoreMask.RKNN_NPU_CORE_0):
-        """Inference with CPU
-        """
         return self._option.use_rknpu2(rknpu2_name, rknpu2_core)
 
     def set_cpu_thread_num(self, thread_num=-1):

@@ -222,6 +223,10 @@ def set_cpu_thread_num(self, thread_num=-1):
         return self._option.set_cpu_thread_num(thread_num)
 
     def set_ort_graph_opt_level(self, level=-1):
+        """Set graph optimization level for ONNX Runtime backend
+
+        :param level: (int)Optimization level, -1 means the default setting
+        """
        return self._option.set_ort_graph_opt_level(level)
 
     def use_paddle_backend(self):

@@ -274,6 +279,20 @@ def set_openvino_device(self, name="CPU"):
         """
         return self._option.set_openvino_device(name)
 
+    def set_openvino_shape_info(self, shape_info):
+        """Set shape information of the model's inputs, used for GPU to fix the shape
+
+        :param shape_info: (dict{str, list of int})Shape information of model's inputs, e.g. {"image": [1, 3, 640, 640], "scale_factor": [1, 2]}
+        """
+        return self._option.set_openvino_shape_info(shape_info)
+
+    def set_openvino_cpu_operators(self, operators):
+        """While using OpenVINO backend and intel GPU, this interface specifies unsupported operators to run on CPU
+
+        :param operators: (list of string)list of operators' names, e.g. ["MulticlassNms"]
+        """
+        return self._option.set_openvino_cpu_operators(operators)
+
     def enable_paddle_log_info(self):
         """Enable print out the debug log information while using Paddle Inference backend, the log information is disabled by default.
         """

@@ -367,9 +386,13 @@ def set_trt_max_batch_size(self, trt_max_batch_size):
         return self._option.set_trt_max_batch_size(trt_max_batch_size)
 
     def enable_paddle_trt_collect_shape(self):
+        """Enable collecting subgraph shape information while using Paddle Inference with TensorRT
+        """
         return self._option.enable_paddle_trt_collect_shape()
 
     def disable_paddle_trt_collect_shape(self):
+        """Disable collecting subgraph shape information while using Paddle Inference with TensorRT
+        """
         return self._option.disable_paddle_trt_collect_shape()
 
     def use_ipu(self,
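Putting the new Python surface together, a minimal end-to-end sketch (the model paths, input names, and the set_model_path/use_openvino_backend helpers are assumed from FastDeploy's existing RuntimeOption API rather than this diff; only the set_openvino_* calls come from this commit):

import numpy as np
import fastdeploy as fd

option = fd.RuntimeOption()
option.set_model_path("model.pdmodel", "model.pdiparams")  # assumed helper
option.use_openvino_backend()                              # assumed helper
# Run on Intel GPU with CPU fallback (heterogeneous mode)
option.set_openvino_device("HETERO:GPU,CPU")
# The GPU plugin needs static shapes, so fix the inputs up front
option.set_openvino_shape_info({"image": [1, 3, 640, 640],
                                "scale_factor": [1, 2]})
# Pin operators the GPU plugin cannot run onto the CPU
option.set_openvino_cpu_operators(["MulticlassNms"])

runtime = fd.Runtime(option)
outputs = runtime.infer(
    {"image": np.zeros((1, 3, 640, 640), dtype=np.float32),
     "scale_factor": np.ones((1, 2), dtype=np.float32)})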
