Skip to content

Commit d8e29ad

Browse files
mv function implement to cpp
1 parent 99937ef commit d8e29ad

File tree

2 files changed

+306
-270
lines changed

2 files changed

+306
-270
lines changed

ggml-sycl/common.cpp

Lines changed: 282 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,4 +213,285 @@ static ggml_sycl_device_info ggml_sycl_init() try {
213213
// Accessor for the shared SYCL device information object. The object is
// produced by ggml_sycl_init() the first time this function runs; the same
// instance is handed back on every later call.
ggml_sycl_device_info &ggml_sycl_info() {
    static ggml_sycl_device_info cached_info = ggml_sycl_init();
    return cached_info;
}
217+
218+
//--sycl_device_mgr--
219+
220+
// Builds the device list and the matching SYCL contexts for the requested
// filter, then caches the comma-separated list of selected device IDs.
//
//   SYCL_DEVICES_TOP_LEVEL_ZERO -> GPUs sharing the top compute-unit count,
//                                  all bound to one shared context
//   SYCL_ALL_DEVICES            -> every device, one context per device
//   SYCL_VISIBLE_DEVICES        -> devices from get_sycl_visible_devices(),
//                                  one context per device
//
// Any other value is reported on stderr and no detection is run.
sycl_device_mgr::sycl_device_mgr(
    ggml_sycl_backend_device_filter device_filter) {
    if (device_filter == SYCL_DEVICES_TOP_LEVEL_ZERO) {
        detect_sycl_gpu_list_with_max_cu();
        create_context_for_group_gpus();
    } else if (device_filter == SYCL_ALL_DEVICES) {
        detect_all_sycl_device_list();
        create_context_for_devices();
    } else if (device_filter == SYCL_VISIBLE_DEVICES) {
        detect_sycl_visible_device_list();
        create_context_for_devices();
    } else {
        std::cerr << "sycl_device_mgr: Invalid device_filter " << device_filter
                  << std::endl;
    }

    // Runs for every filter value, including the invalid one.
    init_allow_devices();
}
241+
242+
/*
243+
Bind all gpus in same host with same context, for better performance in
244+
device-to-device copy in the future.
245+
*/
246+
void sycl_device_mgr::create_context_for_group_gpus() {
247+
sycl::context ctx = sycl::context(devices);
248+
assert(device_ids.size() > 0);
249+
first_queue = dpct::get_current_device().create_queue(ctx, devices[0]);
250+
sycl::context ctx0 = first_queue->get_context();
251+
for (int i = 0; i < device_ids.size(); i++) {
252+
ctxs.push_back(ctx0);
253+
}
254+
}
255+
256+
// Makes `device` the current dpct device and then asks that current device to
// create a queue for `device` inside `ctx`. Returns the queue pointer produced
// by dpct's create_queue().
sycl::queue *sycl_device_mgr::create_queue_for_device(sycl::context &ctx,
                                                      sycl::device &device) {
    const auto dpct_id = dpct::dev_mgr::instance().get_device_id(device);
    dpct::select_device(dpct_id);
    return dpct::get_current_device().create_queue(ctx, device);
}
262+
263+
// Creates a queue for the device with dpct ID `device_id`, using the context
// stored for that device in `ctxs`.
//
// `device_id` must be one of the selected IDs in `device_ids`. If it is not,
// get_device_index() returns -1; that value must never be used to index
// `ctxs`, so the error is reported on stderr and the process exits with
// status 1.
sycl::queue *sycl_device_mgr::create_queue_for_device_id(int device_id) {
    const int index = get_device_index(device_id);
    if (index < 0) {
        std::cerr << __func__ << ": SYCL device:" << device_id
                  << " is not in the selected device list:[" << device_list
                  << "]" << std::endl;
        std::exit(1);
    }

    // Local copies are needed because create_queue_for_device() takes
    // non-const lvalue references.
    sycl::context ctx = ctxs[index];
    sycl::device device = dpct::dev_mgr::instance().get_device(device_id);
    return create_queue_for_device(ctx, device);
}
270+
271+
// Returns the position of `device_id` inside `device_ids` (first match), or
// -1 when the ID is not part of the selected device list.
int sycl_device_mgr::get_device_index(int device_id) {
    const auto pos = std::find(device_ids.begin(), device_ids.end(), device_id);
    if (pos == device_ids.end()) {
        return -1;
    }
    return static_cast<int>(pos - device_ids.begin());
}
278+
279+
void sycl_device_mgr::create_context_for_devices() {
280+
for (int i = 0; i < device_ids.size(); i++) {
281+
sycl::context ctx = sycl::context(devices[i]);
282+
ctxs.push_back(ctx);
283+
}
284+
}
285+
286+
// Rebuilds `device_list` as the selected device IDs joined by commas
// (e.g. "0,2,3"). The string is empty when no device is selected.
void sycl_device_mgr::init_allow_devices() {
    device_list.clear();

    bool need_separator = false;
    for (const auto id : device_ids) {
        if (need_separator) {
            device_list += ",";
        }
        device_list += std::to_string(id);
        need_separator = true;
    }
}
296+
297+
bool sycl_device_mgr::is_allowed_device(int device_id) {
298+
return std::find(device_ids.begin(), device_ids.end(), device_id) !=
299+
device_ids.end();
300+
}
301+
302+
void sycl_device_mgr::detect_all_sycl_device_list() try {
303+
int device_count = dpct::dev_mgr::instance().device_count();
304+
305+
for (int id = 0; id < device_count; id++) {
306+
sycl::device device = dpct::dev_mgr::instance().get_device(id);
307+
device_ids.push_back(id);
308+
devices.push_back(device);
309+
dpct::device_info prop;
310+
dpct::get_device_info(prop, device);
311+
work_group_sizes.push_back(prop.get_max_work_group_size());
312+
max_compute_units.push_back(prop.get_max_compute_units());
313+
}
314+
return;
315+
} catch (sycl::exception const &exc) {
316+
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
317+
<< ", line:" << __LINE__ << std::endl;
318+
std::exit(1);
319+
}
320+
321+
void sycl_device_mgr::detect_sycl_visible_device_list() try {
322+
std::vector<int> sycl_devices = get_sycl_visible_devices();
323+
int device_count = dpct::dev_mgr::instance().device_count();
324+
325+
for (int i = 0; i < sycl_devices.size(); i++) {
326+
int id = sycl_devices[i];
327+
if (id >= device_count) {
328+
std::cerr << __func__ << ": invalid device_id:" << id
329+
<< " from GGML_SYCL_VISIBLE_DEVICES="
330+
<< getenv("GGML_SYCL_VISIBLE_DEVICES")
331+
<< ", available IDs: ";
332+
if (device_count > 1) {
333+
std::cerr << "[0, " << device_count - 1 << "]";
334+
} else if (device_count == 1) {
335+
std::cerr << "[0]";
336+
} else {
337+
std::cerr << "[]";
338+
}
339+
std::cerr << std::endl;
340+
}
341+
sycl::device device = dpct::dev_mgr::instance().get_device(id);
342+
device_ids.push_back(id);
343+
devices.push_back(device);
344+
dpct::device_info prop;
345+
dpct::get_device_info(prop, device);
346+
work_group_sizes.push_back(prop.get_max_work_group_size());
347+
max_compute_units.push_back(prop.get_max_compute_units());
348+
}
349+
return;
350+
} catch (sycl::exception const &exc) {
351+
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
352+
<< ", line:" << __LINE__ << std::endl;
353+
std::exit(1);
354+
}
355+
356+
/*
357+
Use all GPUs with same top max compute units
358+
*/
359+
void sycl_device_mgr::detect_sycl_gpu_list_with_max_cu() try {
360+
int device_count = dpct::dev_mgr::instance().device_count();
361+
int local_max_compute_units = 0;
362+
for (int id = 0; id < device_count; id++) {
363+
sycl::device device = dpct::dev_mgr::instance().get_device(id);
364+
if (!device.is_gpu())
365+
continue;
366+
dpct::device_info prop;
367+
dpct::get_device_info(prop, device);
368+
if (local_max_compute_units < prop.get_max_compute_units())
369+
local_max_compute_units = prop.get_max_compute_units();
370+
}
371+
372+
for (int id = 0; id < device_count; id++) {
373+
sycl::device device = dpct::dev_mgr::instance().get_device(id);
374+
if (!device.is_gpu())
375+
continue;
376+
dpct::device_info prop;
377+
dpct::get_device_info(prop, device);
378+
if (local_max_compute_units == prop.get_max_compute_units() &&
379+
is_ext_oneapi_device(device)) {
380+
device_ids.push_back(id);
381+
devices.push_back(device);
382+
work_group_sizes.push_back(prop.get_max_work_group_size());
383+
max_compute_units.push_back(prop.get_max_compute_units());
384+
}
385+
}
386+
return;
387+
} catch (sycl::exception const &exc) {
388+
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
389+
<< ", line:" << __LINE__ << std::endl;
390+
std::exit(1);
391+
}
392+
393+
// Number of selected devices, i.e. the length of `device_ids`.
int sycl_device_mgr::get_device_count() {
    return static_cast<int>(device_ids.size());
}
394+
395+
// Returns true when `dev` runs on one of the oneAPI extension backends
// checked here (Level Zero, CUDA or HIP), false for every other backend.
bool sycl_device_mgr::is_ext_oneapi_device(const sycl::device &dev) {
    switch (dev.get_backend()) {
    case sycl::backend::ext_oneapi_level_zero:
    case sycl::backend::ext_oneapi_cuda:
    case sycl::backend::ext_oneapi_hip:
        return true;
    default:
        return false;
    }
}
403+
//--sycl_device_mgr--
404+
405+
//--ggml_sycl_device_info--
406+
// Prints a one-line summary of the selected SYCL devices to stderr. The
// wording depends on which of the environment variables
// ONEAPI_DEVICE_SELECTOR / GGML_SYCL_VISIBLE_DEVICES were recorded as present
// (oneapi_device_selector_existed / sycl_visible_devices_existed).
//
// Requires `device_mgr` to be set (checked with GGML_ASSERT).
void ggml_sycl_device_info::print_gpu_device_list() {
    GGML_ASSERT(device_mgr);

    const int count = device_mgr->get_device_count();
    const char *list = devices_list();

    // The *_existed flags are cached, so the variables may have been unset
    // since; never hand a null pointer to "%s".
    const char *selector_env = getenv("ONEAPI_DEVICE_SELECTOR");
    const char *visible_env = getenv("GGML_SYCL_VISIBLE_DEVICES");
    const char *selector = selector_env ? selector_env : "";
    const char *visible = visible_env ? visible_env : "";

    // Format strings are passed as literals so the compiler can check them
    // against the arguments.
    if (oneapi_device_selector_existed && sycl_visible_devices_existed) {
        fprintf(stderr,
                "detect %d SYCL devices:[%s] by ONEAPI_DEVICE_SELECTOR=%s and "
                "GGML_SYCL_VISIBLE_DEVICES=%s\n",
                count, list, selector, visible);
    } else if (oneapi_device_selector_existed) {
        fprintf(stderr,
                "detect %d SYCL devices:[%s] by ONEAPI_DEVICE_SELECTOR=%s\n",
                count, list, selector);
    } else if (sycl_visible_devices_existed) {
        fprintf(stderr,
                "detect %d SYCL devices:[%s] by GGML_SYCL_VISIBLE_DEVICES=%s\n",
                count, list, visible);
    } else {
        // With no device detected the list is empty; report 0 compute units
        // instead of reading element 0 of an empty container.
        const int top_compute_units = device_mgr->max_compute_units.empty()
                                          ? 0
                                          : device_mgr->max_compute_units[0];
        fprintf(stderr,
                "detect %d SYCL level-zero GPUs:[%s] with top Max compute "
                "units:%d, to use any SYCL devices, set/export "
                "GGML_SYCL_VISIBLE_DEVICES or ONEAPI_DEVICE_SELECTOR\n",
                count, list, top_compute_units);
    }
}
432+
433+
int ggml_sycl_device_info::work_group_size(int device_id) {
434+
GGML_ASSERT(device_mgr);
435+
return device_mgr->work_group_sizes[device_id];
436+
}
437+
438+
void ggml_sycl_device_info::refresh_device() {
439+
oneapi_device_selector_existed = env_existed("ONEAPI_DEVICE_SELECTOR");
440+
sycl_visible_devices_existed = env_existed("GGML_SYCL_VISIBLE_DEVICES");
441+
if (!device_mgr)
442+
delete device_mgr;
443+
444+
if (sycl_visible_devices_existed) {
445+
device_mgr = new sycl_device_mgr(SYCL_VISIBLE_DEVICES);
446+
} else if (oneapi_device_selector_existed) {
447+
device_mgr = new sycl_device_mgr(SYCL_ALL_DEVICES);
448+
} else {
449+
device_mgr = new sycl_device_mgr(SYCL_DEVICES_TOP_LEVEL_ZERO);
450+
}
451+
452+
device_count = device_mgr->get_device_count();
453+
454+
int64_t total_vram = 0;
455+
456+
for (int i = 0; i < device_count; ++i) {
457+
int id = get_device_id(i);
458+
devices[id].vmm = 0;
459+
dpct::device_info prop;
460+
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
461+
prop, dpct::dev_mgr::instance().get_device(id))));
462+
463+
default_tensor_split[i] =
464+
total_vram; // continue data, so use device index
465+
total_vram += prop.get_global_mem_size();
466+
467+
devices[id].cc =
468+
100 * prop.get_major_version() + 10 * prop.get_minor_version();
469+
}
470+
471+
for (int i = 0; i < device_count; ++i) {
472+
default_tensor_split[i] /=
473+
total_vram; // continue data, so use device index
474+
}
475+
476+
print_gpu_device_list();
477+
}
478+
479+
bool ggml_sycl_device_info::is_allowed_device(int device_id) {
480+
return device_mgr->is_allowed_device(device_id);
481+
}
482+
483+
const char *ggml_sycl_device_info::devices_list() {
484+
return device_mgr->device_list.c_str();
485+
}
486+
487+
int ggml_sycl_device_info::get_device_id(int device_index) {
488+
if (device_index < device_mgr->device_ids.size()) {
489+
return device_mgr->device_ids.at(device_index);
490+
} else {
491+
std::cerr << __func__ << ":SYCL device:" << device_index
492+
<< " is out of range:[" << devices_list() << "]" << std::endl;
493+
std::exit(1);
494+
}
495+
}
496+
497+
//--ggml_sycl_device_info--

0 commit comments

Comments
 (0)