@@ -213,4 +213,285 @@ static ggml_sycl_device_info ggml_sycl_init() try {
213
213
// Accessor for the shared SYCL device info object. The object is a
// function-local static, so ggml_sycl_init() runs the first time this is
// called and every later call returns a reference to the same instance.
ggml_sycl_device_info &ggml_sycl_info() {
    static ggml_sycl_device_info info(ggml_sycl_init());
    return info;
}
217
+
218
+ // --sycl_device_mgr--
219
+
220
+ sycl_device_mgr::sycl_device_mgr (
221
+ ggml_sycl_backend_device_filter device_filter) {
222
+ switch (device_filter) {
223
+ case SYCL_DEVICES_TOP_LEVEL_ZERO:
224
+ detect_sycl_gpu_list_with_max_cu ();
225
+ create_context_for_group_gpus ();
226
+ break ;
227
+ case SYCL_ALL_DEVICES:
228
+ detect_all_sycl_device_list ();
229
+ create_context_for_devices ();
230
+ break ;
231
+ case SYCL_VISIBLE_DEVICES:
232
+ detect_sycl_visible_device_list ();
233
+ create_context_for_devices ();
234
+ break ;
235
+ default :
236
+ std::cerr << " sycl_device_mgr: Invalid device_filter " << device_filter
237
+ << std::endl;
238
+ }
239
+ init_allow_devices ();
240
+ }
241
+
242
+ /*
243
+ Bind all gpus in same host with same context, for better performance in
244
+ device-to-device copy in the future.
245
+ */
246
+ void sycl_device_mgr::create_context_for_group_gpus () {
247
+ sycl::context ctx = sycl::context (devices);
248
+ assert (device_ids.size () > 0 );
249
+ first_queue = dpct::get_current_device ().create_queue (ctx, devices[0 ]);
250
+ sycl::context ctx0 = first_queue->get_context ();
251
+ for (int i = 0 ; i < device_ids.size (); i++) {
252
+ ctxs.push_back (ctx0);
253
+ }
254
+ }
255
+
256
+ sycl::queue *sycl_device_mgr::create_queue_for_device (sycl::context &ctx,
257
+ sycl::device &device) {
258
+ dpct::select_device (dpct::dev_mgr::instance ().get_device_id (device));
259
+ auto res = dpct::get_current_device ().create_queue (ctx, device);
260
+ return res;
261
+ }
262
+
263
+ sycl::queue *sycl_device_mgr::create_queue_for_device_id (int device_id) {
264
+ int i = get_device_index (device_id);
265
+ sycl::context ctx = ctxs[i];
266
+ sycl::device device = dpct::dev_mgr::instance ().get_device (device_id);
267
+ ;
268
+ return create_queue_for_device (ctx, device);
269
+ }
270
+
271
+ int sycl_device_mgr::get_device_index (int device_id) {
272
+ for (int i = 0 ; i < device_ids.size (); i++) {
273
+ if (device_ids[i] == device_id)
274
+ return i;
275
+ }
276
+ return -1 ;
277
+ }
278
+
279
+ void sycl_device_mgr::create_context_for_devices () {
280
+ for (int i = 0 ; i < device_ids.size (); i++) {
281
+ sycl::context ctx = sycl::context (devices[i]);
282
+ ctxs.push_back (ctx);
283
+ }
284
+ }
285
+
286
+ void sycl_device_mgr::init_allow_devices () {
287
+ device_list = " " ;
288
+ for (size_t i = 0 ; i < device_ids.size (); ++i) {
289
+ device_list += std::to_string (device_ids[i]);
290
+ device_list += " ," ;
291
+ }
292
+ if (device_list.length () > 1 ) {
293
+ device_list.pop_back ();
294
+ }
295
+ }
296
+
297
+ bool sycl_device_mgr::is_allowed_device (int device_id) {
298
+ return std::find (device_ids.begin (), device_ids.end (), device_id) !=
299
+ device_ids.end ();
300
+ }
301
+
302
+ void sycl_device_mgr::detect_all_sycl_device_list () try {
303
+ int device_count = dpct::dev_mgr::instance ().device_count ();
304
+
305
+ for (int id = 0 ; id < device_count; id++) {
306
+ sycl::device device = dpct::dev_mgr::instance ().get_device (id);
307
+ device_ids.push_back (id);
308
+ devices.push_back (device);
309
+ dpct::device_info prop;
310
+ dpct::get_device_info (prop, device);
311
+ work_group_sizes.push_back (prop.get_max_work_group_size ());
312
+ max_compute_units.push_back (prop.get_max_compute_units ());
313
+ }
314
+ return ;
315
+ } catch (sycl::exception const &exc) {
316
+ std::cerr << exc.what () << " Exception caught at file:" << __FILE__
317
+ << " , line:" << __LINE__ << std::endl;
318
+ std::exit (1 );
319
+ }
320
+
321
+ void sycl_device_mgr::detect_sycl_visible_device_list () try {
322
+ std::vector<int > sycl_devices = get_sycl_visible_devices ();
323
+ int device_count = dpct::dev_mgr::instance ().device_count ();
324
+
325
+ for (int i = 0 ; i < sycl_devices.size (); i++) {
326
+ int id = sycl_devices[i];
327
+ if (id >= device_count) {
328
+ std::cerr << __func__ << " : invalid device_id:" << id
329
+ << " from GGML_SYCL_VISIBLE_DEVICES="
330
+ << getenv (" GGML_SYCL_VISIBLE_DEVICES" )
331
+ << " , available IDs: " ;
332
+ if (device_count > 1 ) {
333
+ std::cerr << " [0, " << device_count - 1 << " ]" ;
334
+ } else if (device_count == 1 ) {
335
+ std::cerr << " [0]" ;
336
+ } else {
337
+ std::cerr << " []" ;
338
+ }
339
+ std::cerr << std::endl;
340
+ }
341
+ sycl::device device = dpct::dev_mgr::instance ().get_device (id);
342
+ device_ids.push_back (id);
343
+ devices.push_back (device);
344
+ dpct::device_info prop;
345
+ dpct::get_device_info (prop, device);
346
+ work_group_sizes.push_back (prop.get_max_work_group_size ());
347
+ max_compute_units.push_back (prop.get_max_compute_units ());
348
+ }
349
+ return ;
350
+ } catch (sycl::exception const &exc) {
351
+ std::cerr << exc.what () << " Exception caught at file:" << __FILE__
352
+ << " , line:" << __LINE__ << std::endl;
353
+ std::exit (1 );
354
+ }
355
+
356
+ /*
357
+ Use all GPUs with same top max compute units
358
+ */
359
+ void sycl_device_mgr::detect_sycl_gpu_list_with_max_cu () try {
360
+ int device_count = dpct::dev_mgr::instance ().device_count ();
361
+ int local_max_compute_units = 0 ;
362
+ for (int id = 0 ; id < device_count; id++) {
363
+ sycl::device device = dpct::dev_mgr::instance ().get_device (id);
364
+ if (!device.is_gpu ())
365
+ continue ;
366
+ dpct::device_info prop;
367
+ dpct::get_device_info (prop, device);
368
+ if (local_max_compute_units < prop.get_max_compute_units ())
369
+ local_max_compute_units = prop.get_max_compute_units ();
370
+ }
371
+
372
+ for (int id = 0 ; id < device_count; id++) {
373
+ sycl::device device = dpct::dev_mgr::instance ().get_device (id);
374
+ if (!device.is_gpu ())
375
+ continue ;
376
+ dpct::device_info prop;
377
+ dpct::get_device_info (prop, device);
378
+ if (local_max_compute_units == prop.get_max_compute_units () &&
379
+ is_ext_oneapi_device (device)) {
380
+ device_ids.push_back (id);
381
+ devices.push_back (device);
382
+ work_group_sizes.push_back (prop.get_max_work_group_size ());
383
+ max_compute_units.push_back (prop.get_max_compute_units ());
384
+ }
385
+ }
386
+ return ;
387
+ } catch (sycl::exception const &exc) {
388
+ std::cerr << exc.what () << " Exception caught at file:" << __FILE__
389
+ << " , line:" << __LINE__ << std::endl;
390
+ std::exit (1 );
391
+ }
392
+
393
+ int sycl_device_mgr::get_device_count () { return (int )device_ids.size (); }
394
+
395
+ bool sycl_device_mgr::is_ext_oneapi_device (const sycl::device &dev) {
396
+ sycl::backend dev_backend = dev.get_backend ();
397
+ if (dev_backend == sycl::backend::ext_oneapi_level_zero ||
398
+ dev_backend == sycl::backend::ext_oneapi_cuda ||
399
+ dev_backend == sycl::backend::ext_oneapi_hip)
400
+ return true ;
401
+ return false ;
402
+ }
403
+ // --sycl_device_mgr--
404
+
405
+ // --ggml_sycl_device_info--
406
+ void ggml_sycl_device_info::print_gpu_device_list () {
407
+ GGML_ASSERT (device_mgr);
408
+
409
+ char *hint = NULL ;
410
+ if (oneapi_device_selector_existed && sycl_visible_devices_existed) {
411
+ hint = " detect %d SYCL devices:[%s] by ONEAPI_DEVICE_SELECTOR=%s and "
412
+ " GGML_SYCL_VISIBLE_DEVICES=%s\n " ;
413
+ fprintf (stderr, hint, device_mgr->get_device_count (), devices_list (),
414
+ getenv (" ONEAPI_DEVICE_SELECTOR" ),
415
+ getenv (" GGML_SYCL_VISIBLE_DEVICES" ));
416
+ } else if (oneapi_device_selector_existed) {
417
+ hint = " detect %d SYCL devices:[%s] by ONEAPI_DEVICE_SELECTOR=%s\n " ;
418
+ fprintf (stderr, hint, device_mgr->get_device_count (), devices_list (),
419
+ getenv (" ONEAPI_DEVICE_SELECTOR" ));
420
+ } else if (sycl_visible_devices_existed) {
421
+ hint = " detect %d SYCL devices:[%s] by GGML_SYCL_VISIBLE_DEVICES=%s\n " ;
422
+ fprintf (stderr, hint, device_mgr->get_device_count (), devices_list (),
423
+ getenv (" GGML_SYCL_VISIBLE_DEVICES" ));
424
+ } else {
425
+ hint = " detect %d SYCL level-zero GPUs:[%s] with top Max compute "
426
+ " units:%d, to use any SYCL devices, set/export "
427
+ " GGML_SYCL_VISIBLE_DEVICES or ONEAPI_DEVICE_SELECTOR\n " ;
428
+ fprintf (stderr, hint, device_mgr->get_device_count (), devices_list (),
429
+ device_mgr->max_compute_units [0 ]);
430
+ }
431
+ }
432
+
433
+ int ggml_sycl_device_info::work_group_size (int device_id) {
434
+ GGML_ASSERT (device_mgr);
435
+ return device_mgr->work_group_sizes [device_id];
436
+ }
437
+
438
+ void ggml_sycl_device_info::refresh_device () {
439
+ oneapi_device_selector_existed = env_existed (" ONEAPI_DEVICE_SELECTOR" );
440
+ sycl_visible_devices_existed = env_existed (" GGML_SYCL_VISIBLE_DEVICES" );
441
+ if (!device_mgr)
442
+ delete device_mgr;
443
+
444
+ if (sycl_visible_devices_existed) {
445
+ device_mgr = new sycl_device_mgr (SYCL_VISIBLE_DEVICES);
446
+ } else if (oneapi_device_selector_existed) {
447
+ device_mgr = new sycl_device_mgr (SYCL_ALL_DEVICES);
448
+ } else {
449
+ device_mgr = new sycl_device_mgr (SYCL_DEVICES_TOP_LEVEL_ZERO);
450
+ }
451
+
452
+ device_count = device_mgr->get_device_count ();
453
+
454
+ int64_t total_vram = 0 ;
455
+
456
+ for (int i = 0 ; i < device_count; ++i) {
457
+ int id = get_device_id (i);
458
+ devices[id].vmm = 0 ;
459
+ dpct::device_info prop;
460
+ SYCL_CHECK (CHECK_TRY_ERROR (dpct::get_device_info (
461
+ prop, dpct::dev_mgr::instance ().get_device (id))));
462
+
463
+ default_tensor_split[i] =
464
+ total_vram; // continue data, so use device index
465
+ total_vram += prop.get_global_mem_size ();
466
+
467
+ devices[id].cc =
468
+ 100 * prop.get_major_version () + 10 * prop.get_minor_version ();
469
+ }
470
+
471
+ for (int i = 0 ; i < device_count; ++i) {
472
+ default_tensor_split[i] /=
473
+ total_vram; // continue data, so use device index
474
+ }
475
+
476
+ print_gpu_device_list ();
477
+ }
478
+
479
+ bool ggml_sycl_device_info::is_allowed_device (int device_id) {
480
+ return device_mgr->is_allowed_device (device_id);
481
+ }
482
+
483
+ const char *ggml_sycl_device_info::devices_list () {
484
+ return device_mgr->device_list .c_str ();
485
+ }
486
+
487
+ int ggml_sycl_device_info::get_device_id (int device_index) {
488
+ if (device_index < device_mgr->device_ids .size ()) {
489
+ return device_mgr->device_ids .at (device_index);
490
+ } else {
491
+ std::cerr << __func__ << " :SYCL device:" << device_index
492
+ << " is out of range:[" << devices_list () << " ]" << std::endl;
493
+ std::exit (1 );
494
+ }
495
+ }
496
+
497
+ // --ggml_sycl_device_info--
0 commit comments