@@ -3559,31 +3559,12 @@ class sycl_gpu_mgr {
3559
3559
int work_group_size = 0;
3560
3560
std::string gpus_list = "";
3561
3561
3562
- /*
3563
- Use all GPU with same top max compute units
3564
- */
3565
3562
sycl_gpu_mgr() {
3566
3563
detect_sycl_gpu_list_with_max_cu();
3567
3564
get_allow_gpus();
3568
3565
create_context_with_gpus();
3569
3566
}
3570
3567
3571
- /*
3572
- Use the assigned GPU as only one
3573
- */
3574
- sycl_gpu_mgr(int main_gpu_id) {
3575
- sycl::device device = dpct::dev_mgr::instance().get_device(main_gpu_id);
3576
- dpct::device_info prop;
3577
- dpct::get_device_info(prop, device);
3578
- gpus.push_back(main_gpu_id);
3579
- devices.push_back(device);
3580
- work_group_size = prop.get_max_work_group_size();
3581
- max_compute_units = prop.get_max_compute_units();
3582
-
3583
- get_allow_gpus();
3584
- create_context_with_gpus();
3585
- }
3586
-
3587
3568
void create_context_with_gpus() {
3588
3569
sycl::context ctx = sycl::context(devices);
3589
3570
assert(gpus.size() > 0);
@@ -3599,7 +3580,7 @@ class sycl_gpu_mgr {
3599
3580
gpus_list += std::to_string(gpus[i]);
3600
3581
gpus_list += ",";
3601
3582
}
3602
- if (gpus_list.length() > 1 ) {
3583
+ if (gpus_list.length() > 2 ) {
3603
3584
gpus_list.pop_back();
3604
3585
}
3605
3586
}
@@ -3648,8 +3629,8 @@ class sycl_gpu_mgr {
3648
3629
if (gpus[i] == id)
3649
3630
return i;
3650
3631
}
3651
- printf("miss to get device index by id=%d\n", id );
3652
- GGML_ASSERT(false) ;
3632
+ assert(false );
3633
+ return -1 ;
3653
3634
}
3654
3635
3655
3636
int get_next_index(int id) {
@@ -3658,7 +3639,8 @@ class sycl_gpu_mgr {
3658
3639
if (gpus[i] == id)
3659
3640
return i;
3660
3641
}
3661
- GGML_ASSERT(false);
3642
+ assert(false);
3643
+ return -1;
3662
3644
}
3663
3645
};
3664
3646
@@ -3667,7 +3649,6 @@ static int g_device_count = -1;
3667
3649
static int g_all_sycl_device_count = -1;
3668
3650
static int g_main_device = -1;
3669
3651
static int g_main_device_id = -1;
3670
- static bool g_ggml_backend_sycl_buffer_type_initialized = false;
3671
3652
3672
3653
static std::array<float, GGML_SYCL_MAX_DEVICES> g_default_tensor_split = {};
3673
3654
@@ -13244,7 +13225,7 @@ void ggml_backend_sycl_print_sycl_devices() {
13244
13225
}
13245
13226
13246
13227
void print_gpu_device_list() {
13247
- fprintf(stderr, "detect %d SYCL GPUs: [%s] with top Max compute units:%d\n",
13228
+ fprintf(stderr, "detect %d SYCL GPUs: [%s] with Max compute units:%d\n",
13248
13229
g_sycl_gpu_mgr->get_gpu_count(),
13249
13230
g_sycl_gpu_mgr->gpus_list.c_str(),
13250
13231
g_sycl_gpu_mgr->max_compute_units);
@@ -13283,15 +13264,6 @@ void ggml_init_sycl() try {
13283
13264
#else
13284
13265
fprintf(stderr, "%s: GGML_SYCL_F16: no\n", __func__);
13285
13266
#endif
13286
-
13287
- /* NOT REMOVE, keep it for next optimize for XMX.
13288
- #if defined(SYCL_USE_XMX)
13289
- fprintf(stderr, "%s: SYCL_USE_XMX: yes\n", __func__);
13290
- #else
13291
- fprintf(stderr, "%s: SYCL_USE_XMX: no\n", __func__);
13292
- #endif
13293
- */
13294
-
13295
13267
if (CHECK_TRY_ERROR(g_all_sycl_device_count =
13296
13268
dpct::dev_mgr::instance().device_count()) != 0) {
13297
13269
initialized = true;
@@ -13300,61 +13272,68 @@ void ggml_init_sycl() try {
13300
13272
}
13301
13273
GGML_ASSERT(g_all_sycl_device_count <= GGML_SYCL_MAX_DEVICES);
13302
13274
ggml_backend_sycl_print_sycl_devices();
13303
- if (!g_sycl_gpu_mgr) g_sycl_gpu_mgr = new sycl_gpu_mgr();
13304
- print_gpu_device_list();
13305
- initialized = true;
13306
- g_sycl_loaded = true;
13307
- }
13308
13275
13276
+ if (!g_sycl_gpu_mgr) g_sycl_gpu_mgr = new sycl_gpu_mgr();
13309
13277
13278
+ g_device_count = g_sycl_gpu_mgr->get_gpu_count();
13279
+ g_work_group_size = g_sycl_gpu_mgr->work_group_size;
13310
13280
13311
- g_device_count = g_sycl_gpu_mgr->get_gpu_count();
13312
- g_work_group_size = g_sycl_gpu_mgr->work_group_size;
13281
+ print_gpu_device_list();
13313
13282
13314
- int64_t total_vram = 0;
13283
+ int64_t total_vram = 0;
13315
13284
13285
+ /* NOT REMOVE, keep it for next optimize for XMX.
13286
+ #if defined(SYCL_USE_XMX)
13287
+ fprintf(stderr, "%s: SYCL_USE_XMX: yes\n", __func__);
13288
+ #else
13289
+ fprintf(stderr, "%s: SYCL_USE_XMX: no\n", __func__);
13290
+ #endif
13291
+ */
13292
+ for (int id = 0; id < GGML_SYCL_MAX_DEVICES; ++id) {
13293
+ g_device_caps[id].vmm = 0;
13294
+ g_device_caps[id].device_id = -1;
13295
+ g_device_caps[id].cc = 0;
13296
+ g_tensor_split[id] = 0;
13297
+ g_default_tensor_split[id] = 0;
13298
+ }
13316
13299
13317
- for (int id = 0; id < GGML_SYCL_MAX_DEVICES; ++id) {
13318
- g_device_caps[id].vmm = 0;
13319
- g_device_caps[id].device_id = -1;
13320
- g_device_caps[id].cc = 0;
13321
- g_tensor_split[id] = 0;
13322
- g_default_tensor_split[id] = 0;
13323
- }
13300
+ for (int i = 0; i < g_device_count; ++i) {
13301
+ int device_id = g_sycl_gpu_mgr->gpus[i];
13302
+ g_device_caps[i].vmm = 0;
13324
13303
13325
- for (int i = 0; i < g_device_count; ++i) {
13326
- int device_id = g_sycl_gpu_mgr->gpus[i];
13327
- g_device_caps[i].vmm = 0 ;
13304
+ dpct::device_info prop;
13305
+ SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
13306
+ prop, dpct::dev_mgr::instance().get_device(device_id)))) ;
13328
13307
13329
- dpct::device_info prop;
13330
- SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
13331
- prop, dpct::dev_mgr::instance().get_device(device_id))));
13308
+ g_default_tensor_split[i] = total_vram;
13309
+ total_vram += prop.get_global_mem_size();
13332
13310
13333
- g_default_tensor_split[i] = total_vram;
13334
- total_vram += prop.get_global_mem_size();
13311
+ g_device_caps[i].cc =
13312
+ 100 * prop.get_major_version() + 10 * prop.get_minor_version();
13313
+ }
13335
13314
13336
- g_device_caps[i].cc =
13337
- 100 * prop.get_major_version() + 10 * prop.get_minor_version() ;
13338
- }
13315
+ for (int i = 0; i < g_device_count; ++i) {
13316
+ g_default_tensor_split[i] /= total_vram ;
13317
+ }
13339
13318
13340
- for (int i = 0; i < g_device_count; ++i) {
13341
- g_default_tensor_split[i] /= total_vram;
13342
- }
13319
+ for (int i = 0; i < g_device_count; ++i) {
13320
+ SYCL_CHECK(ggml_sycl_set_device(i));
13343
13321
13344
- for (int i = 0; i < g_device_count; ++i) {
13345
- SYCL_CHECK(ggml_sycl_set_device(i));
13322
+ // create sycl streams
13323
+ for (int is = 0; is < MAX_STREAMS; ++is) {
13324
+ SYCL_CHECK(CHECK_TRY_ERROR(
13325
+ g_syclStreams[i][is] =
13326
+ dpct::get_current_device().create_queue(
13327
+ g_sycl_gpu_mgr->get_co_ctx(), dpct::get_current_device())));
13328
+ }
13346
13329
13347
- // create sycl streams
13348
- for (int is = 0; is < MAX_STREAMS; ++is) {
13349
- SYCL_CHECK(CHECK_TRY_ERROR(
13350
- g_syclStreams[i][is] =
13351
- dpct::get_current_device().create_queue(
13352
- g_sycl_gpu_mgr->get_co_ctx(), dpct::get_current_device())));
13330
+ const dpct::queue_ptr stream = g_syclStreams[i][0];
13331
+ // create sycl handle
13332
+ SYCL_CHECK(CHECK_TRY_ERROR(g_sycl_handles[i] = stream));
13353
13333
}
13354
13334
13355
- const dpct::queue_ptr stream = g_syclStreams[i][0];
13356
- // create sycl handle
13357
- SYCL_CHECK(CHECK_TRY_ERROR(g_sycl_handles[i] = stream));
13335
+ initialized = true;
13336
+ g_sycl_loaded = true;
13358
13337
}
13359
13338
}
13360
13339
catch (sycl::exception const &exc) {
@@ -16753,24 +16732,22 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
16753
16732
/* .is_host = */ nullptr,
16754
16733
};
16755
16734
16756
- ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_index) {
16757
- if (device_index>=g_device_count or device_index<0) {
16758
- printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
16759
- device_index, g_device_count-1);
16760
- GGML_ASSERT(device_index<g_device_count);
16761
- }
16735
+ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
16762
16736
static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_types[GGML_SYCL_MAX_DEVICES];
16763
16737
16764
- if (!g_ggml_backend_sycl_buffer_type_initialized) {
16738
+ static bool ggml_backend_sycl_buffer_type_initialized = false;
16739
+
16740
+ if (!ggml_backend_sycl_buffer_type_initialized) {
16765
16741
for (int i = 0; i < g_device_count; i++) {
16766
16742
ggml_backend_sycl_buffer_types[i] = {
16767
16743
/* .iface = */ ggml_backend_sycl_buffer_type_interface,
16768
16744
/* .context = */ new ggml_backend_sycl_buffer_type_context{i, GGML_SYCL_NAME + std::to_string(g_sycl_gpu_mgr->gpus[i])},
16769
16745
};
16770
16746
}
16771
- g_ggml_backend_sycl_buffer_type_initialized = true;
16747
+ ggml_backend_sycl_buffer_type_initialized = true;
16772
16748
}
16773
- return &ggml_backend_sycl_buffer_types[device_index];
16749
+
16750
+ return &ggml_backend_sycl_buffer_types[device];
16774
16751
}
16775
16752
16776
16753
// sycl split buffer type
@@ -17519,17 +17496,6 @@ GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id) {
17519
17496
return g_sycl_gpu_mgr->get_index(device_id);
17520
17497
}
17521
17498
17522
- GGML_API GGML_CALL void ggml_backend_sycl_set_single_device(int main_gpu_id) {
17523
- GGML_ASSERT(main_gpu_id<g_all_sycl_device_count);
17524
- printf("ggml_backend_sycl_set_single_device: use single device: %d\n", main_gpu_id);
17525
- if (g_sycl_gpu_mgr) {
17526
- delete g_sycl_gpu_mgr;
17527
- }
17528
- g_sycl_gpu_mgr = new sycl_gpu_mgr(main_gpu_id);
17529
- ggml_init_sycl();
17530
- g_ggml_backend_sycl_buffer_type_initialized = false;
17531
- }
17532
-
17533
17499
extern "C" int ggml_backend_sycl_reg_devices();
17534
17500
17535
17501
int ggml_backend_sycl_reg_devices() {
0 commit comments