diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index 54aa822c90d29..903d61fff6ea6 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -1116,7 +1116,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { #endif #ifdef GGML_USE_METAL - new_clip->backend = ggml_backend_metal_init(); + new_clip->backend = ggml_backend_metal_init(0); LOG_TEE("%s: CLIP using Metal backend\n", __func__); #endif diff --git a/examples/rpc/rpc-server.cpp b/examples/rpc/rpc-server.cpp index 6342e6488602a..ca7b5d22a8bc4 100644 --- a/examples/rpc/rpc-server.cpp +++ b/examples/rpc/rpc-server.cpp @@ -75,9 +75,9 @@ static ggml_backend_t create_backend() { } #elif GGML_USE_METAL fprintf(stderr, "%s: using Metal backend\n", __func__); - backend = ggml_backend_metal_init(); + backend = ggml_backend_metal_init(0); if (!backend) { - fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__); + fprintf(stderr, "%s: ggml_backend_metal_init(0) failed\n", __func__); } #endif diff --git a/ggml/include/ggml-metal.h b/ggml/include/ggml-metal.h index d483cf1ac40c6..b4bc892e6833d 100644 --- a/ggml/include/ggml-metal.h +++ b/ggml/include/ggml-metal.h @@ -42,7 +42,7 @@ extern "C" { GGML_API void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data); -GGML_API ggml_backend_t ggml_backend_metal_init(void); +GGML_API ggml_backend_t ggml_backend_metal_init(int deviceIndex); GGML_API bool ggml_backend_is_metal(ggml_backend_t backend); diff --git a/ggml/src/ggml-metal.m b/ggml/src/ggml-metal.m index 9fc08ab3aa5cc..cf0bb0d5f8b63 100644 --- a/ggml/src/ggml-metal.m +++ b/ggml/src/ggml-metal.m @@ -6,6 +6,7 @@ #import <Foundation/Foundation.h> #import <Metal/Metal.h> +#import <sys/sysctl.h> #undef MIN #undef MAX @@ -293,21 +294,59 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){ return data; } -static struct ggml_backend_metal_context * ggml_metal_init(int n_cb) { - GGML_METAL_LOG_INFO("%s: allocating\n", __func__); +static struct 
ggml_backend_metal_context * ggml_metal_init( int deviceIndex, int n_cb) { + GGML_METAL_LOG_INFO("%s: allocating\n", __func__); #if TARGET_OS_OSX && !GGML_METAL_NDEBUG - // Show all the Metal device instances in the system - NSArray * devices = MTLCopyAllDevices(); - for (id<MTLDevice> device in devices) { - GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]); - } - [devices release]; // since it was created by a *Copy* C method + // Show all the Metal device instances in the system + NSArray * devices = MTLCopyAllDevices(); + for (id<MTLDevice> device in devices) { + GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]); + } + [devices release]; // since it was created by a *Copy* C method #endif - // Pick and show default Metal device - id<MTLDevice> device = MTLCreateSystemDefaultDevice(); - GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]); + +size_t size_arm; +NSMutableString *logMessages = [NSMutableString string]; + +// Check for Apple Silicon (M1, M2, etc.) 
+if (sysctlbyname("hw.optional.arm64", NULL, &size_arm, NULL, 0) == 0 && size_arm == 4) { + int isAppleSilicon = 0; + sysctlbyname("hw.optional.arm64", &isAppleSilicon, &size_arm, NULL, 0); + if (isAppleSilicon) { + [logMessages appendString:@"This Mac is running on an Apple Silicon (M) Series processor."]; + } else { + [logMessages appendString:@"This Mac is running on an Intel processor."]; + } +} else { + [logMessages appendString:@"This Mac is running on an Intel processor."]; +} + + GGML_METAL_LOG_INFO("%s'%s'\n", __func__, [logMessages UTF8String]); + // Pick and show default Metal device + id<MTLDevice> device = MTLCreateSystemDefaultDevice(); + NSString *defaultDeviceName = device.name; + GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]); + GGML_METAL_LOG_INFO("%s: Passed GPU at index %d:\n", __func__, deviceIndex); + NSArray<id<MTLDevice>> *alldevices = MTLCopyAllDevices(); +// Check if passed device index is within range + if (deviceIndex<=(alldevices.count -1)) { + for (NSUInteger i = 0; i < alldevices.count; i++) { + id<MTLDevice> selectgpu = alldevices[i]; + NSString *deviceName = selectgpu.name; + if (i == deviceIndex) { + if (![defaultDeviceName isEqualToString:deviceName]) { + device = selectgpu; + GGML_METAL_LOG_INFO("%s: Picking Index GPU Name: %s\n", __func__, [ deviceName UTF8String]); + }else{ + [alldevices release]; + } + break; + } + } + +} // Configure context struct ggml_backend_metal_context * ctx = malloc(sizeof(struct ggml_backend_metal_context)); @@ -3238,8 +3277,8 @@ static ggml_guid_t ggml_backend_metal_guid(void) { return &guid; } -ggml_backend_t ggml_backend_metal_init(void) { - struct ggml_backend_metal_context * ctx = ggml_metal_init(GGML_DEFAULT_N_THREADS); +ggml_backend_t ggml_backend_metal_init(int deviceIndex) { + struct ggml_backend_metal_context * ctx = ggml_metal_init(deviceIndex,GGML_DEFAULT_N_THREADS); if (ctx == NULL) { GGML_METAL_LOG_ERROR("%s: error: failed to allocate context\n", __func__); return NULL; @@ 
-3295,8 +3334,8 @@ void ggml_backend_metal_capture_next_compute(ggml_backend_t backend) { GGML_CALL ggml_backend_t ggml_backend_reg_metal_init(const char * params, void * user_data); // silence warning GGML_CALL ggml_backend_t ggml_backend_reg_metal_init(const char * params, void * user_data) { - return ggml_backend_metal_init(); + return ggml_backend_metal_init((int) (intptr_t) user_data); GGML_UNUSED(params); - GGML_UNUSED(user_data); + // GGML_UNUSED(user_data); } diff --git a/src/llama.cpp b/src/llama.cpp index 97dd1b3fea4b9..04b8c985ab912 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -16588,7 +16588,7 @@ struct llama_context * llama_new_context_with_model( // initialize backends #if defined(GGML_USE_METAL) if (model->n_gpu_layers > 0) { - ctx->backend_metal = ggml_backend_metal_init(); + ctx->backend_metal = ggml_backend_metal_init(model->main_gpu); if (ctx->backend_metal == nullptr) { LLAMA_LOG_ERROR("%s: failed to initialize Metal backend\n", __func__); llama_free(ctx);