From fa5b31a5cabd271aeec40d182fe1d5f3baa9a8be Mon Sep 17 00:00:00 2001 From: Yifan Gu Date: Thu, 3 Oct 2024 23:14:28 +0000 Subject: [PATCH] vulkan : add GGML_VK_FORCE_HEAP_INDEX env var Some Vulkan devices (namely integrated graphics cards) have multiple memory heaps: a smaller dedicated memory heap and a larger shared memory heap. ggml uses the first usable memory type, which usually resides on the smaller dedicated memory heap. This is likely to cause allocation failures. This patch adds an environment variable that forces allocation on a specific memory heap. --- ggml/src/ggml-vulkan.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-vulkan.cpp b/ggml/src/ggml-vulkan.cpp index 12ad9d810327f..abe1ba46ac30b 100644 --- a/ggml/src/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan.cpp @@ -148,6 +148,7 @@ struct vk_device_struct { vk::PhysicalDeviceProperties properties; std::string name; uint64_t max_memory_allocation_size; + uint32_t force_heap_index; bool fp16; vk::Device device; uint32_t vendor_id; @@ -1008,9 +1009,12 @@ static void ggml_vk_queue_cleanup(vk_device& device, vk_queue& q) { q.cmd_buffer_idx = 0; } -static uint32_t find_properties(const vk::PhysicalDeviceMemoryProperties* mem_props, vk::MemoryRequirements* mem_req, vk::MemoryPropertyFlags flags) { +static uint32_t find_properties(const vk::PhysicalDeviceMemoryProperties* mem_props, vk::MemoryRequirements* mem_req, vk::MemoryPropertyFlags flags, uint32_t force_heap_index = UINT32_MAX) { for (uint32_t i = 0; i < mem_props->memoryTypeCount; ++i) { vk::MemoryType memory_type = mem_props->memoryTypes[i]; + if (force_heap_index != UINT32_MAX && memory_type.heapIndex != force_heap_index) { + continue; + } if ((mem_req->memoryTypeBits & ((uint64_t)1 << i)) && (flags & memory_type.propertyFlags) == flags && mem_props->memoryHeaps[memory_type.heapIndex].size >= mem_req->size) { @@ -1053,11 +1057,11 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, 
vk::Memor uint32_t memory_type_index = UINT32_MAX; - memory_type_index = find_properties(&mem_props, &mem_req, req_flags); + memory_type_index = find_properties(&mem_props, &mem_req, req_flags, device->force_heap_index); buf->memory_property_flags = req_flags; if (memory_type_index == UINT32_MAX && fallback_flags) { - memory_type_index = find_properties(&mem_props, &mem_req, fallback_flags); + memory_type_index = find_properties(&mem_props, &mem_req, fallback_flags, device->force_heap_index); buf->memory_property_flags = fallback_flags; } @@ -1851,6 +1855,14 @@ static vk_device ggml_vk_get_device(size_t idx) { device->max_memory_allocation_size = props3.maxMemoryAllocationSize; } + const char* GGML_VK_FORCE_HEAP_INDEX = getenv("GGML_VK_FORCE_HEAP_INDEX"); + + if (GGML_VK_FORCE_HEAP_INDEX != nullptr) { + device->force_heap_index = std::stoi(GGML_VK_FORCE_HEAP_INDEX); + } else { + device->force_heap_index = UINT32_MAX; + } + device->vendor_id = device->properties.vendorID; device->subgroup_size = subgroup_props.subgroupSize; device->uma = device->properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu;