
Commit af04481

model : do not repack if a GPU device is present (ggml-org#12498)
ggml-ci
1 parent: 960e726

File tree

1 file changed (+23, -10 lines)


src/llama-model.cpp

Lines changed: 23 additions & 10 deletions
```diff
@@ -271,19 +271,32 @@ static buft_list_t make_cpu_buft_list(const std::vector<ggml_backend_dev_t> & devices) {
         }
     }
 
-    // add extra buffer types
-    auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
-    auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);
-    auto ggml_backend_dev_get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t)
-        ggml_backend_reg_get_proc_address(cpu_reg, "ggml_backend_dev_get_extra_bufts");
-    if (ggml_backend_dev_get_extra_bufts_fn) {
-        ggml_backend_buffer_type_t * extra_bufts = ggml_backend_dev_get_extra_bufts_fn(cpu_dev);
-        while (extra_bufts && *extra_bufts) {
-            buft_list.emplace_back(cpu_dev, *extra_bufts);
-            ++extra_bufts;
+    bool has_gpu_device = false;
+    for (auto * dev : devices) {
+        if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) {
+            has_gpu_device = true;
+            break;
         }
     }
 
+    // add extra buffer types, only if no GPU device is present
+    // ref: https://github.com/ggml-org/llama.cpp/issues/12481#issuecomment-2743136094
+    if (!has_gpu_device) {
+        auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+        auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);
+        auto ggml_backend_dev_get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t)
+            ggml_backend_reg_get_proc_address(cpu_reg, "ggml_backend_dev_get_extra_bufts");
+        if (ggml_backend_dev_get_extra_bufts_fn) {
+            ggml_backend_buffer_type_t * extra_bufts = ggml_backend_dev_get_extra_bufts_fn(cpu_dev);
+            while (extra_bufts && *extra_bufts) {
+                buft_list.emplace_back(cpu_dev, *extra_bufts);
+                ++extra_bufts;
+            }
+        }
+    } else {
+        LLAMA_LOG_WARN("%s: disabling extra buffer types (i.e. repacking) since a GPU device is available\n", __func__);
+    }
+
     // add a host buffer type
     // storing the tensors in a host buffer is useful when the processing of large batches
     // is offloaded to a GPU device, since it reduces the time spent on data transfers
```
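For context, a minimal sketch of the new device check (not part of the commit): it scans ggml's global device registry for a GPU, whereas the commit checks the `devices` vector that was passed to `make_cpu_buft_list`. The helper name and the standalone `main()` are illustrative only.

```cpp
#include <cstddef>
#include <cstdio>

#include "ggml-backend.h"

// Return true if any registered ggml backend device is a GPU.
static bool any_gpu_device() {
    for (std::size_t i = 0; i < ggml_backend_dev_count(); ++i) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) {
            std::printf("GPU device: %s\n", ggml_backend_dev_name(dev));
            return true;
        }
    }
    return false;
}

int main() {
    // After this commit, the presence of a GPU device means the CPU
    // "extra" buffer types (repacked weight layouts) are skipped entirely.
    std::printf("repacking %s\n", any_gpu_device() ? "disabled" : "enabled");
}
```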

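Similarly, a sketch of the lookup that is now gated behind `!has_gpu_device`: fetch the optional `ggml_backend_dev_get_extra_bufts` entry point from the CPU backend's registry via `ggml_backend_reg_get_proc_address()` and walk the NULL-terminated list it returns. The local typedef is a stand-in for the `ggml_backend_dev_get_extra_bufts_t` type used in llama-model.cpp, and `list_cpu_extra_bufts` is an illustrative name.

```cpp
#include <cstdio>

#include "ggml-backend.h"

// Local stand-in for ggml's ggml_backend_dev_get_extra_bufts_t.
typedef ggml_backend_buffer_type_t * (*get_extra_bufts_t)(ggml_backend_dev_t);

// Print the CPU backend's extra buffer types, if it exposes any.
static void list_cpu_extra_bufts() {
    auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
    auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);

    auto fn = (get_extra_bufts_t)
        ggml_backend_reg_get_proc_address(cpu_reg, "ggml_backend_dev_get_extra_bufts");
    if (!fn) {
        return; // this CPU backend exposes no extra buffer types
    }
    // The entry point returns a NULL-terminated array of buffer types,
    // e.g. repacked layouts such as the AArch64 types, when available.
    for (auto * it = fn(cpu_dev); it && *it; ++it) {
        std::printf("extra buft: %s\n", ggml_backend_buft_name(*it));
    }
}
```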