Skip to content

Commit e063cec

Browse files
ckastner, slaren
authored and committed
Implement GGML_CPU_ALL_VARIANTS for PowerPC (ggml-org#14286)
* Add PowerPC feature detection and scoring
* ggml-cpu: Implement GGML_CPU_ALL_VARIANTS for PowerPC
* ggml-cpu: Delay some initializations until function is called

When using GGML_BACKEND_DL=ON, these initializations might use instructions that are not supported by the current CPU.

---------

Co-authored-by: Diego Devesa <slarengh@gmail.com>
1 parent dd1e26e commit e063cec

File tree

3 files changed

+53
-14
lines changed

3 files changed

+53
-14
lines changed

ggml/src/CMakeLists.txt

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -286,6 +286,10 @@ function(ggml_add_cpu_backend_variant tag_name)
286286
foreach (feat ${ARGN})
287287
set(GGML_INTERNAL_${feat} ON)
288288
endforeach()
289+
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
290+
foreach (feat ${ARGN})
291+
set(GGML_INTERNAL_${feat} ON)
292+
endforeach()
289293
endif()
290294

291295
ggml_add_cpu_backend_variant_impl(${tag_name})
@@ -337,6 +341,19 @@ if (GGML_CPU_ALL_VARIANTS)
337341
else()
338342
message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
339343
endif()
344+
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
345+
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
346+
ggml_add_cpu_backend_variant(power0)
347+
ggml_add_cpu_backend_variant(power7_1 POWER7)
348+
ggml_add_cpu_backend_variant(power7_2 POWER7 VSX)
349+
ggml_add_cpu_backend_variant(power8_1 POWER8)
350+
ggml_add_cpu_backend_variant(power8_2 POWER8 VSX)
351+
ggml_add_cpu_backend_variant(power9 POWER9 VSX)
352+
ggml_add_cpu_backend_variant(power10 POWER10 VSX)
353+
ggml_add_cpu_backend_variant(power11 POWER11 VSX)
354+
else()
355+
message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
356+
endif()
340357
else()
341358
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
342359
endif()

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -388,6 +388,27 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
388388
else()
389389
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
390390
endif()
391+
elseif(GGML_CPU_ALL_VARIANTS)
392+
# Begin with the lowest baseline
393+
set(ARCH_DEFINITIONS "")
394+
395+
# When a feature is selected, bump the MCPU to the first
396+
# version that supported it
397+
foreach(PVER RANGE 7 11)
398+
if(DEFINED GGML_INTERNAL_POWER${PVER})
399+
set(POWERPC_MCPU "power${PVER}")
400+
list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER})
401+
endif()
402+
endforeach()
403+
if (GGML_INTERNAL_VSX)
404+
list(APPEND ARCH_DEFINITIONS GGML_USE_VSX)
405+
list(APPEND ARCH_FLAGS -mvsx)
406+
endif()
407+
408+
if (DEFINED POWERPC_MCPU)
409+
list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU})
410+
endif()
411+
ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS})
391412
else()
392413
if (GGML_CPU_POWERPC_CPUTYPE)
393414
list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})

ggml/src/ggml-cpu/repack.cpp

Lines changed: 15 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1411,44 +1411,45 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
14111411
}
14121412
};
14131413

1414-
// instance for Q4
1415-
static const tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
1416-
static const tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
1417-
static const tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
1418-
static const tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
1419-
1420-
// instance for IQ4
1421-
static const tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
1422-
14231414
} // namespace ggml::cpu::repack
14241415

14251416
static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(const struct ggml_tensor * cur) {
1417+
1418+
// instance for Q4
1419+
static const ggml::cpu::repack::tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
1420+
static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
1421+
static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
1422+
static const ggml::cpu::repack::tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
1423+
1424+
// instance for IQ4
1425+
static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
1426+
14261427
if (cur->type == GGML_TYPE_Q4_0) {
14271428
if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
14281429
if (cur->ne[1] % 8 == 0) {
1429-
return &ggml::cpu::repack::q4_0_8x8_q8_0;
1430+
return &q4_0_8x8_q8_0;
14301431
}
14311432
}
14321433
if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
14331434
if (cur->ne[1] % 4 == 0) {
1434-
return &ggml::cpu::repack::q4_0_4x8_q8_0;
1435+
return &q4_0_4x8_q8_0;
14351436
}
14361437
}
14371438
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
14381439
if (cur->ne[1] % 4 == 0) {
1439-
return &ggml::cpu::repack::q4_0_4x4_q8_0;
1440+
return &q4_0_4x4_q8_0;
14401441
}
14411442
}
14421443
} else if (cur->type == GGML_TYPE_Q4_K) {
14431444
if (ggml_cpu_has_avx2()) {
14441445
if (cur->ne[1] % 8 == 0) {
1445-
return &ggml::cpu::repack::q4_K_8x8_q8_K;
1446+
return &q4_K_8x8_q8_K;
14461447
}
14471448
}
14481449
} else if (cur->type == GGML_TYPE_IQ4_NL) {
14491450
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
14501451
if (cur->ne[1] % 4 == 0) {
1451-
return &ggml::cpu::repack::iq4_nl_4x4_q8_0;
1452+
return &iq4_nl_4x4_q8_0;
14521453
}
14531454
}
14541455
}

0 commit comments

Comments
 (0)