From 68ca77a148e7c919a99d353d406519ec46ea8e2c Mon Sep 17 00:00:00 2001 From: cocochick Date: Fri, 15 Nov 2024 20:37:38 +0800 Subject: [PATCH] chore : Fix the error when compiling rocm build on windows using cmake (#9666) Fix the compilation error "call to undeclared function '_mm256_dpbusd_epi32'". The function _mm256_dpbusd_epi32 is defined in avxintrin.h, while _mm256_dpbusd_epi32 is defined in avx512vlvnniintrin.h. Therefore, __AVX__, __AVX512VNNI__, and __AVX512VL__ need to be defined. According to (#7743), DGGML_OPENMP=OFF is needed to add, so adding it in doc. --- docs/build.md | 2 +- ggml/src/ggml-cpu/ggml-cpu-aarch64.c | 2 +- ggml/src/ggml-cpu/ggml-cpu-quants.c | 2 +- ggml/src/ggml-cpu/llamafile/sgemm.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/build.md b/docs/build.md index 52de2b4e2c224..e2118a8ba71c7 100644 --- a/docs/build.md +++ b/docs/build.md @@ -259,7 +259,7 @@ You can download it from your Linux distro's package manager or from here: [ROCm - Using `CMake` for Windows (using x64 Native Tools Command Prompt for VS, and assuming a gfx1100-compatible AMD GPU): ```bash set PATH=%HIP_PATH%\bin;%PATH% - cmake -S . -B build -G Ninja -DAMDGPU_TARGETS=gfx1100 -DGGML_HIP=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release + cmake -S . -B build -G Ninja -DGGML_OPENMP=OFF -DAMDGPU_TARGETS=gfx1100 -DGGML_HIP=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release cmake --build build ``` Make sure that `AMDGPU_TARGETS` is set to the GPU arch you want to compile for. The above example uses `gfx1100` that corresponds to Radeon RX 7900XTX/XT/GRE. You can find a list of targets [here](https://llvm.org/docs/AMDGPUUsage.html#processors) diff --git a/ggml/src/ggml-cpu/ggml-cpu-aarch64.c b/ggml/src/ggml-cpu/ggml-cpu-aarch64.c index b753ba767c15a..d3c7b6010122b 100644 --- a/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +++ b/ggml/src/ggml-cpu/ggml-cpu-aarch64.c @@ -161,7 +161,7 @@ static inline __m256i sum_i16_pairs_int32x8(const __m256i x) { } static inline __m256i mul_sum_us8_pairs_int32x8(const __m256i ax, const __m256i sy) { -#if defined(__AVXVNNI__) || (defined(__AVX512VNNI__) && defined(__AVX512VL__)) +#if defined(__AVX__) && defined(__AVX512VNNI__) && defined(__AVX512VL__) const __m256i zero = _mm256_setzero_si256(); return _mm256_dpbusd_epi32(zero, ax, sy); #else diff --git a/ggml/src/ggml-cpu/ggml-cpu-quants.c b/ggml/src/ggml-cpu/ggml-cpu-quants.c index f0e276b698795..4798207b9b33b 100644 --- a/ggml/src/ggml-cpu/ggml-cpu-quants.c +++ b/ggml/src/ggml-cpu/ggml-cpu-quants.c @@ -103,7 +103,7 @@ static inline __m256 sum_i16_pairs_float(const __m256i x) { } static inline __m256 mul_sum_us8_pairs_float(const __m256i ax, const __m256i sy) { -#if defined(__AVXVNNI__) || (defined(__AVX512VNNI__) && defined(__AVX512VL__)) +#if defined(__AVX__) && defined(__AVX512VNNI__) && defined(__AVX512VL__) const __m256i zero = _mm256_setzero_si256(); const __m256i summed_pairs = _mm256_dpbusd_epi32(zero, ax, sy); return _mm256_cvtepi32_ps(summed_pairs); diff --git a/ggml/src/ggml-cpu/llamafile/sgemm.cpp b/ggml/src/ggml-cpu/llamafile/sgemm.cpp index da4146ec4f688..ad052202798f9 100644 --- a/ggml/src/ggml-cpu/llamafile/sgemm.cpp +++ b/ggml/src/ggml-cpu/llamafile/sgemm.cpp @@ -992,7 +992,7 @@ class tinyBLAS_Q0_AVX { inline __m256 updot(__m256i u, __m256i s) { __m256i res; -#if defined(__AVXVNNI__) || (defined(__AVX512VNNI__) && defined(__AVX512VL__)) +#if defined(__AVX__) && defined(__AVX512VNNI__) && defined(__AVX512VL__) res = _mm256_dpbusd_epi32(_mm256_setzero_si256(), u, s); #else res = _mm256_madd_epi16(_mm256_set1_epi16(1), _mm256_maddubs_epi16(u, s));