From 65195d02389b3ee074d3224dc9abf1b84d98876d Mon Sep 17 00:00:00 2001 From: Daniel Massud 2 Date: Tue, 4 Mar 2025 14:23:37 -0500 Subject: [PATCH] fix: AVX2 intrinsics, const correctness, and SIMD headers --- CMakeLists.txt | 6 ++++++ ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp | 6 +++--- ggml/src/ggml.c | 1 + 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b2a1845e5c7c..a24eb66eefa64 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,11 @@ cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories. project("llama.cpp" C CXX) + + +if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + add_compile_options(-mfma -mavx2) +endif() + include(CheckIncludeFileCXX) #set(CMAKE_WARN_DEPRECATED YES) diff --git a/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp b/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp index c24fd56e20886..3f1cc1ad77a30 100644 --- a/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +++ b/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp @@ -114,7 +114,7 @@ static inline __m512 __avx512_repeat_f32cx16_load(__m128i x) { return _mm512_loadu_ps(tmp); } #endif -static inline __m256 __avx_f32cx8_load(ggml_fp16_t *x) { +static inline __m256 __avx_f32cx8_load(const ggml_fp16_t *x) { float tmp[8]; for (int i = 0; i < 8; i++) { @@ -123,7 +123,7 @@ static inline __m256 __avx_f32cx8_load(ggml_fp16_t *x) { return _mm256_loadu_ps(tmp); } -static inline __m256 __avx_repeat_f32cx8_load(ggml_fp16_t *x) { +static inline __m256 __avx_repeat_f32cx8_load(const ggml_fp16_t *x) { float tmp[8]; for (int i = 0; i < 4; i++) { @@ -133,7 +133,7 @@ static inline __m256 __avx_repeat_f32cx8_load(ggml_fp16_t *x) { return _mm256_loadu_ps(tmp); } -static inline __m256 __avx_rearranged_f32cx8_load(ggml_fp16_t *x, __m128i arrangeMask) { +static inline __m256 __avx_rearranged_f32cx8_load(const ggml_fp16_t *x, __m128i arrangeMask) { uint16_t tmphalf[8]; float tmp[8]; diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 084240331ef93..484d1527e602e 
100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -2,6 +2,7 @@ #define _USE_MATH_DEFINES // For M_PI on MSVC #include "ggml-backend.h" +#include <immintrin.h> #include "ggml-impl.h" #include "ggml-threading.h" #include "ggml.h"