Skip to content

Commit 539eea2

Browse files
committed
Update ggml-impl.h
1 parent f3c00da commit 539eea2

File tree

1 file changed

+19
-17
lines changed

1 file changed

+19
-17
lines changed

ggml/src/ggml-impl.h

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,14 @@
1616
#include <arm_sve.h>
1717
#endif // __ARM_FEATURE_SVE
1818

19+
#if defined(__ARM_NEON) && !defined(__CUDACC__) && !defined(__MUSACC__)
20+
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
21+
//
22+
// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
23+
//
24+
#include <arm_neon.h>
25+
#endif
26+
1927
#if defined(__F16C__)
2028
#include <immintrin.h>
2129
#endif
@@ -303,35 +311,29 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size);
303311

304312
// FP16 to FP32 conversion
305313

306-
// 16-bit float
307-
// on Arm, we use __fp16
308-
// on x86, we use uint16_t
309-
//
310-
// for old CUDA compilers (<= 11), we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/10616
311-
// for MUSA compilers , we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/11843
312-
//
313-
#if defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__)
314-
315-
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
316-
//
317-
// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
318-
//
319-
#include <arm_neon.h>
314+
#if defined(__ARM_NEON)
315+
#if defined(_MSC_VER) || (defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
316+
typedef uint16_t ggml_fp16_internal_t;
317+
#else
318+
typedef __fp16 ggml_fp16_internal_t;
319+
#endif
320+
#endif
320321

322+
#if defined(__ARM_NEON) && !defined(_MSC_VER) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
321323
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
322324
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
323325

324326
#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
325327

326328
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
327-
__fp16 tmp;
329+
ggml_fp16_internal_t tmp;
328330
memcpy(&tmp, &h, sizeof(ggml_fp16_t));
329331
return (float)tmp;
330332
}
331333

332334
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
333335
ggml_fp16_t res;
334-
__fp16 tmp = f;
336+
ggml_fp16_internal_t tmp = f;
335337
memcpy(&res, &tmp, sizeof(ggml_fp16_t));
336338
return res;
337339
}
@@ -483,7 +485,7 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size);
483485
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
484486
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
485487

486-
#endif // defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__)
488+
#endif // defined(__ARM_NEON) && !defined(_MSC_VER) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
487489

488490
// precomputed f32 table for f16 (256 KB)
489491
// defined in ggml.c, initialized in ggml_init()

0 commit comments

Comments
 (0)