Skip to content

Commit 7ff7550

Browse files
committed
cast f32 and f16
Change-Id: I01ff87e2403cb5a30a578fa48437bb505ba556da
1 parent bca170b commit 7ff7550

File tree

8 files changed

+61
-16
lines changed

8 files changed

+61
-16
lines changed

cmake/riscv64-spacemit-linux-gnu-gcc.cmake

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,18 +21,11 @@ set(CMAKE_FIND_ROOT_PATH "${RISCV_ROOT_PATH}/riscv64-unknown-linux-gnu")
2121
set(CMAKE_SYSROOT "${RISCV_ROOT_PATH}/sysroot")
2222
endif()
2323

24-
if(NOT DEFINED CMAKE_CXX_FLAGS)
25-
set(CMAKE_CXX_FLAGS "-march=rv64gcv_zfh_zba")
26-
endif()
27-
28-
if(NOT DEFINED CMAKE_CXX_FLAGS)
29-
set(CMAKE_C_FLAGS "-march=rv64gcv_zfh_zba")
30-
endif()
31-
3224
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
3325
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
3426
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
3527
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
36-
set(CMAKE_C_FLAGS "-march=rv64gcv_zfh_zba -latomic -mabi=lp64d ${CMAKE_C_FLAGS}")
37-
set(CMAKE_CXX_FLAGS "-march=rv64gcv_zfh_zba -latomic -mabi=lp64d ${CXX_FLAGS}")
28+
set(CMAKE_C_FLAGS "-march=rv64gcv_zfh_zba_zicbop -mabi=lp64d ${CMAKE_C_FLAGS}")
29+
set(CMAKE_CXX_FLAGS "-march=rv64gcv_zfh_zba_zicbop -mabi=lp64d ${CXX_FLAGS}")
30+
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -latomic")
3831
add_definitions(-D__fp16=_Float16)

ggml/src/ggml-cpu/ggml-cpu-riscv64-spacemit.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,4 +1049,16 @@ void ggml_vec_cpy_rvv(void* dst, const void* src, size_t size) {
10491049
#if defined(__riscv) && defined(__riscv_v)
10501050
MlasCopy(dst, src, size);
10511051
#endif
1052+
}
1053+
1054+
void ggml_cpu_fp16_to_fp32_rvv(const ggml_fp16_t * x, float * y, int64_t n) {
1055+
#if defined(__riscv) && defined(__riscv_v)
1056+
GetMlasPlatform().CastF16ToF32Kernel(x, y, n);
1057+
#endif
1058+
}
1059+
1060+
void ggml_cpu_fp32_to_fp16_rvv(const float * x, ggml_fp16_t * y, int64_t n) {
1061+
#if defined(__riscv) && defined(__riscv_v)
1062+
GetMlasPlatform().CastF32ToF16Kernel(x, y, n);
1063+
#endif
10521064
}

ggml/src/ggml-cpu/ggml-cpu-riscv64-spacemit.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,12 @@ void ggml_vec_cpy_rvv(void* dst, const void* src, size_t size);
3636

3737
void ggml_vec_mad_f16_hp_rvv(const void* x, void* y, float scalar, int size);
3838

39+
void ggml_vec_silu_f32_rvv(const float* lhs, float* out, size_t n);
40+
41+
void ggml_cpu_fp16_to_fp32_rvv(const ggml_fp16_t * x, float * y, int64_t n);
42+
43+
void ggml_cpu_fp32_to_fp16_rvv(const float * x, ggml_fp16_t * y, int64_t n);
44+
3945
#ifdef __cplusplus
4046
}
4147
#endif

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3161,7 +3161,6 @@ enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct g
31613161
struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads, NULL);
31623162

31633163
cplan.work_data = (uint8_t *)ggml_new_buffer(ctx, cplan.work_size);
3164-
printf("cplan.work_data %p\n", cplan.work_data);
31653164
return ggml_graph_compute(cgraph, &cplan);
31663165
}
31673166

@@ -3185,6 +3184,9 @@ void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
31853184
__m128i y_vec = _mm_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
31863185
_mm_storel_epi64((__m128i *)(y + i), y_vec);
31873186
}
3187+
#elif defined(GGML_USE_CPU_RISCV64_SPACEMIT)
3188+
ggml_cpu_fp32_to_fp16_rvv(x, y, n);
3189+
i += n;
31883190
#endif
31893191
for (; i < n; ++i) {
31903192
y[i] = GGML_FP32_TO_FP16(x[i]);
@@ -3211,6 +3213,9 @@ void ggml_cpu_fp16_to_fp32(const ggml_fp16_t * x, float * y, int64_t n) {
32113213
__m128 y_vec = _mm_cvtph_ps(x_vec);
32123214
_mm_storeu_ps(y + i, y_vec);
32133215
}
3216+
#elif defined(GGML_USE_CPU_RISCV64_SPACEMIT)
3217+
ggml_cpu_fp16_to_fp32_rvv(x, y, n);
3218+
i += n;
32143219
#endif
32153220
for (; i < n; ++i) {
32163221
y[i] = GGML_FP16_TO_FP32(x[i]);

ggml/src/ggml-cpu/ops.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1016,10 +1016,17 @@ static void ggml_compute_forward_dup_bytes(
10161016
for (int64_t i03 = 0; i03 < ne03; i03++) {
10171017
for (int64_t i02 = 0; i02 < ne02; i02++) {
10181018
for (int64_t i01 = ir0; i01 < ir1; i01++) {
1019+
#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
1020+
ggml_vec_cpy_rvv(
1021+
((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3),
1022+
((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03),
1023+
rs);
1024+
#else
10191025
memcpy(
10201026
((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3),
10211027
((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03),
10221028
rs);
1029+
#endif
10231030
}
10241031
}
10251032
}

ggml/src/ggml-cpu/vec.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,6 @@
22

33
#include <cassert>
44

5-
#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
6-
#include "ggml-cpu-riscv64-spacemit.h"
7-
#endif
8-
95
#if defined(_MSC_VER)
106
// disable "possible loss of data" to avoid hundreds of casts
117
// we should just be careful :)
@@ -200,6 +196,9 @@ void ggml_vec_silu_f32(const int n, float * y, const float * x) {
200196
for (; i + 3 < n; i += 4) {
201197
vst1q_f32(y + i, ggml_v_silu(vld1q_f32(x + i)));
202198
}
199+
#elif defined(GGML_USE_CPU_RISCV64_SPACEMIT)
200+
ggml_vec_silu_f32_rvv(x, y, n);
201+
i += n;
203202
#endif
204203
for (; i < n; ++i) {
205204
y[i] = ggml_silu_f32(x[i]);

ggml/src/ggml-cpu/vec.h

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66
#include "simd-mappings.h"
77
#include "ggml.h"
88

9+
#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
10+
#include "ggml-cpu-riscv64-spacemit.h"
11+
#endif
12+
913
#if defined(GGML_USE_ACCELERATE)
1014
#include <Accelerate/Accelerate.h>
1115
#endif
@@ -54,7 +58,13 @@ inline static void ggml_vec_cpy_i32(const int n, int32_t * y, const int32_t * x)
5458

5559
inline static void ggml_vec_set_f16(const int n, ggml_fp16_t * x, const ggml_fp16_t v) { for (int i = 0; i < n; ++i) x[i] = v; }
5660
inline static void ggml_vec_set_bf16(const int n, ggml_bf16_t * x, const ggml_bf16_t v) { for (int i = 0; i < n; ++i) x[i] = v; }
61+
#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
62+
inline static void ggml_vec_add_f32 (const int n, float * z, const float * x, const float * y) {
63+
ggml_vec_add_f32_rvv(x, y, z, n);
64+
}
65+
#else
5766
inline static void ggml_vec_add_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i] + y[i]; }
67+
#endif
5868
inline static void ggml_vec_add_f16 (const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) {
5969
for (int i = 0; i < n; ++i) {
6070
z[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(x[i]) + GGML_FP16_TO_FP32(y[i]));
@@ -77,8 +87,13 @@ inline static void ggml_vec_neg_f16 (const int n, ggml_fp16_t * y, const ggml_fp
7787
y[i] = GGML_FP32_TO_FP16(-GGML_FP16_TO_FP32(x[i]));
7888
}
7989
}
80-
90+
#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
91+
inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) {
92+
ggml_vec_mul_f32_rvv(x, y, z, n);
93+
}
94+
#else
8195
inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]*y[i]; }
96+
#endif
8297
inline static void ggml_vec_mul_f16 (const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) {
8398
for (int i = 0; i < n; ++i) {
8499
z[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(x[i]) * GGML_FP16_TO_FP32(y[i]));
@@ -200,6 +215,9 @@ inline static void ggml_vec_mad_f16(const int n, ggml_fp16_t * GGML_RESTRICT y,
200215
for (int i = np; i < n; ++i) {
201216
y[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(y[i]) + GGML_FP16_TO_FP32(x[i])*v);
202217
}
218+
#elif defined(GGML_USE_CPU_RISCV64_SPACEMIT)
219+
ggml_vec_mad_f16_hp_rvv(x, y, v, n);
220+
return;
203221
#else
204222
// scalar
205223
for (int i = 0; i < n; ++i) {
@@ -284,6 +302,8 @@ inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
284302
for (int i = np; i < n; ++i) {
285303
y[i] *= v;
286304
}
305+
#elif defined(GGML_USE_CPU_RISCV64_SPACEMIT)
306+
ggml_vec_scale_f32_rvv(y, v, y, n);
287307
#else
288308
// scalar
289309
for (int i = 0; i < n; ++i) {
@@ -777,6 +797,8 @@ inline static void ggml_vec_max_f32(const int n, float * s, const float * x) {
777797
max = MAX(max, x[i]);
778798
}
779799
*s = max;
800+
#elif defined(GGML_USE_CPU_RISCV64_SPACEMIT)
801+
ggml_vec_max_f32_rvv(n, s, x);
780802
#else
781803
vDSP_maxv(x, 1, s, n);
782804
#endif

scripts/build-riscv64-spacemit.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ cmake -B build-riscv64-spacemit \
88
-DCMAKE_BUILD_TYPE=Release \
99
-DGGML_CPU_RISCV64_SPACEMIT=ON \
1010
-DLLAMA_CURL=OFF \
11+
-DGGML_RV_ZFH=ON \
1112
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
1213
-DCMAKE_TOOLCHAIN_FILE=${SCRIPTS_DIR}/../cmake/riscv64-spacemit-linux-gnu-gcc.cmake \
1314
-DCMAKE_INSTALL_PREFIX=build-riscv64-spacemit/installed

0 commit comments

Comments
 (0)