Skip to content

Commit 77b3c26

Browse files
committed
add spacemit impl
Change-Id: I62403816a869fefabdfc2b7d5655daa7ef6a8504
1 parent 88e6e78 commit 77b3c26

File tree

9 files changed

+2069
-17
lines changed

9 files changed

+2069
-17
lines changed

README-SpacemiT-EN.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
## Introduction
2+
This repository is a RISC-V derivative of the community version of llama.cpp, developed by SpacemiT. It uses the MLAS library from the SpacemiT version of ONNXRuntime to replace some of the performance bottlenecks in the ggml.c implementation. LLMs can obtain a large performance improvement with Q4_0 quantization.
3+
4+
## Build
5+
~~~ bash
6+
# Go to the llama.cpp directory and run the script. You can edit it to change the number of threads or the Release/Debug build option.
7+
bash scripts/build-riscv64-spacemit.sh
8+
~~~
9+
10+
## Build Options Description
11+
This repository adds the `GGML_CPU_RISCV64_SPACEMIT` build option to the GGML CPU backend. When it is turned on, the default implementation is replaced with the SpacemiT RVV and IME implementation.
12+
13+
## Quantization
14+
~~~ bash
15+
./llama-quantize --token-embedding-type q4_0 qwen2.5-3b-f32.gguf qwen2.5-3b-q4_0.gguf Q4_0
16+
~~~
17+
18+
## Performance
19+
Performance was measured with llama-bench; results for some models are shown below (based on SpacemiT-K1 @ 4 threads).
20+
21+
| model name | prefill@64t(t/s) | decode@64t(t/s) |
22+
| :-----: | :----: | :----: |
23+
| qwen2.5-0.5b | 107.10 | 16.34 |
24+
| qwen2.5-1.5b | 32.11 | 5.46 |
25+
| tinyllama-1.1b | 40.47 | 7.55 |
26+
| llama3.2-1b | 41.6 | 7.32 |
27+
| gemma2-2b | 18.96 | 2.72 |

README-SpacemiT.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
## 简介
2+
本仓库是进迭时空(SpacemiT)在llama.cpp社区版本上开发的RISCV衍生版本, 使用进迭时空版本ONNXRuntime中的Mlas算子库替换了ggml.c中的部分性能瓶颈实现, LLM可在Q4_0量化下获得极大性能提升
3+
4+
## 编译构建
5+
~~~ bash
6+
# 进入llama.cpp目录执行脚本, 可自行修改线程数、Release Or Debug编译选项
7+
bash scripts/build-riscv64-spacemit.sh
8+
~~~
9+
10+
## 编译选项说明
11+
本仓库在GGML编译后端之外增加了`GGML_CPU_RISCV64_SPACEMIT`编译选项, 可选择开启并替换为SpacemiT的RVV及IME实现
12+
13+
## 量化说明
14+
~~~ bash
15+
./llama-quantize --token-embedding-type q4_0 qwen2.5-3b-f32.gguf qwen2.5-3b-q4_0.gguf Q4_0
16+
# --token-embedding-type q4_0时达到最高性能,但需要考虑模型精度,建议设置为f32或f16
17+
~~~
18+
19+
## 性能
20+
使用llama-bench进行性能测试,部分模型性能数据如下。
21+
22+
| model name | prefill@64t(t/s) | decode@64t(t/s) |
23+
| :-----: | :----: | :----: |
24+
| qwen2.5-0.5b | 107.10 | 16.34 |
25+
| qwen2.5-1.5b | 32.11 | 5.46 |
26+
| tinyllama-1.1b | 40.47 | 7.55 |
27+
| llama3.2-1b | 41.6 | 7.32 |
28+
| gemma2-2b | 18.96 | 2.72 |

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 60 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,64 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
77

88
ggml_add_backend_library(${GGML_CPU_NAME})
99

10-
list (APPEND GGML_CPU_SOURCES
11-
ggml-cpu/ggml-cpu.c
12-
ggml-cpu/ggml-cpu.cpp
13-
ggml-cpu/ggml-cpu-aarch64.cpp
14-
ggml-cpu/ggml-cpu-aarch64.h
15-
ggml-cpu/ggml-cpu-hbm.cpp
16-
ggml-cpu/ggml-cpu-hbm.h
17-
ggml-cpu/ggml-cpu-quants.c
18-
ggml-cpu/ggml-cpu-quants.h
19-
ggml-cpu/ggml-cpu-traits.cpp
20-
ggml-cpu/ggml-cpu-traits.h
21-
ggml-cpu/amx/amx.cpp
22-
ggml-cpu/amx/amx.h
23-
ggml-cpu/amx/mmq.cpp
24-
ggml-cpu/amx/mmq.h
25-
ggml-cpu/ggml-cpu-impl.h
10+
if(GGML_CPU_RISCV64_SPACEMIT)
11+
list (APPEND GGML_CPU_SOURCES
12+
ggml-cpu/ggml-cpu.c
13+
ggml-cpu/ggml-cpu.cpp
14+
ggml-cpu/ggml-cpu-riscv64-spacemit.cpp
15+
ggml-cpu/ggml-cpu-riscv64-spacemit.h
16+
ggml-cpu/ggml-cpu-hbm.cpp
17+
ggml-cpu/ggml-cpu-hbm.h
18+
ggml-cpu/ggml-cpu-quants.c
19+
ggml-cpu/ggml-cpu-quants.h
20+
ggml-cpu/ggml-cpu-traits.cpp
21+
ggml-cpu/ggml-cpu-traits.h
22+
ggml-cpu/ggml-cpu-impl.h
2623
)
24+
25+
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64")
26+
include(FetchContent)
27+
# TODO replace with git repo
28+
FetchContent_Declare(
29+
onnxruntime
30+
GIT_REPOSITORY ssh://$ENV{GERRIT_USER}@gerrit.dc.com:29418/DSA/onnxruntime
31+
GIT_TAG "c17089e2e45067e24911d95611d2196a3dd63694"
32+
)
33+
# FetchContent_Declare(
34+
# onnxruntime
35+
# GIT_REPOSITORY https://github.com/space-mit/onnxruntime
36+
# GIT_TAG "d0780f050bbaaf2951d2dc0c1cc8459803068fbe"
37+
# )
38+
39+
FetchContent_Populate(onnxruntime)
40+
41+
add_subdirectory(ggml-cpu/onnxruntime_mlas)
42+
add_compile_definitions(BUILD_MLAS_NO_ONNXRUNTIME)
43+
target_include_directories(${GGML_CPU_NAME} PRIVATE
44+
${onnxruntime_SOURCE_DIR}/onnxruntime/core/mlas/lib
45+
${onnxruntime_SOURCE_DIR}/onnxruntime/core/mlas/inc
46+
)
47+
target_link_libraries(${GGML_CPU_NAME} PRIVATE onnxruntime_mlas)
48+
endif()
49+
else()
50+
list (APPEND GGML_CPU_SOURCES
51+
ggml-cpu/ggml-cpu.c
52+
ggml-cpu/ggml-cpu.cpp
53+
ggml-cpu/ggml-cpu-aarch64.cpp
54+
ggml-cpu/ggml-cpu-aarch64.h
55+
ggml-cpu/ggml-cpu-hbm.cpp
56+
ggml-cpu/ggml-cpu-hbm.h
57+
ggml-cpu/ggml-cpu-quants.c
58+
ggml-cpu/ggml-cpu-quants.h
59+
ggml-cpu/ggml-cpu-traits.cpp
60+
ggml-cpu/ggml-cpu-traits.h
61+
ggml-cpu/amx/amx.cpp
62+
ggml-cpu/amx/amx.h
63+
ggml-cpu/amx/mmq.cpp
64+
ggml-cpu/amx/mmq.h
65+
ggml-cpu/ggml-cpu-impl.h
66+
)
67+
endif()
2768

2869
target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
2970
target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)
@@ -312,7 +353,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
312353
message(STATUS "Unknown architecture")
313354
endif()
314355

315-
if (GGML_CPU_AARCH64)
356+
if (GGML_CPU_RISCV64_SPACEMIT)
357+
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_RISCV64_SPACEMIT)
358+
elseif (GGML_CPU_AARCH64)
316359
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_AARCH64)
317360
endif()
318361

0 commit comments

Comments
 (0)