Commit 2b82db5

support qwen3

1 parent 9037a3a commit 2b82db5

28 files changed: +1,517,523 / −3 lines

README.md

Lines changed: 5 additions & 3 deletions
@@ -15,6 +15,7 @@

 # Latest Updates! 🔥🔥🔥

+- **2025.04.29**: 🚀 Qwen's latest reasoning model **Qwen3** is now supported on BM1684X/1688; see the [Qwen3 Demo](./models/Qwen3/) for details
 - **2025.03.07**: 🚀 Qwen's latest reasoning models **QWQ-32B** and **DeepSeek-R1-Distill-Qwen-32B** are adapted in the 1684X multi-chip demo; see the [LLM Template](./template/) for details
 - **2025.02.05**: 🚀 A DeepSeek moment!! We adapted the **DeepSeek-R1-Distill-Qwen** series, including the 1.5B, 7B, and 14B versions; see the [LLM Template](./template/) for details

@@ -67,6 +68,7 @@
 |Qwen2.5-1.5B |:white_check_mark:|:white_check_mark:|[LINK](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) |
 |Qwen2.5-7B |:white_check_mark:| |[LINK](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) |
 |QWQ-32B |:white_check_mark:| |[LINK](https://huggingface.co/Qwen/QWQ-32B) |
+|Qwen3-4B |:white_check_mark:| |[LINK](https://huggingface.co/Qwen/Qwen3-4B) |
 |WizardCoder-15B |:white_check_mark:| |[LINK](https://huggingface.co/WizardLM/WizardCoder-15B-V1.0) |
 |Yi-6B-chat |:white_check_mark:| |[LINK](https://huggingface.co/01-ai/Yi-6B-Chat) |
 |Yi-34B-chat |:white_check_mark:| |[LINK](https://huggingface.co/01-ai/Yi-34B-Chat) |
@@ -75,9 +77,9 @@
 In addition, there are also some multimodal models, as follows:
 |Model |BM1684X |BM1688 |Huggingface Link |
 |:- |:- |:- |:- |
-|Qwen-VL-Chat |:white_check_mark:| |[LINK](https://huggingface.co/Qwen/Qwen-VL-Chat) |
-|Qwen2-VL-Chat |:white_check_mark:| |[LINK](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct) |
-|Qwen2.5-VL-Chat |:white_check_mark:| |[LINK](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct) |
+|Qwen-VL |:white_check_mark:| |[LINK](https://huggingface.co/Qwen/Qwen-VL-Chat) |
+|Qwen2-VL |:white_check_mark:| |[LINK](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct) |
+|Qwen2.5-VL |:white_check_mark:| |[LINK](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct) |
 |InternVL2-4B |:white_check_mark:|:white_check_mark:|[LINK](https://huggingface.co/OpenGVLab/InternVL2-4B) |
 |InternVL2-2B |:white_check_mark:|:white_check_mark:|[LINK](https://huggingface.co/OpenGVLab/InternVL2-2B) |
 |Stable Diffusion |:white_check_mark:| |[LINK](https://huggingface.co/runwayml/stable-diffusion-v1-5) |

models/Qwen3/README.md

Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@

# Qwen3

This project deploys the large language model [Qwen3](https://huggingface.co/Qwen/Qwen3-4B) on BM1684X/BM1688. The model is converted into a bmodel with the [TPU-MLIR](https://github.com/sophgo/tpu-mlir) compiler and then deployed with C++ code to a BM1684X/BM1688 PCIE environment or an SoC environment.

This document covers how to compile the bmodel and how to run it in a BM1684X/BM1688 environment. The compilation step can be skipped by downloading a prebuilt bmodel directly from the following link:

``` shell
python3 -m dfss --url=open@sophgo.com:/ext_model_information/LLM/LLM-TPU/qwen3-4b_w4bf16_seq512_bm1684x_1dev_20250429_120231.bmodel
```
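
If the `dfss` tool is not already present, it can presumably be installed from pip first (assumption: `dfss` is the pip-distributed Sophgo file-transfer utility, as used in other LLM-TPU model guides):

``` shell
# assumed prerequisite: the Sophgo dfss download utility from pip
pip3 install dfss --upgrade
```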

## Compiling the LLM model

This section describes how to compile the LLM into a bmodel.

#### 1. Download `Qwen3-4B` from Huggingface

(The model is large, so the download takes a while.)

``` shell
# download the model
git lfs install
git clone git@hf.co:Qwen/Qwen3-4B
# for the 8B model, use:
git clone git@hf.co:Qwen/Qwen3-8B
```

#### 2. Pull the docker image and start a container

``` shell
docker pull sophgo/tpuc_dev:latest

# myname1234 is just an example, you can set your own name
docker run --privileged --name myname1234 -v $PWD:/workspace -it sophgo/tpuc_dev:latest
```

The rest of this document assumes everything is under the docker container's `/workspace` directory.

#### 3. Download and build the `TPU-MLIR` code

(A prebuilt release package can also be downloaded and extracted instead.)

``` shell
cd /workspace
git clone git@github.com:sophgo/tpu-mlir.git
cd tpu-mlir
source ./envsetup.sh # set up environment variables
./build.sh # build mlir
```
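
Once the build finishes, the TPU-MLIR tools should be on the shell's `PATH`. A quick sanity check, as a sketch assuming `llm_convert.py` follows the usual argparse convention and responds to `--help`:

``` shell
# should print the converter's usage text if envsetup.sh took effect
llm_convert.py --help
```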

#### 4. Compile the model into a bmodel

``` shell
# if a transformers version error is reported, run: pip3 install transformers --upgrade
llm_convert.py -m /workspace/Qwen3-4B -s 512 --quantize w4bf16 -g 128 -c bm1684x --out_dir qwen3_4b
```
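
For reference, `-s 512` sets the maximum sequence length, `--quantize w4bf16` selects 4-bit weights with BF16 activations, `-g 128` is the quantization group size, and `-c bm1684x` picks the target chip. A hedged sketch of the corresponding BM1688 build of the 8B checkpoint (assumption: `bm1688` is accepted by `-c` with the same flag set, as the BM1684X/BM1688 support claim above suggests):

``` shell
# assumed variant: same flags, different chip and checkpoint
llm_convert.py -m /workspace/Qwen3-8B -s 512 --quantize w4bf16 -g 128 -c bm1688 --out_dir qwen3_8b
```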

## Building and running the demos

Copy the program to the PCIE or SoC environment before building it there.

#### python demo

Build the library, which produces a `chat.cpython*.so` file, then copy that file into the directory containing `pipeline.py`:

``` shell
cd python_demo
mkdir build
cd build && cmake .. && make && cp *cpython* .. && cd ..
```

* python demo

``` shell
python3 pipeline.py -m qwen3_xxx.bmodel -c config
```

Here `-m` is the actual bmodel storage path and `-c` is the configuration directory path.

#### cpp demo

``` shell
mkdir -p build
cd build
cmake .. && make && cd ..

# how to run (the executable is named qwen2 in CMakeLists.txt)
./qwen2 -m qwen3_xxx.bmodel -c config
```

models/Qwen3/cpp_demo/CMakeLists.txt

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
cmake_minimum_required(VERSION 3.10)
project(qwen2)

if (NOT DEFINED TARGET_ARCH)
    set(TARGET_ARCH pcie)
endif()

include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(/opt/sophon/libsophon-current/include)
link_directories(/opt/sophon/libsophon-current/lib)

if (${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "aarch64")
    add_definitions(-DSOC_TARGET)
    link_directories(${PROJECT_SOURCE_DIR}/lib_soc)
    message("SoC mode, starting......")
elseif (${TARGET_ARCH} STREQUAL "pcie")
    add_definitions(-DPCIE_TARGET)
    link_directories(${PROJECT_SOURCE_DIR}/lib_pcie)
    message("PCIE mode, starting......")
endif()

add_definitions(-DDEBUG --std=c++17 -fPIC -Wall -Werror)
set(CMAKE_BUILD_TYPE "Debug")

add_executable(qwen2 demo.cpp)
target_link_libraries(qwen2 PUBLIC bmrt bmlib tokenizers_cpp tokenizers_c sentencepiece pthread dl)
target_compile_options(qwen2 PRIVATE -Wno-error=attributes)
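
The build mode above is chosen automatically: an aarch64 host builds for SoC, otherwise `TARGET_ARCH` (default `pcie`) selects PCIE. On a non-aarch64 host it can also be set explicitly with standard CMake cache-variable syntax, as in this minimal sketch:

``` shell
# select PCIE mode explicitly on an x86 host
cmake -DTARGET_ARCH=pcie ..
make
```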

models/Qwen3/cpp_demo/README.md

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
# how to make

mkdir -p build
cd build
cmake ..
make

# how to run

./qwen2 -m qwen3_xxx.bmodel -c config
config.json

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
{
  "architectures": [
    "Qwen3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 151643,
  "eos_token_id": 151645,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 2560,
  "initializer_range": 0.02,
  "intermediate_size": 9728,
  "max_position_embeddings": 40960,
  "max_window_layers": 36,
  "model_type": "qwen3",
  "num_attention_heads": 32,
  "num_hidden_layers": 36,
  "num_key_value_heads": 8,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000,
  "sliding_window": null,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.51.0",
  "use_cache": true,
  "use_sliding_window": false,
  "vocab_size": 151936
}
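
One deployment-relevant consequence of these values: the model uses grouped-query attention (32 query heads sharing `num_key_value_heads` = 8), so each token's KV cache holds 2 × 36 layers × 8 heads × 128 dims = 73,728 values. Assuming the cache stays in 16-bit precision, as the `w4bf16` quantization (4-bit weights, BF16 activations) suggests, that is about 144 KB per token, or roughly 72 MB for the 512-token sequence compiled above.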
generation_config.json

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
{
  "bos_token_id": 151643,
  "do_sample": true,
  "eos_token_id": [
    151645,
    151643
  ],
  "pad_token_id": 151643,
  "temperature": 0.6,
  "top_k": 20,
  "top_p": 0.95,
  "transformers_version": "4.51.0"
}
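
With `do_sample` enabled, these values drive the standard Huggingface sampling chain: logits are divided by the `temperature` of 0.6 (sharpening the distribution), candidates are cut to the 20 most probable tokens (`top_k`), tokens outside the 0.95 cumulative-probability mass are dropped (`top_p`), and the next token is drawn from what remains; generation stops at either EOS id, 151645 or 151643.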
