Commit 2b82db5

support qwen3

1 parent 9037a3a commit 2b82db5

28 files changed: +1,517,523 / −3 lines

README.md

Lines changed: 5 additions & 3 deletions
@@ -15,6 +15,7 @@

 # Latest Updates! 🔥🔥🔥

+- **2025.04.29**: 🚀 Qwen's latest reasoning model **Qwen3** is now supported on BM1684X/1688; see the [Qwen3 Demo](./models/Qwen3/) for details
 - **2025.03.07**: 🚀 Qwen's latest reasoning models **QWQ-32B** and **DeepSeek-R1-Distill-Qwen-32B** are adapted in the 1684X multi-chip demo; see the [LLM Template](./template/) for details
 - **2025.02.05**: 🚀 A DeepSeek moment!! We adapted the **DeepSeek-R1-Distill-Qwen** series, including the 1.5B, 7B, and 14B versions; see the [LLM Template](./template/) for details

@@ -67,6 +68,7 @@
 |Qwen2.5-1.5B |:white_check_mark:|:white_check_mark:|[LINK](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) |
 |Qwen2.5-7B |:white_check_mark:| |[LINK](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) |
 |QWQ-32B |:white_check_mark:| |[LINK](https://huggingface.co/Qwen/QWQ-32B) |
+|Qwen3-4B |:white_check_mark:| |[LINK](https://huggingface.co/Qwen/Qwen3-4B) |
 |WizardCoder-15B |:white_check_mark:| |[LINK](https://huggingface.co/WizardLM/WizardCoder-15B-V1.0) |
 |Yi-6B-chat |:white_check_mark:| |[LINK](https://huggingface.co/01-ai/Yi-6B-Chat) |
 |Yi-34B-chat |:white_check_mark:| |[LINK](https://huggingface.co/01-ai/Yi-34B-Chat) |
@@ -75,9 +77,9 @@
 In addition, there are also some multimodal models, as follows:
 |Model |BM1684X |BM1688 |Huggingface Link |
 |:- |:- |:- |:- |
-|Qwen-VL-Chat |:white_check_mark:| |[LINK](https://huggingface.co/Qwen/Qwen-VL-Chat) |
-|Qwen2-VL-Chat |:white_check_mark:| |[LINK](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct) |
-|Qwen2.5-VL-Chat |:white_check_mark:| |[LINK](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct) |
+|Qwen-VL |:white_check_mark:| |[LINK](https://huggingface.co/Qwen/Qwen-VL-Chat) |
+|Qwen2-VL |:white_check_mark:| |[LINK](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct) |
+|Qwen2.5-VL |:white_check_mark:| |[LINK](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct) |
 |InternVL2-4B |:white_check_mark:|:white_check_mark:|[LINK](https://huggingface.co/OpenGVLab/InternVL2-4B) |
 |InternVL2-2B |:white_check_mark:|:white_check_mark:|[LINK](https://huggingface.co/OpenGVLab/InternVL2-2B) |
 |Stable Diffusion |:white_check_mark:| |[LINK](https://huggingface.co/runwayml/stable-diffusion-v1-5) |

models/Qwen3/README.md

Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@

# Qwen3

This project deploys the large language model [Qwen3](https://huggingface.co/Qwen/Qwen3-4B) on BM1684X/BM1688. The model is converted into a bmodel with the [TPU-MLIR](https://github.com/sophgo/tpu-mlir) compiler and then deployed with C++ code to a BM1684X/BM1688 PCIE environment or an SoC environment.

This document covers how to compile the bmodel and how to run it in a BM1684X/BM1688 environment. The compilation step can be skipped by downloading a prebuilt bmodel directly from the following link:

``` shell
python3 -m dfss --url=open@sophgo.com:/ext_model_information/LLM/LLM-TPU/qwen3-4b_w4bf16_seq512_bm1684x_1dev_20250429_120231.bmodel
```
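
If the `dfss` tool is not already present, it can presumably be installed from pip first (assumption: `dfss` is the pip-distributed Sophgo file-transfer utility, as used in other LLM-TPU model guides):

``` shell
# assumed prerequisite: the Sophgo dfss download utility from pip
pip3 install dfss --upgrade
```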

## Compiling the LLM model

This section describes how to compile the LLM into a bmodel.

#### 1. Download `Qwen3-4B` from Huggingface

(The model is large, so the download takes a while.)

``` shell
# download the model
git lfs install
git clone git@hf.co:Qwen/Qwen3-4B
# for the 8B model, use:
git clone git@hf.co:Qwen/Qwen3-8B
```

#### 2. Pull the docker image and start a container

``` shell
docker pull sophgo/tpuc_dev:latest

# myname1234 is just an example, you can set your own name
docker run --privileged --name myname1234 -v $PWD:/workspace -it sophgo/tpuc_dev:latest
```

The rest of this document assumes everything is under the docker container's `/workspace` directory.

#### 3. Download and build the `TPU-MLIR` code

(A prebuilt release package can also be downloaded and extracted instead.)

``` shell
cd /workspace
git clone git@github.com:sophgo/tpu-mlir.git
cd tpu-mlir
source ./envsetup.sh # set up environment variables
./build.sh # build mlir
```
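
Once the build finishes, the TPU-MLIR tools should be on the shell's `PATH`. A quick sanity check, as a sketch assuming `llm_convert.py` follows the usual argparse convention and responds to `--help`:

``` shell
# should print the converter's usage text if envsetup.sh took effect
llm_convert.py --help
```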

#### 4. Compile the model into a bmodel

``` shell
# if a transformers version error is reported, run: pip3 install transformers --upgrade
llm_convert.py -m /workspace/Qwen3-4B -s 512 --quantize w4bf16 -g 128 -c bm1684x --out_dir qwen3_4b
```
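
For reference, `-s 512` sets the maximum sequence length, `--quantize w4bf16` selects 4-bit weights with BF16 activations, `-g 128` is the quantization group size, and `-c bm1684x` picks the target chip. A hedged sketch of the corresponding BM1688 build of the 8B checkpoint (assumption: `bm1688` is accepted by `-c` with the same flag set, as the BM1684X/BM1688 support claim above suggests):

``` shell
# assumed variant: same flags, different chip and checkpoint
llm_convert.py -m /workspace/Qwen3-8B -s 512 --quantize w4bf16 -g 128 -c bm1688 --out_dir qwen3_8b
```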

## Building and running the demos

Copy the program to the PCIE or SoC environment before building it there.

#### python demo

Build the library, which produces a `chat.cpython*.so` file, then copy that file into the directory containing `pipeline.py`:

``` shell
cd python_demo
mkdir build
cd build && cmake .. && make && cp *cpython* .. && cd ..
```

* python demo

``` shell
python3 pipeline.py -m qwen3_xxx.bmodel -c config
```

Here `-m` is the actual bmodel storage path and `-c` is the configuration directory path.

#### cpp demo

``` shell
mkdir -p build
cd build
cmake .. && make && cd ..

# how to run (the executable is named qwen2 in CMakeLists.txt)
./qwen2 -m qwen3_xxx.bmodel -c config
```

models/Qwen3/cpp_demo/CMakeLists.txt

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
cmake_minimum_required(VERSION 3.10)
project(qwen2)

if (NOT DEFINED TARGET_ARCH)
    set(TARGET_ARCH pcie)
endif()

include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(/opt/sophon/libsophon-current/include)
link_directories(/opt/sophon/libsophon-current/lib)

if (${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "aarch64")
    add_definitions(-DSOC_TARGET)
    link_directories(${PROJECT_SOURCE_DIR}/lib_soc)
    message("SoC mode, starting......")
elseif (${TARGET_ARCH} STREQUAL "pcie")
    add_definitions(-DPCIE_TARGET)
    link_directories(${PROJECT_SOURCE_DIR}/lib_pcie)
    message("PCIE mode, starting......")
endif()

add_definitions(-DDEBUG --std=c++17 -fPIC -Wall -Werror)
set(CMAKE_BUILD_TYPE "Debug")

add_executable(qwen2 demo.cpp)
target_link_libraries(qwen2 PUBLIC bmrt bmlib tokenizers_cpp tokenizers_c sentencepiece pthread dl)
target_compile_options(qwen2 PRIVATE -Wno-error=attributes)
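
The build mode above is chosen automatically: an aarch64 host builds for SoC, otherwise `TARGET_ARCH` (default `pcie`) selects PCIE. On a non-aarch64 host it can also be set explicitly with standard CMake cache-variable syntax, as in this minimal sketch:

``` shell
# select PCIE mode explicitly on an x86 host
cmake -DTARGET_ARCH=pcie ..
make
```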

models/Qwen3/cpp_demo/README.md

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
# how to make

mkdir -p build
cd build
cmake ..
make

# how to run

./qwen2 -m qwen3_xxx.bmodel -c config
config.json

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
{
  "architectures": [
    "Qwen3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 151643,
  "eos_token_id": 151645,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 2560,
  "initializer_range": 0.02,
  "intermediate_size": 9728,
  "max_position_embeddings": 40960,
  "max_window_layers": 36,
  "model_type": "qwen3",
  "num_attention_heads": 32,
  "num_hidden_layers": 36,
  "num_key_value_heads": 8,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000,
  "sliding_window": null,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.51.0",
  "use_cache": true,
  "use_sliding_window": false,
  "vocab_size": 151936
}
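
One deployment-relevant consequence of these values: the model uses grouped-query attention (32 query heads sharing `num_key_value_heads` = 8), so each token's KV cache holds 2 × 36 layers × 8 heads × 128 dims = 73,728 values. Assuming the cache stays in 16-bit precision, as the `w4bf16` quantization (4-bit weights, BF16 activations) suggests, that is about 144 KB per token, or roughly 72 MB for the 512-token sequence compiled above.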
generation_config.json

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
{
  "bos_token_id": 151643,
  "do_sample": true,
  "eos_token_id": [
    151645,
    151643
  ],
  "pad_token_id": 151643,
  "temperature": 0.6,
  "top_k": 20,
  "top_p": 0.95,
  "transformers_version": "4.51.0"
}
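
With `do_sample` enabled, these values drive the standard Huggingface sampling chain: logits are divided by the `temperature` of 0.6 (sharpening the distribution), candidates are cut to the 20 most probable tokens (`top_k`), tokens outside the 0.95 cumulative-probability mass are dropped (`top_p`), and the next token is drawn from what remains; generation stops at either EOS id, 151645 or 151643.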
