Skip to content

Commit 2a09eff

Browse files
CSWYF3634076huydhn
authored and committed
[Model] Add Ernie4.5 and Ernie4.5MoE Model Support (vllm-project#20220)
Signed-off-by: wangyafeng <wangyafeng@baidu.com>
1 parent 3f9c4b7 commit 2a09eff

File tree

5 files changed

+634
-0
lines changed

5 files changed

+634
-0
lines changed

docs/models/supported_models.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,8 @@ Specified using `--task generate`.
330330
| `DeepseekV2ForCausalLM` | DeepSeek-V2 | `deepseek-ai/DeepSeek-V2`, `deepseek-ai/DeepSeek-V2-Chat` etc. | | ✅︎ | ✅︎ |
331331
| `DeepseekV3ForCausalLM` | DeepSeek-V3 | `deepseek-ai/DeepSeek-V3-Base`, `deepseek-ai/DeepSeek-V3` etc. | | ✅︎ | ✅︎ |
332332
| `Dots1ForCausalLM` | dots.llm1 | `rednote-hilab/dots.llm1.base`, `rednote-hilab/dots.llm1.inst` etc. | | ✅︎ | ✅︎ |
333+
| `Ernie4_5_ForCausalLM` | Ernie4.5 | `baidu/ERNIE-4.5-0.3B-PT`, etc. | | ✅︎ | ✅︎ |
334+
| `Ernie4_5_MoeForCausalLM` | Ernie4.5MoE | `baidu/ERNIE-4.5-21B-A3B-PT`, `baidu/ERNIE-4.5-300B-A47B-PT`, etc. | | ✅︎ | ✅︎ |
333335
| `ExaoneForCausalLM` | EXAONE-3 | `LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct`, etc. | ✅︎ | ✅︎ | ✅︎ |
334336
| `FalconForCausalLM` | Falcon | `tiiuae/falcon-7b`, `tiiuae/falcon-40b`, `tiiuae/falcon-rw-7b`, etc. | | ✅︎ | ✅︎ |
335337
| `FalconMambaForCausalLM` | FalconMamba | `tiiuae/falcon-mamba-7b`, `tiiuae/falcon-mamba-7b-instruct`, etc. | | ✅︎ | ✅︎ |

tests/models/registry.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,10 @@ def check_available_online(
162162
trust_remote_code=True),
163163
"DeepseekV3ForCausalLM": _HfExamplesInfo("deepseek-ai/DeepSeek-V3", # noqa: E501
164164
trust_remote_code=True),
165+
"Ernie4_5_ForCausalLM": _HfExamplesInfo("baidu/ERNIE-4.5-0.3B-PT",
166+
trust_remote_code=True),
167+
"Ernie4_5_MoeForCausalLM": _HfExamplesInfo("baidu/ERNIE-4.5-21B-A3B-PT",
168+
trust_remote_code=True),
165169
"ExaoneForCausalLM": _HfExamplesInfo("LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"), # noqa: E501
166170
"Fairseq2LlamaForCausalLM": _HfExamplesInfo("mgleize/fairseq2-dummy-Llama-3.2-1B"), # noqa: E501
167171
"FalconForCausalLM": _HfExamplesInfo("tiiuae/falcon-7b"),

vllm/model_executor/models/ernie45.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
4+
# Copyright 2025 The Baidu team.
5+
# Copyright 2023 The vLLM team.
6+
# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
7+
#
8+
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
9+
# and OPT implementations in this library. It has been modified from its
10+
# original forms to accommodate minor architectural differences compared
11+
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
12+
#
13+
# Licensed under the Apache License, Version 2.0 (the "License");
14+
# you may not use this file except in compliance with the License.
15+
# You may obtain a copy of the License at
16+
#
17+
# http://www.apache.org/licenses/LICENSE-2.0
18+
#
19+
# Unless required by applicable law or agreed to in writing, software
20+
# distributed under the License is distributed on an "AS IS" BASIS,
21+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22+
# See the License for the specific language governing permissions and
23+
# limitations under the License.
24+
"""Inference-only Erine model compatible with HuggingFace weights."""
25+
from vllm.config import VllmConfig
26+
from vllm.model_executor.models.llama import LlamaForCausalLM
27+
28+
from .utils import PPMissingLayer
29+
30+
31+
class Ernie4_5_ForCausalLM(LlamaForCausalLM):
    """Ernie 4.5 dense model.

    Reuses the vLLM Llama implementation wholesale and then patches the
    constructed attention modules in place, since the HF-format Ernie 4.5
    dense checkpoints differ from Llama only in two attention details:

    1. Rotary embeddings use the non-Neox (interleaved) layout.
    2. ``o_proj`` in attention carries no bias.
    """

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        super().__init__(vllm_config=vllm_config, prefix=prefix)
        for decoder_layer in self.model.layers:
            # Under pipeline parallelism, layers owned by other ranks are
            # placeholders — nothing to patch there.
            if isinstance(decoder_layer, PPMissingLayer):
                continue
            attn = decoder_layer.self_attn
            attn.rotary_emb.is_neox_style = False
            # Drop the output-projection bias entirely and make sure the
            # linear layer does not try to add it during forward.
            attn.o_proj.bias = None
            attn.o_proj.skip_bias_add = True

0 commit comments

Comments
 (0)