
Commit 525be24

[Bug fix] Fixed the garbled text issues in Qwen3-8B (#2737)
* fix qwen3.py
* update
* update lm_head tie_word_embeddings
* update tie_word_embeddings
* fix
* fix tie_word_embedding not in config.json

Co-authored-by: lizexu <lizexu@baidu.com>
1 parent d0f4d6b commit 525be24

File tree

2 files changed, +10 -5 lines changed


fastdeploy/model_executor/models/qwen3.py

Lines changed: 5 additions & 5 deletions
@@ -164,7 +164,6 @@ def __init__(

         self.num_layers = fd_config.model_config.num_layers
         fd_config.model_config.prefix_name = "model"
-        fd_config.model_config.tie_word_embeddings = True

         self.embeddings = VocabParallelEmbedding(
             fd_config=fd_config,
@@ -240,14 +239,13 @@ def __init__(self, fd_config: FDConfig):
         self.model = Qwen3Model(fd_config=fd_config)

         self.ori_vocab_size = fd_config.model_config.ori_vocab_size
-
+        self.tie_word_embeddings = fd_config.model_config.tie_word_embeddings
         self.lm_head = ParallelLMHead(
             fd_config=fd_config,
             embedding_dim=fd_config.model_config.hidden_size,
             num_embeddings=fd_config.model_config.vocab_size,
-            prefix=(f"{fd_config.model_config.prefix_name}.embed_tokens"),
+            prefix="lm_head",
         )
-        self.tie_word_embeddings = fd_config.model_config.tie_word_embeddings

     @classmethod
     def name(self):
@@ -269,7 +267,8 @@ def set_state_dict(self, state_dict):
         if self.tie_word_embeddings:
             self.lm_head.out_linear.weight.set_value(
                 self.model.embeddings.word_embeddings.weight.transpose([1, 0]))
-        self.lm_head.load_state_dict(state_dict)
+        else:
+            self.lm_head.load_state_dict(state_dict)

     def compute_logits(self, hidden_states: paddle.Tensor):
         """
@@ -324,6 +323,7 @@ def get_tensor_parallel_split_mappings(num_layers):

             base_actions = {
                 # Row Linear
+                "lm_head.weight": partial(fn, is_column=True),
                 "embed_tokens.weight": partial(fn, is_column=False),
                 "layers.0.self_attn.o_proj.weight": partial(fn,
                                                              is_column=False),
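For context on the qwen3.py change: when tie_word_embeddings is enabled, the output projection (lm_head) reuses the input embedding matrix instead of loading a separate weight, which is why set_state_dict now copies the transposed embedding into lm_head and only calls load_state_dict in the else branch. The snippet below is a minimal, self-contained sketch of that weight-tying pattern using plain paddle.nn layers rather than FastDeploy's VocabParallelEmbedding/ParallelLMHead; the TinyLM class and its sizes are illustrative only, not code from this repository.

import paddle
import paddle.nn as nn

class TinyLM(nn.Layer):
    """Toy model showing embedding/LM-head weight tying (illustrative only)."""

    def __init__(self, vocab_size=100, hidden_size=16, tie_word_embeddings=True):
        super().__init__()
        self.embed_tokens = nn.Embedding(vocab_size, hidden_size)   # [vocab, hidden]
        self.lm_head = nn.Linear(hidden_size, vocab_size, bias_attr=False)  # weight: [hidden, vocab]
        if tie_word_embeddings:
            # Copy the transposed embedding matrix into the output projection,
            # mirroring the set_value(...transpose([1, 0])) call in the diff above.
            self.lm_head.weight.set_value(
                self.embed_tokens.weight.transpose([1, 0]))

    def forward(self, token_ids):
        hidden = self.embed_tokens(token_ids)   # [batch, seq, hidden]
        return self.lm_head(hidden)             # [batch, seq, vocab]

model = TinyLM()
logits = model(paddle.to_tensor([[1, 2, 3]]))
print(logits.shape)  # expected: [1, 3, 100]

In the real model the copy happens in set_state_dict, after the checkpoint is read, so a tied checkpoint does not need to ship a separate lm_head weight.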

fastdeploy/worker/worker_process.py

Lines changed: 5 additions & 0 deletions
@@ -577,12 +577,15 @@ def initialize_fd_config(config_or_args) -> FDConfig:
     # Get model config from model directory
     model_config_dict, _ = ModelConfig.get_config_dict(config_or_args.model_name_or_path)

+
+
     # Handle MoE related configs
     if 'num_experts' in model_config_dict:
         model_config_dict['moe_num_experts'] = model_config_dict.pop('num_experts')
     if 'num_experts_per_tok' in model_config_dict:
         model_config_dict['moe_topk'] = model_config_dict.pop('num_experts_per_tok')

+
     # Set default values for model config
     model_config_dict["head_dim"] = model_config_dict.get(
         "head_dim", model_config_dict["hidden_size"] // model_config_dict["num_attention_heads"])
@@ -592,6 +595,8 @@ def initialize_fd_config(config_or_args) -> FDConfig:
     model_config = ModelConfig.from_dict(model_config_dict)
     model_config.head_dim = model_config_dict["head_dim"]
     paddle.set_default_dtype(config_or_args.dtype)
+    if 'tie_word_embeddings' in model_config_dict:
+        model_config_dict['tie_word_embeddings'] = model_config_dict.pop('tie_word_embeddings')

     # Initialize all config components
     device_config = DeviceConfig()
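Together with the removal of the hard-coded tie_word_embeddings = True in qwen3.py, this hunk means the flag is now taken from the model's config.json, and the last commit bullet notes the case where the key is missing there. Below is a small hypothetical helper, not FastDeploy code, showing the usual guarded read with a fallback default; the function name and the default value are assumptions for illustration.

import json

def read_tie_word_embeddings(config_path, default=False):
    """Hypothetical helper: read tie_word_embeddings from a config.json,
    falling back to `default` when the key is absent."""
    with open(config_path, "r", encoding="utf-8") as f:
        config_dict = json.load(f)
    # Guarded access keeps models whose config.json omits the key loadable,
    # analogous to the `if 'tie_word_embeddings' in model_config_dict:` check above.
    return config_dict.get("tie_word_embeddings", default)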
