Commit 2d1184a

[Fix] fix expert_parallel bug in decoder stage (#2848)

1 parent 17314ee commit 2d1184a
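This commit changes how the decoder-stage MoE code reads num_max_dispatch_tokens_per_rank: instead of layer.model_config, it goes through layer.fd_config.model_config. Below is a minimal sketch of the config nesting this fix assumes; the class names (ModelConfig, FDConfig, FusedMoE) and the default value are hypothetical stand-ins, not taken from the repository.

    # Minimal sketch, assuming the MoE layer holds a top-level fd_config whose
    # nested model_config carries num_max_dispatch_tokens_per_rank, and the
    # layer itself has no model_config attribute (which is what the old,
    # buggy access path presumed). All names here are illustrative only.
    from dataclasses import dataclass


    @dataclass
    class ModelConfig:
        num_max_dispatch_tokens_per_rank: int = 128  # illustrative value


    @dataclass
    class FDConfig:
        model_config: ModelConfig


    class FusedMoE:
        def __init__(self, fd_config: FDConfig):
            self.fd_config = fd_config  # note: no self.model_config attribute


    layer = FusedMoE(FDConfig(ModelConfig()))

    # Old access path used in the decoder stage would fail here:
    #   layer.model_config.num_max_dispatch_tokens_per_rank  -> AttributeError
    # Access path after this commit:
    print(layer.fd_config.model_config.num_max_dispatch_tokens_per_rank)  # 128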

File tree

2 files changed: +3 -3 lines changed

fastdeploy/model_executor/layers/moe/fused_moe_backend_base.py

Lines changed: 1 addition & 1 deletion
@@ -49,7 +49,7 @@ def init_ep(self, layer: nn.Layer) -> None:
             from .ep import EPDecoderRunner
             self.ep_decoder_runner = EPDecoderRunner(
                 layer.top_k, layer.hidden_size, layer.num_experts,
-                layer.model_config.num_max_dispatch_tokens_per_rank,
+                layer.fd_config.model_config.num_max_dispatch_tokens_per_rank,
                 layer.ep_size, layer.ep_rank)
         else:
             from .ep import EPPrefillRunner

fastdeploy/model_executor/layers/moe/fused_moe_deepgemm_backend.py

Lines changed: 2 additions & 2 deletions
@@ -241,7 +241,7 @@ def apply_ep_decode(
             [
                 layer.num_local_experts,
                 layer.ep_size *
-                layer.model_config.num_max_dispatch_tokens_per_rank,
+                layer.fd_config.model_config.num_max_dispatch_tokens_per_rank,
                 layer.moe_intermediate_size * 2,
             ],
             dtype=paddle.bfloat16,
@@ -251,7 +251,7 @@ def apply_ep_decode(
             [
                 layer.num_local_experts,
                 layer.ep_size *
-                layer.model_config.num_max_dispatch_tokens_per_rank,
+                layer.fd_config.model_config.num_max_dispatch_tokens_per_rank,
                 layer.hidden_size,
             ],
             dtype=paddle.bfloat16,
