Commit 3de2ed7

[Bugfix] Remove assertion of expert_map being None (#20714)
Signed-off-by: Ming Yang <yming@meta.com>
Signed-off-by: Ming Yang <minos.future@gmail.com>
1 parent 299252e commit 3de2ed7

File tree

1 file changed: +12 −3 lines changed


vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py

Lines changed: 12 additions & 3 deletions
@@ -6,11 +6,14 @@
 import torch
 
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
 from vllm.model_executor.layers.fused_moe.utils import (
     _validate_scale_shape, moe_kernel_quantize_input)
 from vllm.utils import cdiv, round_up
 
+logger = init_logger(__name__)
+
 
 def pplx_hidden_dim_scale_bytes(
     max_num_tokens: int,

@@ -101,9 +104,15 @@ def prepare(
         hidden_dim = a1.size(-1)  # K
 
         assert topk_ids.size(0) == num_tokens
-        assert expert_map is None, """with expert map, -1 id is used for
-            non-local token; this causes error when casting ids to the
-            topk_indices_dtype() uint32"""
+        # expert_map should be None because with expert map, -1 id is used for
+        # non-local token; this causes error when casting ids to the
+        # topk_indices_dtype() int32
+        #
+        if expert_map is not None:
+            logger.warn_once(
+                "The PPLX backend does not support expert mapping. "
+                "The provided `expert_map` will be ignored.")
+            expert_map = None  #noqa: F841
 
         # Is this always going to be a1.device?
         device = a1.device
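For context on the comment introduced above: with expert parallelism, expert_map marks tokens routed to non-local experts with an expert id of -1. Because the PPLX kernels index experts with an unsigned dtype, casting such ids does not preserve the -1 sentinel but wraps it around to a huge out-of-range index. The following sketch is not part of the commit; it uses numpy purely to make the cast behavior visible and illustrates the failure mode the removed assertion was guarding against.

# Illustrative only -- shows why a -1 "non-local token" id cannot survive a
# cast to an unsigned index dtype such as uint32.
import numpy as np

topk_ids = np.array([0, 3, -1, 2], dtype=np.int64)  # -1 marks a non-local token
wrapped = topk_ids.astype(np.uint32)
print(wrapped)  # [0 3 4294967295 2] -> 4294967295 is not a valid expert index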
