vllm/model_executor/layers/fused_moe (1 file changed: +12, -3)

@@ -6,11 +6,14 @@
 import torch

 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
+from vllm.logger import init_logger
 from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
 from vllm.model_executor.layers.fused_moe.utils import (
     _validate_scale_shape, moe_kernel_quantize_input)
 from vllm.utils import cdiv, round_up

+logger = init_logger(__name__)
+

 def pplx_hidden_dim_scale_bytes(
     max_num_tokens: int,
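The first hunk wires a module-level logger into the file via `init_logger`, vLLM's logging factory, so that `prepare()` can emit a one-time warning instead of hard-failing. As a minimal sketch of the deduplication pattern that `logger.warn_once` provides, assuming only the standard library (`warn_once_stdlib` and its `lru_cache` trick are illustrative stand-ins, not vLLM's implementation):

    import functools
    import logging

    logger = logging.getLogger(__name__)

    @functools.lru_cache(maxsize=None)
    def warn_once_stdlib(msg: str) -> None:
        # lru_cache ensures each distinct message is logged exactly once
        # per process, however often the hot path takes this branch.
        logger.warning(msg)

    warn_once_stdlib("The PPLX backend does not support expert mapping.")
    warn_once_stdlib("The PPLX backend does not support expert mapping.")  # suppressed

The second hunk then replaces the hard assert in `prepare()` with that one-time warning, dropping the unsupported `expert_map` instead of crashing: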
@@ -101,9 +104,15 @@ def prepare(
         hidden_dim = a1.size(-1)  # K

         assert topk_ids.size(0) == num_tokens
-        assert expert_map is None, """with expert map, -1 id is used for
-            non-local token; this causes error when casting ids to the
-            topk_indices_dtype() uint32"""
+        # expert_map should be None because with expert map, -1 id is used for
+        # non-local token; this causes error when casting ids to the
+        # topk_indices_dtype() int32
+        #
+        if expert_map is not None:
+            logger.warn_once(
+                "The PPLX backend does not support expert mapping. "
+                "The provided `expert_map` will be ignored.")
+            expert_map = None  # noqa: F841

         # Is this always going to be a1.device?
         device = a1.device
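Why the old assert existed, and why `expert_map` must stay `None` on this path: with expert mapping enabled, `-1` marks tokens routed to non-local experts, and `-1` has no faithful representation once the ids are cast to an unsigned 32-bit dtype; its bit pattern reads back as `2**32 - 1`, a wildly out-of-range expert id. A minimal plain-Python illustration of that wraparound (the variable names here are illustrative, not from the diff):

    import struct

    sentinel = -1  # marker for a non-local token under an expert map
    # Reinterpret the int32 bit pattern of -1 as uint32, which is what a
    # cast to an unsigned topk_indices_dtype() would effectively do.
    as_uint32 = struct.unpack("<I", struct.pack("<i", sentinel))[0]
    print(as_uint32)  # 4294967295, i.e. 2**32 - 1: an out-of-range expert id
    assert as_uint32 == 2**32 - 1

Turning the assert into a one-time warning is a deliberate softening: callers that pass an `expert_map` now get degraded-but-working behavior (the map is ignored) rather than a crash, while the log line still surfaces the unsupported configuration.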