Commit 639b868

fix comments + lint

Signed-off-by: Bill Nell <bnell@redhat.com>
1 parent: 3e0acf9

4 files changed (+22 / -17 lines)

tests/kernels/moe/test_batched_moe.py

Lines changed: 3 additions & 0 deletions
@@ -77,6 +77,9 @@ def ref_impl(
     B_scale: Optional[torch.Tensor],
     block_shape: Optional[list[int]],
 ) -> torch.Tensor:
+    assert (A.dtype.itemsize > 1
+            or (A_scale is not None and B_scale is not None))
+
     num_expert_tokens_cpu = num_expert_tokens.clone()
     num_expert_tokens_cpu = num_expert_tokens_cpu.to(device="cpu")
     num_experts = num_expert_tokens.size(0)
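
The new assert makes the fp8 precondition explicit: a one-byte activation dtype is only valid when both scale tensors are supplied. A minimal standalone sketch of the same guard, assuming a PyTorch build with fp8 dtypes (2.1+):

    import torch

    # Wide dtypes satisfy the first clause on their own: no scales needed.
    a = torch.zeros(4, 8, dtype=torch.bfloat16)
    assert a.dtype.itemsize > 1

    # One-byte fp8 dtypes fail the first clause, so scales must be present.
    a_q = torch.zeros(4, 8, dtype=torch.float8_e4m3fn)
    a_scale = torch.ones(4, 1)  # hypothetical per-row scales
    b_scale = torch.ones(8, 1)
    assert (a_q.dtype.itemsize > 1
            or (a_scale is not None and b_scale is not None))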

tests/kernels/moe/utils.py

Lines changed: 8 additions & 8 deletions
@@ -167,31 +167,31 @@ def make_test_weights(
         assert quant_dtype == torch.float8_e4m3fn, "only fp8 supported"
         w1_l = [None] * e
         w2_l = [None] * e
-        w1_s = [None] * e
-        w2_s = [None] * e
+        w1_s_l = [None] * e
+        w2_s_l = [None] * e
         for idx in range(e):
             if block_shape is not None:
-                w1_l[idx], w1_s[idx] = per_block_cast_to_fp8(
+                w1_l[idx], w1_s_l[idx] = per_block_cast_to_fp8(
                     w1_16[idx],
                     block_shape[1],
                 )
-                w2_l[idx], w2_s[idx] = per_block_cast_to_fp8(
+                w2_l[idx], w2_s_l[idx] = per_block_cast_to_fp8(
                     w2_16[idx],
                     block_shape[1],
                 )
             else:
-                tmp, w1_s[idx] = per_token_group_quant_fp8(
+                tmp, w1_s_l[idx] = per_token_group_quant_fp8(
                     w1_16[idx].view(1, -1), w1_16[idx].numel())
                 w1_l[idx] = tmp.view(*w1_16[idx].shape)
 
-                tmp, w2_s[idx] = per_token_group_quant_fp8(
+                tmp, w2_s_l[idx] = per_token_group_quant_fp8(
                     w2_16[idx].view(1, -1), w2_16[idx].numel())
                 w2_l[idx] = tmp.view(*w2_16[idx].shape)
 
         w1 = torch.stack(w1_l)
         w2 = torch.stack(w2_l)
-        w1_s = torch.stack(w1_s)
-        w2_s = torch.stack(w2_s)
+        w1_s = torch.stack(w1_s_l)
+        w2_s = torch.stack(w2_s_l)
         if w1_s.ndim == 2:
             assert w1_s.shape[-1] == 1
             w1_s = w1_s.view(-1, 1, 1)
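
The rename is a lint fix: w1_s and w2_s were first bound to Python lists and later rebound to the stacked tensors, an incompatible redefinition that type checkers flag. Giving the lists their own names (w1_s_l, w2_s_l) leaves each name with a single type. A sketch of the pattern, with hypothetical shapes:

    import torch

    e = 4                                          # hypothetical expert count
    w1_s_l = [torch.ones(2, 3) for _ in range(e)]  # per-expert scales (list)
    w1_s = torch.stack(w1_s_l)                     # stacked scales (Tensor)
    assert w1_s.shape == (e, 2, 3)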

tests/kernels/utils.py

Lines changed: 4 additions & 0 deletions
@@ -1075,6 +1075,10 @@ def torch_experts(
             or (global_num_experts == w1.shape[0] and expert_map is None)
             or (expert_map is not None
                 and global_num_experts == expert_map.shape[0]))
+
+    assert (quant_dtype is None
+            or (w1_scale is not None and w2_scale is not None))
+
     M, K = a.shape
     #N = w1.shape[1]
     topk = topk_ids.shape[1]
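
As in the batched-MoE test, this assert encodes the quantization contract: a non-None quant_dtype implies both weight scales are provided. A minimal sketch of the same implication (check_scales is a hypothetical helper, not part of the test code):

    from typing import Optional

    import torch

    def check_scales(quant_dtype: Optional[torch.dtype],
                     w1_scale: Optional[torch.Tensor],
                     w2_scale: Optional[torch.Tensor]) -> None:
        # A quantized dtype request requires both scale tensors.
        assert (quant_dtype is None
                or (w1_scale is not None and w2_scale is not None))

    check_scales(None, None, None)                    # unquantized: fine
    check_scales(torch.float8_e4m3fn,                 # quantized with scales
                 torch.ones(1, 1), torch.ones(1, 1))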

vllm/model_executor/layers/fused_moe/modular_kernel.py

Lines changed: 7 additions & 9 deletions
@@ -86,17 +86,13 @@ def _moe_problem_size(
 
 class FusedMoEActivationFormat(Enum):
     """
-    Add comment
+    The standard activation format (num_tokens, hidden dim).
     """
     Standard = "standard",
     """
-    Add comment
+    The batched experts format (num experts, max tokens per expert, hidden dim)
     """
-    TopkReplicated = "topk_replicated",
-    """
-    Add comment
-    """
-    BatchedExperts = "standard",
+    BatchedExperts = "batched_experts",
 
 
 # TODO: pass FusedMoEParallelConfig in as ctor parameter?
@@ -171,7 +167,8 @@ def finalize(
     @abstractmethod
     def activation_format(self) -> FusedMoEActivationFormat:
         """
-        Add comment
+        A property indicating the output format of the activations for the
+        'prepare' method.
         """
         raise NotImplementedError
 
@@ -217,7 +214,8 @@ def __init__(
     def activation_formats(
            self) -> tuple[FusedMoEActivationFormat, FusedMoEActivationFormat]:
         """
-        Add comment
+        A property which is a tuple of the input and output activation formats
+        for the 'apply' method.
         """
         raise NotImplementedError
 
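
Besides replacing the "Add comment" placeholders with real docstrings, this hunk fixes a genuine bug: BatchedExperts previously reused the value "standard", and Python's Enum treats members with equal values as aliases of one another, so Standard and BatchedExperts were silently the same member. A minimal sketch of the aliasing behavior, independent of vLLM:

    from enum import Enum

    class Broken(Enum):
        Standard = "standard",        # trailing comma: the value is a tuple
        BatchedExperts = "standard",  # equal value => alias of Standard

    assert Broken.BatchedExperts is Broken.Standard   # one member, two names

    class Fixed(Enum):
        Standard = "standard",
        BatchedExperts = "batched_experts",

    assert Fixed.BatchedExperts is not Fixed.Standard  # now distinct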
