Commit b5d7cba

add some asserts to make lint happy

Signed-off-by: Bill Nell <bnell@redhat.com>

Parent: e79b40a

File tree: 5 files changed, +7 -2 lines


vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py
Lines changed: 1 addition & 0 deletions

@@ -103,6 +103,7 @@ def apply(
     ):
         import deep_gemm as dg
         assert hidden_states.ndim == 3
+        assert self.block_shape is not None
 
         a1q = hidden_states
         _, N, K = w1.size()
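A note on the pattern (my gloss, not part of the commit): an `assert x is not None` is the standard way to narrow an `Optional` attribute for a static checker such as mypy, which is presumably the lint being satisfied here and in the other files below. A minimal, self-contained sketch with hypothetical names:

    from typing import Optional

    class ExampleKernel:
        # Hypothetical class; exists only to illustrate Optional narrowing.
        def __init__(self, block_shape: Optional[list[int]] = None):
            self.block_shape = block_shape

        def apply(self) -> int:
            # Without this assert, a checker flags the subscripts below,
            # since self.block_shape may be None at this point.
            assert self.block_shape is not None
            # After the assert, the type is narrowed to list[int].
            return self.block_shape[0] * self.block_shape[1]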

vllm/model_executor/layers/fused_moe/config.py
Lines changed: 1 addition & 1 deletion

@@ -341,7 +341,7 @@ def make(
 
         if quant_config is not None and isinstance(quant_config,
                                                    QuantizationConfig):
-            block_shape = quant_config.weight_block_size
+            block_shape = quant_config.get("weight_block_size", None)
             per_act_token_quant = False
             per_out_ch_quant = False
             quant_dtype: Optional[torch.dtype] = None
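An observation, not stated in the commit: replacing attribute access with a dict-style `get(...)` lookup means a missing entry yields the supplied default rather than raising. Illustrated with a hypothetical config class (not vLLM's actual QuantizationConfig):

    from typing import Any, Optional

    class ExampleQuantConfig:
        # Hypothetical stand-in used only to illustrate the lookup style.
        def __init__(self, **entries: Any):
            self._entries = dict(entries)

        def get(self, key: str, default: Optional[Any] = None) -> Any:
            # Missing keys return the default instead of raising, unlike
            # plain attribute access, which raises AttributeError.
            return self._entries.get(key, default)

    cfg = ExampleQuantConfig(weight_block_size=[128, 128])
    block_shape = cfg.get("weight_block_size", None)  # [128, 128]
    missing = cfg.get("activation_scheme", None)      # None, no error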

vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
Lines changed: 2 additions & 0 deletions

@@ -92,6 +92,7 @@ def workspace_shapes(
         self, a: torch.Tensor, aq: torch.Tensor, M: int, N: int, K: int,
         topk: int, global_num_experts: int, local_num_experts: int
     ) -> tuple[tuple[int, ...], tuple[int, ...], tuple[int, ...], torch.dtype]:
+        assert self.block_shape is not None
         # We use global_num_experts due to how moe_align_block_size handles
         # expert_maps.
         num_experts = global_num_experts

@@ -124,6 +125,7 @@ def apply(
         expert_num_tokens: Optional[torch.Tensor],
     ):
         import deep_gemm as dg
+        assert self.block_shape is not None
 
         a1q = hidden_states
         _, N, K = w1.size()

vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py
Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@ def __init__(
         allow_deep_gemm: bool = False,
     ):
         super().__init__(
-            FusedMoEQuantConfig(
+            FusedMoEQuantConfig.make(
                 use_fp8_w8a8=use_fp8_w8a8,
                 use_int8_w8a8=use_int8_w8a8,
                 use_int8_w8a16=use_int8_w8a16,
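The switch from the bare constructor to `FusedMoEQuantConfig.make(...)` suggests a factory classmethod. As a general sketch of that pattern (hypothetical names and defaults, not vLLM's actual implementation), a `make` method can validate arguments and derive defaults in one place:

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class ExampleMoEQuantConfig:
        use_fp8_w8a8: bool
        block_shape: Optional[list[int]]

        @classmethod
        def make(cls,
                 use_fp8_w8a8: bool = False,
                 block_shape: Optional[list[int]] = None,
                 ) -> "ExampleMoEQuantConfig":
            # A factory centralizes validation and derived defaults instead
            # of repeating them at every construction site. The [128, 128]
            # default here is illustrative only.
            if use_fp8_w8a8 and block_shape is None:
                block_shape = [128, 128]
            return cls(use_fp8_w8a8=use_fp8_w8a8, block_shape=block_shape)

    cfg = ExampleMoEQuantConfig.make(use_fp8_w8a8=True)
    print(cfg.block_shape)  # [128, 128]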

vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
Lines changed: 2 additions & 0 deletions

@@ -373,6 +373,8 @@ def apply(
             global_num_experts=global_num_experts,
             expert_map=expert_map)
 
+        assert self.fused_experts_func is not None
+
         return self.fused_experts_func(
             hidden_states=x,
             w1=layer.w13_weight,
