
Commit 680de26

lint

Signed-off-by: Bill Nell <bnell@redhat.com>

Parent: 1d5de67

5 files changed: +13 -14 lines

vllm/model_executor/layers/fused_moe/config.py

Lines changed: 4 additions & 4 deletions

@@ -250,10 +250,10 @@ def quant_dtype(self) -> Optional[torch.dtype]:

     @property
     def block_shape(self) -> Optional[list[int]]:
-        if self.quant_config is not None:
-            return self.quant_config.block_shape
-        else:
-            return None
+        if self.quant_config is not None:
+            return self.quant_config.block_shape
+        else:
+            return None

     @property
     def per_act_token_quant(self) -> bool:
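This hunk is a whitespace-only lint fix: the removed and added lines carry identical text and differ only in indentation, which is lost in this plain-text rendering. For orientation, here is a minimal runnable sketch of the guarded-property pattern the hunk touches; the stub classes are illustrative stand-ins, not vLLM's real config types.

    from typing import Optional

    class QuantConfigStub:
        # Illustrative stand-in for the real quantization config.
        def __init__(self, block_shape: Optional[list[int]] = None):
            self.block_shape = block_shape

    class MoEConfigStub:
        def __init__(self, quant_config: Optional[QuantConfigStub] = None):
            self.quant_config = quant_config

        @property
        def block_shape(self) -> Optional[list[int]]:
            # Guard the optional sub-config before forwarding its field.
            if self.quant_config is not None:
                return self.quant_config.block_shape
            else:
                return None

    assert MoEConfigStub().block_shape is None
    assert MoEConfigStub(QuantConfigStub([128, 128])).block_shape == [128, 128]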

vllm/model_executor/layers/fused_moe/fused_batched_moe.py

Lines changed: 2 additions & 2 deletions

@@ -560,8 +560,8 @@ def prepare(
                 quant_config.per_act_token_quant,
                 quant_config.block_shape,
             ))
-            if (quant_config.block_shape is None and
-                    not quant_config.per_act_token_quant):
+            if (quant_config.block_shape is None
+                    and not quant_config.per_act_token_quant):
                 b_a1_scale[idx] = b_s
             else:
                 #print(f"XXXXX rhs={rhs.shape} b_s={b_s.shape}")

vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 1 addition & 4 deletions

@@ -94,8 +94,7 @@ def init_prepare_finalize(self, moe: FusedMoEConfig,
         )

         logger.debug("All2All %s, %s = %s/%s", moe.quant_dtype,
-                     moe.block_shape, hidden_dim_bytes,
-                     hidden_scale_bytes)
+                     moe.block_shape, hidden_dim_bytes, hidden_scale_bytes)

         all_to_all_args = dict(
             max_num_tokens=moe.max_num_tokens,
@@ -225,8 +224,6 @@ def select_gemm_impl(self, prepare_finalize: FusedMoEPrepareAndFinalize,
         all2all_manager = get_ep_group().device_communicator.all2all_manager
         assert all2all_manager is not None

-        experts: Optional[FusedMoEPermuteExpertsUnpermute] = None
-
         use_batched_experts = prepare_finalize.max_num_tokens_per_rank(
         ) is not None
         if use_batched_experts:
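The second hunk deletes a local that was assigned but never read afterwards; pyflakes-style linters flag this as F841 ("local variable is assigned to but never used"). A minimal sketch of the before/after, with hypothetical names:

    def select_impl(use_batched: bool) -> str:
        # Before the fix, a placeholder sat here that no branch ever read:
        # experts = None   # pyflakes F841: assigned but never used
        if use_batched:
            return "batched"
        return "standard"

    assert select_impl(True) == "batched"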

vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py

Lines changed: 2 additions & 1 deletion

@@ -587,7 +587,8 @@ def select_gemm_impl(self, prepare_finalize, moe):

         assert moe is not None

-        # method on prepare_finalize? sketchy getting world_size from prepare_finalize
+        # TODO(bnell) method on prepare_finalize? sketchy getting world_size
+        # from prepare_finalize
         max_experts_per_worker = (
             (moe.num_experts + prepare_finalize.world_size - 1) //
             prepare_finalize.world_size)
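The context lines in this hunk compute a ceiling division, so the per-worker expert count rounds up rather than down when `num_experts` does not divide evenly by `world_size`. Restated standalone (the function name and test values are illustrative):

    def max_experts_per_worker(num_experts: int, world_size: int) -> int:
        # (a + b - 1) // b is integer ceiling division: adding b - 1
        # before flooring pushes any nonzero remainder to the next multiple.
        return (num_experts + world_size - 1) // world_size

    assert max_experts_per_worker(64, 8) == 8   # even split
    assert max_experts_per_worker(60, 8) == 8   # 60 / 8 = 7.5 rounds up to 8
    assert max_experts_per_worker(65, 8) == 9   # one leftover expert forces 9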

vllm/model_executor/layers/quantization/fp8.py

Lines changed: 4 additions & 3 deletions

@@ -790,14 +790,15 @@ def select_gemm_impl(self, prepare_finalize, moe):

         if use_batched_experts:
             logger.debug(
-                "BatchedTritonOrDeepGemmExperts(%s): max_tokens_per_rank=%s, block_size=%s, per_act_token=%s",
+                "BatchedTritonOrDeepGemmExperts(%s): "
+                "max_tokens_per_rank=%s, block_size=%s, per_act_token=%s",
                 self.__class__.__name__, max_num_tokens_per_rank,
                 self.quant_config.weight_block_size, False)
             return BatchedTritonOrDeepGemmExperts(
                 max_num_tokens=
                 max_num_tokens_per_rank,  # get from prepare_finalize?
-                world_size=prepare_finalize.world_size,  # sketchy
-                dp_size=prepare_finalize.dp_size,  # sketchy
+                world_size=prepare_finalize.world_size,  # TODOsketchy
+                dp_size=prepare_finalize.dp_size,  # TODO sketchy
                 use_fp8_w8a8=True,
                 block_shape=self.quant_config.weight_block_size,
                 per_act_token_quant=False,  #?
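The logging fix splits an over-long format string using Python's implicit concatenation of adjacent string literals: the literals fuse into one string at compile time, so the wrapped form logs exactly what the single long line did. A self-contained illustration; the argument values here are made up:

    import logging

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger(__name__)

    # Adjacent literals join before the call runs: "a " "b" == "a b".
    logger.debug(
        "BatchedTritonOrDeepGemmExperts(%s): "
        "max_tokens_per_rank=%s, block_size=%s, per_act_token=%s",
        "Fp8MoEMethod", 128, [128, 128], False)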
