Commit 07d96ca

fix lint

Signed-off-by: Bill Nell <bnell@redhat.com>

1 parent ab6bb13

vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 3 additions & 6 deletions
@@ -121,8 +121,6 @@ def init_prepare_finalize(self, moe: FusedMoEConfig,
 
             handle = all2all_manager.get_handle(all_to_all_args)
 
-            #assert moe.tp_size == all2all_manager.tp_group.world_size
-
             prepare_finalize = PplxPrepareAndFinalize(
                 handle,
                 max_num_tokens=moe.max_num_tokens,

@@ -160,10 +158,9 @@ def init_prepare_finalize(self, moe: FusedMoEConfig,
             # Note : We may want to use FP8 dispatch even otherwise just to
             # reduce datamovement
             assert moe.quant_config is not None
-            use_fp8_dispatch = (moe.quant_config.quant_dtype
-                                == current_platform.fp8_dtype()
-                                and moe.quant_config.block_shape[1]
-                                == DEEPEP_QUANT_BLOCK_SHAPE)
+            use_fp8_dispatch = (
+                moe.quant_config.quant_dtype == current_platform.fp8_dtype()
+                and moe.quant_config.block_shape == DEEPEP_QUANT_BLOCK_SHAPE)
 
             # Note (varun): Whether to use FP8 dispatch or not needs some
             # profiling. Turning it off for now.
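
For reference, a minimal, self-contained sketch of the reformatted FP8-dispatch check. The stub dataclass, the fp8_dtype() helper, and the [128, 128] block shape are assumptions standing in for vLLM's quant config, current_platform, and DEEPEP_QUANT_BLOCK_SHAPE; only the boolean condition itself mirrors the "+" lines of the diff.

# Minimal sketch, not vLLM's actual module layout: stand-ins for the names
# the diff relies on, to show how the reformatted condition evaluates.
from dataclasses import dataclass
from typing import Optional

import torch

# Assumed value; stands in for vLLM's DEEPEP_QUANT_BLOCK_SHAPE constant.
DEEPEP_QUANT_BLOCK_SHAPE = [128, 128]


@dataclass
class QuantConfigStub:
    # Assumed fields; the real quant config carries more state.
    quant_dtype: Optional[torch.dtype]
    block_shape: Optional[list]


def fp8_dtype() -> torch.dtype:
    # Stand-in for current_platform.fp8_dtype(); the real dtype is
    # platform-dependent.
    return torch.float8_e4m3fn


def should_use_fp8_dispatch(quant_config: QuantConfigStub) -> bool:
    # Same boolean as the diff's new lines: use FP8 dispatch only when the
    # quant dtype is the platform FP8 dtype and the full block shape matches
    # the DeepEP quant block shape (the old code compared block_shape[1]).
    return (quant_config.quant_dtype == fp8_dtype()
            and quant_config.block_shape == DEEPEP_QUANT_BLOCK_SHAPE)


# Example: FP8 dtype with matching block shape -> True; bfloat16 -> False.
print(should_use_fp8_dispatch(QuantConfigStub(torch.float8_e4m3fn, [128, 128])))
print(should_use_fp8_dispatch(QuantConfigStub(torch.bfloat16, [128, 128])))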
