Skip to content

Commit b6d9a87

Browse files
committed
feat: remove blocker
1 parent c8055da commit b6d9a87

File tree

1 file changed

+0
-15
lines changed
  • vllm/model_executor/layers/fused_moe

1 file changed

+0
-15
lines changed

vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -765,21 +765,6 @@ def __init__(
         assert isinstance(quant_method, FusedMoEMethodBase)
         self.quant_method = quant_method
 
-        if self.enable_eplb:
-            from vllm.model_executor.layers.quantization.fp8 import (
-                Fp8MoEMethod)
-            if not isinstance(quant_method, Fp8MoEMethod) and not isinstance(
-                    quant_method, UnquantizedFusedMoEMethod):
-                # TODO: Add support for additional quantization methods.
-                # The implementation for other quantization methods does not
-                # contain essential differences, but the current quant API
-                # design causes duplicated work when extending to new
-                # quantization methods, so I'm leaving it for now.
-                # If you plan to add support for more quantization methods,
-                # please refer to the implementation in `Fp8MoEMethod`.
-                raise NotImplementedError("EPLB is only supported for FP8 "
-                                          "quantization for now.")
-
         moe_quant_params = {
             "num_experts": self.local_num_experts,
             "hidden_size": hidden_size,

0 commit comments

Comments
 (0)