1 file changed: vllm/model_executor/layers/fused_moe (+0, -15 lines)

@@ -765,21 +765,6 @@ def __init__(
         assert isinstance(quant_method, FusedMoEMethodBase)
         self.quant_method = quant_method
 
-        if self.enable_eplb:
-            from vllm.model_executor.layers.quantization.fp8 import (
-                Fp8MoEMethod)
-            if not isinstance(quant_method, Fp8MoEMethod) and not isinstance(
-                    quant_method, UnquantizedFusedMoEMethod):
-                # TODO: Add support for additional quantization methods.
-                # The implementation for other quantization methods does not
-                # contain essential differences, but the current quant API
-                # design causes duplicated work when extending to new
-                # quantization methods, so I'm leaving it for now.
-                # If you plan to add support for more quantization methods,
-                # please refer to the implementation in `Fp8MoEMethod`.
-                raise NotImplementedError("EPLB is only supported for FP8 "
-                                          "quantization for now.")
-
         moe_quant_params = {
             "num_experts": self.local_num_experts,
             "hidden_size": hidden_size,