1 file changed: vllm/model_executor/layers/fused_moe (+0, -15 lines)

@@ -765,21 +765,6 @@ def __init__(
         assert isinstance(quant_method, FusedMoEMethodBase)
         self.quant_method = quant_method
 
-        if self.enable_eplb:
-            from vllm.model_executor.layers.quantization.fp8 import (
-                Fp8MoEMethod)
-            if not isinstance(quant_method, Fp8MoEMethod) and not isinstance(
-                    quant_method, UnquantizedFusedMoEMethod):
-                # TODO: Add support for additional quantization methods.
-                # The implementation for other quantization methods does not
-                # contain essential differences, but the current quant API
-                # design causes duplicated work when extending to new
-                # quantization methods, so I'm leaving it for now.
-                # If you plan to add support for more quantization methods,
-                # please refer to the implementation in `Fp8MoEMethod`.
-                raise NotImplementedError("EPLB is only supported for FP8 "
-                                          "quantization for now.")
-
         moe_quant_params = {
             "num_experts": self.local_num_experts,
             "hidden_size": hidden_size,