File tree Expand file tree Collapse file tree 2 files changed +10
-7
lines changed Expand file tree Collapse file tree 2 files changed +10
-7
lines changed Original file line number Diff line number Diff line change 9
9
import triton .language as tl
10
10
11
11
from tests .kernels .moe .utils import (batched_moe , make_test_weights ,
12
- torch_moe2 , triton_moe )
12
+ torch_moe2 , triton_moe ,
13
+ per_block_cast_to_fp8 )
13
14
from tests .kernels .quant_utils import native_w8a8_block_matmul
14
15
from vllm .config import VllmConfig , set_current_vllm_config
15
16
from vllm .model_executor .layers .fused_moe .fused_batched_moe import (
Original file line number Diff line number Diff line change 23
23
torch_moe2 )
24
24
from tests .pplx_utils import ProcessGroupInfo , parallel_launch
25
25
from vllm .config import VllmConfig , set_current_vllm_config
26
- from vllm .model_executor .layers .fused_moe import override_config
26
+ from vllm .model_executor .layers .fused_moe import (override_config ,
27
+ FusedMoEConfig ,
28
+ fused_topk ,
29
+ get_default_config ,
30
+ FusedMoEModularKernel ,
31
+ BatchedTritonExperts ,
32
+ FusedMoEModularKernel )
27
33
from vllm .model_executor .layers .fused_moe .fused_batched_moe import (
28
- BatchedPrepareAndFinalize , BatchedTritonExperts , NaiveBatchedExperts )
29
- from vllm .model_executor .layers .fused_moe .fused_moe import (fused_topk ,
30
- get_default_config )
31
- from vllm .model_executor .layers .fused_moe .modular_kernel import (
32
- FusedMoEModularKernel )
34
+ BatchedPrepareAndFinalize , NaiveBatchedExperts )
33
35
from vllm .platforms import current_platform
34
36
from vllm .utils import round_up
35
37
You can’t perform that action at this time.
0 commit comments