
Commit 6b4e406

fix merge
Signed-off-by: Bill Nell <bnell@redhat.com>
1 parent 4fdeb70 commit 6b4e406

File tree

3 files changed: 4 additions, 4 deletions

vllm/model_executor/layers/fused_moe/__init__.py

2 additions, 1 deletion

@@ -4,6 +4,7 @@
 from contextlib import contextmanager
 from typing import Any, Optional
 
+from vllm.model_executor.layers.fused_moe.config import FusedMoEConfig
 from vllm.model_executor.layers.fused_moe.layer import (
     FusedMoE, FusedMoEMethodBase, FusedMoeWeightScaleSupported)
 from vllm.model_executor.layers.fused_moe.modular_kernel import (
@@ -29,12 +30,12 @@ def get_config() -> Optional[dict[str, Any]]:
 
 __all__ = [
     "FusedMoE",
+    "FusedMoEConfig",
     "FusedMoEMethodBase",
     "FusedMoeWeightScaleSupported",
     "FusedMoEPermuteExpertsUnpermute",
     "FusedMoEActivationFormat",
     "FusedMoEPrepareAndFinalize",
-    "MoEConfig",
     "override_config",
     "get_config",
 ]
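For context, this hunk makes FusedMoEConfig part of the package's public surface (it is added to __all__ above) while the old MoEConfig name is dropped. A minimal sketch of what a downstream import would look like once this commit is applied; assumes vllm is installed at this revision:

# Sketch only: import the re-exported config class from the package root.
from vllm.model_executor.layers.fused_moe import FusedMoE, FusedMoEConfig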

vllm/model_executor/layers/fused_moe/batched_triton_or_deep_gemm_moe.py

2 additions, 1 deletion

@@ -40,6 +40,7 @@ def __init__(self,
         self.max_num_tokens = max_num_tokens
         self.world_size = world_size
         self.dp_size = dp_size
+        self.allow_deep_gemm = allow_deep_gemm
 
         # BatchedTritonKernel doesn't support block quantization
         # at the moment.
@@ -56,7 +57,7 @@ def __init__(self,
         ) if self.block_shape is None else None
 
         is_fp8_128_block_quantized = (
-            self.use_fp8_w8a8 and self.block_shape
+            use_fp8_w8a8 and self.block_shape
             == BatchedDeepGemmExperts.DEEPGEMM_BLOCK_SHAPE)
 
         self.batched_deep_gemm_experts = BatchedDeepGemmExperts(
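The second hunk reads the local constructor argument use_fp8_w8a8 instead of self.use_fp8_w8a8; the diff alone does not show whether that attribute exists at this point in __init__, so presumably it was either not yet assigned or not the intended source. A standalone, hypothetical illustration of the pattern (the class name and the DEEPGEMM_BLOCK_SHAPE value of [128, 128] are assumptions for illustration, not taken from this diff):

# Hypothetical, simplified constructor: read the local argument rather than
# an instance attribute that may not have been assigned yet in __init__.
DEEPGEMM_BLOCK_SHAPE = [128, 128]  # assumed value, for illustration only

class ExampleExperts:
    def __init__(self, use_fp8_w8a8: bool, block_shape=None):
        self.block_shape = block_shape
        # Referencing self.use_fp8_w8a8 here would fail (or be stale) if the
        # attribute is only set later, so the argument is used directly.
        self.is_fp8_128_block_quantized = (
            use_fp8_w8a8 and self.block_shape == DEEPGEMM_BLOCK_SHAPE)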

vllm/model_executor/layers/fused_moe/deep_gemm_moe.py

0 additions, 2 deletions

@@ -15,8 +15,6 @@
     MoEPrepareAndFinalizeNoEP)
 from vllm.model_executor.layers.fused_moe.utils import (
     _resize_cache, per_token_group_quant_fp8)
-from vllm.model_executor.layers.quantization.deepgemm import ( # noqa: E501
-    m_grouped_gemm_fp8_fp8_bf16_nt_contiguous_deepgemm as m_grouped_gemm_fp8_fp8_bf16_nt_contiguous_deepgemm)
 from vllm.utils import round_up
 
 logger = init_logger(__name__)
