Skip to content

Commit deb4319

Browse files
author
weijinqian_v1
committed
handle code clean
Signed-off-by: weijinqian_v1 <weijinqian@huawei.com>
1 parent af85566 commit deb4319

File tree

10 files changed

+55
-46
lines changed

10 files changed

+55
-46
lines changed

tests/ut/test_distributed_tensor_parallel.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,16 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
4-
import pytest
5-
import torch
64
import importlib
75
from unittest.mock import MagicMock, patch
6+
7+
import pytest
8+
import torch
9+
810
from vllm_ascend.distributed.tensor_parallel import (
911
_gather_along_first_dim, _gather_along_last_dim,
1012
_reduce_scatter_along_first_dim, _reduce_scatter_along_last_dim,
11-
all_to_all_sp2hp, all_to_all_hp2sp)
13+
all_to_all_hp2sp, all_to_all_sp2hp)
1214

1315

1416
# Fixed data used for testing

tests/ut/test_moe_util.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,17 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
4-
import torch
5-
import pytest
64
import math
5+
6+
import pytest
7+
import torch
8+
9+
from vllm_ascend.ops.moe_dispatcher.moe_utils import (
10+
get_capacity, group_limited_topk, permute, sort_chunks_by_idxs,
11+
topk_softmax_with_capacity, unpermute)
12+
713
import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa
814

9-
from vllm_ascend.ops.moe_dispatcher.moe_utils import permute, get_capacity, topk_softmax_with_capacity, group_limited_topk, unpermute, sort_chunks_by_idxs
1015

1116

1217
class TestMoeUtils:

tests/ut/test_token_dispatcher.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,17 @@
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
44

5-
import torch
65
import pytest
7-
import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa
8-
6+
import torch
97
from pytest_mock import MockerFixture
8+
9+
from vllm_ascend.ops.moe_dispatcher.token_dispatcher import (
10+
MoEAlltoAllSeqOverLapDispatcher, MoeDispatcherConfig)
1011
from vllm_ascend.utils import adapt_patch # noqa E402
1112

12-
from vllm_ascend.ops.moe_dispatcher.token_dispatcher import MoeDispatcherConfig, MoEAlltoAllSeqOverLapDispatcher
13+
import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa
14+
15+
1316

1417
adapt_patch(True)
1518

vllm_ascend/models/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,10 @@ def register_model():
88
from .deepseek_mtp import CustomDeepSeekMTP # noqa: F401
99
from .deepseek_v2 import CustomDeepseekV2ForCausalLM # noqa: F401
1010
from .deepseek_v2 import CustomDeepseekV3ForCausalLM # noqa: F401
11+
from .moe_block import AscendSparseMoeBlock # noqa: F401
1112
from .qwen2_5_vl import \
1213
AscendQwen2_5_VLForConditionalGeneration # noqa: F401
1314
from .qwen2_vl import AscendQwen2VLForConditionalGeneration # noqa: F401
14-
from .moe_block import AscendSparseMoeBlock # noqa: F401
1515
from .qwen3 import CustomQwen3ForCausalLM # noqa: F401
1616

1717
ModelRegistry.register_model(

vllm_ascend/models/deepseek_dbo.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -34,8 +34,7 @@
3434
from transformers import PretrainedConfig
3535
from vllm.attention import AttentionMetadata
3636
from vllm.config import CacheConfig, ModelConfig, VllmConfig
37-
from vllm.distributed import (get_pp_group,
38-
get_tensor_model_parallel_rank,
37+
from vllm.distributed import (get_pp_group, get_tensor_model_parallel_rank,
3938
get_tensor_model_parallel_world_size,
4039
get_tp_group, tensor_model_parallel_all_reduce)
4140
from vllm.distributed.parallel_state import get_dp_group, get_ep_group
@@ -55,8 +54,9 @@
5554
from vllm.sequence import IntermediateTensors
5655

5756
import vllm_ascend.envs as envs_ascend
58-
from vllm_ascend.distributed.tensor_parallel import gather_from_sequence_parallel_region
5957
from vllm_ascend.ascend_forward_context import FusedMoEState
58+
from vllm_ascend.distributed.tensor_parallel import \
59+
gather_from_sequence_parallel_region
6060
from vllm_ascend.models.deepseek_v2 import (CustomDeepseekV2DecoderLayer,
6161
CustomDeepseekV2MLP,
6262
CustomDeepseekV2MoE)
@@ -69,9 +69,9 @@
6969
from vllm_ascend.multistream.metadata import (MultiStreamConfig,
7070
MultiStreamStepMetadata,
7171
make_multistream_metadata_ds)
72+
from vllm_ascend.ops.fused_moe import select_experts
7273
from vllm_ascend.quantization.w8a8_dynamic import (
7374
AscendW8A8DynamicLinearMethod, apply_mlp)
74-
from vllm_ascend.ops.fused_moe import select_experts
7575
from vllm_ascend.utils import dispose_tensor
7676

7777
VLLM_ASCEND_ENABLE_DBO: bool = envs_ascend.VLLM_ASCEND_ENABLE_DBO

vllm_ascend/models/moe_block.py

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -19,22 +19,18 @@
1919

2020
import torch
2121
import vllm.model_executor.models.qwen3_moe as qwen3
22-
2322
from torch import nn
23+
from transformers import PretrainedConfig
2424
from vllm.attention import AttentionMetadata
25-
from vllm.distributed import (get_tensor_model_parallel_world_size,
26-
get_tp_group)
27-
from vllm.distributed.parallel_state import get_dp_group
25+
from vllm.distributed import get_tensor_model_parallel_world_size, get_tp_group
26+
from vllm.distributed.parallel_state import get_dp_group, get_ep_group
2827
from vllm.forward_context import get_forward_context
2928
from vllm.model_executor.layers.linear import ReplicatedLinear
29+
from vllm.model_executor.layers.quantization import QuantizationConfig
3030

3131
from vllm_ascend.ascend_config import get_ascend_config
32-
from vllm.distributed.parallel_state import get_ep_group
3332
from vllm_ascend.ops.fused_moe import AscendFusedMoE
3433

35-
from transformers import PretrainedConfig
36-
from vllm.model_executor.layers.quantization import QuantizationConfig
37-
3834

3935
class AscendSparseMoeBlock(nn.Module):
4036

vllm_ascend/models/qwen3_dbo.py

Lines changed: 19 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -21,42 +21,44 @@
2121
# limitations under the License.
2222
# # Adapted from
2323
# """Inference-only Qwen3 model."""
24-
from typing import Optional, Union, List
2524
from types import SimpleNamespace
25+
from typing import List, Optional, Union
2626

2727
import torch
2828
import torch_npu
2929
from torch import nn
3030
from transformers import PretrainedConfig
31-
32-
from vllm.model_executor.models.qwen3_moe import Qwen3MoeDecoderLayer, Qwen3MoeModel
33-
from vllm.config import CacheConfig, VllmConfig
34-
from vllm.model_executor.layers.quantization import QuantizationConfig
3531
from vllm.attention import AttentionMetadata
32+
from vllm.compilation.decorators import support_torch_compile
33+
from vllm.config import CacheConfig, VllmConfig
34+
from vllm.distributed import (get_pp_group,
35+
get_tensor_model_parallel_world_size,
36+
get_tp_group)
3637
from vllm.forward_context import get_forward_context, set_forward_context
37-
from vllm.distributed import get_tensor_model_parallel_world_size, get_tp_group, \
38-
get_pp_group
39-
from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
38+
from vllm.model_executor.layers.layernorm import RMSNorm
39+
from vllm.model_executor.layers.logits_processor import LogitsProcessor
40+
from vllm.model_executor.layers.quantization import QuantizationConfig
41+
from vllm.model_executor.layers.vocab_parallel_embedding import (
42+
ParallelLMHead, VocabParallelEmbedding)
43+
from vllm.model_executor.models.qwen3_moe import (Qwen3MoeDecoderLayer,
44+
Qwen3MoeForCausalLM,
45+
Qwen3MoeModel)
4046
from vllm.model_executor.models.utils import (
4147
make_empty_intermediate_tensors_factory, make_layers, maybe_prefix)
42-
from vllm.model_executor.layers.layernorm import RMSNorm
4348
from vllm.sequence import IntermediateTensors
44-
from vllm.model_executor.models.qwen3_moe import Qwen3MoeForCausalLM
45-
from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
46-
from vllm.model_executor.layers.logits_processor import LogitsProcessor
47-
from vllm.compilation.decorators import support_torch_compile
4849

50+
import vllm_ascend.envs as envs_ascend
51+
from vllm_ascend.distributed.tensor_parallel import \
52+
gather_from_sequence_parallel_region
53+
from vllm_ascend.multistream.base import MSEventKey
4954
from vllm_ascend.multistream.context import (
5055
advance_step_multistream_layer_context, get_multistream_layer_context)
51-
from vllm_ascend.multistream.base import MSEventKey
5256
from vllm_ascend.multistream.layers import (MultiStreamPostTransformerLayer,
5357
MultiStreamPreTransformerLayer)
5458
from vllm_ascend.multistream.metadata import (MultiStreamConfig,
5559
MultiStreamStepMetadata,
5660
make_multistream_metadata_ds)
57-
from vllm_ascend.ops.fused_moe import select_experts, apply_mlp
58-
from vllm_ascend.distributed.tensor_parallel import gather_from_sequence_parallel_region
59-
import vllm_ascend.envs as envs_ascend
61+
from vllm_ascend.ops.fused_moe import apply_mlp, select_experts
6062

6163
VLLM_ASCEND_ENABLE_DBO: bool = envs_ascend.VLLM_ASCEND_ENABLE_DBO
6264

vllm_ascend/multistream/ms_split.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,8 @@
44
import numpy as np
55
import torch
66

7-
from vllm_ascend.attention.attention_v1 import AscendAttentionState, AscendMetadata
7+
from vllm_ascend.attention.attention_v1 import (AscendAttentionState,
8+
AscendMetadata)
89

910
from .base import MSAttentionMetadataSplitConfig
1011

vllm_ascend/ops/fused_moe.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,11 +40,11 @@
4040
from vllm_ascend.ascend_config import get_ascend_config
4141
from vllm_ascend.ascend_forward_context import FusedMoEState
4242
from vllm_ascend.ops.expert_load_balancer import ExpertLoadBalancer
43+
from vllm_ascend.ops.moe_dispatcher.token_dispatcher import (
44+
MoEAlltoAllSeqOverLapDispatcher, MoeDispatcherConfig)
4345
from vllm_ascend.utils import (AscendSocVersion, dispose_tensor,
4446
get_ascend_soc_version, npu_stream_switch,
4547
npu_wait_tensor)
46-
from vllm_ascend.ops.moe_dispatcher.token_dispatcher import (
47-
MoEAlltoAllSeqOverLapDispatcher, MoeDispatcherConfig)
4848

4949
VLLM_ASCEND_MOE_ALL2ALL_BUFFER: bool = envs_ascend.VLLM_ASCEND_MOE_ALL2ALL_BUFFER
5050

vllm_ascend/ops/moe_dispatcher/token_dispatcher.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,16 +24,16 @@
2424

2525
import torch
2626
import torch_npu
27-
2827
from vllm.distributed.parallel_state import get_ep_group
28+
2929
from vllm_ascend.distributed.tensor_parallel import (
3030
all_gather_last_dim_from_tensor_parallel_region, all_to_all_hp2sp,
3131
all_to_all_sp2hp, gather_from_sequence_parallel_region,
3232
reduce_scatter_last_dim_to_tensor_parallel_region)
3333
from vllm_ascend.ops.comm_utils import async_all_to_all
3434
from vllm_ascend.ops.moe_dispatcher.moe_utils import (
35-
get_capacity, permute, topk_softmax_with_capacity,
36-
unpermute)
35+
get_capacity, permute, topk_softmax_with_capacity, unpermute)
36+
3737
""" We use the following notation throughout this file:
3838
H: hidden size
3939
B: micro batch size

0 commit comments

Comments
 (0)