
Commit adf3f74

Author: weijinqian_v1
Commit message: handle code clean
Signed-off-by: weijinqian_v1 <weijinqian@huawei.com>
Parent: df52070

File tree: 15 files changed, 517 additions and 500 deletions


tests/multicard/test_qwen3_moe.py

Lines changed: 7 additions & 3 deletions
@@ -1,4 +1,3 @@
-
 # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
 # Copyright 2023 The vLLM team.
 #
@@ -32,7 +31,12 @@

 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("max_tokens", [32])
-@patch.dict(os.environ, {"ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3", "VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ": "1", "VLLM_ASCEND_ENABLE_DBO": "1"})
+@patch.dict(
+    os.environ, {
+        "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3",
+        "VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ": "1",
+        "VLLM_ASCEND_ENABLE_DBO": "1"
+    })
 def test_qwen3_moe_inference(model, max_tokens):
     script = "examples/offline_data_parallel.py"

@@ -68,4 +72,4 @@ def test_qwen3_moe_inference(model, max_tokens):
     assert "DP rank 0 needs to process" in output
     assert "DP rank 1 needs to process" in output
     assert "Generated text:" in output
-    assert proc.returncode == 0
+    assert proc.returncode == 0
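
A note on the decorator reformatted above: unittest.mock.patch.dict scopes the patched environment variables to a single test and restores os.environ once the test returns. A minimal self-contained sketch of the pattern, reusing one of the flags from this commit:

import os
from unittest.mock import patch

@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_DBO": "1"})
def test_env_flag_visible_inside_test():
    # The patched variable exists only while the decorated test runs;
    # patch.dict restores the original os.environ afterwards.
    assert os.environ["VLLM_ASCEND_ENABLE_DBO"] == "1"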

tests/singlecard/test_offline_inference.py

Lines changed: 0 additions & 1 deletion
@@ -131,4 +131,3 @@ def test_models_topk() -> None:
                     enforce_eager=True,
                     gpu_memory_utilization=0.7) as vllm_model:
         vllm_model.generate(example_prompts, sampling_params)
-

tests/ut/test_distributed_tensor_parallel.py

Lines changed: 35 additions & 21 deletions
@@ -1,12 +1,15 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
14
import pytest
25
import torch
36
import importlib
47
from unittest.mock import MagicMock, patch
58
from vllm_ascend.distributed.tensor_parallel import (
69
_gather_along_first_dim, _gather_along_last_dim,
710
_reduce_scatter_along_first_dim, _reduce_scatter_along_last_dim,
8-
all_to_all_sp2hp, all_to_all_hp2sp
9-
)
11+
all_to_all_sp2hp, all_to_all_hp2sp)
12+
1013

1114
# 测试用的固定数据
1215
@pytest.fixture
@@ -37,7 +40,8 @@ class TestDistributedCommunication:
3740
"""测试分布式通信函数"""
3841

3942
@pytest.mark.parametrize("world_size", [1, 4])
40-
def test_gather_along_first_dim(self, test_tensor, mock_group, mock_dist, world_size):
43+
def test_gather_along_first_dim(self, test_tensor, mock_group, mock_dist,
44+
world_size):
4145
"""测试_gather_along_first_dim"""
4246
mock_dist.get_world_size.return_value = world_size
4347

@@ -48,14 +52,17 @@ def test_gather_along_first_dim(self, test_tensor, mock_group, mock_dist, world_
4852
else:
4953
assert result.shape == (32, 16) # 8*4=32
5054

51-
def test_gather_along_first_dim_unequal_split(self, test_tensor, mock_group):
55+
def test_gather_along_first_dim_unequal_split(self, test_tensor,
56+
mock_group):
5257
"""测试不等分分割情况"""
5358
output_split_sizes = [5, 10, 15, 2]
54-
result = _gather_along_first_dim(test_tensor, mock_group, output_split_sizes)
59+
result = _gather_along_first_dim(test_tensor, mock_group,
60+
output_split_sizes)
5561
assert result.shape == (32, 16) # 5+10+15+2=32
5662

5763
@pytest.mark.parametrize("world_size", [1, 4])
58-
def test_gather_along_last_dim(self, test_tensor_last_dim, mock_group, mock_dist, world_size):
64+
def test_gather_along_last_dim(self, test_tensor_last_dim, mock_group,
65+
mock_dist, world_size):
5966
"""测试_gather_along_last_dim"""
6067
mock_dist.get_world_size.return_value = world_size
6168

@@ -64,13 +71,14 @@ def test_gather_along_last_dim(self, test_tensor_last_dim, mock_group, mock_dist
6471
if world_size == 1:
6572
assert torch.equal(result, test_tensor_last_dim)
6673
else:
67-
assert result.shape == (8, 16, 32*world_size) # 8*4=32
74+
assert result.shape == (8, 16, 32 * world_size) # 8*4=32
6875

6976
@pytest.mark.parametrize("input_shape,expected_shape", [
7077
((32, 16), (8, 16)),
7178
((40, 10), (10, 10)),
7279
])
73-
def test_reduce_scatter_along_first_dim(self, mock_group, input_shape, expected_shape):
80+
def test_reduce_scatter_along_first_dim(self, mock_group, input_shape,
81+
expected_shape):
7482
input_tensor = torch.randn(*input_shape)
7583
result = _reduce_scatter_along_first_dim(input_tensor, mock_group)
7684
assert result.shape == expected_shape
@@ -81,34 +89,40 @@ def test_reduce_scatter_along_last_dim(self, mock_group):
8189
assert result.shape == (8, 16, 8) # 32/4=8
8290

8391
@pytest.mark.parametrize("func,input_shape,expected_shape", [
84-
("all_gather_last_dim_from_tensor_parallel_region", (8, 16, 32), (8, 16, 128)),
92+
("all_gather_last_dim_from_tensor_parallel_region", (8, 16, 32),
93+
(8, 16, 128)),
8594
("reduce_scatter_to_sequence_parallel_region", (32, 16), (8, 16)),
86-
("reduce_scatter_last_dim_to_tensor_parallel_region", (8, 16, 32), (8, 16, 8)),
95+
("reduce_scatter_last_dim_to_tensor_parallel_region", (8, 16, 32),
96+
(8, 16, 8)),
8797
("gather_from_sequence_parallel_region", (8, 16), (32, 16)),
8898
])
89-
def test_wrapper_functions(self, mock_group, func, input_shape, expected_shape):
99+
def test_wrapper_functions(self, mock_group, func, input_shape,
100+
expected_shape):
90101
"""测试包装函数"""
91-
mod = importlib.import_module('vllm_ascend.distributed.tensor_parallel')
102+
mod = importlib.import_module(
103+
'vllm_ascend.distributed.tensor_parallel')
92104
globals = mod.__dict__
93105
test_func = globals[func]
94106
input_tensor = torch.randn(*input_shape)
95107
result = test_func(input_tensor, mock_group)
96108
assert result.shape == expected_shape
97109

98-
99-
@pytest.mark.parametrize("input_shape,output_shape", [
100-
((8, 16), (32, 4)), # [num_tokens/TP, H] -> [num_tokens, H/TP]
101-
])
110+
@pytest.mark.parametrize(
111+
"input_shape,output_shape",
112+
[
113+
((8, 16), (32, 4)), # [num_tokens/TP, H] -> [num_tokens, H/TP]
114+
])
102115
def test_all_to_all_sp2hp(self, mock_group, input_shape, output_shape):
103116
input_tensor = torch.randn(*input_shape)
104117
result = all_to_all_sp2hp(input_tensor, mock_group)
105118
assert result.shape == output_shape
106119

107-
108-
@pytest.mark.parametrize("input_shape,output_shape", [
109-
((32, 4), (8, 16)), # [num_tokens, H/TP] -> [num_tokens/TP, H]
110-
])
120+
@pytest.mark.parametrize(
121+
"input_shape,output_shape",
122+
[
123+
((32, 4), (8, 16)), # [num_tokens, H/TP] -> [num_tokens/TP, H]
124+
])
111125
def test_all_to_all_hp2sp(self, mock_group, input_shape, output_shape):
112126
input_tensor = torch.randn(*input_shape)
113127
result = all_to_all_hp2sp(input_tensor, mock_group)
114-
assert result.shape == output_shape
128+
assert result.shape == output_shape
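
The shape comments in the parametrizations above ([num_tokens/TP, H] -> [num_tokens, H/TP]) describe the layout contract of all_to_all_sp2hp. Below is a single-process sketch of that contract using plain torch ops, assuming TP=4 as in the tests; the real function performs the exchange with torch.distributed, so this reproduces only the shapes, not the actual data movement between ranks:

import torch

tp = 4                          # assumed tensor-parallel world size
x = torch.randn(8, 16)          # per-rank input: [num_tokens/TP, H]
chunks = x.chunk(tp, dim=-1)    # tp slices of shape [8, H/TP] = [8, 4]
out = torch.cat(chunks, dim=0)  # result layout: [num_tokens, H/TP]
assert out.shape == (32, 4)     # matches the expected output_shape

all_to_all_hp2sp asserts the inverse mapping, [num_tokens, H/TP] -> [num_tokens/TP, H].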

tests/ut/test_moe_util.py

Lines changed: 46 additions & 70 deletions
@@ -4,7 +4,7 @@
 import torch
 import pytest
 import math
-import vllm_ascend.patch.worker.patch_common.patch_utils
+import vllm_ascend.patch.worker.patch_common.patch_utils  # type: ignore[import] # isort: skip # noqa

 from vllm_ascend.ops.moe_dispatcher.moe_utils import permute, get_capacity, topk_softmax_with_capacity, group_limited_topk, unpermute, sort_chunks_by_idxs

@@ -22,141 +22,118 @@ def setup(self):
         self.num_groups = 2
         self.scaling_factor = 1.0

-
     def test_group_limited_topk(self, setup):
         # Test group-limited topk routing
         scores = torch.randn(self.num_tokens, self.num_experts)
-        probs, indices = group_limited_topk(
-            scores,
-            topk=self.topk,
-            num_tokens=self.num_tokens,
-            num_experts=self.num_experts,
-            num_groups=self.num_groups,
-            group_topk=self.group_topk
-        )
+        probs, indices = group_limited_topk(scores,
+                                            topk=self.topk,
+                                            num_tokens=self.num_tokens,
+                                            num_experts=self.num_experts,
+                                            num_groups=self.num_groups,
+                                            group_topk=self.group_topk)

         assert probs.shape == (self.num_tokens, self.topk)
         assert indices.shape == (self.num_tokens, self.topk)
         assert torch.all(indices < self.num_experts)

-
     @pytest.mark.parametrize("score_function", ["softmax"])
     def test_topk_softmax_with_capacity(self, setup, score_function):
         # Test topk softmax with capacity
         logits = torch.randn(self.num_tokens, self.num_experts)

         # Test without capacity
         probs, routing_map, tokens_per_expert, top_indices = topk_softmax_with_capacity(
-            logits,
-            topk=self.topk,
-            score_function=score_function
-        )
+            logits, topk=self.topk, score_function=score_function)
         assert probs.shape == (self.num_tokens, self.num_experts)
         assert routing_map.shape == (self.num_tokens, self.num_experts)
-        assert tokens_per_expert.shape == (self.num_experts,)
+        assert tokens_per_expert.shape == (self.num_experts, )

         # Test with group routing
         probs, routing_map, tokens_per_expert, top_indices = topk_softmax_with_capacity(
             logits,
             topk=self.topk,
             num_groups=self.num_groups,
             group_topk=self.group_topk,
-            score_function=score_function
-        )
+            score_function=score_function)
         assert probs.shape == (self.num_tokens, self.num_experts)

-
     def test_get_capacity(self, setup):
         # Test capacity calculation
-        capacity = get_capacity(
-            num_tokens=self.num_tokens,
-            num_experts=self.num_experts,
-            capacity_factor=self.capacity_factor
-        )
-        expected = math.ceil((self.num_tokens / self.num_experts) * self.capacity_factor)
+        capacity = get_capacity(num_tokens=self.num_tokens,
+                                num_experts=self.num_experts,
+                                capacity_factor=self.capacity_factor)
+        expected = math.ceil(
+            (self.num_tokens / self.num_experts) * self.capacity_factor)
         assert capacity == expected

         # Test with min capacity
         min_capacity = 5
-        capacity = get_capacity(
-            num_tokens=self.num_tokens,
-            num_experts=self.num_experts,
-            capacity_factor=self.capacity_factor,
-            min_capacity=min_capacity
-        )
+        capacity = get_capacity(num_tokens=self.num_tokens,
+                                num_experts=self.num_experts,
+                                capacity_factor=self.capacity_factor,
+                                min_capacity=min_capacity)
         assert capacity == min_capacity

-
     def test_permute(self, setup):
         # Test token permutation
         tokens = torch.randn(self.num_tokens, self.hidden_size)
-        routing_map = torch.randint(0, 2, (self.num_tokens, self.num_experts)).bool()
+        routing_map = torch.randint(
+            0, 2, (self.num_tokens, self.num_experts)).bool()

         # Basic permutation
         permuted_tokens, sorted_indices = permute(tokens, routing_map)
         assert permuted_tokens.shape[0] == routing_map.sum()
         assert sorted_indices.shape[0] == routing_map.sum()

         # With drop and pad
-        capacity = get_capacity(
-            num_tokens=self.num_tokens * self.topk,
-            num_experts=self.num_experts,
-            capacity_factor=self.capacity_factor
-        )
+        capacity = get_capacity(num_tokens=self.num_tokens * self.topk,
+                                num_experts=self.num_experts,
+                                capacity_factor=self.capacity_factor)
         num_out_tokens = capacity * self.num_experts
         permuted_tokens, sorted_indices = permute(
             tokens,
             routing_map,
             num_out_tokens=num_out_tokens,
-            drop_and_pad=True
-        )
+            drop_and_pad=True)
         assert permuted_tokens.shape[0] == num_out_tokens
         assert sorted_indices.shape[0] == num_out_tokens

-
     def test_unpermute(self, setup):
         # Test token unpermutation
         tokens = torch.randn(self.num_tokens, self.hidden_size)
-        routing_map = torch.randint(0, 2, (self.num_tokens, self.num_experts)).bool()
+        routing_map = torch.randint(
+            0, 2, (self.num_tokens, self.num_experts)).bool()
         probs = torch.rand(self.num_tokens, self.num_experts)

         # First permute
         permuted_tokens, sorted_indices = permute(tokens, routing_map)

         # Then unpermute
-        restored_tokens = unpermute(
-            permuted_tokens,
-            sorted_indices,
-            tokens.shape,
-            probs=probs,
-            routing_map=routing_map
-        )
+        restored_tokens = unpermute(permuted_tokens,
+                                    sorted_indices,
+                                    tokens.shape,
+                                    probs=probs,
+                                    routing_map=routing_map)
         assert restored_tokens.shape == tokens.shape

         # With drop and pad
-        capacity = get_capacity(
-            num_tokens=self.num_tokens * self.topk,
-            num_experts=self.num_experts,
-            capacity_factor=self.capacity_factor
-        )
+        capacity = get_capacity(num_tokens=self.num_tokens * self.topk,
+                                num_experts=self.num_experts,
+                                capacity_factor=self.capacity_factor)
         num_out_tokens = capacity * self.num_experts
         permuted_tokens, sorted_indices = permute(
             tokens,
             routing_map,
             num_out_tokens=num_out_tokens,
-            drop_and_pad=True
-        )
-        restored_tokens = unpermute(
-            permuted_tokens,
-            sorted_indices,
-            tokens.shape,
-            probs=probs,
-            routing_map=routing_map,
-            drop_and_pad=True
-        )
+            drop_and_pad=True)
+        restored_tokens = unpermute(permuted_tokens,
+                                    sorted_indices,
+                                    tokens.shape,
+                                    probs=probs,
+                                    routing_map=routing_map,
+                                    drop_and_pad=True)
         assert restored_tokens.shape == tokens.shape

-
     def test_sort_chunks_by_idxs(self, setup):
         # Test chunk sorting
         input_tensor = torch.randn(10, self.hidden_size)
@@ -167,10 +144,10 @@ def test_sort_chunks_by_idxs(self, setup):
         assert output.shape == input_tensor.shape

         # Verify the order is correct
-        expected = torch.cat([input_tensor[5:], input_tensor[0: 3], input_tensor[3: 5]])
+        expected = torch.cat(
+            [input_tensor[5:], input_tensor[0:3], input_tensor[3:5]])
         assert torch.allclose(output, expected)

-
     @pytest.mark.parametrize("score_function", ["softmax"])
     def test_score_functions(self, setup, score_function):
         # Test different score functions
@@ -181,8 +158,7 @@ def test_score_functions(self, setup, score_function):
             logits,
             topk=self.topk,
             score_function=score_function,
-            expert_bias=expert_bias
-        )
+            expert_bias=expert_bias)
         assert probs.shape == (self.num_tokens, self.num_experts)
         assert routing_map.shape == (self.num_tokens, self.num_experts)
-        assert tokens_per_expert.shape == (self.num_experts,)
+        assert tokens_per_expert.shape == (self.num_experts, )
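
The expected value in test_get_capacity pins down the capacity formula: capacity = ceil(num_tokens / num_experts * capacity_factor), with min_capacity acting as a lower bound (the assertion capacity == min_capacity only holds if the computed value is clamped up to it). A worked example with illustrative numbers, since the fixture's actual values are not visible in this diff:

import math

num_tokens, num_experts, capacity_factor = 16, 4, 1.5
capacity = math.ceil((num_tokens / num_experts) * capacity_factor)
assert capacity == 6  # ceil(16 / 4 * 1.5) = ceil(6.0)

# Lower-bound clamp, as the min_capacity branch of the test implies:
min_capacity = 8
assert max(capacity, min_capacity) == 8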
