 import torch
 import pytest
 import math
-import vllm_ascend.patch.worker.patch_common.patch_utils
+import vllm_ascend.patch.worker.patch_common.patch_utils  # type: ignore[import]  # isort: skip  # noqa

 from vllm_ascend.ops.moe_dispatcher.moe_utils import permute, get_capacity, topk_softmax_with_capacity, group_limited_topk, unpermute, sort_chunks_by_idxs

@@ -22,141 +22,118 @@ def setup(self):
         self.num_groups = 2
         self.scaling_factor = 1.0

-
     def test_group_limited_topk(self, setup):
         # Test group-limited topk routing
         scores = torch.randn(self.num_tokens, self.num_experts)
-        probs, indices = group_limited_topk(
-            scores,
-            topk=self.topk,
-            num_tokens=self.num_tokens,
-            num_experts=self.num_experts,
-            num_groups=self.num_groups,
-            group_topk=self.group_topk
-        )
+        probs, indices = group_limited_topk(scores,
+                                            topk=self.topk,
+                                            num_tokens=self.num_tokens,
+                                            num_experts=self.num_experts,
+                                            num_groups=self.num_groups,
+                                            group_topk=self.group_topk)

         assert probs.shape == (self.num_tokens, self.topk)
         assert indices.shape == (self.num_tokens, self.topk)
         assert torch.all(indices < self.num_experts)

-
     @pytest.mark.parametrize("score_function", ["softmax"])
     def test_topk_softmax_with_capacity(self, setup, score_function):
         # Test topk softmax with capacity
         logits = torch.randn(self.num_tokens, self.num_experts)

         # Test without capacity
         probs, routing_map, tokens_per_expert, top_indices = topk_softmax_with_capacity(
-            logits,
-            topk=self.topk,
-            score_function=score_function
-        )
+            logits, topk=self.topk, score_function=score_function)
         assert probs.shape == (self.num_tokens, self.num_experts)
         assert routing_map.shape == (self.num_tokens, self.num_experts)
-        assert tokens_per_expert.shape == (self.num_experts,)
+        assert tokens_per_expert.shape == (self.num_experts, )

         # Test with group routing
         probs, routing_map, tokens_per_expert, top_indices = topk_softmax_with_capacity(
             logits,
             topk=self.topk,
             num_groups=self.num_groups,
             group_topk=self.group_topk,
-            score_function=score_function
-        )
+            score_function=score_function)
         assert probs.shape == (self.num_tokens, self.num_experts)

-
     def test_get_capacity(self, setup):
         # Test capacity calculation
-        capacity = get_capacity(
-            num_tokens=self.num_tokens,
-            num_experts=self.num_experts,
-            capacity_factor=self.capacity_factor
-        )
-        expected = math.ceil((self.num_tokens / self.num_experts) * self.capacity_factor)
+        capacity = get_capacity(num_tokens=self.num_tokens,
+                                num_experts=self.num_experts,
+                                capacity_factor=self.capacity_factor)
+        expected = math.ceil(
+            (self.num_tokens / self.num_experts) * self.capacity_factor)
         assert capacity == expected

         # Test with min capacity
         min_capacity = 5
-        capacity = get_capacity(
-            num_tokens=self.num_tokens,
-            num_experts=self.num_experts,
-            capacity_factor=self.capacity_factor,
-            min_capacity=min_capacity
-        )
+        capacity = get_capacity(num_tokens=self.num_tokens,
+                                num_experts=self.num_experts,
+                                capacity_factor=self.capacity_factor,
+                                min_capacity=min_capacity)
         assert capacity == min_capacity

-
     def test_permute(self, setup):
         # Test token permutation
         tokens = torch.randn(self.num_tokens, self.hidden_size)
-        routing_map = torch.randint(0, 2, (self.num_tokens, self.num_experts)).bool()
+        routing_map = torch.randint(
+            0, 2, (self.num_tokens, self.num_experts)).bool()

         # Basic permutation
         permuted_tokens, sorted_indices = permute(tokens, routing_map)
         assert permuted_tokens.shape[0] == routing_map.sum()
         assert sorted_indices.shape[0] == routing_map.sum()

         # With drop and pad
-        capacity = get_capacity(
-            num_tokens=self.num_tokens * self.topk,
-            num_experts=self.num_experts,
-            capacity_factor=self.capacity_factor
-        )
+        capacity = get_capacity(num_tokens=self.num_tokens * self.topk,
+                                num_experts=self.num_experts,
+                                capacity_factor=self.capacity_factor)
         num_out_tokens = capacity * self.num_experts
         permuted_tokens, sorted_indices = permute(
             tokens,
             routing_map,
             num_out_tokens=num_out_tokens,
-            drop_and_pad=True
-        )
+            drop_and_pad=True)
         assert permuted_tokens.shape[0] == num_out_tokens
         assert sorted_indices.shape[0] == num_out_tokens

-
     def test_unpermute(self, setup):
         # Test token unpermutation
         tokens = torch.randn(self.num_tokens, self.hidden_size)
-        routing_map = torch.randint(0, 2, (self.num_tokens, self.num_experts)).bool()
+        routing_map = torch.randint(
+            0, 2, (self.num_tokens, self.num_experts)).bool()
         probs = torch.rand(self.num_tokens, self.num_experts)

         # First permute
         permuted_tokens, sorted_indices = permute(tokens, routing_map)

         # Then unpermute
-        restored_tokens = unpermute(
-            permuted_tokens,
-            sorted_indices,
-            tokens.shape,
-            probs=probs,
-            routing_map=routing_map
-        )
+        restored_tokens = unpermute(permuted_tokens,
+                                    sorted_indices,
+                                    tokens.shape,
+                                    probs=probs,
+                                    routing_map=routing_map)
         assert restored_tokens.shape == tokens.shape

         # With drop and pad
-        capacity = get_capacity(
-            num_tokens=self.num_tokens * self.topk,
-            num_experts=self.num_experts,
-            capacity_factor=self.capacity_factor
-        )
+        capacity = get_capacity(num_tokens=self.num_tokens * self.topk,
+                                num_experts=self.num_experts,
+                                capacity_factor=self.capacity_factor)
         num_out_tokens = capacity * self.num_experts
         permuted_tokens, sorted_indices = permute(
             tokens,
             routing_map,
             num_out_tokens=num_out_tokens,
-            drop_and_pad=True
-        )
-        restored_tokens = unpermute(
-            permuted_tokens,
-            sorted_indices,
-            tokens.shape,
-            probs=probs,
-            routing_map=routing_map,
-            drop_and_pad=True
-        )
+            drop_and_pad=True)
+        restored_tokens = unpermute(permuted_tokens,
+                                    sorted_indices,
+                                    tokens.shape,
+                                    probs=probs,
+                                    routing_map=routing_map,
+                                    drop_and_pad=True)
         assert restored_tokens.shape == tokens.shape

-
     def test_sort_chunks_by_idxs(self, setup):
         # Test chunk sorting
         input_tensor = torch.randn(10, self.hidden_size)
@@ -167,10 +144,10 @@ def test_sort_chunks_by_idxs(self, setup):
         assert output.shape == input_tensor.shape

         # Verify the order is correct
-        expected = torch.cat([input_tensor[5:], input_tensor[0:3], input_tensor[3:5]])
+        expected = torch.cat(
+            [input_tensor[5:], input_tensor[0:3], input_tensor[3:5]])
         assert torch.allclose(output, expected)

-
     @pytest.mark.parametrize("score_function", ["softmax"])
     def test_score_functions(self, setup, score_function):
         # Test different score functions
@@ -181,8 +158,7 @@ def test_score_functions(self, setup, score_function):
             logits,
             topk=self.topk,
             score_function=score_function,
-            expert_bias=expert_bias
-        )
+            expert_bias=expert_bias)
         assert probs.shape == (self.num_tokens, self.num_experts)
         assert routing_map.shape == (self.num_tokens, self.num_experts)
-        assert tokens_per_expert.shape == (self.num_experts,)
+        assert tokens_per_expert.shape == (self.num_experts, )