1
+
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3
+ # Copyright 2023 The vLLM team.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ # This file is a part of the vllm-ascend project.
17
+ #
18
+ """
19
+ Run the offline data-parallel example with a Qwen3 MoE model and check its log output.
20
+ Run `pytest tests/multicard/test_data_parallel.py`.
21
+ """
22
+
23
+ import os
24
+ import subprocess
25
+ import sys
26
+ from unittest .mock import patch
27
+
28
+ import pytest
29
+
30
+ MODELS = ["vllm-ascend/Qwen3-30B-A3B-Puring" ]  # model identifiers that test_qwen3_moe_inference is parametrized over
31
+
32
+
33
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("max_tokens", [32])
@patch.dict(
    os.environ,
    {
        "ASCEND_RT_VISIBLE_DEVICES": "0,1,2,3",
        "VLLM_ASCEND_ENABLE_MOE_ALL2ALL_SEQ": "1",
        "VLLM_ASCEND_ENABLE_DBO": "1",
    },
)
def test_qwen3_moe_inference(model, max_tokens):
    """Launch the offline data-parallel example and check its log output.

    The example script is started as a child process with the patched
    environment (four visible devices, MoE all-to-all-seq and DBO switches
    set to "1"), using a data-parallel size of 2 and a tensor-parallel size
    of 2 on a single node in eager mode. stdout and stderr are captured
    together and searched for the expected markers.

    Args:
        model: Model identifier forwarded to the script via ``--model``.
        max_tokens: Supplied by the parametrization; not forwarded to the
            script by this test.

    Raises:
        subprocess.TimeoutExpired: If the script runs longer than 600 s.
    """
    # Assemble the command line piece by piece: interpreter + script first,
    # then the model and the parallel layout, then the boolean switches.
    launch_args = [sys.executable, "examples/offline_data_parallel.py"]
    launch_args += ["--model", model]
    launch_args += ["--dp-size", "2", "--tp-size", "2"]
    launch_args += ["--node-size", "1", "--node-rank", "0"]
    launch_args += ["--trust-remote-code", "--enforce-eager"]

    print(f"Running subprocess: {' '.join(launch_args)}")

    # The child gets a snapshot of the environment as patched by the
    # decorator; stderr is folded into stdout so one stream holds all logs.
    completed = subprocess.run(
        launch_args,
        env=os.environ.copy(),
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        timeout=600,
    )
    log_text = completed.stdout.decode()

    # Echo the captured log so it is visible in the pytest report.
    print(log_text)

    # Both data-parallel ranks must report their workload, and generation
    # output must appear, before the exit status is checked.
    expected_markers = (
        "DP rank 0 needs to process",
        "DP rank 1 needs to process",
        "Generated text:",
    )
    for marker in expected_markers:
        assert marker in log_text
    assert completed.returncode == 0
0 commit comments