Skip to content

Commit 892e873

Browse files
committed
add qwen3 model tests
Signed-off-by: wangli <wangli858794774@gmail.com>
1 parent d96da1f commit 892e873

File tree

5 files changed

+98
-5
lines changed

5 files changed

+98
-5
lines changed

.github/workflows/nightly_benchmarks.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ jobs:
6565
options: >-
6666
--device /dev/davinci0
6767
--device /dev/davinci1
68+
--device /dev/davinci2
69+
--device /dev/davinci3
6870
--device /dev/davinci_manager
6971
--device /dev/devmm_svm
7072
--device /dev/hisi_hdc

benchmarks/scripts/patch_benchmark_dataset.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
# Patch the benchmark_dataset.py file to set streaming=False in load_dataset calls
88

9+
VLLM_EDITABLE_PATH = "/__w/vllm-ascend/vllm-ascend/vllm-empty/vllm/benchmarks/datasets.py"
10+
911

1012
# TODO(Potabk): Remove this patch when the issue is fixed in the upstream
1113
class StreamingFalseTransformer(cst.CSTTransformer):
@@ -68,10 +70,9 @@ def patch_file(path):
6870
description=
6971
"Patch benchmark_dataset.py to set streaming=False in load_dataset calls"
7072
)
71-
parser.add_argument(
72-
"--path",
73-
type=str,
74-
default="/vllm-workspace/vllm/vllm/benchmarks/datasets.py",
75-
help="Path to the benchmark_dataset.py file")
73+
parser.add_argument("--path",
74+
type=str,
75+
default=VLLM_EDITABLE_PATH,
76+
help="Path to the benchmark_dataset.py file")
7677
args = parser.parse_args()
7778
patch_file(args.path)

benchmarks/tests/latency-tests.json

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,25 @@
1919
"num_iters_warmup": 5,
2020
"num_iters": 15
2121
}
22+
},
23+
{
24+
"test_name": "latency_qwen3_30B_A3B_tp4",
25+
"parameters": {
26+
"model": "Qwen/Qwen3-30B-A3B",
27+
"tensor_parallel_size": 4,
28+
"load_format": "dummy",
29+
"num_iters_warmup": 5,
30+
"num_iters": 15
31+
}
32+
},
33+
{
34+
"test_name": "latency_qwen3_32B_tp4",
35+
"parameters": {
36+
"model": "Qwen/Qwen3-32B",
37+
"tensor_parallel_size": 4,
38+
"load_format": "dummy",
39+
"num_iters_warmup": 5,
40+
"num_iters": 15
41+
}
2242
}
2343
]

benchmarks/tests/serving-tests.json

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,5 +73,53 @@
7373
"dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
7474
"num_prompts": 200
7575
}
76+
},
77+
{
78+
"test_name": "serving_qwen3_30B_A3B_tp4",
79+
"qps_list": [
80+
1,
81+
4,
82+
16,
83+
"inf"
84+
],
85+
"server_parameters": {
86+
"model": "Qwen/Qwen3-30B-A3B",
87+
"tensor_parallel_size": 4,
88+
"swap_space": 16,
89+
"disable_log_stats": "",
90+
"disable_log_requests": "",
91+
"load_format": "dummy"
92+
},
93+
"client_parameters": {
94+
"model": "Qwen/Qwen3-30B-A3B",
95+
"endpoint_type": "vllm",
96+
"dataset_name": "sharegpt",
97+
"dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
98+
"num_prompts": 200
99+
}
100+
},
101+
{
102+
"test_name": "serving_qwen3_32B_tp4",
103+
"qps_list": [
104+
1,
105+
4,
106+
16,
107+
"inf"
108+
],
109+
"server_parameters": {
110+
"model": "Qwen/Qwen3-32B",
111+
"tensor_parallel_size": 4,
112+
"swap_space": 16,
113+
"disable_log_stats": "",
114+
"disable_log_requests": "",
115+
"load_format": "dummy"
116+
},
117+
"client_parameters": {
118+
"model": "Qwen/Qwen3-32B",
119+
"endpoint_type": "vllm",
120+
"dataset_name": "sharegpt",
121+
"dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
122+
"num_prompts": 200
123+
}
76124
}
77125
]

benchmarks/tests/throughput-tests.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,28 @@
3333
"num_prompts": 200,
3434
"backend": "vllm"
3535
}
36+
},
37+
{
38+
"test_name": "throughput_qwen3_30B_A3B_tp4",
39+
"parameters": {
40+
"model": "Qwen/Qwen3-30B-A3B",
41+
"tensor_parallel_size": 4,
42+
"load_format": "dummy",
43+
"dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
44+
"num_prompts": 200,
45+
"backend": "vllm"
46+
}
47+
},
48+
{
49+
"test_name": "throughput_qwen3_32B_tp4",
50+
"parameters": {
51+
"model": "Qwen/Qwen3-32B",
52+
"tensor_parallel_size": 4,
53+
"load_format": "dummy",
54+
"dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
55+
"num_prompts": 200,
56+
"backend": "vllm"
57+
}
3658
}
3759
]
3860

0 commit comments

Comments
 (0)