Skip to content

Commit 0de5ef8

Browse files
Potabk authored and wangxiaoxin (A) committed
[CI][Benchmark] Add new model and v1 test to perf benchmarks (#1099)
### What this PR does / why we need it?

- Add qwen2.5-7b-instruct test
- Add v1 test

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
Signed-off-by: wangxiaoxin (A) <wangxiaoxin7@huawei.com>
1 parent 6794647 commit 0de5ef8

File tree

4 files changed

+62
-11
lines changed

4 files changed

+62
-11
lines changed

.github/workflows/nightly_benchmarks.yaml

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,18 @@ jobs:
4141
test:
4242
if: ${{ contains(github.event.pull_request.labels.*.name, 'performance-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
4343

44-
name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }}
44+
name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }}, use_v1=${{ matrix.vllm_use_v1 }}
4545
runs-on: 'linux-arm64-npu-static-8'
4646
strategy:
4747
matrix:
4848
include:
4949
- vllm_branch: v0.9.1
5050
vllm_ascend_branch: main
51+
vllm_use_v1: 0
52+
- vllm_branch: v0.9.0
53+
vllm_ascend_branch: main
54+
vllm_use_v1: 1
55+
max-parallel: 1
5156
container:
5257
image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
5358
volumes:
@@ -67,6 +72,7 @@ jobs:
6772
HF_TOKEN: ${{ secrets.HF_TOKEN }}
6873
ES_OM_DOMAIN: ${{ secrets.ES_OM_DOMAIN }}
6974
ES_OM_AUTHORIZATION: ${{ secrets.ES_OM_AUTHORIZATION }}
75+
VLLM_USE_V1: ${{ matrix.vllm_use_v1 }}
7076
steps:
7177
- name: Check npu and CANN info
7278
run: |
@@ -136,7 +142,7 @@ jobs:
136142
- name: Install elastic_tool
137143
if: github.event_name != 'pull_request'
138144
run: |
139-
pip install escli-tool==0.2.1
145+
pip install escli-tool==0.2.2
140146
141147
- name: Collect pr info from vllm-project/vllm-ascend
142148
if: github.event_name != 'pull_request'
@@ -173,17 +179,17 @@ jobs:
173179
echo "vllm branch: ${{ matrix.vllm_branch }}"
174180
echo "vllm-ascend branch: ${{ matrix.vllm_ascend_branch }}"
175181
echo "------------------------"
182+
176183
cd /github/home
177184
bash benchmarks/scripts/run-performance-benchmarks.sh
178185
# send the result to es
179-
if [[ "${{ github.event_name }}" != "pull request" ]]; then
180-
escli add --vllm_branch ${{ matrix.vllm_branch }} \
181-
--vllm_ascend_branch ${{ matrix.vllm_ascend_branch }} \
182-
--commit_id $commit_id \
183-
--commit_title "$commit_title" \
184-
--created_at "$commit_time_no_tz" \
185-
--res_dir ./benchmarks/results
186-
rm -rf ./benchmarks/results
187-
fi
186+
escli add --vllm_branch ${{ matrix.vllm_branch }} \
187+
--vllm_ascend_branch ${{ matrix.vllm_ascend_branch }} \
188+
--commit_id $commit_id \
189+
--commit_title "$commit_title" \
190+
--created_at "$commit_time_no_tz" \
191+
--res_dir ./benchmarks/results \
192+
--extra_feat '{"VLLM_USE_V1": "${{ matrix.vllm_use_v1 }}"}'
193+
rm -rf ./benchmarks/results
188194
cd -
189195
done < commit_log.txt

benchmarks/tests/latency-tests.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,15 @@
99
"num_iters_warmup": 5,
1010
"num_iters": 15
1111
}
12+
},
13+
{
14+
"test_name": "latency_qwen2_5_7B_tp1",
15+
"parameters": {
16+
"model": "Qwen/Qwen2.5-7B-Instruct",
17+
"tensor_parallel_size": 1,
18+
"load_format": "dummy",
19+
"num_iters_warmup": 5,
20+
"num_iters": 15
21+
}
1222
}
1323
]

benchmarks/tests/serving-tests.json

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,5 +49,29 @@
4949
"dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
5050
"num_prompts": 200
5151
}
52+
},
53+
{
54+
"test_name": "serving_qwen2_5_7B_tp1",
55+
"qps_list": [
56+
1,
57+
4,
58+
16,
59+
"inf"
60+
],
61+
"server_parameters": {
62+
"model": "Qwen/Qwen2.5-7B-Instruct",
63+
"tensor_parallel_size": 1,
64+
"swap_space": 16,
65+
"disable_log_stats": "",
66+
"disable_log_requests": "",
67+
"load_format": "dummy"
68+
},
69+
"client_parameters": {
70+
"model": "Qwen/Qwen2.5-7B-Instruct",
71+
"backend": "vllm",
72+
"dataset_name": "sharegpt",
73+
"dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
74+
"num_prompts": 200
75+
}
5276
}
5377
]

benchmarks/tests/throughput-tests.json

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,17 @@
2222
"dataset_path": "lmarena-ai/vision-arena-bench-v0.1",
2323
"num_prompts": 200
2424
}
25+
},
26+
{
27+
"test_name": "throughput_qwen2_5_7B_tp1",
28+
"parameters": {
29+
"model": "Qwen/Qwen2.5-7B-Instruct",
30+
"tensor_parallel_size": 1,
31+
"load_format": "dummy",
32+
"dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
33+
"num_prompts": 200,
34+
"backend": "vllm"
35+
}
2536
}
2637
]
2738

0 commit comments

Comments
 (0)