Skip to content

Commit affca6f

Browse files
[Test] Add accuracy test report workflow (#542)
### What this PR does / why we need it? 1. Provide accuracy test report for development branch release. 2. Models and datasets for accuracy test: | Model | datasets | |---------------------------- | --------------------------- | | Qwen2.5-7B-Instruct | ceval-val, gsm8k, mmlu | | Qwen3-8B | ceval-val, gsm8k, mmlu | | Llama-3.1-8B-Instruct | ceval-val, gsm8k, mmlu | | Qwen2.5-VL-7B-Instruct | mmmu_val | ### Does this PR introduce _any_ user-facing change? This PR will display the accuracy test report of the release version in docs/source/developer_guide/accuracy_report. Qwen2.5-7B-Instruct.md Qwen3-8B.md Llama-3.1-8B-Instruct.md Qwen2.5-VL-7B-Instruct.md Signed-off-by: hfadzxy <starmoon_zhang@163.com>
1 parent ba9714c commit affca6f

File tree

6 files changed

+587
-2
lines changed

6 files changed

+587
-2
lines changed

.github/actionlint.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ self-hosted-runner:
22
# Labels of self-hosted runner in array of strings.
33
labels:
44
- linux-arm64-npu-1
5+
- linux-arm64-npu-2
56
- linux-arm64-npu-4
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
#
2+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
# This file is a part of the vllm-ascend project.
16+
#
17+
18+
name: Accuracy Report
19+
on:
20+
workflow_dispatch:
21+
inputs:
22+
branch:
23+
description: 'choose a dev branch to pr'
24+
required: true
25+
vllm-ascend-version:
26+
description: 'what vllm-ascend version to accuracy test?'
27+
required: true
28+
type: string
29+
jobs:
30+
download:
31+
runs-on: ubuntu-latest
32+
steps:
33+
- name: Checkout repository
34+
uses: actions/checkout@v4
35+
with:
36+
ref: ${{ github.event.inputs.branch }}
37+
38+
- name: Debug List Artifacts
39+
run: gh api /repos/${{ github.repository }}/actions/artifacts
40+
env:
41+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
42+
43+
- name: Query artifact run id for Qwen2.5-VL-7B-Instruct V0 latest artifact
44+
id: get_Qwen2_5_VL_7B_Instruct_latest_run_id_V0
45+
run: |
46+
ARTIFACT_JSON=$(gh api "repos/${{ github.repository }}/actions/artifacts")
47+
RUN_ID=$(echo "$ARTIFACT_JSON" | \
48+
jq -r '[.artifacts[] | select(.name=="${{ github.event.inputs.vllm-ascend-version }}-Qwen2.5-VL-7B-Instruct-V0-report")] | sort_by(.created_at) | last | .workflow_run.id')
49+
echo "runid=$RUN_ID" >> "$GITHUB_OUTPUT"
50+
env:
51+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
52+
53+
- name: Query artifact run id for Qwen2.5-7B-Instruct V0 latest artifact
54+
id: get_Qwen2_5_7B_Instruct_latest_run_id_V0
55+
run: |
56+
ARTIFACT_JSON=$(gh api "repos/${{ github.repository }}/actions/artifacts")
57+
RUN_ID=$(echo "$ARTIFACT_JSON" | \
58+
jq -r '[.artifacts[] | select(.name=="${{ github.event.inputs.vllm-ascend-version }}-Qwen2.5-7B-Instruct-V0-report")] | sort_by(.created_at) | last | .workflow_run.id')
59+
echo "runid=$RUN_ID" >> "$GITHUB_OUTPUT"
60+
env:
61+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
62+
63+
- name: Query artifact run id for Llama-3.1-8B-Instruct V0 latest artifact
64+
id: get_Llama_3_1_8B_Instruct_latest_run_id_V0
65+
run: |
66+
ARTIFACT_JSON=$(gh api "repos/${{ github.repository }}/actions/artifacts")
67+
RUN_ID=$(echo "$ARTIFACT_JSON" | \
68+
jq -r '[.artifacts[] | select(.name=="${{ github.event.inputs.vllm-ascend-version }}-Llama-3.1-8B-Instruct-V0-report")] | sort_by(.created_at) | last | .workflow_run.id')
69+
echo "runid=$RUN_ID" >> "$GITHUB_OUTPUT"
70+
env:
71+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
72+
73+
- name: Query artifact run id for Qwen3-8B V0 latest artifact
74+
id: get_Qwen3_8B_latest_run_id_V0
75+
run: |
76+
ARTIFACT_JSON=$(gh api "repos/${{ github.repository }}/actions/artifacts")
77+
RUN_ID=$(echo "$ARTIFACT_JSON" | \
78+
jq -r '[.artifacts[] | select(.name=="${{ github.event.inputs.vllm-ascend-version }}-Qwen3-8B-V0-report")] | sort_by(.created_at) | last | .workflow_run.id')
79+
echo "runid=$RUN_ID" >> "$GITHUB_OUTPUT"
80+
env:
81+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
82+
83+
- name: Download Qwen/Qwen2.5-VL-7B-Instruct V0 Artifact
84+
uses: actions/download-artifact@v4
85+
with:
86+
name: ${{ github.event.inputs.vllm-ascend-version }}-Qwen2.5-VL-7B-Instruct-V0-report
87+
path: ./docs/source/developer_guide/evaluation/accuracy_report
88+
github-token: ${{ secrets.GITHUB_TOKEN }}
89+
repository: vllm-project/vllm-ascend
90+
run-id: ${{ steps.get_Qwen2_5_VL_7B_Instruct_latest_run_id_V0.outputs.runid }}
91+
92+
- name: Download Qwen/Qwen2.5-7B-Instruct Artifact
93+
uses: actions/download-artifact@v4
94+
with:
95+
name: ${{ github.event.inputs.vllm-ascend-version }}-Qwen2.5-7B-Instruct-V0-report
96+
path: ./docs/source/developer_guide/evaluation/accuracy_report
97+
github-token: ${{ secrets.GITHUB_TOKEN }}
98+
repository: vllm-project/vllm-ascend
99+
run-id: ${{ steps.get_Qwen2_5_7B_Instruct_latest_run_id_V0.outputs.runid }}
100+
101+
- name: Download meta-llama/Llama-3.1-8B-Instruct Artifact
102+
uses: actions/download-artifact@v4
103+
with:
104+
name: ${{ github.event.inputs.vllm-ascend-version }}-Llama-3.1-8B-Instruct-V0-report
105+
path: ./docs/source/developer_guide/evaluation/accuracy_report
106+
github-token: ${{ secrets.GITHUB_TOKEN }}
107+
repository: vllm-project/vllm-ascend
108+
run-id: ${{ steps.get_Llama_3_1_8B_Instruct_latest_run_id_V0.outputs.runid }}
109+
110+
- name: Download Qwen/Qwen3-8B Artifact
111+
uses: actions/download-artifact@v4
112+
with:
113+
name: ${{ github.event.inputs.vllm-ascend-version }}-Qwen3-8B-V0-report
114+
path: ./docs/source/developer_guide/evaluation/accuracy_report
115+
github-token: ${{ secrets.GITHUB_TOKEN }}
116+
repository: vllm-project/vllm-ascend
117+
run-id: ${{ steps.get_Qwen3_8B_latest_run_id_V0.outputs.runid }}
118+
119+
- name: Display Files
120+
working-directory: ./docs/source/developer_guide/evaluation/accuracy_report
121+
run: |
122+
cat ./Qwen2.5-VL-7B-Instruct.md
123+
cat ./Llama-3.1-8B-Instruct.md
124+
cat ./Qwen2.5-7B-Instruct.md
125+
cat ./Qwen3-8B.md
126+
127+
- name: Create Pull Request for markdown update
128+
uses: peter-evans/create-pull-request@v7
129+
with:
130+
token: ${{ secrets.PR_TOKEN }}
131+
base: ${{ github.ref_name }}
132+
branch: auto-pr/accuracy-test
133+
commit-message: "Update accuracy report for ${{ github.event.inputs.branch }}"
134+
add-paths: ./docs/source/developer_guide/evaluation/accuracy_report/*.md
135+
title: "[Doc]Update accuracy report for ${{ github.event.inputs.branch }}"
136+
body: |
137+
The accuracy results running on Ascend NPU have changed, I'm updating the report.
138+
Please review the changes.
139+
140+
- [Workflow run][1]
141+
- [Qwen2.5-7B-Instruct accuracy report][2]
142+
- [Llama-3.1-8B-Instruct accuracy report][3]
143+
- [Qwen2.5-VL-7B-Instruct accuracy report][4]
144+
- [Qwen3-8B accuracy report][5]
145+
146+
[1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
147+
[2]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen2_5_7B_Instruct_latest_run_id_V0.outputs.runid }}
148+
[3]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Llama_3_1_8B_Instruct_latest_run_id_V0.outputs.runid }}
149+
[4]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen2_5_VL_7B_Instruct_latest_run_id_V0.outputs.runid }}
150+
[5]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen3_8B_latest_run_id_V0.outputs.runid }}

.github/workflows/accuracy_test.yaml

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
#
2+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
# This file is a part of the vllm-ascend project.
16+
#
17+
18+
name: Accuracy Tests
19+
20+
on:
21+
workflow_dispatch:
22+
inputs:
23+
vllm-version:
24+
description: 'what vllm version to accuracy test?'
25+
required: true
26+
type: string
27+
vllm-ascend-version:
28+
description: 'what vllm-ascend version to accuracy test?'
29+
required: true
30+
type: string
31+
models:
32+
description: 'choose model(all/Qwen2.5-7B-Instruct/Llama-3.1-8B-Instruct/Qwen2.5-VL-7B-Instruct/Qwen3-8B)'
33+
required: true
34+
type: choice
35+
options:
36+
- all
37+
- Qwen/Qwen2.5-7B-Instruct
38+
- meta-llama/Llama-3.1-8B-Instruct
39+
- Qwen/Qwen2.5-VL-7B-Instruct
40+
- Qwen/Qwen3-8B
41+
default: 'all'
42+
43+
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
44+
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
45+
# It's used to activate ascend-toolkit environment variables.
46+
defaults:
47+
run:
48+
shell: bash -el {0}
49+
50+
jobs:
51+
model_tests:
52+
name: Model Test - ${{ matrix.model_name }}
53+
runs-on: 'linux-arm64-npu-2'
54+
strategy:
55+
matrix:
56+
include: ${{ fromJSON(
57+
(github.event.inputs.models == 'all' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct","output_file":"Qwen2.5-7B-Instruct"},{"model_name":"meta-llama/Llama-3.1-8B-Instruct","output_file":"Llama-3.1-8B-Instruct"},{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct","output_file":"Qwen2.5-VL-7B-Instruct"}, {"model_name":"Qwen/Qwen3-8B","output_file":"Qwen3-8B"}]') ||
58+
(github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct","output_file":"Qwen2.5-7B-Instruct"}]') ||
59+
(github.event.inputs.models == 'meta-llama/Llama-3.1-8B-Instruct' && '[{"model_name":"meta-llama/Llama-3.1-8B-Instruct","output_file":"Llama-3.1-8B-Instruct"}]') ||
60+
(github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct","output_file":"Qwen2.5-VL-7B-Instruct"}]') ||
61+
(github.event.inputs.models == 'Qwen/Qwen3-8B' && '[{"model_name":"Qwen/Qwen3-8B","output_file":"Qwen3-8B"}]')
62+
) }}
63+
fail-fast: false
64+
65+
container:
66+
image: quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py3.10
67+
env:
68+
HF_ENDPOINT: https://hf-mirror.com
69+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
70+
DATASET_SOURCE: ModelScope
71+
72+
steps:
73+
- name: Checkout repository
74+
uses: actions/checkout@v4
75+
76+
- name: Check npu and CANN info
77+
run: |
78+
npu-smi info
79+
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
80+
81+
- name: Config mirrors
82+
run: |
83+
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
84+
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
85+
apt-get update -y
86+
apt install git -y
87+
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
88+
89+
- name: Install system dependencies
90+
run: |
91+
apt-get -y install `cat packages.txt`
92+
apt-get -y install gcc g++ cmake libnuma-dev
93+
94+
95+
- name: Install system dependencies
96+
run: |
97+
apt-get -y install `cat packages.txt`
98+
apt-get -y install gcc g++ cmake libnuma-dev
99+
100+
- name: Checkout vllm-project/vllm repo
101+
uses: actions/checkout@v4
102+
with:
103+
repository: vllm-project/vllm
104+
path: ./vllm-empty
105+
ref: ${{ github.event.inputs.vllm-version }}
106+
107+
- name: Install vllm-project/vllm from source
108+
working-directory: ./vllm-empty
109+
run: VLLM_TARGET_DEVICE=empty pip install -e .
110+
111+
112+
- name: Checkout vllm-project/vllm-ascend repo
113+
uses: actions/checkout@v4
114+
with:
115+
repository: vllm-project/vllm-ascend
116+
path: ./vllm-ascend
117+
ref: ${{ github.event.inputs.vllm-ascend-version }}
118+
fetch-depth: 0
119+
120+
- name: Install pta
121+
run: |
122+
if [ ! -d /root/.cache/pta ]; then
123+
mkdir -p /root/.cache/pta
124+
fi
125+
if [ ! -f /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl ]; then
126+
cd /root/.cache/pta
127+
rm -rf pytorch_v2.5.1_py310*
128+
wget https://pytorch-package.obs.cn-north-4.myhuaweicloud.com/pta/Daily/v2.5.1/20250320.3/pytorch_v2.5.1_py310.tar.gz
129+
tar -zxvf pytorch_v2.5.1_py310.tar.gz
130+
fi
131+
pip install /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
132+
133+
- name: Install vllm-project/vllm-ascend
134+
working-directory: ./vllm-ascend
135+
run: |
136+
pip install -r requirements-dev.txt
137+
pip install -e .
138+
139+
- name: Checkout EleutherAI/lm-evaluation-harness repo
140+
uses: actions/checkout@v4
141+
with:
142+
repository: EleutherAI/lm-evaluation-harness
143+
path: ./lm-eval
144+
fetch-depth: 0
145+
146+
- name: Install EleutherAI/lm-evaluation-harness
147+
working-directory: ./lm-eval
148+
run: |
149+
pip install -e .
150+
pip install ray datasets==2.16.0 transformers==4.50.3 huggingface-hub==0.29.3
151+
152+
- name: Collect version info
153+
run: |
154+
for dir in /usr/local/Ascend/ascend-toolkit/*; do
155+
dname=$(basename "$dir")
156+
if [ "$dname" != "latest" ]; then
157+
TOOLKIT_DIR="$dname"
158+
break
159+
fi
160+
done
161+
INFO_FILE="/usr/local/Ascend/ascend-toolkit/${TOOLKIT_DIR}/$(uname -i)-linux/ascend_toolkit_install.info"
162+
CANN_VERSION=$(grep "version=" "$INFO_FILE" \
163+
| head -n1 \
164+
| cut -d'=' -f2 \
165+
| tr -d '"')
166+
{
167+
echo "CANN_VERSION=$CANN_VERSION"
168+
pip show torch | grep "Version:" | awk '{print "TORCH_VERSION="$2}'
169+
pip show torch_npu | grep "Version:" | awk '{print "TORCH_NPU_VERSION="$2}'
170+
pip show vllm | grep "Version:" | awk '{print "VLLM_VERSION="$2}' | sed 's/+.*//'
171+
} >> "$GITHUB_ENV"
172+
173+
- name: Print versions
174+
run: |
175+
echo "CANN: ${{ env.CANN_VERSION }}"
176+
echo "Torch NPU: ${{ env.TORCH_NPU_VERSION }}"
177+
echo "Torch: ${{ env.TORCH_VERSION }}"
178+
echo "vLLM: ${{ env.VLLM_VERSION }}"
179+
180+
- name: Run Accuracy Test for V0
181+
working-directory: ./benchmarks
182+
env:
183+
VLLM_USE_V1: 0
184+
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
185+
run: |
186+
mkdir -p ./accuracy/V0
187+
python ./scripts/run_accuracy.py \
188+
--model "${{ matrix.model_name }}" \
189+
--output "./accuracy/V0/${{ matrix.output_file }}.md" \
190+
--vllm_ascend_version "${{ github.event.inputs.vllm-ascend-version }}" \
191+
--cann_version "${{ env.CANN_VERSION }}" \
192+
--torch_npu_version "${{ env.TORCH_NPU_VERSION }}" \
193+
--torch_version "${{ env.TORCH_VERSION }}" \
194+
--vllm_version "${{ env.VLLM_VERSION }}"
195+
196+
- name: Upload Report for V0
197+
uses: actions/upload-artifact@v4
198+
with:
199+
name: "${{ github.event.inputs.vllm-ascend-version }}-${{ matrix.output_file }}-V0-report"
200+
path: ./benchmarks/accuracy/V0/${{ matrix.output_file }}.md
201+
if-no-files-found: warn
202+
retention-days: 90
203+
overwrite: true

0 commit comments

Comments
 (0)