           A comma-separated list of models to benchmark, leave empty to run everything
         required: false
         type: string
+      gpus:
+        description: |
+          A comma-separated list of GPUs to benchmark, i.e. h100, mi300
+        required: true
+        type: string
+        default: h100,mi300
   pull_request:
     paths:
       - .github/workflows/vllm-benchmark.yml
@@ -47,13 +53,15 @@ jobs:
       shell: bash
       env:
         MODELS: ${{ inputs.models || '' }}
+        GPUS: ${{ inputs.gpus || '' }}
       run: |
         set -eux
 
         # The generated matrix is grouped by model and runner
         python .github/scripts/generate_vllm_benchmark_matrix.py \
           --benchmark-configs-dir vllm-benchmarks/benchmarks \
-          --models "${MODELS}"
+          --models "${MODELS}" \
+          --gpus "${GPUS}"
 
   benchmarks:
     name: Run vLLM benchmarks
       fail-fast: false
     runs-on: ${{ matrix.runner }}
     environment: pytorch-x-vllm
+    permissions:
+      id-token: write
+      contents: read
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
@@ -80,23 +91,63 @@ jobs:
         python-version: '3.12'
         cache: 'pip'
 
-      - name: Set GPU device name
+      - name: Check if the device is supported
+        shell: bash
+        run: |
+          set -eux
+
+          if command -v nvidia-smi; then
+            DEVICE_NAME=cuda
+            nvidia-smi
+          elif command -v rocm-smi; then
+            DEVICE_NAME=rocm
+            rocm-smi
+          else
+            echo "Only CUDA and ROCm benchmarks are supported at the moment"
+            exit 1
+          fi
+          echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV
+
+      - name: Set GPU name and type
         working-directory: vllm-benchmarks
+        shell: bash
         run: |
-          export GPU_DEVICE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
-          echo "GPU_DEVICE=$GPU_DEVICE" >> $GITHUB_ENV
+          set -eux
+
+          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
+            DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
+          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
+            DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
+          fi
+          echo "DEVICE_TYPE=$DEVICE_TYPE" >> $GITHUB_ENV
 
       - name: Install dependencies
+        shell: bash
         run: |
           set -eux
-          pip install -r .github/scripts/requirements.txt
+
+          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
+            pip install -r .github/scripts/requirements.txt
+          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
+            pip install -r .github/scripts/requirements.txt \
+              --extra-index-url https://download.pytorch.org/whl/rocm6.3
+          fi
+
+      - name: Set Docker registry
+        shell: bash
+        run: |
+          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
+            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
+          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
+            DOCKER_IMAGE_PREFIX=docker.io/rocm/vllm-ci
+          fi
+          echo "DOCKER_IMAGE_PREFIX=$DOCKER_IMAGE_PREFIX" >> $GITHUB_ENV
 
       - name: Check for last benchmark commit
         working-directory: vllm-benchmarks
         env:
           HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
           HEAD_SHA: ${{ inputs.vllm_commit || '' }}
-          DOCKER_IMAGE_PREFIX: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
           MODELS: ${{ matrix.models }}
         run: |
           set -eux
@@ -117,7 +168,7 @@ jobs:
           fi
 
           NOT_EXIST=0
-          S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmark_results_${MODELS//\//_}.json"
+          S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${DEVICE_TYPE// /_}/benchmark_results_${MODELS//\//_}.json"
           aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1
 
           if [[ ${NOT_EXIST} == "1" ]]; then
@@ -130,10 +181,15 @@ jobs:
 
           echo "HEAD_SHA=$HEAD_SHA" >> $GITHUB_ENV
 
-      - name: Setup GPU_FLAG for docker run
+      - name: Setup CUDA GPU_FLAG for docker run
+        if: env.DEVICE_NAME == 'cuda'
         run: |
           echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
 
+      - name: Setup ROCm
+        if: env.DEVICE_NAME == 'rocm'
+        uses: pytorch/pytorch/./.github/actions/setup-rocm@main
+
       - name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
         run: |
           echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
@@ -165,7 +221,7 @@ jobs:
           SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
           SCCACHE_REGION: us-east-1
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
-          DOCKER_IMAGE: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:${{ env.HEAD_SHA }}
+          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE_PREFIX }}:${{ env.HEAD_SHA }}
           # vLLM-related environment variables
           ENGINE_VERSION: v1
           SAVE_TO_PYTORCH_BENCHMARK_FORMAT: 1
@@ -177,7 +233,8 @@ jobs:
             ${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
             -e SCCACHE_BUCKET \
             -e SCCACHE_REGION \
-            -e GPU_DEVICE \
+            -e DEVICE_NAME \
+            -e DEVICE_TYPE \
             -e HF_TOKEN \
             -e ENGINE_VERSION \
             -e SAVE_TO_PYTORCH_BENCHMARK_FORMAT \
@@ -189,6 +246,16 @@ jobs:
             "${DOCKER_IMAGE}" \
             bash -xc "cd vllm-benchmarks/vllm && bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh"
 
+      - name: Authenticate with AWS
+        # AWS CUDA runners already have access to the bucket via its runner IAM role
+        if: env.DEVICE_NAME != 'cuda'
+        uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
+        with:
+          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
+          # The max duration enforced by the server side
+          role-duration-seconds: 18000
+          aws-region: us-east-1
+
       - name: Upload the benchmark results
         env:
           BENCHMARK_RESULTS: vllm-benchmarks/vllm/benchmarks/results
@@ -203,5 +270,5 @@ jobs:
             --repo vllm-benchmarks/vllm \
             --benchmark-name "vLLM benchmark" \
             --benchmark-results "${BENCHMARK_RESULTS}" \
-            --device "${GPU_DEVICE}" \
+            --device "${DEVICE_TYPE// /_}" \
             --model "${MODELS//\//_}"
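
For context on how the new input is meant to be used: a workflow_dispatch run can now target a device family explicitly. A minimal sketch (the model value is hypothetical; the workflow file name is taken from the pull_request paths filter above):

```bash
# Kick off the benchmark workflow for MI300 only, on one model.
# `gh workflow run -f key=value` sets workflow_dispatch inputs.
gh workflow run vllm-benchmark.yml \
  -f models=meta-llama/Meta-Llama-3.1-8B-Instruct \
  -f gpus=mi300

# Leaving gpus at its default (h100,mi300) benchmarks both device types.
gh workflow run vllm-benchmark.yml -f gpus=h100,mi300
```

The comma-separated value is presumably split and matched against runner GPUs by the new --gpus flag of generate_vllm_benchmark_matrix.py; that script change is not shown in this diff.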
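
Two bash parameter expansions do the heavy lifting when building the S3 key and the --device flag above. A small illustration with made-up values (real ones come from nvidia-smi/rocminfo and the benchmark matrix at runtime):

```bash
# ${VAR//pattern/replacement} substitutes every occurrence, not just the first.
DEVICE_TYPE="AMD Instinct MI300X"        # hypothetical rocminfo "Marketing Name"
MODELS="meta-llama/Meta-Llama-3.1-8B"    # hypothetical model name

echo "${DEVICE_TYPE// /_}"   # AMD_Instinct_MI300X  (spaces -> underscores, S3-safe)
echo "${MODELS//\//_}"       # meta-llama_Meta-Llama-3.1-8B  (slashes -> underscores)
```

This is why the diff replaces the old ${GPU_DEVICE} (a single awk-extracted token such as H100) with ${DEVICE_TYPE// /_}: ROCm marketing names contain spaces, while the CUDA path's awk '{print $2}' never produced any.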