Commit 319ad22

Authored by Huy Do <huydhn@gmail.com>
Add ROCm benchmarks (#41)
* Add ROCm benchmarks
* Fix typo
* Add missing argument
* More tweaks
* Use rocm-smi
* Authenticate with AWS on ROCm
* Ready for review
* Beautifier
* Add id-token
* Use DEVICE_NAME and DEVICE_TYPE consistently

Signed-off-by: Huy Do <huydhn@gmail.com>
1 parent 9bfb423 commit 319ad22

File tree

.github/scripts/generate_vllm_benchmark_matrix.py
.github/workflows/vllm-benchmark.yml

2 files changed: +109 −21 lines

.github/scripts/generate_vllm_benchmark_matrix.py

Lines changed: 31 additions & 10 deletions
@@ -10,22 +10,26 @@
 
 
 logging.basicConfig(level=logging.INFO)
-
 # Those are H100 runners from https://github.com/pytorch-labs/pytorch-gha-infra/blob/main/multi-tenant/inventory/manual_inventory
+# while ROCm runners are provided by AMD
 RUNNERS_MAPPING = {
     1: [
         "linux.aws.h100",
+        "linux.rocm.gpu.mi300.2",  # No single ROCm GPU?
     ],
     # NB: There is no 2xH100 runner at the moment, so let's use the next one
     # in the list here which is 4xH100
     2: [
         "linux.aws.h100.4",
+        "linux.rocm.gpu.mi300.2",
     ],
     4: [
         "linux.aws.h100.4",
+        "linux.rocm.gpu.mi300.4",
     ],
     8: [
         "linux.aws.h100.8",
+        "linux.rocm.gpu.mi300.8",
     ],
 }
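With the new entries, every supported tensor-parallel size resolves to both a CUDA pool and a ROCm pool. A quick sketch of what a lookup yields under the mapping above:

    >>> RUNNERS_MAPPING[2]
    ['linux.aws.h100.4', 'linux.rocm.gpu.mi300.2']
    >>> RUNNERS_MAPPING[8]
    ['linux.aws.h100.8', 'linux.rocm.gpu.mi300.8']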

@@ -71,6 +75,12 @@ def parse_args() -> Any:
         default="",
         help="the comma-separated list of models to benchmark",
     )
+    parser.add_argument(
+        "--gpus",
+        type=str,
+        default="",
+        help="the comma-separated list of GPUs to benchmark",
+    )
 
     return parser.parse_args()
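The new flag arrives as a plain string; main() below normalizes it the same way as --models (split on commas, trim, lowercase, drop empties). A minimal sketch with an illustrative value:

    gpus = [g.strip().lower() for g in "H100, MI300".split(",") if g.strip()]
    # -> ['h100', 'mi300']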

@@ -97,13 +107,15 @@ def set_output(name: str, val: Any) -> None:
 
 
 def generate_benchmark_matrix(
-    benchmark_configs_dir: str, models: List[str]
+    benchmark_configs_dir: str, models: List[str], gpus: List[str]
 ) -> Dict[str, Any]:
     """
     Parse all the JSON files in vLLM benchmark configs directory to get the
     model name and tensor parallel size (aka number of GPUs)
     """
     get_all_models = True if not models else False
+    use_all_gpus = True if not gpus else False
+
     benchmark_matrix: Dict[str, Any] = {
         "include": [],
     }
@@ -141,24 +153,33 @@ def generate_benchmark_matrix(
         assert tp in RUNNERS_MAPPING
 
         for runner in RUNNERS_MAPPING[tp]:
-            benchmark_matrix["include"].append(
-                {
-                    "runner": runner,
-                    # I opt to return a comma-separated list of models here
-                    # so that we could run multiple models on the same runner
-                    "models": model,
-                }
-            )
+            found_runner = False
+            for gpu in gpus:
+                if gpu.lower() in runner:
+                    found_runner = True
+                    break
+
+            if found_runner or use_all_gpus:
+                benchmark_matrix["include"].append(
+                    {
+                        "runner": runner,
+                        # I opt to return a comma-separated list of models here
+                        # so that we could run multiple models on the same runner
+                        "models": model,
+                    }
+                )
 
     return benchmark_matrix
 
 
 def main() -> None:
     args = parse_args()
     models = [m.strip().lower() for m in args.models.split(",") if m.strip()]
+    gpus = [m.strip().lower() for m in args.gpus.split(",") if m.strip()]
     benchmark_matrix = generate_benchmark_matrix(
         args.benchmark_configs_dir,
         models,
+        gpus,
     )
     set_output("benchmark_matrix", benchmark_matrix)
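Taken together, a runner is kept when any requested GPU name appears as a substring of the runner label, and an empty --gpus keeps every runner. A self-contained sketch of that selection logic, using runner labels from the mapping above:

    gpus = ["mi300"]
    runners = ["linux.aws.h100.4", "linux.rocm.gpu.mi300.2"]

    use_all_gpus = not gpus
    selected = [
        r for r in runners
        if use_all_gpus or any(g.lower() in r for g in gpus)
    ]
    # -> ['linux.rocm.gpu.mi300.2']; with gpus = [], both runners are kept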

.github/workflows/vllm-benchmark.yml

Lines changed: 78 additions & 11 deletions
@@ -20,6 +20,12 @@ on:
         A comma-separated list of models to benchmark, leave empty to run everything
       required: false
       type: string
+    gpus:
+      description: |
+        A comma-separated list of GPUs to benchmark, e.g. h100, mi300
+      required: true
+      type: string
+      default: h100,mi300
   pull_request:
     paths:
       - .github/workflows/vllm-benchmark.yml
@@ -47,13 +53,15 @@ jobs:
         shell: bash
         env:
           MODELS: ${{ inputs.models || '' }}
+          GPUS: ${{ inputs.gpus || '' }}
         run: |
           set -eux
 
           # The generated matrix is grouped by model and runner
           python .github/scripts/generate_vllm_benchmark_matrix.py \
             --benchmark-configs-dir vllm-benchmarks/benchmarks \
-            --models "${MODELS}"
+            --models "${MODELS}" \
+            --gpus "${GPUS}"
 
   benchmarks:
     name: Run vLLM benchmarks
@@ -63,6 +71,9 @@ jobs:
       fail-fast: false
     runs-on: ${{ matrix.runner }}
     environment: pytorch-x-vllm
+    permissions:
+      id-token: write
+      contents: read
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
@@ -80,23 +91,63 @@ jobs:
           python-version: '3.12'
           cache: 'pip'
 
-      - name: Set GPU device name
+      - name: Check if the device is supported
+        shell: bash
+        run: |
+          set -eux
+
+          if command -v nvidia-smi; then
+            DEVICE_NAME=cuda
+            nvidia-smi
+          elif command -v rocm-smi; then
+            DEVICE_NAME=rocm
+            rocm-smi
+          else
+            echo "Only CUDA and ROCm benchmarks are supported at the moment"
+            exit 1
+          fi
+          echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV
+
+      - name: Set GPU name and type
         working-directory: vllm-benchmarks
+        shell: bash
         run: |
-          export GPU_DEVICE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
-          echo "GPU_DEVICE=$GPU_DEVICE" >> $GITHUB_ENV
+          set -eux
+
+          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
+            DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
+          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
+            DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
+          fi
+          echo "DEVICE_TYPE=$DEVICE_TYPE" >> $GITHUB_ENV
 
       - name: Install dependencies
+        shell: bash
         run: |
           set -eux
-          pip install -r .github/scripts/requirements.txt
+
+          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
+            pip install -r .github/scripts/requirements.txt
+          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
+            pip install -r .github/scripts/requirements.txt \
+              --extra-index-url https://download.pytorch.org/whl/rocm6.3
+          fi
+
+      - name: Set Docker registry
+        shell: bash
+        run: |
+          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
+            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
+          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
+            DOCKER_IMAGE_PREFIX=docker.io/rocm/vllm-ci
+          fi
+          echo "DOCKER_IMAGE_PREFIX=$DOCKER_IMAGE_PREFIX" >> $GITHUB_ENV
 
       - name: Check for last benchmark commit
         working-directory: vllm-benchmarks
         env:
           HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
           HEAD_SHA: ${{ inputs.vllm_commit || '' }}
-          DOCKER_IMAGE_PREFIX: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
           MODELS: ${{ matrix.models }}
         run: |
           set -eux
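The probe is purely PATH-based: whichever of nvidia-smi or rocm-smi resolves decides DEVICE_NAME, and anything else fails the job. A minimal Python equivalent of the same check (a sketch for illustration; the workflow step does this in bash):

    import shutil

    if shutil.which("nvidia-smi"):
        device_name = "cuda"
    elif shutil.which("rocm-smi"):
        device_name = "rocm"
    else:
        raise RuntimeError("Only CUDA and ROCm benchmarks are supported at the moment")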
@@ -117,7 +168,7 @@ jobs:
           fi
 
           NOT_EXIST=0
-          S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${GPU_DEVICE}/benchmark_results_${MODELS//\//_}.json"
+          S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${DEVICE_TYPE// /_}/benchmark_results_${MODELS//\//_}.json"
           aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1
 
           if [[ ${NOT_EXIST} == "1" ]]; then
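Note the switch from ${GPU_DEVICE} to ${DEVICE_TYPE// /_}: the marketing name reported by rocminfo can contain spaces, so the bash expansion replaces them with underscores, just as ${MODELS//\//_} replaces slashes in model names. A sketch of the resulting S3 key fragment (the device and model names here are hypothetical, for illustration only):

    device_type = "AMD Instinct MI300X"  # hypothetical rocminfo marketing name
    models = "meta-llama/llama-2-7b"     # hypothetical model
    key = f"{device_type.replace(' ', '_')}/benchmark_results_{models.replace('/', '_')}.json"
    # -> 'AMD_Instinct_MI300X/benchmark_results_meta-llama_llama-2-7b.json'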
@@ -130,10 +181,15 @@ jobs:
 
           echo "HEAD_SHA=$HEAD_SHA" >> $GITHUB_ENV
 
-      - name: Setup GPU_FLAG for docker run
+      - name: Setup CUDA GPU_FLAG for docker run
+        if: env.DEVICE_NAME == 'cuda'
         run: |
           echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
 
+      - name: Setup ROCm
+        if: env.DEVICE_NAME == 'rocm'
+        uses: pytorch/pytorch/./.github/actions/setup-rocm@main
+
       - name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
         run: |
           echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
@@ -165,7 +221,7 @@ jobs:
           SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
           SCCACHE_REGION: us-east-1
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
-          DOCKER_IMAGE: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:${{ env.HEAD_SHA }}
+          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE_PREFIX }}:${{ env.HEAD_SHA }}
           # vLLM-related environment variables
           ENGINE_VERSION: v1
           SAVE_TO_PYTORCH_BENCHMARK_FORMAT: 1
@@ -177,7 +233,8 @@ jobs:
             ${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
             -e SCCACHE_BUCKET \
             -e SCCACHE_REGION \
-            -e GPU_DEVICE \
+            -e DEVICE_NAME \
+            -e DEVICE_TYPE \
             -e HF_TOKEN \
             -e ENGINE_VERSION \
             -e SAVE_TO_PYTORCH_BENCHMARK_FORMAT \
@@ -189,6 +246,16 @@ jobs:
             "${DOCKER_IMAGE}" \
             bash -xc "cd vllm-benchmarks/vllm && bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh"
 
+      - name: Authenticate with AWS
+        # AWS CUDA runners already have access to the bucket via their runner IAM role
+        if: env.DEVICE_NAME != 'cuda'
+        uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
+        with:
+          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
+          # The max duration enforced by the server side
+          role-duration-seconds: 18000
+          aws-region: us-east-1
+
       - name: Upload the benchmark results
         env:
           BENCHMARK_RESULTS: vllm-benchmarks/vllm/benchmarks/results
@@ -203,5 +270,5 @@ jobs:
             --repo vllm-benchmarks/vllm \
             --benchmark-name "vLLM benchmark" \
             --benchmark-results "${BENCHMARK_RESULTS}" \
-            --device "${GPU_DEVICE}" \
+            --device "${DEVICE_TYPE// /_}" \
             --model "${MODELS//\//_}"
