
Commit 046a22c

huydhn and louie-tsai authored
Enable CPU benchmark for VLLM perf dashboard (#44)
* first draft to enable CPU benchmark
* Update .github/workflows/vllm-benchmark.yml (Co-authored-by: Huy Do <huydhn@gmail.com>)
* fix for ROCm changes
* change to use public cpu vllm postmerge registry
* target on 4 NUMA node EMR machine
* Update vllm-benchmark.yml
* Update vllm-benchmark.yml
* Fix CPU suffix
* Rename benchmark files to include the device name
* Fix model selection for CPU devices
* Update the workflow
* Another try
* Use python3
* Testing 1 2 3
* Does this work?
* Debug
* Typo
* Fix Docker usage
* Missing ON_CPU?
* Testing 1 2 3
* Fix the upload script
* Update .github/workflows/vllm-benchmark.yml (Co-authored-by: Louie Tsai <louie.tsai@intel.com>)
* Typo
* Sanitize the device type
* Wrong variable
* c7i.metal-24xl has only 1 NUMA node (Co-authored-by: Louie Tsai <louie.tsai@intel.com>)

---------

Signed-off-by: Huy Do <huydhn@gmail.com>
Co-authored-by: Tsai, Louie <louie.tsai@intel.com>
1 parent 55172b0 · commit 046a22c

15 files changed: +1014 −100 lines

.github/scripts/generate_vllm_benchmark_matrix.py

Lines changed: 82 additions & 51 deletions
@@ -12,10 +12,11 @@
 logging.basicConfig(level=logging.INFO)
 # Those are H100 runners from https://github.com/pytorch-labs/pytorch-gha-infra/blob/main/multi-tenant/inventory/manual_inventory
 # while ROCm runner are provided by AMD
-RUNNERS_MAPPING = {
+TP_TO_RUNNER_MAPPING = {
     1: [
         "linux.aws.h100",
         "linux.rocm.gpu.mi300.2",  # No single ROCm GPU?
+        "linux.24xl.spr-metal",
     ],
     # NB: There is no 2xH100 runner at the momement, so let's use the next one
     # in the list here which is 4xH100
@@ -26,13 +27,26 @@
     4: [
         "linux.aws.h100.4",
         "linux.rocm.gpu.mi300.4",
+        # TODO (huydhn): Enable this when Intel's runners are ready
+        # "intel-cpu-emr",
     ],
     8: [
         "linux.aws.h100.8",
         "linux.rocm.gpu.mi300.8",
     ],
 }

+# This mapping is needed to find out the platform of the runner
+RUNNER_TO_PLATFORM_MAPPING = {
+    "linux.aws.h100": "cuda",
+    "linux.aws.h100.4": "cuda",
+    "linux.aws.h100.8": "cuda",
+    "linux.rocm.gpu.mi300.2": "rocm",
+    "linux.rocm.gpu.mi300.4": "rocm",
+    "linux.rocm.gpu.mi300.8": "rocm",
+    "linux.24xl.spr-metal": "cpu",
+}
+
 # All the different names vLLM uses to refer to their benchmark configs
 VLLM_BENCHMARK_CONFIGS_PARAMETER = set(
     [
@@ -76,10 +90,11 @@ def parse_args() -> Any:
         help="the comma-separated list of models to benchmark",
     )
     parser.add_argument(
-        "--gpus",
+        "--runners",
         type=str,
         default="",
-        help="the comma-separated list of GPUs to benchmark",
+        help="the comma-separated list of runners to run the benchmark",
+        required=True,
     )

     return parser.parse_args()
@@ -107,60 +122,76 @@ def set_output(name: str, val: Any) -> None:


 def generate_benchmark_matrix(
-    benchmark_configs_dir: str, models: List[str], gpus: List[str]
+    benchmark_configs_dir: str, models: List[str], runners: List[str]
 ) -> Dict[str, Any]:
     """
     Parse all the JSON files in vLLM benchmark configs directory to get the
-    model name and tensor parallel size (aka number of GPUs)
+    model name and tensor parallel size (aka number of GPUs or CPU NUMA nodes)
     """
-    use_all_gpus = True if not gpus else False
     benchmark_matrix: Dict[str, Any] = {
         "include": [],
     }

-    selected_models = []
-    for file in glob.glob(f"{benchmark_configs_dir}/*.json"):
-        with open(file) as f:
-            try:
-                configs = json.load(f)
-            except json.JSONDecodeError as e:
-                warning(f"Fail to load {file}: {e}")
-                continue
-
-        for config in configs:
-            param = list(VLLM_BENCHMARK_CONFIGS_PARAMETER & set(config.keys()))
-            assert len(param) == 1
-
-            benchmark_config = config[param[0]]
-            if "model" not in benchmark_config:
-                warning(f"Model name is not set in {benchmark_config}, skipping...")
-                continue
-            model = benchmark_config["model"].lower()
-
-            # Dedup
-            if model in selected_models:
-                continue
-            # and only choose the selected model:
-            if models and model not in models:
-                continue
-            selected_models.append(model)
-
-            if "tensor_parallel_size" in benchmark_config:
-                tp = benchmark_config["tensor_parallel_size"]
-            elif "tp" in benchmark_config:
-                tp = benchmark_config["tp"]
-            else:
-                tp = 8
-            assert tp in RUNNERS_MAPPING
-
-            for runner in RUNNERS_MAPPING[tp]:
-                found_runner = False
-                for gpu in gpus:
-                    if gpu.lower() in runner:
-                        found_runner = True
-                        break
-
-                if found_runner or use_all_gpus:
+    platforms = set()
+    if not runners:
+        use_all_runners = True
+        platforms = set(v for v in RUNNER_TO_PLATFORM_MAPPING.values())
+    else:
+        use_all_runners = False
+        for k, v in RUNNER_TO_PLATFORM_MAPPING.items():
+            for r in runners:
+                if r.lower() in k:
+                    platforms.add(v)
+
+    # Gather all possible benchmarks
+    for platform in sorted(platforms):
+        selected_models = []
+        for file in glob.glob(f"{benchmark_configs_dir}/{platform}/*.json"):
+            with open(file) as f:
+                try:
+                    configs = json.load(f)
+                except json.JSONDecodeError as e:
+                    warning(f"Fail to load {file}: {e}")
+                    continue
+
+            for config in configs:
+                param = list(VLLM_BENCHMARK_CONFIGS_PARAMETER & set(config.keys()))
+                assert len(param) == 1
+
+                benchmark_config = config[param[0]]
+                if "model" not in benchmark_config:
+                    warning(f"Model name is not set in {benchmark_config}, skipping...")
+                    continue
+                model = benchmark_config["model"].lower()
+
+                # Dedup
+                if model in selected_models:
+                    continue
+                # and only choose the selected model:
+                if models and model not in models:
+                    continue
+                selected_models.append(model)
+
+                if "tensor_parallel_size" in benchmark_config:
+                    tp = benchmark_config["tensor_parallel_size"]
+                elif "tp" in benchmark_config:
+                    tp = benchmark_config["tp"]
+                else:
+                    tp = 8
+                assert tp in TP_TO_RUNNER_MAPPING
+
+                for runner in TP_TO_RUNNER_MAPPING[tp]:
+                    # Wrong platform
+                    if (
+                        runner not in RUNNER_TO_PLATFORM_MAPPING
+                        or RUNNER_TO_PLATFORM_MAPPING[runner] != platform
+                    ):
+                        continue
+
+                    found_runner = any([r and r.lower() in runner for r in runners])
+                    if not found_runner and not use_all_runners:
+                        continue
+
                     benchmark_matrix["include"].append(
                         {
                             "runner": runner,
@@ -176,11 +207,11 @@ def generate_benchmark_matrix(
 def main() -> None:
     args = parse_args()
     models = [m.strip().lower() for m in args.models.split(",") if m.strip()]
-    gpus = [m.strip().lower() for m in args.gpus.split(",") if m.strip()]
+    runners = [m.strip().lower() for m in args.runners.split(",") if m.strip()]
     benchmark_matrix = generate_benchmark_matrix(
         args.benchmark_configs_dir,
         models,
-        gpus,
+        runners,
     )
     set_output("benchmark_matrix", benchmark_matrix)
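The heart of this change is the new platform-selection step: each entry in --runners is matched as a substring against the known runner labels, and the matched runners' platforms determine which per-platform config subdirectories (cuda/, rocm/, cpu/) get scanned. Below is a minimal, self-contained sketch of that selection logic, with the mapping trimmed to three labels for brevity (not the full table from the script):

RUNNER_TO_PLATFORM_MAPPING = {
    "linux.aws.h100": "cuda",
    "linux.rocm.gpu.mi300.2": "rocm",
    "linux.24xl.spr-metal": "cpu",
}

def platforms_for(runners: list) -> set:
    # An empty filter means "benchmark every known platform"
    if not runners:
        return set(RUNNER_TO_PLATFORM_MAPPING.values())
    # Otherwise, substring-match each filter against the runner labels
    return {
        platform
        for runner, platform in RUNNER_TO_PLATFORM_MAPPING.items()
        if any(r.lower() in runner for r in runners)
    }

print(platforms_for(["spr"]))  # {'cpu'}
print(platforms_for([]))       # {'cpu', 'cuda', 'rocm'} in some order

Note the substring match means a filter like "h100" selects all H100 runner variants at once, which is why the script later re-checks each candidate runner's platform before adding it to the matrix.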

.github/scripts/setup_vllm_benchmark.py

Lines changed: 13 additions & 2 deletions
@@ -61,17 +61,27 @@ def parse_args() -> Any:
         help="the list of models to benchmark",
         required=True,
     )
+    parser.add_argument(
+        "--device",
+        type=str,
+        default="",
+        help="device for the runner",
+        required=True,
+    )

     return parser.parse_args()


 def setup_benchmark_configs(
-    from_benchmark_configs_dir: str, to_benchmark_configs_dir: str, models: List[str]
+    from_benchmark_configs_dir: str,
+    to_benchmark_configs_dir: str,
+    models: List[str],
+    device: str,
 ) -> None:
     """
     Setup the benchmark configs to run on this runner
     """
-    for file in glob.glob(f"{from_benchmark_configs_dir}/*.json"):
+    for file in glob.glob(f"{from_benchmark_configs_dir}/{device}/*.json"):
         filename = os.path.basename(file)
         benchmark_configs = []

@@ -108,6 +118,7 @@ def main() -> None:
         args.from_benchmark_configs_dir,
         args.to_benchmark_configs_dir,
         args.models.split(","),
+        args.device,
     )
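With the new required --device argument, the setup script only considers benchmark configs under the matching per-device subdirectory, so a CPU runner never picks up CUDA-only configs. A small sketch of that filtering, assuming an illustrative per-device layout (the paths here are hypothetical):

import glob
import os

def list_device_configs(from_dir: str, device: str) -> list:
    # Only JSON configs under <from_dir>/<device>/ are eligible for this runner
    return sorted(os.path.basename(f) for f in glob.glob(f"{from_dir}/{device}/*.json"))

# Hypothetical layout: vllm-benchmarks/benchmarks/{cuda,rocm,cpu}/*.json
print(list_device_configs("vllm-benchmarks/benchmarks", "cpu"))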
