Commit d532ab7

Merge branch 'main' into bf/add-gemma-qwen

2 parents: 3d3bdec + c5b4d30
16 files changed: +1083 −103 lines

.github/scripts/generate_vllm_benchmark_matrix.py

Lines changed: 84 additions & 50 deletions
@@ -12,10 +12,12 @@
 logging.basicConfig(level=logging.INFO)
 # These are H100 runners from https://github.com/pytorch-labs/pytorch-gha-infra/blob/main/multi-tenant/inventory/manual_inventory
 # while ROCm runners are provided by AMD
-RUNNERS_MAPPING = {
+TP_TO_RUNNER_MAPPING = {
     1: [
+        "linux.aws.a100",
         "linux.aws.h100",
         "linux.rocm.gpu.mi300.2",  # No single ROCm GPU?
+        "linux.24xl.spr-metal",
     ],
     # NB: There is no 2xH100 runner at the moment, so let's use the next one
     # in the list here which is 4xH100
@@ -26,13 +28,27 @@
     4: [
         "linux.aws.h100.4",
         "linux.rocm.gpu.mi300.4",
+        # TODO (huydhn): Enable this when Intel's runners are ready
+        # "intel-cpu-emr",
     ],
     8: [
         "linux.aws.h100.8",
         "linux.rocm.gpu.mi300.8",
     ],
 }
 
+# This mapping is needed to find out the platform of a runner
+RUNNER_TO_PLATFORM_MAPPING = {
+    "linux.aws.a100": "cuda",
+    "linux.aws.h100": "cuda",
+    "linux.aws.h100.4": "cuda",
+    "linux.aws.h100.8": "cuda",
+    "linux.rocm.gpu.mi300.2": "rocm",
+    "linux.rocm.gpu.mi300.4": "rocm",
+    "linux.rocm.gpu.mi300.8": "rocm",
+    "linux.24xl.spr-metal": "cpu",
+}
+
 # All the different names vLLM uses to refer to their benchmark configs
 VLLM_BENCHMARK_CONFIGS_PARAMETER = set(
     [
@@ -76,10 +92,11 @@ def parse_args() -> Any:
         help="the comma-separated list of models to benchmark",
     )
     parser.add_argument(
-        "--gpus",
+        "--runners",
         type=str,
         default="",
-        help="the comma-separated list of GPUs to benchmark",
+        help="the comma-separated list of runners to run the benchmark",
+        required=True,
     )
 
     return parser.parse_args()
@@ -107,59 +124,76 @@ def set_output(name: str, val: Any) -> None:
 
 
 def generate_benchmark_matrix(
-    benchmark_configs_dir: str, models: List[str], gpus: List[str]
+    benchmark_configs_dir: str, models: List[str], runners: List[str]
 ) -> Dict[str, Any]:
     """
     Parse all the JSON files in the vLLM benchmark configs directory to get the
-    model name and tensor parallel size (aka number of GPUs)
+    model name and tensor parallel size (aka number of GPUs or CPU NUMA nodes)
     """
-    get_all_models = True if not models else False
-    use_all_gpus = True if not gpus else False
-
     benchmark_matrix: Dict[str, Any] = {
         "include": [],
     }
 
-    for file in glob.glob(f"{benchmark_configs_dir}/*.json"):
-        with open(file) as f:
-            try:
-                configs = json.load(f)
-            except json.JSONDecodeError as e:
-                warning(f"Failed to load {file}: {e}")
-                continue
-
-        for config in configs:
-            param = list(VLLM_BENCHMARK_CONFIGS_PARAMETER & set(config.keys()))
-            assert len(param) == 1
-
-            benchmark_config = config[param[0]]
-            if "model" not in benchmark_config:
-                warning(f"Model name is not set in {benchmark_config}, skipping...")
-                continue
-            model = benchmark_config["model"].lower()
-
-            # Dedup
-            if model in models:
-                continue
-            if get_all_models:
-                models.append(model)
-
-            if "tensor_parallel_size" in benchmark_config:
-                tp = benchmark_config["tensor_parallel_size"]
-            elif "tp" in benchmark_config:
-                tp = benchmark_config["tp"]
-            else:
-                tp = 8
-            assert tp in RUNNERS_MAPPING
-
-            for runner in RUNNERS_MAPPING[tp]:
-                found_runner = False
-                for gpu in gpus:
-                    if gpu.lower() in runner:
-                        found_runner = True
-                        break
-
-                if found_runner or use_all_gpus:
+    platforms = set()
+    if not runners:
+        use_all_runners = True
+        platforms = set(RUNNER_TO_PLATFORM_MAPPING.values())
+    else:
+        use_all_runners = False
+        for k, v in RUNNER_TO_PLATFORM_MAPPING.items():
+            for r in runners:
+                if r.lower() in k:
+                    platforms.add(v)
+
+    # Gather all possible benchmarks
+    for platform in sorted(platforms):
+        selected_models = []
+        for file in glob.glob(f"{benchmark_configs_dir}/{platform}/*.json"):
+            with open(file) as f:
+                try:
+                    configs = json.load(f)
+                except json.JSONDecodeError as e:
+                    warning(f"Failed to load {file}: {e}")
+                    continue
+
+            for config in configs:
+                param = list(VLLM_BENCHMARK_CONFIGS_PARAMETER & set(config.keys()))
+                assert len(param) == 1
+
+                benchmark_config = config[param[0]]
+                if "model" not in benchmark_config:
+                    warning(f"Model name is not set in {benchmark_config}, skipping...")
+                    continue
+                model = benchmark_config["model"].lower()
+
+                # Dedup
+                if model in selected_models:
+                    continue
+                # and only keep the models that were requested, if any
+                if models and model not in models:
+                    continue
+                selected_models.append(model)
+
+                if "tensor_parallel_size" in benchmark_config:
+                    tp = benchmark_config["tensor_parallel_size"]
+                elif "tp" in benchmark_config:
+                    tp = benchmark_config["tp"]
+                else:
+                    tp = 8
+                assert tp in TP_TO_RUNNER_MAPPING
+
+                for runner in TP_TO_RUNNER_MAPPING[tp]:
+                    # Skip runners on the wrong platform
+                    if (
+                        runner not in RUNNER_TO_PLATFORM_MAPPING
+                        or RUNNER_TO_PLATFORM_MAPPING[runner] != platform
+                    ):
+                        continue
+
+                    found_runner = any(r and r.lower() in runner for r in runners)
+                    if not found_runner and not use_all_runners:
+                        continue
+
                     benchmark_matrix["include"].append(
                         {
                             "runner": runner,
@@ -175,11 +209,11 @@ def generate_benchmark_matrix(
 def main() -> None:
     args = parse_args()
     models = [m.strip().lower() for m in args.models.split(",") if m.strip()]
-    gpus = [m.strip().lower() for m in args.gpus.split(",") if m.strip()]
+    runners = [m.strip().lower() for m in args.runners.split(",") if m.strip()]
     benchmark_matrix = generate_benchmark_matrix(
         args.benchmark_configs_dir,
         models,
-        gpus,
+        runners,
     )
     set_output("benchmark_matrix", benchmark_matrix)
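The core of this change is that runner selection now goes through platforms: the requested --runners substrings are first resolved to platforms (cuda, rocm, cpu) via RUNNER_TO_PLATFORM_MAPPING, and only the benchmark config subdirectories for those platforms are scanned. Below is a minimal standalone sketch of that resolution step; the mapping mirrors the diff, while the sample runner substrings are hypothetical inputs.

# Minimal sketch of the platform-resolution pass introduced above.
# The mapping mirrors RUNNER_TO_PLATFORM_MAPPING from the diff; the
# example runner substrings ("h100", "mi300") are hypothetical.
RUNNER_TO_PLATFORM_MAPPING = {
    "linux.aws.a100": "cuda",
    "linux.aws.h100": "cuda",
    "linux.aws.h100.4": "cuda",
    "linux.aws.h100.8": "cuda",
    "linux.rocm.gpu.mi300.2": "rocm",
    "linux.rocm.gpu.mi300.4": "rocm",
    "linux.rocm.gpu.mi300.8": "rocm",
    "linux.24xl.spr-metal": "cpu",
}


def resolve_platforms(runners: list) -> list:
    # An empty list selects every platform, like use_all_runners above
    if not runners:
        return sorted(set(RUNNER_TO_PLATFORM_MAPPING.values()))
    platforms = set()
    for runner_name, platform in RUNNER_TO_PLATFORM_MAPPING.items():
        # Substring match, so "h100" matches all four H100 runner pools
        if any(r.lower() in runner_name for r in runners):
            platforms.add(platform)
    return sorted(platforms)


print(resolve_platforms(["h100", "mi300"]))  # ['cuda', 'rocm']
print(resolve_platforms([]))                 # ['cpu', 'cuda', 'rocm']

Because matching is substring-based, a full name like "linux.aws.h100.8" selects only that runner pool, while a short one like "h100" fans out to every H100 variant.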

.github/scripts/setup_vllm_benchmark.py

Lines changed: 13 additions & 2 deletions
@@ -61,17 +61,27 @@ def parse_args() -> Any:
         help="the list of models to benchmark",
         required=True,
     )
+    parser.add_argument(
+        "--device",
+        type=str,
+        default="",
+        help="device for the runner",
+        required=True,
+    )
 
     return parser.parse_args()
 
 
 def setup_benchmark_configs(
-    from_benchmark_configs_dir: str, to_benchmark_configs_dir: str, models: List[str]
+    from_benchmark_configs_dir: str,
+    to_benchmark_configs_dir: str,
+    models: List[str],
+    device: str,
 ) -> None:
     """
     Set up the benchmark configs to run on this runner
     """
-    for file in glob.glob(f"{from_benchmark_configs_dir}/*.json"):
+    for file in glob.glob(f"{from_benchmark_configs_dir}/{device}/*.json"):
         filename = os.path.basename(file)
         benchmark_configs = []
 
@@ -108,6 +118,7 @@ def main() -> None:
         args.from_benchmark_configs_dir,
         args.to_benchmark_configs_dir,
         args.models.split(","),
+        args.device,
     )
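Paired with the matrix change above, the setup script now discovers configs under a per-device subdirectory, which implies the vLLM benchmark configs are laid out as cuda/*.json, rocm/*.json, cpu/*.json, and so on. A small sketch of the new discovery behavior follows; the directory layout and file names here are illustrative assumptions, not taken from the workflow.

# Sketch of the device-scoped config discovery added in this commit.
# The cuda/rocm/cpu subfolder layout is an assumption for illustration.
import glob
import os


def list_device_configs(from_dir: str, device: str) -> list:
    # Mirrors the new glob in setup_benchmark_configs: only JSON files
    # under the requested device's subdirectory are picked up.
    return sorted(
        os.path.basename(f) for f in glob.glob(f"{from_dir}/{device}/*.json")
    )


# Hypothetical layout: benchmarks/cuda/latency-tests.json,
# benchmarks/rocm/latency-tests.json, benchmarks/cpu/serving-tests.json
print(list_device_configs("benchmarks", "cuda"))

On the runner side, the platform would be passed through the new required --device flag alongside the existing --models flag.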
