logging.basicConfig(level=logging.INFO)

# These are H100 runners from
# https://github.com/pytorch-labs/pytorch-gha-infra/blob/main/multi-tenant/inventory/manual_inventory
# while ROCm runners are provided by AMD
15
- RUNNERS_MAPPING = {
15
+ TP_TO_RUNNER_MAPPING = {
16
16
1 : [
17
+ "linux.aws.a100" ,
17
18
"linux.aws.h100" ,
18
19
"linux.rocm.gpu.mi300.2" , # No single ROCm GPU?
20
+ "linux.24xl.spr-metal" ,
19
21
],
20
22
# NB: There is no 2xH100 runner at the moment, so let's use the next one
21
23
# in the list here which is 4xH100
26
28
4 : [
27
29
"linux.aws.h100.4" ,
28
30
"linux.rocm.gpu.mi300.4" ,
31
+ # TODO (huydhn): Enable this when Intel's runners are ready
32
+ # "intel-cpu-emr",
29
33
],
30
34
8 : [
31
35
"linux.aws.h100.8" ,
32
36
"linux.rocm.gpu.mi300.8" ,
33
37
],
34
38
}
35
39

# Platform of each runner label (cuda / rocm / cpu); used to select the
# matching benchmark-config sub-directory for a given runner.
RUNNER_TO_PLATFORM_MAPPING = {
    runner: platform
    for platform, runner_group in (
        (
            "cuda",
            (
                "linux.aws.a100",
                "linux.aws.h100",
                "linux.aws.h100.4",
                "linux.aws.h100.8",
            ),
        ),
        (
            "rocm",
            (
                "linux.rocm.gpu.mi300.2",
                "linux.rocm.gpu.mi300.4",
                "linux.rocm.gpu.mi300.8",
            ),
        ),
        ("cpu", ("linux.24xl.spr-metal",)),
    )
    for runner in runner_group
}

36
52
# All the different names vLLM uses to refer to their benchmark configs
37
53
VLLM_BENCHMARK_CONFIGS_PARAMETER = set (
38
54
[
@@ -76,10 +92,11 @@ def parse_args() -> Any:
76
92
help = "the comma-separated list of models to benchmark" ,
77
93
)
78
94
parser .add_argument (
79
- "--gpus " ,
95
+ "--runners " ,
80
96
type = str ,
81
97
default = "" ,
82
- help = "the comma-separated list of GPUs to benchmark" ,
98
+ help = "the comma-separated list of runners to run the benchmark" ,
99
+ required = True ,
83
100
)
84
101
85
102
return parser .parse_args ()
@@ -107,59 +124,76 @@ def set_output(name: str, val: Any) -> None:
107
124
108
125
109
126
def generate_benchmark_matrix (
110
- benchmark_configs_dir : str , models : List [str ], gpus : List [str ]
127
+ benchmark_configs_dir : str , models : List [str ], runners : List [str ]
111
128
) -> Dict [str , Any ]:
112
129
"""
113
130
Parse all the JSON files in vLLM benchmark configs directory to get the
114
- model name and tensor parallel size (aka number of GPUs)
131
+ model name and tensor parallel size (aka number of GPUs or CPU NUMA nodes )
115
132
"""
116
- get_all_models = True if not models else False
117
- use_all_gpus = True if not gpus else False
118
-
119
133
benchmark_matrix : Dict [str , Any ] = {
120
134
"include" : [],
121
135
}
122
136
123
- for file in glob .glob (f"{ benchmark_configs_dir } /*.json" ):
124
- with open (file ) as f :
125
- try :
126
- configs = json .load (f )
127
- except json .JSONDecodeError as e :
128
- warning (f"Fail to load { file } : { e } " )
129
- continue
130
-
131
- for config in configs :
132
- param = list (VLLM_BENCHMARK_CONFIGS_PARAMETER & set (config .keys ()))
133
- assert len (param ) == 1
134
-
135
- benchmark_config = config [param [0 ]]
136
- if "model" not in benchmark_config :
137
- warning (f"Model name is not set in { benchmark_config } , skipping..." )
138
- continue
139
- model = benchmark_config ["model" ].lower ()
140
-
141
- # Dedup
142
- if model in models :
143
- continue
144
- if get_all_models :
145
- models .append (model )
146
-
147
- if "tensor_parallel_size" in benchmark_config :
148
- tp = benchmark_config ["tensor_parallel_size" ]
149
- elif "tp" in benchmark_config :
150
- tp = benchmark_config ["tp" ]
151
- else :
152
- tp = 8
153
- assert tp in RUNNERS_MAPPING
154
-
155
- for runner in RUNNERS_MAPPING [tp ]:
156
- found_runner = False
157
- for gpu in gpus :
158
- if gpu .lower () in runner :
159
- found_runner = True
160
- break
161
-
162
- if found_runner or use_all_gpus :
137
+ platforms = set ()
138
+ if not runners :
139
+ use_all_runners = True
140
+ platforms = set (v for v in RUNNER_TO_PLATFORM_MAPPING .values ())
141
+ else :
142
+ use_all_runners = False
143
+ for k , v in RUNNER_TO_PLATFORM_MAPPING .items ():
144
+ for r in runners :
145
+ if r .lower () in k :
146
+ platforms .add (v )
147
+
148
+ # Gather all possible benchmarks
149
+ for platform in sorted (platforms ):
150
+ selected_models = []
151
+ for file in glob .glob (f"{ benchmark_configs_dir } /{ platform } /*.json" ):
152
+ with open (file ) as f :
153
+ try :
154
+ configs = json .load (f )
155
+ except json .JSONDecodeError as e :
156
+ warning (f"Fail to load { file } : { e } " )
157
+ continue
158
+
159
+ for config in configs :
160
+ param = list (VLLM_BENCHMARK_CONFIGS_PARAMETER & set (config .keys ()))
161
+ assert len (param ) == 1
162
+
163
+ benchmark_config = config [param [0 ]]
164
+ if "model" not in benchmark_config :
165
+ warning (f"Model name is not set in { benchmark_config } , skipping..." )
166
+ continue
167
+ model = benchmark_config ["model" ].lower ()
168
+
169
+ # Dedup
170
+ if model in selected_models :
171
+ continue
172
+ # and only choose the selected model:
173
+ if models and model not in models :
174
+ continue
175
+ selected_models .append (model )
176
+
177
+ if "tensor_parallel_size" in benchmark_config :
178
+ tp = benchmark_config ["tensor_parallel_size" ]
179
+ elif "tp" in benchmark_config :
180
+ tp = benchmark_config ["tp" ]
181
+ else :
182
+ tp = 8
183
+ assert tp in TP_TO_RUNNER_MAPPING
184
+
185
+ for runner in TP_TO_RUNNER_MAPPING [tp ]:
186
+ # Wrong platform
187
+ if (
188
+ runner not in RUNNER_TO_PLATFORM_MAPPING
189
+ or RUNNER_TO_PLATFORM_MAPPING [runner ] != platform
190
+ ):
191
+ continue
192
+
193
+ found_runner = any ([r and r .lower () in runner for r in runners ])
194
+ if not found_runner and not use_all_runners :
195
+ continue
196
+
163
197
benchmark_matrix ["include" ].append (
164
198
{
165
199
"runner" : runner ,
def main() -> None:
    """Parse CLI arguments and publish the vLLM benchmark matrix as a job output."""
    args = parse_args()

    def _split_csv(raw: str) -> List[str]:
        # Normalize a comma-separated CLI value into lowercase, stripped tokens,
        # dropping empty entries (e.g. trailing commas).
        return [token.strip().lower() for token in raw.split(",") if token.strip()]

    benchmark_matrix = generate_benchmark_matrix(
        args.benchmark_configs_dir,
        _split_csv(args.models),
        _split_csv(args.runners),
    )
    set_output("benchmark_matrix", benchmark_matrix)

0 commit comments