Skip to content

Commit e2a4066

Browse files
sjmonson and markurtz authored
Add support for "benchmarking scenarios" (#99)
This PR adds support for "scenarios" that allow specifying benchmark arguments in a file / as a single Pydantic object. CLI argument defaults are loaded from the scenario object defaults to give benchmark-as-code users the same defaults as the CLI. CLI argument values follow this precedence: `Scenario (class defaults) < Scenario (CLI provided Scenario) < CLI Arguments`. Closes: * #81 --------- Co-authored-by: Mark Kurtz <mark.j.kurtz@gmail.com>
1 parent 023c8dd commit e2a4066

File tree

9 files changed

+331
-69
lines changed

9 files changed

+331
-69
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,9 @@ cython_debug/
179179
*.json
180180
*.yaml
181181

182+
# But not scenarios
183+
!src/guidellm/benchmark/scenarios/*.json
184+
!src/guidellm/benchmark/scenarios/*.yaml
182185

183186
# UI Section - Next.js/React application under src/ui/
184187
# dependencies

src/guidellm/__main__.py

Lines changed: 90 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,55 @@
11
import asyncio
22
import codecs
3-
import json
43
from pathlib import Path
54
from typing import get_args
65

76
import click
7+
from pydantic import ValidationError
88

99
from guidellm.backend import BackendType
10-
from guidellm.benchmark import ProfileType, benchmark_generative_text
10+
from guidellm.benchmark import ProfileType
11+
from guidellm.benchmark.entrypoints import benchmark_with_scenario
12+
from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios
1113
from guidellm.config import print_config
1214
from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
1315
from guidellm.scheduler import StrategyType
16+
from guidellm.utils import cli as cli_tools
1417

1518
STRATEGY_PROFILE_CHOICES = set(
1619
list(get_args(ProfileType)) + list(get_args(StrategyType))
1720
)
1821

1922

20-
def parse_json(ctx, param, value): # noqa: ARG001
21-
if value is None:
22-
return None
23-
try:
24-
return json.loads(value)
25-
except json.JSONDecodeError as err:
26-
raise click.BadParameter(f"{param.name} must be a valid JSON string.") from err
27-
28-
29-
def parse_number_str(ctx, param, value): # noqa: ARG001
30-
if value is None:
31-
return None
32-
33-
values = value.split(",") if "," in value else [value]
34-
35-
try:
36-
return [float(val) for val in values]
37-
except ValueError as err:
38-
raise click.BadParameter(
39-
f"{param.name} must be a number or comma-separated list of numbers."
40-
) from err
41-
42-
4323
@click.group()
4424
def cli():
4525
pass
4626

4727

4828
@cli.command(
49-
help="Run a benchmark against a generative model using the specified arguments."
29+
help="Run a benchmark against a generative model using the specified arguments.",
30+
context_settings={"auto_envvar_prefix": "GUIDELLM"},
31+
)
32+
@click.option(
33+
"--scenario",
34+
type=cli_tools.Union(
35+
click.Path(
36+
exists=True,
37+
readable=True,
38+
file_okay=True,
39+
dir_okay=False,
40+
path_type=Path, # type: ignore[type-var]
41+
),
42+
click.Choice(get_builtin_scenarios()),
43+
),
44+
default=None,
45+
help=(
46+
"The name of a builtin scenario or path to a config file. "
47+
"Missing values from the config will use defaults. "
48+
"Options specified on the commandline will override the scenario."
49+
),
5050
)
5151
@click.option(
5252
"--target",
53-
required=True,
5453
type=str,
5554
help="The target path for the backend to run benchmarks against. For example, http://localhost:8000",
5655
)
@@ -61,20 +60,20 @@ def cli():
6160
"The type of backend to use to run requests against. Defaults to 'openai_http'."
6261
f" Supported types: {', '.join(get_args(BackendType))}"
6362
),
64-
default="openai_http",
63+
default=GenerativeTextScenario.get_default("backend_type"),
6564
)
6665
@click.option(
6766
"--backend-args",
68-
callback=parse_json,
69-
default=None,
67+
callback=cli_tools.parse_json,
68+
default=GenerativeTextScenario.get_default("backend_args"),
7069
help=(
7170
"A JSON string containing any arguments to pass to the backend as a "
7271
"dict with **kwargs."
7372
),
7473
)
7574
@click.option(
7675
"--model",
77-
default=None,
76+
default=GenerativeTextScenario.get_default("model"),
7877
type=str,
7978
help=(
8079
"The ID of the model to benchmark within the backend. "
@@ -83,7 +82,7 @@ def cli():
8382
)
8483
@click.option(
8584
"--processor",
86-
default=None,
85+
default=GenerativeTextScenario.get_default("processor"),
8786
type=str,
8887
help=(
8988
"The processor or tokenizer to use to calculate token counts for statistics "
@@ -93,16 +92,15 @@ def cli():
9392
)
9493
@click.option(
9594
"--processor-args",
96-
default=None,
97-
callback=parse_json,
95+
default=GenerativeTextScenario.get_default("processor_args"),
96+
callback=cli_tools.parse_json,
9897
help=(
9998
"A JSON string containing any arguments to pass to the processor constructor "
10099
"as a dict with **kwargs."
101100
),
102101
)
103102
@click.option(
104103
"--data",
105-
required=True,
106104
type=str,
107105
help=(
108106
"The HuggingFace dataset ID, a path to a HuggingFace dataset, "
@@ -112,15 +110,16 @@ def cli():
112110
)
113111
@click.option(
114112
"--data-args",
115-
callback=parse_json,
113+
default=GenerativeTextScenario.get_default("data_args"),
114+
callback=cli_tools.parse_json,
116115
help=(
117116
"A JSON string containing any arguments to pass to the dataset creation "
118117
"as a dict with **kwargs."
119118
),
120119
)
121120
@click.option(
122121
"--data-sampler",
123-
default=None,
122+
default=GenerativeTextScenario.get_default("data_sampler"),
124123
type=click.Choice(["random"]),
125124
help=(
126125
"The data sampler type to use. 'random' will add a random shuffle on the data. "
@@ -129,7 +128,6 @@ def cli():
129128
)
130129
@click.option(
131130
"--rate-type",
132-
required=True,
133131
type=click.Choice(STRATEGY_PROFILE_CHOICES),
134132
help=(
135133
"The type of benchmark to run. "
@@ -138,8 +136,7 @@ def cli():
138136
)
139137
@click.option(
140138
"--rate",
141-
default=None,
142-
callback=parse_number_str,
139+
default=GenerativeTextScenario.get_default("rate"),
143140
help=(
144141
"The rates to run the benchmark at. "
145142
"Can be a single number or a comma-separated list of numbers. "
@@ -152,6 +149,7 @@ def cli():
152149
@click.option(
153150
"--max-seconds",
154151
type=float,
152+
default=GenerativeTextScenario.get_default("max_seconds"),
155153
help=(
156154
"The maximum number of seconds each benchmark can run for. "
157155
"If None, will run until max_requests or the data is exhausted."
@@ -160,6 +158,7 @@ def cli():
160158
@click.option(
161159
"--max-requests",
162160
type=int,
161+
default=GenerativeTextScenario.get_default("max_requests"),
163162
help=(
164163
"The maximum number of requests each benchmark can run for. "
165164
"If None, will run until max_seconds or the data is exhausted."
@@ -168,7 +167,7 @@ def cli():
168167
@click.option(
169168
"--warmup-percent",
170169
type=float,
171-
default=None,
170+
default=GenerativeTextScenario.get_default("warmup_percent"),
172171
help=(
173172
"The percent of the benchmark (based on max-seconds, max-requests, "
174173
"or length of dataset) to run as a warmup and not include in the final results. "
@@ -178,6 +177,7 @@ def cli():
178177
@click.option(
179178
"--cooldown-percent",
180179
type=float,
180+
default=GenerativeTextScenario.get_default("cooldown_percent"),
181181
help=(
182182
"The percent of the benchmark (based on max-seconds, max-requests, or length "
183183
"of dataset) to run as a cooldown and not include in the final results. "
@@ -212,7 +212,7 @@ def cli():
212212
)
213213
@click.option(
214214
"--output-extras",
215-
callback=parse_json,
215+
callback=cli_tools.parse_json,
216216
help="A JSON string of extra data to save with the output benchmarks",
217217
)
218218
@click.option(
@@ -222,15 +222,16 @@ def cli():
222222
"The number of samples to save in the output file. "
223223
"If None (default), will save all samples."
224224
),
225-
default=None,
225+
default=GenerativeTextScenario.get_default("output_sampling"),
226226
)
227227
@click.option(
228228
"--random-seed",
229-
default=42,
229+
default=GenerativeTextScenario.get_default("random_seed"),
230230
type=int,
231231
help="The random seed to use for benchmarking to ensure reproducibility.",
232232
)
233233
def benchmark(
234+
scenario,
234235
target,
235236
backend_type,
236237
backend_args,
@@ -254,30 +255,53 @@ def benchmark(
254255
output_sampling,
255256
random_seed,
256257
):
258+
click_ctx = click.get_current_context()
259+
260+
overrides = cli_tools.set_if_not_default(
261+
click_ctx,
262+
target=target,
263+
backend_type=backend_type,
264+
backend_args=backend_args,
265+
model=model,
266+
processor=processor,
267+
processor_args=processor_args,
268+
data=data,
269+
data_args=data_args,
270+
data_sampler=data_sampler,
271+
rate_type=rate_type,
272+
rate=rate,
273+
max_seconds=max_seconds,
274+
max_requests=max_requests,
275+
warmup_percent=warmup_percent,
276+
cooldown_percent=cooldown_percent,
277+
output_sampling=output_sampling,
278+
random_seed=random_seed,
279+
)
280+
281+
try:
282+
# If a scenario file was specified, read from it
283+
if scenario is None:
284+
_scenario = GenerativeTextScenario.model_validate(overrides)
285+
elif isinstance(scenario, Path):
286+
_scenario = GenerativeTextScenario.from_file(scenario, overrides)
287+
else: # Only builtins can make it here; click will catch anything else
288+
_scenario = GenerativeTextScenario.from_builtin(scenario, overrides)
289+
except ValidationError as e:
290+
# Translate pydantic validation error to click argument error
291+
errs = e.errors(include_url=False, include_context=True, include_input=True)
292+
param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-")
293+
raise click.BadParameter(
294+
errs[0]["msg"], ctx=click_ctx, param_hint=param_name
295+
) from e
296+
257297
asyncio.run(
258-
benchmark_generative_text(
259-
target=target,
260-
backend_type=backend_type,
261-
backend_args=backend_args,
262-
model=model,
263-
processor=processor,
264-
processor_args=processor_args,
265-
data=data,
266-
data_args=data_args,
267-
data_sampler=data_sampler,
268-
rate_type=rate_type,
269-
rate=rate,
270-
max_seconds=max_seconds,
271-
max_requests=max_requests,
272-
warmup_percent=warmup_percent,
273-
cooldown_percent=cooldown_percent,
298+
benchmark_with_scenario(
299+
scenario=_scenario,
274300
show_progress=not disable_progress,
275301
show_progress_scheduler_stats=display_scheduler_stats,
276302
output_console=not disable_console_outputs,
277303
output_path=output_path,
278304
output_extras=output_extras,
279-
output_sampling=output_sampling,
280-
random_seed=random_seed,
281305
)
282306
)
283307

@@ -316,7 +340,8 @@ def preprocess():
316340
"Convert a dataset to have specific prompt and output token sizes.\n"
317341
"DATA: Path to the input dataset or dataset ID.\n"
318342
"OUTPUT_PATH: Path to save the converted dataset, including file suffix."
319-
)
343+
),
344+
context_settings={"auto_envvar_prefix": "GUIDELLM"},
320345
)
321346
@click.argument(
322347
"data",
@@ -340,15 +365,15 @@ def preprocess():
340365
@click.option(
341366
"--processor-args",
342367
default=None,
343-
callback=parse_json,
368+
callback=cli_tools.parse_json,
344369
help=(
345370
"A JSON string containing any arguments to pass to the processor constructor "
346371
"as a dict with **kwargs."
347372
),
348373
)
349374
@click.option(
350375
"--data-args",
351-
callback=parse_json,
376+
callback=cli_tools.parse_json,
352377
help=(
353378
"A JSON string containing any arguments to pass to the dataset creation "
354379
"as a dict with **kwargs."

src/guidellm/benchmark/entrypoints.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,22 @@
1515
)
1616
from guidellm.benchmark.profile import ProfileType, create_profile
1717
from guidellm.benchmark.progress import GenerativeTextBenchmarkerProgressDisplay
18+
from guidellm.benchmark.scenario import GenerativeTextScenario, Scenario
1819
from guidellm.request import GenerativeRequestLoader
1920
from guidellm.scheduler import StrategyType
2021

2122

23+
async def benchmark_with_scenario(scenario: Scenario, **kwargs):
24+
"""
25+
Run a benchmark using a scenario and specify any extra arguments
26+
"""
27+
28+
if isinstance(scenario, GenerativeTextScenario):
29+
return await benchmark_generative_text(**vars(scenario), **kwargs)
30+
else:
31+
raise ValueError(f"Unsupported Scenario type {type(scenario)}")
32+
33+
2234
async def benchmark_generative_text(
2335
target: str,
2436
backend_type: BackendType,
@@ -43,13 +55,13 @@ async def benchmark_generative_text(
4355
max_requests: Optional[int],
4456
warmup_percent: Optional[float],
4557
cooldown_percent: Optional[float],
46-
show_progress: bool,
47-
show_progress_scheduler_stats: bool,
48-
output_console: bool,
4958
output_path: Optional[Union[str, Path]],
5059
output_extras: Optional[dict[str, Any]],
5160
output_sampling: Optional[int],
5261
random_seed: int,
62+
show_progress: bool = True,
63+
show_progress_scheduler_stats: bool = False,
64+
output_console: bool = True,
5365
) -> tuple[GenerativeBenchmarksReport, Optional[Path]]:
5466
console = GenerativeBenchmarksConsole(enabled=show_progress)
5567
console.print_line("Creating backend...")

0 commit comments

Comments
 (0)