Commit b637e9d

Add full serve CLI reference back to docs (#20978)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
1 parent 1e36c86 commit b637e9d

6 files changed: 58 additions, 35 deletions


docs/cli/README.md

Lines changed: 8 additions & 0 deletions
@@ -1,3 +1,7 @@
+---
+toc_depth: 4
+---
+
 # vLLM CLI Guide
 
 The vllm command-line tool is used to run and manage vLLM models. You can start by viewing the help message with:
@@ -42,6 +46,10 @@ Start the vLLM OpenAI Compatible API server.
 vllm serve --help=page
 ```
 
+### Options
+
+--8<-- "docs/argparse/serve.md"
+
 ## chat
 
 Generate chat completions via the running API server.

docs/configuration/serve_args.md

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@ The `vllm serve` command is used to launch the OpenAI-compatible server.
 ## CLI Arguments
 
 The `vllm serve` command is used to launch the OpenAI-compatible server.
-To see the available CLI arguments, run `vllm serve --help`!
+To see the available options, take a look at the [CLI Reference](../cli/README.md#options)!
 
 ## Configuration file
 
docs/mkdocs/hooks/generate_argparse.py

Lines changed: 20 additions & 3 deletions
@@ -16,6 +16,7 @@
 sys.modules["vllm._C"] = MagicMock()
 
 from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs  # noqa: E402
+from vllm.entrypoints.openai.cli_args import make_arg_parser  # noqa: E402
 from vllm.utils import FlexibleArgumentParser  # noqa: E402
 
 logger = logging.getLogger("mkdocs")
@@ -24,15 +25,18 @@
 class MarkdownFormatter(HelpFormatter):
     """Custom formatter that generates markdown for argument groups."""
 
-    def __init__(self, prog):
+    def __init__(self, prog, starting_heading_level=3):
         super().__init__(prog,
                          max_help_position=float('inf'),
                          width=float('inf'))
+        self._section_heading_prefix = "#" * starting_heading_level
+        self._argument_heading_prefix = "#" * (starting_heading_level + 1)
         self._markdown_output = []
 
     def start_section(self, heading):
         if heading not in {"positional arguments", "options"}:
-            self._markdown_output.append(f"\n### {heading}\n\n")
+            heading_md = f"\n{self._section_heading_prefix} {heading}\n\n"
+            self._markdown_output.append(heading_md)
 
     def end_section(self):
         pass
@@ -46,9 +50,13 @@ def add_usage(self, usage, actions, groups, prefix=None):
 
     def add_arguments(self, actions):
         for action in actions:
+            if (len(action.option_strings) == 0
+                    or "--help" in action.option_strings):
+                continue
 
             option_strings = f'`{"`, `".join(action.option_strings)}`'
-            self._markdown_output.append(f"#### {option_strings}\n\n")
+            heading_md = f"{self._argument_heading_prefix} {option_strings}\n\n"
+            self._markdown_output.append(heading_md)
 
             if choices := action.choices:
                 choices = f'`{"`, `".join(str(c) for c in choices)}`'
@@ -81,6 +89,14 @@ def create_parser(cls, **kwargs) -> FlexibleArgumentParser:
     return cls.add_cli_args(parser, **kwargs)
 
 
+def create_serve_parser() -> FlexibleArgumentParser:
+    """Create a parser for the serve command with markdown formatting."""
+    parser = FlexibleArgumentParser()
+    parser.formatter_class = lambda prog: MarkdownFormatter(
+        prog, starting_heading_level=4)
+    return make_arg_parser(parser)
+
+
 def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
     logger.info("Generating argparse documentation")
     logger.debug("Root directory: %s", ROOT_DIR.resolve())
@@ -95,6 +111,7 @@ def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
         "engine_args": create_parser(EngineArgs),
         "async_engine_args": create_parser(AsyncEngineArgs,
                                            async_args_only=True),
+        "serve": create_serve_parser(),
     }
 
     # Generate documentation for each parser
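
For context, a self-contained sketch of the technique the hook uses: subclass argparse's HelpFormatter so that format_help() emits markdown, with starting_heading_level controlling heading depth. Plain argparse stands in for vLLM's FlexibleArgumentParser here, and the add_text/add_usage no-ops plus the "optional arguments" filter entry are assumptions for the sketch, not code from the hook.

```python
# Sketch only: plain argparse in place of vLLM's FlexibleArgumentParser.
import argparse


class MarkdownFormatter(argparse.HelpFormatter):
    """Emit markdown headings instead of plain help text."""

    def __init__(self, prog, starting_heading_level=3):
        # Finite values; the base class rendering machinery is bypassed.
        super().__init__(prog, max_help_position=1000, width=1000)
        self._section_heading_prefix = "#" * starting_heading_level
        self._argument_heading_prefix = "#" * (starting_heading_level + 1)
        self._markdown_output = []

    def start_section(self, heading):
        # Suppress argparse's default group names ("optional arguments"
        # covers Python < 3.10); named groups become section headings.
        if heading not in {"positional arguments", "options",
                           "optional arguments"}:
            self._markdown_output.append(
                f"\n{self._section_heading_prefix} {heading}\n\n")

    def end_section(self):
        pass

    def add_text(self, text):
        pass  # assumption for the sketch: drop free-form text

    def add_usage(self, usage, actions, groups, prefix=None):
        pass  # assumption for the sketch: drop the usage line

    def add_arguments(self, actions):
        for action in actions:
            # Skip positionals and --help, as the updated hook now does.
            if not action.option_strings or "--help" in action.option_strings:
                continue
            names = f'`{"`, `".join(action.option_strings)}`'
            self._markdown_output.append(
                f"{self._argument_heading_prefix} {names}\n\n")
            if action.help:
                self._markdown_output.append(f"{action.help}\n\n")

    def format_help(self):
        return "".join(self._markdown_output)


# starting_heading_level=4 nests the output under the new "### Options"
# heading in docs/cli/README.md.
parser = argparse.ArgumentParser(
    prog="vllm serve",
    formatter_class=lambda prog: MarkdownFormatter(prog,
                                                   starting_heading_level=4))
parser.add_argument("--api-server-count", "-asc", type=int, default=1,
                    help="How many API server processes to run.")
print(parser.format_help())
# -> #### `--api-server-count`, `-asc`
#    How many API server processes to run.
```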

requirements/docs.txt

Lines changed: 1 addition & 0 deletions
@@ -17,6 +17,7 @@ cloudpickle
 fastapi
 msgspec
 openai
+partial-json-parser
 pillow
 psutil
 pybase64

vllm/entrypoints/cli/serve.py

Lines changed: 0 additions & 31 deletions
@@ -67,37 +67,6 @@ def subparser_init(
             help="Start the vLLM OpenAI Compatible API server.",
             description="Start the vLLM OpenAI Compatible API server.",
             usage="vllm serve [model_tag] [options]")
-        serve_parser.add_argument("model_tag",
-                                  type=str,
-                                  nargs='?',
-                                  help="The model tag to serve "
-                                  "(optional if specified in config)")
-        serve_parser.add_argument(
-            "--headless",
-            action='store_true',
-            default=False,
-            help="Run in headless mode. See multi-node data parallel "
-            "documentation for more details.")
-        serve_parser.add_argument(
-            '--data-parallel-start-rank',
-            '-dpr',
-            type=int,
-            default=0,
-            help="Starting data parallel rank for secondary nodes. "
-            "Requires --headless.")
-        serve_parser.add_argument('--api-server-count',
-                                  '-asc',
-                                  type=int,
-                                  default=1,
-                                  help='How many API server processes to run.')
-        serve_parser.add_argument(
-            "--config",
-            type=str,
-            default='',
-            required=False,
-            help="Read CLI options from a config file. "
-            "Must be a YAML with the following options: "
-            "https://docs.vllm.ai/en/latest/configuration/serve_args.html")
 
         serve_parser = make_arg_parser(serve_parser)
         show_filtered_argument_or_group_from_help(serve_parser, ["serve"])
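
The flags removed here are not dropped; they move into make_arg_parser (see vllm/entrypoints/openai/cli_args.py below), so the CLI subcommand and the docs hook now share one definition. A minimal sketch of that single-source-of-truth pattern, with plain argparse and simplified flags rather than vLLM's actual classes:

```python
# Sketch only: the serve flags live in one shared function, so the CLI
# subcommand and the docs generator cannot drift apart.
import argparse


def make_arg_parser(parser):
    """The one place where serve flags are registered."""
    parser.add_argument("model_tag", nargs="?",
                        help="The model tag to serve")
    parser.add_argument("--api-server-count", "-asc", type=int, default=1,
                        help="How many API server processes to run.")
    return parser


# CLI path: attach the shared flags to the "serve" subcommand.
cli = argparse.ArgumentParser(prog="vllm")
serve_parser = cli.add_subparsers(dest="command").add_parser("serve")
make_arg_parser(serve_parser)

# Docs path: attach the same flags to a fresh parser for markdown rendering.
docs_parser = make_arg_parser(argparse.ArgumentParser(prog="vllm serve"))

print(cli.parse_args(["serve", "my-model", "-asc", "2"]))
# -> Namespace(api_server_count=2, command='serve', model_tag='my-model')
```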

vllm/entrypoints/openai/cli_args.py

Lines changed: 28 additions & 0 deletions
@@ -236,6 +236,34 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
     register all arguments instead of manually enumerating them here. This
     avoids code duplication and keeps the argument definitions in one place.
     """
+    parser.add_argument("model_tag",
+                        type=str,
+                        nargs="?",
+                        help="The model tag to serve "
+                        "(optional if specified in config)")
+    parser.add_argument(
+        "--headless",
+        action="store_true",
+        default=False,
+        help="Run in headless mode. See multi-node data parallel "
+        "documentation for more details.")
+    parser.add_argument(
+        "--data-parallel-start-rank",
+        "-dpr",
+        type=int,
+        default=0,
+        help="Starting data parallel rank for secondary nodes. "
+        "Requires --headless.")
+    parser.add_argument("--api-server-count",
+                        "-asc",
+                        type=int,
+                        default=1,
+                        help="How many API server processes to run.")
+    parser.add_argument(
+        "--config",
+        help="Read CLI options from a config file. "
+        "Must be a YAML with the following options: "
+        "https://docs.vllm.ai/en/latest/configuration/serve_args.html")
     parser = FrontendArgs.add_cli_args(parser)
     parser = AsyncEngineArgs.add_cli_args(parser)
 
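A hypothetical smoke test for the relocated flags. It assumes a working vLLM install where these modules import cleanly; both import paths appear in this commit's docs hook.

```python
# Hypothetical smoke test; assumes a working vLLM install.
from vllm.entrypoints.openai.cli_args import make_arg_parser
from vllm.utils import FlexibleArgumentParser

parser = make_arg_parser(FlexibleArgumentParser(prog="vllm serve"))
args = parser.parse_args([
    "my-model",                          # positional model_tag
    "--headless",                        # required for -dpr per the help text
    "--data-parallel-start-rank", "1",
    "--api-server-count", "2",
])
print(args.model_tag, args.headless, args.data_parallel_start_rank,
      args.api_server_count)
# -> my-model True 1 2
```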