
Commit a68e293

[Doc] Convert Sphinx directives ({class}, {meth}, {attr}, ...) to MkDocs format for better documentation linking (vllm-project#18663)
Signed-off-by: Zerohertz <ohg3417@gmail.com>
1 parent 6881107 commit a68e293

37 files changed: +360 -247 lines
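
Every file in this commit follows the same mechanical rewrite: Sphinx/MyST roles such as `{class}`, `{meth}`, `{attr}`, and `{exc}` become mkdocstrings-style markdown cross-references. A minimal before/after sketch of the pattern (the target path is taken from the first hunk below; the wrapper functions themselves are illustrative only):

```python
# Before: Sphinx/MyST role syntax, resolved against the Sphinx object
# inventory when the docs were built with Sphinx.
def describe_before() -> str:
    """See {meth}`VllmConfig.compute_hash` to check what information
    is already considered by default."""
    return describe_before.__doc__


# After: mkdocstrings cross-reference syntax. The second bracket pair holds
# the fully qualified object path that the MkDocs build resolves into a link.
def describe_after() -> str:
    """See [`VllmConfig.compute_hash`][vllm.config.VllmConfig.compute_hash]
    to check what information is already considered by default."""
    return describe_after.__doc__
```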

vllm/compilation/compiler_interface.py
Lines changed: 2 additions & 1 deletion

@@ -39,7 +39,8 @@ def compute_hash(self, vllm_config: VllmConfig) -> str:
         Gather all the relevant information from the vLLM config,
         to compute a hash so that we can cache the compiled model.
 
-        See {meth}`VllmConfig.compute_hash` to check what information
+        See [`VllmConfig.compute_hash`][vllm.config.VllmConfig.compute_hash]
+        to check what information
         is already considered by default. This function should only
         consider the information that is specific to the compiler.
         """

vllm/config.py
Lines changed: 20 additions & 16 deletions

@@ -2986,7 +2986,7 @@ class PoolerConfig:
     pooling_type: Optional[str] = None
     """
     The pooling method of the pooling model. This should be a key in
-    {class}`vllm.model_executor.layers.pooler.PoolingType`.
+    [`vllm.model_executor.layers.pooler.PoolingType`][].
     """
 
     normalize: Optional[bool] = None

@@ -3697,23 +3697,27 @@ class CompilationConfig:
     """Configuration for compilation. It has three parts:
 
     - Top-level Compilation control:
-        - {attr}`level`
-        - {attr}`debug_dump_path`
-        - {attr}`cache_dir`
-        - {attr}`backend`
-        - {attr}`custom_ops`
-        - {attr}`splitting_ops`
+        - [`level`][vllm.config.CompilationConfig.level]
+        - [`debug_dump_path`][vllm.config.CompilationConfig.debug_dump_path]
+        - [`cache_dir`][vllm.config.CompilationConfig.cache_dir]
+        - [`backend`][vllm.config.CompilationConfig.backend]
+        - [`custom_ops`][vllm.config.CompilationConfig.custom_ops]
+        - [`splitting_ops`][vllm.config.CompilationConfig.splitting_ops]
     - CudaGraph capture:
-        - {attr}`use_cudagraph`
-        - {attr}`cudagraph_capture_sizes`
-        - {attr}`cudagraph_num_of_warmups`
-        - {attr}`cudagraph_copy_inputs`
-        - {attr}`full_cuda_graph`
+        - [`use_cudagraph`][vllm.config.CompilationConfig.use_cudagraph]
+        - [`cudagraph_capture_sizes`]
+        [vllm.config.CompilationConfig.cudagraph_capture_sizes]
+        - [`cudagraph_num_of_warmups`]
+        [vllm.config.CompilationConfig.cudagraph_num_of_warmups]
+        - [`cudagraph_copy_inputs`]
+        [vllm.config.CompilationConfig.cudagraph_copy_inputs]
+        - [`full_cuda_graph`][vllm.config.CompilationConfig.full_cuda_graph]
     - Inductor compilation:
-        - {attr}`use_inductor`
-        - {attr}`compile_sizes`
-        - {attr}`inductor_compile_config`
-        - {attr}`inductor_passes`
+        - [`use_inductor`][vllm.config.CompilationConfig.use_inductor]
+        - [`compile_sizes`][vllm.config.CompilationConfig.compile_sizes]
+        - [`inductor_compile_config`]
+        [vllm.config.CompilationConfig.inductor_compile_config]
+        - [`inductor_passes`][vllm.config.CompilationConfig.inductor_passes]
         - custom inductor passes
 
     Why we have different sizes for cudagraph and inductor:
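
The `CompilationConfig` hunk above also shows the wrapping convention this commit adopts: when a fully qualified target would overflow the line, the `` [`text`] `` part and the `[target]` part sit on adjacent lines. A minimal sketch of the two link forms used throughout (docstring content copied from the hunks above; the class itself is illustrative only):

```python
class LinkFormsDemo:
    """Demonstrates the two mkdocstrings link forms used in this commit.

    - Explicit target: display text first, then the fully qualified path:
      [`level`][vllm.config.CompilationConfig.level]
    - Shorthand: an empty second bracket pair reuses the link text itself
      as the target path:
      [`vllm.model_executor.layers.pooler.PoolingType`][]
    """
```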

vllm/connections.py
Lines changed: 4 additions & 1 deletion

@@ -167,4 +167,7 @@ async def async_download_file(
 
 
 global_http_connection = HTTPConnection()
-"""The global {class}`HTTPConnection` instance used by vLLM."""
+"""
+The global [`HTTPConnection`][vllm.connections.HTTPConnection] instance used
+by vLLM.
+"""

vllm/engine/async_llm_engine.py
Lines changed: 27 additions & 23 deletions

@@ -475,7 +475,8 @@ async def add_request_async(
         *,
         inputs: Optional[PromptType] = None,  # DEPRECATED
     ) -> None:
-        """Async version of {meth}`add_request`."""
+        """Async version of
+        [`add_request`][vllm.engine.llm_engine.LLMEngine.add_request]."""
         if inputs is not None:
             prompt = inputs
         assert prompt is not None and params is not None

@@ -582,20 +583,21 @@ async def build_guided_decoding_logits_processor_async(
 
 
 class AsyncLLMEngine(EngineClient):
-    """An asynchronous wrapper for {class}`LLMEngine`.
+    """An asynchronous wrapper for [`LLMEngine`][vllm.LLMEngine].
 
-    This class is used to wrap the {class}`LLMEngine` class to make it
-    asynchronous. It uses asyncio to create a background loop that keeps
-    processing incoming requests. The {class}`LLMEngine` is kicked by the
-    generate method when there are requests in the waiting queue. The generate
-    method yields the outputs from the {class}`LLMEngine` to the caller.
+    This class is used to wrap the [`LLMEngine`][vllm.LLMEngine] class to
+    make it asynchronous. It uses asyncio to create a background loop that keeps
+    processing incoming requests. The [`LLMEngine`][vllm.LLMEngine] is kicked
+    by the generate method when there are requests in the waiting queue. The
+    generate method yields the outputs from the [`LLMEngine`][vllm.LLMEngine]
+    to the caller.
 
     Args:
         log_requests: Whether to log the requests.
         start_engine_loop: If True, the background task to run the engine
             will be automatically started in the generate call.
-        *args: Arguments for {class}`LLMEngine`.
-        **kwargs: Arguments for {class}`LLMEngine`.
+        *args: Arguments for [`LLMEngine`][vllm.LLMEngine].
+        **kwargs: Arguments for [`LLMEngine`][vllm.LLMEngine].
     """
 
     _engine_class: Type[_AsyncLLMEngine] = _AsyncLLMEngine

@@ -985,8 +987,9 @@ async def generate(
         from the LLMEngine to the caller.
 
         Args:
-            prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType`
-                for more details about the format of each input.
+            prompt: The prompt to the LLM. See
+                [`PromptType`][vllm.inputs.PromptType] for more details about
+                the format of each input.
             sampling_params: The sampling parameters of the request.
             request_id: The unique id of the request.
             lora_request: LoRA request to use for generation, if any.

@@ -1003,7 +1006,7 @@ async def generate(
         Details:
             - If the engine is not running, start the background loop,
               which iteratively invokes
-              {meth}`~vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step`
+              [`engine_step`][vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step]
               to process the waiting requests.
             - Add the request to the engine's `RequestTracker`.
               On the next background loop, this request will be sent to

@@ -1075,8 +1078,9 @@ async def encode(
         from the LLMEngine to the caller.
 
         Args:
-            prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType`
-                for more details about the format of each input.
+            prompt: The prompt to the LLM. See
+                [`PromptType`][vllm.inputs.PromptType] for more details about
+                the format of each input.
             pooling_params: The pooling parameters of the request.
             request_id: The unique id of the request.
             lora_request: LoRA request to use for generation, if any.

@@ -1089,15 +1093,15 @@ async def encode(
             for the request.
 
         Details:
-            - If the engine is not running, start the background loop,
-              which iteratively invokes
-              {meth}`~vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step`
-              to process the waiting requests.
-            - Add the request to the engine's `RequestTracker`.
-              On the next background loop, this request will be sent to
-              the underlying engine.
-              Also, a corresponding `AsyncStream` will be created.
-            - Wait for the request outputs from `AsyncStream` and yield them.
+            - If the engine is not running, start the background loop,
+              which iteratively invokes
+              [`vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step`][]
+              to process the waiting requests.
+            - Add the request to the engine's `RequestTracker`.
+              On the next background loop, this request will be sent to
+              the underlying engine.
+              Also, a corresponding `AsyncStream` will be created.
+            - Wait for the request outputs from `AsyncStream` and yield them.
 
         Example:
         ```

vllm/engine/llm_engine.py
Lines changed: 4 additions & 4 deletions

@@ -130,11 +130,11 @@ class LLMEngine:
     iteration-level scheduling and efficient memory management to maximize the
     serving throughput.
 
-    The [LLM][vllm.LLM] class wraps this class for offline batched inference
-    and the [AsyncLLMEngine][] class wraps this class for online serving.
+    The [`LLM`][vllm.LLM] class wraps this class for offline batched inference
+    and the [`AsyncLLMEngine`][vllm.engine.async_llm_engine.AsyncLLMEngine]
+    class wraps this class for online serving.
 
-    The config arguments are derived from [EngineArgs][vllm.EngineArgs]. (See
-    [engine-args][])
+    The config arguments are derived from [`EngineArgs`][vllm.EngineArgs].
 
     Args:
         vllm_config: The configuration for initializing and running vLLM.

vllm/engine/multiprocessing/client.py
Lines changed: 6 additions & 4 deletions

@@ -492,8 +492,9 @@ def generate(
         from the LLMEngine to the caller.
 
         Args:
-            prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType`
-                for more details about the format of each input.
+            prompt: The prompt to the LLM. See
+                [`PromptType`][vllm.inputs.PromptType] for more details about
+                the format of each input.
             sampling_params: The sampling parameters of the request.
             request_id: The unique id of the request.
             lora_request: LoRA request to use for generation, if any.

@@ -561,8 +562,9 @@ def encode(
         from the LLMEngine to the caller.
 
        Args:
-            prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType`
-                for more details about the format of each input.
+            prompt: The prompt to the LLM. See
+                [`PromptType`][vllm.inputs.PromptType] for more details about
+                the format of each input.
             pooling_params: The pooling parameters of the request.
             request_id: The unique id of the request.
             lora_request: LoRA request to use for generation, if any.

vllm/engine/multiprocessing/engine.py
Lines changed: 11 additions & 8 deletions

@@ -42,19 +42,22 @@
 
 
 class MQLLMEngine:
-    """A multiprocessing wrapper for {class}`LLMEngine`.
+    """A multiprocessing wrapper for
+    [`LLMEngine`][vllm.engine.llm_engine.LLMEngine].
 
-    This class is used to wrap the {class}`LLMEngine` class to enable use
+    This class is used to wrap the
+    [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] class to enable use
     in concurrnet manner. It runs a background loop and uses zeromq to
     receive new requests and stream outputs incrementally via ipc.
 
-    The {class}`LLMEngine` generate or encode process is kicked off when a new
-    RPCProcessRequest is received by the input_socket.
+    The [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] generate or encode
+    process is kicked off when a new RPCProcessRequest is received by the
+    input_socket.
 
     The self.engine_loop checks the input_socket for new requests,
     adds them to the LLMEngine if there are any, calls the internal
-    {class}`LLMEngine.step()`, and sends the RequestOutputs back over
-    the output_socket.
+    [`LLMEngine.step()`][vllm.engine.llm_engine.LLMEngine.step], and sends
+    the RequestOutputs back over the output_socket.
 
     If use_async_sockets is set, the logic associated with reading new
     requests from the socket and sending data to the socket is passed

@@ -65,8 +68,8 @@ class MQLLMEngine:
         ipc_path: Base path for zeromq interprocess messaging
         use_async_sockets: Whether to make send/recv async with GPU
         log_requests: Whether to log the requests.
-        *args: Arguments for {class}`LLMEngine`.
-        **kwargs: Arguments for {class}`LLMEngine`.
+        *args: Arguments for [`LLMEngine`][vllm.engine.llm_engine.LLMEngine].
+        **kwargs: Arguments for [`LLMEngine`][vllm.engine.llm_engine.LLMEngine].
     """
 
     def __init__(self,

vllm/engine/output_processor/multi_step.py
Lines changed: 5 additions & 2 deletions

@@ -56,8 +56,11 @@ def process_prompt_logprob(self, seq_group: SequenceGroup,
         scheduled computation.
 
         Args:
-            seq_group: the outputs are associated with this {class}`SequenceGroup`
-            outputs: the {class}`SequenceGroupOutput`s for all scheduler steps
+            seq_group: the outputs are associated with this
+                [`SequenceGroup`][vllm.sequence.SequenceGroup]
+            outputs: the
+                [`SequenceGroupOutput`][vllm.sequence.SequenceGroupOutput]s
+                for all scheduler steps
         """
         for output in outputs:
             # Concatenate single-step prompt logprob processing results.

vllm/engine/output_processor/single_step.py
Lines changed: 14 additions & 7 deletions

@@ -19,17 +19,21 @@
 def single_step_process_prompt_logprob(
         sg_output_proc: SequenceGroupOutputProcessor, seq_group: SequenceGroup,
         output: CompletionSequenceGroupOutput) -> None:
-    """Process prompt logprobs associated with the {class}`SequenceGroupOutput`
-    for a given step.
+    """Process prompt logprobs associated with the
+    [`SequenceGroupOutput`][vllm.sequence.SequenceGroupOutput] for a given step.
 
     Do nothing if the output has no prompt logprobs.
 
     Account for the fact that transformers do not compute first-token logprobs.
 
     Args:
-        sg_output_proc: {class}`SequenceGroupOutputProcessor` instance
-        seq_group: the output is associated with this {class}`SequenceGroup`
-        output: the {class}`SequenceGroupOutput` for a single scheduler step
+        sg_output_proc:
+            [`SequenceGroupOutputProcessor`][vllm.engine.output_processor.interfaces.SequenceGroupOutputProcessor]
+            instance
+        seq_group: the output is associated with this
+            [`SequenceGroup`][vllm.sequence.SequenceGroup]
+        output: the [`SequenceGroupOutput`][vllm.sequence.SequenceGroupOutput]
+            for a single scheduler step
     """
     prompt_logprobs = output.prompt_logprobs
 

@@ -103,8 +107,11 @@ def process_prompt_logprob(self, seq_group: SequenceGroup,
         scheduled computation.
 
         Args:
-            seq_group: the output is associated with this {class}`SequenceGroup`
-            outputs: the {class}`SequenceGroupOutput` for a single scheduler step
+            seq_group: the output is associated with this
+                [`SequenceGroup`][vllm.sequence.SequenceGroup]
+            outputs: the
+                [`SequenceGroupOutput`][vllm.sequence.SequenceGroupOutput]
+                for a single scheduler step
         """
         assert len(outputs) == 1, "Single step should only have 1 output."
         output = outputs[0]

vllm/entrypoints/llm.py
Lines changed: 2 additions & 3 deletions

@@ -129,8 +129,7 @@ class LLM:
         compilation_config: Either an integer or a dictionary. If it is an
             integer, it is used as the level of compilation optimization. If it
             is a dictionary, it can specify the full compilation configuration.
-        **kwargs: Arguments for [EngineArgs][vllm.EngineArgs]. (See
-            [engine-args][])
+        **kwargs: Arguments for [`EngineArgs`][vllm.EngineArgs].
 
     Note:
         This class is intended to be used for offline inference. For online

@@ -494,7 +493,7 @@ def collective_rpc(self,
                 `self` argument, in addition to the arguments passed in `args`
                 and `kwargs`. The `self` argument will be the worker object.
             timeout: Maximum time in seconds to wait for execution. Raises a
-                {exc}`TimeoutError` on timeout. `None` means wait indefinitely.
+                [`TimeoutError`][] on timeout. `None` means wait indefinitely.
             args: Positional arguments to pass to the worker method.
             kwargs: Keyword arguments to pass to the worker method.
 
