Skip to content

Commit b38baab

Browse files
authored
[Doc] Add inplace weights loading example (#19640)
Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com>
1 parent 89cab4d commit b38baab

File tree

1 file changed

+53
-0
lines changed

1 file changed

+53
-0
lines changed
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
4+
from vllm import LLM, RequestOutput, SamplingParams
5+
6+
# Sample prompts used to sanity-check model output both before and after
# the real weights are loaded.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]
# Sampling parameters shared by both generate() calls below.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
15+
16+
17+
def print_prompts_and_outputs(outputs: list[RequestOutput]) -> None:
    """Pretty-print each prompt alongside its first generated completion,
    separated by horizontal rules."""
    separator = "-" * 60
    print(separator)
    for request_output in outputs:
        print(f"Prompt: {request_output.prompt!r}")
        print(f"Output: {request_output.outputs[0].text!r}")
        print(separator)
25+
26+
27+
def main():
    """Demonstrate in-place weight reloading: boot the engine with dummy
    weights, then swap in the real checkpoint without recreating the LLM."""
    # Build the engine with randomly-initialized ("dummy") weights so no
    # real checkpoint is loaded at startup.
    engine = LLM(
        model="Qwen/Qwen3-0.6B",
        load_format="dummy",
        enforce_eager=True,
        tensor_parallel_size=4,
    )

    # With dummy weights the generations are gibberish.
    dummy_outputs = engine.generate(prompts, sampling_params)
    print("\nOutputs do not make sense:")
    print_prompts_and_outputs(dummy_outputs)

    # Flip the workers' load format from `dummy` to `auto` ...
    engine.collective_rpc(
        "update_config", args=({"load_config": {"load_format": "auto"}},)
    )
    # ... then reload the real weights in place on every worker.
    engine.collective_rpc("reload_weights")

    # Generate again to confirm the outputs are now sensible.
    real_outputs = engine.generate(prompts, sampling_params)
    print("\nOutputs make sense after loading real weights:")
    print_prompts_and_outputs(real_outputs)
50+
51+
52+
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)