Skip to content

Commit ce52ae4

Browse files
reidliu41Yuqi Zhang
authored and committed
[Misc] improve Automatic Prefix Caching example (vllm-project#18554)
Signed-off-by: reidliu41 <reid201711@gmail.com> Co-authored-by: reidliu41 <reid201711@gmail.com> Signed-off-by: Yuqi Zhang <yuqizhang@google.com>
1 parent 18e2855 commit ce52ae4

File tree

2 files changed

+99
-75
lines changed

2 files changed

+99
-75
lines changed

docs/source/features/automatic_prefix_caching.md

Lines changed: 1 addition & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -14,81 +14,7 @@ Technical details on how vLLM implements APC can be found [here](#design-automat
1414

1515
Set `enable_prefix_caching=True` in the vLLM engine to enable APC. Here is an example:
1616

17-
```python
18-
import time
19-
from vllm import LLM, SamplingParams
20-
21-
22-
# A prompt containing a large markdown table. The table is randomly generated by GPT-4.
23-
LONG_PROMPT = "You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as follows.\n# Table\n" + """
24-
| ID | Name | Age | Occupation | Country | Email | Phone Number | Address |
25-
|-----|---------------|-----|---------------|---------------|------------------------|----------------|------------------------------|
26-
| 1 | John Doe | 29 | Engineer | USA | john.doe@example.com | 555-1234 | 123 Elm St, Springfield, IL |
27-
| 2 | Jane Smith | 34 | Doctor | Canada | jane.smith@example.com | 555-5678 | 456 Oak St, Toronto, ON |
28-
| 3 | Alice Johnson | 27 | Teacher | UK | alice.j@example.com | 555-8765 | 789 Pine St, London, UK |
29-
| 4 | Bob Brown | 45 | Artist | Australia | bob.b@example.com | 555-4321 | 321 Maple St, Sydney, NSW |
30-
| 5 | Carol White | 31 | Scientist | New Zealand | carol.w@example.com | 555-6789 | 654 Birch St, Wellington, NZ |
31-
| 6 | Dave Green | 28 | Lawyer | Ireland | dave.g@example.com | 555-3456 | 987 Cedar St, Dublin, IE |
32-
| 7 | Emma Black | 40 | Musician | USA | emma.b@example.com | 555-1111 | 246 Ash St, New York, NY |
33-
| 8 | Frank Blue | 37 | Chef | Canada | frank.b@example.com | 555-2222 | 135 Spruce St, Vancouver, BC |
34-
| 9 | Grace Yellow | 50 | Engineer | UK | grace.y@example.com | 555-3333 | 864 Fir St, Manchester, UK |
35-
| 10 | Henry Violet | 32 | Artist | Australia | henry.v@example.com | 555-4444 | 753 Willow St, Melbourne, VIC|
36-
| 11 | Irene Orange | 26 | Scientist | New Zealand | irene.o@example.com | 555-5555 | 912 Poplar St, Auckland, NZ |
37-
| 12 | Jack Indigo | 38 | Teacher | Ireland | jack.i@example.com | 555-6666 | 159 Elm St, Cork, IE |
38-
| 13 | Karen Red | 41 | Lawyer | USA | karen.r@example.com | 555-7777 | 357 Cedar St, Boston, MA |
39-
| 14 | Leo Brown | 30 | Chef | Canada | leo.b@example.com | 555-8888 | 246 Oak St, Calgary, AB |
40-
| 15 | Mia Green | 33 | Musician | UK | mia.g@example.com | 555-9999 | 975 Pine St, Edinburgh, UK |
41-
| 16 | Noah Yellow | 29 | Doctor | Australia | noah.y@example.com | 555-0000 | 864 Birch St, Brisbane, QLD |
42-
| 17 | Olivia Blue | 35 | Engineer | New Zealand | olivia.b@example.com | 555-1212 | 753 Maple St, Hamilton, NZ |
43-
| 18 | Peter Black | 42 | Artist | Ireland | peter.b@example.com | 555-3434 | 912 Fir St, Limerick, IE |
44-
| 19 | Quinn White | 28 | Scientist | USA | quinn.w@example.com | 555-5656 | 159 Willow St, Seattle, WA |
45-
| 20 | Rachel Red | 31 | Teacher | Canada | rachel.r@example.com | 555-7878 | 357 Poplar St, Ottawa, ON |
46-
| 21 | Steve Green | 44 | Lawyer | UK | steve.g@example.com | 555-9090 | 753 Elm St, Birmingham, UK |
47-
| 22 | Tina Blue | 36 | Musician | Australia | tina.b@example.com | 555-1213 | 864 Cedar St, Perth, WA |
48-
| 23 | Umar Black | 39 | Chef | New Zealand | umar.b@example.com | 555-3435 | 975 Spruce St, Christchurch, NZ|
49-
| 24 | Victor Yellow | 43 | Engineer | Ireland | victor.y@example.com | 555-5657 | 246 Willow St, Galway, IE |
50-
| 25 | Wendy Orange | 27 | Artist | USA | wendy.o@example.com | 555-7879 | 135 Elm St, Denver, CO |
51-
| 26 | Xavier Green | 34 | Scientist | Canada | xavier.g@example.com | 555-9091 | 357 Oak St, Montreal, QC |
52-
| 27 | Yara Red | 41 | Teacher | UK | yara.r@example.com | 555-1214 | 975 Pine St, Leeds, UK |
53-
| 28 | Zack Blue | 30 | Lawyer | Australia | zack.b@example.com | 555-3436 | 135 Birch St, Adelaide, SA |
54-
| 29 | Amy White | 33 | Musician | New Zealand | amy.w@example.com | 555-5658 | 159 Maple St, Wellington, NZ |
55-
| 30 | Ben Black | 38 | Chef | Ireland | ben.b@example.com | 555-7870 | 246 Fir St, Waterford, IE |
56-
"""
57-
58-
59-
def get_generation_time(llm, sampling_params, prompts):
    """Run ``llm.generate`` on ``prompts`` and print the output and timing.

    Args:
        llm: Engine object exposing ``generate(prompts, sampling_params=...)``.
        sampling_params: Sampling configuration forwarded to ``llm.generate``.
        prompts: Prompt input forwarded unchanged to ``llm.generate``.

    Returns:
        Elapsed time of the ``generate`` call, in seconds.
    """
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    start_time = time.perf_counter()
    output = llm.generate(prompts, sampling_params=sampling_params)
    elapsed = time.perf_counter() - start_time

    # Only the first completion of the first request is printed.
    print(f"Output: {output[0].outputs[0].text}")
    print(f"Generation time: {elapsed} seconds.")
    return elapsed
67-
68-
69-
# APC is switched on through the enable_prefix_caching engine argument.
llm = LLM(enable_prefix_caching=True, model='lmsys/longchat-13b-16k')

sampling_params = SamplingParams(max_tokens=100, temperature=0)

# First request: ask for the age of John Doe.
get_generation_time(
    llm=llm,
    sampling_params=sampling_params,
    prompts=LONG_PROMPT + "Question: what is the age of John Doe? Your answer: The age of John Doe is ",
)

# Second request: ask for the age of Zack Blue.
# It shares the LONG_PROMPT prefix with the first request, so it is expected
# to be faster: vllm avoids computing the KV cache of LONG_PROMPT again.
get_generation_time(
    llm=llm,
    sampling_params=sampling_params,
    prompts=LONG_PROMPT + "Question: what is the age of Zack Blue? Your answer: The age of Zack Blue is ",
)
91-
```
17+
<gh-file:examples/offline_inference/automatic_prefix_caching.py>
9218

9319
## Example workloads
9420

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
"""
3+
Demonstration script for Automatic Prefix Caching (APC) in vLLM.
4+
5+
Automatic Prefix Caching (APC) allows the vLLM engine to reuse cached
6+
KV (key-value) pairs from previous prompts if a new query shares the same
7+
prefix. This reduces redundant computation and improves inference speed.
8+
9+
To enable APC, set `enable_prefix_caching=True` when initializing the
10+
vLLM engine.
11+
12+
This script uses a long Markdown table as the shared prompt prefix and
13+
compares the generation time for two queries that share the same prefix
14+
but ask different questions.
15+
16+
Run:
17+
python examples/offline_inference/automatic_prefix_caching.py
18+
"""
19+
import time
20+
21+
from vllm import LLM, SamplingParams
22+
23+
# ruff: noqa: E501
24+
# A prompt containing a large markdown table. The table is randomly generated by GPT-4.
25+
LONG_PROMPT = "You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as follows.\n# Table\n" + """
26+
| ID | Name | Age | Occupation | Country | Email | Phone Number | Address |
27+
|-----|---------------|-----|---------------|---------------|------------------------|----------------|------------------------------|
28+
| 1 | John Doe | 29 | Engineer | USA | john.doe@example.com | 555-1234 | 123 Elm St, Springfield, IL |
29+
| 2 | Jane Smith | 34 | Doctor | Canada | jane.smith@example.com | 555-5678 | 456 Oak St, Toronto, ON |
30+
| 3 | Alice Johnson | 27 | Teacher | UK | alice.j@example.com | 555-8765 | 789 Pine St, London, UK |
31+
| 4 | Bob Brown | 45 | Artist | Australia | bob.b@example.com | 555-4321 | 321 Maple St, Sydney, NSW |
32+
| 5 | Carol White | 31 | Scientist | New Zealand | carol.w@example.com | 555-6789 | 654 Birch St, Wellington, NZ |
33+
| 6 | Dave Green | 28 | Lawyer | Ireland | dave.g@example.com | 555-3456 | 987 Cedar St, Dublin, IE |
34+
| 7 | Emma Black | 40 | Musician | USA | emma.b@example.com | 555-1111 | 246 Ash St, New York, NY |
35+
| 8 | Frank Blue | 37 | Chef | Canada | frank.b@example.com | 555-2222 | 135 Spruce St, Vancouver, BC |
36+
| 9 | Grace Yellow | 50 | Engineer | UK | grace.y@example.com | 555-3333 | 864 Fir St, Manchester, UK |
37+
| 10 | Henry Violet | 32 | Artist | Australia | henry.v@example.com | 555-4444 | 753 Willow St, Melbourne, VIC|
38+
| 11 | Irene Orange | 26 | Scientist | New Zealand | irene.o@example.com | 555-5555 | 912 Poplar St, Auckland, NZ |
39+
| 12 | Jack Indigo | 38 | Teacher | Ireland | jack.i@example.com | 555-6666 | 159 Elm St, Cork, IE |
40+
| 13 | Karen Red | 41 | Lawyer | USA | karen.r@example.com | 555-7777 | 357 Cedar St, Boston, MA |
41+
| 14 | Leo Brown | 30 | Chef | Canada | leo.b@example.com | 555-8888 | 246 Oak St, Calgary, AB |
42+
| 15 | Mia Green | 33 | Musician | UK | mia.g@example.com | 555-9999 | 975 Pine St, Edinburgh, UK |
43+
| 16 | Noah Yellow | 29 | Doctor | Australia | noah.y@example.com | 555-0000 | 864 Birch St, Brisbane, QLD |
44+
| 17 | Olivia Blue | 35 | Engineer | New Zealand | olivia.b@example.com | 555-1212 | 753 Maple St, Hamilton, NZ |
45+
| 18 | Peter Black | 42 | Artist | Ireland | peter.b@example.com | 555-3434 | 912 Fir St, Limerick, IE |
46+
| 19 | Quinn White | 28 | Scientist | USA | quinn.w@example.com | 555-5656 | 159 Willow St, Seattle, WA |
47+
| 20 | Rachel Red | 31 | Teacher | Canada | rachel.r@example.com | 555-7878 | 357 Poplar St, Ottawa, ON |
48+
| 21 | Steve Green | 44 | Lawyer | UK | steve.g@example.com | 555-9090 | 753 Elm St, Birmingham, UK |
49+
| 22 | Tina Blue | 36 | Musician | Australia | tina.b@example.com | 555-1213 | 864 Cedar St, Perth, WA |
50+
| 23 | Umar Black | 39 | Chef | New Zealand | umar.b@example.com | 555-3435 | 975 Spruce St, Christchurch, NZ|
51+
| 24 | Victor Yellow | 43 | Engineer | Ireland | victor.y@example.com | 555-5657 | 246 Willow St, Galway, IE |
52+
| 25 | Wendy Orange | 27 | Artist | USA | wendy.o@example.com | 555-7879 | 135 Elm St, Denver, CO |
53+
| 26 | Xavier Green | 34 | Scientist | Canada | xavier.g@example.com | 555-9091 | 357 Oak St, Montreal, QC |
54+
| 27 | Yara Red | 41 | Teacher | UK | yara.r@example.com | 555-1214 | 975 Pine St, Leeds, UK |
55+
| 28 | Zack Blue | 30 | Lawyer | Australia | zack.b@example.com | 555-3436 | 135 Birch St, Adelaide, SA |
56+
| 29 | Amy White | 33 | Musician | New Zealand | amy.w@example.com | 555-5658 | 159 Maple St, Wellington, NZ |
57+
| 30 | Ben Black | 38 | Chef | Ireland | ben.b@example.com | 555-7870 | 246 Fir St, Waterford, IE |
58+
"""
59+
60+
61+
def get_generation_time(llm, sampling_params, prompts):
    """Run ``llm.generate`` on ``prompts`` and print the output and timing.

    Args:
        llm: Engine object exposing ``generate(prompts, sampling_params=...)``.
        sampling_params: Sampling configuration forwarded to ``llm.generate``.
        prompts: Prompt input forwarded unchanged to ``llm.generate``.

    Returns:
        Elapsed time of the ``generate`` call, in seconds, so callers can
        compare runs programmatically instead of parsing the printed text.
    """
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    start_time = time.perf_counter()
    output = llm.generate(prompts, sampling_params=sampling_params)
    elapsed = time.perf_counter() - start_time

    # Only the first completion of the first request is printed, framed by
    # separator lines so consecutive runs are easy to tell apart.
    print("-" * 30)
    print(f"Output: {output[0].outputs[0].text}")
    print(f"Generation time: {elapsed} seconds.")
    print("-" * 30)
    return elapsed
71+
72+
73+
def main():
    """Time two generations whose prompts share LONG_PROMPT as a prefix."""
    # APC is switched on through the enable_prefix_caching engine argument.
    llm = LLM(enable_prefix_caching=True, model='lmsys/longchat-13b-16k')

    sampling_params = SamplingParams(max_tokens=100, temperature=0)

    # First request: ask for the age of John Doe.
    john_doe_prompt = (
        LONG_PROMPT +
        "Question: what is the age of John Doe? Your answer: The age of John Doe is "
    )
    get_generation_time(
        llm=llm,
        sampling_params=sampling_params,
        prompts=john_doe_prompt,
    )

    # Second request: ask for the age of Zack Blue.
    # It shares the LONG_PROMPT prefix with the first request, so it is
    # expected to be faster: vllm avoids computing the KV cache of
    # LONG_PROMPT again.
    zack_blue_prompt = (
        LONG_PROMPT +
        "Question: what is the age of Zack Blue? Your answer: The age of Zack Blue is "
    )
    get_generation_time(
        llm=llm,
        sampling_params=sampling_params,
        prompts=zack_blue_prompt,
    )
95+
96+
97+
if __name__ == "__main__":
98+
main()

0 commit comments

Comments
 (0)