|
| 1 | +# SPDX-License-Identifier: Apache-2.0 |
| 2 | +""" |
| 3 | +Demonstration script for Automatic Prefix Caching (APC) in vLLM. |
| 4 | +
|
| 5 | +Automatic Prefix Caching (APC) allows the vLLM engine to reuse cached |
| 6 | +KV (key-value) pairs from previous prompts if a new query shares the same |
| 7 | +prefix. This reduces redundant computation and improves inference speed. |
| 8 | +
|
| 9 | +To enable APC, set `enable_prefix_caching=True` when initializing the |
| 10 | +vLLM engine. |
| 11 | +
|
| 12 | +This script uses a long Markdown table as the shared prompt prefix and |
| 13 | +compares the generation time for two queries that share the same prefix |
| 14 | +but ask different questions. |
| 15 | +
|
| 16 | +Run: |
| 17 | +python examples/offline_inference/automatic_prefix_caching.py |
| 18 | +""" |
| 19 | +import time |
| 20 | + |
| 21 | +from vllm import LLM, SamplingParams |
| 22 | + |
| 23 | +# ruff: noqa: E501 |
# A prompt containing a large markdown table. The table is randomly generated by GPT-4.
# It is the long prefix that main() prepends to every question, which is what makes
# the prompts share a common prefix.
# NOTE(review): "assistant in recognizes" in the instruction sentence reads like a typo;
# it is left untouched because editing it would change the prompt sent to the model.
LONG_PROMPT = "You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as follows.\n# Table\n" + """
| ID | Name | Age | Occupation | Country | Email | Phone Number | Address |
|-----|---------------|-----|---------------|---------------|------------------------|----------------|------------------------------|
| 1 | John Doe | 29 | Engineer | USA | john.doe@example.com | 555-1234 | 123 Elm St, Springfield, IL |
| 2 | Jane Smith | 34 | Doctor | Canada | jane.smith@example.com | 555-5678 | 456 Oak St, Toronto, ON |
| 3 | Alice Johnson | 27 | Teacher | UK | alice.j@example.com | 555-8765 | 789 Pine St, London, UK |
| 4 | Bob Brown | 45 | Artist | Australia | bob.b@example.com | 555-4321 | 321 Maple St, Sydney, NSW |
| 5 | Carol White | 31 | Scientist | New Zealand | carol.w@example.com | 555-6789 | 654 Birch St, Wellington, NZ |
| 6 | Dave Green | 28 | Lawyer | Ireland | dave.g@example.com | 555-3456 | 987 Cedar St, Dublin, IE |
| 7 | Emma Black | 40 | Musician | USA | emma.b@example.com | 555-1111 | 246 Ash St, New York, NY |
| 8 | Frank Blue | 37 | Chef | Canada | frank.b@example.com | 555-2222 | 135 Spruce St, Vancouver, BC |
| 9 | Grace Yellow | 50 | Engineer | UK | grace.y@example.com | 555-3333 | 864 Fir St, Manchester, UK |
| 10 | Henry Violet | 32 | Artist | Australia | henry.v@example.com | 555-4444 | 753 Willow St, Melbourne, VIC|
| 11 | Irene Orange | 26 | Scientist | New Zealand | irene.o@example.com | 555-5555 | 912 Poplar St, Auckland, NZ |
| 12 | Jack Indigo | 38 | Teacher | Ireland | jack.i@example.com | 555-6666 | 159 Elm St, Cork, IE |
| 13 | Karen Red | 41 | Lawyer | USA | karen.r@example.com | 555-7777 | 357 Cedar St, Boston, MA |
| 14 | Leo Brown | 30 | Chef | Canada | leo.b@example.com | 555-8888 | 246 Oak St, Calgary, AB |
| 15 | Mia Green | 33 | Musician | UK | mia.g@example.com | 555-9999 | 975 Pine St, Edinburgh, UK |
| 16 | Noah Yellow | 29 | Doctor | Australia | noah.y@example.com | 555-0000 | 864 Birch St, Brisbane, QLD |
| 17 | Olivia Blue | 35 | Engineer | New Zealand | olivia.b@example.com | 555-1212 | 753 Maple St, Hamilton, NZ |
| 18 | Peter Black | 42 | Artist | Ireland | peter.b@example.com | 555-3434 | 912 Fir St, Limerick, IE |
| 19 | Quinn White | 28 | Scientist | USA | quinn.w@example.com | 555-5656 | 159 Willow St, Seattle, WA |
| 20 | Rachel Red | 31 | Teacher | Canada | rachel.r@example.com | 555-7878 | 357 Poplar St, Ottawa, ON |
| 21 | Steve Green | 44 | Lawyer | UK | steve.g@example.com | 555-9090 | 753 Elm St, Birmingham, UK |
| 22 | Tina Blue | 36 | Musician | Australia | tina.b@example.com | 555-1213 | 864 Cedar St, Perth, WA |
| 23 | Umar Black | 39 | Chef | New Zealand | umar.b@example.com | 555-3435 | 975 Spruce St, Christchurch, NZ|
| 24 | Victor Yellow | 43 | Engineer | Ireland | victor.y@example.com | 555-5657 | 246 Willow St, Galway, IE |
| 25 | Wendy Orange | 27 | Artist | USA | wendy.o@example.com | 555-7879 | 135 Elm St, Denver, CO |
| 26 | Xavier Green | 34 | Scientist | Canada | xavier.g@example.com | 555-9091 | 357 Oak St, Montreal, QC |
| 27 | Yara Red | 41 | Teacher | UK | yara.r@example.com | 555-1214 | 975 Pine St, Leeds, UK |
| 28 | Zack Blue | 30 | Lawyer | Australia | zack.b@example.com | 555-3436 | 135 Birch St, Adelaide, SA |
| 29 | Amy White | 33 | Musician | New Zealand | amy.w@example.com | 555-5658 | 159 Maple St, Wellington, NZ |
| 30 | Ben Black | 38 | Chef | Ireland | ben.b@example.com | 555-7870 | 246 Fir St, Waterford, IE |
"""
| 59 | + |
| 60 | + |
def get_generation_time(llm, sampling_params, prompts):
    """Run ``llm.generate`` on ``prompts`` and report how long it took.

    Prints the text of the first completion of the first returned result
    together with the elapsed time, framed by two dashed separator lines.

    Args:
        llm: Object exposing ``generate(prompts, sampling_params=...)``.
        sampling_params: Sampling configuration, forwarded unchanged to
            ``llm.generate``.
        prompts: Prompt input, forwarded unchanged to ``llm.generate``.

    Returns:
        The elapsed generation time in seconds, as a float.
    """
    # perf_counter() is a monotonic, high-resolution clock, so the measured
    # interval cannot be distorted by system clock adjustments the way an
    # interval taken with time.time() can.
    start_time = time.perf_counter()
    output = llm.generate(prompts, sampling_params=sampling_params)
    elapsed = time.perf_counter() - start_time

    # Print the output and generation time.
    print("-" * 30)
    print(f"Output: {output[0].outputs[0].text}")
    print(f"Generation time: {elapsed} seconds.")
    print("-" * 30)

    # Hand the measurement back so callers can compare runs programmatically.
    return elapsed
| 71 | + |
| 72 | + |
def main():
    """Time two queries that share LONG_PROMPT as their common prefix."""
    # Passing enable_prefix_caching=True is what turns APC on.
    engine = LLM(model='lmsys/longchat-13b-16k', enable_prefix_caching=True)
    params = SamplingParams(temperature=0, max_tokens=100)

    # First the age of John Doe, then the age of Zack Blue. The second query
    # is expected to be faster since vllm avoids computing the KV cache of
    # LONG_PROMPT again.
    question_suffixes = (
        "Question: what is the age of John Doe? Your answer: The age of John Doe is ",
        "Question: what is the age of Zack Blue? Your answer: The age of Zack Blue is ",
    )
    for suffix in question_suffixes:
        get_generation_time(engine, params, LONG_PROMPT + suffix)
| 95 | + |
| 96 | + |
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments