|
| 1 | +# SPDX-License-Identifier: Apache-2.0 |
| 2 | +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project |
| 3 | +"""Compare the outputs with and without prefix caching on the V1 scheduler or AscendScheduler.""" |
| 4 | + |
| 5 | +import os |
| 6 | + |
| 7 | +import pytest |
| 8 | + |
| 9 | +from tests.conftest import VllmRunner |
| 10 | +from tests.model_utils import check_outputs_equal |
| 11 | + |
# Checkpoints every test in this module is parametrized over.
MODELS = [
    "Qwen/Qwen3-8B-Base",  # covers the MHA path
    "deepseek-ai/DeepSeek-V2-Lite-Chat",  # covers the MLA path
]
| 18 | + |
# A prompt containing a large markdown table. The table was randomly generated
# by GPT-4. Every entry of INPUT_PROMPTS starts with this text, so the prompts
# share one long common prefix. The literal is runtime input to the models:
# do not reformat or "fix" its wording.
LONG_PROMPT = "You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as follows.\n# Table\n" + """
| ID | Name | Age | Occupation | Country | Email | Phone Number | Address |
|-----|---------------|-----|---------------|---------------|------------------------|----------------|------------------------------|
| 1 | John Doe | 29 | Engineer | USA | john.doe@example.com | 555-1234 | 123 Elm St, Springfield, IL |
| 2 | Jane Smith | 34 | Doctor | Canada | jane.smith@example.com | 555-5678 | 456 Oak St, Toronto, ON |
| 3 | Alice Johnson | 27 | Teacher | UK | alice.j@example.com | 555-8765 | 789 Pine St, London, UK |
| 4 | Bob Brown | 45 | Artist | Australia | bob.b@example.com | 555-4321 | 321 Maple St, Sydney, NSW |
| 5 | Carol White | 31 | Scientist | New Zealand | carol.w@example.com | 555-6789 | 654 Birch St, Wellington, NZ |
| 6 | Dave Green | 28 | Lawyer | Ireland | dave.g@example.com | 555-3456 | 987 Cedar St, Dublin, IE |
| 7 | Emma Black | 40 | Musician | USA | emma.b@example.com | 555-1111 | 246 Ash St, New York, NY |
| 8 | Frank Blue | 37 | Chef | Canada | frank.b@example.com | 555-2222 | 135 Spruce St, Vancouver, BC |
| 9 | Grace Yellow | 50 | Engineer | UK | grace.y@example.com | 555-3333 | 864 Fir St, Manchester, UK |
| 10 | Henry Violet | 32 | Artist | Australia | henry.v@example.com | 555-4444 | 753 Willow St, Melbourne, VIC|
| 11 | Irene Orange | 26 | Scientist | New Zealand | irene.o@example.com | 555-5555 | 912 Poplar St, Auckland, NZ |
| 12 | Jack Indigo | 38 | Teacher | Ireland | jack.i@example.com | 555-6666 | 159 Elm St, Cork, IE |
| 13 | Karen Red | 41 | Lawyer | USA | karen.r@example.com | 555-7777 | 357 Cedar St, Boston, MA |
| 14 | Leo Brown | 30 | Chef | Canada | leo.b@example.com | 555-8888 | 246 Oak St, Calgary, AB |
| 15 | Mia Green | 33 | Musician | UK | mia.g@example.com | 555-9999 | 975 Pine St, Edinburgh, UK |
| 16 | Noah Yellow | 29 | Doctor | Australia | noah.y@example.com | 555-0000 | 864 Birch St, Brisbane, QLD |
| 17 | Olivia Blue | 35 | Engineer | New Zealand | olivia.b@example.com | 555-1212 | 753 Maple St, Hamilton, NZ |
| 18 | Peter Black | 42 | Artist | Ireland | peter.b@example.com | 555-3434 | 912 Fir St, Limerick, IE |
| 19 | Quinn White | 28 | Scientist | USA | quinn.w@example.com | 555-5656 | 159 Willow St, Seattle, WA |
| 20 | Rachel Red | 31 | Teacher | Canada | rachel.r@example.com | 555-7878 | 357 Poplar St, Ottawa, ON |
| 21 | Steve Green | 44 | Lawyer | UK | steve.g@example.com | 555-9090 | 753 Elm St, Birmingham, UK |
| 22 | Tina Blue | 36 | Musician | Australia | tina.b@example.com | 555-1213 | 864 Cedar St, Perth, WA |
| 23 | Umar Black | 39 | Chef | New Zealand | umar.b@example.com | 555-3435 | 975 Spruce St, Christchurch, NZ|
| 24 | Victor Yellow | 43 | Engineer | Ireland | victor.y@example.com | 555-5657 | 246 Willow St, Galway, IE |
| 25 | Wendy Orange | 27 | Artist | USA | wendy.o@example.com | 555-7879 | 135 Elm St, Denver, CO |
| 26 | Xavier Green | 34 | Scientist | Canada | xavier.g@example.com | 555-9091 | 357 Oak St, Montreal, QC |
| 27 | Yara Red | 41 | Teacher | UK | yara.r@example.com | 555-1214 | 975 Pine St, Leeds, UK |
| 28 | Zack Blue | 30 | Lawyer | Australia | zack.b@example.com | 555-3436 | 135 Birch St, Adelaide, SA |
| 29 | Amy White | 33 | Musician | New Zealand | amy.w@example.com | 555-5658 | 159 Maple St, Wellington, NZ |
| 30 | Ben Black | 38 | Chef | Ireland | ben.b@example.com | 555-7870 | 246 Fir St, Waterford, IE |
"""
| 54 | + |
# Two prompts that share LONG_PROMPT as their prefix and differ only in the
# person the trailing question asks about.
INPUT_PROMPTS = [
    LONG_PROMPT +
    f"Question: what is the age of {person}? Your answer: The age of {person} is "
    for person in ("John Doe", "Zack Blue")
]
| 61 | + |
| 62 | + |
@pytest.mark.skipif(
    os.getenv("VLLM_USE_V1") == "0",
    reason="prefix caching comparison requires the V1 engine (VLLM_USE_V1 != 0)")
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("max_tokens", [50])
def test_prefix_cache_with_v1_scheduler(model: str, max_tokens: int) -> None:
    """Check that prefix caching does not change greedy outputs.

    INPUT_PROMPTS is generated greedily twice with otherwise identical engine
    settings: once with prefix caching enabled and once with it disabled. The
    two result lists must be equal.

    Args:
        model: Model identifier to load (one entry of MODELS).
        max_tokens: Number of tokens to generate per prompt.
    """
    # Enable prefix caching explicitly instead of relying on the engine
    # default, so the test cannot silently degrade into comparing two
    # uncached runs.
    with VllmRunner(model,
                    enable_prefix_caching=True,
                    enforce_eager=True,
                    max_model_len=2048,
                    tensor_parallel_size=2,
                    gpu_memory_utilization=0.7) as vllm_model:
        prefix_cache_output = vllm_model.generate_greedy(
            INPUT_PROMPTS, max_tokens)

    # Baseline: same settings with prefix caching turned off. The first runner
    # has already been exited before this one is created.
    with VllmRunner(model,
                    enable_prefix_caching=False,
                    enforce_eager=True,
                    max_model_len=2048,
                    tensor_parallel_size=2,
                    gpu_memory_utilization=0.7) as vllm_model:
        vllm_output = vllm_model.generate_greedy(INPUT_PROMPTS, max_tokens)

    check_outputs_equal(
        outputs_0_lst=vllm_output,
        outputs_1_lst=prefix_cache_output,
        name_0="vllm_output",
        name_1="prefix_cache_output",
    )
| 90 | + |
| 91 | + |
def _generate_with_ascend_scheduler(model: str, max_tokens: int,
                                    **scheduler_flags: bool):
    """Greedy-generate INPUT_PROMPTS with the AscendScheduler enabled.

    Args:
        model: Model identifier to load.
        max_tokens: Number of tokens to generate per prompt.
        **scheduler_flags: Extra entries merged into
            ``ascend_scheduler_config`` after ``'enabled': True``.

    Returns:
        Whatever ``VllmRunner.generate_greedy`` returns for INPUT_PROMPTS.
    """
    # The runner is exited (on return) before the caller builds the next one,
    # so only one engine is alive at a time, as in the original inline code.
    with VllmRunner(model,
                    additional_config={
                        "ascend_scheduler_config": {
                            "enabled": True,
                            **scheduler_flags,
                        },
                    },
                    enforce_eager=True,
                    max_model_len=2048,
                    tensor_parallel_size=2,
                    gpu_memory_utilization=0.7) as vllm_model:
        return vllm_model.generate_greedy(INPUT_PROMPTS, max_tokens)


@pytest.mark.skipif(
    os.getenv("VLLM_USE_V1") == "0",
    reason="prefix caching comparison requires the V1 engine (VLLM_USE_V1 != 0)")
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("max_tokens", [50])
def test_prefix_cache_with_ascend_scheduler(model: str,
                                            max_tokens: int) -> None:
    """Check AscendScheduler outputs are unaffected by prefix caching.

    Three greedy runs over INPUT_PROMPTS are compared:

    1. AscendScheduler enabled, no extra flags (baseline).
    2. AscendScheduler with ``enable_prefix_caching``.
    3. AscendScheduler with ``enable_prefix_caching`` and
       ``enable_chunked_prefill``.

    Run 2 must equal run 1, and run 3 must equal run 2.

    Args:
        model: Model identifier to load (one entry of MODELS).
        max_tokens: Number of tokens to generate per prompt.
    """
    vllm_output = _generate_with_ascend_scheduler(model, max_tokens)

    prefix_cache_output = _generate_with_ascend_scheduler(
        model, max_tokens, enable_prefix_caching=True)

    chunk_prefill_prefix_cache_output = _generate_with_ascend_scheduler(
        model,
        max_tokens,
        enable_prefix_caching=True,
        enable_chunked_prefill=True)

    check_outputs_equal(
        outputs_0_lst=vllm_output,
        outputs_1_lst=prefix_cache_output,
        name_0="vllm_output",
        name_1="prefix_cache_output",
    )

    check_outputs_equal(
        outputs_0_lst=chunk_prefill_prefix_cache_output,
        outputs_1_lst=prefix_cache_output,
        name_0="chunk_prefill_prefix_cache_output",
        name_1="prefix_cache_output",
    )
0 commit comments