File tree Expand file tree Collapse file tree 3 files changed +45
-3
lines changed Expand file tree Collapse file tree 3 files changed +45
-3
lines changed Original file line number Diff line number Diff line change @@ -59,6 +59,25 @@ def test_compilation_config():
59
59
assert args .compilation_config .level == 3
60
60
61
61
62
+ def test_prefix_cache_default ():
63
+ parser = EngineArgs .add_cli_args (FlexibleArgumentParser ())
64
+ args = parser .parse_args ([])
65
+
66
+ engine_args = EngineArgs .from_cli_args (args = args )
67
+ assert (not engine_args .enable_prefix_caching
68
+ ), "prefix caching defaults to off."
69
+
70
+ # with flag to turn it on.
71
+ args = parser .parse_args (["--enable-prefix-caching" ])
72
+ engine_args = EngineArgs .from_cli_args (args = args )
73
+ assert engine_args .enable_prefix_caching
74
+
75
+ # with disable flag to turn it off.
76
+ args = parser .parse_args (["--no-enable-prefix-caching" ])
77
+ engine_args = EngineArgs .from_cli_args (args = args )
78
+ assert not engine_args .enable_prefix_caching
79
+
80
+
62
81
def test_valid_pooling_config ():
63
82
parser = EngineArgs .add_cli_args (FlexibleArgumentParser ())
64
83
args = parser .parse_args ([
Original file line number Diff line number Diff line change 4
4
from vllm .config import VllmConfig
5
5
from vllm .engine .arg_utils import EngineArgs
6
6
from vllm .usage .usage_lib import UsageContext
7
+ from vllm .utils import FlexibleArgumentParser
7
8
8
9
if not envs .VLLM_USE_V1 :
9
10
pytest .skip (
12
13
)
13
14
14
15
16
+ def test_prefix_caching_from_cli ():
17
+ parser = EngineArgs .add_cli_args (FlexibleArgumentParser ())
18
+ args = parser .parse_args ([])
19
+ engine_args = EngineArgs .from_cli_args (args = args )
20
+ assert (engine_args .enable_prefix_caching
21
+ ), "V1 turns on prefix caching by default."
22
+
23
+ # Turning it off is possible with the flag.
24
+ args = parser .parse_args (["--no-enable-prefix-caching" ])
25
+ engine_args = EngineArgs .from_cli_args (args = args )
26
+ assert not engine_args .enable_prefix_caching
27
+
28
+ # Turn it on with flag.
29
+ args = parser .parse_args (["--enable-prefix-caching" ])
30
+ engine_args = EngineArgs .from_cli_args (args = args )
31
+ assert engine_args .enable_prefix_caching
32
+
33
+
15
34
def test_defaults ():
16
35
engine_args = EngineArgs (model = "facebook/opt-125m" )
17
36
Original file line number Diff line number Diff line change @@ -416,9 +416,13 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
416
416
'tokens. This is ignored on neuron devices and '
417
417
'set to max-model-len' )
418
418
419
- parser .add_argument ('--enable-prefix-caching' ,
420
- action = 'store_true' ,
421
- help = 'Enables automatic prefix caching.' )
419
+ parser .add_argument (
420
+ "--enable-prefix-caching" ,
421
+ action = argparse .BooleanOptionalAction ,
422
+ default = EngineArgs .enable_prefix_caching ,
423
+ help = "Enables automatic prefix caching. "
424
+ "Use --no-enable-prefix-caching to disable explicitly." ,
425
+ )
422
426
parser .add_argument ('--disable-sliding-window' ,
423
427
action = 'store_true' ,
424
428
help = 'Disables sliding window, '
You can’t perform that action at this time.
0 commit comments