Skip to content

Commit d9b4b3f

Browse files
authored
[Bug][CLI] Allow users to disable prefix caching explicitly (#10724)
Signed-off-by: rickyx <rickyx@anyscale.com>
1 parent 278be67 commit d9b4b3f

File tree

3 files changed: 45 additions (+45) and 3 deletions (-3)

tests/engine/test_arg_utils.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,25 @@ def test_compilation_config():
5959
assert args.compilation_config.level == 3
6060

6161

62+
def test_prefix_cache_default():
63+
parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
64+
args = parser.parse_args([])
65+
66+
engine_args = EngineArgs.from_cli_args(args=args)
67+
assert (not engine_args.enable_prefix_caching
68+
), "prefix caching defaults to off."
69+
70+
# with flag to turn it on.
71+
args = parser.parse_args(["--enable-prefix-caching"])
72+
engine_args = EngineArgs.from_cli_args(args=args)
73+
assert engine_args.enable_prefix_caching
74+
75+
# with disable flag to turn it off.
76+
args = parser.parse_args(["--no-enable-prefix-caching"])
77+
engine_args = EngineArgs.from_cli_args(args=args)
78+
assert not engine_args.enable_prefix_caching
79+
80+
6281
def test_valid_pooling_config():
6382
parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
6483
args = parser.parse_args([

tests/v1/engine/test_engine_args.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
from vllm.config import VllmConfig
55
from vllm.engine.arg_utils import EngineArgs
66
from vllm.usage.usage_lib import UsageContext
7+
from vllm.utils import FlexibleArgumentParser
78

89
if not envs.VLLM_USE_V1:
910
pytest.skip(
@@ -12,6 +13,24 @@
1213
)
1314

1415

16+
def test_prefix_caching_from_cli():
17+
parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
18+
args = parser.parse_args([])
19+
engine_args = EngineArgs.from_cli_args(args=args)
20+
assert (engine_args.enable_prefix_caching
21+
), "V1 turns on prefix caching by default."
22+
23+
# Turning it off is possible with the flag.
24+
args = parser.parse_args(["--no-enable-prefix-caching"])
25+
engine_args = EngineArgs.from_cli_args(args=args)
26+
assert not engine_args.enable_prefix_caching
27+
28+
# Turn it on with flag.
29+
args = parser.parse_args(["--enable-prefix-caching"])
30+
engine_args = EngineArgs.from_cli_args(args=args)
31+
assert engine_args.enable_prefix_caching
32+
33+
1534
def test_defaults():
1635
engine_args = EngineArgs(model="facebook/opt-125m")
1736

vllm/engine/arg_utils.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -416,9 +416,13 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
416416
'tokens. This is ignored on neuron devices and '
417417
'set to max-model-len')
418418

419-
parser.add_argument('--enable-prefix-caching',
420-
action='store_true',
421-
help='Enables automatic prefix caching.')
419+
parser.add_argument(
420+
"--enable-prefix-caching",
421+
action=argparse.BooleanOptionalAction,
422+
default=EngineArgs.enable_prefix_caching,
423+
help="Enables automatic prefix caching. "
424+
"Use --no-enable-prefix-caching to disable explicitly.",
425+
)
422426
parser.add_argument('--disable-sliding-window',
423427
action='store_true',
424428
help='Disables sliding window, '

0 commit comments

Comments
 (0)