Commit 0923e34

Disable prefix caching when model is attention free
Signed-off-by: Christian Pinto <christian.pinto@ibm.com>
1 parent: 673aeb0

File tree (3 files changed: +11 −2 lines)

  vllm/config.py
  vllm/v1/core/kv_cache_coordinator.py
  vllm/v1/core/kv_cache_manager.py

vllm/config.py

Lines changed: 9 additions & 0 deletions
@@ -4722,6 +4722,15 @@ def __post_init__(self):
             if self.cache_config is not None:
                 self.cache_config.enable_prefix_caching = False

+        if self.model_config.is_attention_free:
+            # If the model is not of pooling type and it is attention free,
+            # we make sure prefix_caching is disabled so that the correct
+            # KVCacheCoordinator is loaded during initialization.
+            if self.cache_config is not None:
+                logger.info("This is an attention free model, "
+                            "disabling prefix caching.")
+                self.cache_config.enable_prefix_caching = False
+
         if (self.kv_events_config is not None
                 and self.kv_events_config.enable_kv_cache_events
                 and not self.cache_config.enable_prefix_caching):
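
For illustration, a minimal sketch of what the added guard does, with ModelConfig and CacheConfig reduced to hypothetical stand-ins holding only the fields used here (the real vLLM dataclasses carry many more):

from dataclasses import dataclass


@dataclass
class ModelConfig:          # hypothetical stand-in
    is_attention_free: bool = False


@dataclass
class CacheConfig:          # hypothetical stand-in
    enable_prefix_caching: bool = True


def post_init_guard(model_config: ModelConfig,
                    cache_config: CacheConfig) -> None:
    # Mirrors the added __post_init__ branch: attention-free models get
    # prefix caching forced off so that the no-prefix-cache
    # KVCacheCoordinator is selected during initialization.
    if model_config.is_attention_free and cache_config is not None:
        cache_config.enable_prefix_caching = False


cache = CacheConfig(enable_prefix_caching=True)
post_init_guard(ModelConfig(is_attention_free=True), cache)
assert cache.enable_prefix_caching is False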

vllm/v1/core/kv_cache_coordinator.py

Lines changed: 1 addition & 1 deletion
@@ -388,7 +388,7 @@ def get_kv_cache_coordinator(
         kv_cache_config: KVCacheConfig, max_model_len: int, use_eagle: bool,
         enable_caching: bool, caching_hash_fn: Callable,
         enable_kv_cache_events: bool) -> KVCacheCoordinator:
-    if not enable_caching or len(kv_cache_config.kv_cache_groups) == 0:
+    if not enable_caching:
         # We instantiate this coordinator also for attention free models that
         # have 0 kv_cache_groups
         return KVCacheCoordinatorNoPrefixCache(kv_cache_config, max_model_len,
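
The effect of the one-line change, sketched with the coordinator choice reduced to a string (the real factory also threads max_model_len, use_eagle, the hashing function and the event flag into the chosen coordinator):

def choose_coordinator(enable_caching: bool) -> str:
    # Sketch only: with the config-level guard above, attention-free models
    # (which have zero KV cache groups) always arrive with enable_caching
    # False, so the explicit len(kv_cache_groups) == 0 test can be dropped.
    if not enable_caching:
        return "KVCacheCoordinatorNoPrefixCache"
    return "caching coordinator"


assert choose_coordinator(False) == "KVCacheCoordinatorNoPrefixCache"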

vllm/v1/core/kv_cache_manager.py

Lines changed: 1 addition & 1 deletion
@@ -89,7 +89,7 @@ def __init__(
         self.prefix_cache_stats = PrefixCacheStats() if log_stats else None

         self.block_size: Optional[int] = None
-        if self.enable_caching and len(kv_cache_config.kv_cache_groups) > 0:
+        if self.enable_caching:
             assert len(
                 set(g.kv_cache_spec.block_size
                     for g in kv_cache_config.kv_cache_groups)
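
A sketch of the simplified block-size setup, with KVCacheGroup and KVCacheSpec replaced by hypothetical stand-ins; the assert is assumed to require a single block size across all groups, matching the set-based check that the hunk above truncates:

from dataclasses import dataclass
from typing import Optional


@dataclass
class KVCacheSpec:          # hypothetical stand-in
    block_size: int


@dataclass
class KVCacheGroup:         # hypothetical stand-in
    kv_cache_spec: KVCacheSpec


def derive_block_size(enable_caching: bool,
                      groups: list[KVCacheGroup]) -> Optional[int]:
    # With prefix caching forced off for attention-free models, reaching this
    # point with enable_caching=True implies at least one KV cache group, so
    # the former len(...) > 0 guard is redundant.
    if not enable_caching:
        return None
    block_sizes = {g.kv_cache_spec.block_size for g in groups}
    assert len(block_sizes) == 1
    return block_sizes.pop()


assert derive_block_size(False, []) is None                        # attention-free path
assert derive_block_size(True, [KVCacheGroup(KVCacheSpec(16))]) == 16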
