Skip to content

Commit 062ad97

Browse files
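Fixed critical issues: use page_size_bytes (instead of state_size_bytes) for the Mamba state size, and apply the optimal block size to all KV cache specs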
Signed-off-by: nadathurv <work.vnadathur@gmail.com>
Signed-off-by: Srreyansh Sethi <srreyansh.sethi@gmail.com>
Co-Authored-By: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com>
Co-Authored-By: nadathurv <218520480+nadathurv@users.noreply.github.com>
1 parent 7492b12 commit 062ad97

File tree

2 files changed

+5
-7
lines changed

2 files changed

+5
-7
lines changed

vllm/v1/core/kv_cache_coordinator.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -391,7 +391,7 @@ def calculate_optimal_block_size(kv_cache_spec: dict[str, KVCacheSpec]) -> int:
391391
if not (attention_specs and mamba_specs):
392392
return attention_specs[0].block_size if attention_specs else 16
393393

394-
max_mamba_state = max(s.state_size_bytes for s in mamba_specs)
394+
max_mamba_state = max(s.page_size_bytes for s in mamba_specs)
395395
num_attention_layers = len(attention_specs)
396396
min_per_token_bytes = min(s.page_size_bytes / s.block_size for s in attention_specs)
397397

vllm/v1/core/kv_cache_utils.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -915,12 +915,10 @@ def _get_kv_cache_config_optimal_block_size(vllm_config, kv_cache_spec, availabl
915915
# Update specs with optimal size
916916
updated_specs = {}
917917
for name, spec in kv_cache_spec.items():
918-
if hasattr(spec, 'block_size'): # AttentionSpec
919-
new_spec = copy.deepcopy(spec)
920-
new_spec.block_size = optimal_block_size
921-
updated_specs[name] = new_spec
922-
else:
923-
updated_specs[name] = spec
918+
# The optimal block size is applied to all specs to ensure uniformity.
919+
new_spec = copy.deepcopy(spec)
920+
new_spec.block_size = optimal_block_size
921+
updated_specs[name] = new_spec
924922

925923
# Use existing logic
926924
return _get_kv_cache_config_uniform_page_size(vllm_config, updated_specs, available_memory)

0 commit comments

Comments
 (0)