Fixed critical issues: use page_size_bytes for Mamba specs and apply the optimal block size to all KV cache specs

nadathurv · WorldExplored · nadathurv · commit 062ad97d93d6 · 2025-07-16T11:17:05.000-07:00
Signed-off-by: nadathurv <work.vnadathur@gmail.com>
Signed-off-by: Srreyansh Sethi <srreyansh.sethi@gmail.com>
Co-Authored-By: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com>
Co-Authored-By: nadathurv <218520480+nadathurv@users.noreply.github.com>
diff --git a/vllm/v1/core/kv_cache_coordinator.py b/vllm/v1/core/kv_cache_coordinator.py
@@ -391,7 +391,7 @@ def calculate_optimal_block_size(kv_cache_spec: dict[str, KVCacheSpec]) -> int:
         if not (attention_specs and mamba_specs):
             return attention_specs[0].block_size if attention_specs else 16
         
-        max_mamba_state = max(s.state_size_bytes for s in mamba_specs)
+        max_mamba_state = max(s.page_size_bytes for s in mamba_specs)
         num_attention_layers = len(attention_specs)
         min_per_token_bytes = min(s.page_size_bytes / s.block_size for s in attention_specs)
         
diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
@@ -915,12 +915,10 @@ def _get_kv_cache_config_optimal_block_size(vllm_config, kv_cache_spec, availabl
     # Update specs with optimal size
     updated_specs = {}
     for name, spec in kv_cache_spec.items():
-        if hasattr(spec, 'block_size'):  # AttentionSpec
-            new_spec = copy.deepcopy(spec)
-            new_spec.block_size = optimal_block_size
-            updated_specs[name] = new_spec
-        else:
-            updated_specs[name] = spec
+        # The optimal block size is applied to all specs to ensure uniformity.
+        new_spec = copy.deepcopy(spec)
+        new_spec.block_size = optimal_block_size
+        updated_specs[name] = new_spec
     
     # Use existing logic
     return _get_kv_cache_config_uniform_page_size(vllm_config, updated_specs, available_memory)