@@ -1623,7 +1623,7 @@ def _dummy_run(
1623
1623
attn_metadata = None
1624
1624
1625
1625
1626
- if not is_compile and not is_profile_run and self .dynamic_eplb :
1626
+ if not is_torchair_compile and not is_profile_run and self .dynamic_eplb :
1627
1627
self .eplb_updator .forward_before ()
1628
1628
1629
1629
with self .maybe_dummy_run_with_lora (self .lora_config ,
@@ -1705,7 +1705,7 @@ def _dummy_run(
1705
1705
self .drafter .dummy_run (num_reqs , with_prefill = with_prefill )
1706
1706
if is_profile_run and self .dynamic_eplb :
1707
1707
self .model .clear_all_moe_loads ()
1708
- if not is_compile and not is_profile_run and self .dynamic_eplb :
1708
+ if not is_torchair_compile and not is_profile_run and self .dynamic_eplb :
1709
1709
self .eplb_updator .forward_end ()
1710
1710
return hidden_states
1711
1711
@@ -1868,14 +1868,13 @@ def align_memory(tensor: torch.Tensor, alignment: int) -> torch.Tensor:
1868
1868
block_sizes = [self .cache_config .block_size ],
1869
1869
)
1870
1870
1871
- if not vllm_version_is ("0.9.0" ):
1872
- kv_cache_sizes = {}
1873
- for kv_cache_tensor in kv_cache_config .kv_cache_tensors :
1874
- assert len (kv_cache_tensor .shared_by ) == 1 , (
1875
- "KV cache tensor shared by multiple layers is not supported in "
1876
- "NPU." )
1877
- kv_cache_sizes [
1878
- kv_cache_tensor .shared_by [0 ]] = kv_cache_tensor .size
1871
+ kv_cache_sizes = {}
1872
+ for kv_cache_tensor in kv_cache_config .kv_cache_tensors :
1873
+ assert len (kv_cache_tensor .shared_by ) == 1 , (
1874
+ "KV cache tensor shared by multiple layers is not supported in "
1875
+ "NPU." )
1876
+ kv_cache_sizes [
1877
+ kv_cache_tensor .shared_by [0 ]] = kv_cache_tensor .size
1879
1878
1880
1879
for kv_cache_group in kv_cache_config .kv_cache_groups :
1881
1880
kv_cache_spec = kv_cache_group .kv_cache_spec
0 commit comments