1 file changed: 5 additions (+5), 3 deletions (-3).
Columns: original file line number | diff line number | diff line change
13
13
from vllm .model_executor .model_loader import get_model
14
14
from vllm .model_executor .models import supports_multimodal
15
15
from vllm .model_executor .models .llama_eagle3 import Eagle3LlamaForCausalLM
16
+ from vllm .utils import is_pin_memory_available
16
17
from vllm .v1 .attention .backends .flash_attn import FlashAttentionMetadata
17
18
from vllm .v1 .attention .backends .utils import CommonAttentionMetadata
18
19
from vllm .v1 .kv_cache_interface import KVCacheConfig
@@ -281,9 +282,10 @@ def prepare_inputs(
281
282
282
283
# [q1 - n1, q2 - n2, q3 - n3] ->
283
284
# [0, q1 - n1, q1 + q2 - n1 - n2, q1 + q2 + q3 - n1 - n2 - n3]
284
- new_query_start_loc_cpu = torch .zeros (query_start_loc_cpu .shape ,
285
- dtype = torch .int32 ,
286
- pin_memory = True )
285
+ new_query_start_loc_cpu = torch .zeros (
286
+ query_start_loc_cpu .shape ,
287
+ dtype = torch .int32 ,
288
+ pin_memory = is_pin_memory_available ())
287
289
new_query_start_loc_np = new_query_start_loc_cpu .numpy ()
288
290
np .cumsum (new_num_tokens_per_req_np , out = new_query_start_loc_np [1 :])
289
291
You can’t perform that action at this time.
0 commit comments