Skip to content

Commit ccd6331

Browse files
gate pinned memory
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
1 parent ca9e6f5 commit ccd6331

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

vllm/v1/spec_decode/eagle.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
 from vllm.model_executor.model_loader import get_model
 from vllm.model_executor.models import supports_multimodal
 from vllm.model_executor.models.llama_eagle3 import Eagle3LlamaForCausalLM
+from vllm.utils import is_pin_memory_available
 from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata
 from vllm.v1.attention.backends.utils import CommonAttentionMetadata
 from vllm.v1.kv_cache_interface import KVCacheConfig
@@ -281,9 +282,10 @@ def prepare_inputs(
 
         # [q1 - n1, q2 - n2, q3 - n3] ->
         # [0, q1 - n1, q1 + q2 - n1 - n2, q1 + q2 + q3 - n1 - n2 - n3]
-        new_query_start_loc_cpu = torch.zeros(query_start_loc_cpu.shape,
-                                              dtype=torch.int32,
-                                              pin_memory=True)
+        new_query_start_loc_cpu = torch.zeros(
+            query_start_loc_cpu.shape,
+            dtype=torch.int32,
+            pin_memory=is_pin_memory_available())
         new_query_start_loc_np = new_query_start_loc_cpu.numpy()
         np.cumsum(new_num_tokens_per_req_np, out=new_query_start_loc_np[1:])
 
0 commit comments

Comments
 (0)