Skip to content

Commit 9ae5d95

Browse files
address comments
Signed-off-by: Congcong Chen <congcongchen@microsoft.com>
1 parent 11e30fe commit 9ae5d95

File tree

2 files changed

+4
-2
lines changed

2 files changed

+4
-2
lines changed

tests/models/registry.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -247,7 +247,8 @@ def check_available_online(
247247
trust_remote_code=True,
248248
v0_only=True),
249249
"Phi4FlashForCausalLM": _HfExamplesInfo("microsoft/Phi-4-mini-flash-reasoning", # noqa: E501
250-
trust_remote_code=True),
250+
trust_remote_code=True,
251+
v0_only=True),
251252
"PhiMoEForCausalLM": _HfExamplesInfo("microsoft/Phi-3.5-MoE-instruct",
252253
trust_remote_code=True),
253254
"Plamo2ForCausalLM": _HfExamplesInfo("pfnet/plamo-2-1b",

vllm/model_executor/models/phi4flash.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
import torch.nn as nn
99
from transformers.activations import ACT2FN
1010

11+
import vllm.envs as envs
1112
from vllm.attention import Attention, AttentionMetadata, AttentionType
1213
from vllm.attention.selector import _Backend
1314
from vllm.config import CacheConfig, VllmConfig
@@ -563,7 +564,7 @@ def forward(
563564
# the kv cache since we reuse the kv cache from last layer.
564565
# If in prefill phase, we can truncate
565566
# the hidden state to save computation cost.
566-
if attn_metadata.prefill_metadata:
567+
if attn_metadata.prefill_metadata and not envs.VLLM_USE_V1:
567568
selected_token_indices = torch.cumsum(
568569
attn_metadata.seq_lens_tensor, dim=0) - 1
569570
hidden_states = hidden_states.index_select(

0 commit comments

Comments (0)