
Commit bc52add

address comments
Signed-off-by: Congcong Chen <congcongchen@microsoft.com>
1 parent: a01a2a0

File tree

2 files changed: 4 additions, 2 deletions

tests/models/registry.py

Lines changed: 2 additions & 1 deletion
@@ -249,7 +249,8 @@ def check_available_online(
                                           trust_remote_code=True,
                                           v0_only=True),
     "Phi4FlashForCausalLM": _HfExamplesInfo("microsoft/Phi-4-mini-flash-reasoning",  # noqa: E501
-                                            trust_remote_code=True),
+                                            trust_remote_code=True,
+                                            v0_only=True),
     "PhiMoEForCausalLM": _HfExamplesInfo("microsoft/Phi-3.5-MoE-instruct",
                                          trust_remote_code=True),
     "Plamo2ForCausalLM": _HfExamplesInfo("pfnet/plamo-2-1b",

vllm/model_executor/models/phi4flash.py

Lines changed: 2 additions & 1 deletion
@@ -8,6 +8,7 @@
 import torch.nn as nn
 from transformers.activations import ACT2FN
 
+import vllm.envs as envs
 from vllm.attention import Attention, AttentionMetadata, AttentionType
 from vllm.attention.selector import _Backend
 from vllm.config import CacheConfig, VllmConfig
@@ -563,7 +564,7 @@ def forward(
         # the kv cache since we reuse the kv cache from last layer.
         # If in prefill phase, we can truncate
         # the hidden state to save computation cost.
-        if attn_metadata.prefill_metadata:
+        if attn_metadata.prefill_metadata and not envs.VLLM_USE_V1:
             selected_token_indices = torch.cumsum(
                 attn_metadata.seq_lens_tensor, dim=0) - 1
             hidden_states = hidden_states.index_select(
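The guarded branch relies on V0-style AttentionMetadata, where seq_lens_tensor carries per-sequence lengths for a flattened prefill batch: the cumulative sum minus one gives the index of each sequence's last token, and index_select keeps only those rows, which is typically all prefill needs for next-token logits. With this change the truncation is skipped when the V1 engine is in use. A standalone sketch of the same indexing trick, using dummy tensors rather than vLLM objects, is below.

    import torch

    # Toy flattened prefill batch: three prompts of lengths 2, 3 and 4,
    # packed into a single (num_tokens, hidden_size) tensor.
    seq_lens_tensor = torch.tensor([2, 3, 4])
    hidden_states = torch.randn(int(seq_lens_tensor.sum()), 8)

    # Cumulative lengths minus one yield the flat index of the last
    # token of each sequence.
    selected_token_indices = torch.cumsum(seq_lens_tensor, dim=0) - 1
    truncated = hidden_states.index_select(0, selected_token_indices)

    assert truncated.shape == (3, 8)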
