class SwiGLUActivation(nn.Module):

    def forward(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:
-        # print(f"x1 shape: {x1.shape}, x2 shape: {x2.shape}")
        return x1 * nn.functional.silu(x2)

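For reference, SwiGLU gates one projection of the input with the SiLU of another. Below is a minimal sketch of how this activation is typically wired into a feed-forward block; the SwiGLUMLPSketch class, the gate_proj/up_proj/down_proj names, and the d_ff size are illustrative assumptions, not taken from this file.

import torch
import torch.nn as nn

class SwiGLUMLPSketch(nn.Module):
    # Illustrative only: a feed-forward block built around the SwiGLUActivation above.
    def __init__(self, d_model: int, d_ff: int):
        super().__init__()
        self.up_proj = nn.Linear(d_model, d_ff, bias=False)    # value branch (x1)
        self.gate_proj = nn.Linear(d_model, d_ff, bias=False)  # gate branch (x2)
        self.down_proj = nn.Linear(d_ff, d_model, bias=False)
        self.act = SwiGLUActivation()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x1 * silu(x2): the up projection is modulated by the SiLU-gated projection
        return self.down_proj(self.act(self.up_proj(x), self.gate_proj(x)))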
@@ -175,7 +174,7 @@ def forward(
        return self.out_proj(attn_output)


-class Phi3Mamba(nn.Module):
+class Phi4Mamba(nn.Module):
    def __init__(
        self,
        d_model,
@@ -250,15 +249,6 @@ def __init__(
            params_dtype=dtype,
        )

-        # # S4D real initialization
-        # A = repeat(
-        #     torch.arange(1, self.d_state + 1, dtype=torch.float32),
-        #     "n -> d n",
-        #     d=self.d_inner,
-        # ).contiguous()
-        # A_log = torch.log(A)  # Keep A_log in fp32
-        # self.A_log = nn.Parameter(A_log)
-
        # # D "skip" parameter
        # self.D = nn.Parameter(torch.ones(self.d_inner))  # Keep in fp32
        self.A = nn.Parameter(
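For context, the commented-out block removed above is the standard S4D real initialization: A[d, n] = n for n = 1..d_state, broadcast across all d_inner channels and stored in log space. A standalone sketch of that computation, assuming placeholder sizes for d_state and d_inner:

import torch
from einops import repeat

d_state, d_inner = 16, 8192  # placeholder sizes; the real ones come from the module config

# Every channel gets the same row [1, 2, ..., d_state]
A = repeat(
    torch.arange(1, d_state + 1, dtype=torch.float32),
    "n -> d n",
    d=d_inner,
).contiguous()
A_log = torch.log(A)  # kept in fp32, stored in log space
A_log_param = torch.nn.Parameter(A_log)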
@@ -417,7 +407,7 @@ def __init__(self,
        self.use_mamba = config.mb_per_layer > 0 and layer_idx % config.mb_per_layer == 0
        if self.use_mamba:
            factory_kwargs = {"dtype": None}
-            self.attn = Phi3Mamba(config.hidden_size, layer_idx=layer_idx,
+            self.attn = Phi4Mamba(config.hidden_size, layer_idx=layer_idx,
                                  yoco_cross=self.yoco_cross, yoco_kv=self.yoco_mb, **factory_kwargs)
        else:
            self.attn = SambaYAttention(config, layer_idx=layer_idx, yoco_cross=self.yoco_cross, cache_config=cache_config, prefix=f"{prefix}.self_attn")
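The mb_per_layer check above decides the hybrid layout: every mb_per_layer-th layer gets a Phi4Mamba mixer and the rest get SambaYAttention. A small illustrative sketch of the resulting layout, assuming made-up values for mb_per_layer and num_layers:

mb_per_layer = 2   # placeholder; the real value is config.mb_per_layer
num_layers = 8     # placeholder

for layer_idx in range(num_layers):
    use_mamba = mb_per_layer > 0 and layer_idx % mb_per_layer == 0
    print(layer_idx, "Phi4Mamba" if use_mamba else "SambaYAttention")
# 0 Phi4Mamba, 1 SambaYAttention, 2 Phi4Mamba, 3 SambaYAttention, ...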
@@ -590,7 +580,7 @@ def forward(
        return hidden_states


-class Phi4MiniFlashForCausalLM(nn.Module, HasInnerState, IsHybrid, SupportsV0Only):
+class Phi4FlashForCausalLM(nn.Module, HasInnerState, IsHybrid, SupportsV0Only):

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        config = vllm_config.model_config.hf_config
@@ -603,7 +593,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        # Prefix caching is not supported since there are mamba layers in this
        # mode.
        assert not cache_config.enable_prefix_caching, \
-            "SambaY currently does not support prefix caching"
+            "Phi4flash currently does not support prefix caching"

        super().__init__()
        self.config = config
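Because of the assert above, the engine has to be constructed with prefix caching disabled for this model. A hedged usage sketch; the model path is a placeholder, not a real checkpoint id:

from vllm import LLM

llm = LLM(
    model="path/to/phi4-flash-checkpoint",  # placeholder path
    enable_prefix_caching=False,            # required by the assert in Phi4FlashForCausalLM
)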