@@ -49,7 +49,7 @@ def forward(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:
         return x1 * nn.functional.silu(x2)
 
 
-class SambaMLP(nn.Module):
+class SambaYMLP(nn.Module):
     """Gated Linear Unit.
 
     Reference:
@@ -78,7 +78,7 @@ def get_virtual_engine():
     forward_context: ForwardContext = get_forward_context()
     return forward_context.virtual_engine
 
-class SambaAttention(nn.Module):
+class SambaYAttention(nn.Module):
     def __init__(self,
                  config,
                  layer_idx: Optional[int] = None,
@@ -391,7 +391,7 @@ def forward(
         return contextualized_states, yoco_key_values
 
 
-class SambaDecoderLayer(nn.Module):
+class SambaYDecoderLayer(nn.Module):
 
     def __init__(self,
                  config,
@@ -403,13 +403,13 @@ def __init__(self,
         self.config = config
         self.layer_idx = layer_idx
 
-        self.mlp = SambaMLP(config)
+        self.mlp = SambaYMLP(config)
         self.input_layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
 
         self.yoco_mb = False
         self.yoco_kv = False
         self.yoco_cross = False
-        assert config.num_hidden_layers % 4 == 0, 'n_layer should be divisible by 4 for samba + yoco'
+        assert config.num_hidden_layers % 4 == 0, 'n_layer should be divisible by 4 for SambaY + yoco'
         if layer_idx >= config.num_hidden_layers // 2:
             self.yoco_mb = True
             self.yoco_kv = (layer_idx >= (config.num_hidden_layers // 2 + 1))
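For readers skimming the diff: the flags set in this hunk implement the YOCO-style split, in which only the lower half of the stack keeps its own KV cache and the upper half reuses it. Below is a minimal standalone sketch of that flag assignment, assuming a hypothetical num_hidden_layers of 8 purely for illustration (the yoco_cross assignment and the attention-vs-Mamba choice in the next hunk are not reproduced here):

num_hidden_layers = 8  # hypothetical; must be divisible by 4 per the assert above
assert num_hidden_layers % 4 == 0, 'n_layer should be divisible by 4 for SambaY + yoco'

for layer_idx in range(num_hidden_layers):
    # Mirrors the hunk: yoco_mb marks the upper half of the stack,
    # yoco_kv everything in the upper half except its first layer.
    yoco_mb = layer_idx >= num_hidden_layers // 2
    yoco_kv = yoco_mb and layer_idx >= num_hidden_layers // 2 + 1
    print(f"layer {layer_idx}: yoco_mb={yoco_mb}, yoco_kv={yoco_kv}")

# Expected output: layers 0-3 -> both False, layer 4 -> yoco_mb only,
# layers 5-7 -> both True.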
@@ -420,7 +420,7 @@ def __init__(self,
             self.attn = Phi3Mamba(config.hidden_size, layer_idx=layer_idx,
                                   yoco_cross=self.yoco_cross, yoco_kv=self.yoco_mb, **factory_kwargs)
         else:
-            self.attn = SambaAttention(config, layer_idx=layer_idx, yoco_cross=self.yoco_cross, cache_config=cache_config, prefix=f"{prefix}.self_attn")
+            self.attn = SambaYAttention(config, layer_idx=layer_idx, yoco_cross=self.yoco_cross, cache_config=cache_config, prefix=f"{prefix}.self_attn")
         self.post_attention_layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
 
     def forward(
@@ -469,7 +469,7 @@ def get_kv_cache(layer_name):
     kv_cache = self.kv_cache[forward_context.virtual_engine]
     return kv_cache
 
-class SambaModel(nn.Module):
+class SambaYModel(nn.Module):
 
     def __init__(
         self,
@@ -494,7 +494,7 @@ def __init__(
 
         self.start_layer, self.end_layer, self.layers = make_layers(
             config.num_hidden_layers,
-            lambda prefix: SambaDecoderLayer(config,
+            lambda prefix: SambaYDecoderLayer(config,
                                               int(prefix.split('.')[-1]),
                                               cache_config,
                                               prefix=prefix),
@@ -590,7 +590,7 @@ def forward(
         return hidden_states
 
 
-class SambaForCausalLM(nn.Module, HasInnerState, IsHybrid, SupportsV0Only):
+class Phi4MiniFlashForCausalLM(nn.Module, HasInnerState, IsHybrid, SupportsV0Only):
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         config = vllm_config.model_config.hf_config
@@ -603,13 +603,13 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         # Prefix caching is not supported since there are mamba layers in this
         # mode.
         assert not cache_config.enable_prefix_caching, \
-            "Samba currently does not support prefix caching"
+            "SambaY currently does not support prefix caching"
 
         super().__init__()
         self.config = config
         self.model_config = vllm_config.model_config
         self.scheduler_config = scheduler_config
-        self.model = SambaModel(
+        self.model = SambaYModel(
             config,
             cache_config=cache_config,
             prefix=maybe_prefix(prefix, "model")
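With the causal-LM entry point renamed to Phi4MiniFlashForCausalLM, serving should follow the usual vLLM flow. The snippet below is a hypothetical usage sketch, not part of this diff; it assumes the class is registered for the microsoft/Phi-4-mini-flash-reasoning checkpoint (the checkpoint name is not confirmed by the hunks above) and leaves prefix caching disabled to satisfy the assert above.

from vllm import LLM, SamplingParams

# Hypothetical checkpoint name; the diff itself only renames the vLLM-side classes.
llm = LLM(model="microsoft/Phi-4-mini-flash-reasoning", trust_remote_code=True)

outputs = llm.generate(["Summarize what YOCO KV-cache sharing buys at long context."],
                       SamplingParams(temperature=0.0, max_tokens=64))
print(outputs[0].outputs[0].text)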