logger = init_logger(__name__)

+# Weight name mapping for speculators format compatibility
+SPECULATORS_WEIGHT_MAP = {
+    "fusion_fc.weight": "fc.weight",
+    "fusion_fc.bias": "fc.bias",
+    "embedding_layernorm.weight": "embedding_layernorm.weight",
+    "pre_lm_head_layernorm.weight": "hidden_states_layernorm.weight",
+}
+
+
+def remap_speculators_weight_name(name: str) -> str | None:
+    """Remap speculators format weight names to vLLM names."""
+    if name in SPECULATORS_WEIGHT_MAP:
+        return SPECULATORS_WEIGHT_MAP[name]
+    elif name.startswith("transformer."):
+        return None
+    return name
+

class LlamaDecoderLayer(LlamaDecoderLayer):
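The helper above centralizes the checkpoint-name translation that was previously duplicated as a private method on both model classes. A minimal sketch of its expected behavior (the import path is an assumption for illustration; adjust to wherever this module actually lives):

```python
# Assumed import path, for illustration only.
from vllm.model_executor.models.llama_eagle import remap_speculators_weight_name

# Known speculators names are translated to the vLLM draft-model names.
assert remap_speculators_weight_name("fusion_fc.weight") == "fc.weight"
assert (remap_speculators_weight_name("pre_lm_head_layernorm.weight")
        == "hidden_states_layernorm.weight")

# Weights under "transformer." belong to the target model and are skipped.
assert remap_speculators_weight_name("transformer.h.0.attn.weight") is None

# Everything else passes through unchanged.
assert (remap_speculators_weight_name("layers.0.self_attn.q_proj.weight")
        == "layers.0.self_attn.q_proj.weight")
```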
@@ -55,14 +72,6 @@ class LlamaModel(nn.Module):
    (2 * hidden_size) and projects them back to hidden_size for processing
    through the transformer layers.
    """
-
-    # Weight name mapping for speculators format compatibility
-    SPECULATORS_WEIGHT_MAP = {
-        "fusion_fc.weight": "projection_layer.weight",
-        "fusion_fc.bias": "projection_layer.bias",
-        "embedding_layernorm.weight": "embedding_layernorm.weight",
-        "pre_lm_head_layernorm.weight": "hidden_states_layernorm.weight",
-    }

    def __init__(
        self,
@@ -72,8 +81,7 @@ def __init__(
        start_layer_id: int = 0,
    ) -> None:
        super().__init__()
-        self.config = vllm_config. \
-            speculative_config.draft_model_config.hf_config
+        self.config = vllm_config.speculative_config.draft_model_config.hf_config
        self.vocab_size = self.config.vocab_size

        self.embed_tokens = VocabParallelEmbedding(
@@ -91,9 +99,9 @@ def __init__(
        ])

        # Projection layer: combines input embeddings with target hidden states
-        self.projection_layer = torch.nn.Linear(self.config.hidden_size * 2,
-                                                self.config.hidden_size,
-                                                bias=False)
+        self.fc = torch.nn.Linear(self.config.hidden_size * 2,
+                                  self.config.hidden_size,
+                                  bias=False)

        # Support for additional layernorms (HASS variant)
        # HASS adds layernorms to input embeddings and hidden states for better
@@ -134,7 +142,7 @@ def forward(

        # Project concatenated embeddings and hidden states
        # This combines information from both the input tokens and target model
-        hidden_states = self.projection_layer(
+        hidden_states = self.fc(
            torch.cat((input_embeds, hidden_states), dim=-1))

        # Process through transformer layers
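The renamed `fc` layer is the 2x-to-1x projection described in the class docstring: it fuses the drafter's token embeddings with the target model's hidden states. A standalone shape sketch (sizes are illustrative, not taken from any real config):

```python
import torch

hidden_size = 4096  # illustrative; vLLM reads this from the draft model config
fc = torch.nn.Linear(hidden_size * 2, hidden_size, bias=False)

input_embeds = torch.randn(1, 8, hidden_size)   # [batch, seq, hidden]
target_hidden = torch.randn(1, 8, hidden_size)  # hidden states from the target model

# Concatenate along the feature dim (2 * hidden_size) and project back down,
# mirroring the forward() hunk above.
fused = fc(torch.cat((input_embeds, target_hidden), dim=-1))
assert fused.shape == (1, 8, hidden_size)
```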
@@ -148,23 +156,6 @@ def forward(
        hidden_states = hidden_states + residual
        return hidden_states, hidden_states

-    def _remap_weight_name(self, name: str) -> str | None:
-        """
-        Remap speculators format weight names to vLLM names.
-
-        Args:
-            name: Original weight name from the checkpoint
-
-        Returns:
-            Remapped weight name, or None if the weight should be skipped
-        """
-        if name in self.SPECULATORS_WEIGHT_MAP:
-            return self.SPECULATORS_WEIGHT_MAP[name]
-        elif name.startswith("transformer."):
-            # Skip transformer weights - they're loaded separately by the target model
-            return None
-        return name
-
    def load_weights(self, weights: Iterable[tuple[str,
                                                   torch.Tensor]]) -> set[str]:
        """
@@ -192,8 +183,7 @@ def load_weights(self, weights: Iterable[tuple[str,
        loaded_params: set[str] = set()

        for name, loaded_weight in weights:
-            # Remap weight names for speculators compatibility
-            remapped_name = self._remap_weight_name(name)
+            remapped_name = remap_speculators_weight_name(name)
            if remapped_name is None:
                continue
            name = remapped_name
@@ -252,8 +242,7 @@ class EagleLlamaForCausalLM(LlamaForCausalLM):

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        nn.Module.__init__(self)
-        self.config = vllm_config. \
-            speculative_config.draft_model_config.hf_config
+        self.config = vllm_config.speculative_config.draft_model_config.hf_config
        target_layer_num = vllm_config.model_config.get_num_layers(
            vllm_config.parallel_config)
        self.model = LlamaModel(vllm_config=vllm_config,
@@ -283,23 +272,6 @@ def forward(
        """
        return self.model(input_ids, positions, hidden_states)

-    def _remap_weight_name(self, name: str) -> str | None:
-        """
-        Remap speculators format weight names to vLLM names.
-
-        Args:
-            name: Original weight name from the checkpoint
-
-        Returns:
-            Remapped weight name, or None if the weight should be skipped
-        """
-        if name in self.SPECULATORS_WEIGHT_MAP:
-            return self.SPECULATORS_WEIGHT_MAP[name]
-        elif name.startswith("transformer."):
-            # Skip transformer weights - they're loaded separately by the target model
-            return None
-        return name
-
    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
        """
        Load model weights with support for speculators format.
@@ -317,8 +289,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):

        model_weights = {}
        for name, loaded_weight in weights:
-            # Remap weight names for speculators compatibility
-            remapped_name = self._remap_weight_name(name)
+            remapped_name = remap_speculators_weight_name(name)
            if remapped_name is None:
                continue
            name = remapped_name
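Net effect of the two load_weights changes: both classes now run the same filter-and-rename pass over incoming checkpoint entries before the usual loading logic. Roughly, assuming `remap_speculators_weight_name` is in scope (names and shapes below are made up for illustration):

```python
import torch

# Hypothetical speculators-format checkpoint entries.
checkpoint = {
    "fusion_fc.weight": torch.zeros(4096, 8192),
    "embedding_layernorm.weight": torch.ones(4096),
    "transformer.h.0.attn.q_proj.weight": torch.zeros(4096, 4096),
}

renamed = {}
for name, tensor in checkpoint.items():
    new_name = remap_speculators_weight_name(name)  # helper added in this PR
    if new_name is None:  # target-model weight, handled elsewhere
        continue
    renamed[new_name] = tensor

# renamed == {"fc.weight": ..., "embedding_layernorm.weight": ...}
```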