
Commit b08e3de

rahul-tuli and claude committed
fix: Complete speculators Eagle support fixes
- Updated llama_eagle.py to skip transformer weights (loaded separately)
- Added num_lookahead_tokens to speculators config (required for Eagle)
- Together these fixes allow speculators Eagle models to work with the V1 engine

Signed-off-by: rtuli@redhat.com

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Signed-off-by: Rahul Tuli <rtuli@redhat.com>
1 parent 7ad0c07 commit b08e3de
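
To make the end-to-end effect concrete, here is a hedged usage sketch of running a speculators-format Eagle draft model through vLLM's offline API. The model names are placeholders, and the exact `speculative_config` keys may differ across vLLM versions; this is an illustration of the workflow the commit enables, not code from the commit itself.

```python
from vllm import LLM, SamplingParams

llm = LLM(
    model="meta-llama/Llama-3.1-8B-Instruct",  # target model (placeholder)
    speculative_config={
        "method": "eagle",
        # Speculators-format Eagle draft checkpoint (placeholder path)
        "model": "path/to/speculators-eagle-draft",
        # Mirrors the num_lookahead_tokens default added in this commit
        "num_speculative_tokens": 5,
    },
)
outputs = llm.generate(["Hello"], SamplingParams(max_tokens=32))
```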

File tree

2 files changed: +3, -3 lines

vllm/model_executor/models/llama_eagle.py

Lines changed: 2 additions & 3 deletions
```diff
@@ -117,9 +117,8 @@ def load_weights(self, weights: Iterable[tuple[str,
             if name in speculators_name_map:
                 name = speculators_name_map[name]
             elif name.startswith("transformer."):
-                # transformer.* -> model.layers.0.*
-                suffix = name[len("transformer."):]
-                name = f"model.layers.0.{suffix}"
+                # Skip transformer weights - they're loaded separately
+                continue
 
             for param_name, weight_name, shard_id in stacked_params_mapping:
                 if weight_name not in name:
```
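
For context, here is a minimal sketch of the name-filtering pattern this diff sits inside. The per-parameter `weight_loader` attribute and the `speculators_name_map` remapping follow vLLM conventions; everything outside the lines shown in the diff above is assumed, not taken from this commit.

```python
from typing import Iterable

import torch
import torch.nn as nn


def load_filtered_weights(
    module: nn.Module,
    weights: Iterable[tuple[str, torch.Tensor]],
    speculators_name_map: dict[str, str],
) -> None:
    params_dict = dict(module.named_parameters())
    for name, loaded_weight in weights:
        if name in speculators_name_map:
            name = speculators_name_map[name]
        elif name.startswith("transformer."):
            # Skip transformer weights - per the commit, they are
            # loaded separately rather than remapped to model.layers.0.*
            continue
        param = params_dict.get(name)
        if param is None:
            continue  # no matching parameter on this module
        # vLLM parameters may carry a custom weight_loader for sharding.
        weight_loader = getattr(param, "weight_loader", None)
        if weight_loader is not None:
            weight_loader(param, loaded_weight)
        else:
            with torch.no_grad():
                param.copy_(loaded_weight)
```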

vllm/transformers_utils/configs/speculators_eagle.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -89,6 +89,7 @@ def _convert_speculators_to_vllm(cls, speculators_config: dict) -> dict:
             "eagle_fc_bias": speculators_config.get("fusion_bias", False),
             "truncated_vocab_size": transformer_config.get("vocab_size"),
             "method": speculators_config.get("speculators_model_type", "eagle"),  # Use speculators_model_type
+            "num_lookahead_tokens": 5,  # Default number of speculative tokens for Eagle
         }
 
         # Preserve any additional fields that might be needed
```
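
To show what this mapping produces, here is a hedged sketch of the conversion as a standalone function. The dict keys come from the diff above; the example input values are invented for illustration, and the real `_convert_speculators_to_vllm` classmethod contains more fields than shown here.

```python
def convert_speculators_to_vllm(speculators_config: dict,
                                transformer_config: dict) -> dict:
    """Sketch of the field mapping shown in the diff (not the full method)."""
    return {
        "eagle_fc_bias": speculators_config.get("fusion_bias", False),
        "truncated_vocab_size": transformer_config.get("vocab_size"),
        "method": speculators_config.get("speculators_model_type", "eagle"),
        # Without this key the Eagle proposer has no draft length to use,
        # which is what the commit message calls out as required.
        "num_lookahead_tokens": 5,
    }


# Illustrative input (values invented):
vllm_cfg = convert_speculators_to_vllm(
    {"fusion_bias": True, "speculators_model_type": "eagle"},
    {"vocab_size": 32000},
)
assert vllm_cfg["num_lookahead_tokens"] == 5
```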
