feat: Add support for speculators Eagle checkpoints

rahul-tuli · claude · rahul-tuli · commit 175d2a844eac · 2025-07-09T09:38:08.000-04:00
- Add SpeculatorsEagleConfig to handle the speculators config format
- Update the config loader to detect speculators Eagle models
- Add weight name remapping in the Eagle model's load_weights
- Support both standard Eagle and HASS (with layernorms) variants

This enables vLLM to load Eagle models converted using the speculators library's checkpoint converter, mapping config fields and weight names to vLLM's expected format.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/vllm/model_executor/models/eagle.py b/vllm/model_executor/models/eagle.py
@@ -204,8 +204,24 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
         # https://huggingface.co/abhigoyal/EAGLE-LLaMA3-Instruct-8B-vllm
         # Also, here's an example script for converting trained EAGLE
         # checkpoint to vLLM compatible version: https://gist.github.com/abhigoyal1997/1e7a4109ccb7704fbc67f625e86b2d6d
+        
+        # Support for speculators format weights
+        speculators_name_map = {
+            "fusion_fc.weight": "fc.weight",
+            "fusion_fc.bias": "fc.bias",
+            "embedding_layernorm.weight": "enorm.weight",
+            "pre_lm_head_layernorm.weight": "hnorm.weight",
+        }
+        
         model_weights = {}
         for name, loaded_weight in weights:
+            # Handle speculators format weight names
+            if name in speculators_name_map:
+                name = speculators_name_map[name]
+            elif name.startswith("transformer."):
+                # transformer.* -> model.model.layers.0.*
+                suffix = name[len("transformer."):]
+                name = f"model.model.layers.0.{suffix}"
             if name == "token_map":
                 if self.config.truncated_vocab_size < self.config.vocab_size:
                     self.token_map = nn.Parameter(loaded_weight,
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
@@ -40,9 +40,11 @@
                                              NemotronConfig, NVLM_D_Config,
                                              OvisConfig, RWConfig,
                                              SkyworkR1VChatConfig, SolarConfig,
+                                             SpeculatorsEagleConfig,
                                              Telechat2Config, UltravoxConfig)
 # yapf: enable
 from vllm.transformers_utils.configs.mistral import adapt_config_dict
+from vllm.transformers_utils.configs.speculators_eagle import is_speculators_eagle_config
 from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import resolve_obj_by_qualname
 
@@ -347,6 +349,17 @@ def get_config(
             raise ValueError(error_message) from e
 
     if config_format == ConfigFormat.HF:
+        # Check if this is a speculators Eagle model
+        if is_speculators_eagle_config(model):
+            config = SpeculatorsEagleConfig.from_pretrained(
+                model,
+                revision=revision,
+                code_revision=code_revision,
+                token=_get_hf_token(),
+                **kwargs,
+            )
+            return config
+        
         config_dict, _ = PretrainedConfig.get_config_dict(
             model,
             revision=revision,
diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py
@@ -7,6 +7,7 @@
 from vllm.transformers_utils.configs.deepseek_vl2 import DeepseekVLV2Config
 from vllm.transformers_utils.configs.eagle import EAGLEConfig
 from vllm.transformers_utils.configs.exaone import ExaoneConfig
+from vllm.transformers_utils.configs.speculators_eagle import SpeculatorsEagleConfig
 # RWConfig is for the original tiiuae/falcon-40b(-instruct) and
 # tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
 # `FalconConfig` class from the official HuggingFace transformers library.
@@ -40,6 +41,7 @@
     "MedusaConfig",
     "EAGLEConfig",
     "ExaoneConfig",
+    "SpeculatorsEagleConfig",
     "MiniMaxText01Config",
     "MiniMaxVL01Config",
     "MllamaConfig",
diff --git a/vllm/transformers_utils/configs/speculators_eagle.py b/vllm/transformers_utils/configs/speculators_eagle.py
@@ -0,0 +1,123 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import json
+import os
+from pathlib import Path
+from typing import Optional, Union
+
+from transformers import PretrainedConfig
+
+from vllm.transformers_utils.configs.eagle import EAGLEConfig
+
+
+class SpeculatorsEagleConfig(EAGLEConfig):
+    """Adapter that loads a speculators-format Eagle config for vLLM.
+
+    A speculators config is recognised by its ``speculators_model_type`` key.
+    It is rewritten into a vLLM-style dict (``model``, ``eagle_fc_bias``,
+    ``truncated_vocab_size``, ...) whose entries are then passed to the
+    class constructor as keyword arguments.
+    """
+
+    @classmethod
+    def from_pretrained(
+        cls,
+        pretrained_model_name_or_path: Union[str, os.PathLike],
+        **kwargs,
+    ) -> "SpeculatorsEagleConfig":
+        """Load a speculators Eagle config and convert it to vLLM format.
+
+        Args:
+            pretrained_model_name_or_path: Directory expected to contain a
+                ``config.json`` file.
+            **kwargs: Forwarded to the parent ``from_pretrained`` on the
+                fallback paths only; they are not used when a local
+                speculators config is converted.
+
+        Returns:
+            The converted config, or whatever the parent ``from_pretrained``
+            returns when there is no local ``config.json`` or the file is
+            not in speculators format.
+        """
+        config_path = Path(pretrained_model_name_or_path) / "config.json"
+
+        if not config_path.exists():
+            # Not a local directory with a config.json: defer to the parent.
+            return super().from_pretrained(pretrained_model_name_or_path,
+                                           **kwargs)
+
+        # JSON files are UTF-8; do not depend on the platform default.
+        with open(config_path, encoding="utf-8") as f:
+            config_dict = json.load(f)
+
+        if "speculators_model_type" not in config_dict:
+            # Not a speculators config, use standard loading.
+            return super().from_pretrained(pretrained_model_name_or_path,
+                                           **kwargs)
+
+        vllm_config = cls._convert_speculators_to_vllm(config_dict)
+        return cls(**vllm_config)
+
+    @classmethod
+    def _convert_speculators_to_vllm(cls, speculators_config: dict) -> dict:
+        """Convert a speculators Eagle config dict to vLLM format.
+
+        Speculators format:
+        {
+            "speculators_model_type": "eagle",
+            "transformer_layer_config": {...},
+            "layernorms": true/false,
+            "fusion_bias": true/false
+        }
+
+        vLLM format:
+        {
+            "model_type": "eagle",
+            "model": {...},
+            "eagle_fc_bias": true/false,
+            "truncated_vocab_size": vocab_size
+        }
+
+        The input dict is left unmodified: the transformer layer config is
+        copied before conversion-specific keys are added to it.
+
+        Args:
+            speculators_config: Parsed contents of a speculators
+                ``config.json``.
+
+        Returns:
+            A new dict of keyword arguments for the class constructor.
+        """
+        # Work on a copy so the caller's nested dict is not altered; a
+        # missing or null entry is treated as an empty config.
+        transformer_config = dict(
+            speculators_config.get("transformer_layer_config") or {})
+
+        if speculators_config.get("layernorms", False):
+            transformer_config["add_para_norm"] = True
+            # Ensure skip flags are set correctly for extra layernorms.
+            transformer_config["skip_prenorm"] = False
+            transformer_config["skip_output_norm"] = False
+
+        if "architectures" not in transformer_config:
+            # Infer from transformer_layer_architecture.
+            arch = speculators_config.get("transformer_layer_architecture",
+                                          "LlamaDecoderLayer")
+            if arch == "LlamaDecoderLayer":
+                transformer_config["architectures"] = ["LlamaForCausalLM"]
+            else:
+                transformer_config["architectures"] = [arch]
+
+        vllm_config = {
+            "model_type": "eagle",
+            "model": transformer_config,
+            "eagle_fc_bias": speculators_config.get("fusion_bias", False),
+            "truncated_vocab_size": transformer_config.get("vocab_size"),
+        }
+
+        # Keys consumed by the conversion above (or replaced below) are not
+        # copied through; every other top-level key is preserved.
+        # NOTE(review): a pass-through key named like a converted one
+        # (e.g. "model_type") overwrites the converted value - confirm
+        # this is intended.
+        consumed_keys = {
+            "speculators_model_type",
+            "transformer_layer_config",
+            "layernorms",
+            "fusion_bias",
+            "architectures",
+        }
+        for key, value in speculators_config.items():
+            if key not in consumed_keys:
+                vllm_config[key] = value
+
+        # Set architectures for vLLM.
+        vllm_config["architectures"] = ["EAGLEModel"]
+
+        return vllm_config
+
+
+def is_speculators_eagle_config(config_path: Union[str, os.PathLike]) -> bool:
+    """Check whether a model directory holds a speculators Eagle config.
+
+    Args:
+        config_path: Directory expected to contain a ``config.json`` file.
+
+    Returns:
+        True only if ``config.json`` can be read, parses to a JSON object,
+        and its ``speculators_model_type`` value is ``"eagle"``. A missing
+        or unreadable file, invalid JSON, or a non-object document all
+        yield False.
+    """
+    config_file = Path(config_path) / "config.json"
+
+    try:
+        with open(config_file, encoding="utf-8") as f:
+            config = json.load(f)
+    except (OSError, ValueError):
+        # OSError: missing/unreadable file. ValueError: invalid JSON or
+        # undecodable bytes. Other exceptions (e.g. KeyboardInterrupt)
+        # are deliberately allowed to propagate.
+        return False
+
+    if not isinstance(config, dict):
+        return False
+    return config.get("speculators_model_type") == "eagle"