feat: Add verification scripts for Eagle models

rahul-tuli · claude · rahul-tuli · commit 0d5e8802311b · 2025-07-15T02:15:40.000-04:00
- Add comprehensive verification script for Eagle-1 and Eagle-3 models - Add quick test script for config loading and imports - Include documentation for usage and troubleshooting 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/local/README.md b/local/README.md
@@ -0,0 +1,92 @@
+# Eagle Models Verification Scripts
+
+This folder contains verification scripts for testing Eagle and Eagle-3 speculator models with vLLM.
+
+## Scripts
+
+### `verify_eagle_models.py`
+Full verification script that tests both Eagle-1 and Eagle-3 models with actual model loading and generation.
+
+**Models tested:**
+- **Eagle-1**: `nm-testing/eagle-llama3.1-8b-instruct` with `meta-llama/Meta-Llama-3.1-8B-Instruct`
+- **Eagle-3**: `nm-testing/eagle3-llama3.1-8b-instruct-speculators` with `meta-llama/Meta-Llama-3.1-8B-Instruct`
+
+**Usage:**
+```bash
+cd /home/rahul/vllm
+source .venv/bin/activate
+python local/verify_eagle_models.py
+```
+
+**Requirements:**
+- Sufficient GPU memory (models are ~8B parameters each)
+- Network access to download models from HuggingFace
+
+### `quick_eagle_test.py`
+Lightweight test script that verifies configuration loading and imports without full model initialization.
+
+**Tests:**
+- Config detection for speculators vs regular Eagle models
+- Model class imports
+- Engine argument handling
+- Speculative config creation
+
+**Usage:**
+```bash
+cd /home/rahul/vllm
+source .venv/bin/activate
+python local/quick_eagle_test.py
+```
+
+**Requirements:**
+- Minimal - only tests imports and config loading
+
+## Expected Output
+
+### Successful Run
+```
+✓ Eagle-3 speculators format detected correctly
+✓ Regular Eagle model correctly not detected as speculators
+✓ Eagle-1 model loaded successfully
+✓ Eagle-3 model loaded successfully
+🎉 All Eagle models are working correctly!
+```
+
+### Model Configuration Details
+
+**Eagle-1 (Regular Format):**
+- Uses standard vLLM Eagle configuration
+- Model: `nm-testing/eagle-llama3.1-8b-instruct`
+- Target: `meta-llama/Meta-Llama-3.1-8B-Instruct`
+
+**Eagle-3 (Speculators Format):**
+- Uses speculators library configuration format
+- Model: `nm-testing/eagle3-llama3.1-8b-instruct-speculators`
+- Target: `meta-llama/Meta-Llama-3.1-8B-Instruct`
+- Automatically detected and converted by `SpeculatorsEagleConfig`
+
+## Troubleshooting
+
+### Common Issues
+
+1. **CUDA out of memory**: Reduce `max_model_len` or use a machine with more GPU memory
+2. **Model download errors**: Ensure network connectivity and HuggingFace access
+3. **Import errors**: Verify vLLM installation and that you're in the correct environment
+
+### Debug Steps
+
+1. Run `quick_eagle_test.py` first to verify basic functionality
+2. Check that both target and draft models are accessible:
+   ```bash
+   python -c "from transformers import AutoConfig; print(AutoConfig.from_pretrained('nm-testing/eagle3-llama3.1-8b-instruct-speculators'))"
+   ```
+3. Test with smaller models if memory is limited
+
+## Development Notes
+
+These scripts verify the speculators Eagle support implementation:
+
+- **Config Translation**: `SpeculatorsEagleConfig` converts speculators format to vLLM format
+- **Model Detection**: `is_speculators_eagle_config()` identifies speculators models
+- **V1 Engine Support**: Uses V1 engine with `llama_eagle.py` implementation
+- **Weight Mapping**: Handles speculators weight name translation
diff --git a/local/quick_eagle_test.py b/local/quick_eagle_test.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+"""
+Quick Eagle test script for faster verification during development.
+
+This script runs a minimal test to verify Eagle models are working without
+full model initialization overhead.
+"""
+
+def test_config_loading():
+    """Test that Eagle configs can be loaded properly."""
+    print("Testing Eagle config loading...")
+    
+    try:
+        from vllm.transformers_utils.configs.speculators_eagle import (
+            SpeculatorsEagleConfig, 
+            is_speculators_eagle_config
+        )
+        
+        # Test speculators detection
+        is_speculators = is_speculators_eagle_config("nm-testing/eagle3-llama3.1-8b-instruct-speculators")
+        print(f"✓ Eagle-3 speculators detection: {is_speculators}")
+        
+        # Test regular Eagle detection  
+        is_regular = is_speculators_eagle_config("nm-testing/eagle-llama3.1-8b-instruct")
+        print(f"✓ Regular Eagle detection (should be False): {is_regular}")
+        
+        # Try loading a speculators config
+        if is_speculators:
+            config = SpeculatorsEagleConfig.from_pretrained(
+                "nm-testing/eagle3-llama3.1-8b-instruct-speculators"
+            )
+            print(f"✓ Config loaded successfully")
+            print(f"  - Method: {getattr(config, 'method', 'N/A')}")
+            print(f"  - Num lookahead tokens: {getattr(config, 'num_lookahead_tokens', 'N/A')}")
+            print(f"  - Model type: {getattr(config, 'model_type', 'N/A')}")
+        
+        return True
+        
+    except Exception as e:
+        print(f"✗ Config test failed: {str(e)}")
+        return False
+
+def test_model_imports():
+    """Test that Eagle model classes can be imported."""
+    print("\nTesting Eagle model imports...")
+    
+    try:
+        # Test V1 Eagle model import
+        from vllm.model_executor.models.llama_eagle import EagleLlamaForCausalLM
+        print("✓ V1 Eagle model imported successfully")
+        
+        # Test V0 Eagle model import  
+        from vllm.model_executor.models.eagle import EAGLEModel
+        print("✓ V0 Eagle model imported successfully")
+        
+        # Test detection utilities
+        from vllm.engine.arg_utils import EngineArgs
+        print("✓ Engine args imported successfully")
+        
+        return True
+        
+    except Exception as e:
+        print(f"✗ Import test failed: {str(e)}")
+        return False
+
+def test_engine_args():
+    """Test that speculative config can be created."""
+    print("\nTesting engine argument handling...")
+    
+    try:
+        from vllm.engine.arg_utils import EngineArgs
+        
+        # Test creating engine args with Eagle-3 speculative config
+        args = EngineArgs(
+            model="meta-llama/Meta-Llama-3.1-8B-Instruct",
+            speculative_config={
+                "method": "eagle",
+                "model": "nm-testing/eagle3-llama3.1-8b-instruct-speculators", 
+                "num_spec_tokens": 5
+            }
+        )
+        
+        print("✓ EngineArgs created successfully")
+        
+        # Test speculative config creation
+        spec_config = args.create_speculative_config(
+            args.speculative_config, 
+            model_config=None  # We're just testing creation
+        )
+        
+        if spec_config:
+            print("✓ Speculative config created successfully")
+            print(f"  - Method: {spec_config.method}")
+            print(f"  - Draft model: {spec_config.model}")
+            print(f"  - Spec tokens: {spec_config.num_spec_tokens}")
+        
+        return True
+        
+    except Exception as e:
+        print(f"✗ Engine args test failed: {str(e)}")
+        return False
+
+def main():
+    """Run quick tests."""
+    print("Running Quick Eagle Verification Tests")
+    print("=" * 50)
+    
+    tests = [
+        test_config_loading,
+        test_model_imports,
+        test_engine_args,
+    ]
+    
+    passed = 0
+    for test in tests:
+        if test():
+            passed += 1
+    
+    print(f"\n{'=' * 50}")
+    print(f"Quick Tests Summary: {passed}/{len(tests)} passed")
+    
+    if passed == len(tests):
+        print("🎉 All quick tests passed! Eagle support is working.")
+    else:
+        print("⚠️  Some tests failed. Check the output above.")
+    
+    return 0 if passed == len(tests) else 1
+
+if __name__ == "__main__":
+    exit(main())
diff --git a/local/verify_eagle_models.py b/local/verify_eagle_models.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+"""
+Simple verification script for Eagle and Eagle-3 speculator models.
+"""
+
+from vllm import LLM, SamplingParams
+
+def test_eagle1():
+    """Test Eagle-1 model."""
+    print("Testing Eagle-1...")
+    
+    llm = LLM(
+        model="meta-llama/Meta-Llama-3.1-8B-Instruct",
+        speculative_config={
+            "method": "eagle",
+            "model": "nm-testing/eagle-llama3.1-8b-instruct",
+            "num_spec_tokens": 5
+        },
+        max_model_len=1024,
+        enforce_eager=True
+    )
+    
+    outputs = llm.generate(["AI is"], SamplingParams(max_tokens=20, temperature=0))
+    print(f"Eagle-1 output: {outputs[0].outputs[0].text}")
+    print("✓ Eagle-1 works!")
+
+def test_eagle3():
+    """Test Eagle-3 model."""
+    print("\nTesting Eagle-3...")
+    
+    llm = LLM(
+        model="meta-llama/Meta-Llama-3.1-8B-Instruct",
+        speculative_config={
+            "method": "eagle", 
+            "model": "nm-testing/eagle3-llama3.1-8b-instruct-speculators",
+            "num_spec_tokens": 5
+        },
+        max_model_len=1024,
+        enforce_eager=True
+    )
+    
+    outputs = llm.generate(["AI is"], SamplingParams(max_tokens=20, temperature=0))
+    print(f"Eagle-3 output: {outputs[0].outputs[0].text}")
+    print("✓ Eagle-3 works!")
+
+if __name__ == "__main__":
+    print("Eagle Models Verification\n")
+    
+    try:
+        test_eagle1()
+        test_eagle3()
+        print("\n🎉 All tests passed!")
+    except Exception as e:
+        print(f"\n❌ Test failed: {e}")