
Commit 31aeda0

use the correct configuration
1 parent e19cced commit 31aeda0

2 files changed: +6, -3 lines


test_llama4_eplb.py

Lines changed: 1 addition & 0 deletions

@@ -19,6 +19,7 @@ def main():
     llm = LLM(
         model="/fp8-llama/llama4scout-fp8/",
         tensor_parallel_size=8,
+        max_model_len=2048,
         enable_expert_parallel=True,
         enable_eplb=True,
         num_redundant_experts=16,
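
For context, a minimal sketch of how the updated test configuration might be exercised end to end. The prompt and sampling settings below are illustrative and not part of this commit; only the LLM() arguments mirror the diff above.

# Illustrative only: running the EPLB test configuration from test_llama4_eplb.py.
from vllm import LLM, SamplingParams

llm = LLM(
    model="/fp8-llama/llama4scout-fp8/",
    tensor_parallel_size=8,
    max_model_len=2048,           # cap context length for the test run
    enable_expert_parallel=True,
    enable_eplb=True,             # expert-parallel load balancing
    num_redundant_experts=16,
)

# Hypothetical prompt and sampling parameters, just to drive the engine once.
outputs = llm.generate(
    ["The capital of France is"],
    SamplingParams(temperature=0.0, max_tokens=32),
)
print(outputs[0].outputs[0].text)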

vllm/model_executor/models/llama4.py

Lines changed: 5 additions & 3 deletions

@@ -369,11 +369,13 @@ def __init__(self,
         # Store the original layer_type and override it with a lambda
         original_layer_type = layer_type
 
-        def create_layer(prefix):
+        def create_layer(config, cache_config, quant_config, prefix):
+            # We use the config from vllm_config instead of the passed one
+            # to ensure we get the Llama4TextConfig type
             config = cast(Llama4TextConfig, vllm_config.model_config.hf_config)
             return original_layer_type(config=config,
-                                       cache_config=vllm_config.cache_config,
-                                       quant_config=vllm_config.quant_config,
+                                       cache_config=cache_config,
+                                       quant_config=quant_config,
                                        prefix=prefix,
                                        enable_eplb=self.enable_eplb)
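
The signature change matters because of how the wrapper is consumed: the caller supplies config, cache_config, quant_config, and prefix, and create_layer now accepts all four, forwarding the cache/quant settings while overriding only the config. The self-contained toy below illustrates that shape; the class and argument values are hypothetical and not vLLM's API.

# Toy sketch of the factory pattern used by create_layer (names are hypothetical).
from dataclasses import dataclass

@dataclass
class DecoderLayer:
    config: object
    cache_config: object
    quant_config: object
    prefix: str

trusted_config = {"type": "Llama4TextConfig"}   # stands in for vllm_config.model_config.hf_config

def create_layer(config, cache_config, quant_config, prefix):
    # Ignore the caller's config; forward its cache/quant settings unchanged.
    return DecoderLayer(config=trusted_config,
                        cache_config=cache_config,
                        quant_config=quant_config,
                        prefix=prefix)

# The caller invokes the factory with all four keyword arguments; the old
# create_layer(prefix) signature would raise a TypeError here.
layer = create_layer(config={"type": "LlamaConfig"},
                     cache_config="cache-cfg",
                     quant_config="quant-cfg",
                     prefix="model.layers.0")
print(layer)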
