From c6f2623b6868ba67b694ec4c814bca2cf77efdf8 Mon Sep 17 00:00:00 2001
From: Thomas Parnell
Date: Mon, 14 Jul 2025 12:04:40 +0000
Subject: [PATCH] Add ModelConfig class for GraniteMoeHybrid to override default max_seq_len_to_capture

Signed-off-by: Thomas Parnell
---
 vllm/model_executor/models/config.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py
index 6d0ffad1a81..6c4c66cfa95 100644
--- a/vllm/model_executor/models/config.py
+++ b/vllm/model_executor/models/config.py
@@ -200,6 +200,19 @@ def verify_and_update_config(vllm_config: "VllmConfig") -> None:
         }
 
 
+class GraniteMoeHybridModelConfig(VerifyAndUpdateConfig):
+
+    @staticmethod
+    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
+        config = vllm_config.model_config
+        config.max_seq_len_to_capture = config.max_model_len
+        logger.info(
+            "Setting max_seq_len_to_capture to %d "
+            "to ensure that CUDA graph capture "
+            "covers sequences of length up to max_model_len.",
+            config.max_model_len)
+
+
 MODELS_CONFIG_MAP: dict[str, type[VerifyAndUpdateConfig]] = {
     "GteModel": SnowflakeGteNewModelConfig,
     "GteNewModel": GteNewModelConfig,
@@ -207,4 +220,5 @@ def verify_and_update_config(vllm_config: "VllmConfig") -> None:
     "Qwen3ForSequenceClassification": Qwen3ForSequenceClassificationConfig,
     "XLMRobertaModel": JinaRobertaModelConfig,
     "JinaVLForRanking": JinaVLForSequenceClassificationConfig,
+    "GraniteMoeHybridForCausalLM": GraniteMoeHybridModelConfig,
 }