diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py
index 6c6f8e7268b..cb07fe7d9e1 100644
--- a/vllm/model_executor/models/config.py
+++ b/vllm/model_executor/models/config.py
@@ -205,6 +205,19 @@ def verify_and_update_config(vllm_config: "VllmConfig") -> None:
         }
 
 
+class GraniteMoeHybridModelConfig(VerifyAndUpdateConfig):
+
+    @staticmethod
+    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
+        config = vllm_config.model_config
+        config.max_seq_len_to_capture = config.max_model_len
+        logger.info(
+            "Setting max_seq_len_to_capture to %d "
+            "to ensure that CUDA graph capture "
+            "covers sequences of length up to max_model_len.",
+            config.max_model_len)
+
+
 class HybridAttentionMambaModelConfig(VerifyAndUpdateConfig):
 
     @classmethod
@@ -297,4 +310,5 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
     "Qwen3ForSequenceClassification": Qwen3ForSequenceClassificationConfig,
     "XLMRobertaModel": JinaRobertaModelConfig,
     "JinaVLForRanking": JinaVLForSequenceClassificationConfig,
+    "GraniteMoeHybridForCausalLM": GraniteMoeHybridModelConfig,
 }
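
For context, the sketch below illustrates the registry-and-hook pattern this diff extends: an architecture name is mapped to a VerifyAndUpdateConfig subclass whose verify_and_update_config hook mutates the config before model construction. The names MODELS_CONFIG_MAP and apply_model_specific_config, and the SimpleNamespace stand-in for VllmConfig, are illustrative assumptions rather than vllm's actual internals; only the hook classes and the dict entries shown in the diff come from the source.

# Minimal standalone sketch, assuming a dict-based dispatch like the one
# the second hunk appends to. Not vllm's real implementation.
from types import SimpleNamespace


class VerifyAndUpdateConfig:
    """Base hook: subclasses adjust the config before model construction."""

    @staticmethod
    def verify_and_update_config(vllm_config) -> None:
        raise NotImplementedError


class GraniteMoeHybridModelConfig(VerifyAndUpdateConfig):

    @staticmethod
    def verify_and_update_config(vllm_config) -> None:
        # Mirrors the first hunk: capture CUDA graphs for sequences up to
        # the full context length.
        config = vllm_config.model_config
        config.max_seq_len_to_capture = config.max_model_len


# Hypothetical name for the architecture-name -> hook registry that the
# second hunk adds an entry to.
MODELS_CONFIG_MAP = {
    "GraniteMoeHybridForCausalLM": GraniteMoeHybridModelConfig,
}


def apply_model_specific_config(architecture: str, vllm_config) -> None:
    """Hypothetical caller: look up the architecture and run its hook."""
    hook = MODELS_CONFIG_MAP.get(architecture)
    if hook is not None:
        hook.verify_and_update_config(vllm_config)


if __name__ == "__main__":
    vllm_config = SimpleNamespace(
        model_config=SimpleNamespace(max_model_len=8192,
                                     max_seq_len_to_capture=4096))
    apply_model_specific_config("GraniteMoeHybridForCausalLM", vllm_config)
    # The hook raised the capture length to match max_model_len.
    assert vllm_config.model_config.max_seq_len_to_capture == 8192

Registering via a per-architecture hook keeps model-specific overrides out of the generic config path, so each model family only pays for the adjustments it actually needs.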