[Model] Add ModelConfig class for GraniteMoeHybrid to override default max_seq_len_to_capture #20923

Merged · 2 commits · Jul 16, 2025
14 changes: 14 additions & 0 deletions vllm/model_executor/models/config.py
@@ -200,11 +200,25 @@ def verify_and_update_config(vllm_config: "VllmConfig") -> None:
}


class GraniteMoeHybridModelConfig(VerifyAndUpdateConfig):

    @staticmethod
    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
        config = vllm_config.model_config
        config.max_seq_len_to_capture = config.max_model_len
Reviewer comment (severity: medium):

The implementation unconditionally sets config.max_seq_len_to_capture = config.max_model_len. Consider checking if config.max_seq_len_to_capture already has a user-defined value before overriding it. This would prevent unexpected behavior if a user has explicitly configured this value.

if not hasattr(config, 'max_seq_len_to_capture'):
    config.max_seq_len_to_capture = config.max_model_len

        logger.info(
            "Setting max_seq_len_to_capture to %d "
            "to ensure that CUDA graph capture "
            "covers sequences of length up to max_model_len.",
            config.max_model_len)
Comment on lines +214 to +218
Reviewer comment (severity: high):

Consider adding a warning log message if max_seq_len_to_capture is being overridden, to inform the user that their specified value is not being used. This will help in debugging and understanding the configuration.

Suggested change:

-        logger.info(
-            "Setting max_seq_len_to_capture to %d "
-            "to ensure that CUDA graph capture "
-            "covers sequences of length up to max_model_len.",
-            config.max_model_len)
+        if hasattr(config, 'max_seq_len_to_capture') and config.max_seq_len_to_capture != config.max_model_len:
+            logger.warning(
+                "Overriding user-specified max_seq_len_to_capture to %d "
+                "to ensure that CUDA graph capture "
+                "covers sequences of length up to max_model_len.",
+                config.max_model_len)
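Taken together, the two review suggestions amount to a guarded override that warns before discarding a user-supplied value. A minimal, self-contained sketch of that pattern, using a stand-in `SimpleNamespace` config and the stdlib `logging` module rather than vLLM's actual `ModelConfig` and logger (the `getattr` default here stands in for the reviewer's `hasattr` check, since the real `ModelConfig` likely defines the attribute with a default):

```python
import logging
from types import SimpleNamespace

logger = logging.getLogger(__name__)


def verify_and_update_config(config) -> None:
    # Warn only when replacing a value the user actually customized.
    if (getattr(config, "max_seq_len_to_capture", None) is not None
            and config.max_seq_len_to_capture != config.max_model_len):
        logger.warning(
            "Overriding user-specified max_seq_len_to_capture to %d "
            "to ensure that CUDA graph capture covers sequences of "
            "length up to max_model_len.", config.max_model_len)
    config.max_seq_len_to_capture = config.max_model_len


# Stand-in for vllm_config.model_config with a user-set capture length.
config = SimpleNamespace(max_model_len=4096, max_seq_len_to_capture=2048)
verify_and_update_config(config)  # warns, then sets the attribute to 4096
```

When the two values already agree, the guard keeps the log quiet, so the common default path stays noise-free.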



MODELS_CONFIG_MAP: dict[str, type[VerifyAndUpdateConfig]] = {
    "GteModel": SnowflakeGteNewModelConfig,
    "GteNewModel": GteNewModelConfig,
    "NomicBertModel": NomicBertModelConfig,
    "Qwen3ForSequenceClassification": Qwen3ForSequenceClassificationConfig,
    "XLMRobertaModel": JinaRobertaModelConfig,
    "JinaVLForRanking": JinaVLForSequenceClassificationConfig,
    "GraniteMoeHybridForCausalLM": GraniteMoeHybridModelConfig,

Reviewer comment (severity: high):

Consider adding GraniteMoeHybridForCausalLM to __all__ in vllm/model_executor/models/__init__.py to ensure it is accessible for external use and doesn't break any implicit API contracts.

}
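For context, a registry like `MODELS_CONFIG_MAP` is presumably consulted by architecture name at engine-configuration time. The sketch below illustrates that dispatch pattern with stand-in classes and a hypothetical `apply_model_overrides` helper; only the map's shape and the `GraniteMoeHybridForCausalLM` entry come from the diff, everything else is an assumption rather than vLLM's actual API:

```python
from types import SimpleNamespace


class VerifyAndUpdateConfig:
    @staticmethod
    def verify_and_update_config(vllm_config) -> None:
        raise NotImplementedError


class GraniteMoeHybridModelConfig(VerifyAndUpdateConfig):
    @staticmethod
    def verify_and_update_config(vllm_config) -> None:
        # Capture CUDA graphs for the model's full context length.
        cfg = vllm_config.model_config
        cfg.max_seq_len_to_capture = cfg.max_model_len


MODELS_CONFIG_MAP = {
    "GraniteMoeHybridForCausalLM": GraniteMoeHybridModelConfig,
}


def apply_model_overrides(architecture: str, vllm_config) -> None:
    # Hypothetical dispatch: no-op for architectures without an override.
    override = MODELS_CONFIG_MAP.get(architecture)
    if override is not None:
        override.verify_and_update_config(vllm_config)


# Stand-in for a real VllmConfig.
vllm_config = SimpleNamespace(model_config=SimpleNamespace(
    max_model_len=4096, max_seq_len_to_capture=2048))
apply_model_overrides("GraniteMoeHybridForCausalLM", vllm_config)
```

Keying the map on the architecture string keeps per-model quirks out of the generic config path, which is why the reviewer's note about registering the name consistently (e.g. in `__all__`) matters.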