Skip to content

Commit 6cbc4d4

Browse files
authored
[Model] Add ModelConfig class for GraniteMoeHybrid to override default max_seq_len_to_capture (#20923)
Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
1 parent 153c6f1 commit 6cbc4d4

File tree

1 file changed

+14
-0
lines changed

1 file changed

+14
-0
lines changed

vllm/model_executor/models/config.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,19 @@ def verify_and_update_config(vllm_config: "VllmConfig") -> None:
205205
}
206206

207207

208+
class GraniteMoeHybridModelConfig(VerifyAndUpdateConfig):

    @staticmethod
    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
        """Widen CUDA graph capture to the model's full context length.

        Overrides ``max_seq_len_to_capture`` on the model config with
        ``max_model_len`` so that CUDA graph capture covers every sequence
        length this model may be served with, then logs the new value.
        """
        model_config = vllm_config.model_config
        # Capture graphs for the longest sequence the model supports.
        model_config.max_seq_len_to_capture = model_config.max_model_len
        logger.info(
            "Setting max_seq_len_to_capture to %d "
            "to ensure that CUDA graph capture "
            "covers sequences of length up to max_model_len.",
            model_config.max_model_len)
219+
220+
208221
class HybridAttentionMambaModelConfig(VerifyAndUpdateConfig):
209222

210223
@classmethod
@@ -297,4 +310,5 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
297310
"Qwen3ForSequenceClassification": Qwen3ForSequenceClassificationConfig,
298311
"XLMRobertaModel": JinaRobertaModelConfig,
299312
"JinaVLForRanking": JinaVLForSequenceClassificationConfig,
313+
"GraniteMoeHybridForCausalLM": GraniteMoeHybridModelConfig,
300314
}

0 commit comments

Comments
 (0)