diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 327f20a31a9..467b99816d4 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -136,7 +136,7 @@ class EngineArgs:
     tokenizer_revision: Optional[str] = None
     quantization: Optional[str] = None
     enforce_eager: Optional[bool] = None
-    max_seq_len_to_capture: int = 8192
+    max_seq_len_to_capture: int = 4096
     disable_custom_all_reduce: bool = False
     tokenizer_pool_size: int = 0
     # Note: Specifying a tokenizer pool by passing a class