@@ -2042,12 +2042,11 @@ class SchedulerConfig:
2042
2042
NOTE: This will be replaced by speculative config in the future; it is
2043
2043
present to enable correctness tests until then."""
2044
2044
2045
- cuda_graph_sizes : list [int ] = field (default_factory = list )
2046
- """Cuda graph capture sizes
2047
- 1. if none provided, then default set to [max_num_seqs]
2048
- 2. if one value is provided, then the capture list would follow the
2045
+ cuda_graph_sizes : list [int ] = field (default_factory = lambda : [512 ])
2046
+ """CUDA graph capture sizes. Defaults to [512].
2047
+ 1. If one value is provided, the capture list follows the
2049
2048
pattern: [1, 2, 4] + [i for i in range(8, cuda_graph_sizes + 1, 8)]
2050
- 3 . more than one value (e.g. 1 2 128) is provided, then the capture list
2049
+ 2. If more than one value (e.g. 1 2 128) is provided, the capture list
2051
2050
will follow the provided list."""
2052
2051
2053
2052
delay_factor : float = 0.0
@@ -2212,10 +2211,6 @@ def __post_init__(self) -> None:
2212
2211
self .max_num_partial_prefills , self .max_long_partial_prefills ,
2213
2212
self .long_prefill_token_threshold )
2214
2213
2215
- # If cuda_graph_sizes is not specified, default set to [max_num_seqs].
2216
- if not self .cuda_graph_sizes :
2217
- self .cuda_graph_sizes = [self .max_num_seqs ]
2218
-
2219
2214
@model_validator (mode = 'after' )
2220
2215
def _verify_args (self ) -> Self :
2221
2216
if (self .max_num_batched_tokens < self .max_model_len
0 commit comments