1 file changed: +10 -9 lines changed
@@ -1109,17 +1109,18 @@ def _process_reqs(
 
         # Add graph_pad_size here
         if self.torchair_graph_enabled and not with_prefill:
-            if self.dp_size > 1:
-                padded_batch_size = self.select_torchair_padded_batch_size(
-                    max_num_tokens)
-                num_tokens_across_dp.masked_fill_(num_tokens_across_dp == -1,
-                                                  padded_batch_size)
-            else:
-                padded_batch_size = self.select_torchair_padded_batch_size(
-                    total_num_scheduled_tokens)
+            max_num_tokens = (max_num_tokens
+                              if self.dp_size > 1 else num_input_tokens)
+            padded_batch_size = self.select_torchair_padded_batch_size(
+                max_num_tokens)
+            num_tokens_across_dp.masked_fill_(num_tokens_across_dp == -1,
+                                              padded_batch_size)
             graph_pad_size = padded_batch_size - total_num_scheduled_tokens
-
             extra_builder_kwargs['graph_pad_size'] = graph_pad_size
+        else:
+            # If torchair graph is not enabled, or if with_prefill is True, the
+            # dummy run batch size is set to 1.
+            num_tokens_across_dp.masked_fill_(num_tokens_across_dp == -1, 1)
 
         if self.vllm_config.model_config.use_mla:
             attn_metadata = self.attn_metadata_builder.build(  # type: ignore
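The diff collapses the old dp_size branches into a single selection of the pad target (the DP-wide max token count when dp_size > 1, otherwise the local input token count), and adds an else branch so that ranks still carrying the -1 placeholder in num_tokens_across_dp fall back to a dummy batch size of 1 when the torchair graph is disabled or prefill is present. The sketch below is a minimal, self-contained illustration of that padding logic, not the repository's code; the candidate sizes and the pad_for_torchair helper are hypothetical stand-ins for the model runner's real config and method.

```python
import torch

# Hypothetical graph-captured batch sizes; the real list comes from the
# torchair graph configuration.
TORCHAIR_GRAPH_BATCH_SIZES = [4, 8, 16, 32, 64]


def select_torchair_padded_batch_size(batch_size: int) -> int:
    """Return the smallest captured batch size that can hold the request."""
    for candidate in TORCHAIR_GRAPH_BATCH_SIZES:
        if batch_size <= candidate:
            return candidate
    return TORCHAIR_GRAPH_BATCH_SIZES[-1]


def pad_for_torchair(num_tokens_across_dp: torch.Tensor,
                     max_num_tokens: int,
                     num_input_tokens: int,
                     total_num_scheduled_tokens: int,
                     dp_size: int,
                     torchair_graph_enabled: bool,
                     with_prefill: bool) -> int:
    """Mirror the diff: pick one padded size and fill the -1 placeholders."""
    if torchair_graph_enabled and not with_prefill:
        # With data parallelism the pad target is the max across DP ranks;
        # otherwise the local token count is enough.
        target = max_num_tokens if dp_size > 1 else num_input_tokens
        padded_batch_size = select_torchair_padded_batch_size(target)
        num_tokens_across_dp.masked_fill_(num_tokens_across_dp == -1,
                                          padded_batch_size)
        # graph_pad_size: how many dummy tokens the graph run must add.
        return padded_batch_size - total_num_scheduled_tokens
    # Graph mode off (or prefill present): idle ranks get a dummy batch of 1.
    num_tokens_across_dp.masked_fill_(num_tokens_across_dp == -1, 1)
    return 0


# Example: ranks 1 and 3 are idle (marked -1) and are padded consistently.
tokens = torch.tensor([10, -1, 7, -1])
pad = pad_for_torchair(tokens, max_num_tokens=10, num_input_tokens=10,
                       total_num_scheduled_tokens=10, dp_size=4,
                       torchair_graph_enabled=True, with_prefill=False)
print(tokens, pad)  # tensor([10, 16,  7, 16]) 6
```

Keeping every DP rank's entry filled with the same padded size (or 1 on the fallback path) is what lets all ranks launch the same captured graph shape instead of diverging on idle ranks.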