File tree Expand file tree Collapse file tree 1 file changed +8
-3
lines changed Expand file tree Collapse file tree 1 file changed +8
-3
lines changed Original file line number Diff line number Diff line change @@ -1113,14 +1113,19 @@ def _process_reqs(
1113
1113
if self .dp_size > 1 else num_input_tokens )
1114
1114
padded_batch_size = self .select_torchair_padded_batch_size (
1115
1115
max_num_tokens )
1116
- num_tokens_across_dp .masked_fill_ (num_tokens_across_dp == - 1 ,
1117
- padded_batch_size )
1118
1116
graph_pad_size = padded_batch_size - total_num_scheduled_tokens
1119
1117
extra_builder_kwargs ['graph_pad_size' ] = graph_pad_size
1118
+ # If torchair graph is enabled and in decode mode, the dummy run
1119
+ # batch size is set to the selected graph size.
1120
+ dummy_num_tokens = padded_batch_size
1120
1121
else :
1121
1122
# If torchair graph is not enabled, or if with_prefill is True, the
1122
1123
# dummy run batch size is set to 1.
1123
- num_tokens_across_dp .masked_fill_ (num_tokens_across_dp == - 1 , 1 )
1124
+ dummy_num_tokens = 1
1125
+
1126
+ if self .dp_size > 1 :
1127
+ num_tokens_across_dp .masked_fill_ (num_tokens_across_dp == - 1 ,
1128
+ dummy_num_tokens )
1124
1129
1125
1130
if self .vllm_config .model_config .use_mla :
1126
1131
attn_metadata = self .attn_metadata_builder .build ( # type: ignore
You can’t perform that action at this time.
0 commit comments