Skip to content

Commit 6bbf179

Browse files
authored
[Misc] Fix the size of batched_dummy_mm_inputs in profile_run (#20434)
Signed-off-by: bk-201 <joy25810@foxmail.com>
1 parent 9e0ef88 commit 6bbf179

File tree

2 files changed

+9
-6
lines changed

2 files changed

+9
-6
lines changed

tests/models/registry.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -412,7 +412,8 @@ def check_available_online(
412412
hf_overrides={"architectures": ["QwenVLForConditionalGeneration"]}), # noqa: E501
413413
"Qwen2AudioForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-Audio-7B-Instruct"), # noqa: E501
414414
"Qwen2VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-VL-2B-Instruct"), # noqa: E501
415-
"Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-VL-3B-Instruct"), # noqa: E501
415+
"Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-VL-3B-Instruct", # noqa: E501
416+
max_model_len=4096),
416417
"Qwen2_5OmniModel": _HfExamplesInfo("Qwen/Qwen2.5-Omni-3B"),
417418
"Qwen2_5OmniForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-Omni-7B-AWQ"), # noqa: E501
418419
"SkyworkR1VChatModel": _HfExamplesInfo("Skywork/Skywork-R1V-38B"),

vllm/v1/worker/gpu_model_runner.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2219,8 +2219,8 @@ def profile_run(self) -> None:
22192219
encoder_budget = min(self.max_num_encoder_input_tokens,
22202220
self.encoder_cache_size)
22212221

2222-
max_num_mm_items_encoder_budget = cdiv(encoder_budget,
2223-
max_tokens_per_mm_item)
2222+
max_num_mm_items_encoder_budget = encoder_budget // \
2223+
max_tokens_per_mm_item
22242224

22252225
# Check how many items of this modality can be supported by
22262226
# the decoder budget.
@@ -2233,8 +2233,10 @@ def profile_run(self) -> None:
22332233
max_num_mm_items_decoder_budget = self.max_num_reqs * \
22342234
max_mm_items_per_req
22352235

2236-
max_num_mm_items = min(max_num_mm_items_encoder_budget,
2237-
max_num_mm_items_decoder_budget)
2236+
max_num_mm_items = max(
2237+
1,
2238+
min(max_num_mm_items_encoder_budget,
2239+
max_num_mm_items_decoder_budget))
22382240

22392241
logger.info(
22402242
"Encoder cache will be initialized with a budget of %s tokens,"
@@ -2244,7 +2246,7 @@ def profile_run(self) -> None:
22442246
# Create dummy batch of multimodal inputs.
22452247
dummy_mm_kwargs = self.mm_registry.get_decoder_dummy_data(
22462248
model_config=self.model_config,
2247-
seq_len=self.max_num_tokens,
2249+
seq_len=max_tokens_per_mm_item,
22482250
mm_counts={
22492251
dummy_data_modality: 1
22502252
},

0 commit comments

Comments
 (0)