
Commit 4bf0214

Last few changes after rebasing to the latest branch version
1 parent: ce227da

File tree: 2 files changed, +46 −10 lines


vllm/model_executor/models/prithvi_geospatial_mae.py

Lines changed: 2 additions & 2 deletions
@@ -62,8 +62,8 @@ def get_dummy_mm_data(
         # The size of pixel_values might change in the cases where we resize
         # the input but never exceeds the dimensions below.
         return {
-            "pixel_values": torch.full((1, 6, 512, 512), 1.0),
-            "location_coords": torch.full((1, 2), 1.0),
+            "pixel_values": torch.full((1, 6, 512, 512), 1.0, dtype=torch.float16),
+            "location_coords": torch.full((1, 2), 1.0, dtype=torch.float16),
         }
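
For context, the dtype pin keeps the profiling-time dummy tensors consistent with the dtype the model actually runs in. A minimal standalone sketch of the idea, assuming fp16 weights (the model_dtype variable is illustrative, not part of this commit):

    import torch

    # Assumption for illustration: the model was loaded in float16, so the
    # dummy multimodal tensors used for memory profiling should match it.
    model_dtype = torch.float16

    dummy_mm_data = {
        "pixel_values": torch.full((1, 6, 512, 512), 1.0, dtype=model_dtype),
        "location_coords": torch.full((1, 2), 1.0, dtype=model_dtype),
    }
    assert dummy_mm_data["pixel_values"].dtype == model_dtype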

vllm/v1/worker/gpu_model_runner.py

Lines changed: 44 additions & 8 deletions
@@ -122,7 +122,7 @@ def __init__(
             cache_config.cache_dtype]

         self.is_multimodal_model = model_config.is_multimodal_model
-        self.is_pooling_model = model_config.pooler_config is not None
+        self.model_supports_multimodal_raw_input = model_config.model_supports_multimodal_raw_input
         self.max_model_len = model_config.max_model_len
         self.max_num_tokens = scheduler_config.max_num_batched_tokens
         self.max_num_reqs = scheduler_config.max_num_seqs
@@ -320,6 +320,11 @@ def _may_reorder_batch(self, scheduler_output: "SchedulerOutput") -> bool:
         Returns:
             True if the batch was reordered, False otherwise.
         """
+
+        # nothing to be reordered when the model is attention free
+        if self.model_config.is_attention_free:
+            return False
+
         batch_reordered = self.attn_metadata_builders[0].reorder_batch(
             self.input_batch, scheduler_output)
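
The new guard's contract is simple to state in isolation: an attention-free model has no attention metadata whose ordering could matter, so the method reports that no reordering happened. A minimal standalone sketch of that behavior (the _Config class is a hypothetical stand-in for model_config):

    # Hypothetical stand-in for model_config, used only for this sketch.
    class _Config:
        is_attention_free = True

    def may_reorder_batch(model_config, scheduler_output=None) -> bool:
        # Nothing to reorder when the model is attention free.
        if model_config.is_attention_free:
            return False
        # The real method delegates to the attention metadata builder here.
        raise NotImplementedError

    assert may_reorder_batch(_Config()) is False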

@@ -545,7 +550,23 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None:
         batch_reordered = self._may_reorder_batch(scheduler_output)

         if batch_changed or batch_reordered:
-            self.input_batch.refresh_sampling_metadata()
+            self.input_batch.refresh()
+
+    def _maybe_add_model_args(self, num_tokens: int,
+                              model_kwargs: dict[str, Any],
+                              scheduler_output: "SchedulerOutput" = None):
+        pass
+
+    def _maybe_compute_attn_prefix(
+        self,
+        scheduler_output: "SchedulerOutput",
+    ) -> list[int]:
+        return [0] * len(self.kv_cache_config.kv_cache_groups)
+
+    def _maybe_prepare_additional_inputs(self,
+                                         scheduler_output: "SchedulerOutput",
+                                         token_indices: torch.Tensor):
+        pass

     def _get_cumsum_and_arange(
         self,
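
The three _maybe_* methods are deliberate no-op hooks on the base runner that subclasses (or later commits) can override. A sketch of the intended extension point; the subclass and the injected kwarg name below are hypothetical, not part of this commit:

    from typing import Any, Optional

    class _BaseRunner:
        # Mirrors the base hook: a no-op by default.
        def _maybe_add_model_args(self, num_tokens: int,
                                  model_kwargs: dict[str, Any],
                                  scheduler_output: Optional[Any] = None) -> None:
            pass

    class _RawInputRunner(_BaseRunner):
        # Hypothetical override for a model that consumes raw multimodal
        # input: inject extra forward() kwargs before the model call.
        def _maybe_add_model_args(self, num_tokens: int,
                                  model_kwargs: dict[str, Any],
                                  scheduler_output: Optional[Any] = None) -> None:
            model_kwargs["num_input_tokens"] = num_tokens  # illustrative kwarg

    kwargs: dict[str, Any] = {}
    _RawInputRunner()._maybe_add_model_args(8, kwargs)
    assert kwargs == {"num_input_tokens": 8}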
@@ -1012,13 +1033,14 @@ def _execute_mm_encoder(self, scheduler_output: "SchedulerOutput"):
             curr_group_outputs = self.model.get_multimodal_embeddings(
                 **batched_mm_inputs)

-            sanity_check_mm_encoder_outputs(
-                curr_group_outputs,
-                expected_num_items=len(grouped_mm_inputs),
-            )
+            if curr_group_outputs:
+                sanity_check_mm_encoder_outputs(
+                    curr_group_outputs,
+                    expected_num_items=len(grouped_mm_inputs),
+                )

-            for output in curr_group_outputs:
-                encoder_outputs.append(output)
+                for output in curr_group_outputs:
+                    encoder_outputs.append(output)

             # Cache the encoder outputs.
             for (req_id, input_id, pos_info), output in zip(
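
The guard changes behavior only in the empty case: a model that consumes raw multimodal input can legitimately return no encoder embeddings, and the sanity check (which expects one output per input) would otherwise fail. A standalone sketch of the control flow, with a dummy check standing in for sanity_check_mm_encoder_outputs:

    # Dummy stand-in for sanity_check_mm_encoder_outputs, for this sketch only.
    def check(outputs, expected_num_items):
        assert len(outputs) == expected_num_items

    def collect(curr_group_outputs, grouped_mm_inputs):
        encoder_outputs = []
        # An empty result now skips both the check and the append loop.
        if curr_group_outputs:
            check(curr_group_outputs, expected_num_items=len(grouped_mm_inputs))
            for output in curr_group_outputs:
                encoder_outputs.append(output)
        return encoder_outputs

    assert collect([], ["raw-input item"]) == []   # no longer raises
    assert collect(["emb"], ["item"]) == ["emb"]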
@@ -1304,6 +1326,9 @@ def execute_model(
             # embeddings), we always use embeddings (rather than token ids)
             # as input to the multimodal model, even when the input is text.
             input_ids = self.input_ids[:num_scheduled_tokens]
+            self._maybe_add_model_args(num_scheduled_tokens,
+                                       model_kwargs, scheduler_output)
+
             if mm_embeds:
                 inputs_embeds = self.model.get_input_embeddings(
                     input_ids, mm_embeds)
@@ -1319,6 +1344,7 @@ def execute_model(
             # multimodal models, it is not desirable for performance since
             # then the embedding layer is not included in the CUDA graph.
             input_ids = self.input_ids[:num_input_tokens]
+            self._maybe_add_model_args(num_input_tokens, model_kwargs, scheduler_output)
             inputs_embeds = None
             if self.uses_mrope:
                 positions = self.mrope_positions[:, :num_input_tokens]
@@ -1352,6 +1378,10 @@ def execute_model(
                 positions=positions,
                 intermediate_tensors=intermediate_tensors,
                 inputs_embeds=inputs_embeds,
+                **MultiModalKwargs.as_kwargs(
+                    model_kwargs,
+                    device=self.device,
+                )
             )

         self.maybe_wait_for_kv_save()
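
MultiModalKwargs.as_kwargs(..., device=...) is used here so that whatever _maybe_add_model_args collected is splatted into the forward call already on the runner's device; with an empty model_kwargs the **-splat is a no-op, leaving existing models unaffected. A rough sketch of the device-placement idea (an assumption about intent, not vLLM's actual implementation):

    import torch

    # Rough sketch only: move each tensor-valued kwarg to the target device
    # before it is **-splatted into the model's forward().
    def as_kwargs_sketch(model_kwargs: dict, device: torch.device) -> dict:
        return {
            key: value.to(device) if isinstance(value, torch.Tensor) else value
            for key, value in model_kwargs.items()
        }

    assert as_kwargs_sketch({}, torch.device("cpu")) == {}  # empty dict: no-op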
@@ -1939,6 +1969,8 @@ def _dummy_run(
         with self.maybe_dummy_run_with_lora(self.lora_config,
                                             num_scheduled_tokens):
             model = self.model
+            model_kwargs: dict[str, Any] = {}
+            self._maybe_add_model_args(num_tokens, model_kwargs)
             if self.is_multimodal_model:
                 input_ids = None
                 inputs_embeds = self.inputs_embeds[:num_tokens]
@@ -1973,7 +2005,11 @@ def _dummy_run(
                 positions=positions,
                 intermediate_tensors=intermediate_tensors,
                 inputs_embeds=inputs_embeds,
+                **MultiModalKwargs.as_kwargs(
+                    model_kwargs,
+                    device=self.device)
             )
+
             if self.use_aux_hidden_state_outputs:
                 hidden_states, _ = outputs
             else:
