Commit ceedf19

latest changes to align with the original branch
Signed-off-by: Christian Pinto <christian.pinto@ibm.com>
1 parent ad667ce commit ceedf19

3 files changed: +21 additions, -19 deletions


vllm/config.py

Lines changed: 1 addition & 0 deletions
@@ -612,6 +612,7 @@ def __post_init__(self) -> None:
         self.served_model_name = get_served_model_name(self.model,
                                                        self.served_model_name)
         self.multimodal_config = self._init_multimodal_config()
+        self.is_pooling_model = self.registry.is_pooling_model(self.architectures)
         self.model_supports_multimodal_raw_input = self._init_model_supports_multimodal_raw_input()
         if not self.skip_tokenizer_init:
             self._verify_tokenizer_mode()
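
This one-line change computes the pooling-model check once, at config construction time, and caches the result as a plain attribute, so downstream code (such as the model runner change below) reads a flag instead of re-querying the model registry. A minimal, self-contained sketch of that pattern; TinyModelConfig and _FakeRegistry are illustrative stand-ins, not vLLM classes:

from dataclasses import dataclass, field

class _FakeRegistry:
    """Stand-in for vLLM's model registry (illustrative only)."""
    _POOLING_ARCHS = {"PrithviGeoSpatialMAE"}

    def is_pooling_model(self, architectures: list[str]) -> bool:
        return any(arch in self._POOLING_ARCHS for arch in architectures)

@dataclass
class TinyModelConfig:
    architectures: list[str]
    registry: _FakeRegistry = field(default_factory=_FakeRegistry)
    is_pooling_model: bool = False

    def __post_init__(self) -> None:
        # Compute once and cache, as the vllm/config.py hunk above does.
        self.is_pooling_model = self.registry.is_pooling_model(self.architectures)

config = TinyModelConfig(architectures=["PrithviGeoSpatialMAE"])
assert config.is_pooling_model is True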

vllm/model_executor/models/prithvi_geospatial_mae.py

Lines changed: 3 additions & 3 deletions
@@ -62,7 +62,7 @@ def get_dummy_mm_data(
         # The size of pixel_values might change in the cases where we resize
         # the input but never exceeds the dimensions below.
         return {
-            "pixel_values": torch.full((1, 6, 512, 512), 1.0, dtype=torch.float16),
+            "pixel_values": torch.full((6, 512, 512), 1.0, dtype=torch.float16),
             "location_coords": torch.full((1, 2), 1.0, dtype=torch.float16),
         }

@@ -178,7 +178,7 @@ def _parse_and_validate_multimodal_data(
         if not isinstance(pixel_values, torch.Tensor):
             raise ValueError(f"Incorrect type of pixel_values. "
                              f"Got type: {type(pixel_values)}")
-        pixel_values = torch.unbind(pixel_values, dim=0)[0]
+        # pixel_values = torch.unbind(pixel_values, dim=0)[0]

         location_coords = kwargs.pop("location_coords", None)
         if not isinstance(location_coords, torch.Tensor):
@@ -217,7 +217,7 @@ def pooler(
         hidden_states: torch.Tensor,
         pooling_metadata: PoolingMetadata,
     ) -> Optional[PoolerOutput]:
-        return PoolerOutput([PoolingSequenceGroupOutput(hidden_states[0])])
+        return PoolerOutput([PoolingSequenceGroupOutput(hidden_state) for hidden_state in hidden_states])

     def load_weights(self, weights: Iterable[tuple[str,
                                                    torch.Tensor]]) -> set[str]:
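
The pooler hunk is the behavioral core of this file's change: instead of wrapping only hidden_states[0] (the first sequence), the pooler now emits one PoolingSequenceGroupOutput per sequence, so every request in a batch gets a result. A self-contained before/after sketch, using hypothetical stand-in containers in place of vLLM's actual PoolerOutput and PoolingSequenceGroupOutput classes:

import torch

class PoolingSequenceGroupOutput:
    def __init__(self, data: torch.Tensor) -> None:
        self.data = data

class PoolerOutput:
    def __init__(self, outputs: list["PoolingSequenceGroupOutput"]) -> None:
        self.outputs = outputs

hidden_states = torch.randn(4, 768)  # pooled states for 4 sequences

# Before: only the first sequence's state was wrapped and returned.
before = PoolerOutput([PoolingSequenceGroupOutput(hidden_states[0])])
assert len(before.outputs) == 1

# After: one output per sequence in the batch.
after = PoolerOutput([PoolingSequenceGroupOutput(h) for h in hidden_states])
assert len(after.outputs) == hidden_states.shape[0]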

vllm/v1/worker/gpu_model_runner.py

Lines changed: 17 additions & 16 deletions
@@ -124,6 +124,7 @@ def __init__(
             cache_config.cache_dtype]

         self.is_multimodal_model = model_config.is_multimodal_model
+        self.is_pooling_model = model_config.is_pooling_model
         self.model_supports_multimodal_raw_input = model_config.model_supports_multimodal_raw_input
         self.max_model_len = model_config.max_model_len
         self.max_num_tokens = scheduler_config.max_num_batched_tokens
@@ -557,7 +558,8 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None:
         self.input_batch.refresh_metadata()

     def _add_multimodal_inputs_to_model_args(self, model_kwargs: dict[str, Any],
-                                             scheduler_output: "SchedulerOutput"):
+                                             scheduler_output: "SchedulerOutput",
+                                             num_reqs: int = -1):
         # Multi-modal data.
         if scheduler_output:
             multi_modal_kwargs_list = []
@@ -569,21 +571,20 @@ def _add_multimodal_inputs_to_model_args(self, model_kwargs: dict[str, Any],
             multi_modal_kwargs = MultiModalKwargs.batch(multi_modal_kwargs_list)
         else:
             # The only case where SchedulerOutput is None is for a dummy run, let's get some dummy data.
-            dummy_data = self.mm_registry.get_decoder_dummy_data(model_config=self.model_config, seq_len=1)
-            multi_modal_kwargs = MultiModalKwargs.batch([dummy_data.multi_modal_data])
+            dummy_data = [self.mm_registry.get_decoder_dummy_data(model_config=self.model_config, seq_len=1).multi_modal_data for i in range(num_reqs)]
+            # dummy_data = self.mm_registry.get_decoder_dummy_data(model_config=self.model_config, seq_len=1)
+            # multi_modal_kwargs = MultiModalKwargs.batch([dummy_data.multi_modal_data])
+            multi_modal_kwargs = MultiModalKwargs.batch(dummy_data)

         model_kwargs.update(multi_modal_kwargs)

-    def _maybe_add_model_args(self, num_tokens: int,
+    def _maybe_add_multimodal_kwargs(self,
                               model_kwargs: dict[str, Any],
-                              scheduler_output: "SchedulerOutput" = None):
-
-        if self.supports_token_type_ids:
-            model_kwargs["token_type_ids"] =\
-                self.get_token_type_ids()[:num_tokens]
+                              scheduler_output: "SchedulerOutput" = None,
+                              num_reqs: int = -1):

         if self.model_supports_multimodal_raw_input:
-            self._add_multimodal_inputs_to_model_args(model_kwargs, scheduler_output)
+            self._add_multimodal_inputs_to_model_args(model_kwargs, scheduler_output, num_reqs)

     def _maybe_compute_attn_prefix(
         self,
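
Together with the Prithvi dummy-data change above (which dropped the leading batch dimension from pixel_values), this hunk makes the dummy run fabricate one dummy multimodal item per request and batch them, so the batched dummy tensors match the shapes of a real num_reqs-sized batch. A rough sketch of the idea; make_dummy_mm_data and batch_mm are illustrative stand-ins for the registry's dummy-data factory and MultiModalKwargs.batch:

import torch

def make_dummy_mm_data() -> dict[str, torch.Tensor]:
    # Per-item dummy data without a batch dim, mirroring the updated
    # get_dummy_mm_data in prithvi_geospatial_mae.py above.
    return {
        "pixel_values": torch.full((6, 512, 512), 1.0, dtype=torch.float16),
        "location_coords": torch.full((1, 2), 1.0, dtype=torch.float16),
    }

def batch_mm(items: list[dict[str, torch.Tensor]]) -> dict[str, torch.Tensor]:
    # Stack every key along a new leading batch dimension.
    return {key: torch.stack([item[key] for item in items]) for key in items[0]}

num_reqs = 3  # in the runner this comes from the dummy run's request count
batched = batch_mm([make_dummy_mm_data() for _ in range(num_reqs)])
assert batched["pixel_values"].shape == (num_reqs, 6, 512, 512)
assert batched["location_coords"].shape == (num_reqs, 1, 2)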
@@ -1364,15 +1365,15 @@ def execute_model(
             mm_embeds = self._gather_mm_embeddings(scheduler_output)
         else:
             mm_embeds = []

+        model_kwargs: dict[str, Any] = {}
         if self.is_multimodal_model and get_pp_group().is_first_rank:
             # NOTE(woosuk): To unify token ids and soft tokens (vision
             # embeddings), we always use embeddings (rather than token ids)
             # as input to the multimodal model, even when the input is text.
             input_ids = self.input_ids[:num_scheduled_tokens]
-            self._maybe_add_model_args(num_scheduled_tokens,
-                                       model_kwargs, scheduler_output)
-
+            self._maybe_add_multimodal_kwargs(model_kwargs=model_kwargs,
+                                              scheduler_output=scheduler_output)
             if mm_embeds:
                 inputs_embeds = self.model.get_input_embeddings(
                     input_ids, mm_embeds)
@@ -1388,7 +1389,6 @@ def execute_model(
             # multimodal models, it is not desirable for performance since
             # then the embedding layer is not included in the CUDA graph.
             input_ids = self.input_ids[:num_input_tokens]
-            self._maybe_add_model_args(num_input_tokens, model_kwargs, scheduler_output)
             inputs_embeds = None
         if self.uses_mrope:
             positions = self.mrope_positions[:, :num_input_tokens]
@@ -2076,8 +2076,9 @@ def _dummy_run(
                 num_scheduled_tokens):
             model = self.model
             model_kwargs: dict[str, Any] = {}
-            self._maybe_add_model_args(num_tokens, model_kwargs)
             if self.is_multimodal_model:
+                self._maybe_add_multimodal_kwargs(model_kwargs=model_kwargs,
+                                                  num_reqs=num_reqs)
                 input_ids = None
                 inputs_embeds = self.inputs_embeds[:num_tokens]
             else:

0 commit comments
