Commit 9aa5533

latest changes to align with the original branch
Signed-off-by: Christian Pinto <christian.pinto@ibm.com>
1 parent: 9a06b55

File tree

3 files changed: +21 lines, -19 lines

  vllm/config.py
  vllm/model_executor/models/prithvi_geospatial_mae.py
  vllm/v1/worker/gpu_model_runner.py

vllm/config.py (1 addition, 0 deletions)

@@ -642,6 +642,7 @@ def __post_init__(self) -> None:
         self.original_max_model_len = self.max_model_len
         self.max_model_len = self.get_and_verify_max_len(self.max_model_len)
         self.multimodal_config = self._init_multimodal_config()
+        self.is_pooling_model = self.registry.is_pooling_model(self.architectures)
         self.model_supports_multimodal_raw_input = self._init_model_supports_multimodal_raw_input()
         if not self.skip_tokenizer_init:
             self._verify_tokenizer_mode()
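For context: the new attribute caches the registry lookup once at config construction, so downstream code (such as the GPU model runner below) reads a plain attribute instead of re-querying the registry. A minimal sketch of that pattern; only the registry.is_pooling_model(architectures) call comes from the diff, the stub classes and architecture set are hypothetical:

class _StubRegistry:
    # Hypothetical architecture set, for illustration only.
    _POOLING_ARCHS = {"PrithviGeoSpatialMAE"}

    def is_pooling_model(self, architectures):
        # A model counts as pooling if any declared architecture is
        # registered as a pooling model.
        return any(a in self._POOLING_ARCHS for a in architectures)

class _StubModelConfig:
    def __init__(self, architectures):
        self.architectures = architectures
        self.registry = _StubRegistry()
        # Mirrors the diff: compute once in __post_init__-style code,
        # then read self.is_pooling_model everywhere else.
        self.is_pooling_model = self.registry.is_pooling_model(self.architectures)

print(_StubModelConfig(["PrithviGeoSpatialMAE"]).is_pooling_model)  # True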

vllm/model_executor/models/prithvi_geospatial_mae.py (3 additions, 3 deletions)

@@ -62,7 +62,7 @@ def get_dummy_mm_data(
         # The size of pixel_values might change in the cases where we resize
         # the input but never exceeds the dimensions below.
         return {
-            "pixel_values": torch.full((1, 6, 512, 512), 1.0, dtype=torch.float16),
+            "pixel_values": torch.full((6, 512, 512), 1.0, dtype=torch.float16),
             "location_coords": torch.full((1, 2), 1.0, dtype=torch.float16),
         }
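Note on the shape change: the dummy sample loses its leading batch dimension because it now describes a single request, and the unbind workaround in the next hunk is retired; batching is expected to restore that dimension. A quick shape sketch, assuming per-request tensors are stacked the way torch.stack does:

import torch

# One unbatched dummy sample per request: (channels, height, width).
sample = torch.full((6, 512, 512), 1.0, dtype=torch.float16)

# Stacking two per-request samples restores the leading batch dimension.
batch = torch.stack([sample, sample])
assert batch.shape == (2, 6, 512, 512)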

@@ -178,7 +178,7 @@ def _parse_and_validate_multimodal_data(
         if not isinstance(pixel_values, torch.Tensor):
             raise ValueError(f"Incorrect type of pixel_values. "
                              f"Got type: {type(pixel_values)}")
-        pixel_values = torch.unbind(pixel_values, dim=0)[0]
+        # pixel_values = torch.unbind(pixel_values, dim=0)[0]

         location_coords = kwargs.pop("location_coords", None)
         if not isinstance(location_coords, torch.Tensor):
@@ -217,7 +217,7 @@ def pooler(
         hidden_states: torch.Tensor,
         pooling_metadata: PoolingMetadata,
     ) -> Optional[PoolerOutput]:
-        return PoolerOutput([PoolingSequenceGroupOutput(hidden_states[0])])
+        return PoolerOutput([PoolingSequenceGroupOutput(hidden_state) for hidden_state in hidden_states])

     def load_weights(self, weights: Iterable[tuple[str,
                                                    torch.Tensor]]) -> set[str]:
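This makes the pooler emit one pooled output per request rather than only the first request's. A stripped-down, runnable sketch; PoolerOutput and PoolingSequenceGroupOutput are real vLLM types, but the stub below is illustrative only:

import torch

class _SeqGroupOutput:
    # Illustrative stub for PoolingSequenceGroupOutput.
    def __init__(self, data):
        self.data = data

def pool_all(hidden_states):
    # hidden_states carries one entry per request; iterate over all of
    # them instead of taking only hidden_states[0].
    return [_SeqGroupOutput(h) for h in hidden_states]

outs = pool_all(torch.randn(3, 1024))
assert len(outs) == 3  # one pooled output per request in the batch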

vllm/v1/worker/gpu_model_runner.py (17 additions, 16 deletions)

@@ -123,6 +123,7 @@ def __init__(
             cache_config.cache_dtype]

         self.is_multimodal_model = model_config.is_multimodal_model
+        self.is_pooling_model = model_config.is_pooling_model
         self.model_supports_multimodal_raw_input = model_config.model_supports_multimodal_raw_input
         self.max_model_len = model_config.max_model_len
         self.max_num_tokens = scheduler_config.max_num_batched_tokens
@@ -560,7 +561,8 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None:
         self.input_batch.refresh_metadata()

     def _add_multimodal_inputs_to_model_args(self, model_kwargs: dict[str, Any],
-                                             scheduler_output: "SchedulerOutput"):
+                                             scheduler_output: "SchedulerOutput",
+                                             num_reqs: int = -1):
         # Multi-modal data.
         if scheduler_output:
             multi_modal_kwargs_list = []
@@ -572,21 +574,20 @@ def _add_multimodal_inputs_to_model_args(self, model_kwargs: dict[str, Any],
             multi_modal_kwargs = MultiModalKwargs.batch(multi_modal_kwargs_list)
         else:
             # The only case where SchedulerOutput is None is for a dummy run, let's get some dummy data.
-            dummy_data = self.mm_registry.get_decoder_dummy_data(model_config=self.model_config, seq_len=1)
-            multi_modal_kwargs = MultiModalKwargs.batch([dummy_data.multi_modal_data])
+            dummy_data = [self.mm_registry.get_decoder_dummy_data(model_config=self.model_config, seq_len=1).multi_modal_data for i in range(num_reqs)]
+            # dummy_data = self.mm_registry.get_decoder_dummy_data(model_config=self.model_config, seq_len=1)
+            # multi_modal_kwargs = MultiModalKwargs.batch([dummy_data.multi_modal_data])
+            multi_modal_kwargs = MultiModalKwargs.batch(dummy_data)

         model_kwargs.update(multi_modal_kwargs)

-    def _maybe_add_model_args(self, num_tokens: int,
+    def _maybe_add_multimodal_kwargs(self,
                               model_kwargs: dict[str, Any],
-                              scheduler_output: "SchedulerOutput" = None):
-
-        if self.supports_token_type_ids:
-            model_kwargs["token_type_ids"] = \
-                self.get_token_type_ids()[:num_tokens]
+                              scheduler_output: "SchedulerOutput" = None,
+                              num_reqs: int = -1):

         if self.model_supports_multimodal_raw_input:
-            self._add_multimodal_inputs_to_model_args(model_kwargs, scheduler_output)
+            self._add_multimodal_inputs_to_model_args(model_kwargs, scheduler_output, num_reqs)

     def _maybe_compute_attn_prefix(
         self,
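The dummy-run fallback now fabricates one dummy multimodal sample per request and batches them, driven by the new num_reqs argument. A self-contained sketch of that logic; the dict-of-stacked-tensors batching below is a simplified stand-in for MultiModalKwargs.batch, and the sample shapes are invented:

import torch

def _dummy_sample():
    # Stand-in for mm_registry.get_decoder_dummy_data(...).multi_modal_data.
    return {"pixel_values": torch.zeros(6, 32, 32),
            "location_coords": torch.zeros(2)}

def batch_dummy_mm(num_reqs):
    # One dummy sample per simulated request, then stack field by field.
    samples = [_dummy_sample() for _ in range(num_reqs)]
    return {k: torch.stack([s[k] for s in samples]) for k in samples[0]}

kwargs = batch_dummy_mm(num_reqs=4)
assert kwargs["pixel_values"].shape == (4, 6, 32, 32)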
@@ -1364,15 +1365,15 @@ def execute_model(
             mm_embeds = self._gather_mm_embeddings(scheduler_output)
         else:
             mm_embeds = []
-
+
+        model_kwargs: dict[str, Any] = {}
         if self.is_multimodal_model and get_pp_group().is_first_rank:
             # NOTE(woosuk): To unify token ids and soft tokens (vision
             # embeddings), we always use embeddings (rather than token ids)
             # as input to the multimodal model, even when the input is text.
             input_ids = self.input_ids[:num_scheduled_tokens]
-            self._maybe_add_model_args(num_scheduled_tokens,
-                                       model_kwargs, scheduler_output)
-
+            self._maybe_add_multimodal_kwargs(model_kwargs=model_kwargs,
+                                              scheduler_output=scheduler_output)
             if mm_embeds:
                 inputs_embeds = self.model.get_input_embeddings(
                     input_ids, mm_embeds)
@@ -1388,7 +1389,6 @@ def execute_model(
             # multimodal models, it is not desirable for performance since
             # then the embedding layer is not included in the CUDA graph.
             input_ids = self.input_ids[:num_input_tokens]
-            self._maybe_add_model_args(num_input_tokens, model_kwargs, scheduler_output)
             inputs_embeds = None
             if self.uses_mrope:
                 positions = self.mrope_positions[:, :num_input_tokens]
@@ -2053,8 +2053,9 @@ def _dummy_run(
                 num_scheduled_tokens):
             model = self.model
             model_kwargs: dict[str, Any] = {}
-            self._maybe_add_model_args(num_tokens, model_kwargs)
             if self.is_multimodal_model:
+                self._maybe_add_multimodal_kwargs(model_kwargs=model_kwargs,
+                                                  num_reqs=num_reqs)
                 input_ids = None
                 inputs_embeds = self.inputs_embeds[:num_tokens]
             else:
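Here the helper moves onto the multimodal-only branch of _dummy_run instead of running unconditionally. A condensed, runnable sketch of the new control flow with a minimal fake runner; every name below is a stand-in, not the real GPUModelRunner:

import torch

class _FakeRunner:
    def __init__(self, is_multimodal_model):
        self.is_multimodal_model = is_multimodal_model

    def _maybe_add_multimodal_kwargs(self, model_kwargs, num_reqs):
        # Pretend version: fabricate per-request dummy pixel values.
        model_kwargs["pixel_values"] = torch.zeros(num_reqs, 6, 32, 32)

def build_dummy_kwargs(runner, num_reqs):
    model_kwargs = {}
    # As in the diff: only multimodal models get dummy multimodal kwargs.
    if runner.is_multimodal_model:
        runner._maybe_add_multimodal_kwargs(model_kwargs=model_kwargs,
                                            num_reqs=num_reqs)
    return model_kwargs

assert "pixel_values" in build_dummy_kwargs(_FakeRunner(True), num_reqs=2)
assert not build_dummy_kwargs(_FakeRunner(False), num_reqs=2)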
