
Commit 7e2df9e

latest changes to align with the original branch
Signed-off-by: Christian Pinto <christian.pinto@ibm.com>
1 parent 06a51bd commit 7e2df9e

3 files changed: +22 −20 lines


vllm/config.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -614,6 +614,7 @@ def __post_init__(self) -> None:
         self.served_model_name = get_served_model_name(self.model,
                                                        self.served_model_name)
         self.multimodal_config = self._init_multimodal_config()
+        self.is_pooling_model = self.registry.is_pooling_model(self.architectures)
         self.model_supports_multimodal_raw_input = self._init_model_supports_multimodal_raw_input()
         if not self.skip_tokenizer_init:
             self._verify_tokenizer_mode()
```
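The new flag caches a registry lookup at config-construction time so later code can branch on a plain attribute instead of re-querying the registry. A minimal sketch of the pattern (hypothetical class and architecture names, not the actual vLLM registry):

```python
class _Registry:
    # Hypothetical architecture set, for illustration only.
    _POOLING_ARCHS = {"PrithviGeoSpatialMAE"}

    def is_pooling_model(self, architectures: list[str]) -> bool:
        return any(a in self._POOLING_ARCHS for a in architectures)

class _ModelConfig:
    def __init__(self, architectures: list[str]):
        self.registry = _Registry()
        self.architectures = architectures
        # Computed once here; consumers (e.g. the GPU model runner below)
        # read the boolean instead of repeating the registry lookup.
        self.is_pooling_model = self.registry.is_pooling_model(architectures)

assert _ModelConfig(["PrithviGeoSpatialMAE"]).is_pooling_model
```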

vllm/model_executor/models/prithvi_geospatial_mae.py

Lines changed: 3 additions & 3 deletions
```diff
@@ -62,7 +62,7 @@ def get_dummy_mm_data(
         # The size of pixel_values might change in the cases where we resize
         # the input but never exceeds the dimensions below.
         return {
-            "pixel_values": torch.full((1, 6, 512, 512), 1.0, dtype=torch.float16),
+            "pixel_values": torch.full((6, 512, 512), 1.0, dtype=torch.float16),
             "location_coords": torch.full((1, 2), 1.0, dtype=torch.float16),
         }
```
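A plausible rationale for dropping the leading 1 (an inference from the diff, not stated in the commit message): batching utilities stack per-request samples along a new batch dimension, so a pre-batched sample would gain a spurious extra axis. A quick torch check:

```python
import torch

# One dummy sample per request, with no leading batch dimension.
per_request = [torch.full((6, 512, 512), 1.0, dtype=torch.float16)
               for _ in range(4)]

# Stacking adds the batch dimension exactly once: (4, 6, 512, 512).
batched = torch.stack(per_request, dim=0)
assert batched.shape == (4, 6, 512, 512)

# A per-sample shape of (1, 6, 512, 512) would instead stack to
# (4, 1, 6, 512, 512) and trip downstream shape checks.
```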

```diff
@@ -170,7 +170,7 @@ def _parse_and_validate_multimodal_data(
         if not isinstance(pixel_values, torch.Tensor):
             raise ValueError(f"Incorrect type of pixel_values. "
                              f"Got type: {type(pixel_values)}")
-        pixel_values = torch.unbind(pixel_values, dim=0)[0]
+        # pixel_values = torch.unbind(pixel_values, dim=0)[0]

         location_coords = kwargs.pop("location_coords", None)
         if not isinstance(location_coords, torch.Tensor):
```
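For reference, the disabled line kept only the first request's tensor: `torch.unbind(x, dim=0)[0]` is just `x[0]`. Commenting it out preserves the batch dimension so every request's pixels reach the model:

```python
import torch

x = torch.randn(2, 6, 4, 4)  # (batch, bands, H, W); small sizes for illustration
# Old behavior: discard all but the first request in the batch.
assert torch.equal(torch.unbind(x, dim=0)[0], x[0])
```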
```diff
@@ -209,7 +209,7 @@ def pooler(
         hidden_states: torch.Tensor,
         pooling_metadata: PoolingMetadata,
     ) -> Optional[PoolerOutput]:
-        return PoolerOutput([PoolingSequenceGroupOutput(hidden_states[0])])
+        return PoolerOutput([PoolingSequenceGroupOutput(hidden_state) for hidden_state in hidden_states])

     def load_weights(self, weights: Iterable[tuple[str,
                                                    torch.Tensor]]) -> set[str]:
```
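The pooler previously built a single output from `hidden_states[0]`; it now yields one `PoolingSequenceGroupOutput` per request. A shape-level sketch of the contract this implies (the batch layout is an assumption, not confirmed by the diff):

```python
import torch

hidden_states = torch.randn(3, 768)  # assumed: one pooled vector per request

# Old: one output regardless of batch size.
old = [hidden_states[0]]
# New: iterating a 2-D tensor walks the batch dimension, one row per request.
new = [h for h in hidden_states]

assert len(old) == 1 and len(new) == hidden_states.shape[0]
```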

vllm/v1/worker/gpu_model_runner.py

Lines changed: 18 additions & 17 deletions
```diff
@@ -122,6 +122,7 @@ def __init__(
             cache_config.cache_dtype]

         self.is_multimodal_model = model_config.is_multimodal_model
+        self.is_pooling_model = model_config.is_pooling_model
         self.model_supports_multimodal_raw_input = model_config.model_supports_multimodal_raw_input
         self.max_model_len = model_config.max_model_len
         self.max_num_tokens = scheduler_config.max_num_batched_tokens
```
```diff
@@ -550,10 +551,11 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None:
         batch_reordered = self._may_reorder_batch(scheduler_output)

         if batch_changed or batch_reordered:
-            self.input_batch.refresh()
+            self.input_batch.refresh_sampling_metadata()

     def _add_multimodal_inputs_to_model_args(self, model_kwargs: dict[str, Any],
-                                             scheduler_output: "SchedulerOutput"):
+                                             scheduler_output: "SchedulerOutput",
+                                             num_reqs: int = -1):
         # Multi-modal data.
         if scheduler_output:
             multi_modal_kwargs_list = []
```
```diff
@@ -565,21 +567,20 @@ def _add_multimodal_inputs_to_model_args(self, model_kwargs: dict[str, Any],
             multi_modal_kwargs = MultiModalKwargs.batch(multi_modal_kwargs_list)
         else:
             # The only case where SchedulerOutput is None is for a dummy run, let's get some dummy data.
-            dummy_data = self.mm_registry.get_decoder_dummy_data(model_config=self.model_config, seq_len=1)
-            multi_modal_kwargs = MultiModalKwargs.batch([dummy_data.multi_modal_data])
+            dummy_data = [self.mm_registry.get_decoder_dummy_data(model_config=self.model_config, seq_len=1).multi_modal_data for i in range(num_reqs)]
+            # dummy_data = self.mm_registry.get_decoder_dummy_data(model_config=self.model_config, seq_len=1)
+            # multi_modal_kwargs = MultiModalKwargs.batch([dummy_data.multi_modal_data])
+            multi_modal_kwargs = MultiModalKwargs.batch(dummy_data)

         model_kwargs.update(multi_modal_kwargs)

-    def _maybe_add_model_args(self, num_tokens: int,
+    def _maybe_add_multimodal_kwargs(self,
                               model_kwargs: dict[str, Any],
-                              scheduler_output: "SchedulerOutput" = None):
-
-        if self.supports_token_type_ids:
-            model_kwargs["token_type_ids"] = \
-                self.get_token_type_ids()[:num_tokens]
+                              scheduler_output: "SchedulerOutput" = None,
+                              num_reqs: int = -1):

         if self.model_supports_multimodal_raw_input:
-            self._add_multimodal_inputs_to_model_args(model_kwargs, scheduler_output)
+            self._add_multimodal_inputs_to_model_args(model_kwargs, scheduler_output, num_reqs)

     def _maybe_compute_attn_prefix(
         self,
```
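To see why `num_reqs` matters here: when there is no `SchedulerOutput`, the helper now fabricates one dummy sample per request before batching. A stand-in collate function (not the real `MultiModalKwargs.batch` implementation) makes the shape effect concrete:

```python
import torch

def batch_mm(items: list[dict[str, torch.Tensor]]) -> dict[str, torch.Tensor]:
    # Stand-in for MultiModalKwargs.batch: stack each key across requests.
    return {k: torch.stack([it[k] for it in items]) for k in items[0]}

num_reqs = 3
# Mirrors the list comprehension above: one dummy sample per request.
dummy = [{"pixel_values": torch.zeros(6, 512, 512)} for _ in range(num_reqs)]
assert batch_mm(dummy)["pixel_values"].shape == (3, 6, 512, 512)
```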
```diff
@@ -1344,15 +1345,15 @@ def execute_model(
             mm_embeds = self._gather_mm_embeddings(scheduler_output)
         else:
             mm_embeds = []
-
+
+        model_kwargs: dict[str, Any] = {}
         if self.is_multimodal_model and get_pp_group().is_first_rank:
             # NOTE(woosuk): To unify token ids and soft tokens (vision
             # embeddings), we always use embeddings (rather than token ids)
             # as input to the multimodal model, even when the input is text.
             input_ids = self.input_ids[:num_scheduled_tokens]
-            self._maybe_add_model_args(num_scheduled_tokens,
-                                       model_kwargs, scheduler_output)
-
+            self._maybe_add_multimodal_kwargs(model_kwargs=model_kwargs,
+                                              scheduler_output=scheduler_output)
             if mm_embeds:
                 inputs_embeds = self.model.get_input_embeddings(
                     input_ids, mm_embeds)
```
```diff
@@ -1368,7 +1369,6 @@ def execute_model(
             # multimodal models, it is not desirable for performance since
             # then the embedding layer is not included in the CUDA graph.
             input_ids = self.input_ids[:num_input_tokens]
-            self._maybe_add_model_args(num_input_tokens, model_kwargs, scheduler_output)
             inputs_embeds = None
         if self.uses_mrope:
             positions = self.mrope_positions[:, :num_input_tokens]
```
```diff
@@ -1994,8 +1994,9 @@ def _dummy_run(
                 num_scheduled_tokens):
             model = self.model
             model_kwargs: dict[str, Any] = {}
-            self._maybe_add_model_args(num_tokens, model_kwargs)
             if self.is_multimodal_model:
+                self._maybe_add_multimodal_kwargs(model_kwargs=model_kwargs,
+                                                  num_reqs=num_reqs)
                 input_ids = None
                 inputs_embeds = self.inputs_embeds[:num_tokens]
             else:
```
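Net effect: the dummy run now sizes its fabricated multimodal batch by request count rather than token count, and the helper is only invoked on the multimodal branch. A condensed, self-contained sketch of the new call chain (stand-in functions; the real runner signatures are only what the diff shows):

```python
from typing import Any, Optional

def add_mm_inputs(model_kwargs: dict[str, Any],
                  scheduler_output: Optional[object],
                  num_reqs: int = -1) -> None:
    if scheduler_output is None:
        # Dummy run: fabricate one sample per simulated request.
        model_kwargs["mm_dummy_samples"] = [object() for _ in range(num_reqs)]

def dummy_run(num_reqs: int) -> dict[str, Any]:
    model_kwargs: dict[str, Any] = {}
    add_mm_inputs(model_kwargs, scheduler_output=None, num_reqs=num_reqs)
    return model_kwargs

assert len(dummy_run(4)["mm_dummy_samples"]) == 4
```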
