Skip to content

Commit 7044280

Browse files
russellb authored and jimpang committed
[V1] Change return type on get_multimodal_embeddings() (vllm-project#19446)
Signed-off-by: Russell Bryant <rbryant@redhat.com>
1 parent e443a9b commit 7044280

37 files changed

+108
-103
lines changed

vllm/model_executor/models/aria.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -601,11 +601,11 @@ def _process_image_input(
601601
def get_language_model(self) -> torch.nn.Module:
602602
return self.language_model
603603

604-
def get_multimodal_embeddings(
605-
self, **kwargs: object) -> Optional[MultiModalEmbeddings]:
604+
def get_multimodal_embeddings(self,
605+
**kwargs: object) -> MultiModalEmbeddings:
606606
image_input = self._parse_and_validate_image_input(**kwargs)
607607
if image_input is None:
608-
return None
608+
return []
609609
multimodal_embeddings = self._process_image_input(image_input)
610610
return multimodal_embeddings
611611

vllm/model_executor/models/aya_vision.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -406,11 +406,11 @@ def _parse_and_validate_image_input(
406406
def get_language_model(self) -> torch.nn.Module:
407407
return self.language_model
408408

409-
def get_multimodal_embeddings(
410-
self, **kwargs: object) -> Optional[MultiModalEmbeddings]:
409+
def get_multimodal_embeddings(self,
410+
**kwargs: object) -> MultiModalEmbeddings:
411411
image_input = self._parse_and_validate_image_input(**kwargs)
412412
if image_input is None:
413-
return None
413+
return []
414414

415415
return self._process_image_input(image_input, **kwargs)
416416

vllm/model_executor/models/blip2.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -627,11 +627,11 @@ def _process_image_input(self,
627627
def get_language_model(self) -> torch.nn.Module:
628628
return self.language_model
629629

630-
def get_multimodal_embeddings(
631-
self, **kwargs: object) -> Optional[MultiModalEmbeddings]:
630+
def get_multimodal_embeddings(self,
631+
**kwargs: object) -> MultiModalEmbeddings:
632632
image_input = self._parse_and_validate_image_input(**kwargs)
633633
if image_input is None:
634-
return None
634+
return []
635635
vision_embeddings = self._process_image_input(image_input)
636636
return vision_embeddings
637637

vllm/model_executor/models/chameleon.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -987,11 +987,11 @@ def _parse_and_validate_image_input(
987987
def get_language_model(self) -> torch.nn.Module:
988988
return self.model
989989

990-
def get_multimodal_embeddings(
991-
self, **kwargs: object) -> Optional[MultiModalEmbeddings]:
990+
def get_multimodal_embeddings(self,
991+
**kwargs: object) -> MultiModalEmbeddings:
992992
image_input = self._parse_and_validate_image_input(**kwargs)
993993
if image_input is None:
994-
return None
994+
return []
995995
assert self.model.vqmodel is not None
996996
image_tokens = self.model.get_image_tokens(image_input["data"].to(
997997
self.config.torch_dtype))

vllm/model_executor/models/deepseek_vl2.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -586,11 +586,11 @@ def _process_image_input(
586586
def get_language_model(self) -> torch.nn.Module:
587587
return self.language_model
588588

589-
def get_multimodal_embeddings(
590-
self, **kwargs: object) -> Optional[MultiModalEmbeddings]:
589+
def get_multimodal_embeddings(self,
590+
**kwargs: object) -> MultiModalEmbeddings:
591591
image_input = self._parse_and_validate_image_input(**kwargs)
592592
if image_input is None:
593-
return None
593+
return []
594594
vision_embeddings = self._process_image_input(image_input)
595595
return vision_embeddings
596596

vllm/model_executor/models/florence2.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1032,11 +1032,11 @@ def _process_image_input(
10321032
def get_language_model(self) -> torch.nn.Module:
10331033
return self.language_model
10341034

1035-
def get_multimodal_embeddings(
1036-
self, **kwargs: object) -> Optional[MultiModalEmbeddings]:
1035+
def get_multimodal_embeddings(self,
1036+
**kwargs: object) -> MultiModalEmbeddings:
10371037
image_input = self._parse_and_validate_image_input(**kwargs)
10381038
if image_input is None:
1039-
return None
1039+
return []
10401040
vision_embeddings = self._process_image_input(image_input)
10411041
return vision_embeddings
10421042

vllm/model_executor/models/fuyu.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -324,11 +324,11 @@ def _process_image_input(
324324
def get_language_model(self) -> torch.nn.Module:
325325
return self.language_model
326326

327-
def get_multimodal_embeddings(
328-
self, **kwargs: object) -> Optional[MultiModalEmbeddings]:
327+
def get_multimodal_embeddings(self,
328+
**kwargs: object) -> MultiModalEmbeddings:
329329
image_input = self._parse_and_validate_image_input(**kwargs)
330330
if image_input is None:
331-
return None
331+
return []
332332

333333
return self._process_image_input(image_input)
334334

vllm/model_executor/models/gemma3_mm.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -568,11 +568,11 @@ def _process_image_input(
568568
def get_language_model(self) -> torch.nn.Module:
569569
return self.language_model
570570

571-
def get_multimodal_embeddings(
572-
self, **kwargs: object) -> Optional[MultiModalEmbeddings]:
571+
def get_multimodal_embeddings(self,
572+
**kwargs: object) -> MultiModalEmbeddings:
573573
image_input = self._parse_and_validate_image_input(**kwargs)
574574
if image_input is None:
575-
return None
575+
return []
576576

577577
return self._process_image_input(image_input)
578578

vllm/model_executor/models/glm4v.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -593,11 +593,11 @@ def _process_image_input(
593593
def get_language_model(self) -> torch.nn.Module:
594594
return self.transformer
595595

596-
def get_multimodal_embeddings(
597-
self, **kwargs: object) -> Optional[MultiModalEmbeddings]:
596+
def get_multimodal_embeddings(self,
597+
**kwargs: object) -> MultiModalEmbeddings:
598598
image_input = self._parse_and_validate_image_input(**kwargs)
599599
if image_input is None:
600-
return None
600+
return []
601601

602602
vision_embeddings = self._process_image_input(image_input)
603603
return vision_embeddings

vllm/model_executor/models/granite_speech.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -706,10 +706,11 @@ def _process_audio_input(
706706
def get_multimodal_embeddings(
707707
self,
708708
**kwargs: object,
709-
) -> Optional[MultiModalEmbeddings]:
709+
) -> MultiModalEmbeddings:
710710
"""Compute the audio embeddings if audio inputs are present."""
711711
audio_input = self._parse_and_validate_audio_input(**kwargs)
712712
if audio_input is None:
713+
return []
713714
return None
714715
audio_features = self._process_audio_input(audio_input)
715716
return audio_features

0 commit comments

Comments (0)