Skip to content

Commit 475f5fc

Browse files
huachenheli authored and Chen-zexi committed
[Misc] Small: Remove global media connector. Each test should have its own test connector object. (vllm-project#20395)
Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
1 parent d07c399 commit 475f5fc

File tree

1 file changed

+57
-9
lines changed

1 file changed

+57
-9
lines changed

vllm/multimodal/utils.py

Lines changed: 57 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,15 @@ def __init__(
4343
*,
4444
allowed_local_media_path: str = "",
4545
) -> None:
46+
"""
47+
Args:
48+
media_io_kwargs: Additional args passed to process media
49+
inputs, keyed by modalities. For example,
50+
to set num_frames for video, set
51+
`--media-io-kwargs '{"video": {"num_frames": 40} }'`
52+
connection: HTTP connection client to download media contents.
53+
allowed_local_media_path: A local directory to load media files from.
54+
"""
4655
super().__init__()
4756

4857
self.media_io_kwargs: dict[str, dict[
@@ -277,15 +286,6 @@ def fetch_image_embedding(
277286
return image_embedding_io.load_base64("", data)
278287

279288

280-
global_media_connector = MediaConnector()
281-
"""The global [`MediaConnector`][vllm.multimodal.utils.MediaConnector]
282-
instance used by vLLM."""
283-
284-
fetch_audio = global_media_connector.fetch_audio
285-
fetch_image = global_media_connector.fetch_image
286-
fetch_video = global_media_connector.fetch_video
287-
288-
289289
def encode_audio_base64(
290290
audio: np.ndarray,
291291
sampling_rate: float,
@@ -441,3 +441,51 @@ def run_dp_sharded_vision_model(image_input: torch.Tensor,
441441
dim=0)
442442
vision_embeddings = vision_embeddings[:num_chunks, ...]
443443
return vision_embeddings
444+
445+
446+
def fetch_audio(
    audio_url: str,
    audio_io_kwargs: Optional[dict[str, Any]] = None,
) -> tuple[np.ndarray, Union[int, float]]:
    """Fetch audio from a URL through a freshly built `MediaConnector`.

    A new connector is constructed on every call; no connector object is
    shared between calls.

    Args:
        audio_url: URL of the audio file to fetch.
        audio_io_kwargs: Additional kwargs passed to handle audio IO.
            When `None` or empty, the connector is created with
            `media_io_kwargs=None`.

    Returns:
        The result of `MediaConnector.fetch_audio` for `audio_url`.
    """
    # The connector expects kwargs keyed by modality, so wrap the
    # audio-specific options under the "audio" key.
    if audio_io_kwargs:
        per_modality_kwargs = {"audio": audio_io_kwargs}
    else:
        per_modality_kwargs = None

    connector = MediaConnector(media_io_kwargs=per_modality_kwargs)
    return connector.fetch_audio(audio_url)
460+
461+
462+
def fetch_image(
    image_url: str,
    image_io_kwargs: Optional[dict[str, Any]] = None,
) -> Image.Image:
    """Fetch an image from a URL through a freshly built `MediaConnector`.

    A new connector is constructed on every call; no connector object is
    shared between calls.

    Args:
        image_url: URL of the image file to fetch.
        image_io_kwargs: Additional kwargs passed to handle image IO.
            When `None` or empty, the connector is created with
            `media_io_kwargs=None`.

    Returns:
        The result of `MediaConnector.fetch_image` for `image_url`.
    """
    # The connector expects kwargs keyed by modality, so wrap the
    # image-specific options under the "image" key.
    if image_io_kwargs:
        per_modality_kwargs = {"image": image_io_kwargs}
    else:
        per_modality_kwargs = None

    connector = MediaConnector(media_io_kwargs=per_modality_kwargs)
    return connector.fetch_image(image_url)
476+
477+
478+
def fetch_video(
    video_url: str,
    video_io_kwargs: Optional[dict[str, Any]] = None,
) -> tuple[npt.NDArray, dict[str, Any]]:
    """Fetch a video from a URL through a freshly built `MediaConnector`.

    A new connector is constructed on every call; no connector object is
    shared between calls.

    Args:
        video_url: URL of the video file to fetch.
        video_io_kwargs: Additional kwargs passed to handle video IO.
            When `None` or empty, the connector is created with
            `media_io_kwargs=None`.

    Returns:
        The result of `MediaConnector.fetch_video` for `video_url`.
    """
    # The connector expects kwargs keyed by modality, so wrap the
    # video-specific options under the "video" key.
    if video_io_kwargs:
        per_modality_kwargs = {"video": video_io_kwargs}
    else:
        per_modality_kwargs = None

    connector = MediaConnector(media_io_kwargs=per_modality_kwargs)
    return connector.fetch_video(video_url)

0 commit comments

Comments
 (0)