
Commit c7f80c0

Some reformatting to make the pre-commit hooks succeed
Signed-off-by: Christian Pinto <christian.pinto@ibm.com>
1 parent b53a07d commit c7f80c0

File tree

14 files changed: +159 −80 lines changed


examples/offline_inference/prithvi_geospatial_mae.py

Lines changed: 1 addition & 1 deletion
@@ -144,7 +144,7 @@ def __init__(self):
             model=os.path.join(os.path.dirname(__file__), "./model"),
             skip_tokenizer_init=True,
             dtype="float16",
-            enforce_eager=True
+            enforce_eager=True,
         )

     def run(self, input_data, location_coords):
Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+import torch
+
+from ....conftest import VllmRunner
+
+def generate_test_mm_data():
+    mm_data = {
+        "pixel_values": torch.full((6, 512, 512), 1.0, dtype=torch.float16),
+        "location_coords": torch.full((1, 2), 1.0, dtype=torch.float16),
+    }
+    return mm_data
+
+def _run_test(
+    vllm_runner: type[VllmRunner],
+    model: str,
+) -> None:
+
+    mm_data = generate_test_mm_data()
+    prompt = {
+        # This model deals with no text input
+        "prompt_token_ids": [1],
+        "multi_modal_data": mm_data
+    }
+    with vllm_runner(model, task="embed",
+                     dtype=torch.float16,
+                     enforce_eager=True,
+                     skip_tokenizer_init=True) as vllm_model:
+        output = vllm_model.encode(prompt)
+
+MODELS=["christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM"]
+@pytest.mark.parametrize("model", MODELS)
+def test_models_image(
+    hf_runner,
+    vllm_runner,
+    image_assets,
+    model: str,
+) -> None:
+    _run_test(
+        vllm_runner,
+        model,
+    )
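
For orientation, the call pattern this new test exercises can also be reproduced outside the test harness through the public vllm.LLM API. The following is a minimal sketch, not part of the commit, assuming a vLLM build that accepts the task="embed", enforce_eager, and skip_tokenizer_init arguments used in the test above.

import torch
from vllm import LLM

# Dummy raw inputs matching the shapes used by generate_test_mm_data() above.
mm_data = {
    "pixel_values": torch.full((6, 512, 512), 1.0, dtype=torch.float16),
    "location_coords": torch.full((1, 2), 1.0, dtype=torch.float16),
}

llm = LLM(
    model="christian-pinto/Prithvi-EO-2.0-300M-TL-VLLM",
    task="embed",
    dtype="float16",
    enforce_eager=True,
    skip_tokenizer_init=True,
)

# The model ignores text input, so a single dummy token id is passed.
outputs = llm.encode({
    "prompt_token_ids": [1],
    "multi_modal_data": mm_data,
})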

vllm/config.py

Lines changed: 4 additions & 2 deletions
@@ -614,8 +614,10 @@ def __post_init__(self) -> None:
         self.served_model_name = get_served_model_name(self.model,
                                                        self.served_model_name)
         self.multimodal_config = self._init_multimodal_config()
-        self.is_pooling_model = self.registry.is_pooling_model(self.architectures)
-        self.model_supports_multimodal_raw_input = self._init_model_supports_multimodal_raw_input()
+        self.is_pooling_model = self.registry.is_pooling_model(
+            self.architectures)
+        self.model_supports_multimodal_raw_input = (
+            self._init_model_supports_multimodal_raw_input())
         if not self.skip_tokenizer_init:
             self._verify_tokenizer_mode()

vllm/model_executor/models/interfaces.py

Lines changed: 9 additions & 3 deletions
@@ -120,6 +120,7 @@ def supports_multimodal(

     return isinstance(model, SupportsMultiModal)

+
 @runtime_checkable
 class SupportsMultiModalWithRawInput(SupportsMultiModal, Protocol):
     """The interface required for all multi-modal models."""

@@ -134,29 +135,34 @@ class SupportsMultiModalWithRawInput(SupportsMultiModal, Protocol):
     MRO of your model class.
     """

+
 @runtime_checkable
 class _SupportsMultiModalWithRawInput(Protocol):
     supports_multimodal_raw_input: ClassVar[Literal[True]]


 @overload
-def supports_multimodal_raw_input(model: object) -> TypeIs[SupportsMultiModalWithRawInput]:
+def supports_multimodal_raw_input(
+        model: object) -> TypeIs[SupportsMultiModalWithRawInput]:
     ...


 @overload
-def supports_multimodal_raw_input(model: type[object]) -> TypeIs[type[SupportsMultiModalWithRawInput]]:
+def supports_multimodal_raw_input(
+        model: type[object]) -> TypeIs[type[SupportsMultiModalWithRawInput]]:
     ...


 def supports_multimodal_raw_input(
     model: Union[type[object], object]
-) -> Union[TypeIs[type[SupportsMultiModalWithRawInput]], TypeIs[SupportsMultiModalWithRawInput]]:
+) -> Union[TypeIs[type[SupportsMultiModalWithRawInput]],
+           TypeIs[SupportsMultiModalWithRawInput]]:
     if isinstance(model, type):
         return isinstance(model, _SupportsMultiModalWithRawInput)

     return isinstance(model, SupportsMultiModalWithRawInput)

+
 @runtime_checkable
 class SupportsLoRA(Protocol):
     """The interface required for all models that support LoRA."""

vllm/model_executor/models/prithvi_geospatial_mae.py

Lines changed: 31 additions & 21 deletions
@@ -25,13 +25,14 @@

 from vllm.config import VllmConfig
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
-from vllm.model_executor.models.interfaces import (IsAttentionFree,
-                                                   SupportsMultiModalWithRawInput)
+from vllm.model_executor.models.interfaces import (
+    IsAttentionFree, SupportsMultiModalWithRawInput)
 from vllm.model_executor.models.utils import AutoWeightsLoader
 from vllm.model_executor.pooling_metadata import PoolingMetadata
 from vllm.multimodal import MULTIMODAL_REGISTRY
-from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalFieldElem,
-                                    MultiModalInputs, MultiModalKwargs, MultiModalKwargsItem,
+from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
+                                    MultiModalFieldElem, MultiModalInputs,
+                                    MultiModalKwargs, MultiModalKwargsItem,
                                     MultiModalSharedField, PlaceholderRange)
 from vllm.multimodal.parse import MultiModalDataItems
 from vllm.multimodal.processing import (BaseMultiModalProcessor,

@@ -62,7 +63,8 @@ def get_dummy_mm_data(
         # The size of pixel_values might change in the cases where we resize
         # the input but never exceeds the dimensions below.
         return {
-            "pixel_values": torch.full((6, 512, 512), 1.0, dtype=torch.float16),
+            "pixel_values": torch.full((6, 512, 512), 1.0,
+                                       dtype=torch.float16),
             "location_coords": torch.full((1, 2), 1.0, dtype=torch.float16),
         }

@@ -75,8 +77,10 @@ def _get_mm_fields_config(
         hf_processor_mm_kwargs: Mapping[str, object],
     ) -> Mapping[str, MultiModalFieldConfig]:
         return dict(
-            pixel_values=MultiModalFieldConfig.shared(batch_size=1, modality="image"),
-            location_coords=MultiModalFieldConfig.shared(batch_size=1, modality="image"),
+            pixel_values=MultiModalFieldConfig.shared(batch_size=1,
+                                                      modality="image"),
+            location_coords=MultiModalFieldConfig.shared(batch_size=1,
+                                                         modality="image"),
         )

     def _get_prompt_updates(

@@ -98,15 +102,16 @@ def apply(

         for k, v in mm_data.items():
             mm_kwargs[k] = v
-        mm_place_holders = {
-            "image": [PlaceholderRange(offset=0, length=0)]
-        }
+        mm_place_holders = {"image": [PlaceholderRange(offset=0, length=0)]}

         multimodal_kwargs_items = [
-            MultiModalKwargsItem.from_elems(
-                [MultiModalFieldElem(modality="image", key=key, data=data, field=MultiModalSharedField(1))
-                 for key, data in mm_kwargs.items()]
-            )
+            MultiModalKwargsItem.from_elems([
+                MultiModalFieldElem(modality="image",
+                                    key=key,
+                                    data=data,
+                                    field=MultiModalSharedField(1))
+                for key, data in mm_kwargs.items()
+            ])
         ]

         return MultiModalInputs(

@@ -123,7 +128,8 @@ def apply(
     PrithviGeoSpatialMAEMultiModalProcessor,
     info=PrithviGeoSpatialMAEProcessingInfo,
     dummy_inputs=PrithviGeoSpatialMAEInputBuilder)
-class PrithviGeoSpatialMAE(nn.Module, IsAttentionFree, SupportsMultiModalWithRawInput):
+class PrithviGeoSpatialMAE(nn.Module, IsAttentionFree,
+                           SupportsMultiModalWithRawInput):
     """ Prithvi Masked Autoencoder"""

     def _instantiate_model(self, config: dict) -> Optional[nn.Module]:

@@ -181,13 +187,14 @@ def _parse_and_validate_multimodal_data(
             location_coords = None

         return pixel_values, location_coords
-
+
     def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
-        # We do not really use any input tokens and therefore no embeddings to be calculated
-        # However, due to the mandatory token ids in the input prompt we pass one token and the
-        # size of the dummy embedding tensors must reflect that.
+        # We do not really use any input tokens and therefore no embeddings
+        # to be calculated. However, due to the mandatory token ids in
+        # the input prompt we pass one token and the size of the dummy
+        # embedding tensors must reflect that.
         return torch.empty(input_ids.shape)
-
+
     def forward(
         self,
         input_ids: Optional[torch.Tensor],

@@ -209,7 +216,10 @@ def pooler(
         hidden_states: torch.Tensor,
         pooling_metadata: PoolingMetadata,
     ) -> Optional[PoolerOutput]:
-        return PoolerOutput([PoolingSequenceGroupOutput(hidden_state) for hidden_state in hidden_states])
+        return PoolerOutput([
+            PoolingSequenceGroupOutput(hidden_state)
+            for hidden_state in hidden_states
+        ])

     def load_weights(self, weights: Iterable[tuple[str,
                                                    torch.Tensor]]) -> set[str]:
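
To make the reformatted from_elems construction easier to read in isolation, here is a small sketch with toy tensors in place of real model inputs. It assumes the same vLLM version as this commit, since these multimodal input helpers are internal and may change.

import torch

from vllm.multimodal.inputs import (MultiModalFieldElem, MultiModalKwargsItem,
                                    MultiModalSharedField)

# Toy stand-ins for the raw inputs the processor receives.
mm_kwargs = {
    "pixel_values": torch.zeros(6, 512, 512, dtype=torch.float16),
    "location_coords": torch.zeros(1, 2, dtype=torch.float16),
}

# One kwargs item whose fields are shared across a batch of size 1,
# mirroring the list comprehension in apply() above.
item = MultiModalKwargsItem.from_elems([
    MultiModalFieldElem(modality="image",
                        key=key,
                        data=data,
                        field=MultiModalSharedField(1))
    for key, data in mm_kwargs.items()
])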

vllm/model_executor/models/registry.py

Lines changed: 2 additions & 3 deletions
@@ -24,8 +24,7 @@
 from .interfaces import (has_inner_state, has_noops, is_attention_free,
                          is_hybrid, supports_cross_encoding,
                          supports_multimodal, supports_multimodal_raw_input,
-                         supports_pp, supports_transcription,
-                         supports_v0_only)
+                         supports_pp, supports_transcription, supports_v0_only)
 from .interfaces_base import is_text_generation_model

 logger = init_logger(__name__)

@@ -522,7 +521,7 @@ def is_multimodal_model(
     ) -> bool:
         model_cls, _ = self.inspect_model_cls(architectures)
         return model_cls.supports_multimodal
-
+
     def supports_multimodal_raw_input(
         self,
         architectures: Union[str, list[str]],

vllm/v1/core/kv_cache_manager.py

Lines changed: 12 additions & 1 deletion
@@ -63,7 +63,9 @@ def new_empty(self) -> "KVCacheBlocks":
         """Creates a new KVCacheBlocks instance with no blocks."""
         return KVCacheBlocks(tuple([] for _ in range(len(self.blocks))))

+
 class DummyKVCacheManager:
+
     @property
     def usage(self) -> float:
         return 0.0

@@ -73,7 +75,7 @@ def make_prefix_cache_stats(self) -> Optional[PrefixCacheStats]:

     def get_computed_blocks(self,
                             request: Request) -> tuple[KVCacheBlocks, int]:
-        return(KVCacheBlocks([]), 0)
+        return (KVCacheBlocks([]), 0)

     def allocate_slots(
         self,

@@ -111,6 +113,15 @@ def get_block_ids(self, request_id: str) -> list[list[int]]:
         """Get the block ids of a request."""
         return []

+    def cache_blocks(self, request: Request, num_computed_tokens: int) -> None:
+        """Cache the blocks for the request, if enabled."""
+        pass
+
+    def create_empty_block_list(self) -> KVCacheBlocks:
+        """Creates a new KVCacheBlocks instance with no blocks."""
+        return (KVCacheBlocks([]), 0)
+
+
 class KVCacheManager:

     def __init__(

vllm/v1/core/sched/scheduler.py

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@
 from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
 from vllm.v1.core.encoder_cache_manager import (EncoderCacheManager,
                                                 compute_encoder_budget)
-from vllm.v1.core.kv_cache_manager import KVCacheBlocks, KVCacheManager, DummyKVCacheManager
+from vllm.v1.core.kv_cache_manager import DummyKVCacheManager, KVCacheManager
 from vllm.v1.core.sched.interface import SchedulerInterface
 from vllm.v1.core.sched.output import (CachedRequestData, NewRequestData,
                                        SchedulerOutput)

vllm/v1/engine/core.py

Lines changed: 8 additions & 6 deletions
@@ -139,23 +139,26 @@ def _initialize_kv_caches(
             # is attention free.
             kv_cache_specs = []
             kv_cache_configs = [
-                KVCacheConfig(num_blocks=0, kv_cache_tensors={}, kv_cache_groups=[])
-            ]
+                KVCacheConfig(num_blocks=0,
+                              kv_cache_tensors={},
+                              kv_cache_groups=[])
+            ]
         else:
             # Get all kv cache needed by the model
             kv_cache_specs = self.model_executor.get_kv_cache_specs()

             # Profiles the peak memory usage of the model to determine how much
             # memory can be allocated for kv cache.
-            available_gpu_memory = self.model_executor.determine_available_memory()
+            available_gpu_memory = (
+                self.model_executor.determine_available_memory())

             assert len(kv_cache_specs) == len(available_gpu_memory)
             # Get the kv cache tensor size
             kv_cache_configs = [
                 get_kv_cache_config(vllm_config, kv_cache_spec_one_worker,
                                     available_gpu_memory_one_worker)
-                for kv_cache_spec_one_worker, available_gpu_memory_one_worker in
-                zip(kv_cache_specs, available_gpu_memory)
+                for kv_cache_spec_one_worker, available_gpu_memory_one_worker
+                in zip(kv_cache_specs, available_gpu_memory)
             ]

             # Since we use a shared centralized controller, we need the

@@ -194,7 +197,6 @@ def add_request(self, request: EngineCoreRequest):
             request.mm_inputs = self.mm_input_cache_server.get_and_update_p1(
                 request.mm_inputs, request.mm_hashes)

-
             req = Request.from_engine_core_request(request)
             if req.use_structured_output:
                 # Start grammar compilation asynchronously

vllm/v1/engine/llm_engine.py

Lines changed: 1 addition & 2 deletions
@@ -82,8 +82,7 @@ def __init__(
         self.dp_group = None
         self.should_execute_dummy_batch = False

-
-        if not self.vllm_config.model_config.skip_tokenizer_init:
+        if not self.vllm_config.model_config.skip_tokenizer_init:
             # Tokenizer (+ ensure liveness if running in another process).
             self.tokenizer = init_tokenizer_from_configs(
                 model_config=vllm_config.model_config,
