Skip to content

Commit 5ac66e7

Browse files
Latest changes to adapt to upstream master
Signed-off-by: Christian Pinto <christian.pinto@ibm.com>
1 parent 9aa5533 commit 5ac66e7

File tree

2 files changed

+13
-3
lines changed

2 files changed

+13
-3
lines changed

examples/offline_inference/prithvi_geospatial_mae.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,8 @@ def __init__(self):
143143
self.model = LLM(
144144
model=os.path.join(os.path.dirname(__file__), "./model"),
145145
skip_tokenizer_init=True,
146-
dtype="float32",
146+
dtype="float16",
147+
enforce_eager=True
147148
)
148149

149150
def run(self, input_data, location_coords):

vllm/v1/engine/core.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,17 @@ def _initialize_kv_caches(
134134
self, vllm_config: VllmConfig) -> tuple[int, int, KVCacheConfig]:
135135
start = time.time()
136136

137-
# Get all kv cache needed by the model
138-
kv_cache_specs = self.model_executor.get_kv_cache_specs()
137+
#TODO: CP start from here
138+
if vllm_config.model_config.is_attention_free:
139+
# No need for initializing anything related to KV cache if the model
140+
# is attention free.
141+
kv_cache_specs = []
142+
kv_cache_configs = [
143+
KVCacheConfig(num_blocks=0, kv_cache_tensors={}, kv_cache_groups=[])
144+
]
145+
else:
146+
# Get all kv cache needed by the model
147+
kv_cache_specs = self.model_executor.get_kv_cache_specs()
139148

140149
# Profiles the peak memory usage of the model to determine how much
141150
# memory can be allocated for kv cache.

0 commit comments

Comments
 (0)