From 57cd0ab442637a1ab23f7425c679c1ce78d82ee5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Kuligowski?= Date: Wed, 18 Jun 2025 13:22:10 +0200 Subject: [PATCH] Generate buckets for prompt for V1 --- vllm/v1/worker/hpu_model_runner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/v1/worker/hpu_model_runner.py b/vllm/v1/worker/hpu_model_runner.py index 3f53a42e0b7..7cea2295f41 100644 --- a/vllm/v1/worker/hpu_model_runner.py +++ b/vllm/v1/worker/hpu_model_runner.py @@ -2147,6 +2147,7 @@ def warmup_model(self) -> None: raise AssertionError("Finished profiling") kv_caches = self.kv_caches max_blocks = int(kv_caches[0][0].size(0) // self.block_size) + self.bucketing_ctx.generate_prompt_buckets() self.bucketing_ctx.generate_decode_buckets(max_blocks) if not htorch.utils.internal.is_lazy(