diff --git a/vllm/v1/worker/hpu_model_runner.py b/vllm/v1/worker/hpu_model_runner.py index 3f53a42e0b7..7cea2295f41 100644 --- a/vllm/v1/worker/hpu_model_runner.py +++ b/vllm/v1/worker/hpu_model_runner.py @@ -2147,6 +2147,7 @@ def warmup_model(self) -> None: raise AssertionError("Finished profiling") kv_caches = self.kv_caches max_blocks = int(kv_caches[0][0].size(0) // self.block_size) + self.bucketing_ctx.generate_prompt_buckets() self.bucketing_ctx.generate_decode_buckets(max_blocks) if not htorch.utils.internal.is_lazy(