
Commit 23114d3

[Misc] Warn about v0 in benchmark_paged_attn.py (#15495)
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
Parent: 997c881

File tree

1 file changed: +6 -0 lines changed

benchmarks/kernels/benchmark_paged_attention.py

Lines changed: 6 additions & 0 deletions
@@ -7,10 +7,13 @@
 import torch
 
 from vllm import _custom_ops as ops
+from vllm.logger import init_logger
 from vllm.platforms import current_platform
 from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser,
                         create_kv_caches_with_random)
 
+logger = init_logger(__name__)
+
 NUM_BLOCKS = 128 * 1024
 PARTITION_SIZE = 512
 PARTITION_SIZE_ROCM = 256
@@ -193,6 +196,9 @@ def run_cuda_benchmark(num_iters: int, profile: bool = False) -> float:
 
 
 if __name__ == '__main__':
+    logger.warning("This script benchmarks the paged attention kernel. "
+                   "By default this is no longer used in vLLM inference.")
+
     parser = FlexibleArgumentParser(
         description="Benchmark the paged attention kernel.")
     parser.add_argument("--version",
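
For readers who want to lift this pattern into another script: the commit simply initializes vLLM's logger at module import and emits a one-time warning when the benchmark is launched, since the paged attention kernel exercised here belongs to a legacy code path (the "v0" in the title presumably refers to vLLM's older V0 engine) that is no longer used by default during inference. Below is a minimal standalone sketch of that pattern, not the commit itself; it assumes vLLM is importable, and the logging-module fallback is illustrative only.

# Sketch of the warn-on-startup pattern applied by this commit.
# Assumptions: vLLM is installed; the ImportError fallback is illustrative
# and not part of the actual change.
try:
    from vllm.logger import init_logger
    logger = init_logger(__name__)
except ImportError:
    import logging
    logging.basicConfig(level=logging.WARNING)
    logger = logging.getLogger(__name__)

if __name__ == "__main__":
    logger.warning("This script benchmarks the paged attention kernel. "
                   "By default this is no longer used in vLLM inference.")
    # ... argument parsing and the actual benchmark would follow here ...

Running the benchmark is unchanged; the warning only makes clear that results from this kernel no longer reflect vLLM's default inference path.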
