Skip to content

Commit e796669

Browse files
revert cpu metadata refactor
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
1 parent 5268d6a commit e796669

File tree

1 file changed

+4
-7
lines changed

1 file changed

+4
-7
lines changed

vllm/v1/attention/backends/cpu_attn.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@
     is_quantized_kv_cache)
 from vllm.attention.backends.utils import CommonAttentionState
 from vllm.logger import init_logger
-from vllm.v1.attention.backends.utils import (AttentionMetadataBuilder,
-                                              CommonAttentionMetadata)
+from vllm.v1.attention.backends.utils import CommonAttentionMetadata
 from vllm.v1.core.sched.output import SchedulerOutput
 from vllm.v1.kv_cache_interface import AttentionSpec
 from vllm.v1.worker.block_table import BlockTable
@@ -310,7 +309,7 @@ def get_seq_len_block_table_args(
     raise AttributeError(f"Invalid attention type {str(attn_type)}")


-class TorchSDPAMetadataBuilderV1(AttentionMetadataBuilder[TorchSDPAMetadata]):
+class TorchSDPAMetadataBuilderV1:

     def __init__(self, runner: CPUModelRunner, kv_cache_spec: AttentionSpec,
                  block_table: BlockTable) -> None:
@@ -374,10 +373,8 @@ def reorder_batch(self, input_batch: InputBatch,

         return True

-    def build(self,
-              common_prefix_len: int,
-              common_attn_metadata: CommonAttentionMetadata,
-              fast_build: bool = False) -> TorchSDPAMetadata:
+    def build(self, common_prefix_len: int,
+              common_attn_metadata: CommonAttentionMetadata):
         num_reqs = common_attn_metadata.num_reqs
         num_actual_tokens = common_attn_metadata.num_actual_tokens
         max_query_len = common_attn_metadata.max_query_len

0 commit comments

Comments
 (0)