File tree Expand file tree Collapse file tree 1 file changed +5
-7
lines changed
vllm/v1/attention/backends/mla Expand file tree Collapse file tree 1 file changed +5
-7
lines changed

@@ -208,10 +208,8 @@
                                     UnquantizedLinearMethod)
 from vllm.platforms import current_platform
 from vllm.utils import cdiv, round_down
-from vllm.v1.attention.backends.utils import (AttentionMetadataBuilder,
-                                              CommonAttentionMetadata,
-                                              get_per_layer_parameters,
-                                              infer_global_hyperparameters,
-                                              reoder_batch_to_split_decodes_and_prefills,
-                                              split_decodes_and_prefills)
+from vllm.v1.attention.backends.utils import (
+    AttentionMetadataBuilder, CommonAttentionMetadata,
+    get_per_layer_parameters, infer_global_hyperparameters,
+    reoder_batch_to_split_decodes_and_prefills, split_decodes_and_prefills)
 from vllm.v1.kv_cache_interface import AttentionSpec
try :
@@ -681,7 +679,7 @@ def build(self,
             decode=decode_metadata,
         )

-        if self._use_fi_prefill and self._num_prefills > 0:
+        if self._use_fi_prefill and num_prefills > 0:
             assert isinstance(attn_metadata.prefill, FlashInferPrefillMetadata)
             self._build_fi_prefill_wrappers(attn_metadata.prefill)
You can’t perform that action at this time.
0 commit comments