1 parent 00f9b31 commit 385f1d8
vllm/v1/attention/backends/flashinfer.py
@@ -8,11 +8,12 @@
 
 import torch
 
-import vllm.envs as envs
 from flashinfer import (BatchDecodeWithPagedKVCacheWrapper,
                         BatchPrefillWithPagedKVCacheWrapper,
                         MultiLevelCascadeAttentionWrapper)
 from flashinfer.decode import trtllm_batch_decode_with_kv_cache
+
+import vllm.envs as envs
 from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
                                               AttentionType)
 from vllm.config import VllmConfig