|
16 | 16 | has_kv_transfer_group,
|
17 | 17 | is_v1_kv_transfer_group)
|
18 | 18 | from vllm.forward_context import ForwardContext, get_forward_context
|
| 19 | +from vllm.logger import init_logger |
19 | 20 | from vllm.model_executor.layers.linear import UnquantizedLinearMethod
|
20 | 21 | from vllm.model_executor.layers.quantization.base_config import (
|
21 | 22 | QuantizationConfig)
|
22 | 23 | from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod
|
23 | 24 | from vllm.platforms import _Backend, current_platform
|
24 | 25 | from vllm.utils import direct_register_custom_op
|
25 | 26 |
|
logger = init_logger(__name__)

# Module-level tri-state cache for check_xformers_availability():
#   None  -> availability not yet probed
#   True  -> xformers ops were found and may be used
#   False -> xformers unavailable (or disabled for this platform)
USE_XFORMERS_OPS = None
| 29 | + |
| 30 | + |
def check_xformers_availability():
    """Return whether xformers ops can be used, caching the answer.

    The result is memoized in the module-level ``USE_XFORMERS_OPS`` so the
    (potentially slow) probe and the fallback warning happen at most once
    per process.

    Returns:
        bool: True if xformers is importable and supported on the current
        platform, False otherwise.
    """
    global USE_XFORMERS_OPS
    if USE_XFORMERS_OPS is not None:
        # Already probed; reuse the cached verdict.
        return USE_XFORMERS_OPS

    if current_platform.is_cuda() and current_platform.has_device_capability(
            100):
        # Xformers FA is not compatible with B200
        USE_XFORMERS_OPS = False
    else:
        try:
            from importlib.util import find_spec

            # BUG FIX: find_spec() returns None (it does not raise) when the
            # "ops" submodule is missing from an installed xformers package,
            # so the result must be checked explicitly rather than assuming
            # a non-raising call means the module exists.
            USE_XFORMERS_OPS = find_spec("xformers.ops") is not None
        except ImportError:
            # Raised when the parent "xformers" package itself is absent.
            USE_XFORMERS_OPS = False

    # the warning only needs to be shown once
    if not USE_XFORMERS_OPS:
        logger.warning("Xformers is not available, falling back.")

    return USE_XFORMERS_OPS
| 54 | + |
26 | 55 |
|
27 | 56 | class Attention(nn.Module):
|
28 | 57 | """Attention layer.
|
@@ -314,6 +343,10 @@ def __init__(
|
314 | 343 | _Backend.TORCH_SDPA, _Backend.XFORMERS, _Backend.PALLAS_VLLM_V1
|
315 | 344 | } else _Backend.TORCH_SDPA
|
316 | 345 |
|
| 346 | + if (self.attn_backend == _Backend.XFORMERS |
| 347 | + and not check_xformers_availability()): |
| 348 | + self.attn_backend = _Backend.TORCH_SDPA |
| 349 | + |
317 | 350 | def forward(
|
318 | 351 | self,
|
319 | 352 | query: torch.Tensor,
|
|
0 commit comments