Commit f460983

[Bugfix] Fix Mistral3 support on SM100/SM120 (#20998)
Signed-off-by: mgoin <mgoin64@gmail.com>
Parent: e9534c7

1 file changed (+7, −2 lines)

vllm/model_executor/models/pixtral.py

Lines changed: 7 additions & 2 deletions

@@ -43,6 +43,7 @@
                                         PromptReplacement, PromptUpdate,
                                         PromptUpdateDetails)
 from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
+from vllm.platforms import current_platform
 from vllm.sequence import IntermediateTensors
 from vllm.transformers_utils.tokenizer import (MistralTokenizer,
                                                cached_tokenizer_from_config)
@@ -54,7 +55,12 @@
 
 try:
     from xformers import ops as xops
-    USE_XFORMERS_OPS = True
+    if (current_platform.is_cuda()
+            and current_platform.has_device_capability(100)):
+        # Xformers FA is not compatible with B200
+        USE_XFORMERS_OPS = False
+    else:
+        USE_XFORMERS_OPS = True
 except ImportError:
     USE_XFORMERS_OPS = False
 
@@ -1082,7 +1088,6 @@ def forward(
         # Transpose q and k back for attention
         q = q.transpose(1, 2).contiguous()
         k = k.transpose(1, 2).contiguous()
-
         out = xops.memory_efficient_attention(q,
                                               k,
                                               v,
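For context: SM100 and SM120 are the CUDA compute capabilities of NVIDIA's Blackwell GPUs (e.g. B200 and the RTX 50 series), and vLLM packs a capability as major * 10 + minor, so current_platform.has_device_capability(100) is true for any device at or above capability 10.0 and the new gate covers both SM100 and SM120. Below is a minimal, illustrative sketch of the same gating pattern written against torch.cuda.get_device_capability() directly; the helper name xformers_ops_usable is hypothetical and not part of vLLM:

import torch

def xformers_ops_usable() -> bool:
    # Hypothetical helper mirroring the commit's gate: xformers'
    # memory-efficient attention is not used on SM100+ (Blackwell).
    if not torch.cuda.is_available():
        return False
    major, minor = torch.cuda.get_device_capability()
    # vLLM packs capability as major * 10 + minor, so 100 == SM100.
    return major * 10 + minor < 100

try:
    from xformers import ops as xops  # optional dependency
    USE_XFORMERS_OPS = xformers_ops_usable()
except ImportError:
    USE_XFORMERS_OPS = False

When USE_XFORMERS_OPS is False, the model is expected to take a non-xformers attention path instead of calling xops.memory_efficient_attention.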
