Commit a9e7a00

Fix API typo and remove FP8-on-V1 restriction
1 parent 8c211e5 commit a9e7a00

2 files changed: 1 addition, 18 deletions


vllm/attention/ops/prefix_prefill.py

Lines changed: 1 addition & 1 deletion
@@ -38,11 +38,11 @@ def _fwd_kernel(Q,
                 V,
                 K_cache,
                 V_cache,
-                out_scale,
                 B_Loc,
                 sm_scale,
                 k_scale,
                 v_scale,
+                out_scale,
                 B_Start_Loc,
                 B_Seqlen,
                 x: tl.constexpr,
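
The one-line change above moves out_scale from between V_cache and B_Loc down to between v_scale and B_Start_Loc, so the kernel's parameter list matches the order in which the launch site passes its arguments. Triton kernels are typically launched with positional arguments, so a parameter declared in the wrong slot silently binds to whatever value the caller passes in that position. A minimal sketch of the failure mode, using plain Python functions rather than the actual _fwd_kernel (all argument values here are illustrative):

    # Simplified stand-ins for the kernel signature before and after the fix.
    def kernel_before(V_cache, out_scale, B_Loc, sm_scale, k_scale, v_scale):
        # out_scale is declared too early in the parameter list.
        return {"out_scale": out_scale, "B_Loc": B_Loc}

    def kernel_after(V_cache, B_Loc, sm_scale, k_scale, v_scale, out_scale):
        # out_scale now sits where the call site actually passes it.
        return {"out_scale": out_scale, "B_Loc": B_Loc}

    # A call site that passes everything positionally, out_scale last:
    args = ("V_CACHE", "B_LOC", 0.125, 1.0, 1.0, 0.5)

    print(kernel_before(*args))  # {'out_scale': 'B_LOC', 'B_Loc': 0.125} -- misbound
    print(kernel_after(*args))   # {'out_scale': 0.5, 'B_Loc': 'B_LOC'} -- correct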

vllm/engine/arg_utils.py

Lines changed: 0 additions & 17 deletions
@@ -1368,23 +1368,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
                                recommend_to_remove=False)
             return False
 
-        if current_platform.is_rocm():
-            from vllm.model_executor.layers.quantization.fp8 import Fp8Config
-            load_config = self.create_load_config()
-            quantization_config = VllmConfig.get_quantization_config(
-                model_config, load_config)
-            if isinstance(quantization_config, Fp8Config):
-                _raise_or_fallback(feature_name="fp8 for ROCm",
-                                   recommend_to_remove=False)
-                return False
-            from vllm.model_executor.layers.quantization.quark.quark import (
-                QuarkConfig)
-
-            if isinstance(quantization_config, QuarkConfig
-                          ) and quantization_config.has_fp8_layer_weights():
-                _raise_or_fallback(feature_name="Quark fp8 for ROCm",
-                                   recommend_to_remove=False)
-
         # No Fp8 KV cache so far.
         if self.kv_cache_dtype != "auto":
             fp8_attention = self.kv_cache_dtype.startswith("fp8")
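
The deleted block was what gated FP8-quantized models on ROCm out of the V1 engine. As the diff context shows, _is_v1_supported_oracle acts as a gatekeeper: each check either raises (when V1 was explicitly requested) or logs and falls back to the V0 engine by returning False. A rough, self-contained sketch of that raise-or-fallback shape, simplified from the real method (the UnsupportedOnV1 exception, the v1_requested flag, and is_v1_supported are illustrative stand-ins, not vLLM's actual API):

    # Simplified sketch of the raise-or-fallback pattern; not vLLM's code.
    class UnsupportedOnV1(Exception):
        pass

    def _raise_or_fallback(feature_name: str, recommend_to_remove: bool,
                           v1_requested: bool = False) -> None:
        # Fail loudly if the user explicitly asked for V1; otherwise warn so
        # the caller can return False and fall back to the V0 engine.
        if v1_requested:
            raise UnsupportedOnV1(f"{feature_name} is not supported on V1")
        msg = f"Falling back to V0: {feature_name} is not supported on V1."
        if recommend_to_remove:
            msg += " Consider removing this setting."
        print(msg)

    def is_v1_supported(kv_cache_dtype: str = "auto") -> bool:
        # After this commit, FP8 model weights on ROCm no longer trip a
        # fallback here; the FP8 KV-cache check below still does.
        if kv_cache_dtype != "auto" and kv_cache_dtype.startswith("fp8"):
            _raise_or_fallback(feature_name="fp8 KV cache",
                               recommend_to_remove=False)
            return False
        return True

    print(is_v1_supported())            # True
    print(is_v1_supported("fp8_e4m3"))  # prints fallback notice, then False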
