1 parent b616f6a commit 0ec3779
vllm/model_executor/layers/quantization/utils/fp8_utils.py
@@ -201,12 +201,13 @@ def apply_w8a8_block_fp8_linear_fake(
     return torch.empty(output_shape, dtype=input.dtype, device=input.device)
 
 
-direct_register_custom_op(
-    op_name="apply_w8a8_block_fp8_linear",
-    op_func=apply_w8a8_block_fp8_linear,
-    mutates_args=[],
-    fake_impl=apply_w8a8_block_fp8_linear_fake,
-)
+if not current_platform.is_cpu():
+    direct_register_custom_op(
+        op_name="apply_w8a8_block_fp8_linear",
+        op_func=apply_w8a8_block_fp8_linear,
+        mutates_args=[],
+        fake_impl=apply_w8a8_block_fp8_linear_fake,
+    )
 
 
 def input_to_float8(
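The diff guards the custom-op registration behind current_platform.is_cpu(), so CPU-only builds of vLLM never register the GPU-oriented apply_w8a8_block_fp8_linear op. As a rough standalone sketch of the same guard pattern using PyTorch's public torch.library API (the op name demo::my_linear and the helper _is_cpu_only are illustrative stand-ins, not vLLM code):

import torch
from torch.library import custom_op, register_fake


def _is_cpu_only() -> bool:
    # Illustrative stand-in for vLLM's current_platform.is_cpu().
    return not torch.cuda.is_available()


if not _is_cpu_only():
    # Register the custom op only on non-CPU platforms. The fake impl
    # gives torch.compile a shape/dtype-only version for tracing,
    # mirroring fake_impl=apply_w8a8_block_fp8_linear_fake above.
    @custom_op("demo::my_linear", mutates_args=())
    def my_linear(x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
        return x @ w.t()

    @register_fake("demo::my_linear")
    def _(x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
        # Same output shape/dtype/device as the real op, no computation.
        return x.new_empty(x.shape[0], w.shape[0])

Guarding the registration (rather than the call site) keeps the module importable on platforms where the op's kernels are unavailable.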