
Commit 841d104

move float8 blockwise kernels out of prototype

Summary: These will be useful as a fallback path for a_1_128_w_128_128 (DeepSeek) scaling support for float8 inference. Bringing them out of the prototype folder.

Test Plan:
```
pytest test/kernel/test_blockwise_triton.py -s -x
python benchmarks/benchmark_blockwise_scaled_linear_triton.py
```

Reviewers:
Subscribers:
Tasks:
Tags:

ghstack-source-id: 0fb2407
ghstack-comment-id: 3460951786
Pull-Request: #3256

1 parent 0ac0305 commit 841d104

File tree

5 files changed: +6 −5 lines changed

benchmarks/benchmark_blockwise_scaled_linear_triton.py (1 addition, 1 deletion)

```diff
@@ -13,7 +13,7 @@
 from triton.testing import do_bench
 
 from torchao.float8.float8_utils import compute_error
-from torchao.prototype.blockwise_fp8_inference.blockwise_quantization import (
+from torchao.kernel.blockwise_quantization import (
     blockwise_fp8_gemm,
     fp8_blockwise_act_quant,
     fp8_blockwise_weight_quant,
```

test/prototype/test_blockwise_triton.py renamed to test/kernel/test_blockwise_triton.py (1 addition, 1 deletion)

```diff
@@ -11,7 +11,7 @@
 
 triton = pytest.importorskip("triton", reason="Triton required to run this test")
 
-from torchao.prototype.blockwise_fp8_inference.blockwise_quantization import (
+from torchao.kernel.blockwise_quantization import (
     blockwise_fp8_gemm,
     fp8_blockwise_act_quant,
     fp8_blockwise_weight_dequant,
```

torchao/prototype/blockwise_fp8_inference/__init__.py (3 additions, 2 deletions)

```diff
@@ -1,11 +1,12 @@
-from .blockwise_linear import BlockwiseQuantLinear
-from .blockwise_quantization import (
+from torchao.kernel.blockwise_quantization import (
     blockwise_fp8_gemm,
     fp8_blockwise_act_quant,
     fp8_blockwise_weight_dequant,
     fp8_blockwise_weight_quant,
 )
 
+from .blockwise_linear import BlockwiseQuantLinear
+
 __all__ = [
     "blockwise_fp8_gemm",
     "BlockwiseQuantLinear",
```
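The `__init__.py` change above keeps the old prototype import path working by re-exporting the kernels from their new `torchao.kernel` home. Below is a minimal, self-contained sketch of that re-export pattern; the module names (`mylib.kernel`, `mylib.prototype`) and the toy gemm are hypothetical stand-ins, not torchao's actual code.

```python
import sys
import types

# Hypothetical stand-in package; the real code uses torchao.kernel and
# torchao.prototype.blockwise_fp8_inference.
pkg = types.ModuleType("mylib")
pkg.__path__ = []  # mark as a package so submodule imports resolve
sys.modules["mylib"] = pkg

# "New" home of the kernel after the move.
kernel = types.ModuleType("mylib.kernel")

def blockwise_fp8_gemm(a, b):
    # Placeholder for the real Triton kernel: plain matmul on nested lists.
    return [[sum(x * y for x, y in zip(row, col)) for col in zip(*b)]
            for row in a]

kernel.blockwise_fp8_gemm = blockwise_fp8_gemm
sys.modules["mylib.kernel"] = kernel

# "Old" prototype module re-exports from the new location, as in the diff,
# so existing `from mylib.prototype import ...` call sites keep working.
prototype = types.ModuleType("mylib.prototype")
prototype.blockwise_fp8_gemm = kernel.blockwise_fp8_gemm
prototype.__all__ = ["blockwise_fp8_gemm"]
sys.modules["mylib.prototype"] = prototype

# Both import paths resolve to the same function object.
from mylib.kernel import blockwise_fp8_gemm as new_path
from mylib.prototype import blockwise_fp8_gemm as old_path
assert old_path is new_path
print(new_path([[1, 2]], [[3], [4]]))  # [[11]]
```

This is why the commit can move the kernels without breaking downstream users: callers importing from the prototype path silently get the relocated implementation.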

torchao/prototype/blockwise_fp8_inference/blockwise_linear.py (1 addition, 1 deletion)

```diff
@@ -7,7 +7,7 @@
 import torch
 from torch import nn
 
-from torchao.prototype.blockwise_fp8_inference.blockwise_quantization import (
+from torchao.kernel.blockwise_quantization import (
     blockwise_fp8_gemm,
     fp8_blockwise_act_quant,
 )
```
