Commit fc5d2c8

Support int_scaled_mm on CPU (#121)
1 parent 8713b7d · commit fc5d2c8

File tree: 2 files changed (+11, -3 lines)

test/kernel/test_autotuner.py

Lines changed: 5 additions & 3 deletions

@@ -52,13 +52,15 @@ def test_int_mm(self, device, dtype):
     @parameterized.expand(
         [
             ("cuda", torch.bfloat16),
-            # TODO: ("cpu", torch.bfloat16),
+            ("cpu", torch.bfloat16),
             ("cuda", torch.float16),
-            # TODO: ("cpu", torch.float16),
+            ("cpu", torch.float16),
         ]
     )
-    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
     def test_int_scaled_mm(self, device, dtype):
+        if device == "cuda" and not torch.cuda.is_available():
+            self.skipTest(f"{device} not available")
+
         from torchao.kernel import intmm

         dtype = torch.bfloat16
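
The test change swaps the blanket @unittest.skipIf decorator, which skipped the whole test whenever CUDA was absent and so would have hidden the newly enabled CPU cases, for a per-case skip inside the test body. A minimal standalone sketch of that pattern (SkipPatternTest and its placeholder body are made up for illustration; the real test exercises intmm.int_scaled_matmul):

import unittest

import torch
from parameterized import parameterized


class SkipPatternTest(unittest.TestCase):
    # Parameterize over devices and skip per case, so a missing CUDA
    # runtime no longer prevents the CPU cases from running.
    @parameterized.expand([("cuda",), ("cpu",)])
    def test_device(self, device):
        if device == "cuda" and not torch.cuda.is_available():
            self.skipTest(f"{device} not available")
        x = torch.ones(2, 3, device=device)
        self.assertEqual(x.sum().item(), 6.0)


if __name__ == "__main__":
    unittest.main()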

torchao/kernel/intmm_triton.py

Lines changed: 6 additions & 0 deletions

@@ -356,3 +356,9 @@ def int_scaled_matmul_cuda(a, b, scales1):
         int_scaled_matmul_kernel, [a, b, scales1, c], int8_mm_kernel_configs
     )
     return int_scaled_matmul_kernel(a, b, scales1, c, best_config)
+
+
+@torch.library.impl(lib, "int_scaled_matmul", "CPU")
+def int_scaled_matmul_cpu(a, b, scales1):
+    c = torch._int_mm(a, b)
+    return c.to(scales1.dtype) * scales1
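
The new CPU implementation is registered against the same torch.library op as the Triton path, so callers go through the intmm module unchanged: torch._int_mm does the int8 matmul with int32 accumulation, and the result is cast to the scale dtype and multiplied by the scales. A hedged usage sketch follows; the shapes and the per-row (M, 1) scale layout are illustrative assumptions, and it requires a PyTorch build whose torch._int_mm has a CPU kernel:

import torch
from torchao.kernel import intmm

# Illustrative shapes (assumed): a is (M, K) int8, b is (K, N) int8,
# scales1 is assumed per-row, broadcasting over the output columns.
a = torch.randint(-128, 127, (32, 64), dtype=torch.int8)
b = torch.randint(-128, 127, (64, 16), dtype=torch.int8)
scales1 = torch.rand(32, 1, dtype=torch.bfloat16)

# On CPU tensors this dispatches to int_scaled_matmul_cpu above:
# torch._int_mm(a, b) accumulates in int32, then the result is cast
# to scales1.dtype and scaled.
c = intmm.int_scaled_matmul(a, b, scales1)
print(c.shape, c.dtype)  # torch.Size([32, 16]) torch.bfloat16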
