@@ -37,7 +37,6 @@ def _add_make_precompiler(x: torch.Tensor, y: torch.Tensor):
 --- assertExpectedJournal(TestExamples.test_attention_block_pointer)
 from __future__ import annotations
 
-import math
 import torch
 import triton
 import triton.language as tl
@@ -103,7 +102,6 @@ def attention(q_in: torch.Tensor, k_in: torch.Tensor, v_in: torch.Tensor):
     v_view = v_in.reshape([-1, n_dim, head_dim])
     k_view = k_in.reshape([-1, n_dim, head_dim]).transpose(1, 2)
     out = torch.empty_like(q_view)
-    sm_scale = 1.0 / math.sqrt(head_dim)
     _BLOCK_SIZE_1 = 128
     _BLOCK_SIZE_3 = 64
     _attention_kernel[64 * triton.cdiv(1024, _BLOCK_SIZE_1),](q_view, k_view, v_view, out, _BLOCK_SIZE_1, _BLOCK_SIZE_3, num_warps=4, num_stages=3)
@@ -119,7 +117,6 @@ def _attention_make_precompiler(q_in: torch.Tensor, k_in: torch.Tensor, v_in: torch.Tensor):
     v_view = v_in.reshape([-1, n_dim, head_dim])
     k_view = k_in.reshape([-1, n_dim, head_dim]).transpose(1, 2)
     out = torch.empty_like(q_view)
-    sm_scale = 1.0 / math.sqrt(head_dim)
     _BLOCK_SIZE_1 = 128
     _BLOCK_SIZE_3 = 64
     from helion.runtime.precompile_shim import make_precompiler
@@ -128,7 +125,6 @@ def _attention_make_precompiler(q_in: torch.Tensor, k_in: torch.Tensor, v_in: torch.Tensor):
 --- assertExpectedJournal(TestExamples.test_attention_dynamic)
 from __future__ import annotations
 
-import math
 import torch
 import triton
 import triton.language as tl
@@ -198,7 +194,6 @@ def attention(q_in: torch.Tensor, k_in: torch.Tensor, v_in: torch.Tensor):
     v_view = v_in.reshape([-1, n_dim, head_dim])
     k_view = k_in.reshape([-1, n_dim, head_dim]).transpose(1, 2)
     out = torch.empty_like(q_view)
-    sm_scale = 1.0 / math.sqrt(head_dim)
     _BLOCK_SIZE_1 = 32
     _RDIM_SIZE_2 = 64
     _BLOCK_SIZE_3 = 32
@@ -215,7 +210,6 @@ def _attention_make_precompiler(q_in: torch.Tensor, k_in: torch.Tensor, v_in: torch.Tensor):
     v_view = v_in.reshape([-1, n_dim, head_dim])
     k_view = k_in.reshape([-1, n_dim, head_dim]).transpose(1, 2)
     out = torch.empty_like(q_view)
-    sm_scale = 1.0 / math.sqrt(head_dim)
     _BLOCK_SIZE_1 = 32
     _RDIM_SIZE_2 = 64
     _BLOCK_SIZE_3 = 32
@@ -225,7 +219,6 @@ def _attention_make_precompiler(q_in: torch.Tensor, k_in: torch.Tensor, v_in: torch.Tensor):
 --- assertExpectedJournal(TestExamples.test_attention_pointer)
 from __future__ import annotations
 
-import math
 import torch
 import triton
 import triton.language as tl
@@ -291,7 +284,6 @@ def attention(q_in: torch.Tensor, k_in: torch.Tensor, v_in: torch.Tensor):
     v_view = v_in.reshape([-1, n_dim, head_dim])
     k_view = k_in.reshape([-1, n_dim, head_dim]).transpose(1, 2)
     out = torch.empty_like(q_view)
-    sm_scale = 1.0 / math.sqrt(head_dim)
     _BLOCK_SIZE_1 = 64
     _RDIM_SIZE_2 = 64
     _BLOCK_SIZE_3 = 64
@@ -308,7 +300,6 @@ def _attention_make_precompiler(q_in: torch.Tensor, k_in: torch.Tensor, v_in: torch.Tensor):
     v_view = v_in.reshape([-1, n_dim, head_dim])
     k_view = k_in.reshape([-1, n_dim, head_dim]).transpose(1, 2)
     out = torch.empty_like(q_view)
-    sm_scale = 1.0 / math.sqrt(head_dim)
     _BLOCK_SIZE_1 = 64
     _RDIM_SIZE_2 = 64
     _BLOCK_SIZE_3 = 64
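
For reference, every `sm_scale` line removed in these journals computed the standard softmax scaling factor for scaled-dot-product attention, 1/sqrt(head_dim). A minimal standalone sketch of that expression (illustrative only; the helper name and shapes below are hypothetical, not part of Helion's generated code):

    import math

    import torch

    # The value the deleted host-code lines computed: 1/sqrt(head_dim).
    def sm_scale(head_dim: int) -> float:
        return 1.0 / math.sqrt(head_dim)

    # Attention scores are scaled by this factor before the softmax.
    # Shapes here are illustrative (batch*heads=8, seq=1024, head_dim=64).
    q = torch.randn(8, 1024, 64)
    k = torch.randn(8, 1024, 64)
    scores = (q @ k.transpose(1, 2)) * sm_scale(q.shape[-1])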