Support dynamic fill values in hl.full (#316)

jansel · web-flow · commit 00d13b68b4a2 · 2025-07-14T17:33:01.000-07:00
diff --git a/helion/language/creation_ops.py b/helion/language/creation_ops.py
@@ -121,17 +121,27 @@ def _full_codegen(state: CodegenState) -> ast.AST:
     assert isinstance(fake_value, torch.Tensor)
     shape_str = state.device_function.tile_strategy.shape_str(fake_value.size())
     type_str = triton_type(fake_value.dtype)
-    value_str = state.device_function.literal_expr(state.proxy_arg(1))
-    return expr_from_string(f"tl.full({shape_str}, {value_str}, {type_str})")
+
+    # Check if the value is static (literal) or dynamic (node)
+    proxy_value = state.proxy_arg(1)
+    if isinstance(proxy_value, (int, float, bool)):
+        # For static values, use literal_expr to preserve special representations like float('-inf')
+        value_str = state.device_function.literal_expr(proxy_value)
+        return expr_from_string(f"tl.full({shape_str}, {value_str}, {type_str})")
+    # For dynamic values, use ast_arg to get the proper AST representation
+    value_ast = state.ast_arg(1)
+    return expr_from_string(f"tl.full({shape_str}, value, {type_str})", value=value_ast)
 
 
 @_decorators.get_masked_value(full)
 def _(
     node: torch.fx.Node,
-) -> float | bool:
+) -> float | bool | None:
     value = node.args[1]
-    assert isinstance(value, (int, float, bool))
-    return value
+    if isinstance(value, (int, float, bool)):
+        return value
+    # Return None for dynamic values (like tensor elements)
+    return None
 
 
 def arange(
diff --git a/test/test_loops.expected b/test/test_loops.expected
@@ -368,6 +368,39 @@ def fn(x: torch.Tensor, begin: torch.Tensor, end: torch.Tensor, *, _launcher=_de
     _launcher(_fn_kernel, (triton.cdiv(x.size(0), _BLOCK_SIZE_0),), x, begin, end, out, x.size(0), out.stride(0), x.stride(0), x.stride(1), _BLOCK_SIZE_0, _BLOCK_SIZE_1, num_warps=4, num_stages=3)
     return out
 
+--- assertExpectedJournal(TestLoops.test_full_with_dynamic_fill_value)
+from __future__ import annotations
+
+import torch
+import triton
+import triton.language as tl
+from helion.runtime import default_launcher as _default_launcher
+
+@triton.jit
+def _kernel_with_dynamic_fill_kernel(fill_value, x, out, out_stride_0, out_stride_1, x_stride_0, x_stride_1, B, C, _BLOCK_SIZE_0: tl.constexpr, _BLOCK_SIZE_1: tl.constexpr):
+    num_blocks_0 = tl.cdiv(B, _BLOCK_SIZE_0)
+    pid_0 = tl.program_id(0) % num_blocks_0
+    pid_1 = tl.program_id(0) // num_blocks_0
+    offset_0 = pid_0 * _BLOCK_SIZE_0
+    indices_0 = (offset_0 + tl.arange(0, _BLOCK_SIZE_0)).to(tl.int32)
+    mask_0 = indices_0 < B
+    offset_1 = pid_1 * _BLOCK_SIZE_1
+    indices_1 = (offset_1 + tl.arange(0, _BLOCK_SIZE_1)).to(tl.int32)
+    mask_1 = indices_1 < C
+    load = tl.load(fill_value + tl.zeros([], tl.int32), None)
+    filled = tl.full([_BLOCK_SIZE_0, _BLOCK_SIZE_1], load, tl.float32)
+    load_1 = tl.load(x + (indices_0[:, None] * x_stride_0 + indices_1[None, :] * x_stride_1), mask_0[:, None] & mask_1[None, :], other=0)
+    v_0 = load_1 + filled
+    tl.store(out + (indices_0[:, None] * out_stride_0 + indices_1[None, :] * out_stride_1), v_0, mask_0[:, None] & mask_1[None, :])
+
+def kernel_with_dynamic_fill(x: torch.Tensor, fill_value: torch.Tensor, *, _launcher=_default_launcher):
+    B, C = x.shape
+    out = torch.empty_like(x)
+    _BLOCK_SIZE_0 = 4
+    _BLOCK_SIZE_1 = 8
+    _launcher(_kernel_with_dynamic_fill_kernel, (triton.cdiv(B, _BLOCK_SIZE_0) * triton.cdiv(C, _BLOCK_SIZE_1),), fill_value, x, out, out.stride(0), out.stride(1), x.stride(0), x.stride(1), B, C, _BLOCK_SIZE_0, _BLOCK_SIZE_1, num_warps=4, num_stages=3)
+    return out
+
 --- assertExpectedJournal(TestLoops.test_l2_grouping_3d)
 from __future__ import annotations
 
diff --git a/test/test_loops.py b/test/test_loops.py
@@ -962,6 +962,33 @@ def add_3d_kernel_reordered(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         )  # Original dim 1 = second fastest varying
         self.assertIn("offset_0 = pid_2", code)  # Original dim 0 = slowest varying
 
+    def test_full_with_dynamic_fill_value(self):
+        """Test hl.full with dynamic fill value from scalar tensor."""
+
+        @helion.kernel(use_default_config=True)
+        def kernel_with_dynamic_fill(
+            x: torch.Tensor, fill_value: torch.Tensor
+        ) -> torch.Tensor:
+            B, C = x.shape
+            out = torch.empty_like(x)
+
+            for b_tile, c_tile in hl.tile([B, C]):
+                # Use scalar tensor as fill value
+                filled = hl.full((b_tile, c_tile), fill_value[0], x.dtype)
+                out[b_tile, c_tile] = x[b_tile, c_tile] + filled
+
+            return out
+
+        x = torch.randn(4, 8, device=DEVICE, dtype=torch.float32)
+        fill_value = torch.tensor([3.5], device=DEVICE, dtype=torch.float32)
+
+        code, result = code_and_output(kernel_with_dynamic_fill, (x, fill_value))
+        self.assertExpectedJournal(code)
+
+        # Verify correctness
+        expected = x + fill_value[0]
+        torch.testing.assert_close(result, expected)
+
 
 if __name__ == "__main__":
     unittest.main()