 from __future__ import annotations
 
+import unittest
+
 from expecttest import TestCase
 import pytest
 import torch
@@ -53,3 +55,66 @@ def add3(x, y):
         code_and_output(add2, (x, x))
 
         code_and_output(add3, (x, x))
+
+    def test_inputs(self):
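+        # arguments arrive nested in a list, a dict, and a tuple; the generated
+        # Triton kernel below flattens (and de-duplicates) these containers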
+        @helion.kernel
+        def kernel(a_list, b_dict, b_tuple):
+            a0, a1 = a_list
+            b0 = b_dict["b0"]
+            (b1,) = b_tuple
+            c0, c1 = torch.empty_like(a0), torch.empty_like(a1)
+            for tile in hl.tile(a0.size()):
+                c0[tile] = a0[tile] + b0[tile]
+                c1[tile] = a1[tile] + b1[tile]
+            return [c0, c1]
+
+        x = torch.randn(4, device=DEVICE)
+        code, result = code_and_output(kernel, ([x, x], {"b0": x}, (x,)))
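+        # every container holds the same tensor x, so both outputs equal x + x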
+        torch.testing.assert_close(result[0], 2 * x)
+        torch.testing.assert_close(result[1], 2 * x)
+        self.assertExpectedInline(
+            code,
+            """\
+from __future__ import annotations
+
+import torch
+import triton
+import triton.language as tl
+
+@triton.jit
+def _kernel_kernel(a0, c0, c1, a0_size_0, a0_stride_0, c0_stride_0, c1_stride_0, _BLOCK_SIZE_0: tl.constexpr):
+    pid_0 = tl.program_id(0)
+    offset_0 = pid_0 * _BLOCK_SIZE_0
+    indices_0 = offset_0 + tl.arange(0, _BLOCK_SIZE_0).to(tl.int32)
+    mask_0 = indices_0 < a0_size_0
+    load = tl.load(a0 + indices_0 * a0_stride_0, mask_0, other=0)
+    load_1 = tl.load(a0 + indices_0 * a0_stride_0, mask_0, other=0)
+    v_0 = load + load_1
+    tl.store(c0 + indices_0 * c0_stride_0, v_0, mask_0)
+    load_2 = tl.load(a0 + indices_0 * a0_stride_0, mask_0, other=0)
+    load_3 = tl.load(a0 + indices_0 * a0_stride_0, mask_0, other=0)
+    v_1 = load_2 + load_3
+    tl.store(c1 + indices_0 * c1_stride_0, v_1, mask_0)
+
+def kernel(a_list, b_dict, b_tuple):
+    a0, a1 = a_list
+    b0 = b_dict['b0']
+    b1, = b_tuple
+    c0, c1 = (torch.empty_like(a0), torch.empty_like(a1))
+    _BLOCK_SIZE_0 = 4
+    _kernel_kernel[triton.cdiv(a0.size(0), _BLOCK_SIZE_0),](a0, c0, c1, a0.size(0), a0.stride(0), c0.stride(0), c1.stride(0), _BLOCK_SIZE_0, num_warps=4, num_stages=3)
+    return [c0, c1]
+
+def _kernel_make_precompiler(a_list, b_dict, b_tuple):
+    a0, a1 = a_list
+    b0 = b_dict['b0']
+    b1, = b_tuple
+    c0, c1 = (torch.empty_like(a0), torch.empty_like(a1))
+    _BLOCK_SIZE_0 = 4
+    from helion.runtime.precompile_shim import make_precompiler
+    return make_precompiler(_kernel_kernel)(a0, c0, c1, a0.size(0), a0.stride(0), c0.stride(0), c1.stride(0), _BLOCK_SIZE_0, num_warps=4, num_stages=3)""",
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()