Commit 6ec6f1d

Add error for using a host tensor directly (#306)

1 parent 25b3ab9

8 files changed, +63 -7 lines changed

docs/api/exceptions.md

Lines changed: 3 additions & 0 deletions
@@ -135,6 +135,9 @@ These exceptions occur when Helion language functions are used incorrectly with
 
     Raised for invalid types in tensor subscripts.
 
+.. autoclass:: HostTensorDirectUsage
+
+    Raised when host tensors are used directly in device code without proper indexing.
 ```
 
 ## Assignment and Variable Errors

helion/_compiler/device_ir.py

Lines changed: 27 additions & 0 deletions
@@ -920,6 +920,7 @@ def lower_to_device_ir(func: HostFunction) -> DeviceIR:
     for graph in device_ir.graphs:
         prepare_graph_lowerings(graph.graph)
     for graph in device_ir.graphs:
+        validate_host_tensor_usage(graph.graph)
         remove_unnecessary_tile_index(graph.graph)
         remove_unnecessary_masking(graph.graph)
     device_ir.build_rolled_reductions()
@@ -949,6 +950,32 @@ def codegen(self, state: CodegenState) -> list[object]:
         return codegen_helper_function_graph_info(self, state)
 
 
+def validate_host_tensor_usage(graph: torch.fx.Graph) -> None:
+    """
+    Validate that scalar _host_tensor ops only flow into allowed operations.
+    This replaces the AST visitor context detection with cleaner FX graph validation.
+    Only checks 0-dimensional tensors (scalars), not regular tensors.
+    Uses decorator metadata to determine which operations allow host tensors.
+    """
+    from ..language._decorators import is_api_func
+    from ..language._tracing_ops import _host_tensor
+
+    for node in graph.find_nodes(op="call_function", target=_host_tensor):
+        scalar_tensor_name = node.args[0]
+        assert isinstance(scalar_tensor_name, str), scalar_tensor_name
+
+        # Check all users of this scalar _host_tensor node
+        for user in node.users:
+            if user.op == "call_function":
+                # Check if this operation allows host tensors via decorator metadata
+                if not (
+                    is_api_func(user.target)
+                    and getattr(user.target, "_allow_host_tensor", False)
+                ):
+                    op_name = getattr(user.target, "__name__", str(user.target))
+                    raise exc.HostTensorDirectUsage(scalar_tensor_name, op_name)
+
+
 def remove_unnecessary_tile_index(graph: torch.fx.Graph) -> None:
     """
     Remove unnecessary tile_index nodes from the graph.
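The new pass relies only on the traced FX graph: it finds every `_host_tensor` node and rejects any consumer whose decorator metadata does not opt in. Below is a self-contained sketch of that users-based validation pattern on a hand-built `torch.fx.Graph`; `_host_tensor_stub` and `_allowed_load` are hypothetical stand-ins for Helion's tracing ops, and the `_allow_host_tensor` attribute mimics the decorator metadata introduced in this commit.

import torch
import torch.fx as fx


def _host_tensor_stub(name: str) -> torch.Tensor:
    """Hypothetical stand-in for Helion's _host_tensor tracing op."""
    raise AssertionError("trace-only placeholder")


def _allowed_load(tensor: object, index: object) -> object:
    """Hypothetical op whose decorator would have set allow_host_tensor=True."""
    raise AssertionError("trace-only placeholder")


_allowed_load._allow_host_tensor = True  # metadata flag the validator reads

# Hand-build a tiny graph: one host scalar, one disallowed and one allowed user.
graph = fx.Graph()
host = graph.call_function(_host_tensor_stub, ("scalar_tensor",))
bad = graph.call_function(torch.add, (host, host))   # direct use -> should be rejected
ok = graph.call_function(_allowed_load, (host, []))  # opted-in consumer -> fine
graph.output((bad, ok))

for node in graph.find_nodes(op="call_function", target=_host_tensor_stub):
    for user in node.users:
        if user.op == "call_function" and not getattr(
            user.target, "_allow_host_tensor", False
        ):
            op_name = getattr(user.target, "__name__", str(user.target))
            print(f"would raise HostTensorDirectUsage({node.args[0]!r}, {op_name!r})")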

helion/exc.py

Lines changed: 7 additions & 0 deletions
@@ -115,6 +115,13 @@ class NotAllowedOnDevice(BaseError):
     message = "The statement {} is not allowed inside the `hl.tile` or `hl.grid` loop."
 
 
+class HostTensorDirectUsage(BaseError):
+    message = (
+        "Direct use of host tensor '{0}' in op '{1}' not allowed inside the `hl.tile` or `hl.grid` loop. "
+        "First load it using {0}[...] or hl.load({0}, ...)."
+    )
+
+
 class ShapeSpecializingCall(BaseError):
     message = "Call would force shape specialization, try `hl.specialize(x)` or `hl.constexpr`."
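The two placeholders are positional so the tensor name can be reused in the fix-it hint. Assuming `BaseError` substitutes its constructor arguments via `str.format` (as the existing `{}`-style messages suggest), the message for the new test in test/test_errors.py below would render roughly as follows:

# Illustration only; assumes str.format-style substitution of the two args.
template = (
    "Direct use of host tensor '{0}' in op '{1}' not allowed inside the "
    "`hl.tile` or `hl.grid` loop. "
    "First load it using {0}[...] or hl.load({0}, ...)."
)
print(template.format("scalar_tensor", "add"))
# -> Direct use of host tensor 'scalar_tensor' in op 'add' not allowed inside
#    the `hl.tile` or `hl.grid` loop. First load it using scalar_tensor[...]
#    or hl.load(scalar_tensor, ...).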

helion/language/_decorators.py

Lines changed: 3 additions & 0 deletions
@@ -77,6 +77,7 @@ class APIFunc(Protocol):
     _prepare_args: Callable[[tuple[object, ...]], tuple[object, ...]]
     _get_masked_value: Callable[[torch.fx.Node], float | bool | None] | None
     _to_device_ir: Callable[..., object] | None
+    _allow_host_tensor: bool
     _signature: inspect.Signature
 
     def __call__(self, *args: object, **kwargs: object) -> object: ...
@@ -126,6 +127,7 @@ def api(
     is_device_only: bool = True,
     tiles_as_sizes: bool = False,
     cache_type: bool = False,
+    allow_host_tensor: bool = False,
     signature: inspect.Signature | None = None,
 ) -> _Decorator:
     def _impl(fn: _C) -> _C:
@@ -181,6 +183,7 @@ def wrapper(*args: object, **kwargs: object) -> object:
         api._fake_fn = None
         api._get_masked_value = None
         api._to_device_ir = None
+        api._allow_host_tensor = allow_host_tensor
         api._signature = signature or inspect.signature(
             cast("Callable[..., object]", fn)
         )
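The new keyword follows the module's existing pattern of stashing per-op metadata as attributes on the returned wrapper, which later passes read back with `getattr`. A generic sketch of that pattern (a toy decorator, not Helion's actual `_decorators.api`):

import functools
from typing import Callable, TypeVar

_F = TypeVar("_F", bound=Callable[..., object])


def api(*, allow_host_tensor: bool = False) -> Callable[[_F], _F]:
    """Toy decorator: records the flag as metadata on the wrapper."""

    def _impl(fn: _F) -> _F:
        @functools.wraps(fn)
        def wrapper(*args: object, **kwargs: object) -> object:
            return fn(*args, **kwargs)

        wrapper._allow_host_tensor = allow_host_tensor  # type: ignore[attr-defined]
        return wrapper  # type: ignore[return-value]

    return _impl


@api(allow_host_tensor=True)
def toy_load(tensor: object, index: object) -> object:
    return tensor


# A later validation pass only needs getattr; it never imports the op module.
assert getattr(toy_load, "_allow_host_tensor", False)

Keeping the flag on the function object lets the FX validation pass in device_ir.py stay decoupled from the individual op definitions.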

helion/language/_tracing_ops.py

Lines changed: 2 additions & 2 deletions
@@ -94,7 +94,7 @@ def _(state: CodegenState) -> None:
 
 # Note we can't DCE phi nodes because there may be a loop carry dependency not captured in the outer graph
 @has_side_effect
-@_decorators.api()
+@_decorators.api(allow_host_tensor=True)
 def _phi(lhs: object, rhs: object) -> object:
     """Combine values from different branches of a control flow."""
     raise AssertionError("this should never be called")
@@ -291,7 +291,7 @@ def _(node: torch.fx.Node) -> float | bool:
     return value
 
 
-@_decorators.api()
+@_decorators.api(allow_host_tensor=True)
 def _new_var(value: _T, /) -> _T:
     """
     Create a shallow copy of a value that is assigned a fresh variable in codegen.

helion/language/memory_ops.py

Lines changed: 3 additions & 3 deletions
@@ -19,7 +19,7 @@
 
 
 @has_side_effect
-@_decorators.api(tiles_as_sizes=True)
+@_decorators.api(tiles_as_sizes=True, allow_host_tensor=True)
 def store(
     tensor: torch.Tensor,
     index: list[object],
@@ -84,7 +84,7 @@ def _(state: CodegenState) -> ast.AST:
     )
 
 
-@_decorators.api(tiles_as_sizes=True)
+@_decorators.api(tiles_as_sizes=True, allow_host_tensor=True)
 def load(
     tensor: torch.Tensor, index: list[object], extra_mask: torch.Tensor | None = None
 ) -> torch.Tensor:
@@ -130,7 +130,7 @@ def _(node: torch.fx.Node) -> int:
 
 
 @has_side_effect
-@_decorators.api()
+@_decorators.api(allow_host_tensor=True)
 def atomic_add(
     target: torch.Tensor,
     index: list[object],
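These memory ops legitimately take the tensor itself as an argument, which is why they opt in with allow_host_tensor=True. For orientation, a sketch of their explicit call forms inside a kernel; only the parameters visible in this diff are confirmed, and the position of the value argument of `hl.store` after `index` is an assumption:

import torch
import helion
import helion.language as hl


@helion.kernel()
def copy_kernel(x: torch.Tensor) -> torch.Tensor:
    out = torch.empty_like(x)
    for tile in hl.tile(x.shape):
        val = hl.load(x, [tile])    # explicit form of x[tile]
        hl.store(out, [tile], val)  # explicit form of out[tile] = val
    return out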

helion/language/signal_wait.py

Lines changed: 2 additions & 2 deletions
@@ -18,7 +18,7 @@
 
 
 @has_side_effect
-@_decorators.api(tiles_as_sizes=True)
+@_decorators.api(tiles_as_sizes=True, allow_host_tensor=True)
 def wait(
     signal_pad: torch.Tensor,
     index: list[object],
@@ -158,7 +158,7 @@ def _(state: CodegenState) -> ast.AST:
 
 
 @has_side_effect
-@_decorators.api(tiles_as_sizes=True)
+@_decorators.api(tiles_as_sizes=True, allow_host_tensor=True)
 def signal(
     signal_pad: torch.Tensor,
     index: list[object],

test/test_errors.py

Lines changed: 16 additions & 0 deletions
@@ -198,6 +198,22 @@ def closure_fn():
         with self.assertRaises(helion.exc.StatementNotSupported):
             code_and_output(bad_fn, (torch.randn(8, device=DEVICE),))
 
+    def test_direct_scalar_tensor_in_device_context(self):
+        """Test that direct scalar tensor usage gives clear error in device code."""
+
+        @helion.kernel()
+        def bad_fn(x: torch.Tensor, scalar_tensor: torch.Tensor) -> torch.Tensor:
+            result = torch.empty_like(x)
+            for tile in hl.tile(x.shape):
+                result[tile] = x[tile] + scalar_tensor  # Error: direct scalar usage
+            return result
+
+        with self.assertRaises(helion.exc.HostTensorDirectUsage):
+            code_and_output(
+                bad_fn,
+                (torch.randn(4, 4, device=DEVICE), torch.tensor(3.0, device=DEVICE)),
+            )
+
 
 if __name__ == "__main__":
     unittest.main()
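For contrast, a variant the new check should accept, following the error message's suggestion to index the 0-d tensor before using it in device code (a sketch, not part of the committed tests):

import torch
import helion
import helion.language as hl


@helion.kernel()
def good_fn(x: torch.Tensor, scalar_tensor: torch.Tensor) -> torch.Tensor:
    result = torch.empty_like(x)
    for tile in hl.tile(x.shape):
        # Load the 0-d host tensor before combining it with device values.
        result[tile] = x[tile] + scalar_tensor[...]
    return result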
