Add HELION_DEV_LOW_VRAM env var for low GPU memory machines

yf225 · yf225 · commit 2859d4135a25 · 2025-07-14T22:59:10.000-07:00
Some dev machines (e.g. GPU laptops) have low VRAM, which causes some
tritonbench inputs to OOM. This PR adds the HELION_DEV_LOW_VRAM env var
and uses smaller inputs when it is set to "1". Users explicitly opt into
this mode by setting the env var, instead of passively getting smaller
inputs whenever low VRAM is detected.
diff --git a/examples/jagged_mean.py b/examples/jagged_mean.py
@@ -1,14 +1,15 @@
 from __future__ import annotations
 
+import os
+
 import torch
 
 import helion
 from helion._testing import run_example
 import helion.language as hl
-from helion.utils import get_gpu_memory_info
 
-# TritonBench configuration - adjust based on available GPU memory
-if get_gpu_memory_info()[0] < 16.0:
+# TritonBench configuration - adjust based on HELION_DEV_LOW_VRAM environment variable
+if os.environ.get("HELION_DEV_LOW_VRAM", "0") == "1":
     # Low memory configuration
     TRITONBENCH_ARGS = {"B": 32, "M": 8, "seqlen": 64}
 
diff --git a/examples/rms_norm.py b/examples/rms_norm.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+import os
+
 import torch
 
 import helion
@@ -8,7 +10,8 @@
 
 # TritonBench configuration
 # TODO(yf225): reduction dim size = 8192 currently throws error. After it's fixed we can remove "num_inputs" extra arg.
-TRITONBENCH_ARGS = {"num_inputs": 3}
+if os.environ.get("HELION_DEV_LOW_VRAM", "0") == "1":
+    TRITONBENCH_ARGS = {"num_inputs": 3}
 
 
 @helion.kernel(static_shapes=True)
diff --git a/helion/utils.py b/helion/utils.py