From 29b003ce423494946a5d506a7252adf1ad606be3 Mon Sep 17 00:00:00 2001
From: Will Feng
Date: Mon, 14 Jul 2025 22:51:14 -0700
Subject: [PATCH] Add HELION_DEV_LOW_VRAM env var for low GPU memory machines

Some dev machines (e.g. GPU laptops) have low VRAM, which causes some
tritonbench inputs to OOM. This PR adds a HELION_DEV_LOW_VRAM env var
and uses smaller inputs when it is set. Users opt into this mode
explicitly by setting the env var, instead of passively getting smaller
inputs because the machine's detected VRAM is low.

stack-info: PR: https://github.com/pytorch-labs/helion/pull/325, branch: yf225/stack/31
---
 examples/jagged_mean.py |  7 ++++---
 helion/utils.py         | 35 -----------------------------------
 2 files changed, 4 insertions(+), 38 deletions(-)
 delete mode 100644 helion/utils.py

diff --git a/examples/jagged_mean.py b/examples/jagged_mean.py
index 540865b1..cbc6e99d 100644
--- a/examples/jagged_mean.py
+++ b/examples/jagged_mean.py
@@ -1,14 +1,15 @@
 from __future__ import annotations
 
+import os
+
 import torch
 
 import helion
 from helion._testing import run_example
 import helion.language as hl
-from helion.utils import get_gpu_memory_info
 
-# TritonBench configuration - adjust based on available GPU memory
-if get_gpu_memory_info()[0] < 16.0:
+# TritonBench configuration - adjust based on HELION_DEV_LOW_VRAM environment variable
+if os.environ.get("HELION_DEV_LOW_VRAM", "0") == "1":
     # Low memory configuration
     TRITONBENCH_ARGS = {"B": 32, "M": 8, "seqlen": 64}
 
diff --git a/helion/utils.py b/helion/utils.py
deleted file mode 100644
index 0e6f9177..00000000
--- a/helion/utils.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from __future__ import annotations
-
-import torch
-
-
-def get_gpu_memory_info(device_id: int | None = None) -> tuple[float, float]:
-    """
-    Get total and available GPU memory in GB.
-
-    Args:
-        device_id: GPU device ID. If None, uses current device.
-
-    Returns:
-        Tuple of (total_memory_gb, available_memory_gb)
-    """
-    if not torch.cuda.is_available():
-        return (0.0, 0.0)
-
-    if device_id is None:
-        device_id = torch.cuda.current_device()
-
-    # Get total memory
-    total_memory = torch.cuda.get_device_properties(device_id).total_memory
-
-    # Get reserved memory (memory allocated by the caching allocator)
-    reserved_memory = torch.cuda.memory_reserved(device_id)
-
-    # Available memory is approximately total - reserved
-    available_memory = total_memory - reserved_memory
-
-    # Convert to GB
-    total_gb = total_memory / (1024**3)
-    available_gb = available_memory / (1024**3)
-
-    return (total_gb, available_gb)
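
---

Reviewer note: a minimal sketch of opting into the new mode from Python.
The subprocess invocation and the direct-script entry point below are
illustrative assumptions, not part of this patch; the only mechanism the
patch itself defines is the HELION_DEV_LOW_VRAM check at import time of
examples/jagged_mean.py.

    import os
    import subprocess

    # Assumption: running the example as a standalone script. Any process
    # that imports examples/jagged_mean.py sees the same behavior, since
    # the env var is read at module import time.
    env = dict(os.environ, HELION_DEV_LOW_VRAM="1")  # opt into small inputs
    subprocess.run(["python", "examples/jagged_mean.py"], env=env, check=True)

Because the env var is an explicit opt-in, a machine with plenty of VRAM
behaves identically whether or not a GPU is present, which is what removes
the need for the deleted get_gpu_memory_info() auto-detection.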