diff --git a/examples/jagged_mean.py b/examples/jagged_mean.py
index 540865b1..cbc6e99d 100644
--- a/examples/jagged_mean.py
+++ b/examples/jagged_mean.py
@@ -1,14 +1,15 @@
 from __future__ import annotations
 
+import os
+
 import torch
 
 import helion
 from helion._testing import run_example
 import helion.language as hl
-from helion.utils import get_gpu_memory_info
 
-# TritonBench configuration - adjust based on available GPU memory
-if get_gpu_memory_info()[0] < 16.0:
+# TritonBench configuration - adjust based on HELION_DEV_LOW_VRAM environment variable
+if os.environ.get("HELION_DEV_LOW_VRAM", "0") == "1":
     # Low memory configuration
     TRITONBENCH_ARGS = {"B": 32, "M": 8, "seqlen": 64}
 
diff --git a/helion/utils.py b/helion/utils.py
deleted file mode 100644
index 0e6f9177..00000000
--- a/helion/utils.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from __future__ import annotations
-
-import torch
-
-
-def get_gpu_memory_info(device_id: int | None = None) -> tuple[float, float]:
-    """
-    Get total and available GPU memory in GB.
-
-    Args:
-        device_id: GPU device ID. If None, uses current device.
-
-    Returns:
-        Tuple of (total_memory_gb, available_memory_gb)
-    """
-    if not torch.cuda.is_available():
-        return (0.0, 0.0)
-
-    if device_id is None:
-        device_id = torch.cuda.current_device()
-
-    # Get total memory
-    total_memory = torch.cuda.get_device_properties(device_id).total_memory
-
-    # Get reserved memory (memory allocated by the caching allocator)
-    reserved_memory = torch.cuda.memory_reserved(device_id)
-
-    # Available memory is approximately total - reserved
-    available_memory = total_memory - reserved_memory
-
-    # Convert to GB
-    total_gb = total_memory / (1024**3)
-    available_gb = available_memory / (1024**3)
-
-    return (total_gb, available_gb)
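
Usage sketch (hypothetical, not part of the diff): because the diff evaluates
HELION_DEV_LOW_VRAM at module import time, the flag must be set in the
environment before examples/jagged_mean.py is imported (equivalently,
HELION_DEV_LOW_VRAM=1 on the command line when running the example).

    import os

    # Hypothetical sketch: opt in to the low-memory TritonBench configuration.
    # Set the flag before importing the example module, since the check in
    # examples/jagged_mean.py runs at import time.
    os.environ["HELION_DEV_LOW_VRAM"] = "1"

    # Mirrors the exact check introduced by the diff.
    assert os.environ.get("HELION_DEV_LOW_VRAM", "0") == "1"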