From 29b003ce423494946a5d506a7252adf1ad606be3 Mon Sep 17 00:00:00 2001
From: Will Feng
Date: Mon, 14 Jul 2025 22:51:14 -0700
Subject: [PATCH] Add HELION_DEV_LOW_VRAM env var for low GPU memory machines

Some dev machines (e.g. GPU laptops) have low VRAM, which causes some
tritonbench inputs to OOM. This PR adds a HELION_DEV_LOW_VRAM env var
and uses smaller inputs when it is set. Users opt into this mode
explicitly by setting the env var, instead of passively getting smaller
inputs because the machine's detected VRAM is low.

stack-info: PR: https://github.com/pytorch-labs/helion/pull/325, branch: yf225/stack/31
---
 examples/jagged_mean.py |  7 ++++---
 helion/utils.py         | 35 -----------------------------------
 2 files changed, 4 insertions(+), 38 deletions(-)
 delete mode 100644 helion/utils.py

diff --git a/examples/jagged_mean.py b/examples/jagged_mean.py
index 540865b1..cbc6e99d 100644
--- a/examples/jagged_mean.py
+++ b/examples/jagged_mean.py
@@ -1,14 +1,15 @@
 from __future__ import annotations
 
+import os
+
 import torch
 
 import helion
 from helion._testing import run_example
 import helion.language as hl
-from helion.utils import get_gpu_memory_info
 
-# TritonBench configuration - adjust based on available GPU memory
-if get_gpu_memory_info()[0] < 16.0:
+# TritonBench configuration - adjust based on HELION_DEV_LOW_VRAM environment variable
+if os.environ.get("HELION_DEV_LOW_VRAM", "0") == "1":
     # Low memory configuration
     TRITONBENCH_ARGS = {"B": 32, "M": 8, "seqlen": 64}
 
diff --git a/helion/utils.py b/helion/utils.py
deleted file mode 100644
index 0e6f9177..00000000
--- a/helion/utils.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from __future__ import annotations
-
-import torch
-
-
-def get_gpu_memory_info(device_id: int | None = None) -> tuple[float, float]:
-    """
-    Get total and available GPU memory in GB.
-
-    Args:
-        device_id: GPU device ID. If None, uses current device.
-
-    Returns:
-        Tuple of (total_memory_gb, available_memory_gb)
-    """
-    if not torch.cuda.is_available():
-        return (0.0, 0.0)
-
-    if device_id is None:
-        device_id = torch.cuda.current_device()
-
-    # Get total memory
-    total_memory = torch.cuda.get_device_properties(device_id).total_memory
-
-    # Get reserved memory (memory allocated by the caching allocator)
-    reserved_memory = torch.cuda.memory_reserved(device_id)
-
-    # Available memory is approximately total - reserved
-    available_memory = total_memory - reserved_memory
-
-    # Convert to GB
-    total_gb = total_memory / (1024**3)
-    available_gb = available_memory / (1024**3)
-
-    return (total_gb, available_gb)
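
---

Reviewer note: a minimal sketch of opting into the new mode from Python.
The subprocess invocation and the direct-script entry point below are
illustrative assumptions, not part of this patch; the only mechanism the
patch itself defines is the HELION_DEV_LOW_VRAM check at import time of
examples/jagged_mean.py.

    import os
    import subprocess

    # Assumption: running the example as a standalone script. Any process
    # that imports examples/jagged_mean.py sees the same behavior, since
    # the env var is read at module import time.
    env = dict(os.environ, HELION_DEV_LOW_VRAM="1")  # opt into small inputs
    subprocess.run(["python", "examples/jagged_mean.py"], env=env, check=True)

Because the env var is an explicit opt-in, a machine with plenty of VRAM
behaves identically whether or not a GPU is present, which is what removes
the need for the deleted get_gpu_memory_info() auto-detection.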