diff --git a/examples/jagged_mean.py b/examples/jagged_mean.py
index 540865b1..cbc6e99d 100644
--- a/examples/jagged_mean.py
+++ b/examples/jagged_mean.py
@@ -1,14 +1,15 @@
 from __future__ import annotations
 
+import os
+
 import torch
 
 import helion
 from helion._testing import run_example
 import helion.language as hl
-from helion.utils import get_gpu_memory_info
 
-# TritonBench configuration - adjust based on available GPU memory
-if get_gpu_memory_info()[0] < 16.0:
+# TritonBench configuration - adjust based on HELION_DEV_LOW_VRAM environment variable
+if os.environ.get("HELION_DEV_LOW_VRAM", "0") == "1":
     # Low memory configuration
     TRITONBENCH_ARGS = {"B": 32, "M": 8, "seqlen": 64}
 
diff --git a/helion/utils.py b/helion/utils.py
deleted file mode 100644
index 0e6f9177..00000000
--- a/helion/utils.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from __future__ import annotations
-
-import torch
-
-
-def get_gpu_memory_info(device_id: int | None = None) -> tuple[float, float]:
-    """
-    Get total and available GPU memory in GB.
-
-    Args:
-        device_id: GPU device ID. If None, uses current device.
-
-    Returns:
-        Tuple of (total_memory_gb, available_memory_gb)
-    """
-    if not torch.cuda.is_available():
-        return (0.0, 0.0)
-
-    if device_id is None:
-        device_id = torch.cuda.current_device()
-
-    # Get total memory
-    total_memory = torch.cuda.get_device_properties(device_id).total_memory
-
-    # Get reserved memory (memory allocated by the caching allocator)
-    reserved_memory = torch.cuda.memory_reserved(device_id)
-
-    # Available memory is approximately total - reserved
-    available_memory = total_memory - reserved_memory
-
-    # Convert to GB
-    total_gb = total_memory / (1024**3)
-    available_gb = available_memory / (1024**3)
-
-    return (total_gb, available_gb)
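
Usage sketch (hypothetical, not part of the diff): because the diff evaluates
HELION_DEV_LOW_VRAM at module import time, the flag must be set in the
environment before examples/jagged_mean.py is imported (equivalently,
HELION_DEV_LOW_VRAM=1 on the command line when running the example).

    import os

    # Hypothetical sketch: opt in to the low-memory TritonBench configuration.
    # Set the flag before importing the example module, since the check in
    # examples/jagged_mean.py runs at import time.
    os.environ["HELION_DEV_LOW_VRAM"] = "1"

    # Mirrors the exact check introduced by the diff.
    assert os.environ.get("HELION_DEV_LOW_VRAM", "0") == "1"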