Skip to content

Commit 220e2ed

Browse files
committed
replace memory related APIs
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
1 parent baa4f9e commit 220e2ed

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

vllm/utils/__init__.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2553,10 +2553,10 @@ def __post_init__(self):
25532553

25542554
def measure(self):
25552555
# we measure the torch peak memory usage via allocated_bytes,
2556-
# rather than `torch.cuda.memory_reserved()` .
2557-
# After `torch.cuda.reset_peak_memory_stats()`,
2558-
# `torch.cuda.memory_reserved()` will keep growing, and only shrink
2559-
# when we call `current_platform.empty_cache()` or OOM happens.
2556+
# rather than `current_platform.memory_reserved()` .
2557+
# After `current_platform.reset_peak_memory_stats()`,
2558+
# `current_platform.memory_reserved()` will keep growing, and only
2559+
# shrink when we call `current_platform.empty_cache()` or OOM happens.
25602560
from vllm.platforms import current_platform
25612561

25622562
self.torch_peak = current_platform.memory_stats().get(
@@ -2565,7 +2565,7 @@ def measure(self):
25652565
self.free_memory, self.total_memory = current_platform.mem_get_info()
25662566
self.cuda_memory = self.total_memory - self.free_memory
25672567

2568-
# torch.cuda.memory_reserved() is how many bytes
2568+
# current_platform.memory_reserved() is how many bytes
25692569
# PyTorch gets from cuda (by calling cudaMalloc, etc.)
25702570
# this is used to measure the non-torch memory usage
25712571
self.torch_memory = current_platform.memory_reserved()

0 commit comments

Comments (0)