Skip to content

Commit b982476

Browse files
committed
add/fix interface for cpu/cuda/rocm/xpu
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
1 parent 7890945 commit b982476

File tree

5 files changed

+57
-6
lines changed

5 files changed

+57
-6
lines changed

vllm/platforms/cpu.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,3 +275,30 @@ def default_v1(cls, model_config) -> bool:
275275
arch = cls.get_cpu_architecture()
276276
return (cls.supports_v1(model_config) and arch
277277
in (CpuArchEnum.X86, CpuArchEnum.POWERPC, CpuArchEnum.ARM))
278+
279+
@classmethod
def empty_cache(cls):
    """Intentional no-op: the CPU backend keeps no device-side memory cache to flush."""
282+
283+
@classmethod
def reset_peak_memory_stats(cls):
    """Intentional no-op: no peak-memory counters are tracked for the CPU backend."""
286+
287+
@classmethod
def mem_get_info(cls):
    """Best-effort analogue of ``torch.cuda.mem_get_info`` for the CPU backend.

    Returns:
        A ``(free, total)`` tuple of host memory sizes in bytes, or ``None``
        when the platform does not expose the POSIX sysconf counters
        (e.g. Windows, or libc builds without ``SC_AVPHYS_PAGES``) — the
        previous unimplemented behavior.
    """
    import os

    try:
        page_size = os.sysconf("SC_PAGE_SIZE")
        total = os.sysconf("SC_PHYS_PAGES") * page_size
        free = os.sysconf("SC_AVPHYS_PAGES") * page_size
    except (AttributeError, OSError, ValueError):
        # FIXME: no portable fallback yet; keep the unimplemented sentinel.
        return None
    if page_size <= 0 or total <= 0 or free < 0:
        # sysconf returns -1 for "supported but unavailable" — treat as unknown.
        return None
    return (free, total)
291+
292+
@classmethod
def memory_stats(cls):
    """Allocator statistics are not tracked on CPU; always returns ``None``."""
    # FIXME: impl — provide real statistics once the CPU backend tracks them.
    return None
296+
297+
@classmethod
def memory_reserved(cls):
    """Reserved-memory accounting is unavailable on CPU; always returns ``None``."""
    # FIXME: impl — report reserved bytes once the CPU backend tracks them.
    return None
301+
302+
@classmethod
def synchronize(cls):
    """Wait for outstanding work on the CPU "device" (kept for interface parity)."""
    torch.cpu.synchronize()

vllm/platforms/cuda.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@ def device_count(cls) -> int:
425425
return cuda_device_count_stateless()
426426

427427
@classmethod
def empty_cache(cls):
    """Release unused cached blocks held by the CUDA caching allocator."""
    torch.cuda.empty_cache()
430430

431431
@classmethod
@@ -446,7 +446,7 @@ def memory_reserved(cls):
446446

447447
@classmethod
def synchronize(cls):
    """Block until all queued work on the current CUDA device completes.

    Returns None per the interface-wide convention (torch.cuda.synchronize
    itself returns None, so dropping the ``return`` changed nothing).
    """
    torch.cuda.synchronize()
450450

451451

452452
# NVML utils

vllm/platforms/interface.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,7 @@ def stateless_init_device_torch_dist_pg(
549549
raise RuntimeError(f"Unsupported torch distributed backend: {backend}")
550550

551551
@classmethod
def empty_cache(cls):
    """Release cached device memory; platform subclasses must override.

    Raises:
        NotImplementedError: always, on the base platform interface.
    """
    raise NotImplementedError
554554

555555
@classmethod
@@ -570,7 +570,7 @@ def memory_reserved(cls):
570570

571571
@classmethod
def synchronize(cls):
    """Wait for all outstanding device work; platform subclasses must override.

    Raises:
        NotImplementedError: always, on the base platform interface.
    """
    raise NotImplementedError
574574

575575

576576
class UnspecifiedPlatform(Platform):

vllm/platforms/rocm.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,3 +463,27 @@ def stateless_init_device_torch_dist_pg(
463463
@classmethod
def device_count(cls) -> int:
    """Number of visible devices (reuses the CUDA/HIP stateless counter)."""
    return cuda_device_count_stateless()
466+
467+
@classmethod
def empty_cache(cls):
    """Release unused cached blocks via the torch.cuda API (HIP on ROCm builds)."""
    torch.cuda.empty_cache()
470+
471+
@classmethod
def reset_peak_memory_stats(cls):
    """Reset the caching allocator's peak-memory counters for the current device."""
    torch.cuda.reset_peak_memory_stats()
474+
475+
@classmethod
def mem_get_info(cls):
    """Return (free, total) device memory in bytes for the current device."""
    return torch.cuda.mem_get_info()
478+
479+
@classmethod
def memory_stats(cls):
    """Return the caching allocator's statistics dictionary for the current device."""
    return torch.cuda.memory_stats()
482+
483+
@classmethod
def memory_reserved(cls):
    """Bytes currently reserved by the caching allocator on the current device."""
    return torch.cuda.memory_reserved()
486+
487+
@classmethod
def synchronize(cls):
    """Block until all queued work on the current device has finished."""
    torch.cuda.synchronize()

vllm/platforms/xpu.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ def device_count(cls) -> int:
196196
return torch.xpu.device_count()
197197

198198
@classmethod
def empty_cache(cls):
    """Release unused cached memory held by the XPU caching allocator."""
    torch.xpu.empty_cache()
201201

202202
@classmethod
@@ -230,4 +230,4 @@ def memory_reserved(cls):
230230

231231
@classmethod
def synchronize(cls):
    """Block until all queued work on the current XPU device completes.

    Returns None per the interface-wide convention (torch.xpu.synchronize
    itself returns None, so dropping the ``return`` changed nothing).
    """
    torch.xpu.synchronize()

0 commit comments

Comments
 (0)