Update all-reduce max-size thresholds and make the symm-mem size check strict

ilmarkov · ilmarkov · commit 4950fad08543 · 2025-07-16T11:12:30.000Z
Signed-off-by: ilmarkov <markovilya197@gmail.com>
diff --git a/vllm/distributed/device_communicators/all_reduce_utils.py b/vllm/distributed/device_communicators/all_reduce_utils.py
@@ -28,29 +28,27 @@
 
 # Max size for each world size in case symmetric memory is available
 # For different SM architectures
-
-# TODO(ilia): update max sizes for 6, 8 for sm90
 CUSTOM_ALL_REDUCE_MAX_SIZES = {
     DeviceCapability(9, 0): {
         2: 64 * MiB,  # 64 MB
-        4: MiB,  # 1 MB
-        6: MiB,  # 1 MB
-        8: MiB // 2,  # 512 KB
+        4: 32 * MiB,  # 32 MB
+        6: MiB // 2,  # 512 KB
+        8: MiB // 4,  # 256 KB
     },
     DeviceCapability(10, 0): {
         2: 2 * MiB,  # 2 MB
         4: 2 * MiB,  # 2 MB
-        6: 8 * MiB,  # 8 MB
-        8: 8 * MiB,  # 8 MB
+        6: 2 * MiB,  # 2 MB
+        8: MiB,  # 1 MB
     }
 }
 
 SYMM_MEM_ALL_REDUCE_MAX_SIZES = {
     DeviceCapability(9, 0): {
         2: 64 * MiB,  # 64 MB
         4: 32 * MiB,  # 32 MB
-        6: 128 * MiB,  # 128 MB
-        8: 128 * MiB,  # 128 MB
+        6: 64 * MiB,  # 64 MB
+        8: 64 * MiB,  # 64 MB
     },
     DeviceCapability(10, 0): {
         2: 8 * MiB,  # 8 MB
diff --git a/vllm/distributed/device_communicators/symm_mem.py b/vllm/distributed/device_communicators/symm_mem.py
@@ -82,7 +82,7 @@ def should_use_symm_mem(self, inp: torch.Tensor):
         inp_size = inp.numel() * inp.element_size()
         if inp_size % 4 != 0:
             return False
-        return inp_size <= self.max_size
+        return inp_size < self.max_size
 
     def all_reduce(
             self,