Skip to content

Commit 4950fad

Browse files
committed
Upd
Signed-off-by: ilmarkov <markovilya197@gmail.com>
1 parent b3c42d5 commit 4950fad

File tree

2 files changed

+8
-10
lines changed

2 files changed

+8
-10
lines changed

vllm/distributed/device_communicators/all_reduce_utils.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,29 +28,27 @@
2828

2929
# Max size for each world size when symmetric memory is available,
3030
# keyed by SM architecture (device capability)
31-
32-
# TODO(ilia): update max sizes for 6, 8 for sm90
3331
CUSTOM_ALL_REDUCE_MAX_SIZES = {
3432
DeviceCapability(9, 0): {
3533
2: 64 * MiB, # 64 MB
36-
4: MiB, # 1 MB
37-
6: MiB, # 1 MB
38-
8: MiB // 2, # 512 KB
34+
4: 32 * MiB, # 32 MB
35+
6: MiB // 2, # 512 KB
36+
8: MiB // 4, # 256 KB
3937
},
4038
DeviceCapability(10, 0): {
4139
2: 2 * MiB, # 2 MB
4240
4: 2 * MiB, # 2 MB
43-
6: 8 * MiB, # 8 MB
44-
8: 8 * MiB, # 8 MB
41+
6: 2 * MiB, # 2 MB
42+
8: MiB, # 1 MB
4543
}
4644
}
4745

4846
SYMM_MEM_ALL_REDUCE_MAX_SIZES = {
4947
DeviceCapability(9, 0): {
5048
2: 64 * MiB, # 64 MB
5149
4: 32 * MiB, # 32 MB
52-
6: 128 * MiB, # 128 MB
53-
8: 128 * MiB, # 128 MB
50+
6: 64 * MiB, # 64 MB
51+
8: 64 * MiB, # 64 MB
5452
},
5553
DeviceCapability(10, 0): {
5654
2: 8 * MiB, # 8 MB

vllm/distributed/device_communicators/symm_mem.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def should_use_symm_mem(self, inp: torch.Tensor):
8282
inp_size = inp.numel() * inp.element_size()
8383
if inp_size % 4 != 0:
8484
return False
85-
return inp_size <= self.max_size
85+
return inp_size < self.max_size
8686

8787
def all_reduce(
8888
self,

0 commit comments

Comments
 (0)