Open
Description
Sorry if this is unrealistic code. I stumbled upon this, and thought it was weird that the code wasn't optimized properly.
/// Minimal reproducer: returns a `Vec` whose element type is a
/// `16 x 16 x 16` array of `i32` (16 * 16 * 16 * 4 = 16384 bytes per element).
pub fn foo() -> Vec<[[[i32; 16]; 16]; 16]> {
// `vec![elem; n]` with n = 0: an all-zero element value is spelled out,
// but the repeat count is 0, so the resulting vector holds no elements.
vec![[[[0i32; 16]; 16]; 16]; 0]
}
Assembly code
# Compiler output for `foo` (x86-64, Intel operand order: `op dst, src`).
#
# Visible flow:
#   1. reserve 16384 bytes of stack (4 steps of 4096),
#   2. memset those 16384 bytes to zero,
#   3. scan them for any non-zero 32-bit lane (16 passes x 16 groups x 64 bytes),
#   4. write the same three 8-byte words through the pointer received in rdi,
#      whether the scan finished or bailed out early.
#
# Register roles:
#   rbx       = copy of the incoming rdi (destination of the result), kept
#               in a callee-saved register so it survives the memset call
#   rax       = byte offset used by the scan loop (starts at 960, +1024 per pass)
#   xmm0      = all-zero constant for the lane comparison
#   xmm1-xmm3 = scratch for the 64-byte group being tested
#   ecx/rcx   = lane mask scratch, then next-offset scratch
example::foo::hf7aa743c8384f4c6:
# Save rbx; it is restored by the `pop rbx` before `ret`.
push rbx
# Grow the stack by 4 x 4096 = 16384 bytes, storing 0 at the new top of stack
# after each of the first three steps.
# NOTE(review): looks like an inline stack probe touching each new page - confirm.
sub rsp, 4096
mov qword ptr [rsp], 0
sub rsp, 4096
mov qword ptr [rsp], 0
sub rsp, 4096
mov qword ptr [rsp], 0
sub rsp, 4096
# Keep the incoming rdi in rbx because rdi is reused as memset's first argument.
mov rbx, rdi
# memset(rsp, 0, 16384): zero the whole 16384-byte stack buffer.
mov rdi, rsp
mov edx, 16384
xor esi, esi
call qword ptr [rip + memset@GOTPCREL]
# Scan setup: rax = 960 so the first pass addresses [rsp + 0, rsp + 1024);
# xmm0 = 0 is the value every lane is compared against.
mov eax, 960
pxor xmm0, xmm0
# One pass tests 1024 bytes, [rsp+rax-960, rsp+rax+64), as 16 groups of 64 bytes.
.LBB0_1:
# Bytes [rsp+rax-960, rsp+rax-896): load four 16-byte chunks and OR them
# together into xmm1, so xmm1 is zero only if all 64 bytes are zero.
movdqu xmm1, xmmword ptr [rsp + rax - 960]
movdqu xmm2, xmmword ptr [rsp + rax - 944]
movdqu xmm3, xmmword ptr [rsp + rax - 928]
por xmm3, xmm1
movdqu xmm1, xmmword ptr [rsp + rax - 912]
por xmm1, xmm2
por xmm1, xmm3
# Each 32-bit lane becomes all-ones where the ORed lane equals zero.
pcmpeqd xmm1, xmm0
# ecx = the four lane sign bits; 15 means every lane was zero.
movmskps ecx, xmm1
# ecx ^ 15 is non-zero iff some lane was non-zero -> leave the scan early.
xor ecx, 15
jne .LBB0_18
# Bytes [rsp+rax-896, rsp+rax-832): same 64-byte zero test.
movdqu xmm1, xmmword ptr [rsp + rax - 896]
movdqu xmm2, xmmword ptr [rsp + rax - 880]
movdqu xmm3, xmmword ptr [rsp + rax - 864]
por xmm3, xmm1
movdqu xmm1, xmmword ptr [rsp + rax - 848]
por xmm1, xmm2
por xmm1, xmm3
pcmpeqd xmm1, xmm0
movmskps ecx, xmm1
xor ecx, 15
jne .LBB0_18
# Bytes [rsp+rax-832, rsp+rax-768): same 64-byte zero test.
movdqu xmm1, xmmword ptr [rsp + rax - 832]
movdqu xmm2, xmmword ptr [rsp + rax - 816]
movdqu xmm3, xmmword ptr [rsp + rax - 800]
por xmm3, xmm1
movdqu xmm1, xmmword ptr [rsp + rax - 784]
por xmm1, xmm2
por xmm1, xmm3
pcmpeqd xmm1, xmm0
movmskps ecx, xmm1
xor ecx, 15
jne .LBB0_18
# Bytes [rsp+rax-768, rsp+rax-704): same 64-byte zero test.
movdqu xmm1, xmmword ptr [rsp + rax - 768]
movdqu xmm2, xmmword ptr [rsp + rax - 752]
movdqu xmm3, xmmword ptr [rsp + rax - 736]
por xmm3, xmm1
movdqu xmm1, xmmword ptr [rsp + rax - 720]
por xmm1, xmm2
por xmm1, xmm3
pcmpeqd xmm1, xmm0
movmskps ecx, xmm1
xor ecx, 15
jne .LBB0_18
# Bytes [rsp+rax-704, rsp+rax-640): same 64-byte zero test.
movdqu xmm1, xmmword ptr [rsp + rax - 704]
movdqu xmm2, xmmword ptr [rsp + rax - 688]
movdqu xmm3, xmmword ptr [rsp + rax - 672]
por xmm3, xmm1
movdqu xmm1, xmmword ptr [rsp + rax - 656]
por xmm1, xmm2
por xmm1, xmm3
pcmpeqd xmm1, xmm0
movmskps ecx, xmm1
xor ecx, 15
jne .LBB0_18
# Bytes [rsp+rax-640, rsp+rax-576): same 64-byte zero test.
movdqu xmm1, xmmword ptr [rsp + rax - 640]
movdqu xmm2, xmmword ptr [rsp + rax - 624]
movdqu xmm3, xmmword ptr [rsp + rax - 608]
por xmm3, xmm1
movdqu xmm1, xmmword ptr [rsp + rax - 592]
por xmm1, xmm2
por xmm1, xmm3
pcmpeqd xmm1, xmm0
movmskps ecx, xmm1
xor ecx, 15
jne .LBB0_18
# Bytes [rsp+rax-576, rsp+rax-512): same 64-byte zero test.
movdqu xmm1, xmmword ptr [rsp + rax - 576]
movdqu xmm2, xmmword ptr [rsp + rax - 560]
movdqu xmm3, xmmword ptr [rsp + rax - 544]
por xmm3, xmm1
movdqu xmm1, xmmword ptr [rsp + rax - 528]
por xmm1, xmm2
por xmm1, xmm3
pcmpeqd xmm1, xmm0
movmskps ecx, xmm1
xor ecx, 15
jne .LBB0_18
# Bytes [rsp+rax-512, rsp+rax-448): same 64-byte zero test.
movdqu xmm1, xmmword ptr [rsp + rax - 512]
movdqu xmm2, xmmword ptr [rsp + rax - 496]
movdqu xmm3, xmmword ptr [rsp + rax - 480]
por xmm3, xmm1
movdqu xmm1, xmmword ptr [rsp + rax - 464]
por xmm1, xmm2
por xmm1, xmm3
pcmpeqd xmm1, xmm0
movmskps ecx, xmm1
xor ecx, 15
jne .LBB0_18
# Bytes [rsp+rax-448, rsp+rax-384): same 64-byte zero test.
movdqu xmm1, xmmword ptr [rsp + rax - 448]
movdqu xmm2, xmmword ptr [rsp + rax - 432]
movdqu xmm3, xmmword ptr [rsp + rax - 416]
por xmm3, xmm1
movdqu xmm1, xmmword ptr [rsp + rax - 400]
por xmm1, xmm2
por xmm1, xmm3
pcmpeqd xmm1, xmm0
movmskps ecx, xmm1
xor ecx, 15
jne .LBB0_18
# Bytes [rsp+rax-384, rsp+rax-320): same 64-byte zero test.
movdqu xmm1, xmmword ptr [rsp + rax - 384]
movdqu xmm2, xmmword ptr [rsp + rax - 368]
movdqu xmm3, xmmword ptr [rsp + rax - 352]
por xmm3, xmm1
movdqu xmm1, xmmword ptr [rsp + rax - 336]
por xmm1, xmm2
por xmm1, xmm3
pcmpeqd xmm1, xmm0
movmskps ecx, xmm1
xor ecx, 15
jne .LBB0_18
# Bytes [rsp+rax-320, rsp+rax-256): same 64-byte zero test.
movdqu xmm1, xmmword ptr [rsp + rax - 320]
movdqu xmm2, xmmword ptr [rsp + rax - 304]
movdqu xmm3, xmmword ptr [rsp + rax - 288]
por xmm3, xmm1
movdqu xmm1, xmmword ptr [rsp + rax - 272]
por xmm1, xmm2
por xmm1, xmm3
pcmpeqd xmm1, xmm0
movmskps ecx, xmm1
xor ecx, 15
jne .LBB0_18
# Bytes [rsp+rax-256, rsp+rax-192): same 64-byte zero test.
movdqu xmm1, xmmword ptr [rsp + rax - 256]
movdqu xmm2, xmmword ptr [rsp + rax - 240]
movdqu xmm3, xmmword ptr [rsp + rax - 224]
por xmm3, xmm1
movdqu xmm1, xmmword ptr [rsp + rax - 208]
por xmm1, xmm2
por xmm1, xmm3
pcmpeqd xmm1, xmm0
movmskps ecx, xmm1
xor ecx, 15
jne .LBB0_18
# Bytes [rsp+rax-192, rsp+rax-128): same 64-byte zero test.
movdqu xmm1, xmmword ptr [rsp + rax - 192]
movdqu xmm2, xmmword ptr [rsp + rax - 176]
movdqu xmm3, xmmword ptr [rsp + rax - 160]
por xmm3, xmm1
movdqu xmm1, xmmword ptr [rsp + rax - 144]
por xmm1, xmm2
por xmm1, xmm3
pcmpeqd xmm1, xmm0
movmskps ecx, xmm1
xor ecx, 15
jne .LBB0_18
# Bytes [rsp+rax-128, rsp+rax-64): same 64-byte zero test.
movdqu xmm1, xmmword ptr [rsp + rax - 128]
movdqu xmm2, xmmword ptr [rsp + rax - 112]
movdqu xmm3, xmmword ptr [rsp + rax - 96]
por xmm3, xmm1
movdqu xmm1, xmmword ptr [rsp + rax - 80]
por xmm1, xmm2
por xmm1, xmm3
pcmpeqd xmm1, xmm0
movmskps ecx, xmm1
xor ecx, 15
jne .LBB0_18
# Bytes [rsp+rax-64, rsp+rax): same 64-byte zero test.
movdqu xmm1, xmmword ptr [rsp + rax - 64]
movdqu xmm2, xmmword ptr [rsp + rax - 48]
movdqu xmm3, xmmword ptr [rsp + rax - 32]
por xmm3, xmm1
movdqu xmm1, xmmword ptr [rsp + rax - 16]
por xmm1, xmm2
por xmm1, xmm3
pcmpeqd xmm1, xmm0
movmskps ecx, xmm1
xor ecx, 15
jne .LBB0_18
# Bytes [rsp+rax, rsp+rax+64): last group of this pass, same 64-byte zero test.
movdqu xmm1, xmmword ptr [rsp + rax]
movdqu xmm2, xmmword ptr [rsp + rax + 16]
movdqu xmm3, xmmword ptr [rsp + rax + 32]
por xmm3, xmm1
movdqu xmm1, xmmword ptr [rsp + rax + 48]
por xmm1, xmm2
por xmm1, xmm3
pcmpeqd xmm1, xmm0
movmskps ecx, xmm1
xor ecx, 15
jne .LBB0_18
# Advance by 1024 bytes. `lea` and `mov` do not modify flags, so the `jne`
# below still tests the `cmp` against the OLD offset: loop until the pass that
# ran with rax = 16320 has finished. rax takes 960, 1984, ..., 16320, i.e.
# 16 passes x 1024 bytes = the full 16384-byte buffer.
lea rcx, [rax + 1024]
cmp rax, 16320
mov rax, rcx
jne .LBB0_1
# Shared exit: reached both by the early `jne` exits and by falling out of the
# loop, so the scan result does not influence anything written below.
.LBB0_18:
# Store three 8-byte words {0, 4, 0} through the saved result pointer.
# NOTE(review): presumably the empty Vec's capacity / dangling pointer
# (4 = align of i32) / length - field order not visible here, confirm.
mov qword ptr [rbx], 0
mov qword ptr [rbx + 8], 4
mov qword ptr [rbx + 16], 0
# Return the result pointer in rax.
mov rax, rbx
# Release the 16384 bytes reserved in the prologue and restore rbx.
add rsp, 16384
pop rbx
ret
I expected the code to get optimized into just returning an empty vec (a few instructions). Instead, the assembly does the following:
- allocates some stack space (16 KiB)
- memsets that stack space to zeroes
- checks whether the entire array is zeroes, then discards that knowledge
- unconditionally returns an empty vec
This seems rather bad.
Note that Rust's `vec![]` macro checks whether the value is zero, but only for arrays of length 16 or less. This check is meant to decide whether to allocate a pre-zeroed chunk of memory, but in this case its result is discarded.
Meta
Reproducible on godbolt with:
rustc 1.88.0 (6b00bc388 2025-06-23)
binary: rustc
commit-hash: 6b00bc3880198600130e1cf62b8f8a93494488cc
commit-date: 2025-06-23
host: x86_64-unknown-linux-gnu
release: 1.88.0
LLVM version: 20.1.5
Internal compiler ID: r1880
@rustbot labels +C-optimization