Skip to content

Commit cabe140

Browse files
committed
Strengthen stack size estimation for LSX/LASX extension
1 parent 8423778 commit cabe140

13 files changed

+330
-325
lines changed

llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,12 @@ void LoongArchFrameLowering::processFunctionBeforeFrameFinalized(
158158
// estimateStackSize has been observed to under-estimate the final stack
159159
// size, so give ourselves wiggle-room by checking for stack size
160160
// representable in an 11-bit signed field rather than 12-bits.
161-
if (!isInt<11>(MFI.estimateStackSize(MF)))
161+
// For [x]vstelm.{b/h/w/d} memory instructions with 8-bit imm offset, 7-bit
162+
// signed field is fine.
163+
unsigned EstimateStackSize = MFI.estimateStackSize(MF);
164+
if (!isInt<11>(EstimateStackSize) ||
165+
(MF.getSubtarget<LoongArchSubtarget>().hasExtLSX() &&
166+
!isInt<7>(EstimateStackSize)))
162167
ScavSlotsNum = std::max(ScavSlotsNum, 1u);
163168

164169
// For CFR spill.

llvm/test/CodeGen/LoongArch/calling-conv-common.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -122,23 +122,23 @@ define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind {
122122
define i64 @caller_large_scalars() nounwind {
123123
; CHECK-LABEL: caller_large_scalars:
124124
; CHECK: # %bb.0:
125-
; CHECK-NEXT: addi.d $sp, $sp, -80
126-
; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
127-
; CHECK-NEXT: st.d $zero, $sp, 24
125+
; CHECK-NEXT: addi.d $sp, $sp, -96
126+
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
127+
; CHECK-NEXT: st.d $zero, $sp, 40
128128
; CHECK-NEXT: vrepli.b $vr0, 0
129-
; CHECK-NEXT: vst $vr0, $sp, 8
129+
; CHECK-NEXT: vst $vr0, $sp, 24
130130
; CHECK-NEXT: ori $a0, $zero, 2
131-
; CHECK-NEXT: st.d $a0, $sp, 0
132-
; CHECK-NEXT: st.d $zero, $sp, 56
133-
; CHECK-NEXT: vst $vr0, $sp, 40
131+
; CHECK-NEXT: st.d $a0, $sp, 16
132+
; CHECK-NEXT: st.d $zero, $sp, 72
133+
; CHECK-NEXT: vst $vr0, $sp, 56
134134
; CHECK-NEXT: ori $a2, $zero, 1
135-
; CHECK-NEXT: addi.d $a0, $sp, 32
136-
; CHECK-NEXT: addi.d $a1, $sp, 0
137-
; CHECK-NEXT: st.d $a2, $sp, 32
135+
; CHECK-NEXT: addi.d $a0, $sp, 48
136+
; CHECK-NEXT: addi.d $a1, $sp, 16
137+
; CHECK-NEXT: st.d $a2, $sp, 48
138138
; CHECK-NEXT: pcaddu18i $ra, %call36(callee_large_scalars)
139139
; CHECK-NEXT: jirl $ra, $ra, 0
140-
; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
141-
; CHECK-NEXT: addi.d $sp, $sp, 80
140+
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
141+
; CHECK-NEXT: addi.d $sp, $sp, 96
142142
; CHECK-NEXT: ret
143143
%1 = call i64 @callee_large_scalars(i256 1, i256 2)
144144
ret i64 %1
@@ -177,33 +177,33 @@ define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d,
177177
define i64 @caller_large_scalars_exhausted_regs() nounwind {
178178
; CHECK-LABEL: caller_large_scalars_exhausted_regs:
179179
; CHECK: # %bb.0:
180-
; CHECK-NEXT: addi.d $sp, $sp, -96
181-
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
182-
; CHECK-NEXT: addi.d $a0, $sp, 16
180+
; CHECK-NEXT: addi.d $sp, $sp, -112
181+
; CHECK-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill
182+
; CHECK-NEXT: addi.d $a0, $sp, 32
183183
; CHECK-NEXT: st.d $a0, $sp, 8
184184
; CHECK-NEXT: ori $a0, $zero, 9
185185
; CHECK-NEXT: st.d $a0, $sp, 0
186-
; CHECK-NEXT: st.d $zero, $sp, 40
186+
; CHECK-NEXT: st.d $zero, $sp, 56
187187
; CHECK-NEXT: vrepli.b $vr0, 0
188-
; CHECK-NEXT: vst $vr0, $sp, 24
188+
; CHECK-NEXT: vst $vr0, $sp, 40
189189
; CHECK-NEXT: ori $a0, $zero, 10
190-
; CHECK-NEXT: st.d $a0, $sp, 16
191-
; CHECK-NEXT: st.d $zero, $sp, 72
190+
; CHECK-NEXT: st.d $a0, $sp, 32
191+
; CHECK-NEXT: st.d $zero, $sp, 88
192192
; CHECK-NEXT: ori $a0, $zero, 8
193-
; CHECK-NEXT: st.d $a0, $sp, 48
193+
; CHECK-NEXT: st.d $a0, $sp, 64
194194
; CHECK-NEXT: ori $a0, $zero, 1
195195
; CHECK-NEXT: ori $a1, $zero, 2
196196
; CHECK-NEXT: ori $a2, $zero, 3
197197
; CHECK-NEXT: ori $a3, $zero, 4
198198
; CHECK-NEXT: ori $a4, $zero, 5
199199
; CHECK-NEXT: ori $a5, $zero, 6
200200
; CHECK-NEXT: ori $a6, $zero, 7
201-
; CHECK-NEXT: addi.d $a7, $sp, 48
202-
; CHECK-NEXT: vst $vr0, $sp, 56
201+
; CHECK-NEXT: addi.d $a7, $sp, 64
202+
; CHECK-NEXT: vst $vr0, $sp, 72
203203
; CHECK-NEXT: pcaddu18i $ra, %call36(callee_large_scalars_exhausted_regs)
204204
; CHECK-NEXT: jirl $ra, $ra, 0
205-
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
206-
; CHECK-NEXT: addi.d $sp, $sp, 96
205+
; CHECK-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload
206+
; CHECK-NEXT: addi.d $sp, $sp, 112
207207
; CHECK-NEXT: ret
208208
%1 = call i64 @callee_large_scalars_exhausted_regs(
209209
i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i256 8, i64 9,

llvm/test/CodeGen/LoongArch/calling-conv-half.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,8 +1252,8 @@ define i32 @caller_half_on_stack() nounwind {
12521252
;
12531253
; LA64F-LP64S-LABEL: caller_half_on_stack:
12541254
; LA64F-LP64S: # %bb.0:
1255-
; LA64F-LP64S-NEXT: addi.d $sp, $sp, -80
1256-
; LA64F-LP64S-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
1255+
; LA64F-LP64S-NEXT: addi.d $sp, $sp, -96
1256+
; LA64F-LP64S-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
12571257
; LA64F-LP64S-NEXT: lu12i.w $a0, -12
12581258
; LA64F-LP64S-NEXT: ori $a1, $a0, 3200
12591259
; LA64F-LP64S-NEXT: lu32i.d $a1, 0
@@ -1292,8 +1292,8 @@ define i32 @caller_half_on_stack() nounwind {
12921292
; LA64F-LP64S-NEXT: st.w $t0, $sp, 0
12931293
; LA64F-LP64S-NEXT: pcaddu18i $ra, %call36(callee_half_on_stack)
12941294
; LA64F-LP64S-NEXT: jirl $ra, $ra, 0
1295-
; LA64F-LP64S-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
1296-
; LA64F-LP64S-NEXT: addi.d $sp, $sp, 80
1295+
; LA64F-LP64S-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
1296+
; LA64F-LP64S-NEXT: addi.d $sp, $sp, 96
12971297
; LA64F-LP64S-NEXT: ret
12981298
;
12991299
; LA64F-LP64D-LABEL: caller_half_on_stack:
@@ -1336,8 +1336,8 @@ define i32 @caller_half_on_stack() nounwind {
13361336
;
13371337
; LA64D-LP64S-LABEL: caller_half_on_stack:
13381338
; LA64D-LP64S: # %bb.0:
1339-
; LA64D-LP64S-NEXT: addi.d $sp, $sp, -80
1340-
; LA64D-LP64S-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
1339+
; LA64D-LP64S-NEXT: addi.d $sp, $sp, -96
1340+
; LA64D-LP64S-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
13411341
; LA64D-LP64S-NEXT: lu12i.w $a0, -12
13421342
; LA64D-LP64S-NEXT: ori $a1, $a0, 3200
13431343
; LA64D-LP64S-NEXT: lu32i.d $a1, 0
@@ -1376,8 +1376,8 @@ define i32 @caller_half_on_stack() nounwind {
13761376
; LA64D-LP64S-NEXT: st.w $t0, $sp, 0
13771377
; LA64D-LP64S-NEXT: pcaddu18i $ra, %call36(callee_half_on_stack)
13781378
; LA64D-LP64S-NEXT: jirl $ra, $ra, 0
1379-
; LA64D-LP64S-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
1380-
; LA64D-LP64S-NEXT: addi.d $sp, $sp, 80
1379+
; LA64D-LP64S-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
1380+
; LA64D-LP64S-NEXT: addi.d $sp, $sp, 96
13811381
; LA64D-LP64S-NEXT: ret
13821382
;
13831383
; LA64D-LP64D-LABEL: caller_half_on_stack:

llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,41 +14,41 @@
1414
define dso_local noundef signext i32 @main() nounwind {
1515
; CHECK-LABEL: main:
1616
; CHECK: # %bb.0: # %entry
17-
; CHECK-NEXT: addi.d $sp, $sp, -272
18-
; CHECK-NEXT: st.d $ra, $sp, 264 # 8-byte Folded Spill
17+
; CHECK-NEXT: addi.d $sp, $sp, -288
18+
; CHECK-NEXT: st.d $ra, $sp, 280 # 8-byte Folded Spill
1919
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
2020
; CHECK-NEXT: xvld $xr0, $a0, %pc_lo12(.LCPI0_0)
21-
; CHECK-NEXT: xvst $xr0, $sp, 96 # 32-byte Folded Spill
21+
; CHECK-NEXT: xvst $xr0, $sp, 112 # 32-byte Folded Spill
2222
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_1)
2323
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI0_1)
24-
; CHECK-NEXT: xvst $xr1, $sp, 64 # 32-byte Folded Spill
24+
; CHECK-NEXT: xvst $xr1, $sp, 80 # 32-byte Folded Spill
2525
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_2)
2626
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI0_2)
27-
; CHECK-NEXT: xvst $xr2, $sp, 32 # 32-byte Folded Spill
27+
; CHECK-NEXT: xvst $xr2, $sp, 48 # 32-byte Folded Spill
2828
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_3)
2929
; CHECK-NEXT: xvld $xr3, $a0, %pc_lo12(.LCPI0_3)
30-
; CHECK-NEXT: xvst $xr3, $sp, 0 # 32-byte Folded Spill
31-
; CHECK-NEXT: xvst $xr0, $sp, 136
32-
; CHECK-NEXT: xvst $xr1, $sp, 168
33-
; CHECK-NEXT: xvst $xr2, $sp, 200
34-
; CHECK-NEXT: xvst $xr3, $sp, 232
35-
; CHECK-NEXT: addi.d $a0, $sp, 136
30+
; CHECK-NEXT: xvst $xr3, $sp, 16 # 32-byte Folded Spill
31+
; CHECK-NEXT: xvst $xr0, $sp, 152
32+
; CHECK-NEXT: xvst $xr1, $sp, 184
33+
; CHECK-NEXT: xvst $xr2, $sp, 216
34+
; CHECK-NEXT: xvst $xr3, $sp, 248
35+
; CHECK-NEXT: addi.d $a0, $sp, 152
3636
; CHECK-NEXT: pcaddu18i $ra, %call36(foo)
3737
; CHECK-NEXT: jirl $ra, $ra, 0
38-
; CHECK-NEXT: xvld $xr0, $sp, 96 # 32-byte Folded Reload
39-
; CHECK-NEXT: xvst $xr0, $sp, 136
40-
; CHECK-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
41-
; CHECK-NEXT: xvst $xr0, $sp, 168
42-
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
43-
; CHECK-NEXT: xvst $xr0, $sp, 200
44-
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
45-
; CHECK-NEXT: xvst $xr0, $sp, 232
46-
; CHECK-NEXT: addi.d $a0, $sp, 136
38+
; CHECK-NEXT: xvld $xr0, $sp, 112 # 32-byte Folded Reload
39+
; CHECK-NEXT: xvst $xr0, $sp, 152
40+
; CHECK-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
41+
; CHECK-NEXT: xvst $xr0, $sp, 184
42+
; CHECK-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
43+
; CHECK-NEXT: xvst $xr0, $sp, 216
44+
; CHECK-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
45+
; CHECK-NEXT: xvst $xr0, $sp, 248
46+
; CHECK-NEXT: addi.d $a0, $sp, 152
4747
; CHECK-NEXT: pcaddu18i $ra, %call36(bar)
4848
; CHECK-NEXT: jirl $ra, $ra, 0
4949
; CHECK-NEXT: move $a0, $zero
50-
; CHECK-NEXT: ld.d $ra, $sp, 264 # 8-byte Folded Reload
51-
; CHECK-NEXT: addi.d $sp, $sp, 272
50+
; CHECK-NEXT: ld.d $ra, $sp, 280 # 8-byte Folded Reload
51+
; CHECK-NEXT: addi.d $sp, $sp, 288
5252
; CHECK-NEXT: ret
5353
entry:
5454
%s = alloca %struct.S, align 2

llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@ define void @func() {
2828
; CHECK-NEXT: ld.w $a3, $a1, 0
2929
; CHECK-NEXT: ld.w $a2, $a1, 0
3030
; CHECK-NEXT: ld.w $a0, $a1, 0
31-
; CHECK-NEXT: st.d $fp, $sp, 0
31+
; CHECK-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
3232
; CHECK-NEXT: lu12i.w $fp, 1
3333
; CHECK-NEXT: ori $fp, $fp, 12
3434
; CHECK-NEXT: add.d $fp, $sp, $fp
3535
; CHECK-NEXT: st.w $t8, $fp, 0
36-
; CHECK-NEXT: ld.d $fp, $sp, 0
36+
; CHECK-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
3737
; CHECK-NEXT: st.w $t8, $a1, 0
3838
; CHECK-NEXT: st.w $t7, $a1, 0
3939
; CHECK-NEXT: st.w $t6, $a1, 0

llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,19 @@
66
define void @box(ptr noalias nocapture noundef writeonly sret(%Box) align 16 dereferenceable(48) %b, i64 %i) {
77
; CHECK-LABEL: box:
88
; CHECK: # %bb.0:
9-
; CHECK-NEXT: addi.d $sp, $sp, -96
10-
; CHECK-NEXT: .cfi_def_cfa_offset 96
9+
; CHECK-NEXT: addi.d $sp, $sp, -112
10+
; CHECK-NEXT: .cfi_def_cfa_offset 112
1111
; CHECK-NEXT: slli.d $a2, $a1, 5
1212
; CHECK-NEXT: alsl.d $a1, $a1, $a2, 4
13-
; CHECK-NEXT: addi.d $a2, $sp, 0
13+
; CHECK-NEXT: addi.d $a2, $sp, 16
1414
; CHECK-NEXT: add.d $a3, $a2, $a1
1515
; CHECK-NEXT: vldx $vr0, $a1, $a2
1616
; CHECK-NEXT: vld $vr1, $a3, 32
1717
; CHECK-NEXT: vld $vr2, $a3, 16
1818
; CHECK-NEXT: vst $vr0, $a0, 0
1919
; CHECK-NEXT: vst $vr1, $a0, 32
2020
; CHECK-NEXT: vst $vr2, $a0, 16
21-
; CHECK-NEXT: addi.d $sp, $sp, 96
21+
; CHECK-NEXT: addi.d $sp, $sp, 112
2222
; CHECK-NEXT: ret
2323
%1 = alloca [2 x %Box], align 16
2424
%2 = getelementptr inbounds [2 x %Box], ptr %1, i64 0, i64 %i

0 commit comments

Comments
 (0)