Skip to content

Commit 4ab011a

Browse files
committed
[RISCV] Precommit store merge tests for pr130430
1 parent bc8b19c commit 4ab011a

File tree

1 file changed

+147
-0
lines changed

1 file changed

+147
-0
lines changed

llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
55
declare void @g()
66

7+
; TODO: Merging scalars into vectors is unprofitable because we have no
8+
; vector CSRs, which creates additional spills around the call.
79
define void @f(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) {
810
; CHECK-LABEL: f:
911
; CHECK: # %bb.0:
@@ -93,3 +95,148 @@ define void @f1(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) {
9395

9496
ret void
9597
}
98+
99+
; Merging scalars is profitable; it reduces pressure within a single
100+
; register class.
101+
define void @i8_i16(ptr %p, ptr %q) {
102+
; CHECK-LABEL: i8_i16:
103+
; CHECK: # %bb.0:
104+
; CHECK-NEXT: addi sp, sp, -32
105+
; CHECK-NEXT: .cfi_def_cfa_offset 32
106+
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
107+
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
108+
; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
109+
; CHECK-NEXT: .cfi_offset ra, -8
110+
; CHECK-NEXT: .cfi_offset s0, -16
111+
; CHECK-NEXT: .cfi_offset s1, -24
112+
; CHECK-NEXT: lh s1, 0(a0)
113+
; CHECK-NEXT: mv s0, a1
114+
; CHECK-NEXT: call g
115+
; CHECK-NEXT: sh s1, 0(s0)
116+
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
117+
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
118+
; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
119+
; CHECK-NEXT: .cfi_restore ra
120+
; CHECK-NEXT: .cfi_restore s0
121+
; CHECK-NEXT: .cfi_restore s1
122+
; CHECK-NEXT: addi sp, sp, 32
123+
; CHECK-NEXT: .cfi_def_cfa_offset 0
124+
; CHECK-NEXT: ret
125+
%p0 = getelementptr i8, ptr %p, i64 0
126+
%p1 = getelementptr i8, ptr %p, i64 1
127+
%x0 = load i8, ptr %p0, align 2
128+
%x1 = load i8, ptr %p1
129+
call void @g()
130+
%q0 = getelementptr i8, ptr %q, i64 0
131+
%q1 = getelementptr i8, ptr %q, i64 1
132+
store i8 %x0, ptr %q0, align 2
133+
store i8 %x1, ptr %q1
134+
ret void
135+
}
136+
137+
; Merging vectors is profitable; it reduces pressure within a single
138+
; register class.
139+
define void @v2i8_v4i8(ptr %p, ptr %q) {
140+
; CHECK-LABEL: v2i8_v4i8:
141+
; CHECK: # %bb.0:
142+
; CHECK-NEXT: addi sp, sp, -32
143+
; CHECK-NEXT: .cfi_def_cfa_offset 32
144+
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
145+
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
146+
; CHECK-NEXT: .cfi_offset ra, -8
147+
; CHECK-NEXT: .cfi_offset s0, -16
148+
; CHECK-NEXT: csrr a2, vlenb
149+
; CHECK-NEXT: sub sp, sp, a2
150+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb
151+
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
152+
; CHECK-NEXT: vle8.v v8, (a0)
153+
; CHECK-NEXT: addi a0, sp, 16
154+
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
155+
; CHECK-NEXT: mv s0, a1
156+
; CHECK-NEXT: call g
157+
; CHECK-NEXT: addi a0, sp, 16
158+
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
159+
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
160+
; CHECK-NEXT: vse8.v v8, (s0)
161+
; CHECK-NEXT: csrr a0, vlenb
162+
; CHECK-NEXT: add sp, sp, a0
163+
; CHECK-NEXT: .cfi_def_cfa sp, 32
164+
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
165+
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
166+
; CHECK-NEXT: .cfi_restore ra
167+
; CHECK-NEXT: .cfi_restore s0
168+
; CHECK-NEXT: addi sp, sp, 32
169+
; CHECK-NEXT: .cfi_def_cfa_offset 0
170+
; CHECK-NEXT: ret
171+
%p0 = getelementptr i8, ptr %p, i64 0
172+
%p1 = getelementptr i8, ptr %p, i64 2
173+
%x0 = load <2 x i8>, ptr %p0, align 2
174+
%x1 = load <2 x i8>, ptr %p1
175+
call void @g()
176+
%q0 = getelementptr i8, ptr %q, i64 0
177+
%q1 = getelementptr i8, ptr %q, i64 2
178+
store <2 x i8> %x0, ptr %q0, align 2
179+
store <2 x i8> %x1, ptr %q1
180+
ret void
181+
}
182+
183+
; Merging two 16 x i8 into one 32 x i8 (on zvl128b) will require the same
184+
; number of registers to be spilled, but it can be done with fewer
185+
; instructions
186+
define void @v16i8_v32i8(ptr %p, ptr %q) {
187+
; CHECK-LABEL: v16i8_v32i8:
188+
; CHECK: # %bb.0:
189+
; CHECK-NEXT: addi sp, sp, -32
190+
; CHECK-NEXT: .cfi_def_cfa_offset 32
191+
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
192+
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
193+
; CHECK-NEXT: .cfi_offset ra, -8
194+
; CHECK-NEXT: .cfi_offset s0, -16
195+
; CHECK-NEXT: csrr a2, vlenb
196+
; CHECK-NEXT: slli a2, a2, 1
197+
; CHECK-NEXT: sub sp, sp, a2
198+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
199+
; CHECK-NEXT: addi a2, a0, 16
200+
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
201+
; CHECK-NEXT: vle8.v v8, (a0)
202+
; CHECK-NEXT: csrr a0, vlenb
203+
; CHECK-NEXT: add a0, sp, a0
204+
; CHECK-NEXT: addi a0, a0, 16
205+
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
206+
; CHECK-NEXT: vle8.v v8, (a2)
207+
; CHECK-NEXT: addi a0, sp, 16
208+
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
209+
; CHECK-NEXT: mv s0, a1
210+
; CHECK-NEXT: call g
211+
; CHECK-NEXT: addi a0, s0, 2
212+
; CHECK-NEXT: csrr a1, vlenb
213+
; CHECK-NEXT: add a1, sp, a1
214+
; CHECK-NEXT: addi a1, a1, 16
215+
; CHECK-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
216+
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
217+
; CHECK-NEXT: vse8.v v8, (s0)
218+
; CHECK-NEXT: addi a1, sp, 16
219+
; CHECK-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
220+
; CHECK-NEXT: vse8.v v8, (a0)
221+
; CHECK-NEXT: csrr a0, vlenb
222+
; CHECK-NEXT: slli a0, a0, 1
223+
; CHECK-NEXT: add sp, sp, a0
224+
; CHECK-NEXT: .cfi_def_cfa sp, 32
225+
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
226+
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
227+
; CHECK-NEXT: .cfi_restore ra
228+
; CHECK-NEXT: .cfi_restore s0
229+
; CHECK-NEXT: addi sp, sp, 32
230+
; CHECK-NEXT: .cfi_def_cfa_offset 0
231+
; CHECK-NEXT: ret
232+
%p0 = getelementptr i8, ptr %p, i64 0
233+
%p1 = getelementptr i8, ptr %p, i64 16
234+
%x0 = load <16 x i8>, ptr %p0, align 2
235+
%x1 = load <16 x i8>, ptr %p1
236+
call void @g()
237+
%q0 = getelementptr i8, ptr %q, i64 0
238+
%q1 = getelementptr i8, ptr %q, i64 2
239+
store <16 x i8> %x0, ptr %q0, align 16
240+
store <16 x i8> %x1, ptr %q1
241+
ret void
242+
}

0 commit comments

Comments
 (0)