Skip to content

Commit 3c86b74

Browse files
committed
[AMDGPU] add_i1.ll / sub_i1.ll - regenerate test checks
1 parent 49dc163 commit 3c86b74

File tree

2 files changed

+424
-27
lines changed

2 files changed

+424
-27
lines changed

llvm/test/CodeGen/AMDGPU/add_i1.ll

Lines changed: 212 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,33 +1,232 @@
1-
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
2-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
3-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
3+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
4+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
45

5-
; GCN-LABEL: {{^}}add_var_var_i1:
6-
; GFX9: v_xor_b32_e32
7-
; GFX10: v_xor_b32_e32
86
; Kernel under test: loads two i1 values with volatile loads from %in0 and
; %in1, adds them, and stores the i1 result to %out.
; The autogenerated assertions below cover three check prefixes (one per RUN
; line); in each of them the add is expected to show up as a v_xor_b32 of the
; two loaded bytes followed by a v_and_b32 with 1.
define amdgpu_kernel void @add_var_var_i1(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
7+
; GFX9-LABEL: add_var_var_i1:
8+
; GFX9: ; %bb.0:
9+
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
10+
; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
11+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
12+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
13+
; GFX9-NEXT: global_load_ubyte v1, v0, s[2:3] glc
14+
; GFX9-NEXT: s_waitcnt vmcnt(0)
15+
; GFX9-NEXT: global_load_ubyte v2, v0, s[6:7] glc
16+
; GFX9-NEXT: s_waitcnt vmcnt(0)
17+
; GFX9-NEXT: v_xor_b32_e32 v1, v1, v2
18+
; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
19+
; GFX9-NEXT: global_store_byte v0, v1, s[0:1]
20+
; GFX9-NEXT: s_endpgm
21+
;
22+
; GFX10-LABEL: add_var_var_i1:
23+
; GFX10: ; %bb.0:
24+
; GFX10-NEXT: s_clause 0x1
25+
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
26+
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
27+
; GFX10-NEXT: v_mov_b32_e32 v0, 0
28+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
29+
; GFX10-NEXT: global_load_ubyte v1, v0, s[2:3] glc dlc
30+
; GFX10-NEXT: s_waitcnt vmcnt(0)
31+
; GFX10-NEXT: global_load_ubyte v2, v0, s[6:7] glc dlc
32+
; GFX10-NEXT: s_waitcnt vmcnt(0)
33+
; GFX10-NEXT: v_xor_b32_e32 v1, v1, v2
34+
; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
35+
; GFX10-NEXT: global_store_byte v0, v1, s[0:1]
36+
; GFX10-NEXT: s_endpgm
37+
;
38+
; GFX11-LABEL: add_var_var_i1:
39+
; GFX11: ; %bb.0:
40+
; GFX11-NEXT: s_clause 0x1
41+
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
42+
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
43+
; GFX11-NEXT: v_mov_b32_e32 v0, 0
44+
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
45+
; GFX11-NEXT: global_load_u8 v1, v0, s[2:3] glc dlc
46+
; GFX11-NEXT: s_waitcnt vmcnt(0)
47+
; GFX11-NEXT: global_load_u8 v2, v0, s[4:5] glc dlc
48+
; GFX11-NEXT: s_waitcnt vmcnt(0)
49+
; GFX11-NEXT: v_xor_b32_e32 v1, v1, v2
50+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
51+
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
52+
; GFX11-NEXT: global_store_b8 v0, v1, s[0:1]
53+
; GFX11-NEXT: s_endpgm
954
%a = load volatile i1, ptr addrspace(1) %in0
1055
%b = load volatile i1, ptr addrspace(1) %in1
; The i1 add of the two loaded values is the operation this test is named for.
1156
%add = add i1 %a, %b
1257
store i1 %add, ptr addrspace(1) %out
1358
ret void
1459
}
1560

16-
; GCN-LABEL: {{^}}add_var_imm_i1:
17-
; GFX9: s_xor_b64
18-
; GFX10: s_xor_b32
1961
; Kernel under test: loads one i1 with a volatile load from %in, adds the
; constant 1 to it, and stores the i1 result to %out.
; In each check block below the loaded byte is masked with 1 and compared
; against 1, the compare result is inverted with an s_xor against -1
; (s_xor_b64 in the first block, s_xor_b32 in the other two), and
; v_cndmask_b32 turns it back into a 0/1 byte for the store.
define amdgpu_kernel void @add_var_imm_i1(ptr addrspace(1) %out, ptr addrspace(1) %in) {
62+
; GFX9-LABEL: add_var_imm_i1:
63+
; GFX9: ; %bb.0:
64+
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
65+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
66+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
67+
; GFX9-NEXT: global_load_ubyte v1, v0, s[2:3] glc
68+
; GFX9-NEXT: s_waitcnt vmcnt(0)
69+
; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
70+
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
71+
; GFX9-NEXT: s_xor_b64 s[2:3], vcc, -1
72+
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
73+
; GFX9-NEXT: global_store_byte v0, v1, s[0:1]
74+
; GFX9-NEXT: s_endpgm
75+
;
76+
; GFX10-LABEL: add_var_imm_i1:
77+
; GFX10: ; %bb.0:
78+
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
79+
; GFX10-NEXT: v_mov_b32_e32 v0, 0
80+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
81+
; GFX10-NEXT: global_load_ubyte v1, v0, s[2:3] glc dlc
82+
; GFX10-NEXT: s_waitcnt vmcnt(0)
83+
; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
84+
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
85+
; GFX10-NEXT: s_xor_b32 s2, vcc_lo, -1
86+
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
87+
; GFX10-NEXT: global_store_byte v0, v1, s[0:1]
88+
; GFX10-NEXT: s_endpgm
89+
;
90+
; GFX11-LABEL: add_var_imm_i1:
91+
; GFX11: ; %bb.0:
92+
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
93+
; GFX11-NEXT: v_mov_b32_e32 v0, 0
94+
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
95+
; GFX11-NEXT: global_load_u8 v1, v0, s[2:3] glc dlc
96+
; GFX11-NEXT: s_waitcnt vmcnt(0)
97+
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
98+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
99+
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
100+
; GFX11-NEXT: s_xor_b32 s2, vcc_lo, -1
101+
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
102+
; GFX11-NEXT: global_store_b8 v0, v1, s[0:1]
103+
; GFX11-NEXT: s_endpgm
20104
%a = load volatile i1, ptr addrspace(1) %in
; Adding the i1 constant 1 is what the checks above expect to see as the
; xor-with--1 inversion of the loaded bit.
21105
%add = add i1 %a, 1
22106
store i1 %add, ptr addrspace(1) %out
23107
ret void
24108
}
25109

26-
; GCN-LABEL: {{^}}add_i1_cf:
27-
; GCN: ; %endif
28-
; GFX9: s_xor_b64
29-
; GFX10: s_xor_b32
30110
define amdgpu_kernel void @add_i1_cf(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
111+
; GFX9-LABEL: add_i1_cf:
112+
; GFX9: ; %bb.0: ; %entry
113+
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
114+
; GFX9-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34
115+
; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
116+
; GFX9-NEXT: ; implicit-def: $sgpr4_sgpr5
117+
; GFX9-NEXT: s_and_saveexec_b64 s[6:7], vcc
118+
; GFX9-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
119+
; GFX9-NEXT: s_cbranch_execz .LBB2_2
120+
; GFX9-NEXT: ; %bb.1: ; %else
121+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
122+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
123+
; GFX9-NEXT: global_load_ubyte v0, v0, s[8:9] glc
124+
; GFX9-NEXT: s_waitcnt vmcnt(0)
125+
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
126+
; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
127+
; GFX9-NEXT: .LBB2_2: ; %Flow
128+
; GFX9-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
129+
; GFX9-NEXT: s_cbranch_execz .LBB2_4
130+
; GFX9-NEXT: ; %bb.3: ; %if
131+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
132+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
133+
; GFX9-NEXT: global_load_ubyte v0, v0, s[2:3] glc
134+
; GFX9-NEXT: s_waitcnt vmcnt(0)
135+
; GFX9-NEXT: s_andn2_b64 s[2:3], s[4:5], exec
136+
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
137+
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
138+
; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec
139+
; GFX9-NEXT: s_or_b64 s[4:5], s[2:3], s[4:5]
140+
; GFX9-NEXT: .LBB2_4: ; %endif
141+
; GFX9-NEXT: s_or_b64 exec, exec, s[6:7]
142+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
143+
; GFX9-NEXT: s_xor_b64 s[2:3], s[4:5], -1
144+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
145+
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
146+
; GFX9-NEXT: global_store_byte v0, v1, s[0:1]
147+
; GFX9-NEXT: s_endpgm
148+
;
149+
; GFX10-LABEL: add_i1_cf:
150+
; GFX10: ; %bb.0: ; %entry
151+
; GFX10-NEXT: s_clause 0x1
152+
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
153+
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
154+
; GFX10-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0
155+
; GFX10-NEXT: ; implicit-def: $sgpr4
156+
; GFX10-NEXT: s_and_saveexec_b32 s5, vcc_lo
157+
; GFX10-NEXT: s_xor_b32 s5, exec_lo, s5
158+
; GFX10-NEXT: s_cbranch_execz .LBB2_2
159+
; GFX10-NEXT: ; %bb.1: ; %else
160+
; GFX10-NEXT: v_mov_b32_e32 v0, 0
161+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
162+
; GFX10-NEXT: global_load_ubyte v0, v0, s[6:7] glc dlc
163+
; GFX10-NEXT: s_waitcnt vmcnt(0)
164+
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
165+
; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v0
166+
; GFX10-NEXT: .LBB2_2: ; %Flow
167+
; GFX10-NEXT: s_andn2_saveexec_b32 s5, s5
168+
; GFX10-NEXT: s_cbranch_execz .LBB2_4
169+
; GFX10-NEXT: ; %bb.3: ; %if
170+
; GFX10-NEXT: v_mov_b32_e32 v0, 0
171+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
172+
; GFX10-NEXT: global_load_ubyte v0, v0, s[2:3] glc dlc
173+
; GFX10-NEXT: s_waitcnt vmcnt(0)
174+
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
175+
; GFX10-NEXT: s_andn2_b32 s2, s4, exec_lo
176+
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
177+
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
178+
; GFX10-NEXT: s_and_b32 s3, vcc_lo, exec_lo
179+
; GFX10-NEXT: s_or_b32 s4, s2, s3
180+
; GFX10-NEXT: .LBB2_4: ; %endif
181+
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
182+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
183+
; GFX10-NEXT: s_xor_b32 s2, s4, -1
184+
; GFX10-NEXT: v_mov_b32_e32 v0, 0
185+
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
186+
; GFX10-NEXT: global_store_byte v0, v1, s[0:1]
187+
; GFX10-NEXT: s_endpgm
188+
;
189+
; GFX11-LABEL: add_i1_cf:
190+
; GFX11: ; %bb.0: ; %entry
191+
; GFX11-NEXT: s_clause 0x1
192+
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
193+
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
194+
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
195+
; GFX11-NEXT: s_mov_b32 s7, exec_lo
196+
; GFX11-NEXT: ; implicit-def: $sgpr6
197+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
198+
; GFX11-NEXT: v_cmpx_lt_u32_e32 15, v0
199+
; GFX11-NEXT: s_xor_b32 s7, exec_lo, s7
200+
; GFX11-NEXT: s_cbranch_execz .LBB2_2
201+
; GFX11-NEXT: ; %bb.1: ; %else
202+
; GFX11-NEXT: v_mov_b32_e32 v0, 0
203+
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
204+
; GFX11-NEXT: global_load_u8 v0, v0, s[4:5] glc dlc
205+
; GFX11-NEXT: s_waitcnt vmcnt(0)
206+
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
207+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
208+
; GFX11-NEXT: v_cmp_eq_u32_e64 s6, 1, v0
209+
; GFX11-NEXT: .LBB2_2: ; %Flow
210+
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
211+
; GFX11-NEXT: s_and_not1_saveexec_b32 s4, s7
212+
; GFX11-NEXT: s_cbranch_execz .LBB2_4
213+
; GFX11-NEXT: ; %bb.3: ; %if
214+
; GFX11-NEXT: v_mov_b32_e32 v0, 0
215+
; GFX11-NEXT: global_load_u8 v0, v0, s[2:3] glc dlc
216+
; GFX11-NEXT: s_waitcnt vmcnt(0)
217+
; GFX11-NEXT: s_and_not1_b32 s2, s6, exec_lo
218+
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
219+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
220+
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
221+
; GFX11-NEXT: s_and_b32 s3, vcc_lo, exec_lo
222+
; GFX11-NEXT: s_or_b32 s6, s2, s3
223+
; GFX11-NEXT: .LBB2_4: ; %endif
224+
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4
225+
; GFX11-NEXT: s_xor_b32 s2, s6, -1
226+
; GFX11-NEXT: v_mov_b32_e32 v0, 0
227+
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
228+
; GFX11-NEXT: global_store_b8 v0, v1, s[0:1]
229+
; GFX11-NEXT: s_endpgm
31230
entry:
32231
%tid = call i32 @llvm.amdgcn.workitem.id.x()
33232
%d_cmp = icmp ult i32 %tid, 16

0 commit comments

Comments
 (0)