1
- ;RUN: llc < %s -mtriple=amdgcn -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
2
- ;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=FUNC %s
3
- ;RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
4
-
5
- ; FUNC-LABEL: {{^}}test_select_v2i32:
6
-
7
- ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Z
8
- ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Y
9
-
10
- ; VI: s_cmp_gt_i32
11
- ; VI: s_cselect_b32
12
- ; VI: s_cmp_gt_i32
13
- ; VI: s_cselect_b32
14
-
15
- ; SI-DAG: s_cmp_gt_i32
16
- ; SI-DAG: s_cselect_b32
17
- ; SI-DAG: s_cmp_gt_i32
18
- ; SI-DAG: s_cselect_b32
1
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2
+ ;RUN: llc < %s -mtriple=amdgcn -verify-machineinstrs | FileCheck --check-prefixes=SI %s
3
+ ;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=VI %s
4
+ ;RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck --check-prefixes=EG %s
19
5
20
6
define amdgpu_kernel void @test_select_v2i32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in0 , ptr addrspace (1 ) %in1 , <2 x i32 > %val ) {
7
+ ; SI-LABEL: test_select_v2i32:
8
+ ; SI: ; %bb.0: ; %entry
9
+ ; SI-NEXT: s_load_dwordx8 s[0:7], s[2:3], 0x9
10
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
11
+ ; SI-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x0
12
+ ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
13
+ ; SI-NEXT: s_mov_b32 s3, 0xf000
14
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
15
+ ; SI-NEXT: s_cmp_gt_i32 s9, s5
16
+ ; SI-NEXT: s_cselect_b32 s5, s7, s9
17
+ ; SI-NEXT: s_cmp_gt_i32 s8, s4
18
+ ; SI-NEXT: s_cselect_b32 s4, s6, s8
19
+ ; SI-NEXT: s_mov_b32 s2, -1
20
+ ; SI-NEXT: v_mov_b32_e32 v1, s5
21
+ ; SI-NEXT: v_mov_b32_e32 v0, s4
22
+ ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
23
+ ; SI-NEXT: s_endpgm
24
+ ;
25
+ ; VI-LABEL: test_select_v2i32:
26
+ ; VI: ; %bb.0: ; %entry
27
+ ; VI-NEXT: s_load_dwordx8 s[0:7], s[2:3], 0x24
28
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
29
+ ; VI-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x0
30
+ ; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
31
+ ; VI-NEXT: s_mov_b32 s3, 0xf000
32
+ ; VI-NEXT: s_mov_b32 s2, -1
33
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
34
+ ; VI-NEXT: s_cmp_gt_i32 s9, s5
35
+ ; VI-NEXT: s_cselect_b32 s5, s7, s9
36
+ ; VI-NEXT: s_cmp_gt_i32 s8, s4
37
+ ; VI-NEXT: s_cselect_b32 s4, s6, s8
38
+ ; VI-NEXT: v_mov_b32_e32 v0, s4
39
+ ; VI-NEXT: v_mov_b32_e32 v1, s5
40
+ ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
41
+ ; VI-NEXT: s_endpgm
42
+ ;
43
+ ; EG-LABEL: test_select_v2i32:
44
+ ; EG: ; %bb.0: ; %entry
45
+ ; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
46
+ ; EG-NEXT: TEX 1 @6
47
+ ; EG-NEXT: ALU 5, @12, KC0[CB0:0-32], KC1[]
48
+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
49
+ ; EG-NEXT: CF_END
50
+ ; EG-NEXT: PAD
51
+ ; EG-NEXT: Fetch clause starting at 6:
52
+ ; EG-NEXT: VTX_READ_64 T1.XY, T1.X, 0, #1
53
+ ; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
54
+ ; EG-NEXT: ALU clause starting at 10:
55
+ ; EG-NEXT: MOV T0.X, KC0[2].Z,
56
+ ; EG-NEXT: MOV * T1.X, KC0[2].W,
57
+ ; EG-NEXT: ALU clause starting at 12:
58
+ ; EG-NEXT: SETGT_INT * T0.W, T0.Y, T1.Y,
59
+ ; EG-NEXT: CNDE_INT T0.Y, PV.W, T0.Y, KC0[3].Z,
60
+ ; EG-NEXT: SETGT_INT * T0.W, T0.X, T1.X,
61
+ ; EG-NEXT: CNDE_INT T0.X, PV.W, T0.X, KC0[3].Y,
62
+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
63
+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
21
64
entry:
22
65
%load0 = load <2 x i32 >, ptr addrspace (1 ) %in0
23
66
%load1 = load <2 x i32 >, ptr addrspace (1 ) %in1
@@ -27,17 +70,72 @@ entry:
27
70
ret void
28
71
}
29
72
30
- ; FUNC-LABEL: {{^}}test_select_v2f32:
31
-
32
- ; EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
33
- ; EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
34
-
35
- ; SI: v_cmp_neq_f32_e32 vcc
36
- ; SI: v_cndmask_b32_e32
37
- ; SI: v_cmp_neq_f32_e32 vcc
38
- ; SI: v_cndmask_b32_e32
39
-
40
73
define amdgpu_kernel void @test_select_v2f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in0 , ptr addrspace (1 ) %in1 ) {
74
+ ; SI-LABEL: test_select_v2f32:
75
+ ; SI: ; %bb.0: ; %entry
76
+ ; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
77
+ ; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xd
78
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
79
+ ; SI-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
80
+ ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
81
+ ; SI-NEXT: s_mov_b32 s7, 0xf000
82
+ ; SI-NEXT: s_mov_b32 s6, -1
83
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
84
+ ; SI-NEXT: v_mov_b32_e32 v0, s0
85
+ ; SI-NEXT: v_mov_b32_e32 v1, s1
86
+ ; SI-NEXT: v_mov_b32_e32 v2, s3
87
+ ; SI-NEXT: v_cmp_neq_f32_e32 vcc, s3, v1
88
+ ; SI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
89
+ ; SI-NEXT: v_mov_b32_e32 v2, s2
90
+ ; SI-NEXT: v_cmp_neq_f32_e32 vcc, s2, v0
91
+ ; SI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
92
+ ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
93
+ ; SI-NEXT: s_endpgm
94
+ ;
95
+ ; VI-LABEL: test_select_v2f32:
96
+ ; VI: ; %bb.0: ; %entry
97
+ ; VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x34
98
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
99
+ ; VI-NEXT: s_mov_b32 s7, 0xf000
100
+ ; VI-NEXT: s_mov_b32 s6, -1
101
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
102
+ ; VI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0
103
+ ; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
104
+ ; VI-NEXT: s_mov_b32 s4, s0
105
+ ; VI-NEXT: s_mov_b32 s5, s1
106
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
107
+ ; VI-NEXT: v_mov_b32_e32 v1, s9
108
+ ; VI-NEXT: v_mov_b32_e32 v0, s8
109
+ ; VI-NEXT: v_mov_b32_e32 v2, s3
110
+ ; VI-NEXT: v_cmp_neq_f32_e32 vcc, s3, v1
111
+ ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
112
+ ; VI-NEXT: v_mov_b32_e32 v2, s2
113
+ ; VI-NEXT: v_cmp_neq_f32_e32 vcc, s2, v0
114
+ ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
115
+ ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
116
+ ; VI-NEXT: s_endpgm
117
+ ;
118
+ ; EG-LABEL: test_select_v2f32:
119
+ ; EG: ; %bb.0: ; %entry
120
+ ; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
121
+ ; EG-NEXT: TEX 1 @6
122
+ ; EG-NEXT: ALU 5, @12, KC0[CB0:0-32], KC1[]
123
+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
124
+ ; EG-NEXT: CF_END
125
+ ; EG-NEXT: PAD
126
+ ; EG-NEXT: Fetch clause starting at 6:
127
+ ; EG-NEXT: VTX_READ_64 T1.XY, T1.X, 0, #1
128
+ ; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
129
+ ; EG-NEXT: ALU clause starting at 10:
130
+ ; EG-NEXT: MOV T0.X, KC0[2].Z,
131
+ ; EG-NEXT: MOV * T1.X, KC0[2].W,
132
+ ; EG-NEXT: ALU clause starting at 12:
133
+ ; EG-NEXT: SETNE_DX10 * T0.W, T0.Y, T1.Y,
134
+ ; EG-NEXT: CNDE_INT T0.Y, PV.W, T1.Y, T0.Y,
135
+ ; EG-NEXT: SETNE_DX10 * T0.W, T0.X, T1.X,
136
+ ; EG-NEXT: CNDE_INT T0.X, PV.W, T1.X, T0.X,
137
+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
138
+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
41
139
entry:
42
140
%0 = load <2 x float >, ptr addrspace (1 ) %in0
43
141
%1 = load <2 x float >, ptr addrspace (1 ) %in1
@@ -47,24 +145,86 @@ entry:
47
145
ret void
48
146
}
49
147
50
- ;FUNC-LABEL: {{^}}test_select_v4i32:
51
-
52
- ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[4].X
53
- ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].W
54
- ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Z
55
- ; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Y
56
-
57
- ; VI: s_cselect_b32
58
- ; VI: s_cselect_b32
59
- ; VI: s_cselect_b32
60
- ; VI: s_cselect_b32
61
-
62
- ; SI-DAG: s_cselect_b32
63
- ; SI-DAG: s_cselect_b32
64
- ; SI-DAG: s_cselect_b32
65
- ; SI-DAG: s_cselect_b32
66
-
67
148
define amdgpu_kernel void @test_select_v4i32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in0 , ptr addrspace (1 ) %in1 , <4 x i32 > %val ) {
149
+ ; SI-LABEL: test_select_v4i32:
150
+ ; SI: ; %bb.0: ; %entry
151
+ ; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
152
+ ; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xd
153
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
154
+ ; SI-NEXT: s_load_dwordx4 s[8:11], s[6:7], 0x0
155
+ ; SI-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x0
156
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x11
157
+ ; SI-NEXT: s_mov_b32 s7, 0xf000
158
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
159
+ ; SI-NEXT: s_cmp_gt_i32 s10, s14
160
+ ; SI-NEXT: s_cselect_b32 s2, s2, s10
161
+ ; SI-NEXT: s_cmp_gt_i32 s9, s13
162
+ ; SI-NEXT: s_cselect_b32 s1, s1, s9
163
+ ; SI-NEXT: s_cmp_gt_i32 s11, s15
164
+ ; SI-NEXT: s_cselect_b32 s3, s3, s11
165
+ ; SI-NEXT: s_cmp_gt_i32 s8, s12
166
+ ; SI-NEXT: s_cselect_b32 s0, s0, s8
167
+ ; SI-NEXT: s_mov_b32 s6, -1
168
+ ; SI-NEXT: v_mov_b32_e32 v2, s2
169
+ ; SI-NEXT: v_mov_b32_e32 v1, s1
170
+ ; SI-NEXT: v_mov_b32_e32 v3, s3
171
+ ; SI-NEXT: v_mov_b32_e32 v0, s0
172
+ ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
173
+ ; SI-NEXT: s_endpgm
174
+ ;
175
+ ; VI-LABEL: test_select_v4i32:
176
+ ; VI: ; %bb.0: ; %entry
177
+ ; VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
178
+ ; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
179
+ ; VI-NEXT: s_mov_b32 s11, 0xf000
180
+ ; VI-NEXT: s_mov_b32 s10, -1
181
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
182
+ ; VI-NEXT: s_load_dwordx4 s[12:15], s[6:7], 0x0
183
+ ; VI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x0
184
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x44
185
+ ; VI-NEXT: s_mov_b32 s8, s4
186
+ ; VI-NEXT: s_mov_b32 s9, s5
187
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
188
+ ; VI-NEXT: s_cmp_gt_i32 s14, s18
189
+ ; VI-NEXT: s_cselect_b32 s2, s2, s14
190
+ ; VI-NEXT: s_cmp_gt_i32 s13, s17
191
+ ; VI-NEXT: s_cselect_b32 s1, s1, s13
192
+ ; VI-NEXT: s_cmp_gt_i32 s15, s19
193
+ ; VI-NEXT: s_cselect_b32 s3, s3, s15
194
+ ; VI-NEXT: s_cmp_gt_i32 s12, s16
195
+ ; VI-NEXT: s_cselect_b32 s0, s0, s12
196
+ ; VI-NEXT: v_mov_b32_e32 v0, s0
197
+ ; VI-NEXT: v_mov_b32_e32 v1, s1
198
+ ; VI-NEXT: v_mov_b32_e32 v2, s2
199
+ ; VI-NEXT: v_mov_b32_e32 v3, s3
200
+ ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
201
+ ; VI-NEXT: s_endpgm
202
+ ;
203
+ ; EG-LABEL: test_select_v4i32:
204
+ ; EG: ; %bb.0: ; %entry
205
+ ; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
206
+ ; EG-NEXT: TEX 1 @6
207
+ ; EG-NEXT: ALU 9, @12, KC0[CB0:0-32], KC1[]
208
+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
209
+ ; EG-NEXT: CF_END
210
+ ; EG-NEXT: PAD
211
+ ; EG-NEXT: Fetch clause starting at 6:
212
+ ; EG-NEXT: VTX_READ_128 T1.XYZW, T1.X, 0, #1
213
+ ; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
214
+ ; EG-NEXT: ALU clause starting at 10:
215
+ ; EG-NEXT: MOV T0.X, KC0[2].Z,
216
+ ; EG-NEXT: MOV * T1.X, KC0[2].W,
217
+ ; EG-NEXT: ALU clause starting at 12:
218
+ ; EG-NEXT: SETGT_INT T1.W, T0.W, T1.W,
219
+ ; EG-NEXT: SETGT_INT * T2.W, T0.Z, T1.Z,
220
+ ; EG-NEXT: CNDE_INT * T0.W, PV.W, T0.W, KC0[4].X,
221
+ ; EG-NEXT: CNDE_INT T0.Z, T2.W, T0.Z, KC0[3].W,
222
+ ; EG-NEXT: SETGT_INT * T1.W, T0.Y, T1.Y,
223
+ ; EG-NEXT: CNDE_INT T0.Y, PV.W, T0.Y, KC0[3].Z,
224
+ ; EG-NEXT: SETGT_INT * T1.W, T0.X, T1.X,
225
+ ; EG-NEXT: CNDE_INT T0.X, PV.W, T0.X, KC0[3].Y,
226
+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
227
+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
68
228
entry:
69
229
%load0 = load <4 x i32 >, ptr addrspace (1 ) %in0
70
230
%load1 = load <4 x i32 >, ptr addrspace (1 ) %in1
@@ -74,17 +234,92 @@ entry:
74
234
ret void
75
235
}
76
236
77
- ;FUNC-LABEL: {{^}}test_select_v4f32:
78
- ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
79
- ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
80
- ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
81
- ;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
82
-
83
- ; SI: v_cndmask_b32_e32
84
- ; SI: v_cndmask_b32_e32
85
- ; SI: v_cndmask_b32_e32
86
- ; SI: v_cndmask_b32_e32
87
237
define amdgpu_kernel void @test_select_v4f32 (ptr addrspace (1 ) %out , ptr addrspace (1 ) %in0 , ptr addrspace (1 ) %in1 ) {
238
+ ; SI-LABEL: test_select_v4f32:
239
+ ; SI: ; %bb.0: ; %entry
240
+ ; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
241
+ ; SI-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0xd
242
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
243
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
244
+ ; SI-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x0
245
+ ; SI-NEXT: s_mov_b32 s7, 0xf000
246
+ ; SI-NEXT: s_mov_b32 s6, -1
247
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
248
+ ; SI-NEXT: v_mov_b32_e32 v0, s8
249
+ ; SI-NEXT: v_mov_b32_e32 v1, s9
250
+ ; SI-NEXT: v_mov_b32_e32 v2, s10
251
+ ; SI-NEXT: v_mov_b32_e32 v3, s11
252
+ ; SI-NEXT: v_mov_b32_e32 v4, s3
253
+ ; SI-NEXT: v_cmp_neq_f32_e32 vcc, s3, v3
254
+ ; SI-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
255
+ ; SI-NEXT: v_mov_b32_e32 v4, s2
256
+ ; SI-NEXT: v_cmp_neq_f32_e32 vcc, s2, v2
257
+ ; SI-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
258
+ ; SI-NEXT: v_mov_b32_e32 v4, s1
259
+ ; SI-NEXT: v_cmp_neq_f32_e32 vcc, s1, v1
260
+ ; SI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
261
+ ; SI-NEXT: v_mov_b32_e32 v4, s0
262
+ ; SI-NEXT: v_cmp_neq_f32_e32 vcc, s0, v0
263
+ ; SI-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
264
+ ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
265
+ ; SI-NEXT: s_endpgm
266
+ ;
267
+ ; VI-LABEL: test_select_v4f32:
268
+ ; VI: ; %bb.0: ; %entry
269
+ ; VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x34
270
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
271
+ ; VI-NEXT: s_mov_b32 s7, 0xf000
272
+ ; VI-NEXT: s_mov_b32 s6, -1
273
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
274
+ ; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0
275
+ ; VI-NEXT: s_mov_b32 s4, s0
276
+ ; VI-NEXT: s_mov_b32 s5, s1
277
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
278
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
279
+ ; VI-NEXT: v_mov_b32_e32 v3, s11
280
+ ; VI-NEXT: v_mov_b32_e32 v2, s10
281
+ ; VI-NEXT: v_mov_b32_e32 v1, s9
282
+ ; VI-NEXT: v_mov_b32_e32 v4, s3
283
+ ; VI-NEXT: v_cmp_neq_f32_e32 vcc, s3, v3
284
+ ; VI-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
285
+ ; VI-NEXT: v_mov_b32_e32 v4, s2
286
+ ; VI-NEXT: v_cmp_neq_f32_e32 vcc, s2, v2
287
+ ; VI-NEXT: v_mov_b32_e32 v0, s8
288
+ ; VI-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
289
+ ; VI-NEXT: v_mov_b32_e32 v4, s1
290
+ ; VI-NEXT: v_cmp_neq_f32_e32 vcc, s1, v1
291
+ ; VI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
292
+ ; VI-NEXT: v_mov_b32_e32 v4, s0
293
+ ; VI-NEXT: v_cmp_neq_f32_e32 vcc, s0, v0
294
+ ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
295
+ ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
296
+ ; VI-NEXT: s_endpgm
297
+ ;
298
+ ; EG-LABEL: test_select_v4f32:
299
+ ; EG: ; %bb.0: ; %entry
300
+ ; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
301
+ ; EG-NEXT: TEX 1 @6
302
+ ; EG-NEXT: ALU 9, @12, KC0[CB0:0-32], KC1[]
303
+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
304
+ ; EG-NEXT: CF_END
305
+ ; EG-NEXT: PAD
306
+ ; EG-NEXT: Fetch clause starting at 6:
307
+ ; EG-NEXT: VTX_READ_128 T1.XYZW, T1.X, 0, #1
308
+ ; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
309
+ ; EG-NEXT: ALU clause starting at 10:
310
+ ; EG-NEXT: MOV T0.X, KC0[2].Z,
311
+ ; EG-NEXT: MOV * T1.X, KC0[2].W,
312
+ ; EG-NEXT: ALU clause starting at 12:
313
+ ; EG-NEXT: SETNE_DX10 T2.W, T0.W, T1.W,
314
+ ; EG-NEXT: SETNE_DX10 * T3.W, T0.Z, T1.Z,
315
+ ; EG-NEXT: CNDE_INT * T0.W, PV.W, T1.W, T0.W,
316
+ ; EG-NEXT: CNDE_INT T0.Z, T3.W, T1.Z, T0.Z,
317
+ ; EG-NEXT: SETNE_DX10 * T1.W, T0.Y, T1.Y,
318
+ ; EG-NEXT: CNDE_INT T0.Y, PV.W, T1.Y, T0.Y,
319
+ ; EG-NEXT: SETNE_DX10 * T1.W, T0.X, T1.X,
320
+ ; EG-NEXT: CNDE_INT T0.X, PV.W, T1.X, T0.X,
321
+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
322
+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
88
323
entry:
89
324
%0 = load <4 x float >, ptr addrspace (1 ) %in0
90
325
%1 = load <4 x float >, ptr addrspace (1 ) %in1
0 commit comments