@@ -152,4 +152,90 @@ end:
152
152
ret float %r
153
153
}
154
154
155
; Two chains of phi network that have the same value from %if block.
;
; The loop carries two independent float values:
;   chain 1: %v.1    -> %v.2      -> %r   (starts from the argument %v)
;   chain 2: %v.copy -> %v.copy.2 -> %r2  (starts from the constant 0.0)
; Both chains receive the very same %v.if value whenever control arrives from
; the %if block, either through %latch or through the early break to %end.
; The two results are returned packed as <2 x float> (%r in lane 0, %r2 in
; lane 1).
;
; The check lines below pin the expected llc output for this function and are
; reproduced unchanged.
define amdgpu_ps <2 x float> @while_break_two_chains_of_phi(float %v, i32 %x, i32 %y, i32 %z, ptr addrspace(1) %p) #0 {
; GCN-LABEL: while_break_two_chains_of_phi:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    v_mov_b32_e32 v6, 0
; GCN-NEXT:    s_mov_b32 s2, 0
; GCN-NEXT:    s_mov_b32 s0, 0
; GCN-NEXT:    s_branch .LBB2_2
; GCN-NEXT:  .LBB2_1: ; %Flow1
; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s1
; GCN-NEXT:    s_and_b32 s1, exec_lo, s4
; GCN-NEXT:    s_or_b32 s2, s1, s2
; GCN-NEXT:    s_andn2_b32 exec_lo, exec_lo, s2
; GCN-NEXT:    s_cbranch_execz .LBB2_6
; GCN-NEXT:  .LBB2_2: ; %header
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    v_cmp_ge_i32_e64 s3, s0, v1
; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, s0, v1
; GCN-NEXT:    s_and_saveexec_b32 s4, vcc_lo
; GCN-NEXT:    s_cbranch_execz .LBB2_4
; GCN-NEXT:  ; %bb.3: ; %if
; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; GCN-NEXT:    s_ashr_i32 s1, s0, 31
; GCN-NEXT:    s_lshl_b64 s[6:7], s[0:1], 2
; GCN-NEXT:    s_andn2_b32 s1, s3, exec_lo
; GCN-NEXT:    v_add_co_u32 v6, vcc_lo, v4, s6
; GCN-NEXT:    v_add_co_ci_u32_e32 v7, vcc_lo, s7, v5, vcc_lo
; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, s0, v2
; GCN-NEXT:    global_load_dword v0, v[6:7], off
; GCN-NEXT:    s_and_b32 s3, vcc_lo, exec_lo
; GCN-NEXT:    s_or_b32 s3, s1, s3
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_add_f32_e32 v6, 1.0, v0
; GCN-NEXT:    v_mov_b32_e32 v0, v6
; GCN-NEXT:  .LBB2_4: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s4
; GCN-NEXT:    v_mov_b32_e32 v7, v6
; GCN-NEXT:    s_mov_b32 s4, -1
; GCN-NEXT:    s_and_saveexec_b32 s1, s3
; GCN-NEXT:    s_cbranch_execz .LBB2_1
; GCN-NEXT:  ; %bb.5: ; %latch
; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; GCN-NEXT:    v_cmp_lt_i32_e32 vcc_lo, s0, v3
; GCN-NEXT:    v_mov_b32_e32 v7, v0
; GCN-NEXT:    s_add_i32 s0, s0, 1
; GCN-NEXT:    s_orn2_b32 s4, vcc_lo, exec_lo
; GCN-NEXT:    s_branch .LBB2_1
; GCN-NEXT:  .LBB2_6: ; %end
; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s2
; GCN-NEXT:    v_mov_b32_e32 v0, v7
; GCN-NEXT:    v_mov_b32_e32 v1, v6
; GCN-NEXT:    ; return to shader part epilog
entry:
  br label %header

; Loop header: merges the initial values with the values carried back from
; %latch, then enters %if only while %ind < %x.
header:
  %v.1 = phi float [ %v, %entry ], [ %v.2, %latch ]
  %v.copy = phi float [ 0.0, %entry ], [ %v.copy.2, %latch ]
  %ind = phi i32 [ 0, %entry ], [ %ind.inc, %latch ]
  %cc = icmp slt i32 %ind, %x
  br i1 %cc, label %if, label %latch

; Loads the float at index %ind from %p and adds 1.0. The result %v.if is the
; single value that feeds both phi chains. When %ind >= %y this block breaks
; out of the loop straight to %end.
if:
  %v.ptr = getelementptr float, ptr addrspace(1) %p, i32 %ind
  %v.load = load float, ptr addrspace(1) %v.ptr
  %v.if = fadd float %v.load, 1.0
  %cc2 = icmp slt i32 %ind, %y
  br i1 %cc2, label %latch, label %end

; Each chain either keeps its header value (edge from %header) or takes %v.if
; (edge from %if). The exit test compares the pre-increment %ind against %z.
latch:
  %v.2 = phi float [ %v.1, %header ], [ %v.if, %if ]
  %v.copy.2 = phi float [ %v.copy, %header ], [ %v.if, %if ]
  %ind.inc = add i32 %ind, 1
  %cc3 = icmp slt i32 %ind, %z
  br i1 %cc3, label %end, label %header

; Both result phis see %v.if on the break edge from %if, and their own chain's
; latch value on the normal loop exit.
end:
  %r = phi float [ %v.2, %latch ], [ %v.if, %if ]
  %r2 = phi float [ %v.copy.2, %latch ], [ %v.if, %if ]
  %packed0 = insertelement <2 x float> poison, float %r, i32 0
  %packed1 = insertelement <2 x float> %packed0, float %r2, i32 1
  ret <2 x float> %packed1
}
240
+
155
241
; Attribute group #0: attached to every function definition in this file that
; ends its signature with `#0`; it marks those functions `nounwind`.
attributes #0 = { nounwind }
0 commit comments