@@ -120,25 +120,27 @@ define protected amdgpu_kernel void @nand(i32 addrspace(1)* %p, %S addrspace(1)*
120
120
; CHECK: ; %bb.0:
121
121
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
122
122
; CHECK-NEXT: s_mov_b64 s[4:5], 0
123
- ; CHECK-NEXT: v_mov_b32_e32 v0 , 0
123
+ ; CHECK-NEXT: v_mov_b32_e32 v1 , 0
124
124
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
125
125
; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0
126
126
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
127
- ; CHECK-NEXT: v_mov_b32_e32 v1 , s6
127
+ ; CHECK-NEXT: v_mov_b32_e32 v0 , s6
128
128
; CHECK-NEXT: .LBB5_1: ; %atomicrmw.start
129
129
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
130
- ; CHECK-NEXT: v_mov_b32_e32 v3, v1
131
- ; CHECK-NEXT: v_not_b32_e32 v1 , v3
132
- ; CHECK-NEXT: v_or_b32_e32 v2, -2, v1
133
- ; CHECK-NEXT: global_atomic_cmpswap v1, v0 , v[2:3], s[0:1] glc
130
+ ; CHECK-NEXT: v_mov_b32_e32 v3, v0
131
+ ; CHECK-NEXT: v_not_b32_e32 v0 , v3
132
+ ; CHECK-NEXT: v_or_b32_e32 v2, -2, v0
133
+ ; CHECK-NEXT: global_atomic_cmpswap v0, v1 , v[2:3], s[0:1] glc
134
134
; CHECK-NEXT: s_waitcnt vmcnt(0)
135
- ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v1 , v3
135
+ ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v0 , v3
136
136
; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
137
137
; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
138
138
; CHECK-NEXT: s_cbranch_execnz .LBB5_1
139
139
; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end
140
140
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
141
- ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v1, 12, s[2:3]
141
+ ; CHECK-NEXT: v_mov_b32_e32 v2, s2
142
+ ; CHECK-NEXT: v_mov_b32_e32 v3, s3
143
+ ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, v[2:3]
142
144
; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
143
145
; CHECK-NEXT: global_store_dword v[0:1], v2, off
144
146
; CHECK-NEXT: s_endpgm
@@ -330,26 +332,28 @@ define protected amdgpu_kernel void @fadd(float addrspace(1)* %p, %S addrspace(1
330
332
; CHECK: ; %bb.0:
331
333
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
332
334
; CHECK-NEXT: s_mov_b64 s[4:5], 0
333
- ; CHECK-NEXT: v_mov_b32_e32 v0 , 0
335
+ ; CHECK-NEXT: v_mov_b32_e32 v1 , 0
334
336
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
335
337
; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0
336
338
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
337
- ; CHECK-NEXT: v_mov_b32_e32 v1 , s6
339
+ ; CHECK-NEXT: v_mov_b32_e32 v0 , s6
338
340
; CHECK-NEXT: .LBB14_1: ; %atomicrmw.start
339
341
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
340
- ; CHECK-NEXT: v_mov_b32_e32 v3, v1
342
+ ; CHECK-NEXT: v_mov_b32_e32 v3, v0
341
343
; CHECK-NEXT: v_add_f32_e32 v2, 1.0, v3
342
- ; CHECK-NEXT: global_atomic_cmpswap v1, v0 , v[2:3], s[0:1] glc
344
+ ; CHECK-NEXT: global_atomic_cmpswap v0, v1 , v[2:3], s[0:1] glc
343
345
; CHECK-NEXT: s_waitcnt vmcnt(0)
344
- ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v1 , v3
346
+ ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v0 , v3
345
347
; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
346
348
; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
347
349
; CHECK-NEXT: s_cbranch_execnz .LBB14_1
348
350
; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end
349
351
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
350
- ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v1
352
+ ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v0
353
+ ; CHECK-NEXT: v_mov_b32_e32 v0, s2
354
+ ; CHECK-NEXT: v_mov_b32_e32 v1, s3
355
+ ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
351
356
; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
352
- ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[2:3]
353
357
; CHECK-NEXT: global_store_dword v[0:1], v2, off
354
358
; CHECK-NEXT: s_endpgm
355
359
%f32 = atomicrmw fadd float addrspace (1 )* %p , float 1 .0 monotonic
@@ -365,26 +369,28 @@ define protected amdgpu_kernel void @fsub(float addrspace(1)* %p, %S addrspace(1
365
369
; CHECK: ; %bb.0:
366
370
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
367
371
; CHECK-NEXT: s_mov_b64 s[4:5], 0
368
- ; CHECK-NEXT: v_mov_b32_e32 v0 , 0
372
+ ; CHECK-NEXT: v_mov_b32_e32 v1 , 0
369
373
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
370
374
; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0
371
375
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
372
- ; CHECK-NEXT: v_mov_b32_e32 v1 , s6
376
+ ; CHECK-NEXT: v_mov_b32_e32 v0 , s6
373
377
; CHECK-NEXT: .LBB15_1: ; %atomicrmw.start
374
378
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
375
- ; CHECK-NEXT: v_mov_b32_e32 v3, v1
379
+ ; CHECK-NEXT: v_mov_b32_e32 v3, v0
376
380
; CHECK-NEXT: v_add_f32_e32 v2, -1.0, v3
377
- ; CHECK-NEXT: global_atomic_cmpswap v1, v0 , v[2:3], s[0:1] glc
381
+ ; CHECK-NEXT: global_atomic_cmpswap v0, v1 , v[2:3], s[0:1] glc
378
382
; CHECK-NEXT: s_waitcnt vmcnt(0)
379
- ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v1 , v3
383
+ ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v0 , v3
380
384
; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
381
385
; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
382
386
; CHECK-NEXT: s_cbranch_execnz .LBB15_1
383
387
; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end
384
388
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
385
- ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v1
389
+ ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v0
390
+ ; CHECK-NEXT: v_mov_b32_e32 v0, s2
391
+ ; CHECK-NEXT: v_mov_b32_e32 v1, s3
392
+ ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
386
393
; CHECK-NEXT: v_mov_b32_e32 v2, 1.0
387
- ; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v0, 12, s[2:3]
388
394
; CHECK-NEXT: global_store_dword v[0:1], v2, off
389
395
; CHECK-NEXT: s_endpgm
390
396
%f32 = atomicrmw fsub float addrspace (1 )* %p , float 1 .0 monotonic
0 commit comments