Skip to content

Commit 59e8bd3

Browse files
author
Tim Renouf
committed
[AMDGPU] Flag new raw/struct atomic ops as source of divergence
Differential Revision: https://reviews.llvm.org/D60731
Change-Id: I821d93dec8b9cdd247b8172d92fb5e15340a9e7d
llvm-svn: 358579
1 parent 272f15a commit 59e8bd3

File tree

2 files changed

+222
-0
lines changed

2 files changed

+222
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,28 @@ def : SourceOfDivergence<int_amdgcn_buffer_atomic_and>;
7171
def : SourceOfDivergence<int_amdgcn_buffer_atomic_or>;
7272
def : SourceOfDivergence<int_amdgcn_buffer_atomic_xor>;
7373
def : SourceOfDivergence<int_amdgcn_buffer_atomic_cmpswap>;
74+
// Raw buffer atomic intrinsics: one SourceOfDivergence entry per operation
// (swap, add, sub, smin, umin, smax, umax, and, or, xor, cmpswap).
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_swap>;
75+
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_add>;
76+
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_sub>;
77+
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_smin>;
78+
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_umin>;
79+
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_smax>;
80+
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_umax>;
81+
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_and>;
82+
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_or>;
83+
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_xor>;
84+
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cmpswap>;
85+
// Struct buffer atomic intrinsics: one SourceOfDivergence entry per operation
// (swap, add, sub, smin, umin, smax, umax, and, or, xor, cmpswap).
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_swap>;
86+
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_add>;
87+
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_sub>;
88+
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_smin>;
89+
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_umin>;
90+
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_smax>;
91+
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_umax>;
92+
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_and>;
93+
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_or>;
94+
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_xor>;
95+
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cmpswap>;
7496
def : SourceOfDivergence<int_amdgcn_ps_live>;
7597
def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
7698
def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;

llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,182 @@ main_body:
8888
ret float %r
8989
}
9090

91+
; raw.buffer.atomic.swap: every call operand is an inreg parameter or the
; constant 0; the FileCheck pattern below expects the call result %orig to be
; printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(
92+
define float @raw_buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
93+
main_body:
94+
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
95+
%r = bitcast i32 %orig to float
96+
ret float %r
97+
}
98+
99+
; raw.buffer.atomic.add: every call operand is an inreg parameter or the
; constant 0; the FileCheck pattern below expects the call result %orig to be
; printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(
100+
define float @raw_buffer_atomic_add(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
101+
main_body:
102+
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
103+
%r = bitcast i32 %orig to float
104+
ret float %r
105+
}
106+
107+
; raw.buffer.atomic.sub: every call operand is an inreg parameter or the
; constant 0; the FileCheck pattern below expects the call result %orig to be
; printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.sub.i32(
108+
define float @raw_buffer_atomic_sub(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
109+
main_body:
110+
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.sub.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
111+
%r = bitcast i32 %orig to float
112+
ret float %r
113+
}
114+
115+
; raw.buffer.atomic.smin: every call operand is an inreg parameter or the
; constant 0; the FileCheck pattern below expects the call result %orig to be
; printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.smin.i32(
116+
define float @raw_buffer_atomic_smin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
117+
main_body:
118+
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.smin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
119+
%r = bitcast i32 %orig to float
120+
ret float %r
121+
}
122+
123+
; raw.buffer.atomic.umin: every call operand is an inreg parameter or the
; constant 0; the FileCheck pattern below expects the call result %orig to be
; printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.umin.i32(
124+
define float @raw_buffer_atomic_umin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
125+
main_body:
126+
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.umin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
127+
%r = bitcast i32 %orig to float
128+
ret float %r
129+
}
130+
131+
; raw.buffer.atomic.smax: every call operand is an inreg parameter or the
; constant 0; the FileCheck pattern below expects the call result %orig to be
; printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.smax.i32(
132+
define float @raw_buffer_atomic_smax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
133+
main_body:
134+
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.smax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
135+
%r = bitcast i32 %orig to float
136+
ret float %r
137+
}
138+
139+
; raw.buffer.atomic.umax: every call operand is an inreg parameter or the
; constant 0; the FileCheck pattern below expects the call result %orig to be
; printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.umax.i32(
140+
define float @raw_buffer_atomic_umax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
141+
main_body:
142+
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.umax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
143+
%r = bitcast i32 %orig to float
144+
ret float %r
145+
}
146+
147+
; raw.buffer.atomic.and: every call operand is an inreg parameter or the
; constant 0; the FileCheck pattern below expects the call result %orig to be
; printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.and.i32(
148+
define float @raw_buffer_atomic_and(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
149+
main_body:
150+
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.and.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
151+
%r = bitcast i32 %orig to float
152+
ret float %r
153+
}
154+
155+
; raw.buffer.atomic.or: every call operand is an inreg parameter or the
; constant 0; the FileCheck pattern below expects the call result %orig to be
; printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.or.i32(
156+
define float @raw_buffer_atomic_or(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
157+
main_body:
158+
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.or.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
159+
%r = bitcast i32 %orig to float
160+
ret float %r
161+
}
162+
163+
; raw.buffer.atomic.xor: every call operand is an inreg parameter or the
; constant 0; the FileCheck pattern below expects the call result %orig to be
; printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(
164+
define float @raw_buffer_atomic_xor(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
165+
main_body:
166+
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
167+
%r = bitcast i32 %orig to float
168+
ret float %r
169+
}
170+
171+
; raw.buffer.atomic.cmpswap: takes an extra inreg %cmp operand after %data;
; all remaining operands are inreg parameters or the constant 0. The FileCheck
; pattern below expects the call result %orig to be printed with the DIVERGENT
; tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(
172+
define float @raw_buffer_atomic_cmpswap(<4 x i32> inreg %rsrc, i32 inreg %data, i32 inreg %cmp) #0 {
173+
main_body:
174+
%orig = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
175+
%r = bitcast i32 %orig to float
176+
ret float %r
177+
}
178+
179+
; struct.buffer.atomic.swap: called with %data, %rsrc (both inreg parameters)
; and four constant-0 operands; the FileCheck pattern below expects the call
; result %orig to be printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(
180+
define float @struct_buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
181+
main_body:
182+
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
183+
%r = bitcast i32 %orig to float
184+
ret float %r
185+
}
186+
187+
; struct.buffer.atomic.add: called with %data, %rsrc (both inreg parameters)
; and four constant-0 operands; the FileCheck pattern below expects the call
; result %orig to be printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(
188+
define float @struct_buffer_atomic_add(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
189+
main_body:
190+
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
191+
%r = bitcast i32 %orig to float
192+
ret float %r
193+
}
194+
195+
; struct.buffer.atomic.sub: called with %data, %rsrc (both inreg parameters)
; and four constant-0 operands; the FileCheck pattern below expects the call
; result %orig to be printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(
196+
define float @struct_buffer_atomic_sub(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
197+
main_body:
198+
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
199+
%r = bitcast i32 %orig to float
200+
ret float %r
201+
}
202+
203+
; struct.buffer.atomic.smin: called with %data, %rsrc (both inreg parameters)
; and four constant-0 operands; the FileCheck pattern below expects the call
; result %orig to be printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.smin.i32(
204+
define float @struct_buffer_atomic_smin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
205+
main_body:
206+
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.smin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
207+
%r = bitcast i32 %orig to float
208+
ret float %r
209+
}
210+
211+
; struct.buffer.atomic.umin: called with %data, %rsrc (both inreg parameters)
; and four constant-0 operands; the FileCheck pattern below expects the call
; result %orig to be printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.umin.i32(
212+
define float @struct_buffer_atomic_umin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
213+
main_body:
214+
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.umin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
215+
%r = bitcast i32 %orig to float
216+
ret float %r
217+
}
218+
219+
; struct.buffer.atomic.smax: called with %data, %rsrc (both inreg parameters)
; and four constant-0 operands; the FileCheck pattern below expects the call
; result %orig to be printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.smax.i32(
220+
define float @struct_buffer_atomic_smax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
221+
main_body:
222+
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.smax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
223+
%r = bitcast i32 %orig to float
224+
ret float %r
225+
}
226+
227+
; struct.buffer.atomic.umax: called with %data, %rsrc (both inreg parameters)
; and four constant-0 operands; the FileCheck pattern below expects the call
; result %orig to be printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.umax.i32(
228+
define float @struct_buffer_atomic_umax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
229+
main_body:
230+
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.umax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
231+
%r = bitcast i32 %orig to float
232+
ret float %r
233+
}
234+
235+
; struct.buffer.atomic.and: called with %data, %rsrc (both inreg parameters)
; and four constant-0 operands; the FileCheck pattern below expects the call
; result %orig to be printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.and.i32(
236+
define float @struct_buffer_atomic_and(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
237+
main_body:
238+
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.and.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
239+
%r = bitcast i32 %orig to float
240+
ret float %r
241+
}
242+
243+
; struct.buffer.atomic.or: called with %data, %rsrc (both inreg parameters)
; and four constant-0 operands; the FileCheck pattern below expects the call
; result %orig to be printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.or.i32(
244+
define float @struct_buffer_atomic_or(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
245+
main_body:
246+
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.or.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
247+
%r = bitcast i32 %orig to float
248+
ret float %r
249+
}
250+
251+
; struct.buffer.atomic.xor: called with %data, %rsrc (both inreg parameters)
; and four constant-0 operands; the FileCheck pattern below expects the call
; result %orig to be printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(
252+
define float @struct_buffer_atomic_xor(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
253+
main_body:
254+
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
255+
%r = bitcast i32 %orig to float
256+
ret float %r
257+
}
258+
259+
; struct.buffer.atomic.cmpswap: takes an extra inreg %cmp operand after %data,
; then %rsrc and four constant-0 operands. The FileCheck pattern below expects
; the call result %orig to be printed with the DIVERGENT tag.
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(
260+
define float @struct_buffer_atomic_cmpswap(<4 x i32> inreg %rsrc, i32 inreg %data, i32 inreg %cmp) #0 {
261+
main_body:
262+
%orig = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
263+
%r = bitcast i32 %orig to float
264+
ret float %r
265+
}
266+
91267
declare i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i1) #0
92268
declare i32 @llvm.amdgcn.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i1) #0
93269
declare i32 @llvm.amdgcn.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i1) #0
@@ -100,4 +276,28 @@ declare i32 @llvm.amdgcn.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i1) #0
100276
declare i32 @llvm.amdgcn.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i1) #0
101277
declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #0
102278

279+
; Declarations of the raw buffer atomic intrinsics called by the tests in this
; file. Each takes an i32 data value, a <4 x i32> operand and three trailing
; i32 operands; cmpswap takes a second leading i32.
declare i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32) #0
280+
declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32) #0
281+
declare i32 @llvm.amdgcn.raw.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32) #0
282+
declare i32 @llvm.amdgcn.raw.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i32) #0
283+
declare i32 @llvm.amdgcn.raw.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i32) #0
284+
declare i32 @llvm.amdgcn.raw.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i32) #0
285+
declare i32 @llvm.amdgcn.raw.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i32) #0
286+
declare i32 @llvm.amdgcn.raw.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i32) #0
287+
declare i32 @llvm.amdgcn.raw.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i32) #0
288+
declare i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32) #0
289+
declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32) #0
290+
291+
; Declarations of the struct buffer atomic intrinsics called by the tests in
; this file. Each takes an i32 data value, a <4 x i32> operand and four
; trailing i32 operands; cmpswap takes a second leading i32.
declare i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
292+
declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
293+
declare i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
294+
declare i32 @llvm.amdgcn.struct.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
295+
declare i32 @llvm.amdgcn.struct.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
296+
declare i32 @llvm.amdgcn.struct.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
297+
declare i32 @llvm.amdgcn.struct.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
298+
declare i32 @llvm.amdgcn.struct.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
299+
declare i32 @llvm.amdgcn.struct.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
300+
declare i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
301+
declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32, i32) #0
302+
103303
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)