@@ -2,22 +2,55 @@ using KernelAbstractions, CUDA, Test
2
2
3
3
# Note: these kernels operate on the second element because some CPU defaults
# act on the first element of a pointer when no index is specified; targeting
# index 2 covers that case.
5
# Kernel: calls `atomic_add!` with operand `b` on the second element of `input`.
# The body does not index by work-item, so every invocation targets the same
# location, which is what makes the update contended.
@kernel function atomic_add_kernel!(input, b)
    atomic_add!(pointer(input, 2), b)
end
8
8
9
# Kernel: calls `atomic_sub!` with operand `b` on the second element of `input`.
# Every invocation targets the same location (no per-work-item indexing).
@kernel function atomic_sub_kernel!(input, b)
    atomic_sub!(pointer(input, 2), b)
end
12
12
13
# Kernel: calls `atomic_inc!` with bound `b` on the second element of `input`.
# NOTE(review): presumably a CUDA-style wrapping increment (the stored value
# resets to 0 once it reaches `b`) — confirm against the atomics implementation.
@kernel function atomic_inc_kernel!(input, b)
    atomic_inc!(pointer(input, 2), b)
end
16
16
17
# Kernel: calls `atomic_dec!` with bound `b` on the second element of `input`.
# NOTE(review): presumably a CUDA-style wrapping decrement (the stored value
# resets to `b` when it is 0 or greater than `b`) — confirm against the atomics
# implementation.
@kernel function atomic_dec_kernel!(input, b)
    atomic_dec!(pointer(input, 2), b)
end
20
20
21
# Kernel: calls `atomic_xchg!` with value `b` on the second element of `input`.
# Every invocation passes the same `b` to the same location.
@kernel function atomic_xchg_kernel!(input, b)
    atomic_xchg!(pointer(input, 2), b)
end
24
+
25
# Kernel: each work-item reads its own operand `b[tid]` and passes it to
# `atomic_and!` on `pointer(input)` (no element index given), so all operands
# are folded into a single location.
@kernel function atomic_and_kernel!(input, b)
    tid = @index(Global)
    atomic_and!(pointer(input), b[tid])
end
29
+
30
# Kernel: each work-item reads its own operand `b[tid]` and passes it to
# `atomic_or!` on `pointer(input)` (no element index given), so all operands
# are folded into a single location.
@kernel function atomic_or_kernel!(input, b)
    tid = @index(Global)
    atomic_or!(pointer(input), b[tid])
end
34
+
35
# Kernel: each work-item reads its own operand `b[tid]` and passes it to
# `atomic_xor!` on `pointer(input)` (no element index given), so all operands
# are folded into a single location.
@kernel function atomic_xor_kernel!(input, b)
    tid = @index(Global)
    atomic_xor!(pointer(input), b[tid])
end
39
+
40
# Kernel: each work-item passes its own candidate `b[tid]` to `atomic_max!` on
# the second element of `input`.
@kernel function atomic_max_kernel!(input, b)
    tid = @index(Global)
    atomic_max!(pointer(input, 2), b[tid])
end
44
+
45
# Kernel: each work-item passes its own candidate `b[tid]` to `atomic_min!` on
# the second element of `input`.
@kernel function atomic_min_kernel!(input, b)
    tid = @index(Global)
    atomic_min!(pointer(input, 2), b[tid])
end
49
+
50
# Kernel: calls `atomic_cas!` with arguments `b` and `c` on the second element
# of `input`.
# NOTE(review): presumably `b` is the value compared against and `c` the value
# stored on a match — confirm against the atomics implementation.
@kernel function atomic_cas_kernel!(input, b, c)
    atomic_cas!(pointer(input, 2), b, c)
end
53
+
21
54
function atomics_testsuite (backend)
22
55
23
56
@testset " atomic addition tests" begin
@@ -33,8 +66,8 @@ function atomics_testsuite(backend)
33
66
for T in types
34
67
A = ArrayT {T} ([0 ,0 ])
35
68
36
- kernel = atomic_add_kernel (backend (), 4 )
37
- wait (kernel (A, one (T), ndrange= (1024 )))
69
+ kernel! = atomic_add_kernel! (backend (), 4 )
70
+ wait (kernel! (A, one (T), ndrange= (1024 )))
38
71
39
72
@test Array (A)[2 ] == 1024
40
73
end
@@ -53,8 +86,8 @@ function atomics_testsuite(backend)
53
86
for T in types
54
87
A = ArrayT {T} ([2048 ,2048 ])
55
88
56
- kernel = atomic_sub_kernel (backend (), 4 )
57
- wait (kernel (A, one (T), ndrange= (1024 )))
89
+ kernel! = atomic_sub_kernel! (backend (), 4 )
90
+ wait (kernel! (A, one (T), ndrange= (1024 )))
58
91
59
92
@test Array (A)[2 ] == 1024
60
93
end
@@ -66,8 +99,8 @@ function atomics_testsuite(backend)
66
99
for T in types
67
100
A = ArrayT {T} ([0 ,0 ])
68
101
69
- kernel = atomic_inc_kernel (backend (), 4 )
70
- wait (kernel (A, T (512 ), ndrange= (768 )))
102
+ kernel! = atomic_inc_kernel! (backend (), 4 )
103
+ wait (kernel! (A, T (512 ), ndrange= (768 )))
71
104
72
105
@test Array (A)[2 ] == 255
73
106
end
@@ -79,11 +112,115 @@ function atomics_testsuite(backend)
79
112
for T in types
80
113
A = ArrayT {T} ([1024 ,1024 ])
81
114
82
- kernel = atomic_dec_kernel (backend (), 4 )
83
- wait (kernel (A, T (512 ), ndrange= (256 )))
115
+ kernel! = atomic_dec_kernel! (backend (), 4 )
116
+ wait (kernel! (A, T (512 ), ndrange= (256 )))
84
117
85
118
@test Array (A)[2 ] == 257
86
119
end
87
120
end
88
121
122
+ @testset " atomic xchg tests" begin
123
+ types = [Int32, Int64, UInt32, UInt64]
124
+
125
+ for T in types
126
+ A = ArrayT {T} ([0 ,0 ])
127
+
128
+ kernel! = atomic_xchg_kernel! (backend (), 4 )
129
+ wait (kernel! (A, T (1 ), ndrange= (256 )))
130
+
131
+ @test Array (A)[2 ] == one (T)
132
+ end
133
+ end
134
+
135
+ @testset " atomic and tests" begin
136
+ types = [Int32, Int64, UInt32, UInt64]
137
+
138
+ for T in types
139
+ A = ArrayT {T} ([1023 ])
140
+ B = ArrayT {T} ([1023 - 2 ^ (i- 1 ) for i = 1 : 10 ])
141
+
142
+ kernel! = atomic_and_kernel! (backend (), 4 )
143
+ wait (kernel! (A, B, ndrange= length (B)))
144
+
145
+ @test Array (A)[1 ] == zero (T)
146
+ end
147
+ end
148
+
149
+ @testset " atomic or tests" begin
150
+ types = [Int32, Int64, UInt32, UInt64]
151
+
152
+ for T in types
153
+ A = ArrayT {T} ([0 ])
154
+ B = ArrayT {T} ([2 ^ (i- 1 ) for i = 1 : 10 ])
155
+
156
+ kernel! = atomic_or_kernel! (backend (), 4 )
157
+ wait (kernel! (A, B, ndrange= length (B)))
158
+
159
+ @test Array (A)[1 ] == T (1023 )
160
+ end
161
+ end
162
+
163
+ @testset " atomic xor tests" begin
164
+ types = [Int32, Int64, UInt32, UInt64]
165
+
166
+ for T in types
167
+ A = ArrayT {T} ([1023 ])
168
+ B = ArrayT {T} ([2 ^ (i- 1 ) for i = 1 : 10 ])
169
+
170
+ kernel! = atomic_xor_kernel! (backend (), 4 )
171
+ wait (kernel! (A, B, ndrange= length (B)))
172
+
173
+ @test Array (A)[1 ] == T (0 )
174
+ end
175
+ end
176
+
177
+ @testset " atomic max tests" begin
178
+ types = [Int32, Int64, UInt32, UInt64]
179
+
180
+ for T in types
181
+ A = ArrayT {T} ([0 ,0 ])
182
+ B = ArrayT {T} ([i for i = 1 : 1024 ])
183
+
184
+ kernel! = atomic_max_kernel! (backend (), 4 )
185
+ wait (kernel! (A, B, ndrange= length (B)))
186
+
187
+ @test Array (A)[2 ] == T (1024 )
188
+ end
189
+ end
190
+
191
+ @testset " atomic min tests" begin
192
+ types = [Int32, Int64, UInt32, UInt64]
193
+
194
+ for T in types
195
+ A = ArrayT {T} ([1024 ,1024 ])
196
+ B = ArrayT {T} ([i for i = 1 : 1024 ])
197
+
198
+ kernel! = atomic_min_kernel! (backend (), 4 )
199
+ wait (kernel! (A, B, ndrange= length (B)))
200
+
201
+ @test Array (A)[2 ] == T (1 )
202
+ end
203
+ end
204
+
205
+
206
+ @testset " atomic cas tests" begin
207
+ types = [Int32, Int64, UInt32, UInt64]
208
+ if ArrayT == CuArray
209
+ CUDA. capability (CUDA. device ()) >= v " 7.0" && push! (types, UInt16)
210
+ else
211
+ push! (types, UInt16)
212
+ end
213
+
214
+ for T in types
215
+ A = ArrayT {T} ([0 ,0 ])
216
+
217
+ kernel! = atomic_cas_kernel! (backend (), 4 )
218
+ wait (kernel! (A, zero (T), one (T), ndrange= (1024 )))
219
+
220
+ @test Array (A)[2 ] == 1
221
+ end
222
+ end
223
+
224
+
225
+
89
226
end
0 commit comments