Skip to content

Commit d117166

Browse files
committed
small test fixes
1 parent e5eba4f commit d117166

File tree

3 files changed

+31
-19
lines changed

3 files changed

+31
-19
lines changed

lib/CUDAKernels/src/CUDAKernels.jl

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -391,11 +391,24 @@ end
391391
KernelAbstractions.@print(ex)
392392
end
393393

394-
for afx in [atomic_add!, atomic_and!, atomic_cas!, atomic_dec!,
395-
atomic_inc!, atomic_max!, atomic_min!, atomic_op!,
396-
atomic_or!, atomic_sub!, atomic_xchg!, atomic_xor!]
394+
afxs = Dict(
395+
atomic_add! => CUDA.atomic_add!,
396+
atomic_and! => CUDA.atomic_and!,
397+
atomic_cas! => CUDA.atomic_cas!,
398+
atomic_dec! => CUDA.atomic_dec!,
399+
atomic_inc! => CUDA.atomic_inc!,
400+
atomic_max! => CUDA.atomic_max!,
401+
atomic_min! => CUDA.atomic_min!,
402+
atomic_op! => CUDA.atomic_op!,
403+
atomic_or! => CUDA.atomic_or!,
404+
atomic_sub! => CUDA.atomic_sub!,
405+
atomic_xchg! => CUDA.atomic_xchg!,
406+
atomic_xor! => CUDA.atomic_xor!
407+
)
408+
409+
for (afx, cfx) in afxs
397410
@inline function Cassette.overdub(::CUDACtx, ::typeof(afx), args...)
398-
CUDA.atomic_add!(args...)
411+
cfx(args...)
399412
end
400413
end
401414

src/atomics.jl

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,16 @@ export @atomic, atomic_add!, atomic_sub!, atomic_and!, atomic_or!, atomic_xor!,
66
atomic_min!, atomic_max!, atomic_inc!, atomic_dec!, atomic_xchg!,
77
atomic_op!, atomic_cas!
88

9+
# Helper functions that compute the new stored value for atomic_inc! and atomic_dec!
10+
11+
"""
    dec(a::T, b::T) where T

Return the value a wrapping decrement stores for current value `a` and bound `b`:
`b` when `a == 0` or `a > b`, otherwise `a - 1`.
"""
function dec(a::T, b::T) where T
    # The original else-branch referenced `old`, which is not defined in this
    # function; the current value is the parameter `a`.
    return ((a == 0) | (a > b)) ? b : (a - T(1))
end
14+
15+
"""
    inc(a::T, b::T) where T

Return the value a wrapping increment stores for current value `a` and bound `b`:
zero when `a >= b`, otherwise `a + 1`.
"""
function inc(a::T, b::T) where T
    # `zero(T)`/`one(T)` keep the result of type `T` without relying on a
    # `T(::Int)` constructor being available.
    return a >= b ? zero(T) : a + one(T)
end
18+
919
"""
1020
@atomic macro
1121
@@ -58,23 +68,13 @@ function atomic_cas!(ptr::Ptr{T}, old::T, new::T) where T
5868
end
5969

6070
# Implementation of: (((old == 0) | (old > b)) ? b : (old-1)), returns old
61-
# Currently broken
6271
function atomic_dec!(ptr::Ptr{T}, b::T) where T
63-
Core.Intrinsics.atomic_fence(:monotonic)
64-
if (unsafe_load(ptr) == 0 | unsafe_load(ptr) > b)
65-
Core.Intrinsics.atomic_pointerswap(ptr, b, :monotonic)
66-
else
67-
Core.Intrinsics.atomic_pointermodify(ptr, -, T(1), :monotonic)
68-
end
72+
Core.Intrinsics.atomic_pointermodify(ptr::Ptr{T}, dec, b::T, :monotonic)
6973
end
7074

7175
# implementation of: ((old >= b) ? 0 : (old+1)), returns old
7276
function atomic_inc!(ptr::Ptr{T}, b::T) where T
73-
if unsafe_load(ptr) >= b
74-
Core.Intrinsics.atomic_pointerswap(ptr, T(0), :monotonic)
75-
else
76-
Core.Intrinsics.atomic_pointermodify(ptr, +, T(1), :monotonic)
77-
end
77+
Core.Intrinsics.atomic_pointermodify(ptr::Ptr{T}, inc, b::T, :monotonic)
7878
end
7979

8080
function atomic_xchg!(ptr::Ptr{T}, b::T) where T

test/atomic_test.jl

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ function atomics_testsuite(backend)
2222

2323
@testset "atomic addition tests" begin
2424
types = [Int32, Int64, UInt32, UInt64, Float32]
25-
if ArrayT != CuArray
25+
if ArrayT == CuArray
2626
CUDA.capability(CUDA.device()) >= v"6.0" && push!(types, Float64)
2727
CUDA.capability(CUDA.device()) >= v"7.0" && push!(types, Float16)
2828
else
@@ -82,9 +82,8 @@ function atomics_testsuite(backend)
8282
kernel = atomic_inc_kernel(backend(), 4)
8383
wait(kernel(A, T(512), ndrange=(256)))
8484

85-
@test Array(A)[2] == 257
85+
@test Array(A)[2] == 255
8686
end
8787
end
8888

89-
9089
end

0 commit comments

Comments
 (0)