Skip to content

Commit 16cdbfe

Browse files
committed
adding overdubs for CUDA atomics
1 parent 762d2b4 commit 16cdbfe

File tree

2 files changed

+64
-3
lines changed

2 files changed

+64
-3
lines changed

lib/CUDAKernels/src/CUDAKernels.jl

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ else
320320
end
321321

322322
import KernelAbstractions: ConstAdaptor, SharedMemory, Scratchpad, __synchronize, __size
323-
import KernelAbstractions: __atomic, atomic_add!
323+
import KernelAbstractions: __atomic, atomic_add!, atomic_and!, atomic_cas!, atomic_dec!, atomic_inc!, atomic_max!, atomic_min!, atomic_op!, atomic_or!, atomic_sub!, atomic_xchg!, atomic_xor!
324324

325325
###
326326
# GPU implementation of shared memory
@@ -391,8 +391,12 @@ end
391391
KernelAbstractions.@print(ex)
392392
end
393393

394-
@inline function Cassette.overdub(::CUDACtx, ::typeof(atomic_add!), ptr, b)
395-
CUDA.atomic_add!(ptr, b)
394+
# Under `CUDACtx`, redirect each KernelAbstractions atomic to the CUDA function of
# the same name. `@eval` splices the function name into both the dispatch signature
# and the callee, so every atomic reaches its own CUDA counterpart (previously all
# of them were forwarded to `CUDA.atomic_add!`).
# NOTE(review): assumes CUDA defines a same-named function for every entry below
# (in particular `atomic_op!`) — confirm against the CUDA.jl version in use.
for afx in (:atomic_add!, :atomic_and!, :atomic_cas!, :atomic_dec!,
            :atomic_inc!, :atomic_max!, :atomic_min!, :atomic_op!,
            :atomic_or!, :atomic_sub!, :atomic_xchg!, :atomic_xor!)
    @eval @inline function Cassette.overdub(::CUDACtx, ::typeof($afx), args...)
        return CUDA.$afx(args...)
    end
end
397401

398402
end

src/atomics.jl

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ This is a unified atomic interface
1212
- `CPU`: This reorganizes the command to use atomic pointer logic
1313
"""
1414

15+
# TODO: Remove?
16+
#const atomic_acquire = LLVM.API.LLVMAtomicOrderingAcquire
17+
#const atomic_release = LLVM.API.LLVMAtomicOrderingRelease
18+
#const atomic_acquire_release = LLVM.API.LLVMAtomicOrderingAcquireRelease
19+
1520
# NOTE: This only grabs the first symbol of the expression, not the entire expr
1621
macro atomic(ex)
1722
quote
@@ -28,6 +33,58 @@ function atomic_add!(ptr, b)
2833
Core.Intrinsics.atomic_pointermodify(ptr, +, b, :monotonic)
2934
end
3035

36+
"""
    atomic_and!(ptr, b)

Atomically replace the value stored at `ptr` with `value & b`, using `:monotonic`
ordering. The result of `Core.Intrinsics.atomic_pointermodify` is returned as-is.
"""
atomic_and!(ptr, b) = Core.Intrinsics.atomic_pointermodify(ptr, &, b, :monotonic)
39+
40+
"""
    atomic_cas!(ptr, cmp, val)

Atomic compare-and-swap: if the value stored at `ptr` equals `cmp`, store `val`
there; otherwise leave it untouched. Both the success and failure orderings are
`:monotonic`. The result of `Core.Intrinsics.atomic_pointerreplace` is returned
as-is.
"""
function atomic_cas!(ptr, cmp, val)
    return Core.Intrinsics.atomic_pointerreplace(ptr, cmp, val, :monotonic, :monotonic)
end

# NOTE(review): legacy two-argument placeholder kept only so existing callers keep
# working. It performs a bitwise AND, not a compare-and-swap; prefer the
# three-argument method and consider removing this one.
function atomic_cas!(ptr, b)
    return Core.Intrinsics.atomic_pointermodify(ptr, &, b, :monotonic)
end
44+
45+
"""
    atomic_dec!(ptr, b)

Atomically perform a wrapping decrement on the value stored at `ptr`: with `old`
the current value, store `b` when `old == 0` or `old > b`, otherwise store
`old - 1`. Uses `:monotonic` ordering and returns the result of
`Core.Intrinsics.atomic_pointermodify` as-is.
"""
function atomic_dec!(ptr, b)
    # The modify intrinsic calls the operator as op(current_value, b).
    wrapdec = (old, lim) -> ((old == zero(old)) | (old > lim)) ? lim : old - one(old)
    return Core.Intrinsics.atomic_pointermodify(ptr, wrapdec, b, :monotonic)
end
49+
50+
"""
    atomic_inc!(ptr, b)

Atomically perform a wrapping increment on the value stored at `ptr`: with `old`
the current value, store `0` when `old >= b`, otherwise store `old + 1`. Uses
`:monotonic` ordering and returns the result of
`Core.Intrinsics.atomic_pointermodify` as-is.
"""
function atomic_inc!(ptr, b)
    # The modify intrinsic calls the operator as op(current_value, b).
    wrapinc = (old, lim) -> old >= lim ? zero(old) : old + one(old)
    return Core.Intrinsics.atomic_pointermodify(ptr, wrapinc, b, :monotonic)
end
54+
55+
"""
    atomic_max!(ptr, b)

Atomically replace the value stored at `ptr` with `max(value, b)`, using
`:monotonic` ordering. The result of `Core.Intrinsics.atomic_pointermodify` is
returned as-is.
"""
function atomic_max!(ptr, b)
    return Core.Intrinsics.atomic_pointermodify(ptr, max, b, :monotonic)
end
59+
60+
"""
    atomic_min!(ptr, b)

Atomically replace the value stored at `ptr` with `min(value, b)`, using
`:monotonic` ordering. The result of `Core.Intrinsics.atomic_pointermodify` is
returned as-is.
"""
function atomic_min!(ptr, b)
    return Core.Intrinsics.atomic_pointermodify(ptr, min, b, :monotonic)
end
64+
65+
"""
    atomic_op!(ptr, op, b)

Atomically replace the value stored at `ptr` with `op(value, b)` for a
caller-supplied binary operator `op`, using `:monotonic` ordering. The result of
`Core.Intrinsics.atomic_pointermodify` is returned as-is.
"""
function atomic_op!(ptr, op, b)
    return Core.Intrinsics.atomic_pointermodify(ptr, op, b, :monotonic)
end

# NOTE(review): legacy two-argument placeholder kept only so existing callers keep
# working. It always applies a bitwise AND; prefer the three-argument method and
# consider removing this one.
function atomic_op!(ptr, b)
    return Core.Intrinsics.atomic_pointermodify(ptr, &, b, :monotonic)
end
69+
70+
"""
    atomic_or!(ptr, b)

Atomically replace the value stored at `ptr` with `value | b`, using `:monotonic`
ordering. The result of `Core.Intrinsics.atomic_pointermodify` is returned as-is.
"""
atomic_or!(ptr, b) = Core.Intrinsics.atomic_pointermodify(ptr, |, b, :monotonic)
73+
74+
"""
    atomic_sub!(ptr, b)

Atomically replace the value stored at `ptr` with `value - b`, using `:monotonic`
ordering. The result of `Core.Intrinsics.atomic_pointermodify` is returned as-is.
"""
atomic_sub!(ptr, b) = Core.Intrinsics.atomic_pointermodify(ptr, -, b, :monotonic)
77+
78+
"""
    atomic_xchg!(ptr, b)

Atomically store `b` at `ptr`, discarding the previous contents of the location,
using `:monotonic` ordering. The result of `Core.Intrinsics.atomic_pointermodify`
is returned as-is, so the return shape matches the other atomics in this file.
"""
function atomic_xchg!(ptr, b)
    # The operator ignores the current value and yields the replacement.
    takenew = (_, new) -> new
    return Core.Intrinsics.atomic_pointermodify(ptr, takenew, b, :monotonic)
end
82+
83+
"""
    atomic_xor!(ptr, b)

Atomically replace the value stored at `ptr` with `xor(value, b)`, using
`:monotonic` ordering. The result of `Core.Intrinsics.atomic_pointermodify` is
returned as-is.
"""
function atomic_xor!(ptr, b)
    return Core.Intrinsics.atomic_pointermodify(ptr, xor, b, :monotonic)
end
87+
3188
###
3289
# CPU implementation of atomic macro
3390
###

0 commit comments

Comments
 (0)