Skip to content

Commit 6bf251b

Browse files
committed
restructuring into an atomics.jl file
1 parent 5191bf9 commit 6bf251b

File tree

4 files changed

+52
-31
lines changed

4 files changed

+52
-31
lines changed

lib/CUDAKernels/src/CUDAKernels.jl

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,8 @@ else
319319
const emit_shmem = CUDA._shmem
320320
end
321321

322-
import KernelAbstractions: ConstAdaptor, SharedMemory, Scratchpad, __synchronize, __size, __atomic
322+
import KernelAbstractions: ConstAdaptor, SharedMemory, Scratchpad, __synchronize, __size
323+
import KernelAbstractions: __atomic, atomic_add!
323324

324325
###
325326
# GPU implementation of shared memory
@@ -386,7 +387,12 @@ end
386387
###
387388

388389
@inline function Cassette.overdub(::CUDACtx, ::typeof(__atomic), ex)
389-
CUDA.@atomic(ex)
390+
#CUDA.@atomic(ex)
391+
KernelAbstractions.@print(ex)
392+
end
393+
394+
@inline function Cassette.overdub(::CUDACtx, ::typeof(atomic_add!), ptr, b)
395+
CUDA.atomic_add!(ptr, b)
390396
end
391397

392398
end

src/KernelAbstractions.jl

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
module KernelAbstractions
22

33
export @kernel
4-
export @Const, @localmem, @private, @uniform, @synchronize, @index, @groupsize, @print, @atomic
4+
export @Const, @localmem, @private, @uniform, @synchronize, @index, @groupsize, @print
5+
export @atomic, atomic_add!
56
export Device, GPU, CPU, Event, MultiEvent, NoneEvent
67
export async_copy!
78

@@ -307,22 +308,6 @@ macro index(locale, args...)
307308
Expr(:call, GlobalRef(KernelAbstractions, index_function), esc(:__ctx__), map(esc, args)...)
308309
end
309310

310-
"""
311-
@atomic command
312-
313-
This is a unified atomic interface
314-
315-
# Platform differences
316-
- `GPU`: This uses standard `@atomic` calls from CUDA.jl
317-
- `CPU`: This reorganized the command to use atomic pointer logic
318-
"""
319-
320-
macro atomic(ex)
321-
quote
322-
$__atomic($ex)
323-
end
324-
end
325-
326311
###
327312
# Internal kernel functions
328313
###
@@ -468,10 +453,6 @@ function __synchronize()
468453
error("@synchronize used outside kernel or not captured")
469454
end
470455

471-
function __atomic(ex)
472-
error("@atomic used outside kernel or not captured")
473-
end
474-
475456
@generated function __print(items...)
476457
str = ""
477458
args = []
@@ -503,6 +484,10 @@ include("extras/extras.jl")
503484

504485
include("reflection.jl")
505486

487+
# Atomics
488+
489+
include("atomics.jl")
490+
506491
# CPU backend
507492

508493
include("cpu.jl")

src/atomics.jl

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
###
2+
# Atomics
3+
###
4+
5+
"""
6+
@atomic macro
7+
8+
This is a unified atomic interface.
9+
10+
# Platform differences
11+
- `GPU`: This uses standard `@atomic` calls from CUDA.jl
12+
- `CPU`: This reorganizes the command to use atomic pointer logic
13+
"""
14+
15+
# NOTE: This only grabs the first symbol of the expression, not the entire expr
16+
macro atomic(ex)
17+
quote
18+
$__atomic($ex)
19+
end
20+
end
21+
22+
# Implement CPU macro here?
23+
function __atomic(ex)
24+
error("@atomic macro incomplete!")
25+
end
26+
27+
function atomic_add!(ptr, b)
28+
Core.Intrinsics.atomic_pointermodify(ptr, +, b, :monotonic)
29+
end
30+
31+
###
32+
# CPU implementation of atomic macro
33+
###
34+
35+
#@inline function Cassette.overdub(::CPUCtx, ::typeof(__atomic), ex)
36+
# println(ex)
37+
#end
38+

src/cpu.jl

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -269,11 +269,3 @@ end
269269

270270
# Argument conversion
271271
KernelAbstractions.argconvert(k::Kernel{CPU}, arg) = arg
272-
273-
###
274-
# CPU implementation of atomic macro
275-
###
276-
277-
@inline function Cassette.overdub(::CUDACtx, ::typeof(__atomic), ex)
278-
println(ex)
279-
end

0 commit comments

Comments
 (0)