
Commit 1282705

Merge pull request #129 from JuliaGPU/vc/cusync

Remove `unsafe_wait`

2 parents e0359b8 + d174c8a

File tree: 4 files changed, +1 -133 lines

  Project.toml
  src/backends/cuda.jl
  src/backends/cusynchronization.jl
  test/events.jl

Project.toml

Lines changed: 0 additions & 2 deletions
@@ -7,7 +7,6 @@ version = "0.3.3"
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 Cassette = "7057c7e9-c182-5462-911a-8362d720325c"
-LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
 MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
@@ -16,7 +15,6 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 Adapt = "0.4, 1.0, 2.0"
 CUDA = "~1.0, ~1.1, ~1.2, 1.3"
 Cassette = "0.3.2"
-LLVM = "1.5, 3.0"
 MacroTools = "0.5"
 SpecialFunctions = "0.10"
 StaticArrays = "0.12"

src/backends/cuda.jl

Lines changed: 1 addition & 40 deletions
@@ -84,53 +84,14 @@ function wait(::CUDADevice, ev::MultiEvent, progress=nothing, stream=CUDA.CuDefaultStream())
     end
 end
 
-include("cusynchronization.jl")
-import .CuSynchronization: unsafe_volatile_load, unsafe_volatile_store!
-
 function wait(::CUDADevice, ev::CPUEvent, progress=nothing, stream=nothing)
     error("""
     Waiting on the GPU for a CPU event to finish is currently not supported.
     We have encountered deadlocks arising due to interactions with the CUDA
-    driver. If you are certain that you are deadlock free, you can use `unsafe_wait`
-    instead.
+    driver.
     """)
 end
 
-# This implements waiting for a CPUEvent on the GPU.
-# Most importantly, this implementation needs to be asynchronous w.r.t. the host,
-# otherwise one could introduce deadlocks with outside event systems.
-# It uses a device-visible host buffer to create a barrier/semaphore.
-# On a CPU task we wait for `ev` to finish and then signal the GPU
-# by setting the flag 0->1; the CPU then in turn needs to wait for the GPU
-# to set the flag 1->2 so that we can deallocate the memory.
-# TODO:
-#  - In case of an error we should probably also kill the waiting GPU code.
-unsafe_wait(dev::Device, ev, progress=nothing) = wait(dev, ev, progress)
-function unsafe_wait(::CUDADevice, ev::CPUEvent, progress=nothing, stream=CUDA.CuDefaultStream())
-    buf = CUDA.Mem.alloc(CUDA.Mem.HostBuffer, sizeof(UInt32), CUDA.Mem.HOSTREGISTER_DEVICEMAP)
-    unsafe_store!(convert(Ptr{UInt32}, buf), UInt32(0))
-    # TODO: Switch to `@spawn` when CUDA.jl is thread-safe
-    @async begin
-        try
-            wait(ev.task)
-        catch err
-            bt = catch_backtrace()
-            @error "Error thrown during CUDA wait on CPUEvent" _ex=(err, bt)
-        finally
-            @debug "notifying GPU"
-            unsafe_volatile_store!(convert(Ptr{UInt32}, buf), UInt32(1))
-            while !(unsafe_volatile_load(convert(Ptr{UInt32}, buf)) == UInt32(2))
-                yield()
-            end
-            @debug "GPU released"
-            CUDA.Mem.free(buf)
-        end
-    end
-    ptr = convert(CUDA.DevicePtr{UInt32}, convert(CUDA.Mem.CuPtr{UInt32}, buf))
-    sem = CuSynchronization.Semaphore(ptr, UInt32(1))
-    CUDA.@cuda threads=1 stream=stream CuSynchronization.wait(sem)
-end
-
 ###
 # async_copy
 ###
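The comment block removed above describes a two-phase flag handshake: a host task waits for the wrapped event, flips a device-visible flag 0 -> 1, and the GPU kernel answers by flipping it 1 -> 2 before the host frees the buffer. As a host-only illustration of that protocol (no CUDA involved; an `@async` task stands in for the one-thread kernel and `Threads.Atomic` stands in for the volatile host-mapped buffer), a minimal sketch:

```julia
# Host-only sketch of the handshake the removed `unsafe_wait` implemented.
# The @async task plays the role of the single-thread GPU kernel; an Atomic
# UInt32 plays the role of the device-mapped host buffer with volatile access.
flag = Base.Threads.Atomic{UInt32}(UInt32(0))

kernel_stand_in = @async begin
    while flag[] < UInt32(1)   # spin until the host signals 0 -> 1
        yield()
    end
    flag[] = UInt32(2)         # release: tell the host the buffer may be freed
end

# Host side: pretend the wrapped CPU event just finished, signal the "kernel",
# then wait for the release before the buffer would be deallocated.
flag[] = UInt32(1)
while flag[] != UInt32(2)
    yield()
end
wait(kernel_stand_in)
@assert flag[] == UInt32(2)
```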

src/backends/cusynchronization.jl

Lines changed: 0 additions & 81 deletions
This file was deleted.
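The 81 deleted lines are not shown in this diff, but the references removed from `cuda.jl` (`CuSynchronization.Semaphore`, `CuSynchronization.wait`, `unsafe_volatile_load`, `unsafe_volatile_store!`) indicate the file held the device-side half of the handshake: a one-thread kernel spinning on the host-mapped flag. A rough sketch of that shape (names and details beyond the identifiers quoted above are assumptions, not the deleted source):

```julia
module CuSynchronizationSketch

# A device-side semaphore over a device-visible UInt32 flag. The real file
# presumably used a CUDA device pointer and LLVM-based volatile intrinsics;
# a plain pointer type and plain loads/stores stand in here.
struct Semaphore{P}
    ptr::P              # pointer to the flag in a page-locked host buffer
    threshold::UInt32   # value the host writes once the CPU event has finished
end

# Stand-ins for the removed `unsafe_volatile_load` / `unsafe_volatile_store!`;
# volatile semantics matter on the device so the flag is re-read every iteration.
@inline unsafe_volatile_load(ptr) = unsafe_load(ptr)
@inline unsafe_volatile_store!(ptr, v) = unsafe_store!(ptr, v)

# Kernel body launched as `@cuda threads=1 wait(sem)` by the removed `unsafe_wait`:
# spin until the host raises the flag, then bump it once more so the host knows
# it is safe to free the buffer.
function wait(sem::Semaphore)
    while unsafe_volatile_load(sem.ptr) < sem.threshold
        # busy-wait: the host flips the flag 0 -> 1 once the CPU event is done
    end
    unsafe_volatile_store!(sem.ptr, sem.threshold + UInt32(1))
    return nothing
end

end # module
```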

test/events.jl

Lines changed: 0 additions & 10 deletions
@@ -24,13 +24,3 @@ end
     end
 end
 
-if has_cuda_gpu()
-    barrier = Base.Threads.Event()
-    cpu_event = Event(wait, barrier)
-
-    KernelAbstractions.unsafe_wait(CUDADevice(), cpu_event) # Event edge on CuDefaultStream
-    gpu_event = Event(CUDADevice()) # Event on CuDefaultStream
-
-    notify(barrier)
-    wait(gpu_event)
-end
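With `unsafe_wait` removed, the deleted test's pattern (making GPU work wait on a CPU event from the device side) no longer has a supported form; what remains is to resolve the CPU event on the host before recording GPU work. A hedged sketch of that host-side ordering, reusing the constructors from the deleted test (the exact `wait` dispatch on a CPU event is an assumption here, not something this diff shows):

```julia
using KernelAbstractions, CUDA

if has_cuda_gpu()
    barrier = Base.Threads.Event()
    cpu_event = Event(wait, barrier)   # CPU event wrapping a host-side wait, as in the old test

    notify(barrier)
    wait(cpu_event)                    # resolve the CPU event on the host; no GPU-side wait

    gpu_event = Event(CUDADevice())    # record an event on CuDefaultStream afterwards
    wait(gpu_event)
end
```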
