Merge pull request #129 from JuliaGPU/vc/cusync

vchuravy · web-flow · commit 1282705d2a50 · 2020-09-09T19:52:02.000+02:00
Remove `unsafe_wait`
diff --git a/Project.toml b/Project.toml
@@ -7,7 +7,6 @@ version = "0.3.3"
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 Cassette = "7057c7e9-c182-5462-911a-8362d720325c"
-LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
 MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
@@ -16,7 +15,6 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 Adapt = "0.4, 1.0, 2.0"
 CUDA = "~1.0, ~1.1, ~1.2, 1.3"
 Cassette = "0.3.2"
-LLVM = "1.5, 3.0"
 MacroTools = "0.5"
 SpecialFunctions = "0.10"
 StaticArrays = "0.12"
diff --git a/src/backends/cuda.jl b/src/backends/cuda.jl
@@ -84,53 +84,14 @@ function wait(::CUDADevice, ev::MultiEvent, progress=nothing, stream=CUDA.CuDefa
     end
 end
 
-include("cusynchronization.jl")
-import .CuSynchronization: unsafe_volatile_load, unsafe_volatile_store!
-
 function wait(::CUDADevice, ev::CPUEvent, progress=nothing, stream=nothing)
     error("""
     Waiting on the GPU for an CPU event to finish is currently not supported.
     We have encountered deadlocks arising, due to interactions with the CUDA
-    driver. If you are certain that you are deadlock free, you can use `unsafe_wait`
-    instead.
+    driver.
     """)
 end
 
-# This implements waiting for a CPUEvent on the GPU.
-# Most importantly this implementation needs to be asynchronous w.r.t to the host,
-# otherwise one could introduce deadlocks with outside event systems.
-# It uses a device visible host buffer to create a barrier/semaphore.
-# On a CPU task we wait for the `ev` to finish and then signal the GPU
-# by setting the flag 0->1, the CPU then in return needs to wait for the GPU
-# to set trhe flag 1->2 so that we can deallocate the memory.
-# TODO:
-# - In case of an error we should probably also kill the waiting GPU code.
-unsafe_wait(dev::Device, ev, progress=nothing) = wait(dev, ev, progress) 
-function unsafe_wait(::CUDADevice, ev::CPUEvent, progress=nothing, stream=CUDA.CuDefaultStream())
-    buf = CUDA.Mem.alloc(CUDA.Mem.HostBuffer, sizeof(UInt32), CUDA.Mem.HOSTREGISTER_DEVICEMAP)
-    unsafe_store!(convert(Ptr{UInt32}, buf), UInt32(0))
-    # TODO: Switch to `@spawn` when CUDA.jl is thread-safe
-    @async begin
-        try
-            wait(ev.task)
-        catch err
-            bt = catch_backtrace()
-            @error "Error thrown during CUDA wait on CPUEvent" _ex=(err, bt)
-        finally
-            @debug "notifying GPU"
-            unsafe_volatile_store!(convert(Ptr{UInt32}, buf), UInt32(1))
-            while !(unsafe_volatile_load(convert(Ptr{UInt32}, buf)) == UInt32(2))
-                yield()
-            end
-            @debug "GPU released"
-            CUDA.Mem.free(buf)
-        end
-    end
-    ptr = convert(CUDA.DevicePtr{UInt32}, convert(CUDA.Mem.CuPtr{UInt32}, buf))
-    sem = CuSynchronization.Semaphore(ptr, UInt32(1))
-    CUDA.@cuda threads=1 stream=stream CuSynchronization.wait(sem)
-end
-
 ###
 # async_copy
 ###
diff --git a/src/backends/cusynchronization.jl b/src/backends/cusynchronization.jl
diff --git a/test/events.jl b/test/events.jl
@@ -24,13 +24,3 @@ end
     end
 end
 
-if has_cuda_gpu()
-    barrier = Base.Threads.Event()
-    cpu_event = Event(wait, barrier)
-
-    KernelAbstractions.unsafe_wait(CUDADevice(), cpu_event) # Event edge on CuDefaultStream
-    gpu_event = Event(CUDADevice()) # Event on CuDefaultStream
-
-    notify(barrier)
-    wait(gpu_event)
-end