@@ -84,53 +84,14 @@ function wait(::CUDADevice, ev::MultiEvent, progress=nothing, stream=CUDA.CuDefa
84
84
end
85
85
end
86
86
87
- include (" cusynchronization.jl" )
88
- import . CuSynchronization: unsafe_volatile_load, unsafe_volatile_store!
89
-
90
87
function wait (:: CUDADevice , ev:: CPUEvent , progress= nothing , stream= nothing )
91
88
error ("""
92
89
Waiting on the GPU for an CPU event to finish is currently not supported.
93
90
We have encountered deadlocks arising, due to interactions with the CUDA
94
- driver. If you are certain that you are deadlock free, you can use `unsafe_wait`
95
- instead.
91
+ driver.
96
92
""" )
97
93
end
98
94
99
- # This implements waiting for a CPUEvent on the GPU.
100
- # Most importantly this implementation needs to be asynchronous w.r.t to the host,
101
- # otherwise one could introduce deadlocks with outside event systems.
102
- # It uses a device visible host buffer to create a barrier/semaphore.
103
- # On a CPU task we wait for the `ev` to finish and then signal the GPU
104
- # by setting the flag 0->1, the CPU then in return needs to wait for the GPU
105
- # to set trhe flag 1->2 so that we can deallocate the memory.
106
- # TODO :
107
- # - In case of an error we should probably also kill the waiting GPU code.
108
- unsafe_wait (dev:: Device , ev, progress= nothing ) = wait (dev, ev, progress)
109
- function unsafe_wait (:: CUDADevice , ev:: CPUEvent , progress= nothing , stream= CUDA. CuDefaultStream ())
110
- buf = CUDA. Mem. alloc (CUDA. Mem. HostBuffer, sizeof (UInt32), CUDA. Mem. HOSTREGISTER_DEVICEMAP)
111
- unsafe_store! (convert (Ptr{UInt32}, buf), UInt32 (0 ))
112
- # TODO : Switch to `@spawn` when CUDA.jl is thread-safe
113
- @async begin
114
- try
115
- wait (ev. task)
116
- catch err
117
- bt = catch_backtrace ()
118
- @error " Error thrown during CUDA wait on CPUEvent" _ex= (err, bt)
119
- finally
120
- @debug " notifying GPU"
121
- unsafe_volatile_store! (convert (Ptr{UInt32}, buf), UInt32 (1 ))
122
- while ! (unsafe_volatile_load (convert (Ptr{UInt32}, buf)) == UInt32 (2 ))
123
- yield ()
124
- end
125
- @debug " GPU released"
126
- CUDA. Mem. free (buf)
127
- end
128
- end
129
- ptr = convert (CUDA. DevicePtr{UInt32}, convert (CUDA. Mem. CuPtr{UInt32}, buf))
130
- sem = CuSynchronization. Semaphore (ptr, UInt32 (1 ))
131
- CUDA. @cuda threads= 1 stream= stream CuSynchronization. wait (sem)
132
- end
133
-
134
95
# ##
135
96
# async_copy
136
97
# ##
0 commit comments