# A GPU-side synchronization point: wraps a CUDAdrv `CuEvent` recorded on a
# stream, so later work (host- or device-side) can wait for it.
struct CudaEvent <: Event
    event::CuEvent
end
# Record a new event on the default CUDA stream, capturing the current
# position in that stream. Waiting on the returned `CudaEvent` synchronizes
# against all work submitted to the default stream before this call.
# Timing is disabled (EVENT_DISABLE_TIMING) since the event is only used
# for ordering, which makes it cheaper.
function Event(::CUDA)
    stream = CUDAdrv.CuDefaultStream()
    event = CuEvent(CUDAdrv.EVENT_DISABLE_TIMING)
    CUDAdrv.record(event, stream)
    return CudaEvent(event)
end
# Waiting on a CUDA event without specifying a device defaults to a host
# (CPU) wait.
wait(ev::CudaEvent, progress=nothing) = wait(CPU(), ev, progress)
59
+ function wait (:: CPU , ev:: CudaEvent , progress= nothing )
51
60
if progress === nothing
52
61
CUDAdrv. synchronize (ev. event)
53
62
else
@@ -58,6 +67,19 @@ function wait(ev::CudaEvent, progress=nothing)
58
67
end
59
68
end
60
69
# Use this to synchronize between computation using the CuDefaultStream:
# the wait is enqueued on the stream (device-side ordering), so the host is
# not blocked.
wait(::CUDA, ev::CudaEvent, progress=nothing) =
    __enqueue_wait(ev, CUDAdrv.CuDefaultStream())
# There is no efficient wait for CPU->GPU synchronization, so instead we
# do a CPU wait, and therefore block anyone from submitting more work.
# We maybe could do a spinning wait on the GPU and an atomic flag to signal
# from the CPU, but which stream would we target?
wait(::CUDA, ev::CPUEvent, progress=nothing) = wait(CPU(), ev, progress)
# Enqueue a device-side wait for `ev` on `stream`: work submitted to `stream`
# after this call will not execute until the event has completed. Does not
# block the host.
function __enqueue_wait(ev::CudaEvent, stream::CuStream)
    CUDAdrv.wait(ev.event, stream)
end
61
83
function (obj:: Kernel{CUDA} )(args... ; ndrange= nothing , dependencies= nothing , workgroupsize= nothing )
62
84
if ndrange isa Integer
63
85
ndrange = (ndrange,)
@@ -73,12 +95,12 @@ function (obj::Kernel{CUDA})(args...; ndrange=nothing, dependencies=nothing, wor
73
95
if dependencies != = nothing
74
96
for event in dependencies
75
97
if event isa CudaEvent
76
- CUDAdrv . wait (event . event, stream)
98
+ __enqueue_wait ( event, stream)
77
99
end
78
100
end
79
101
for event in dependencies
80
102
if ! (event isa CudaEvent)
81
- wait (event, ()-> yield ())
103
+ wait (CUDA (), event, ()-> yield ())
82
104
end
83
105
end
84
106
end
0 commit comments