Skip to content

Commit 68dc6e6

Browse files
committed
Allow CPU and CUDA kernels to wait on each other
This allows the event system to be used to coordinate work done on the host with work done on the device.
1 parent ba2bdbf commit 68dc6e6

File tree

3 files changed

+43
-3
lines changed

3 files changed

+43
-3
lines changed

src/backends/cpu.jl

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,18 @@ end
4141
function __run(obj, ndrange, iterspace, args, dependencies)
4242
return Threads.@spawn begin
4343
if dependencies !== nothing
44-
Base.sync_end(map(e->e.task, dependencies))
44+
cpu_tasks = Core.Task[]
45+
for event in dependencies
46+
if event isa CPUEvent
47+
push!(cpu_tasks, event.task)
48+
end
49+
end
50+
!isempty(cpu_tasks) && Base.sync_end(cpu_tasks)
51+
for event in dependencies
52+
if !(event isa CPUEvent)
53+
wait(event, ()->yield())
54+
end
55+
end
4556
end
4657
@sync begin
4758
# TODO: how do we use the information that the iteration space maps perfectly to

src/backends/cuda.jl

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -72,8 +72,14 @@ function (obj::Kernel{CUDA})(args...; ndrange=nothing, dependencies=nothing, wor
7272
stream = next_stream()
7373
if dependencies !== nothing
7474
for event in dependencies
75-
@assert event isa CudaEvent
76-
CUDAdrv.wait(event.event, stream)
75+
if event isa CudaEvent
76+
CUDAdrv.wait(event.event, stream)
77+
end
78+
end
79+
for event in dependencies
80+
if !(event isa CudaEvent)
81+
wait(event, ()->yield())
82+
end
7783
end
7884
end
7985

test/test.jl

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -161,3 +161,26 @@ if has_cuda_gpu()
161161
wait(kernel_val!(CUDA())(A,Val(3), ndrange=size(A)))
162162
@test all((a)->a==3, A)
163163
end
164+
165+
@kernel function kernel_empty()
166+
return
167+
end
168+
if has_cuda_gpu()
169+
@testset "CPU--CUDA dependencies" begin
170+
event1 = kernel_empty(CPU(), 1)(ndrange=1)
171+
event2 = kernel_empty(CUDA(), 1)(ndrange=1)
172+
event3 = kernel_empty(CPU(), 1)(ndrange=1)
173+
event4 = kernel_empty(CUDA(), 1)(ndrange=1)
174+
event5 = kernel_empty(CUDA(), 1)(ndrange=1, dependencies=(event1, event2, event3, event4))
175+
wait(event5)
176+
@test event5 isa KernelAbstractions.Event
177+
178+
event1 = kernel_empty(CPU(), 1)(ndrange=1)
179+
event2 = kernel_empty(CUDA(), 1)(ndrange=1)
180+
event3 = kernel_empty(CPU(), 1)(ndrange=1)
181+
event4 = kernel_empty(CUDA(), 1)(ndrange=1)
182+
event5 = kernel_empty(CPU(), 1)(ndrange=1, dependencies=(event1, event2, event3, event4))
183+
wait(event5)
184+
@test event5 isa KernelAbstractions.Event
185+
end
186+
end

0 commit comments

Comments
 (0)