Skip to content

Commit d983201

Browse files
bors[bot] and lcw
authored
Merge #41
41: Allow CPU and CUDA kernels to wait on each other (r=vchuravy, a=lcw). This allows the event system to be used to coordinate work done on the host with work done on the device. Co-authored-by: Lucas C Wilcox <lucas@swirlee.com>
2 parents a72a0d3 + 68dc6e6 commit d983201

File tree

3 files changed

+43
-3
lines changed

3 files changed

+43
-3
lines changed

src/backends/cpu.jl

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,18 @@ end
4141
function __run(obj, ndrange, iterspace, args, dependencies)
4242
return Threads.@spawn begin
4343
if dependencies !== nothing
44-
Base.sync_end(map(e->e.task, dependencies))
44+
cpu_tasks = Core.Task[]
45+
for event in dependencies
46+
if event isa CPUEvent
47+
push!(cpu_tasks, event.task)
48+
end
49+
end
50+
!isempty(cpu_tasks) && Base.sync_end(cpu_tasks)
51+
for event in dependencies
52+
if !(event isa CPUEvent)
53+
wait(event, ()->yield())
54+
end
55+
end
4556
end
4657
@sync begin
4758
# TODO: how do we use the information that the iteration space maps perfectly to

src/backends/cuda.jl

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,14 @@ function (obj::Kernel{CUDA})(args...; ndrange=nothing, dependencies=nothing, wor
7272
stream = next_stream()
7373
if dependencies !== nothing
7474
for event in dependencies
75-
@assert event isa CudaEvent
76-
CUDAdrv.wait(event.event, stream)
75+
if event isa CudaEvent
76+
CUDAdrv.wait(event.event, stream)
77+
end
78+
end
79+
for event in dependencies
80+
if !(event isa CudaEvent)
81+
wait(event, ()->yield())
82+
end
7783
end
7884
end
7985

test/test.jl

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,3 +161,26 @@ if has_cuda_gpu()
161161
wait(kernel_val!(CUDA())(A,Val(3), ndrange=size(A)))
162162
@test all((a)->a==3, A)
163163
end
164+
165+
@kernel function kernel_empty()  # no-op kernel: takes no arguments and its body returns immediately
166+
return
167+
end
168+
if has_cuda_gpu()  # the whole testset is skipped unless has_cuda_gpu() returns true
169+
@testset "CPU--CUDA dependencies" begin
170+
event1 = kernel_empty(CPU(), 1)(ndrange=1)  # four launches without dependencies, alternating CPU and CUDA backends
171+
event2 = kernel_empty(CUDA(), 1)(ndrange=1)
172+
event3 = kernel_empty(CPU(), 1)(ndrange=1)
173+
event4 = kernel_empty(CUDA(), 1)(ndrange=1)
174+
event5 = kernel_empty(CUDA(), 1)(ndrange=1, dependencies=(event1, event2, event3, event4))  # CUDA launch that depends on the mixed CPU/CUDA events above
175+
wait(event5)  # wait on the dependent launch before asserting
176+
@test event5 isa KernelAbstractions.Event  # asserts only the type of the returned object
177+
178+
event1 = kernel_empty(CPU(), 1)(ndrange=1)  # same four independent launches again, for the CPU-side case
179+
event2 = kernel_empty(CUDA(), 1)(ndrange=1)
180+
event3 = kernel_empty(CPU(), 1)(ndrange=1)
181+
event4 = kernel_empty(CUDA(), 1)(ndrange=1)
182+
event5 = kernel_empty(CPU(), 1)(ndrange=1, dependencies=(event1, event2, event3, event4))  # CPU launch that depends on the mixed CPU/CUDA events above
183+
wait(event5)  # wait on the dependent launch before asserting
184+
@test event5 isa KernelAbstractions.Event  # asserts only the type of the returned object
185+
end
186+
end

0 commit comments

Comments
 (0)