Skip to content

Commit 01b5a14

Browse files
christophernhillChristopher
andauthored
Make CUDA stream from cache match CUDA context (#294)
Co-authored-by: Christopher <christophernhill@gmail.com>
1 parent cd8aa37 commit 01b5a14

File tree

1 file changed

+38
-0
lines changed

1 file changed

+38
-0
lines changed

lib/CUDAKernels/src/CUDAKernels.jl

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ end
3131
# - Add a background task that occasionally scans all streams
3232
# - Add a hysterisis by checking a "since last scanned" timestamp
3333
const STREAM_GC_LOCK = Threads.ReentrantLock()
34+
#=
3435
function next_stream()
3536
lock(STREAM_GC_LOCK) do
3637
if !isempty(FREE_STREAMS)
@@ -53,6 +54,43 @@ function next_stream()
5354
return stream
5455
end
5556
end
57+
=#
58+
const FREE_STREAMS_D = Dict{CUDA.CuContext,Array{CUDA.CuStream,1}}()
59+
const STREAMS_D = Dict{CUDA.CuContext,Array{CUDA.CuStream,1}}()
60+
function next_stream()
61+
ctx = CUDA.current_context()
62+
lock(STREAM_GC_LOCK) do
63+
# see if there is a compatible free stream
64+
FREE_STREAMS_CT = get!(FREE_STREAMS_D, ctx) do
65+
CUDA.CuStream[]
66+
end
67+
if !isempty(FREE_STREAMS_CT)
68+
return pop!(FREE_STREAMS_CT)
69+
end
70+
71+
# GC to recover streams that are not busy
72+
STREAMS_CT = get!(STREAMS_D, ctx) do
73+
CUDA.CuStream[]
74+
end
75+
if length(STREAMS_CT) > STREAM_GC_THRESHOLD[]
76+
for stream in STREAMS_CT
77+
if CUDA.query(stream)
78+
push!(FREE_STREAMS_CT, stream)
79+
end
80+
end
81+
end
82+
83+
# if there is a compatible free stream after GC, return that stream
84+
if !isempty(FREE_STREAMS_CT)
85+
return pop!(FREE_STREAMS_CT)
86+
end
87+
88+
# no compatible free stream available so create a new one
89+
stream = CUDA.CuStream(flags = CUDA.STREAM_NON_BLOCKING)
90+
push!(STREAMS_CT, stream)
91+
return stream
92+
end
93+
end
5694

5795
import KernelAbstractions: Event, CPUEvent, NoneEvent, MultiEvent, CPU, GPU, isdone, failed
5896

0 commit comments

Comments
 (0)