
Commit 7e07ecc

Merge pull request #2304 from JuliaGPU/tb/maybe_collect

Consider running GC when allocating and synchronizing

2 parents: e674004 + 577beb9

11 files changed: +158 -59 lines

docs/src/lib/driver.md

Lines changed: 1 addition & 1 deletion

@@ -138,7 +138,7 @@ methods then work with these raw pointers:
 
 ```@docs
 CUDA.memory_status
-CUDA.available_memory
+CUDA.free_memory
 CUDA.total_memory
 ```

lib/cudadrv/memory.jl

Lines changed: 3 additions & 3 deletions

@@ -767,11 +767,11 @@ end
 end # module Mem
 
 """
-    available_memory()
+    free_memory()
 
-Returns the available amount of memory (in bytes), available for allocation by the CUDA context.
+Returns the free amount of memory (in bytes), available for allocation by the CUDA context.
 """
-available_memory() = Mem.info()[1]
+free_memory() = Mem.info()[1]
 
 """
     total_memory()
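
For reference, both accessors derive from the same driver query: `Mem.info()` returns a `(free, total)` tuple in bytes. A minimal usage sketch of the renamed API:

```julia
using CUDA

# free_memory() is Mem.info()[1]; total_memory() reports the device total.
free  = CUDA.free_memory()
total = CUDA.total_memory()

# Note: `free` is a snapshot and can change between calls as other
# streams and processes allocate.
println("GPU memory: $((total - free) ÷ 2^20) MiB used of $(total ÷ 2^20) MiB")
```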

lib/cudadrv/synchronization.jl

Lines changed: 9 additions & 0 deletions

@@ -182,9 +182,11 @@ function device_synchronize(; blocking::Bool=false, spin::Bool=true)
         if spin && spinning_synchronization(isdone, legacy_stream())
             cuCtxSynchronize()
         else
+            maybe_collect(true)
             nonblocking_synchronize(context())
         end
     else
+        maybe_collect(true)
         cuCtxSynchronize()
     end
 
@@ -196,9 +198,11 @@ function synchronize(stream::CuStream=stream(); blocking::Bool=false, spin::Bool
         if spin && spinning_synchronization(isdone, stream)
             cuStreamSynchronize(stream)
         else
+            maybe_collect(true)
             nonblocking_synchronize(stream)
         end
     else
+        maybe_collect(true)
         cuStreamSynchronize(stream)
     end
 
@@ -210,9 +214,11 @@ function synchronize(event::CuEvent; blocking::Bool=false, spin::Bool=true)
         if spin && spinning_synchronization(isdone, event)
             cuEventSynchronize(event)
         else
+            maybe_collect(true)
             nonblocking_synchronize(event)
         end
     else
+        maybe_collect(true)
         cuEventSynchronize(event)
     end
 end
@@ -269,6 +275,7 @@ function device_synchronize(; blocking::Bool=false, spin::Bool=true)
             nonblocking_synchronize(stream)
         end
     end
+    maybe_collect(true)
     cuCtxSynchronize()
 
     check_exceptions()
@@ -280,6 +287,7 @@ function synchronize(stream::CuStream=stream(); blocking::Bool=false, spin::Bool
             nonblocking_synchronize(stream)
         end
     end
+    maybe_collect(true)
     cuStreamSynchronize(stream)
 
     check_exceptions()
@@ -289,6 +297,7 @@ function synchronize(event::CuEvent; blocking::Bool=false, spin::Bool=true)
     if use_nonblocking_synchronization && !blocking
         spin && spinning_synchronization(isdone, event)
     end
+    maybe_collect(true)
     cuEventSynchronize(event)
 end
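
The hunks above only show the call sites: before every path that blocks on the GPU, `maybe_collect(true)` gives the Julia GC a chance to run, on the theory that a GC pause is nearly free while the host is about to wait anyway. The actual heuristics live in CUDA.jl's allocator; the sketch below is only an illustration of the pattern, with made-up thresholds:

```julia
# Illustration only -- not CUDA.jl's implementation. The idea: run the
# Julia GC before an expensive blocking wait, but only under memory
# pressure, and more eagerly when we are about to block anyway.
function maybe_collect_sketch(will_block::Bool=false)
    free, total = CUDA.Mem.info()
    pressure = 1 - free / total
    # Blocking on the GPU hides the GC pause, so use a lower threshold.
    threshold = will_block ? 0.5 : 0.75
    pressure > threshold && GC.gc(false)   # incremental collection
    return nothing
end
```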

lib/cudnn/src/convolution.jl

Lines changed: 1 addition & 1 deletion

@@ -331,6 +331,6 @@ function cudnnFindConvolutionAlgorithmWorkspaceSize(x)
     # Because algorithm discovery runs infrequently yet allocates more than conv functions,
     # This is a good place to synchronize and trim the memory pool to reduce fragmentation.
     CUDA.reclaim()
-    gpufree = CUDA.available_memory() + coalesce(CUDA.cached_memory(), 0)
+    gpufree = CUDA.free_memory() + coalesce(CUDA.cached_memory(), 0)
     min(gpufree ÷ 10, sizeof(x) * 100)
 end
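
The surrounding heuristic caps the algorithm-search workspace at whichever is smaller: a tenth of the memory that could be made available (free plus whatever the pool still caches) or 100× the size of the input. A worked example with assumed figures:

```julia
# Assumed figures, for illustration: 8 GiB free, 2 GiB cached by the
# pool, and a 64 MiB input array.
gpufree = 8 * 2^30 + 2 * 2^30   # free_memory() + cached_memory()
input   = 64 * 2^20             # sizeof(x)

workspace = min(gpufree ÷ 10, input * 100)
# min(1 GiB, 6.25 GiB) = 1 GiB: the memory cap binds for large inputs,
# while 100 * sizeof(x) binds for small ones.
```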

src/CUDA.jl

Lines changed: 1 addition & 0 deletions

@@ -125,6 +125,7 @@ export CUDABackend
 # StaticArrays is still a direct dependency, so directly include the extension
 include("../ext/StaticArraysExt.jl")
 
+include("deprecated.jl")
 include("precompile.jl")
 
 end

src/deprecated.jl

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+@deprecate available_memory() free_memory()
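
Base's `@deprecate` keeps the old entry point working while warning callers; the one-liner above expands to roughly the following (a hand-written equivalent, for illustration):

```julia
# Roughly what `@deprecate available_memory() free_memory()` provides:
# the old name stays callable, emits a deprecation warning, and forwards.
function available_memory()
    Base.depwarn("`available_memory()` is deprecated, use `free_memory()` instead.",
                 :available_memory)
    return free_memory()
end
```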

src/initialization.jl

Lines changed: 2 additions & 2 deletions

@@ -72,9 +72,9 @@ function __init__()
         return
     end
 
-    if driver < v"11.2"
+    if driver < v"11.3"
         @warn """The NVIDIA driver on this system only supports up to CUDA $driver.
-                 For performance reasons, it is recommended to upgrade to a driver that supports CUDA 11.2 or higher."""
+                 For performance reasons, it is recommended to upgrade to a driver that supports CUDA 11.3 or higher."""
     end
 
     # check that we have a runtime
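
The bump is a plain `VersionNumber` comparison, so drivers reporting CUDA 11.2 now fall under the warning:

```julia
julia> v"11.2" < v"11.3"   # an 11.2 driver now triggers the warning
true

julia> v"11.3" < v"11.3"   # 11.3 itself passes silently
false
```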
