Skip to content

Commit a50b5b8

Browse files
authored
Avoid OOMs during OOM handling. (#2299)
1 parent 6829998 commit a50b5b8

File tree

1 file changed

+14
-1
lines changed

1 file changed

+14
-1
lines changed

src/pool.jl

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,12 +334,25 @@ struct OutOfGPUMemoryError <: Exception
334334
# if this error was triggered before the TLS was initialized, we should not try to
335335
# fetch memory info as those API calls will just trigger TLS initialization again.
336336
nothing
337+
elseif in_oom_ctor[]
338+
# if we triggered an OOM while trying to construct an OOM object, break the cycle
339+
nothing
337340
else
338-
MemoryInfo()
341+
in_oom_ctor[] = true
342+
try
343+
MemoryInfo()
344+
catch err
345+
# when extremely close to OOM, just inspecting `Mem.info()` may trigger an OOM again
346+
isa(err, OutOfGPUMemoryError) || rethrow()
347+
nothing
348+
finally
349+
in_oom_ctor[] = false
350+
end
339351
end
340352
new(sz, info)
341353
end
342354
end
355+
# Re-entrancy guard for the `OutOfGPUMemoryError` constructor. The constructor sets this
# flag while it gathers `MemoryInfo()` and clears it afterwards; if another OOM is raised
# during that query, the nested constructor sees the flag and skips the memory query,
# breaking the cycle.
# NOTE(review): this is a single global flag with no visible synchronization — confirm
# that concurrent OOMs from multiple tasks/threads are acceptable here.
const in_oom_ctor = Ref{Bool}(false)
343356

344357
function Base.showerror(io::IO, err::OutOfGPUMemoryError)
345358
print(io, "Out of GPU memory")

0 commit comments

Comments
 (0)