Skip to content

Commit 868d33e

Browse files
authored
Make versioninfo() resilient against NVML EPERM. (#1771)
1 parent 3b20f54 commit 868d33e

File tree

1 file changed

+22
-2
lines changed

1 file changed

+22
-2
lines changed

src/utilities.jl

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,21 +84,41 @@ function versioninfo(io::IO=stdout)
8484
println(io, length(devs), " devices:")
8585
end
8686
for (i, dev) in enumerate(devs)
87-
if has_nvml()
87+
# Gather the display name, compute capability and memory statistics of `dev`
# through NVML. Returns a named tuple `(; str, cap, mem)`.
function query_nvml()
    # the device is treated as a MIG instance when its UUID differs from its parent's
    is_mig = uuid(dev) != parent_uuid(dev)
    parent_handle = NVML.Device(parent_uuid(dev))
    device_handle = NVML.Device(uuid(dev); mig=is_mig)

    # name and memory are read from the device's own handle; the compute
    # capability is read from the parent GPU's handle
    return (;
        str=NVML.name(device_handle),
        cap=NVML.compute_capability(parent_handle),
        mem=NVML.memory_info(device_handle),
    )
end
98+
99+
# Gather the display name, compute capability and memory statistics of `dev`
# through the CUDA API. Returns a named tuple `(; str, cap, mem)`.
function query_cuda()
    label = name(dev)
    compute_cap = capability(dev)
    # the memory counters require a device context, which is why NVML is preferred
    memory = device!(dev) do
        (free=available_memory(), total=total_memory())
    end
    return (; str=label, cap=compute_cap, mem=memory)
end
108+
109+
# Prefer NVML for the per-device details, but fall back to the CUDA API when
# NVML reports that the query is unsupported or not permitted for this device.
# Any other failure is propagated unchanged.
str, cap, mem = if has_nvml()
    try
        query_nvml()
    catch err
        recoverable = err isa NVML.NVMLError &&
            err.code in (NVML.ERROR_NOT_SUPPORTED, NVML.ERROR_NO_PERMISSION)
        recoverable || rethrow()
        # keep the failure discoverable without printing to stdout
        @debug "NVML query failed, falling back to the CUDA API" exception=(err, catch_backtrace())
        query_cuda()
    end
else
    query_cuda()
end
103123
println(io, " $(i-1): $str (sm_$(cap.major)$(cap.minor), $(Base.format_bytes(mem.free)) / $(Base.format_bytes(mem.total)) available)")
104124
end

0 commit comments

Comments
 (0)