Skip to content

Commit 4467dc6

Browse files
authored
Use atomics for allocation statistics. (#1884)
1 parent c9d87c4 commit 4467dc6

File tree

1 file changed

+39
-24
lines changed

1 file changed

+39
-24
lines changed

src/pool.jl

Lines changed: 39 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,27 +7,42 @@ using Logging
77
## allocation statistics
88

99
mutable struct AllocStats
10-
alloc_count::Int
11-
alloc_bytes::Int
10+
alloc_count::Threads.Atomic{Int}
11+
alloc_bytes::Threads.Atomic{Int}
1212

13-
free_count::Int
14-
free_bytes::Int
13+
free_count::Threads.Atomic{Int}
14+
free_bytes::Threads.Atomic{Int}
1515

16-
total_time::Float64
17-
end
16+
total_time::Threads.Atomic{Float64}
1817

19-
const alloc_stats = AllocStats(0, 0, 0, 0, 0.0)
18+
function AllocStats()
19+
new(Threads.Atomic{Int}(0), Threads.Atomic{Int}(0),
20+
Threads.Atomic{Int}(0), Threads.Atomic{Int}(0),
21+
Threads.Atomic{Float64}(0.0))
22+
end
23+
24+
function AllocStats(alloc_count::Integer, alloc_bytes::Integer,
25+
free_count::Integer, free_bytes::Integer,
26+
total_time::Float64)
27+
new(Threads.Atomic{Int}(alloc_count), Threads.Atomic{Int}(alloc_bytes),
28+
Threads.Atomic{Int}(free_count), Threads.Atomic{Int}(free_bytes),
29+
Threads.Atomic{Float64}(total_time))
30+
end
31+
end
2032

2133
Base.copy(alloc_stats::AllocStats) =
22-
AllocStats((getfield(alloc_stats, field) for field in fieldnames(AllocStats))...)
34+
AllocStats(alloc_stats.alloc_count[], alloc_stats.alloc_bytes[],
35+
alloc_stats.free_count[], alloc_stats.free_bytes[],
36+
alloc_stats.total_time[])
37+
38+
Base.:(-)(a::AllocStats, b::AllocStats) = (;
39+
alloc_count = a.alloc_count[] - b.alloc_count[],
40+
alloc_bytes = a.alloc_bytes[] - b.alloc_bytes[],
41+
free_count = a.free_count[] - b.free_count[],
42+
free_bytes = a.free_bytes[] - b.free_bytes[],
43+
total_time = a.total_time[] - b.total_time[])
2344

24-
AllocStats(b::AllocStats, a::AllocStats) =
25-
AllocStats(
26-
b.alloc_count - a.alloc_count,
27-
b.alloc_bytes - a.alloc_bytes,
28-
b.free_count - a.free_count,
29-
b.free_bytes - a.free_bytes,
30-
b.total_time - a.total_time)
45+
const alloc_stats = AllocStats()
3146

3247

3348
## CUDA allocator
@@ -379,9 +394,9 @@ an [`OutOfGPUMemoryError`](@ref) if the allocation request cannot be satisfied.
379394
buf, time = _alloc(B, sz; stream)
380395

381396
memory_use[] += sz
382-
alloc_stats.alloc_count += 1
383-
alloc_stats.alloc_bytes += sz
384-
alloc_stats.total_time += time
397+
alloc_stats.alloc_count[] += 1
398+
alloc_stats.alloc_bytes[] += sz
399+
alloc_stats.total_time[] += time
385400
# NOTE: total_time might be an over-estimation if we trigger GC somewhere else
386401

387402
return buf
@@ -431,9 +446,9 @@ Releases a buffer `buf` to the memory pool.
431446
end
432447

433448
memory_use[] -= sz
434-
alloc_stats.free_count += 1
435-
alloc_stats.free_bytes += sz
436-
alloc_stats.total_time += time
449+
alloc_stats.free_count[] += 1
450+
alloc_stats.free_bytes[] += sz
451+
alloc_stats.total_time[] += time
437452
catch ex
438453
Base.showerror_nostdio(ex, "WARNING: Error while freeing $buf")
439454
Base.show_backtrace(Core.stdout, catch_backtrace())
@@ -495,9 +510,9 @@ macro allocated(ex)
495510
let
496511
local f
497512
function f()
498-
b0 = alloc_stats.alloc_bytes
513+
b0 = alloc_stats.alloc_bytes[]
499514
$(esc(ex))
500-
alloc_stats.alloc_bytes - b0
515+
alloc_stats.alloc_bytes[] - b0
501516
end
502517
f()
503518
end
@@ -579,7 +594,7 @@ macro timed(ex)
579594
local cpu_time = (cpu_time1 - cpu_time0) / 1e9
580595

581596
local cpu_mem_stats = Base.GC_Diff(cpu_mem_stats1, cpu_mem_stats0)
582-
local gpu_mem_stats = AllocStats(gpu_mem_stats1, gpu_mem_stats0)
597+
local gpu_mem_stats = gpu_mem_stats1 - gpu_mem_stats0
583598

584599
(value=val, time=cpu_time,
585600
cpu_bytes=cpu_mem_stats.allocd, cpu_gctime=cpu_mem_stats.total_time / 1e9, cpu_gcstats=cpu_mem_stats,

0 commit comments

Comments
 (0)