Skip to content
This repository was archived by the owner on Mar 12, 2021. It is now read-only.

Commit 13870c7

Browse files
committed
Use a separate allocation timer.
1 parent 6509a34 commit 13870c7

File tree

3 files changed

+61
-47
lines changed

3 files changed

+61
-47
lines changed

src/memory.jl

Lines changed: 57 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# GPU memory management and pooling
22

3+
using Printf
4+
using TimerOutputs
5+
6+
37
## allocation statistics
48

59
mutable struct AllocStats
@@ -28,6 +32,10 @@ Base.copy(alloc_stats::AllocStats) =
2832

2933
## CUDA allocator
3034

35+
const alloc_to = TimerOutput()
36+
37+
"""
    alloc_timings()

Show the `alloc_to` timer, passing `allocations=false` and `sortby=:name`, then
print a trailing newline.
"""
function alloc_timings()
    show(alloc_to; allocations=false, sortby=:name)
    return println()
end
38+
3139
const usage = Ref(0)
3240
const usage_limit = Ref{Union{Nothing,Int}}(nothing)
3341

@@ -42,8 +50,9 @@ function actual_alloc(bytes)
4250
# try the actual allocation
4351
try
4452
alloc_stats.actual_time += Base.@elapsed begin
45-
buf = Mem.alloc(Mem.Device, bytes)
53+
@timeit alloc_to "alloc" buf = Mem.alloc(Mem.Device, bytes)
4654
end
55+
@assert sizeof(buf) == bytes
4756
alloc_stats.actual_nalloc += 1
4857
alloc_stats.actual_alloc += bytes
4958
usage[] += bytes
@@ -61,25 +70,24 @@ function actual_free(buf)
6170
usage[] -= sizeof(buf)
6271

6372
if CUDAdrv.isvalid(buf.ctx)
64-
alloc_stats.actual_time += Base.@elapsed Mem.free(buf)
73+
@timeit alloc_to "free" begin
74+
alloc_stats.actual_time += Base.@elapsed Mem.free(buf)
75+
end
6576
end
6677

6778
return
6879
end
6980

7081

71-
## pool timings
72-
73-
using TimerOutputs
82+
## memory pool
7483

7584
const pool_to = TimerOutput()
7685

7786
"""
    @pool_timeit args...

Build a timing expression with `TimerOutputs.timer_expr`, using the package-level
`CuArrays.pool_to` timer and forwarding `args` unchanged.
"""
macro pool_timeit(args...)
    # refer to the timer through the CuArrays module so the expansion does not
    # depend on what is in scope at the call site
    timer = :($CuArrays.pool_to)
    return TimerOutputs.timer_expr(__module__, false, timer, args...)
end
8089

81-
82-
## pool implementations
90+
"""
    pool_timings()

Show the `pool_to` timer, passing `allocations=false` and `sortby=:name`, then
print a trailing newline.
"""
function pool_timings()
    show(pool_to; allocations=false, sortby=:name)
    return println()
end
8391

8492
# API:
8593
# - init()
@@ -137,56 +145,23 @@ end
137145
return
138146
end
139147

140-
function __init_memory__()
141-
if haskey(ENV, "CUARRAYS_MEMORY_LIMIT")
142-
usage_limit[] = parse(Int, ENV["CUARRAYS_MEMORY_LIMIT"])
143-
end
144-
145-
if haskey(ENV, "CUARRAYS_MEMORY_POOL")
146-
memory_pool!(
147-
if ENV["CUARRAYS_MEMORY_POOL"] == "binned"
148-
BinnedPool
149-
elseif ENV["CUARRAYS_MEMORY_POOL"] == "simple"
150-
SimplePool
151-
elseif ENV["CUARRAYS_MEMORY_POOL"] == "split"
152-
SplittingPool
153-
elseif ENV["CUARRAYS_MEMORY_POOL"] == "none"
154-
DummyPool
155-
else
156-
error("Invalid allocator selected")
157-
end)
158-
else
159-
memory_pool!()
160-
end
161-
162-
# if the user hand-picked an allocator, be a little verbose
163-
if haskey(ENV, "CUARRAYS_MEMORY_POOL")
164-
atexit(()->begin
165-
Core.println("""
166-
CuArrays.jl $(nameof(pool[])) statistics:
167-
- $(alloc_stats.pool_nalloc) pool allocations: $(Base.format_bytes(alloc_stats.pool_alloc)) in $(round(alloc_stats.pool_time; digits=2))s
168-
- $(alloc_stats.actual_nalloc) CUDA allocations: $(Base.format_bytes(alloc_stats.actual_alloc)) in $(round(alloc_stats.actual_time; digits=2))s""")
169-
end)
170-
end
171-
end
172-
173148
"""
    memory_pool!(mod::Module=BinnedPool)

Make `mod` the active memory pool.

If a pool is already installed in `pool[]`, its `deinit()` is called first. The
timers are then reset through `reset_timers!()`, `mod` is stored in `pool[]`, and
`mod.init()` is called. Returns `nothing`.
"""
function memory_pool!(mod::Module=BinnedPool)
    # tear down whatever pool was active before switching
    previous = pool[]
    previous === nothing || previous.deinit()

    # timings gathered for the previous pool do not apply to the new one
    reset_timers!()

    pool[] = mod
    mod.init()

    return nothing
end
185160

161+
"""
    pool_dump()

Call `dump()` on the pool currently stored in `pool[]` and return its result.
"""
function pool_dump()
    active = pool[]
    return active.dump()
end
186162

187-
## utilities
188163

189-
using Printf
164+
## utilities
190165

191166
macro allocated(ex)
192167
quote
@@ -308,6 +283,43 @@ function memory_status()
308283
end
309284
end
310285

311-
pool_timings() = (show(pool_to; allocations=false, sortby=:name); println())
312286

313-
pool_dump() = pool[].dump()
287+
## init
288+
289+
"""
    __init_memory__()

Configure the memory subsystem from the process environment.

- `CUARRAYS_MEMORY_LIMIT`: when set, it is parsed as an `Int` and stored in
  `usage_limit[]`.
- `CUARRAYS_MEMORY_POOL`: when set, it selects the pool passed to `memory_pool!`
  (`"binned"`, `"simple"`, `"split"` or `"none"`); when unset, `memory_pool!()` is
  called with its default.

When a pool was selected explicitly, an `atexit` hook is registered that prints the
counters accumulated in `alloc_stats`.

Throws an `ArgumentError` if `CUARRAYS_MEMORY_LIMIT` cannot be parsed as an `Int`,
and an `ErrorException` if `CUARRAYS_MEMORY_POOL` names an unknown pool. Returns
`nothing`.
"""
function __init_memory__()
    limit = get(ENV, "CUARRAYS_MEMORY_LIMIT", nothing)
    if limit !== nothing
        parsed = tryparse(Int, limit)
        if parsed === nothing
            # name the variable and the offending value instead of surfacing a
            # bare parse failure
            throw(ArgumentError(
                "CUARRAYS_MEMORY_LIMIT must be an integer, got $(repr(limit))"))
        end
        usage_limit[] = parsed
    end

    # read the variable once so pool selection and the verbosity decision below
    # are guaranteed to agree on the same value
    requested = get(ENV, "CUARRAYS_MEMORY_POOL", nothing)
    if requested === nothing
        memory_pool!()
        return
    end

    memory_pool!(
        if requested == "binned"
            BinnedPool
        elseif requested == "simple"
            SimplePool
        elseif requested == "split"
            SplittingPool
        elseif requested == "none"
            DummyPool
        else
            error("Invalid allocator selected: CUARRAYS_MEMORY_POOL=$(repr(requested)) ",
                  "(expected \"binned\", \"simple\", \"split\" or \"none\")")
        end)

    # the user hand-picked an allocator, so be a little verbose at exit
    atexit() do
        Core.println("""
            CuArrays.jl $(nameof(pool[])) statistics:
            - $(alloc_stats.pool_nalloc) pool allocations: $(Base.format_bytes(alloc_stats.pool_alloc)) in $(round(alloc_stats.pool_time; digits=2))s
            - $(alloc_stats.actual_nalloc) CUDA allocations: $(Base.format_bytes(alloc_stats.actual_alloc)) in $(round(alloc_stats.actual_time; digits=2))s""")
    end

    return
end
321+
322+
"""
    reset_timers!()

Reset both package-level timers, `alloc_to` and then `pool_to`, using
`TimerOutputs.reset_timer!`. The value of the second reset call is returned.
"""
function reset_timers!()
    TimerOutputs.reset_timer!(alloc_to)
    return TimerOutputs.reset_timer!(pool_to)
end

src/memory/split.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ function merge!(head, blocks...)
130130
end
131131

132132
# Request `sz` from `CuArrays.actual_alloc` and wrap the resulting buffer in a
# `Block`. A `nothing` result from the underlying allocator is passed through.
@inline function actual_alloc(sz)
    buf = CuArrays.actual_alloc(sz)
    buf === nothing && return nothing
    return Block(buf)
end
136136

@@ -139,7 +139,7 @@ function actual_free(block::Block)
139139
if block.state != AVAILABLE
140140
error("Cannot free $block: block is not available")
141141
else
142-
@pool_timeit "free" CuArrays.actual_free(block.buf)
142+
CuArrays.actual_free(block.buf)
143143
block.state = INVALID
144144
end
145145
return

test/runtests.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,7 @@ include("forwarddiff.jl")
3030

3131
CuArrays.memory_status()
3232
CuArrays.pool_timings()
33+
CuArrays.alloc_timings()
34+
CuArrays.reset_timers!()
3335

3436
end

0 commit comments

Comments
 (0)