@@ -7,27 +7,42 @@ using Logging
7
7
# # allocation statistics
8
8
9
9
"""
    AllocStats()
    AllocStats(alloc_count, alloc_bytes, free_count, free_bytes, total_time)

Counters describing allocator activity. Every field is stored in its own
`Threads.Atomic` cell, so the current value is read with `stats.field[]`.

The zero-argument form starts every counter at zero. The five-argument form
seeds the counters from plain numbers; the integer counters accept any
`Integer` and `total_time` accepts any `Real` (converted to `Float64`), so
calls such as `AllocStats(0, 0, 0, 0, 0)` are accepted.

NOTE: `stats.field[] += v` performs a separate load and store on the atomic
cell. Use `Threads.atomic_add!(stats.field, v)` when the update itself has to
be an atomic read-modify-write.
"""
mutable struct AllocStats
    # number of allocations and their cumulative size in bytes
    alloc_count::Threads.Atomic{Int}
    alloc_bytes::Threads.Atomic{Int}

    # number of frees and their cumulative size in bytes
    free_count::Threads.Atomic{Int}
    free_bytes::Threads.Atomic{Int}

    # time accumulated by the allocation and free paths
    total_time::Threads.Atomic{Float64}

    function AllocStats(alloc_count::Integer, alloc_bytes::Integer,
                        free_count::Integer, free_bytes::Integer,
                        total_time::Real)
        # each field gets a freshly created atomic cell, so instances never
        # share storage with the values they were built from
        new(Threads.Atomic{Int}(convert(Int, alloc_count)),
            Threads.Atomic{Int}(convert(Int, alloc_bytes)),
            Threads.Atomic{Int}(convert(Int, free_count)),
            Threads.Atomic{Int}(convert(Int, free_bytes)),
            Threads.Atomic{Float64}(convert(Float64, total_time)))
    end

    # all counters start at zero
    AllocStats() = AllocStats(0, 0, 0, 0, 0.0)
end
20
32
21
33
# Snapshot of `alloc_stats`: reads the current value of every counter and
# builds a new `AllocStats` from those plain numbers. The fields are read one
# after another, not as a single atomic step.
function Base.copy(alloc_stats::AllocStats)
    allocs, allocd_bytes = alloc_stats.alloc_count[], alloc_stats.alloc_bytes[]
    frees, freed_bytes = alloc_stats.free_count[], alloc_stats.free_bytes[]
    elapsed = alloc_stats.total_time[]
    return AllocStats(allocs, allocd_bytes, frees, freed_bytes, elapsed)
end
37
+
38
# Field-wise difference `a - b` of two statistics records. The result is a
# NamedTuple of plain numbers (not an `AllocStats`), with one entry per counter.
function Base.:(-)(a::AllocStats, b::AllocStats)
    d_alloc_count = a.alloc_count[] - b.alloc_count[]
    d_alloc_bytes = a.alloc_bytes[] - b.alloc_bytes[]
    d_free_count = a.free_count[] - b.free_count[]
    d_free_bytes = a.free_bytes[] - b.free_bytes[]
    d_total_time = a.total_time[] - b.total_time[]
    return (alloc_count = d_alloc_count,
            alloc_bytes = d_alloc_bytes,
            free_count = d_free_count,
            free_bytes = d_free_bytes,
            total_time = d_total_time)
end
23
44
24
- AllocStats (b:: AllocStats , a:: AllocStats ) =
25
- AllocStats (
26
- b. alloc_count - a. alloc_count,
27
- b. alloc_bytes - a. alloc_bytes,
28
- b. free_count - a. free_count,
29
- b. free_bytes - a. free_bytes,
30
- b. total_time - a. total_time)
45
# Global statistics record, created with every counter at zero. The allocation
# and free paths in this file add to its counters, and `@allocated` reads
# `alloc_bytes` from it before and after running the user expression.
+ const alloc_stats = AllocStats ()
31
46
32
47
33
48
# # CUDA allocator
@@ -379,9 +394,9 @@ an [`OutOfGPUMemoryError`](@ref) if the allocation request cannot be satisfied.
379
394
buf, time = _alloc (B, sz; stream)
380
395
381
396
memory_use[] += sz
382
- alloc_stats. alloc_count += 1
383
- alloc_stats. alloc_bytes += sz
384
- alloc_stats. total_time += time
397
+ alloc_stats. alloc_count[] += 1
398
+ alloc_stats. alloc_bytes[] += sz
399
+ alloc_stats. total_time[] += time
385
400
# NOTE: total_time might be an over-estimation if we trigger GC somewhere else
386
401
387
402
return buf
@@ -431,9 +446,9 @@ Releases a buffer `buf` to the memory pool.
431
446
end
432
447
433
448
memory_use[] -= sz
434
- alloc_stats. free_count += 1
435
- alloc_stats. free_bytes += sz
436
- alloc_stats. total_time += time
449
+ alloc_stats. free_count[] += 1
450
+ alloc_stats. free_bytes[] += sz
451
+ alloc_stats. total_time[] += time
437
452
catch ex
438
453
Base. showerror_nostdio (ex, " WARNING: Error while freeing $buf " )
439
454
Base. show_backtrace (Core. stdout , catch_backtrace ())
@@ -495,9 +510,9 @@ macro allocated(ex)
495
510
let
496
511
local f
497
512
function f ()
498
- b0 = alloc_stats. alloc_bytes
513
+ b0 = alloc_stats. alloc_bytes[]
499
514
$ (esc (ex))
500
- alloc_stats. alloc_bytes - b0
515
+ alloc_stats. alloc_bytes[] - b0
501
516
end
502
517
f ()
503
518
end
@@ -579,7 +594,7 @@ macro timed(ex)
579
594
local cpu_time = (cpu_time1 - cpu_time0) / 1e9
580
595
581
596
local cpu_mem_stats = Base. GC_Diff (cpu_mem_stats1, cpu_mem_stats0)
582
- local gpu_mem_stats = AllocStats ( gpu_mem_stats1, gpu_mem_stats0)
597
+ local gpu_mem_stats = gpu_mem_stats1 - gpu_mem_stats0
583
598
584
599
(value= val, time= cpu_time,
585
600
cpu_bytes= cpu_mem_stats. allocd, cpu_gctime= cpu_mem_stats. total_time / 1e9 , cpu_gcstats= cpu_mem_stats,
0 commit comments