Skip to content

Commit 30785d3

Browse files
vchuravyNHDaly
andauthored
Support Metadata like Threadid (#40)
* Support Meta * Add test that we enforce meta * backwards compatibility: support non-meta profiles * Add corner-cases tests for manually constructed profiles; ensure we catch every sample Fix the bugs in handling those cases :) * Bump to version 3.1.0: Support meta! * Disable meta test for v1.6 --------- Co-authored-by: Nathan Daly <NHDaly@gmail.com>
1 parent 6e9f56c commit 30785d3

File tree

3 files changed

+136
-25
lines changed

3 files changed

+136
-25
lines changed

Project.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "PProf"
22
uuid = "e4faabce-9ead-11e9-39d9-4379958e3056"
33
authors = ["Valentin Churavy <v.churavy@gmail.com>", "Nathan Daly <nhdaly@gmail.com>"]
4-
version = "3.0.0"
4+
version = "3.1.0"
55

66
[deps]
77
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
@@ -17,18 +17,18 @@ pprof_jll = "cf2c5f97-e748-59fa-a03f-dda3c62118cb"
1717

1818
[compat]
1919
AbstractTrees = "0.3, 0.4"
20+
CodecZlib = "0.7"
2021
EnumX = "1"
2122
FlameGraphs = "0.2, 1"
2223
OrderedCollections = "1.1"
2324
ProgressMeter = "1.7"
2425
ProtoBuf = "1"
2526
julia = "1.6"
2627
pprof_jll = "0.1, 1"
27-
CodecZlib = "0.7"
2828

2929
[extras]
30-
Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
3130
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
31+
Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
3232
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
3333

3434
[targets]

src/PProf.jl

Lines changed: 69 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ clear
1919

2020
include(joinpath("..", "lib", "perftools", "perftools.jl"))
2121

22-
import .perftools.profiles: ValueType, Sample, Function, Location, Line
22+
import .perftools.profiles: ValueType, Sample, Function,
23+
Location, Line, Label
2324
const PProfile = perftools.profiles.Profile
2425

2526
const proc = Ref{Union{Base.Process, Nothing}}(nothing)
@@ -96,14 +97,16 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing,
9697
keep_frames::Union{Nothing, AbstractString} = nothing,
9798
ui_relative_percentages::Bool = true,
9899
)
100+
has_meta = false
99101
if data === nothing
100102
data = if isdefined(Profile, :has_meta)
101-
copy(Profile.fetch(include_meta = false))
103+
has_meta = true
104+
copy(Profile.fetch(include_meta = true))
102105
else
103106
copy(Profile.fetch())
104107
end
105-
elseif isdefined(Profile, :has_meta) && Profile.has_meta(data)
106-
data = Profile.strip_meta(data)
108+
elseif isdefined(Profile, :has_meta)
109+
has_meta = Profile.has_meta(data)
107110
end
108111
lookup = lidict
109112
if lookup === nothing
@@ -122,6 +125,8 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing,
122125
enter!(string) = _enter!(string_table, string)
123126
enter!(::Nothing) = _enter!(string_table, "nothing")
124127
ValueType!(_type, unit) = ValueType(enter!(_type), enter!(unit))
128+
Label!(key, value, unit) = Label(key = enter!(key), num = value, num_unit = enter!(unit))
129+
Label!(key, value) = Label(key = enter!(key), str = enter!(string(value)))
125130

126131
# Setup:
127132
enter!("") # NOTE: pprof requires first entry to be ""
@@ -136,35 +141,71 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing,
136141

137142
sample_type = [
138143
ValueType!("events", "count"), # Mandatory
139-
ValueType!("stack_depth", "count")
140144
]
141145

142146
period_type = ValueType!("cpu", "nanoseconds")
143147
drop_frames = isnothing(drop_frames) ? 0 : enter!(drop_frames)
144148
keep_frames = isnothing(keep_frames) ? 0 : enter!(keep_frames)
145149
# start decoding backtraces
146150
location_id = Vector{eltype(data)}()
147-
lastwaszero = true
148151

149-
for ip in data
150-
# ip == 0x0 is the sentinel value for finishing a backtrace, therefore finising a sample
151-
if ip == 0
152+
# All samples get the same value for CPU profiles.
153+
value = [
154+
1, # events
155+
]
156+
157+
lastwaszero = true # (Legacy: used when has_meta = false)
158+
159+
# The Profile data buffer is a big array, with each sample appended one after the other.
160+
# Each sample now looks like this:
161+
# | ip | ip | ip | meta1 | meta2 | meta3 | meta4| 0x0 | 0x0 |
162+
# We iterate backwards, starting from the end, so that we don't encounter the metadata
163+
# and mistake it for more ip addresses. For each sample, we skip the zeros, consume the
164+
# metadata, then continue scanning the ip addresses, and when we hit another end of a
165+
# block, we finish the sample we just consumed.
166+
idx = length(data)
167+
meta = nothing
168+
while idx > 0
169+
# We handle the very first sample after the loop.
170+
if has_meta && Profile.is_block_end(data, idx)
171+
if meta !== nothing
172+
# Finish last block
173+
push!(samples, Sample(;location_id = reverse!(location_id), value = value, label = meta))
174+
location_id = Vector{eltype(data)}()
175+
end
176+
177+
# Consume all of the metadata entries in the buffer, and then position the IP
178+
# at the idx for the actual ip.
179+
thread_sleeping = data[idx - Profile.META_OFFSET_SLEEPSTATE] - 1 # "Sleeping" is recorded as 1 or 2, to avoid 0s, which indicate end-of-block.
180+
cpu_cycle_clock = data[idx - Profile.META_OFFSET_CPUCYCLECLOCK]
181+
taskid = data[idx - Profile.META_OFFSET_TASKID]
182+
threadid = data[idx - Profile.META_OFFSET_THREADID]
183+
184+
meta = Label[
185+
Label!("thread_sleeping", thread_sleeping != 0),
186+
Label!("cycle_clock", cpu_cycle_clock, "nanoseconds"),
187+
Label!("taskid", taskid),
188+
Label!("threadid", threadid),
189+
]
190+
idx -= (Profile.nmeta + 2) # skip all the metas, plus the 2 nulls that end a block.
191+
continue
192+
elseif !has_meta && data[idx] == 0
152193
# Avoid creating empty samples
194+
# ip == 0x0 is the sentinel value for finishing a backtrace (when meta is disabled), therefore finishing a sample
195+
# On some platforms, we sometimes get two 0s in a row for some reason...
153196
if lastwaszero
154197
@assert length(location_id) == 0
155-
continue
198+
else
199+
# Finish last block
200+
push!(samples, Sample(;location_id = reverse!(location_id), value = value))
201+
location_id = Vector{eltype(data)}()
202+
lastwaszero = true
156203
end
157-
158-
# End of sample
159-
value = [
160-
1, # events
161-
length(location_id), # stack_depth
162-
]
163-
push!(samples, Sample(;location_id, value))
164-
location_id = Vector{eltype(data)}()
165-
lastwaszero = true
204+
idx -= 1
166205
continue
167206
end
207+
ip = data[idx]
208+
idx -= 1
168209
lastwaszero = false
169210

170211
# A backtrace consists of a set of IP (Instruction Pointers), each IP points
@@ -245,6 +286,15 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing,
245286
push!(location_id, ip)
246287
end
247288
end
289+
if length(data) > 0
290+
# Finish the last sample processed (the first one in the buffer, since we iterate backwards)
291+
if has_meta
292+
push!(samples, Sample(;location_id = reverse!(location_id), value = value, label = meta))
293+
else
294+
push!(samples, Sample(;location_id = reverse!(location_id), value = value))
295+
end
296+
location_id = Vector{eltype(data)}()
297+
end
248298

249299
# If from_c=false funcs and locs should NOT contain C functions
250300
prof = PProfile(

test/PProf.jl

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,44 @@ function load_prof_proto(file)
6565
open(io->decode(ProtoDecoder(GzipDecompressorStream(io)), PProf.perftools.profiles.Profile), file, "r")
6666
end
6767

68+
const HAS_META = isdefined(Profile, :has_meta)
69+
@testset "Corner Cases" begin
70+
@testset "non-meta profile" begin
71+
72+
@testset "0 sample profile" begin
73+
prof = load_prof_proto(pprof(UInt64[], out=tempname(), web=false))
74+
@test length(prof.sample) == 0
75+
end
76+
@testset "1 sample profile" begin
77+
prof = load_prof_proto(pprof(UInt64[0xdeadbeef,0], out=tempname(), web=false))
78+
@test length(prof.sample) == 1
79+
end
80+
81+
@testset "2 sample, 1 location profile" begin
82+
prof = load_prof_proto(pprof(UInt64[0xdeadbeef,0, 0xdeadbeef, 0], out=tempname(), web=false))
83+
@test length(prof.sample) == 2
84+
@test length(prof.location) == 1
85+
end
86+
end
87+
if HAS_META
88+
@testset "with-meta profile" begin
89+
@testset "1 sample profile" begin
90+
data = UInt64[0xdeadbeef, 1, 1, 1, 1, 0, 0]
91+
prof = load_prof_proto(pprof(data, out=tempname(), web=false))
92+
@test length(prof.sample) == 1
93+
end
94+
95+
@testset "2 sample 1 location profile" begin
96+
data = UInt64[0xdeadbeef, 1, 1, 1, 1, 0, 0, 0xdeadbeef, 1, 1, 1, 1, 0, 0]
97+
prof = load_prof_proto(pprof(data, out=tempname(), web=false))
98+
@test length(prof.sample) == 2
99+
@test length(prof.location) == 1
100+
end
101+
end
102+
end
103+
end
104+
105+
68106
@testset "with_c" begin
69107
Profile.clear()
70108

@@ -74,12 +112,34 @@ end
74112
end
75113
sleep(2)
76114
end
77-
for i in 1:2
115+
@testset for i in 1:4
78116
if i == 1
79-
data = Profile.fetch()
117+
if !HAS_META
118+
continue
119+
end
120+
data = Profile.fetch(include_meta = true)
121+
args = (data,)
122+
elseif i == 2
123+
if !HAS_META
124+
continue
125+
end
126+
data,lidict = Profile.retrieve(include_meta = true)
127+
args = (data, lidict)
128+
elseif i == 3
129+
# Ensure we are backwards compatible with older, non-meta profiles
130+
if HAS_META
131+
data = Profile.fetch(include_meta = false)
132+
else
133+
data = Profile.fetch()
134+
end
80135
args = (data,)
81136
else
82-
data,lidict = Profile.retrieve()
137+
# Ensure we are backwards compatible with older, non-meta profiles
138+
if HAS_META
139+
data,lidict = Profile.retrieve(include_meta = false)
140+
else
141+
data,lidict = Profile.retrieve()
142+
end
83143
args = (data, lidict)
84144
end
85145

@@ -135,6 +195,7 @@ end
135195

136196
@testset "subprocess refresh" begin
137197

198+
PProf.kill()
138199
@pprof foo(10000, 5, [])
139200

140201
current_proc = PProf.proc[]

0 commit comments

Comments
 (0)