19
19
20
20
include (joinpath (" .." , " lib" , " perftools" , " perftools.jl" ))
21
21
22
- import . perftools. profiles: ValueType, Sample, Function, Location, Line
22
+ import . perftools. profiles: ValueType, Sample, Function,
23
+ Location, Line, Label
23
24
const PProfile = perftools. profiles. Profile
24
25
25
26
const proc = Ref {Union{Base.Process, Nothing}} (nothing )
@@ -96,14 +97,16 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing,
96
97
keep_frames:: Union{Nothing, AbstractString} = nothing ,
97
98
ui_relative_percentages:: Bool = true ,
98
99
)
100
+ has_meta = false
99
101
if data === nothing
100
102
data = if isdefined (Profile, :has_meta )
101
- copy (Profile. fetch (include_meta = false ))
103
+ has_meta = true
104
+ copy (Profile. fetch (include_meta = true ))
102
105
else
103
106
copy (Profile. fetch ())
104
107
end
105
- elseif isdefined (Profile, :has_meta ) && Profile . has_meta (data)
106
- data = Profile. strip_meta (data)
108
+ elseif isdefined (Profile, :has_meta )
109
+ has_meta = Profile. has_meta (data)
107
110
end
108
111
lookup = lidict
109
112
if lookup === nothing
@@ -122,6 +125,8 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing,
122
125
enter! (string) = _enter! (string_table, string)
123
126
enter! (:: Nothing ) = _enter! (string_table, " nothing" )
124
127
ValueType! (_type, unit) = ValueType (enter! (_type), enter! (unit))
128
+ Label! (key, value, unit) = Label (key = enter! (key), num = value, num_unit = enter! (unit))
129
+ Label! (key, value) = Label (key = enter! (key), str = enter! (string (value)))
125
130
126
131
# Setup:
127
132
enter! (" " ) # NOTE: pprof requires first entry to be ""
@@ -136,35 +141,71 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing,
136
141
137
142
sample_type = [
138
143
ValueType! (" events" , " count" ), # Mandatory
139
- ValueType! (" stack_depth" , " count" )
140
144
]
141
145
142
146
period_type = ValueType! (" cpu" , " nanoseconds" )
143
147
drop_frames = isnothing (drop_frames) ? 0 : enter! (drop_frames)
144
148
keep_frames = isnothing (keep_frames) ? 0 : enter! (keep_frames)
145
149
# start decoding backtraces
146
150
location_id = Vector {eltype(data)} ()
147
- lastwaszero = true
148
151
149
- for ip in data
150
- # ip == 0x0 is the sentinel value for finishing a backtrace, therefore finising a sample
151
- if ip == 0
152
+ # All samples get the same value for CPU profiles.
153
+ value = [
154
+ 1 , # events
155
+ ]
156
+
157
+ lastwaszero = true # (Legacy: used when has_meta = false)
158
+
159
+ # The Profile data buffer is a big array, with each sample appended one after the other.
160
+ # Each sample now looks like this:
161
+ # | ip | ip | ip | meta1 | meta2 | meta3 | meta4| 0x0 | 0x0 |
162
+ # We iterate backwards, starting from the end, so that we don't encounter the metadata
163
+ # and mistake it for more ip addresses. For each sample, we skip the zeros, consume the
164
+ # metadata, then continue scanning the ip addresses, and when we hit another end of a
165
+ # block, we finish the sample we just consumed.
166
+ idx = length (data)
167
+ meta = nothing
168
+ while idx > 0
169
+ # We handle the very first sample after the loop.
170
+ if has_meta && Profile. is_block_end (data, idx)
171
+ if meta != = nothing
172
+ # Finish last block
173
+ push! (samples, Sample (;location_id = reverse! (location_id), value = value, label = meta))
174
+ location_id = Vector {eltype(data)} ()
175
+ end
176
+
177
+ # Consume all of the metadata entries in the buffer, and then position the IP
178
+ # at the idx for the actual ip.
179
+ thread_sleeping = data[idx - Profile. META_OFFSET_SLEEPSTATE] - 1 # "Sleeping" is recorded as 1 or 2, to avoid 0s, which indicate end-of-block.
180
+ cpu_cycle_clock = data[idx - Profile. META_OFFSET_CPUCYCLECLOCK]
181
+ taskid = data[idx - Profile. META_OFFSET_TASKID]
182
+ threadid = data[idx - Profile. META_OFFSET_THREADID]
183
+
184
+ meta = Label[
185
+ Label! (" thread_sleeping" , thread_sleeping != 0 ),
186
+ Label! (" cycle_clock" , cpu_cycle_clock, " nanoseconds" ),
187
+ Label! (" taskid" , taskid),
188
+ Label! (" threadid" , threadid),
189
+ ]
190
+ idx -= (Profile. nmeta + 2 ) # skip all the metas, plus the 2 nulls that end a block.
191
+ continue
192
+ elseif ! has_meta && data[idx] == 0
152
193
# Avoid creating empty samples
194
+ # ip == 0x0 is the sentinel value for finishing a backtrace (when meta is disabled), therefore finishing a sample
195
+ # On some platforms, we sometimes get two 0s in a row for some reason...
153
196
if lastwaszero
154
197
@assert length (location_id) == 0
155
- continue
198
+ else
199
+ # Finish last block
200
+ push! (samples, Sample (;location_id = reverse! (location_id), value = value))
201
+ location_id = Vector {eltype(data)} ()
202
+ lastwaszero = true
156
203
end
157
-
158
- # End of sample
159
- value = [
160
- 1 , # events
161
- length (location_id), # stack_depth
162
- ]
163
- push! (samples, Sample (;location_id, value))
164
- location_id = Vector {eltype(data)} ()
165
- lastwaszero = true
204
+ idx -= 1
166
205
continue
167
206
end
207
+ ip = data[idx]
208
+ idx -= 1
168
209
lastwaszero = false
169
210
170
211
# A backtrace consists of a set of IP (Instruction Pointers), each IP points
@@ -245,6 +286,15 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing,
245
286
push! (location_id, ip)
246
287
end
247
288
end
289
+ if length (data) > 0
290
+ # Finish the final sample processed (the first one in the buffer, since we iterate backwards)
291
+ if has_meta
292
+ push! (samples, Sample (;location_id = reverse! (location_id), value = value, label = meta))
293
+ else
294
+ push! (samples, Sample (;location_id = reverse! (location_id), value = value))
295
+ end
296
+ location_id = Vector {eltype(data)} ()
297
+ end
248
298
249
299
# If from_c=false funcs and locs should NOT contain C functions
250
300
prof = PProfile (
0 commit comments