Skip to content

Commit ad0b58d

Browse files
authored
Merge pull request #132 from michaelwoerister/paged-sink-2
Implement paged data serialization
2 parents 5e7a6ef + 4f17692 commit ad0b58d

File tree

13 files changed

+626
-239
lines changed

13 files changed

+626
-239
lines changed

analyzeme/src/profiling_data.rs

Lines changed: 78 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,19 @@ use crate::lightweight_event::LightweightEvent;
33
use crate::timestamp::Timestamp;
44
use crate::StringTable;
55
use measureme::file_header::{
6-
read_file_header, write_file_header, CURRENT_FILE_FORMAT_VERSION, FILE_HEADER_SIZE,
7-
FILE_MAGIC_EVENT_STREAM,
6+
verify_file_header, write_file_header, FILE_EXTENSION, FILE_HEADER_SIZE,
7+
FILE_MAGIC_EVENT_STREAM, FILE_MAGIC_TOP_LEVEL,
8+
};
9+
use measureme::{
10+
EventId, PageTag, RawEvent, SerializationSink, SerializationSinkBuilder, StringTableBuilder,
811
};
9-
use measureme::{EventId, ProfilerFiles, RawEvent, SerializationSink, StringTableBuilder};
1012
use serde::{Deserialize, Deserializer};
11-
use std::error::Error;
1213
use std::fs;
1314
use std::mem;
1415
use std::path::Path;
1516
use std::sync::Arc;
1617
use std::time::{Duration, SystemTime, UNIX_EPOCH};
18+
use std::{error::Error, path::PathBuf};
1719

1820
const RAW_EVENT_SIZE: usize = mem::size_of::<RawEvent>();
1921

@@ -43,35 +45,60 @@ pub struct ProfilingData {
4345
}
4446

4547
impl ProfilingData {
46-
pub fn new(path_stem: &Path) -> Result<ProfilingData, Box<dyn Error>> {
47-
let paths = ProfilerFiles::new(path_stem);
48+
pub fn new(path_stem: &Path) -> Result<ProfilingData, Box<dyn Error + Send + Sync>> {
49+
let paged_path = path_stem.with_extension(FILE_EXTENSION);
50+
51+
if paged_path.exists() {
52+
let data = fs::read(&paged_path)?;
53+
54+
verify_file_header(&data, FILE_MAGIC_TOP_LEVEL, Some(&paged_path), "top-level")?;
55+
56+
let mut split_data = measureme::split_streams(&data[FILE_HEADER_SIZE..]);
57+
58+
let string_data = split_data.remove(&PageTag::StringData).unwrap();
59+
let index_data = split_data.remove(&PageTag::StringIndex).unwrap();
60+
let event_data = split_data.remove(&PageTag::Events).unwrap();
4861

49-
let string_data = fs::read(paths.string_data_file).expect("couldn't read string_data file");
50-
let index_data =
51-
fs::read(paths.string_index_file).expect("couldn't read string_index file");
52-
let event_data = fs::read(paths.events_file).expect("couldn't read events file");
62+
ProfilingData::from_buffers(string_data, index_data, event_data, Some(&paged_path))
63+
} else {
64+
let mut msg = format!(
65+
"Could not find profiling data file `{}`.",
66+
paged_path.display()
67+
);
5368

54-
ProfilingData::from_buffers(string_data, index_data, event_data)
69+
// Let's try to give a helpful error message if we encounter files
70+
// in the old three-file-format:
71+
let paths = ProfilerFiles::new(path_stem);
72+
73+
if paths.events_file.exists()
74+
|| paths.string_data_file.exists()
75+
|| paths.string_index_file.exists()
76+
{
77+
msg += "It looks like your profiling data has been generated \
78+
by an out-dated version of measureme (0.7 or older).";
79+
}
80+
81+
return Err(From::from(msg));
82+
}
5583
}
5684

5785
pub fn from_buffers(
5886
string_data: Vec<u8>,
5987
string_index: Vec<u8>,
6088
events: Vec<u8>,
61-
) -> Result<ProfilingData, Box<dyn Error>> {
89+
diagnostic_file_path: Option<&Path>,
90+
) -> Result<ProfilingData, Box<dyn Error + Send + Sync>> {
6291
let index_data = string_index;
6392
let event_data = events;
6493

65-
let event_data_format = read_file_header(&event_data, FILE_MAGIC_EVENT_STREAM)?;
66-
if event_data_format != CURRENT_FILE_FORMAT_VERSION {
67-
Err(format!(
68-
"Event stream file format version '{}' is not supported
69-
by this version of `measureme`.",
70-
event_data_format
71-
))?;
72-
}
94+
verify_file_header(
95+
&event_data,
96+
FILE_MAGIC_EVENT_STREAM,
97+
diagnostic_file_path,
98+
"event",
99+
)?;
73100

74-
let string_table = StringTable::new(string_data, index_data)?;
101+
let string_table = StringTable::new(string_data, index_data, diagnostic_file_path)?;
75102

76103
let metadata = string_table.get_metadata().to_string();
77104
let metadata: Metadata = serde_json::from_str(&metadata)?;
@@ -207,17 +234,20 @@ pub struct ProfilingDataBuilder {
207234

208235
impl ProfilingDataBuilder {
209236
pub fn new() -> ProfilingDataBuilder {
210-
let event_sink = SerializationSink::new_in_memory();
211-
let string_table_data_sink = Arc::new(SerializationSink::new_in_memory());
212-
let string_table_index_sink = Arc::new(SerializationSink::new_in_memory());
237+
let sink_builder = SerializationSinkBuilder::new_in_memory();
238+
239+
let event_sink = sink_builder.new_sink(PageTag::Events);
240+
let string_table_data_sink = Arc::new(sink_builder.new_sink(PageTag::StringData));
241+
let string_table_index_sink = Arc::new(sink_builder.new_sink(PageTag::StringIndex));
213242

214243
// The first thing in every file we generate must be the file header.
215-
write_file_header(&event_sink, FILE_MAGIC_EVENT_STREAM);
244+
write_file_header(&mut event_sink.as_std_write(), FILE_MAGIC_EVENT_STREAM).unwrap();
216245

217246
let string_table = StringTableBuilder::new(
218247
string_table_data_sink.clone(),
219248
string_table_index_sink.clone(),
220-
);
249+
)
250+
.unwrap();
221251

222252
ProfilingDataBuilder {
223253
event_sink,
@@ -287,11 +317,9 @@ impl ProfilingDataBuilder {
287317
.unwrap()
288318
.into_bytes();
289319

290-
assert_eq!(
291-
read_file_header(&event_data, FILE_MAGIC_EVENT_STREAM).unwrap(),
292-
CURRENT_FILE_FORMAT_VERSION
293-
);
294-
let string_table = StringTable::new(data_bytes, index_bytes).unwrap();
320+
verify_file_header(&event_data, FILE_MAGIC_EVENT_STREAM, None, "event").unwrap();
321+
322+
let string_table = StringTable::new(data_bytes, index_bytes, None).unwrap();
295323
let metadata = Metadata {
296324
start_time: UNIX_EPOCH,
297325
process_id: 0,
@@ -319,6 +347,25 @@ fn event_index_to_addr(event_index: usize) -> usize {
319347
FILE_HEADER_SIZE + event_index * mem::size_of::<RawEvent>()
320348
}
321349

350+
// This struct reflects what filenames were in old versions of measureme. It is
351+
// used only for giving helpful error messages now if a user tries to load old
352+
// data.
353+
struct ProfilerFiles {
354+
pub events_file: PathBuf,
355+
pub string_data_file: PathBuf,
356+
pub string_index_file: PathBuf,
357+
}
358+
359+
impl ProfilerFiles {
360+
fn new<P: AsRef<Path>>(path_stem: P) -> ProfilerFiles {
361+
ProfilerFiles {
362+
events_file: path_stem.as_ref().with_extension("events"),
363+
string_data_file: path_stem.as_ref().with_extension("string_data"),
364+
string_index_file: path_stem.as_ref().with_extension("string_index"),
365+
}
366+
}
367+
}
368+
322369
#[rustfmt::skip]
323370
#[cfg(test)]
324371
mod tests {

analyzeme/src/stringtable.rs

Lines changed: 30 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! See module-level documentation `measureme::stringtable`.
22
33
use measureme::file_header::{
4-
read_file_header, strip_file_header, CURRENT_FILE_FORMAT_VERSION, FILE_MAGIC_STRINGTABLE_DATA,
4+
strip_file_header, verify_file_header, FILE_MAGIC_STRINGTABLE_DATA,
55
FILE_MAGIC_STRINGTABLE_INDEX,
66
};
77
use measureme::stringtable::{METADATA_STRING_ID, STRING_ID_MASK, TERMINATOR};
@@ -11,6 +11,7 @@ use rustc_hash::FxHashMap;
1111
use std::borrow::Cow;
1212
use std::convert::TryInto;
1313
use std::error::Error;
14+
use std::path::Path;
1415

1516
fn deserialize_index_entry(bytes: &[u8]) -> (StringId, Addr) {
1617
(
@@ -204,21 +205,23 @@ pub struct StringTable {
204205
}
205206

206207
impl StringTable {
207-
pub fn new(string_data: Vec<u8>, index_data: Vec<u8>) -> Result<StringTable, Box<dyn Error>> {
208-
let string_data_format = read_file_header(&string_data, FILE_MAGIC_STRINGTABLE_DATA)?;
209-
let index_data_format = read_file_header(&index_data, FILE_MAGIC_STRINGTABLE_INDEX)?;
210-
211-
if string_data_format != index_data_format {
212-
Err("Mismatch between StringTable DATA and INDEX format version")?;
213-
}
214-
215-
if string_data_format != CURRENT_FILE_FORMAT_VERSION {
216-
Err(format!(
217-
"StringTable file format version '{}' is not supported
218-
by this version of `measureme`.",
219-
string_data_format
220-
))?;
221-
}
208+
pub fn new(
209+
string_data: Vec<u8>,
210+
index_data: Vec<u8>,
211+
diagnostic_file_path: Option<&Path>,
212+
) -> Result<StringTable, Box<dyn Error + Send + Sync>> {
213+
verify_file_header(
214+
&string_data,
215+
FILE_MAGIC_STRINGTABLE_DATA,
216+
diagnostic_file_path,
217+
"StringTable Data",
218+
)?;
219+
verify_file_header(
220+
&index_data,
221+
FILE_MAGIC_STRINGTABLE_INDEX,
222+
diagnostic_file_path,
223+
"StringTable Index",
224+
)?;
222225

223226
assert!(index_data.len() % 8 == 0);
224227
let index: FxHashMap<_, _> = strip_file_header(&index_data)
@@ -243,13 +246,14 @@ impl StringTable {
243246
#[cfg(test)]
244247
mod tests {
245248
use super::*;
246-
use measureme::{SerializationSink, StringComponent, StringTableBuilder};
249+
use measureme::{PageTag, SerializationSinkBuilder, StringComponent, StringTableBuilder};
247250
use std::sync::Arc;
248251

249252
#[test]
250253
fn simple_strings() {
251-
let data_sink = Arc::new(SerializationSink::new_in_memory());
252-
let index_sink = Arc::new(SerializationSink::new_in_memory());
254+
let sink_builder = SerializationSinkBuilder::new_in_memory();
255+
let data_sink = Arc::new(sink_builder.new_sink(PageTag::StringData));
256+
let index_sink = Arc::new(sink_builder.new_sink(PageTag::StringIndex));
253257

254258
let expected_strings = &[
255259
"abc",
@@ -264,7 +268,7 @@ mod tests {
264268
let mut string_ids = vec![];
265269

266270
{
267-
let builder = StringTableBuilder::new(data_sink.clone(), index_sink.clone());
271+
let builder = StringTableBuilder::new(data_sink.clone(), index_sink.clone()).unwrap();
268272

269273
for &s in expected_strings {
270274
string_ids.push(builder.alloc(s));
@@ -274,7 +278,7 @@ mod tests {
274278
let data_bytes = Arc::try_unwrap(data_sink).unwrap().into_bytes();
275279
let index_bytes = Arc::try_unwrap(index_sink).unwrap().into_bytes();
276280

277-
let string_table = StringTable::new(data_bytes, index_bytes).unwrap();
281+
let string_table = StringTable::new(data_bytes, index_bytes, None).unwrap();
278282

279283
for (&id, &expected_string) in string_ids.iter().zip(expected_strings.iter()) {
280284
let str_ref = string_table.get(id);
@@ -289,8 +293,9 @@ mod tests {
289293

290294
#[test]
291295
fn composite_string() {
292-
let data_sink = Arc::new(SerializationSink::new_in_memory());
293-
let index_sink = Arc::new(SerializationSink::new_in_memory());
296+
let sink_builder = SerializationSinkBuilder::new_in_memory();
297+
let data_sink = Arc::new(sink_builder.new_sink(PageTag::StringData));
298+
let index_sink = Arc::new(sink_builder.new_sink(PageTag::StringIndex));
294299

295300
let expected_strings = &[
296301
"abc", // 0
@@ -306,7 +311,7 @@ mod tests {
306311
let mut string_ids = vec![];
307312

308313
{
309-
let builder = StringTableBuilder::new(data_sink.clone(), index_sink.clone());
314+
let builder = StringTableBuilder::new(data_sink.clone(), index_sink.clone()).unwrap();
310315

311316
let r = |id| StringComponent::Ref(id);
312317
let v = |s| StringComponent::Value(s);
@@ -329,7 +334,7 @@ mod tests {
329334
let data_bytes = Arc::try_unwrap(data_sink).unwrap().into_bytes();
330335
let index_bytes = Arc::try_unwrap(index_sink).unwrap().into_bytes();
331336

332-
let string_table = StringTable::new(data_bytes, index_bytes).unwrap();
337+
let string_table = StringTable::new(data_bytes, index_bytes, None).unwrap();
333338

334339
for (&id, &expected_string) in string_ids.iter().zip(expected_strings.iter()) {
335340
let str_ref = string_table.get(id);

analyzeme/src/testing_common.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,18 +96,20 @@ fn generate_profiling_data(
9696
})
9797
.collect();
9898

99-
let expected_events: Vec<_> = threads
100-
.into_iter()
101-
.flat_map(|t| t.join().unwrap())
102-
.collect();
103-
10499
// An example of allocating the string contents of an event id that has
105100
// already been used
106101
profiler.map_virtual_to_concrete_string(
107102
event_id_virtual.to_string_id(),
108103
profiler.alloc_string("SomeQuery"),
109104
);
110105

106+
drop(profiler);
107+
108+
let expected_events: Vec<_> = threads
109+
.into_iter()
110+
.flat_map(|t| t.join().unwrap())
111+
.collect();
112+
111113
expected_events
112114
}
113115

crox/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ fn get_args(full_event: &analyzeme::Event) -> Option<FxHashMap<String, String>>
131131
}
132132
}
133133

134-
fn main() -> Result<(), Box<dyn std::error::Error>> {
134+
fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
135135
let opt = Opt::from_args();
136136

137137
let chrome_file = BufWriter::new(fs::File::create("chrome_profiler.json")?);

flamegraph/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ struct Opt {
1212
file_prefix: PathBuf,
1313
}
1414

15-
fn main() -> Result<(), Box<dyn Error>> {
15+
fn main() -> Result<(), Box<dyn Error + Send + Sync>> {
1616
let opt = Opt::from_args();
1717

1818
let profiling_data = ProfilingData::new(&opt.file_prefix)?;

0 commit comments

Comments
 (0)