Skip to content

Commit 2445ae4

Browse files
Back the largest archive by a tempfile (backed by disk)
This allows for larger archives to be written without impacting our memory usage, which typically leads to OOMs and general bad behavior.
1 parent 7a5c420 commit 2445ae4

File tree

4 files changed

+62
-5
lines changed

4 files changed

+62
-5
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ tokio = "1.24"
5959
aws-sdk-s3 = "1.7"
6060
aws-config = { version = "1", features = ["behavior-version-latest"] }
6161
thiserror = "1.0.38"
62+
nix = { version = "0.27.1", features = ["mman"] }
6263

6364
[dev-dependencies]
6465
assert_cmd = "2.0.4"

src/report/archives.rs

Lines changed: 59 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
use std::fs::File;
2+
use std::num::NonZeroUsize;
3+
use std::ptr::NonNull;
4+
15
use crate::config::Config;
26
use crate::crates::Crate;
37
use crate::experiments::Experiment;
@@ -7,6 +11,49 @@ use crate::results::{EncodedLog, EncodingType, ReadResults};
711
use flate2::{write::GzEncoder, Compression};
812
use indexmap::IndexMap;
913
use tar::{Builder as TarBuilder, Header as TarHeader};
14+
use tempfile::tempfile;
15+
16+
struct TempfileBackedBuffer {
17+
_file: File,
18+
mmap: NonNull<[u8]>,
19+
}
20+
21+
impl TempfileBackedBuffer {
22+
fn new(file: File) -> Fallible<TempfileBackedBuffer> {
23+
let len = file.metadata()?.len().try_into().unwrap();
24+
unsafe {
25+
let base = nix::sys::mman::mmap(
26+
None,
27+
NonZeroUsize::new(len).unwrap(),
28+
nix::sys::mman::ProtFlags::PROT_READ,
29+
nix::sys::mman::MapFlags::MAP_PRIVATE,
30+
Some(&file),
31+
0,
32+
)?;
33+
let Some(base) = NonNull::new(base as *mut u8) else {
34+
panic!("Failed to map file");
35+
};
36+
Ok(TempfileBackedBuffer {
37+
_file: file,
38+
mmap: NonNull::slice_from_raw_parts(base, len),
39+
})
40+
}
41+
}
42+
43+
fn buffer(&self) -> &[u8] {
44+
unsafe { self.mmap.as_ref() }
45+
}
46+
}
47+
48+
impl Drop for TempfileBackedBuffer {
49+
fn drop(&mut self) {
50+
unsafe {
51+
if let Err(e) = nix::sys::mman::munmap(self.mmap.as_ptr() as *mut _, self.mmap.len()) {
52+
eprintln!("Failed to unmap temporary file: {:?}", e);
53+
}
54+
}
55+
}
56+
}
1057

1158
#[derive(Serialize)]
1259
pub struct Archive {
@@ -100,18 +147,23 @@ fn write_all_archive<DB: ReadResults, W: ReportWriter>(
100147
config: &Config,
101148
) -> Fallible<Archive> {
102149
for i in 1..=RETRIES {
103-
let mut all = TarBuilder::new(GzEncoder::new(Vec::new(), Compression::default()));
150+
// We write this large-ish tarball into a tempfile, which moves the I/O to disk operations
151+
// rather than keeping it in memory. This avoids complicating the code by doing incremental
152+
// writes to S3 (requiring buffer management etc) while avoiding keeping the blob entirely
153+
// in memory.
154+
let backing = tempfile()?;
155+
let mut all = TarBuilder::new(GzEncoder::new(backing, Compression::default()));
104156
for entry in iterate(db, ex, crates, config) {
105157
let entry = entry?;
106158
let mut header = entry.header();
107159
all.append_data(&mut header, &entry.path, &entry.log_bytes[..])?;
108160
}
109161

110162
let data = all.into_inner()?.finish()?;
111-
let len = data.len();
163+
let buffer = TempfileBackedBuffer::new(data)?;
112164
match dest.write_bytes(
113165
"logs-archives/all.tar.gz",
114-
&data,
166+
buffer.buffer(),
115167
&"application/gzip".parse().unwrap(),
116168
EncodingType::Plain,
117169
) {
@@ -123,7 +175,10 @@ fn write_all_archive<DB: ReadResults, W: ReportWriter>(
123175
std::thread::sleep(std::time::Duration::from_secs(2));
124176
warn!(
125177
"retry ({}/{}) writing logs-archives/all.tar.gz ({} bytes) (error: {:?})",
126-
i, RETRIES, len, e,
178+
i,
179+
RETRIES,
180+
buffer.buffer().len(),
181+
e,
127182
);
128183
continue;
129184
}

src/report/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,7 @@ impl ReportWriter for DummyWriter {
626626
self.results
627627
.lock()
628628
.unwrap()
629-
.insert((path.as_ref().to_path_buf(), mime.clone()), b);
629+
.insert((path.as_ref().to_path_buf(), mime.clone()), b.to_vec());
630630
Ok(())
631631
}
632632

0 commit comments

Comments
 (0)