1
+ use std:: fs:: File ;
2
+ use std:: num:: NonZeroUsize ;
3
+ use std:: ptr:: NonNull ;
4
+
1
5
use crate :: config:: Config ;
2
6
use crate :: crates:: Crate ;
3
7
use crate :: experiments:: Experiment ;
@@ -7,6 +11,49 @@ use crate::results::{EncodedLog, EncodingType, ReadResults};
7
11
use flate2:: { write:: GzEncoder , Compression } ;
8
12
use indexmap:: IndexMap ;
9
13
use tar:: { Builder as TarBuilder , Header as TarHeader } ;
14
+ use tempfile:: tempfile;
15
+
16
/// A read-only, memory-mapped view of a temporary file.
///
/// Lets large blobs (e.g. the all-logs tarball) be handed around as a `&[u8]`
/// without holding the whole contents in anonymous memory. The mapping is
/// created in `new()` and released in `Drop`.
struct TempfileBackedBuffer {
    // Kept only to tie the file descriptor's lifetime to the mapping;
    // never read directly after construction.
    _file: File,
    // Pointer + length of the PROT_READ mapping produced by `mmap` in `new()`.
    mmap: NonNull<[u8]>,
}
20
+
21
impl TempfileBackedBuffer {
    /// Memory-maps the entire current contents of `file` read-only and takes
    /// ownership of the file so the descriptor outlives the mapping.
    ///
    /// NOTE(review): the file must not be truncated or resized while the
    /// buffer is alive — the mapping length is fixed here from `metadata()`.
    ///
    /// Panics if the file is empty (`NonZeroUsize::new(0)` returns `None`) or
    /// if its length does not fit in `usize`; callers currently pass only
    /// freshly written, non-empty tempfiles — TODO confirm that invariant.
    fn new(file: File) -> Fallible<TempfileBackedBuffer> {
        // File length in bytes, converted u64 -> usize (unwrap: see panics above).
        let len = file.metadata()?.len().try_into().unwrap();
        unsafe {
            // SAFETY: we request a fresh PROT_READ / MAP_PRIVATE mapping of
            // `len` bytes over `file`; the fd remains valid because the
            // `File` is stored in the returned struct. mmap failure surfaces
            // through the `?` below.
            let base = nix::sys::mman::mmap(
                None, // no address hint — let the kernel pick the placement
                NonZeroUsize::new(len).unwrap(),
                nix::sys::mman::ProtFlags::PROT_READ,
                nix::sys::mman::MapFlags::MAP_PRIVATE,
                Some(&file),
                0, // offset 0: map from the start of the file
            )?;
            // A null pointer here would mean mmap "succeeded" with NULL,
            // which the Err branch above should have ruled out — treat it as
            // a broken invariant rather than a recoverable error.
            let Some(base) = NonNull::new(base as *mut u8) else {
                panic!("Failed to map file");
            };
            Ok(TempfileBackedBuffer {
                _file: file,
                mmap: NonNull::slice_from_raw_parts(base, len),
            })
        }
    }

    /// Returns the mapped file contents as a byte slice borrowed from `self`.
    fn buffer(&self) -> &[u8] {
        // SAFETY: `mmap` points at a live read-only mapping of exactly
        // `mmap.len()` bytes (established in `new()`), which stays valid for
        // as long as `self` (and thus `_file`) is alive.
        unsafe { self.mmap.as_ref() }
    }
}
47
+
48
impl Drop for TempfileBackedBuffer {
    /// Unmaps the buffer. An `munmap` failure is only logged — `drop` has no
    /// way to propagate an error, and leaking the mapping is preferable to
    /// panicking during unwinding.
    fn drop(&mut self) {
        unsafe {
            // SAFETY: `self.mmap` is the exact pointer/length pair returned
            // by the `mmap` call in `new()`, and it is unmapped exactly once,
            // here; no other code unmaps or outlives this region.
            if let Err(e) = nix::sys::mman::munmap(self.mmap.as_ptr() as *mut _, self.mmap.len()) {
                eprintln!("Failed to unmap temporary file: {:?}", e);
            }
        }
    }
}
10
57
11
58
#[ derive( Serialize ) ]
12
59
pub struct Archive {
@@ -100,18 +147,23 @@ fn write_all_archive<DB: ReadResults, W: ReportWriter>(
100
147
config : & Config ,
101
148
) -> Fallible < Archive > {
102
149
for i in 1 ..=RETRIES {
103
- let mut all = TarBuilder :: new ( GzEncoder :: new ( Vec :: new ( ) , Compression :: default ( ) ) ) ;
150
+ // We write this large-ish tarball into a tempfile, which moves the I/O to disk operations
151
+ // rather than keeping it in memory. This avoids complicating the code by doing incremental
152
+ // writes to S3 (requiring buffer management etc) while avoiding keeping the blob entirely
153
+ // in memory.
154
+ let backing = tempfile ( ) ?;
155
+ let mut all = TarBuilder :: new ( GzEncoder :: new ( backing, Compression :: default ( ) ) ) ;
104
156
for entry in iterate ( db, ex, crates, config) {
105
157
let entry = entry?;
106
158
let mut header = entry. header ( ) ;
107
159
all. append_data ( & mut header, & entry. path , & entry. log_bytes [ ..] ) ?;
108
160
}
109
161
110
162
let data = all. into_inner ( ) ?. finish ( ) ?;
111
- let len = data . len ( ) ;
163
+ let buffer = TempfileBackedBuffer :: new ( data ) ? ;
112
164
match dest. write_bytes (
113
165
"logs-archives/all.tar.gz" ,
114
- & data ,
166
+ buffer . buffer ( ) ,
115
167
& "application/gzip" . parse ( ) . unwrap ( ) ,
116
168
EncodingType :: Plain ,
117
169
) {
@@ -123,7 +175,10 @@ fn write_all_archive<DB: ReadResults, W: ReportWriter>(
123
175
std:: thread:: sleep ( std:: time:: Duration :: from_secs ( 2 ) ) ;
124
176
warn ! (
125
177
"retry ({}/{}) writing logs-archives/all.tar.gz ({} bytes) (error: {:?})" ,
126
- i, RETRIES , len, e,
178
+ i,
179
+ RETRIES ,
180
+ buffer. buffer( ) . len( ) ,
181
+ e,
127
182
) ;
128
183
continue ;
129
184
}
0 commit comments