Skip to content

Commit e04433c

Browse files
Decompress data only once
1 parent 7d5060e commit e04433c

File tree

1 file changed

+33
-9
lines changed

1 file changed

+33
-9
lines changed

src/recompress.rs

Lines changed: 33 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
use crate::Context;
1414
use rayon::prelude::*;
1515
use std::fs::{self, File};
16-
use std::io;
16+
use std::io::{self, Read, Write};
1717
use std::path::PathBuf;
1818
use std::time::Instant;
1919
use xz2::read::XzDecoder;
@@ -42,12 +42,15 @@ impl Context {
4242
println!("recompressing {}...", xz_path.display());
4343
let gz_path = xz_path.with_extension("gz");
4444

45+
let mut destinations: Vec<Box<dyn io::Write>> = Vec::new();
46+
4547
// Produce gzip if explicitly enabled or the destination file doesn't exist.
4648
if recompress_gz || !gz_path.is_file() {
47-
let mut xz_orig = XzDecoder::new(File::open(xz_path)?);
4849
let gz = File::create(gz_path)?;
49-
let mut gz = flate2::write::GzEncoder::new(gz, compression_level);
50-
io::copy(&mut xz_orig, &mut gz)?;
50+
destinations.push(Box::new(flate2::write::GzEncoder::new(
51+
gz,
52+
compression_level,
53+
)));
5154
}
5255

5356
// xz recompression with more aggressive settings than we want to take the time
@@ -60,6 +63,7 @@ impl Context {
6063
// <192MB uncompressed tarballs. In promote-release since we're recompressing
6164
// 100s of tarballs there's no need for each individual compression to be
6265
// parallel.
66+
let xz_recompressed = xz_path.with_extension("xz_recompressed");
6367
if recompress_xz {
6468
let mut filters = xz2::stream::Filters::new();
6569
let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9).unwrap();
@@ -87,12 +91,32 @@ impl Context {
8791
let stream =
8892
xz2::stream::Stream::new_stream_encoder(&filters, xz2::stream::Check::None)
8993
.unwrap();
90-
let xz_recompressed = xz_path.with_extension("xz_recompressed");
9194
let xz_out = File::create(&xz_recompressed)?;
92-
let mut xz_out =
93-
xz2::write::XzEncoder::new_stream(std::io::BufWriter::new(xz_out), stream);
94-
let mut xz_orig = XzDecoder::new(File::open(xz_path)?);
95-
io::copy(&mut xz_orig, &mut xz_out)?;
95+
destinations.push(Box::new(xz2::write::XzEncoder::new_stream(
96+
std::io::BufWriter::new(xz_out),
97+
stream,
98+
)));
99+
}
100+
101+
// We only decompress once and then write into each of the compressors before
102+
// moving on.
103+
//
104+
// This code assumes that compression with `write_all` will never fail (i.e., we
105+
// can take arbitrary amounts of data as input). That seems like a reasonable
106+
// assumption though.
107+
let mut decompressor = XzDecoder::new(File::open(xz_path)?);
108+
let mut buffer = vec![0u8; 4 * 1024 * 1024];
109+
loop {
110+
let length = decompressor.read(&mut buffer)?;
111+
if length == 0 {
112+
break;
113+
}
114+
for destination in destinations.iter_mut() {
115+
destination.write_all(&buffer[..length])?;
116+
}
117+
}
118+
119+
if recompress_xz {
96120
fs::rename(&xz_recompressed, xz_path)?;
97121
}
98122

0 commit comments

Comments
 (0)