Try to optimize runtime by sorting the recompression list

Mark-Simulacrum · Mark-Simulacrum · commit ebb399934fa9 · 2023-04-09T10:50:04.000-04:00
See comment in code for details. Currently our builds spend 23 of the 48
minutes of recompression runtime at less than full CPU utilization. The
hope is that this will help cut down on that time, so that we can make
more efficient use of the available resources.

If this doesn't have any effect, we'll consider the FIXME in the code as
well. xz-compressed tarballs also carry metadata on the decompressed size,
which is not currently readable through xz2 but is another option in case
file length is too poor a proxy.
diff --git a/src/main.rs b/src/main.rs
@@ -391,7 +391,7 @@ impl Context {
         }
 
         // Generate recompressed artifacts from the input set.
-        self.recompress(&to_recompress)?;
+        self.recompress(to_recompress)?;
 
         Ok(())
     }
diff --git a/src/recompress.rs b/src/recompress.rs
@@ -19,7 +19,7 @@ use std::time::Instant;
 use xz2::read::XzDecoder;
 
 impl Context {
-    pub fn recompress(&self, to_recompress: &[PathBuf]) -> anyhow::Result<()> {
+    pub fn recompress(&self, mut to_recompress: Vec<PathBuf>) -> anyhow::Result<()> {
         println!(
             "starting to recompress {} files across {} threads",
             to_recompress.len(),
@@ -36,6 +36,19 @@ impl Context {
         let recompress_xz = self.config.recompress_xz;
         let compression_level = flate2::Compression::new(self.config.gzip_compression_level);
 
+        // Query the length of each file, and sort by length. This puts the smallest files
+        // toward the end of the array, which will generally deprioritize them in the
+        // next parallel loop, avoiding as much of a long-tail on the compression work
+        // (smallest files are fastest to recompress typically).
+        //
+        // FIXME: Rayon's documentation on par_iter isn't very detailed in terms of whether this
+        // does any good. We may want to replace this with our own manual thread pool
+        // implementation that guarantees this property - each task is large enough that just
+        // popping from a single Mutex<Vec<...>> will be plenty fast enough.
+        to_recompress.sort_by_cached_key(|path| {
+            std::cmp::Reverse(fs::metadata(path).map(|m| m.len()).unwrap_or(0))
+        });
+
         to_recompress
             .par_iter()
             .map(|xz_path| {

Original file line number	Diff line number	Diff line change
`@@ -391,7 +391,7 @@ impl Context {`
`391`	`391`	`}`
`392`	`392`
`393`	`393`	`// Generate recompressed artifacts from the input set.`
`394`		`- self.recompress(&to_recompress)?;`
	`394`	`+ self.recompress(to_recompress)?;`
`395`	`395`
`396`	`396`	`Ok(())`
`397`	`397`	`}`