diff --git a/src/bootstrap/src/bin/main.rs b/src/bootstrap/src/bin/main.rs
index e1862a451f2dc..181d71f63c2f5 100644
--- a/src/bootstrap/src/bin/main.rs
+++ b/src/bootstrap/src/bin/main.rs
@@ -8,6 +8,7 @@
 use std::fs::{self, OpenOptions};
 use std::io::{self, BufRead, BufReader, IsTerminal, Write};
 use std::str::FromStr;
+use std::time::Instant;
 use std::{env, process};
 
 use bootstrap::{
@@ -17,11 +18,17 @@ use bootstrap::{
 #[cfg(feature = "tracing")]
 use tracing::instrument;
 
+fn is_bootstrap_profiling_enabled() -> bool {
+    env::var("BOOTSTRAP_PROFILE").is_ok_and(|v| v == "1")
+}
+
 #[cfg_attr(feature = "tracing", instrument(level = "trace", name = "main"))]
 fn main() {
     #[cfg(feature = "tracing")]
     let _guard = setup_tracing();
 
+    let start_time = Instant::now();
+
     let args = env::args().skip(1).collect::<Vec<_>>();
 
     if Flags::try_parse_verbose_help(&args) {
@@ -96,7 +103,8 @@ fn main() {
     let out_dir = config.out.clone();
 
     debug!("creating new build based on config");
-    Build::new(config).build();
+    let mut build = Build::new(config);
+    build.build();
 
     if suggest_setup {
         println!("WARNING: you have not made a `bootstrap.toml`");
@@ -147,6 +155,10 @@ fn main() {
             t!(file.write_all(lines.join("\n").as_bytes()));
         }
     }
+
+    if is_bootstrap_profiling_enabled() {
+        build.report_summary(start_time);
+    }
 }
 
 fn check_version(config: &Config) -> Option<String> {
@@ -226,7 +238,7 @@ fn setup_tracing() -> impl Drop {
     let mut chrome_layer = tracing_chrome::ChromeLayerBuilder::new().include_args(true);
 
     // Writes the Chrome profile to trace-<unix-timestamp>.json if enabled
-    if !env::var("BOOTSTRAP_PROFILE").is_ok_and(|v| v == "1") {
+    if !is_bootstrap_profiling_enabled() {
         chrome_layer = chrome_layer.writer(io::sink());
     }
 
diff --git a/src/bootstrap/src/core/build_steps/compile.rs b/src/bootstrap/src/core/build_steps/compile.rs
index 0587d21ecc28d..05965ecc170cd 100644
--- a/src/bootstrap/src/core/build_steps/compile.rs
+++ b/src/bootstrap/src/core/build_steps/compile.rs
@@ -2576,7 +2576,7 @@ pub fn stream_cargo(
     }
 
     // Make sure Cargo actually succeeded after we read all of its stdout.
-    let status = t!(streaming_command.wait());
+    let status = t!(streaming_command.wait(&builder.config.exec_ctx));
     if builder.is_verbose() && !status.success() {
         eprintln!(
             "command did not execute successfully: {cmd:?}\n\
diff --git a/src/bootstrap/src/lib.rs b/src/bootstrap/src/lib.rs
index ef5c28272b8e1..d9a4da8f3cc01 100644
--- a/src/bootstrap/src/lib.rs
+++ b/src/bootstrap/src/lib.rs
@@ -22,7 +22,7 @@ use std::collections::{BTreeSet, HashMap, HashSet};
 use std::fmt::Display;
 use std::path::{Path, PathBuf};
 use std::sync::OnceLock;
-use std::time::SystemTime;
+use std::time::{Instant, SystemTime};
 use std::{env, fs, io, str};
 
 use build_helper::ci::gha;
@@ -1928,6 +1928,10 @@ to download LLVM rather than building it.
     pub fn exec_ctx(&self) -> &ExecutionContext {
         &self.config.exec_ctx
     }
+
+    pub fn report_summary(&self, start_time: Instant) {
+        self.config.exec_ctx.profiler().report_summary(start_time);
+    }
 }
 
 impl AsRef<ExecutionContext> for Build {
diff --git a/src/bootstrap/src/utils/exec.rs b/src/bootstrap/src/utils/exec.rs
index 487077835ac6c..c5bafda88c459 100644
--- a/src/bootstrap/src/utils/exec.rs
+++ b/src/bootstrap/src/utils/exec.rs
@@ -10,13 +10,17 @@
 use std::collections::HashMap;
 use std::ffi::{OsStr, OsString};
 use std::fmt::{Debug, Formatter};
+use std::fs::File;
 use std::hash::Hash;
+use std::io::{BufWriter, Write};
 use std::panic::Location;
 use std::path::Path;
+use std::process;
 use std::process::{
     Child, ChildStderr, ChildStdout, Command, CommandArgs, CommandEnvs, ExitStatus, Output, Stdio,
 };
 use std::sync::{Arc, Mutex};
+use std::time::{Duration, Instant};
 
 use build_helper::ci::CiEnv;
 use build_helper::drop_bomb::DropBomb;
@@ -65,13 +69,157 @@ impl OutputMode {
 }
 
 #[derive(Clone, Debug, PartialEq, Eq, Hash, Default)]
-pub struct CommandCacheKey {
+pub struct CommandFingerprint {
     program: OsString,
     args: Vec<OsString>,
     envs: Vec<(OsString, Option<OsString>)>,
     cwd: Option<PathBuf>,
 }
 
+impl FormatShortCmd for CommandFingerprint {
+    fn format_short_cmd(&self) -> String {
+        let program = Path::new(&self.program);
+        let mut line = vec![program.file_name().unwrap().to_str().unwrap().to_owned()];
+        line.extend(self.args.iter().map(|arg| arg.to_string_lossy().into_owned()));
+        line.extend(self.cwd.iter().map(|p| p.to_string_lossy().into_owned()));
+        line.join(" ")
+    }
+}
+
+#[derive(Default, Clone)]
+pub struct CommandProfile {
+    pub traces: Vec<ExecutionTrace>,
+}
+
+#[derive(Default)]
+pub struct CommandProfiler {
+    stats: Mutex<HashMap<CommandFingerprint, CommandProfile>>,
+}
+
+impl CommandProfiler {
+    pub fn record_execution(&self, key: CommandFingerprint, start_time: Instant) {
+        let mut stats = self.stats.lock().unwrap();
+        let entry = stats.entry(key).or_default();
+        entry.traces.push(ExecutionTrace::Executed { duration: start_time.elapsed() });
+    }
+
+    pub fn record_cache_hit(&self, key: CommandFingerprint) {
+        let mut stats = self.stats.lock().unwrap();
+        let entry = stats.entry(key).or_default();
+        entry.traces.push(ExecutionTrace::CacheHit);
+    }
+
+    pub fn report_summary(&self, start_time: Instant) {
+        let pid = process::id();
+        let filename = format!("bootstrap-profile-{pid}.txt");
+
+        let file = match File::create(&filename) {
+            Ok(f) => f,
+            Err(e) => {
+                eprintln!("Failed to create profiler output file: {e}");
+                return;
+            }
+        };
+
+        let mut writer = BufWriter::new(file);
+        let stats = self.stats.lock().unwrap();
+
+        let mut entries: Vec<_> = stats
+            .iter()
+            .map(|(key, profile)| {
+                let max_duration = profile
+                    .traces
+                    .iter()
+                    .filter_map(|trace| match trace {
+                        ExecutionTrace::Executed { duration, .. } => Some(*duration),
+                        _ => None,
+                    })
+                    .max();
+
+                (key, profile, max_duration)
+            })
+            .collect();
+
+        entries.sort_by(|a, b| b.2.cmp(&a.2));
+
+        let total_bootstrap_duration = start_time.elapsed();
+
+        let total_fingerprints = entries.len();
+        let mut total_cache_hits = 0;
+        let mut total_execution_duration = Duration::ZERO;
+        let mut total_saved_duration = Duration::ZERO;
+
+        for (key, profile, max_duration) in &entries {
+            writeln!(writer, "Command: {:?}", key.format_short_cmd()).unwrap();
+
+            let mut hits = 0;
+            let mut runs = 0;
+            let mut command_total_duration = Duration::ZERO;
+
+            for trace in &profile.traces {
+                match trace {
+                    ExecutionTrace::CacheHit => {
+                        hits += 1;
+                    }
+                    ExecutionTrace::Executed { duration, .. } => {
+                        runs += 1;
+                        command_total_duration += *duration;
+                    }
+                }
+            }
+
+            total_cache_hits += hits;
+            total_execution_duration += command_total_duration;
+            // This makes sense only in our current setup, where:
+            // - If caching is enabled, we record the timing for the initial execution,
+            //   and all subsequent runs will be cache hits.
+            // - If caching is disabled or unused, there will be no cache hits,
+            //   and we'll record timings for all executions.
+            total_saved_duration += command_total_duration * hits as u32;
+
+            let command_vs_bootstrap = if total_bootstrap_duration > Duration::ZERO {
+                100.0 * command_total_duration.as_secs_f64()
+                    / total_bootstrap_duration.as_secs_f64()
+            } else {
+                0.0
+            };
+
+            let duration_str = match max_duration {
+                Some(d) => format!("{d:.2?}"),
+                None => "-".into(),
+            };
+
+            writeln!(
+                writer,
+                "Summary: {runs} run(s), {hits} hit(s), max_duration={duration_str} total_duration: {command_total_duration:.2?} ({command_vs_bootstrap:.2?}% of total)\n"
+            )
+            .unwrap();
+        }
+
+        let overhead_time = total_bootstrap_duration
+            .checked_sub(total_execution_duration)
+            .unwrap_or(Duration::ZERO);
+
+        writeln!(writer, "\n=== Aggregated Summary ===").unwrap();
+        writeln!(writer, "Total unique commands (fingerprints): {total_fingerprints}").unwrap();
+        writeln!(writer, "Total time spent in command executions: {total_execution_duration:.2?}")
+            .unwrap();
+        writeln!(writer, "Total bootstrap time: {total_bootstrap_duration:.2?}").unwrap();
+        writeln!(writer, "Time spent outside command executions: {overhead_time:.2?}").unwrap();
+        writeln!(writer, "Total cache hits: {total_cache_hits}").unwrap();
+        writeln!(writer, "Estimated time saved due to cache hits: {total_saved_duration:.2?}")
+            .unwrap();
+
+        println!("Command profiler report saved to {filename}");
+    }
+}
+
+#[derive(Clone)]
+pub enum ExecutionTrace {
+    CacheHit,
+    Executed { duration: Duration },
+}
+
 /// Wrapper around `std::process::Command`.
 ///
 /// By default, the command will exit bootstrap if it fails.
@@ -244,12 +392,9 @@ impl<'a> BootstrapCommand {
         }
     }
 
-    pub fn cache_key(&self) -> Option<CommandCacheKey> {
-        if !self.should_cache {
-            return None;
-        }
+    pub fn fingerprint(&self) -> CommandFingerprint {
         let command = &self.command;
-        Some(CommandCacheKey {
+        CommandFingerprint {
             program: command.get_program().into(),
             args: command.get_args().map(OsStr::to_os_string).collect(),
             envs: command
@@ -257,7 +402,7 @@ impl<'a> BootstrapCommand {
                 .map(|(k, v)| (k.to_os_string(), v.map(|val| val.to_os_string())))
                 .collect(),
             cwd: command.get_current_dir().map(Path::to_path_buf),
-        })
+        }
     }
 }
 
@@ -402,7 +547,6 @@ impl Default for CommandOutput {
 
 /// Helper trait to format both Command and BootstrapCommand as a short execution line,
 /// without all the other details (e.g. environment variables).
-#[cfg(feature = "tracing")]
 pub trait FormatShortCmd {
     fn format_short_cmd(&self) -> String;
 }
@@ -431,11 +575,12 @@ pub struct ExecutionContext {
     pub fail_fast: bool,
     delayed_failures: Arc<Mutex<Vec<String>>>,
     command_cache: Arc<CommandCache>,
+    profiler: Arc<CommandProfiler>,
 }
 
 #[derive(Default)]
 pub struct CommandCache {
-    cache: Mutex<HashMap<CommandCacheKey, CommandOutput>>,
+    cache: Mutex<HashMap<CommandFingerprint, CommandOutput>>,
 }
 
 enum CommandState<'a> {
@@ -446,7 +591,8 @@ enum CommandState<'a> {
         stdout: OutputMode,
         stderr: OutputMode,
         executed_at: &'a Location<'a>,
-        cache_key: Option<CommandCacheKey>,
+        fingerprint: CommandFingerprint,
+        start_time: Instant,
     },
 }
 
@@ -454,6 +600,8 @@ pub struct StreamingCommand {
     child: Child,
     pub stdout: Option<ChildStdout>,
     pub stderr: Option<ChildStderr>,
+    fingerprint: CommandFingerprint,
+    start_time: Instant,
 }
 
 #[must_use]
@@ -462,11 +610,11 @@ pub struct DeferredCommand<'a> {
 }
 
 impl CommandCache {
-    pub fn get(&self, key: &CommandCacheKey) -> Option<CommandOutput> {
+    pub fn get(&self, key: &CommandFingerprint) -> Option<CommandOutput> {
         self.cache.lock().unwrap().get(key).cloned()
     }
 
-    pub fn insert(&self, key: CommandCacheKey, output: CommandOutput) {
+    pub fn insert(&self, key: CommandFingerprint, output: CommandOutput) {
         self.cache.lock().unwrap().insert(key, output);
     }
 }
@@ -483,6 +631,10 @@ impl ExecutionContext {
         }
     }
 
+    pub fn profiler(&self) -> &CommandProfiler {
+        &self.profiler
+    }
+
     pub fn get_dry_run(&self) -> &DryRun {
         &self.dry_run
     }
@@ -539,12 +691,12 @@ impl ExecutionContext {
         stdout: OutputMode,
         stderr: OutputMode,
     ) -> DeferredCommand<'a> {
-        let cache_key = command.cache_key();
+        let fingerprint = command.fingerprint();
 
-        if let Some(cached_output) = cache_key.as_ref().and_then(|key| self.command_cache.get(key))
-        {
+        if let Some(cached_output) = self.command_cache.get(&fingerprint) {
             command.mark_as_executed();
             self.verbose(|| println!("Cache hit: {command:?}"));
+            self.profiler.record_cache_hit(fingerprint);
             return DeferredCommand { state: CommandState::Cached(cached_output) };
         }
 
@@ -559,7 +711,8 @@ impl ExecutionContext {
                     stdout,
                     stderr,
                     executed_at,
-                    cache_key,
+                    fingerprint,
+                    start_time: Instant::now(),
                 },
             };
         }
@@ -575,6 +728,8 @@ impl ExecutionContext {
         cmd.stdout(stdout.stdio());
         cmd.stderr(stderr.stdio());
 
+        let start_time = Instant::now();
+
         let child = cmd.spawn();
 
         DeferredCommand {
@@ -584,7 +739,8 @@ impl ExecutionContext {
                 stdout,
                 stderr,
                 executed_at,
-                cache_key,
+                fingerprint,
+                start_time,
             },
         }
     }
@@ -638,6 +794,8 @@ impl ExecutionContext {
         if !command.run_in_dry_run && self.dry_run() {
             return None;
         }
+        let start_time = Instant::now();
+        let fingerprint = command.fingerprint();
         let cmd = &mut command.command;
         cmd.stdout(stdout.stdio());
         cmd.stderr(stderr.stdio());
@@ -649,7 +807,7 @@ impl ExecutionContext {
 
         let stdout = child.stdout.take();
         let stderr = child.stderr.take();
-        Some(StreamingCommand { child, stdout, stderr })
+        Some(StreamingCommand { child, stdout, stderr, fingerprint, start_time })
     }
 }
 
@@ -660,8 +818,14 @@ impl AsRef<ExecutionContext> for ExecutionContext {
 }
 
 impl StreamingCommand {
-    pub fn wait(mut self) -> Result<ExitStatus, std::io::Error> {
-        self.child.wait()
+    pub fn wait(
+        mut self,
+        exec_ctx: impl AsRef<ExecutionContext>,
+    ) -> Result<ExitStatus, std::io::Error> {
+        let exec_ctx = exec_ctx.as_ref();
+        let output = self.child.wait();
+        exec_ctx.profiler().record_execution(self.fingerprint, self.start_time);
+        output
     }
 }
 
@@ -669,16 +833,26 @@ impl<'a> DeferredCommand<'a> {
     pub fn wait_for_output(self, exec_ctx: impl AsRef<ExecutionContext>) -> CommandOutput {
         match self.state {
             CommandState::Cached(output) => output,
-            CommandState::Deferred { process, command, stdout, stderr, executed_at, cache_key } => {
+            CommandState::Deferred {
+                process,
+                command,
+                stdout,
+                stderr,
+                executed_at,
+                fingerprint,
+                start_time,
+            } => {
                 let exec_ctx = exec_ctx.as_ref();
 
                 let output =
                     Self::finish_process(process, command, stdout, stderr, executed_at, exec_ctx);
 
                 if (!exec_ctx.dry_run() || command.run_in_dry_run)
-                    && let (Some(cache_key), Some(_)) = (&cache_key, output.status())
+                    && output.status().is_some()
+                    && command.should_cache
                 {
-                    exec_ctx.command_cache.insert(cache_key.clone(), output.clone());
+                    exec_ctx.command_cache.insert(fingerprint.clone(), output.clone());
+                    exec_ctx.profiler.record_execution(fingerprint.clone(), start_time);
                 }
 
                 output
diff --git a/src/bootstrap/src/utils/render_tests.rs b/src/bootstrap/src/utils/render_tests.rs
index 934699d736b46..40006aca5c53f 100644
--- a/src/bootstrap/src/utils/render_tests.rs
+++ b/src/bootstrap/src/utils/render_tests.rs
@@ -63,7 +63,7 @@ fn run_tests(builder: &Builder<'_>, cmd: &mut BootstrapCommand, stream: bool) ->
         renderer.render_all();
     }
 
-    let status = streaming_command.wait().unwrap();
+    let status = streaming_command.wait(&builder.config.exec_ctx).unwrap();
     if !status.success() && builder.is_verbose() {
         println!(
             "\n\ncommand did not execute successfully: {cmd:?}\n\
diff --git a/src/doc/rustc-dev-guide/src/building/bootstrapping/debugging-bootstrap.md b/src/doc/rustc-dev-guide/src/building/bootstrapping/debugging-bootstrap.md
index ed26785040147..c9c0d64a604e2 100644
--- a/src/doc/rustc-dev-guide/src/building/bootstrapping/debugging-bootstrap.md
+++ b/src/doc/rustc-dev-guide/src/building/bootstrapping/debugging-bootstrap.md
@@ -168,10 +168,17 @@ For `#[instrument]`, it's recommended to:
 
 ### Profiling bootstrap
 
-You can use the `COMMAND` tracing target to trace execution of most commands spawned by bootstrap. If you also use the `BOOTSTRAP_PROFILE=1` environment variable, bootstrap will generate a Chrome JSON trace file, which can be visualized in Chrome's `chrome://tracing` page or on https://ui.perfetto.dev.
+You can set the `BOOTSTRAP_PROFILE=1` environment variable to enable command execution profiling during bootstrap. This generates:
+
+* A Chrome trace file (for visualization in `chrome://tracing` or [Perfetto](https://ui.perfetto.dev)) if tracing is enabled via `BOOTSTRAP_TRACING=COMMAND=trace`
+* A plain-text summary file, `bootstrap-profile-{pid}.txt`, listing all commands sorted by execution time (slowest first), along with cache hits and working directories
+
+Note: the `.txt` report is always generated when `BOOTSTRAP_PROFILE=1` is set — tracing is not required.
+
+Example usage:
 
 ```bash
-$ BOOTSTRAP_TRACING=COMMAND=trace BOOTSTRAP_PROFILE=1 ./x build library
+$ BOOTSTRAP_PROFILE=1 BOOTSTRAP_TRACING=COMMAND=trace ./x build library
 ```
 
 ### rust-analyzer integration?