From 1bbabb7ff744f18edaf3da30f841a287de2077d6 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Thu, 4 Apr 2024 23:36:13 +0100 Subject: [PATCH 01/38] std::net: adding `unix_socket_exclbind` feature for solaris/illumos. allows to have a tigher control over the binding exclusivness of the socket. --- library/std/src/os/illumos/mod.rs | 1 + library/std/src/os/illumos/net.rs | 50 +++++++++++++++++++ library/std/src/os/solaris/mod.rs | 1 + library/std/src/os/solaris/net.rs | 50 +++++++++++++++++++ .../std/src/sys/net/connection/socket/unix.rs | 15 ++++++ 5 files changed, 117 insertions(+) create mode 100644 library/std/src/os/illumos/net.rs create mode 100644 library/std/src/os/solaris/net.rs diff --git a/library/std/src/os/illumos/mod.rs b/library/std/src/os/illumos/mod.rs index e61926f89356a..5fbe352e7cfbd 100644 --- a/library/std/src/os/illumos/mod.rs +++ b/library/std/src/os/illumos/mod.rs @@ -3,4 +3,5 @@ #![stable(feature = "raw_ext", since = "1.1.0")] pub mod fs; +pub mod net; pub mod raw; diff --git a/library/std/src/os/illumos/net.rs b/library/std/src/os/illumos/net.rs new file mode 100644 index 0000000000000..5ef4e1ec89e36 --- /dev/null +++ b/library/std/src/os/illumos/net.rs @@ -0,0 +1,50 @@ +//! illumos-specific networking functionality. + +#![unstable(feature = "unix_socket_exclbind", issue = "123481")] + +use crate::io; +use crate::os::unix::net; +use crate::sealed::Sealed; +use crate::sys_common::AsInner; + +/// illumos-specific functionality for `AF_UNIX` sockets [`UnixDatagram`] +/// and [`UnixStream`]. +/// +/// [`UnixDatagram`]: net::UnixDatagram +/// [`UnixStream`]: net::UnixStream +#[unstable(feature = "unix_socket_exclbind", issue = "123481")] +pub trait UnixSocketExt: Sealed { + /// Enables exclusive binding on the socket. + /// + /// If true and if the socket had been set with `SO_REUSEADDR`, + /// it neutralises its effect. + /// See [`man 3 tcp`](https://docs.oracle.com/cd/E88353_01/html/E37843/setsockopt-3c.html) + #[unstable(feature = "unix_socket_exclbind", issue = "123481")] + fn so_exclbind(&self, excl: bool) -> io::Result<()>; + + /// Get the bind exclusivity bind state of the socket. + #[unstable(feature = "unix_socket_exclbind", issue = "123481")] + fn exclbind(&self) -> io::Result; +} + +#[unstable(feature = "unix_socket_exclbind", issue = "123481")] +impl UnixSocketExt for net::UnixDatagram { + fn exclbind(&self) -> io::Result { + self.as_inner().exclbind() + } + + fn so_exclbind(&self, excl: bool) -> io::Result<()> { + self.as_inner().set_exclbind(excl) + } +} + +#[unstable(feature = "unix_socket_exclbind", issue = "123481")] +impl UnixSocketExt for net::UnixStream { + fn exclbind(&self) -> io::Result { + self.as_inner().exclbind() + } + + fn so_exclbind(&self, excl: bool) -> io::Result<()> { + self.as_inner().set_exclbind(excl) + } +} diff --git a/library/std/src/os/solaris/mod.rs b/library/std/src/os/solaris/mod.rs index e4cfd53291a6e..b4e836208491c 100644 --- a/library/std/src/os/solaris/mod.rs +++ b/library/std/src/os/solaris/mod.rs @@ -3,4 +3,5 @@ #![stable(feature = "raw_ext", since = "1.1.0")] pub mod fs; +pub mod net; pub mod raw; diff --git a/library/std/src/os/solaris/net.rs b/library/std/src/os/solaris/net.rs new file mode 100644 index 0000000000000..ca841f15b0e72 --- /dev/null +++ b/library/std/src/os/solaris/net.rs @@ -0,0 +1,50 @@ +//! solaris-specific networking functionality. + +#![unstable(feature = "unix_socket_exclbind", issue = "123481")] + +use crate::io; +use crate::os::unix::net; +use crate::sealed::Sealed; +use crate::sys_common::AsInner; + +/// solaris-specific functionality for `AF_UNIX` sockets [`UnixDatagram`] +/// and [`UnixStream`]. +/// +/// [`UnixDatagram`]: net::UnixDatagram +/// [`UnixStream`]: net::UnixStream +#[unstable(feature = "unix_socket_exclbind", issue = "123481")] +pub trait UnixSocketExt: Sealed { + /// Enables exclusive binding on the socket. + /// + /// If true and if the socket had been set with `SO_REUSEADDR`, + /// it neutralises its effect. + /// See [`man 3 tcp`](https://docs.oracle.com/cd/E88353_01/html/E37843/setsockopt-3c.html) + #[unstable(feature = "unix_socket_exclbind", issue = "123481")] + fn so_exclbind(&self, excl: bool) -> io::Result<()>; + + /// Get the bind exclusivity bind state of the socket. + #[unstable(feature = "unix_socket_exclbind", issue = "123481")] + fn exclbind(&self) -> io::Result; +} + +#[unstable(feature = "unix_socket_exclbind", issue = "123481")] +impl UnixSocketExt for net::UnixDatagram { + fn exclbind(&self) -> io::Result { + self.as_inner().exclbind() + } + + fn so_exclbind(&self, excl: bool) -> io::Result<()> { + self.as_inner().set_exclbind(excl) + } +} + +#[unstable(feature = "unix_socket_exclbind", issue = "123481")] +impl UnixSocketExt for net::UnixStream { + fn exclbind(&self) -> io::Result { + self.as_inner().exclbind() + } + + fn so_exclbind(&self, excl: bool) -> io::Result<()> { + self.as_inner().set_exclbind(excl) + } +} diff --git a/library/std/src/sys/net/connection/socket/unix.rs b/library/std/src/sys/net/connection/socket/unix.rs index b35d5d2aa8418..37d4078b9639b 100644 --- a/library/std/src/sys/net/connection/socket/unix.rs +++ b/library/std/src/sys/net/connection/socket/unix.rs @@ -514,6 +514,21 @@ impl Socket { Ok(name) } + #[cfg(any(target_os = "solaris", target_os = "illumos"))] + pub fn set_exclbind(&self, excl: bool) -> io::Result<()> { + // not yet on libc crate + const SO_EXCLBIND: i32 = 0x1015; + setsockopt(self, libc::SOL_SOCKET, SO_EXCLBIND, excl) + } + + #[cfg(any(target_os = "solaris", target_os = "illumos"))] + pub fn exclbind(&self) -> io::Result { + // not yet on libc crate + const SO_EXCLBIND: i32 = 0x1015; + let raw: c_int = getsockopt(self, libc::SOL_SOCKET, SO_EXCLBIND)?; + Ok(raw != 0) + } + #[cfg(any(target_os = "android", target_os = "linux",))] pub fn set_passcred(&self, passcred: bool) -> io::Result<()> { setsockopt(self, libc::SOL_SOCKET, libc::SO_PASSCRED, passcred as libc::c_int) From de213b9f351a469e2a0eabe7fd70ffbe83935bc2 Mon Sep 17 00:00:00 2001 From: The Miri Cronjob Bot Date: Tue, 17 Jun 2025 04:55:16 +0000 Subject: [PATCH 02/38] Preparing for merge from rustc --- src/tools/miri/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index 90c6e31e9d119..69a56ffdc9b3f 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -0cbc0764380630780a275c437260e4d4d5f28c92 +55d436467c351b56253deeba209ae2553d1c243f From fc7480a1327ec583aafccb7ec0cccb685652da7e Mon Sep 17 00:00:00 2001 From: The Miri Cronjob Bot Date: Wed, 18 Jun 2025 04:55:52 +0000 Subject: [PATCH 03/38] Preparing for merge from rustc --- src/tools/miri/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index 69a56ffdc9b3f..6a74011a36ff0 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -55d436467c351b56253deeba209ae2553d1c243f +77ec48f5642ee1aa451d270f11f308c297f55f76 From 2701dbb8e75dac68b33a3e7cd13673b3a66c9e15 Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Tue, 27 May 2025 22:31:23 +0200 Subject: [PATCH 04/38] minimal ptrace setup Apply suggestions from code review Co-authored-by: Oli Scherer review comments fix possible hang --- src/tools/miri/Cargo.lock | 178 +++++++++++- src/tools/miri/Cargo.toml | 5 + src/tools/miri/README.md | 5 + src/tools/miri/src/alloc/isolated_alloc.rs | 12 + src/tools/miri/src/bin/miri.rs | 15 +- src/tools/miri/src/diagnostics.rs | 27 ++ src/tools/miri/src/eval.rs | 3 + src/tools/miri/src/lib.rs | 3 + src/tools/miri/src/shims/mod.rs | 2 + src/tools/miri/src/shims/native_lib.rs | 130 +++++++-- src/tools/miri/src/shims/trace/child.rs | 238 ++++++++++++++++ src/tools/miri/src/shims/trace/messages.rs | 57 ++++ src/tools/miri/src/shims/trace/mod.rs | 31 +++ src/tools/miri/src/shims/trace/parent.rs | 263 ++++++++++++++++++ .../native-lib/pass/ptr_read_access.stderr | 4 +- .../native-lib/pass/ptr_write_access.stderr | 4 +- 16 files changed, 938 insertions(+), 39 deletions(-) create mode 100644 src/tools/miri/src/shims/trace/child.rs create mode 100644 src/tools/miri/src/shims/trace/messages.rs create mode 100644 src/tools/miri/src/shims/trace/mod.rs create mode 100644 src/tools/miri/src/shims/trace/parent.rs diff --git a/src/tools/miri/Cargo.lock b/src/tools/miri/Cargo.lock index 192d4f444c2ff..95124ac6fef43 100644 --- a/src/tools/miri/Cargo.lock +++ b/src/tools/miri/Cargo.lock @@ -80,6 +80,15 @@ dependencies = [ "rustc-demangle", ] +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bitflags" version = "2.9.0" @@ -150,6 +159,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.40" @@ -224,7 +239,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" dependencies = [ "lazy_static", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -243,7 +258,7 @@ dependencies = [ "libc", "once_cell", "unicode-width 0.2.0", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -298,7 +313,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -314,7 +329,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -333,6 +348,12 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "generic-array" version = "0.14.7" @@ -400,6 +421,25 @@ dependencies = [ "generic-array", ] +[[package]] +name = "ipc-channel" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb8251fb7bcd9ccd3725ed8deae9fe7db8e586495c9eb5b0c52e6233e5e75ea" +dependencies = [ + "bincode", + "crossbeam-channel", + "fnv", + "lazy_static", + "libc", + "mio", + "rand 0.8.5", + "serde", + "tempfile", + "uuid", + "windows", +] + [[package]] name = "itoa" version = "1.0.15" @@ -533,6 +573,18 @@ dependencies = [ "adler", ] +[[package]] +name = "mio" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" +dependencies = [ + "libc", + "log", + "wasi 0.11.0+wasi-snapshot-preview1", + "windows-sys 0.52.0", +] + [[package]] name = "miri" version = "0.1.0" @@ -544,19 +596,34 @@ dependencies = [ "colored", "directories", "getrandom 0.3.2", + "ipc-channel", "libc", "libffi", "libloading", "measureme", + "nix", "rand 0.9.0", "regex", "rustc_version", + "serde", "smallvec", "tempfile", "tikv-jemalloc-sys", "ui_test", ] +[[package]] +name = "nix" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -748,6 +815,8 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ + "libc", + "rand_chacha 0.3.1", "rand_core 0.6.4", ] @@ -757,11 +826,21 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" dependencies = [ - "rand_chacha", + "rand_chacha 0.9.0", "rand_core 0.9.3", "zerocopy", ] +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + [[package]] name = "rand_chacha" version = "0.9.0" @@ -777,6 +856,9 @@ name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.15", +] [[package]] name = "rand_core" @@ -879,7 +961,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -993,7 +1075,7 @@ dependencies = [ "getrandom 0.3.2", "once_cell", "rustix", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -1147,6 +1229,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" +[[package]] +name = "uuid" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" +dependencies = [ + "getrandom 0.3.2", +] + [[package]] name = "valuable" version = "0.1.1" @@ -1241,6 +1332,79 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "windows" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" +dependencies = [ + "windows-core", + "windows-targets", +] + +[[package]] +name = "windows-core" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-result", + "windows-strings", + "windows-targets", +] + +[[package]] +name = "windows-implement" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.59.0" diff --git a/src/tools/miri/Cargo.toml b/src/tools/miri/Cargo.toml index a314488bb2533..76018efc4a63b 100644 --- a/src/tools/miri/Cargo.toml +++ b/src/tools/miri/Cargo.toml @@ -40,6 +40,11 @@ libc = "0.2" libffi = "4.0.0" libloading = "0.8" +[target.'cfg(target_os = "linux")'.dependencies] +nix = { version = "0.30.1", features = ["mman", "ptrace", "signal"] } +ipc-channel = "0.19.0" +serde = { version = "1.0.219", features = ["derive"] } + [dev-dependencies] ui_test = "0.29.1" colored = "2" diff --git a/src/tools/miri/README.md b/src/tools/miri/README.md index 5554c7975ff3b..dead4931f2b15 100644 --- a/src/tools/miri/README.md +++ b/src/tools/miri/README.md @@ -419,6 +419,11 @@ to Miri failing to detect cases of undefined behavior in a program. Finally, the flag is **unsound** in the sense that Miri stops tracking details such as initialization and provenance on memory shared with native code, so it is easily possible to write code that has UB which is missed by Miri. +* `-Zmiri-force-old-native-lib-mode` disables the WIP improved native code access tracking. If for + whatever reason enabling native calls leads to odd behaviours or causes Miri to panic, disabling + the tracer *might* fix this. This will likely be removed once the tracer has been adequately + battle-tested. Note that this flag is only meaningful on Linux systems; other Unixes (currently) + exclusively use the old native-lib code. * `-Zmiri-measureme=` enables `measureme` profiling for the interpreted program. This can be used to find which parts of your program are executing slowly under Miri. The profile is written out to a file inside a directory called ``, and can be processed diff --git a/src/tools/miri/src/alloc/isolated_alloc.rs b/src/tools/miri/src/alloc/isolated_alloc.rs index 3a7879f372abc..123597ed88705 100644 --- a/src/tools/miri/src/alloc/isolated_alloc.rs +++ b/src/tools/miri/src/alloc/isolated_alloc.rs @@ -266,6 +266,18 @@ impl IsolatedAlloc { alloc::dealloc(ptr, layout); } } + + /// Returns a vector of page addresses managed by the allocator. + pub fn pages(&self) -> Vec { + let mut pages: Vec<_> = + self.page_ptrs.clone().into_iter().map(|p| p.expose_provenance()).collect(); + for (ptr, size) in &self.huge_ptrs { + for i in 0..size / self.page_size { + pages.push(ptr.expose_provenance().strict_add(i * self.page_size)); + } + } + pages + } } #[cfg(test)] diff --git a/src/tools/miri/src/bin/miri.rs b/src/tools/miri/src/bin/miri.rs index d4ba7fbd6a479..d410d7bcc8757 100644 --- a/src/tools/miri/src/bin/miri.rs +++ b/src/tools/miri/src/bin/miri.rs @@ -227,10 +227,11 @@ impl rustc_driver::Callbacks for MiriCompilerCalls { } else { let return_code = miri::eval_entry(tcx, entry_def_id, entry_type, &config, None) .unwrap_or_else(|| { + #[cfg(target_os = "linux")] + miri::register_retcode_sv(rustc_driver::EXIT_FAILURE); tcx.dcx().abort_if_errors(); rustc_driver::EXIT_FAILURE }); - std::process::exit(return_code); } @@ -722,6 +723,8 @@ fn main() { } else { show_error!("-Zmiri-native-lib `{}` does not exist", filename); } + } else if arg == "-Zmiri-force-old-native-lib-mode" { + miri_config.force_old_native_lib = true; } else if let Some(param) = arg.strip_prefix("-Zmiri-num-cpus=") { let num_cpus = param .parse::() @@ -792,6 +795,16 @@ fn main() { debug!("rustc arguments: {:?}", rustc_args); debug!("crate arguments: {:?}", miri_config.args); + #[cfg(target_os = "linux")] + if !miri_config.native_lib.is_empty() && !miri_config.force_old_native_lib { + // FIXME: This should display a diagnostic / warning on error + // SAFETY: If any other threads exist at this point (namely for the ctrlc + // handler), they will not interact with anything on the main rustc/Miri + // thread in an async-signal-unsafe way such as by accessing shared + // semaphores, etc.; the handler only calls `sleep()` and `exit()`, which + // are async-signal-safe, as is accessing atomics + let _ = unsafe { miri::init_sv() }; + } run_compiler_and_exit( &rustc_args, &mut MiriCompilerCalls::new(miri_config, many_seeds, genmc_config), diff --git a/src/tools/miri/src/diagnostics.rs b/src/tools/miri/src/diagnostics.rs index 54a7c1407eaa2..095f4ba8c86e9 100644 --- a/src/tools/miri/src/diagnostics.rs +++ b/src/tools/miri/src/diagnostics.rs @@ -133,6 +133,7 @@ pub enum NonHaltingDiagnostic { details: bool, }, NativeCallSharedMem, + NativeCallNoTrace, WeakMemoryOutdatedLoad { ptr: Pointer, }, @@ -629,6 +630,8 @@ impl<'tcx> MiriMachine<'tcx> { Int2Ptr { .. } => ("integer-to-pointer cast".to_string(), DiagLevel::Warning), NativeCallSharedMem => ("sharing memory with a native function".to_string(), DiagLevel::Warning), + NativeCallNoTrace => + ("unable to trace native code memory accesses".to_string(), DiagLevel::Warning), ExternTypeReborrow => ("reborrow of reference to `extern type`".to_string(), DiagLevel::Warning), CreatedPointerTag(..) @@ -664,6 +667,10 @@ impl<'tcx> MiriMachine<'tcx> { format!("progress report: current operation being executed is here"), Int2Ptr { .. } => format!("integer-to-pointer cast"), NativeCallSharedMem => format!("sharing memory with a native function called via FFI"), + NativeCallNoTrace => + format!( + "sharing memory with a native function called via FFI, and unable to use ptrace" + ), WeakMemoryOutdatedLoad { ptr } => format!("weak memory emulation: outdated value returned from load at {ptr}"), ExternTypeReborrow => @@ -710,6 +717,22 @@ impl<'tcx> MiriMachine<'tcx> { v } NativeCallSharedMem => { + vec![ + note!( + "when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis" + ), + note!( + "in particular, Miri assumes that the native call initializes all memory it has written to" + ), + note!( + "Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory" + ), + note!( + "what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free" + ), + ] + } + NativeCallNoTrace => { vec![ note!( "when memory is shared with a native function call, Miri stops tracking initialization and provenance for that memory" @@ -723,6 +746,10 @@ impl<'tcx> MiriMachine<'tcx> { note!( "what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free" ), + #[cfg(target_os = "linux")] + note!( + "this is normally partially mitigated, but either -Zmiri-force-old-native-lib-mode was passed or ptrace is disabled on your system" + ), ] } ExternTypeReborrow => { diff --git a/src/tools/miri/src/eval.rs b/src/tools/miri/src/eval.rs index 7a5f96ec1779d..0ad20cefb9225 100644 --- a/src/tools/miri/src/eval.rs +++ b/src/tools/miri/src/eval.rs @@ -150,6 +150,8 @@ pub struct MiriConfig { pub retag_fields: RetagFields, /// The location of the shared object files to load when calling external functions pub native_lib: Vec, + /// Whether to force using the old native lib behaviour even if ptrace might be supported. + pub force_old_native_lib: bool, /// Run a garbage collector for BorTags every N basic blocks. pub gc_interval: u32, /// The number of CPUs to be reported by miri. @@ -199,6 +201,7 @@ impl Default for MiriConfig { report_progress: None, retag_fields: RetagFields::Yes, native_lib: vec![], + force_old_native_lib: false, gc_interval: 10_000, num_cpus: 1, page_size: None, diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs index e96c81d5b1d16..b1c15b006f702 100644 --- a/src/tools/miri/src/lib.rs +++ b/src/tools/miri/src/lib.rs @@ -99,6 +99,9 @@ pub use rustc_const_eval::interpret::{self, AllocMap, Provenance as _}; use rustc_middle::{bug, span_bug}; use tracing::{info, trace}; +#[cfg(target_os = "linux")] +pub use crate::shims::trace::{init_sv, register_retcode_sv}; + // Type aliases that set the provenance parameter. pub type Pointer = interpret::Pointer>; pub type StrictPointer = interpret::Pointer; diff --git a/src/tools/miri/src/shims/mod.rs b/src/tools/miri/src/shims/mod.rs index b498551ace34c..e7b0a784c2701 100644 --- a/src/tools/miri/src/shims/mod.rs +++ b/src/tools/miri/src/shims/mod.rs @@ -19,6 +19,8 @@ pub mod os_str; pub mod panic; pub mod time; pub mod tls; +#[cfg(target_os = "linux")] +pub mod trace; pub use self::files::FdTable; pub use self::unix::{DirTable, EpollInterestTable}; diff --git a/src/tools/miri/src/shims/native_lib.rs b/src/tools/miri/src/shims/native_lib.rs index 030c2e3672169..53d1de060f423 100644 --- a/src/tools/miri/src/shims/native_lib.rs +++ b/src/tools/miri/src/shims/native_lib.rs @@ -1,5 +1,7 @@ //! Implements calling functions from a native library. use std::ops::Deref; +#[cfg(target_os = "linux")] +use std::{cell::RefCell, rc::Rc}; use libffi::high::call as ffi; use libffi::low::CodePtr; @@ -8,8 +10,16 @@ use rustc_middle::mir::interpret::Pointer; use rustc_middle::ty::{self as ty, IntTy, UintTy}; use rustc_span::Symbol; +#[cfg(target_os = "linux")] +use crate::alloc::isolated_alloc::IsolatedAlloc; use crate::*; +#[cfg(target_os = "linux")] +type CallResult<'tcx> = + InterpResult<'tcx, (ImmTy<'tcx>, Option)>; +#[cfg(not(target_os = "linux"))] +type CallResult<'tcx> = InterpResult<'tcx, (ImmTy<'tcx>, Option)>; + impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {} trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { /// Call native host function and return the output as an immediate. @@ -19,8 +29,13 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { dest: &MPlaceTy<'tcx>, ptr: CodePtr, libffi_args: Vec>, - ) -> InterpResult<'tcx, ImmTy<'tcx>> { + ) -> CallResult<'tcx> { let this = self.eval_context_mut(); + #[cfg(target_os = "linux")] + let alloc = this.machine.allocator.clone(); + #[cfg(not(target_os = "linux"))] + let alloc = (); + let maybe_memevents; // Call the function (`ptr`) with arguments `libffi_args`, and obtain the return value // as the specified primitive integer type @@ -30,60 +45,71 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { // Unsafe because of the call to native code. // Because this is calling a C function it is not necessarily sound, // but there is no way around this and we've checked as much as we can. - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; - Scalar::from_i8(x) + let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; + maybe_memevents = x.1; + Scalar::from_i8(x.0) } ty::Int(IntTy::I16) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; - Scalar::from_i16(x) + let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; + maybe_memevents = x.1; + Scalar::from_i16(x.0) } ty::Int(IntTy::I32) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; - Scalar::from_i32(x) + let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; + maybe_memevents = x.1; + Scalar::from_i32(x.0) } ty::Int(IntTy::I64) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; - Scalar::from_i64(x) + let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; + maybe_memevents = x.1; + Scalar::from_i64(x.0) } ty::Int(IntTy::Isize) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; - Scalar::from_target_isize(x.try_into().unwrap(), this) + let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; + maybe_memevents = x.1; + Scalar::from_target_isize(x.0.try_into().unwrap(), this) } // uints ty::Uint(UintTy::U8) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; - Scalar::from_u8(x) + let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; + maybe_memevents = x.1; + Scalar::from_u8(x.0) } ty::Uint(UintTy::U16) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; - Scalar::from_u16(x) + let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; + maybe_memevents = x.1; + Scalar::from_u16(x.0) } ty::Uint(UintTy::U32) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; - Scalar::from_u32(x) + let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; + maybe_memevents = x.1; + Scalar::from_u32(x.0) } ty::Uint(UintTy::U64) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; - Scalar::from_u64(x) + let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; + maybe_memevents = x.1; + Scalar::from_u64(x.0) } ty::Uint(UintTy::Usize) => { - let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; - Scalar::from_target_usize(x.try_into().unwrap(), this) + let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; + maybe_memevents = x.1; + Scalar::from_target_usize(x.0.try_into().unwrap(), this) } // Functions with no declared return type (i.e., the default return) // have the output_type `Tuple([])`. ty::Tuple(t_list) if (*t_list).deref().is_empty() => { - unsafe { ffi::call::<()>(ptr, libffi_args.as_slice()) }; - return interp_ok(ImmTy::uninit(dest.layout)); + let (_, mm) = unsafe { do_native_call::<()>(ptr, libffi_args.as_slice(), alloc) }; + return interp_ok((ImmTy::uninit(dest.layout), mm)); } ty::RawPtr(..) => { - let x = unsafe { ffi::call::<*const ()>(ptr, libffi_args.as_slice()) }; - let ptr = Pointer::new(Provenance::Wildcard, Size::from_bytes(x.addr())); + let x = unsafe { do_native_call::<*const ()>(ptr, libffi_args.as_slice(), alloc) }; + maybe_memevents = x.1; + let ptr = Pointer::new(Provenance::Wildcard, Size::from_bytes(x.0.addr())); Scalar::from_pointer(ptr, this) } _ => throw_unsup_format!("unsupported return type for native call: {:?}", link_name), }; - interp_ok(ImmTy::from_scalar(scalar, dest.layout)) + interp_ok((ImmTy::from_scalar(scalar, dest.layout), maybe_memevents)) } /// Get the pointer to the function of the specified name in the shared object file, @@ -179,6 +205,13 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // The first time this happens, print a warning. if !this.machine.native_call_mem_warned.replace(true) { // Newly set, so first time we get here. + #[cfg(target_os = "linux")] + if shims::trace::Supervisor::poll() { + this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem); + } else { + this.emit_diagnostic(NonHaltingDiagnostic::NativeCallNoTrace); + } + #[cfg(not(target_os = "linux"))] this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem); } @@ -196,12 +229,55 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { .collect::>>(); // Call the function and store output, depending on return type in the function signature. - let ret = this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?; + let (ret, _) = this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?; + this.write_immediate(*ret, dest)?; interp_ok(true) } } +/// Performs the actual native call, returning the result and the events that +/// the supervisor detected (if any). +/// +/// SAFETY: See `libffi::fii::call`. +#[cfg(target_os = "linux")] +unsafe fn do_native_call( + ptr: CodePtr, + args: &[ffi::Arg<'_>], + alloc: Option>>, +) -> (T, Option) { + use shims::trace::Supervisor; + + unsafe { + if let Some(alloc) = alloc { + // SAFETY: We don't touch the machine memory past this point + let (guard, stack_ptr) = Supervisor::start_ffi(alloc.clone()); + // SAFETY: Upheld by caller + let ret = ffi::call(ptr, args); + // SAFETY: We got the guard and stack pointer from start_ffi, and + // the allocator is the same + (ret, Supervisor::end_ffi(guard, alloc, stack_ptr)) + } else { + // SAFETY: Upheld by caller + (ffi::call(ptr, args), None) + } + } +} + +/// Performs the actual native call, returning the result and a `None`. +/// Placeholder for platforms that do not support the ptrace supervisor. +/// +/// SAFETY: See `libffi::fii::call`. +#[cfg(not(target_os = "linux"))] +#[inline(always)] +unsafe fn do_native_call( + ptr: CodePtr, + args: &[ffi::Arg<'_>], + _alloc: (), +) -> (T, Option) { + (unsafe { ffi::call(ptr, args) }, None) +} + #[derive(Debug, Clone)] /// Enum of supported arguments to external C functions. // We introduce this enum instead of just calling `ffi::arg` and storing a list diff --git a/src/tools/miri/src/shims/trace/child.rs b/src/tools/miri/src/shims/trace/child.rs new file mode 100644 index 0000000000000..dcfdaad748e40 --- /dev/null +++ b/src/tools/miri/src/shims/trace/child.rs @@ -0,0 +1,238 @@ +use std::cell::RefCell; +use std::rc::Rc; + +use ipc_channel::ipc; +use nix::sys::{ptrace, signal}; +use nix::unistd; + +use super::messages::{Confirmation, MemEvents, TraceRequest}; +use super::parent::{ChildListener, sv_loop}; +use super::{FAKE_STACK_SIZE, StartFfiInfo}; +use crate::alloc::isolated_alloc::IsolatedAlloc; + +static SUPERVISOR: std::sync::Mutex> = std::sync::Mutex::new(None); + +/// The main means of communication between the child and parent process, +/// allowing the former to send requests and get info from the latter. +pub struct Supervisor { + /// Sender for FFI-mode-related requests. + message_tx: ipc::IpcSender, + /// Used for synchronisation, allowing us to receive confirmation that the + /// parent process has handled the request from `message_tx`. + confirm_rx: ipc::IpcReceiver, + /// Receiver for memory acceses that ocurred during the FFI call. + event_rx: ipc::IpcReceiver, +} + +/// Marker representing that an error occurred during creation of the supervisor. +#[derive(Debug)] +pub struct SvInitError; + +impl Supervisor { + /// Returns `true` if the supervisor process exists, and `false` otherwise. + pub fn poll() -> bool { + SUPERVISOR.lock().unwrap().is_some() + } + + /// Begins preparations for doing an FFI call. This should be called at + /// the last possible moment before entering said call. `alloc` points to + /// the allocator which handed out the memory used for this machine. + /// + /// As this locks the supervisor via a mutex, no other threads may enter FFI + /// until this one returns and its guard is dropped via `end_ffi`. The + /// pointer returned should be passed to `end_ffi` to avoid a memory leak. + /// + /// SAFETY: The resulting guard must be dropped *via `end_ffi`* immediately + /// after the desired call has concluded. + pub unsafe fn start_ffi( + alloc: Rc>, + ) -> (std::sync::MutexGuard<'static, Option>, Option<*mut [u8; FAKE_STACK_SIZE]>) + { + let mut sv_guard = SUPERVISOR.lock().unwrap(); + // If the supervisor is not initialised for whatever reason, fast-fail. + // This might be desired behaviour, as even on platforms where ptracing + // is not implemented it enables us to enforce that only one FFI call + // happens at a time + let Some(sv) = sv_guard.take() else { + return (sv_guard, None); + }; + + // Get pointers to all the pages the supervisor must allow accesses in + // and prepare the fake stack + let page_ptrs = alloc.borrow().pages(); + let raw_stack_ptr: *mut [u8; FAKE_STACK_SIZE] = + Box::leak(Box::new([0u8; FAKE_STACK_SIZE])).as_mut_ptr().cast(); + let stack_ptr = raw_stack_ptr.expose_provenance(); + let start_info = StartFfiInfo { page_ptrs, stack_ptr }; + + // Send over the info. + // NB: if we do not wait to receive a blank confirmation response, it is + // possible that the supervisor is alerted of the SIGSTOP *before* it has + // actually received the start_info, thus deadlocking! This way, we can + // enforce an ordering for these events + sv.message_tx.send(TraceRequest::StartFfi(start_info)).unwrap(); + sv.confirm_rx.recv().unwrap(); + *sv_guard = Some(sv); + // We need to be stopped for the supervisor to be able to make certain + // modifications to our memory - simply waiting on the recv() doesn't + // count + signal::raise(signal::SIGSTOP).unwrap(); + (sv_guard, Some(raw_stack_ptr)) + } + + /// Undoes FFI-related preparations, allowing Miri to continue as normal, then + /// gets the memory accesses and changes performed during the FFI call. Note + /// that this may include some spurious accesses done by `libffi` itself in + /// the process of executing the function call. + /// + /// SAFETY: The `sv_guard` and `raw_stack_ptr` passed must be the same ones + /// received by a prior call to `start_ffi`, and the allocator must be the + /// one passed to it also. + pub unsafe fn end_ffi( + mut sv_guard: std::sync::MutexGuard<'static, Option>, + _alloc: Rc>, + raw_stack_ptr: Option<*mut [u8; FAKE_STACK_SIZE]>, + ) -> Option { + // We can't use IPC channels here to signal that FFI mode has ended, + // since they might allocate memory which could get us stuck in a SIGTRAP + // with no easy way out! While this could be worked around, it is much + // simpler and more robust to simply use the signals which are left for + // arbitrary usage. Since this will block until we are continued by the + // supervisor, we can assume past this point that everything is back to + // normal + signal::raise(signal::SIGUSR1).unwrap(); + + // If this is `None`, then `raw_stack_ptr` is None and does not need to + // be deallocated (and there's no need to worry about the guard, since + // it contains nothing) + let sv = sv_guard.take()?; + // SAFETY: Caller upholds that this pointer was allocated as a box with + // this type + unsafe { + drop(Box::from_raw(raw_stack_ptr.unwrap())); + } + // On the off-chance something really weird happens, don't block forever + let ret = sv + .event_rx + .try_recv_timeout(std::time::Duration::from_secs(5)) + .map_err(|e| { + match e { + ipc::TryRecvError::IpcError(_) => (), + ipc::TryRecvError::Empty => + eprintln!("Waiting for accesses from supervisor timed out!"), + } + }) + .ok(); + // Do *not* leave the supervisor empty, or else we might get another fork... + *sv_guard = Some(sv); + ret + } +} + +/// Initialises the supervisor process. If this function errors, then the +/// supervisor process could not be created successfully; else, the caller +/// is now the child process and can communicate via `start_ffi`/`end_ffi`, +/// receiving back events through `get_events`. +/// +/// # Safety +/// The invariants for `fork()` must be upheld by the caller. +pub unsafe fn init_sv() -> Result<(), SvInitError> { + // FIXME: Much of this could be reimplemented via the mitosis crate if we upstream the + // relevant missing bits + + // On Linux, this will check whether ptrace is fully disabled by the Yama module. + // If Yama isn't running or we're not on Linux, we'll still error later, but + // this saves a very expensive fork call + let ptrace_status = std::fs::read_to_string("/proc/sys/kernel/yama/ptrace_scope"); + if let Ok(stat) = ptrace_status { + if let Some(stat) = stat.chars().next() { + // Fast-error if ptrace is fully disabled on the system + if stat == '3' { + return Err(SvInitError); + } + } + } + + // Initialise the supervisor if it isn't already, placing it into SUPERVISOR + let mut lock = SUPERVISOR.lock().unwrap(); + if lock.is_some() { + return Ok(()); + } + + // Prepare the IPC channels we need + let (message_tx, message_rx) = ipc::channel().unwrap(); + let (confirm_tx, confirm_rx) = ipc::channel().unwrap(); + let (event_tx, event_rx) = ipc::channel().unwrap(); + // SAFETY: Calling sysconf(_SC_PAGESIZE) is always safe and cannot error + let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) }.try_into().unwrap(); + + unsafe { + // TODO: Maybe use clone3() instead for better signalling of when the child exits? + // SAFETY: Caller upholds that only one thread exists. + match unistd::fork().unwrap() { + unistd::ForkResult::Parent { child } => { + // If somehow another thread does exist, prevent it from accessing the lock + // and thus breaking our safety invariants + std::mem::forget(lock); + // The child process is free to unwind, so we won't to avoid doubly freeing + // system resources + let init = std::panic::catch_unwind(|| { + let listener = + ChildListener { message_rx, attached: false, override_retcode: None }; + // Trace as many things as possible, to be able to handle them as needed + let options = ptrace::Options::PTRACE_O_TRACESYSGOOD + | ptrace::Options::PTRACE_O_TRACECLONE + | ptrace::Options::PTRACE_O_TRACEFORK; + // Attach to the child process without stopping it + match ptrace::seize(child, options) { + // Ptrace works :D + Ok(_) => { + let code = sv_loop(listener, child, event_tx, confirm_tx, page_size) + .unwrap_err(); + // If a return code of 0 is not explicitly given, assume something went + // wrong and return 1 + std::process::exit(code.unwrap_or(1)) + } + // Ptrace does not work and we failed to catch that + Err(_) => { + // If we can't ptrace, Miri continues being the parent + signal::kill(child, signal::SIGKILL).unwrap(); + SvInitError + } + } + }); + match init { + // The "Ok" case means that we couldn't ptrace + Ok(e) => return Err(e), + Err(p) => { + eprintln!("Supervisor process panicked!\n{p:?}"); + std::process::exit(1); + } + } + } + unistd::ForkResult::Child => { + // Make sure we never get orphaned and stuck in SIGSTOP or similar + // SAFETY: prctl PR_SET_PDEATHSIG is always safe to call + let ret = libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM); + assert_eq!(ret, 0); + // First make sure the parent succeeded with ptracing us! + signal::raise(signal::SIGSTOP).unwrap(); + // If we're the child process, save the supervisor info + *lock = Some(Supervisor { message_tx, confirm_rx, event_rx }); + } + } + } + Ok(()) +} + +/// Instruct the supervisor process to return a particular code. Useful if for +/// whatever reason this code fails to be intercepted normally. In the case of +/// `abort_if_errors()` used in `bin/miri.rs`, the return code is erroneously +/// given as a 0 if this is not used. +pub fn register_retcode_sv(code: i32) { + let mut sv_guard = SUPERVISOR.lock().unwrap(); + if let Some(sv) = sv_guard.take() { + sv.message_tx.send(TraceRequest::OverrideRetcode(code)).unwrap(); + *sv_guard = Some(sv); + } +} diff --git a/src/tools/miri/src/shims/trace/messages.rs b/src/tools/miri/src/shims/trace/messages.rs new file mode 100644 index 0000000000000..1014ca750c8e0 --- /dev/null +++ b/src/tools/miri/src/shims/trace/messages.rs @@ -0,0 +1,57 @@ +//! Houses the types that are directly sent across the IPC channels. +//! +//! The overall structure of a traced FFI call, from the child process's POV, is +//! as follows: +//! ``` +//! message_tx.send(TraceRequest::StartFfi); +//! confirm_rx.recv(); +//! raise(SIGSTOP); +//! /* do ffi call */ +//! raise(SIGUSR1); // morally equivalent to some kind of "TraceRequest::EndFfi" +//! let events = event_rx.recv(); +//! ``` +//! `TraceRequest::OverrideRetcode` can be sent at any point in the above, including +//! before or after all of them. +//! +//! NB: sending these events out of order, skipping steps, etc. will result in +//! unspecified behaviour from the supervisor process, so use the abstractions +//! in `super::child` (namely `start_ffi()` and `end_ffi()`) to handle this. It is +//! trivially easy to cause a deadlock or crash by messing this up! + +/// An IPC request sent by the child process to the parent. +/// +/// The sender for this channel should live on the child process. +#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)] +pub(super) enum TraceRequest { + /// Requests that tracing begins. Following this being sent, the child must + /// wait to receive a `Confirmation` on the respective channel and then + /// `raise(SIGSTOP)`. + /// + /// To avoid possible issues while allocating memory for IPC channels, ending + /// the tracing is instead done via `raise(SIGUSR1)`. + StartFfi(super::StartFfiInfo), + /// Manually overrides the code that the supervisor will return upon exiting. + /// Once set, it is permanent. This can be called again to change the value. + OverrideRetcode(i32), +} + +/// A marker type confirming that the supervisor has received the request to begin +/// tracing and is now waiting for a `SIGSTOP`. +/// +/// The sender for this channel should live on the parent process. +#[derive(serde::Serialize, serde::Deserialize, Debug)] +pub(super) struct Confirmation; + +/// The final results of an FFI trace, containing every relevant event detected +/// by the tracer. Sent by the supervisor after receiving a `SIGUSR1` signal. +/// +/// The sender for this channel should live on the parent process. +#[derive(serde::Serialize, serde::Deserialize, Debug)] +pub struct MemEvents { + /// An ordered list of memory accesses that occurred. These should be assumed + /// to be overcautious; that is, if the size of an access is uncertain it is + /// pessimistically rounded up, and if the type (read/write/both) is uncertain + /// it is reported as whatever would be safest to assume; i.e. a read + maybe-write + /// becomes a read + write, etc. + pub acc_events: Vec, +} diff --git a/src/tools/miri/src/shims/trace/mod.rs b/src/tools/miri/src/shims/trace/mod.rs new file mode 100644 index 0000000000000..a073482044eb0 --- /dev/null +++ b/src/tools/miri/src/shims/trace/mod.rs @@ -0,0 +1,31 @@ +mod child; +pub mod messages; +mod parent; + +use std::ops::Range; + +pub use self::child::{Supervisor, init_sv, register_retcode_sv}; + +/// The size used for the array into which we can move the stack pointer. +const FAKE_STACK_SIZE: usize = 1024; + +/// Information needed to begin tracing. +#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)] +struct StartFfiInfo { + /// A vector of page addresses. These should have been automatically obtained + /// with `IsolatedAlloc::pages` and prepared with `IsolatedAlloc::prepare_ffi`. + page_ptrs: Vec, + /// The address of an allocation that can serve as a temporary stack. + /// This should be a leaked `Box<[u8; FAKE_STACK_SIZE]>` cast to an int. + stack_ptr: usize, +} + +/// A single memory access, conservatively overestimated +/// in case of ambiguity. +#[derive(serde::Serialize, serde::Deserialize, Debug)] +pub enum AccessEvent { + /// A read may have occurred on no more than the specified address range. + Read(Range), + /// A write may have occurred on no more than the specified address range. + Write(Range), +} diff --git a/src/tools/miri/src/shims/trace/parent.rs b/src/tools/miri/src/shims/trace/parent.rs new file mode 100644 index 0000000000000..5eb8aa8465193 --- /dev/null +++ b/src/tools/miri/src/shims/trace/parent.rs @@ -0,0 +1,263 @@ +use ipc_channel::ipc; +use nix::sys::{ptrace, signal, wait}; +use nix::unistd; + +use super::StartFfiInfo; +use super::messages::{Confirmation, MemEvents, TraceRequest}; + +/// The flags to use when calling `waitid()`. +/// Since bitwise OR on the nix version of these flags is implemented as a trait, +/// we can't use them directly so we do it this way +const WAIT_FLAGS: wait::WaitPidFlag = + wait::WaitPidFlag::from_bits_truncate(libc::WUNTRACED | libc::WEXITED); + +/// A unified event representing something happening on the child process. Wraps +/// `nix`'s `WaitStatus` and our custom signals so it can all be done with one +/// `match` statement. +pub enum ExecEvent { + /// Child process requests that we begin monitoring it. + Start(StartFfiInfo), + /// Child requests that we stop monitoring and pass over the events we + /// detected. + End, + /// The child process with the specified pid was stopped by the given signal. + Status(unistd::Pid, signal::Signal), + /// The child process with the specified pid entered or exited a syscall. + Syscall(unistd::Pid), + /// A child process exited or was killed; if we have a return code, it is + /// specified. + Died(Option), +} + +/// A listener for the FFI start info channel along with relevant state. +pub struct ChildListener { + /// The matching channel for the child's `Supervisor` struct. + pub message_rx: ipc::IpcReceiver, + /// Whether an FFI call is currently ongoing. + pub attached: bool, + /// If `Some`, overrides the return code with the given value. + pub override_retcode: Option, +} + +impl Iterator for ChildListener { + type Item = ExecEvent; + + // Allows us to monitor the child process by just iterating over the listener + // NB: This should never return None! + fn next(&mut self) -> Option { + // Do not block if the child has nothing to report for `waitid` + let opts = WAIT_FLAGS | wait::WaitPidFlag::WNOHANG; + loop { + // Listen to any child, not just the main one. Important if we want + // to allow the C code to fork further, along with being a bit of + // defensive programming since Linux sometimes assigns threads of + // the same process different PIDs with unpredictable rules... + match wait::waitid(wait::Id::All, opts) { + Ok(stat) => + match stat { + // Child exited normally with a specific code set + wait::WaitStatus::Exited(_, code) => { + let code = self.override_retcode.unwrap_or(code); + return Some(ExecEvent::Died(Some(code))); + } + // Child was killed by a signal, without giving a code + wait::WaitStatus::Signaled(_, _, _) => + return Some(ExecEvent::Died(self.override_retcode)), + // Child entered a syscall. Since we're always technically + // tracing, only pass this along if we're actively + // monitoring the child + wait::WaitStatus::PtraceSyscall(pid) => + if self.attached { + return Some(ExecEvent::Syscall(pid)); + }, + // Child with the given pid was stopped by the given signal. + // It's somewhat dubious when this is returned instead of + // WaitStatus::Stopped, but for our purposes they are the + // same thing. + wait::WaitStatus::PtraceEvent(pid, signal, _) => + if self.attached { + // This is our end-of-FFI signal! + if signal == signal::SIGUSR1 { + self.attached = false; + return Some(ExecEvent::End); + } else { + return Some(ExecEvent::Status(pid, signal)); + } + } else { + // Just pass along the signal + ptrace::cont(pid, signal).unwrap(); + }, + // Child was stopped at the given signal. Same logic as for + // WaitStatus::PtraceEvent + wait::WaitStatus::Stopped(pid, signal) => + if self.attached { + if signal == signal::SIGUSR1 { + self.attached = false; + return Some(ExecEvent::End); + } else { + return Some(ExecEvent::Status(pid, signal)); + } + } else { + ptrace::cont(pid, signal).unwrap(); + }, + _ => (), + }, + // This case should only trigger if all children died and we + // somehow missed that, but it's best we not allow any room + // for deadlocks + Err(_) => return Some(ExecEvent::Died(None)), + } + + // Similarly, do a non-blocking poll of the IPC channel + if let Ok(req) = self.message_rx.try_recv() { + match req { + TraceRequest::StartFfi(info) => + // Should never trigger - but better to panic explicitly than deadlock! + if self.attached { + panic!("Attempting to begin FFI multiple times!"); + } else { + self.attached = true; + return Some(ExecEvent::Start(info)); + }, + TraceRequest::OverrideRetcode(code) => self.override_retcode = Some(code), + } + } + + // Not ideal, but doing anything else might sacrifice performance + std::thread::yield_now(); + } + } +} + +/// An error came up while waiting on the child process to do something. +#[derive(Debug)] +enum ExecError { + /// The child process died with this return code, if we have one. + Died(Option), +} + +/// This is the main loop of the supervisor process. It runs in a separate +/// process from the rest of Miri (but because we fork, addresses for anything +/// created before the fork - like statics - are the same). +pub fn sv_loop( + listener: ChildListener, + init_pid: unistd::Pid, + event_tx: ipc::IpcSender, + confirm_tx: ipc::IpcSender, + _page_size: usize, +) -> Result> { + // Things that we return to the child process + let mut acc_events = Vec::new(); + + // Memory allocated on the MiriMachine + let mut _ch_pages = Vec::new(); + let mut _ch_stack = None; + + // The pid of the last process we interacted with, used by default if we don't have a + // reason to use a different one + let mut curr_pid = init_pid; + + // There's an initial sigstop we need to deal with + wait_for_signal(Some(curr_pid), signal::SIGSTOP, false).map_err(|e| { + match e { + ExecError::Died(code) => code, + } + })?; + ptrace::cont(curr_pid, None).unwrap(); + + for evt in listener { + match evt { + // start_ffi was called by the child, so prep memory + ExecEvent::Start(ch_info) => { + // All the pages that the child process is "allowed to" access + _ch_pages = ch_info.page_ptrs; + // And the fake stack it allocated for us to use later + _ch_stack = Some(ch_info.stack_ptr); + + // We received the signal and are no longer in the main listener loop, + // so we can let the child move on to the end of start_ffi where it will + // raise a SIGSTOP. We need it to be signal-stopped *and waited for* in + // order to do most ptrace operations! + confirm_tx.send(Confirmation).unwrap(); + // We can't trust simply calling `Pid::this()` in the child process to give the right + // PID for us, so we get it this way + curr_pid = wait_for_signal(None, signal::SIGSTOP, false).unwrap(); + + ptrace::syscall(curr_pid, None).unwrap(); + } + // end_ffi was called by the child + ExecEvent::End => { + // Hand over the access info we traced + event_tx.send(MemEvents { acc_events }).unwrap(); + // And reset our values + acc_events = Vec::new(); + _ch_stack = None; + + // No need to monitor syscalls anymore, they'd just be ignored + ptrace::cont(curr_pid, None).unwrap(); + } + // Child process was stopped by a signal + ExecEvent::Status(pid, signal) => { + eprintln!("Process unexpectedly got {signal}; continuing..."); + // In case we're not tracing + if ptrace::syscall(pid, signal).is_err() { + // If *this* fails too, something really weird happened + // and it's probably best to just panic + signal::kill(pid, signal::SIGCONT).unwrap(); + } + } + // Child entered a syscall; we wait for exits inside of this, so it + // should never trigger on return from a syscall we care about + ExecEvent::Syscall(pid) => { + ptrace::syscall(pid, None).unwrap(); + } + ExecEvent::Died(code) => { + return Err(code); + } + } + } + + unreachable!() +} + +/// Waits for `wait_signal`. If `init_cont`, it will first do a `ptrace::cont`. +/// We want to avoid that in some cases, like at the beginning of FFI. +/// +/// If `pid` is `None`, only one wait will be done and `init_cont` should be false. +fn wait_for_signal( + pid: Option, + wait_signal: signal::Signal, + init_cont: bool, +) -> Result { + if init_cont { + ptrace::cont(pid.unwrap(), None).unwrap(); + } + // Repeatedly call `waitid` until we get the signal we want, or the process dies + loop { + let wait_id = match pid { + Some(pid) => wait::Id::Pid(pid), + None => wait::Id::All, + }; + let stat = wait::waitid(wait_id, WAIT_FLAGS).map_err(|_| ExecError::Died(None))?; + let (signal, pid) = match stat { + // Report the cause of death, if we know it + wait::WaitStatus::Exited(_, code) => { + return Err(ExecError::Died(Some(code))); + } + wait::WaitStatus::Signaled(_, _, _) => return Err(ExecError::Died(None)), + wait::WaitStatus::Stopped(pid, signal) => (signal, pid), + wait::WaitStatus::PtraceEvent(pid, signal, _) => (signal, pid), + // This covers PtraceSyscall and variants that are impossible with + // the flags set (e.g. WaitStatus::StillAlive) + _ => { + ptrace::cont(pid.unwrap(), None).unwrap(); + continue; + } + }; + if signal == wait_signal { + return Ok(pid); + } else { + ptrace::cont(pid, None).map_err(|_| ExecError::Died(None))?; + } + } +} diff --git a/src/tools/miri/tests/native-lib/pass/ptr_read_access.stderr b/src/tools/miri/tests/native-lib/pass/ptr_read_access.stderr index 04a3025baefa9..e256796e7a7e5 100644 --- a/src/tools/miri/tests/native-lib/pass/ptr_read_access.stderr +++ b/src/tools/miri/tests/native-lib/pass/ptr_read_access.stderr @@ -4,8 +4,8 @@ warning: sharing memory with a native function called via FFI LL | unsafe { print_pointer(&x) }; | ^^^^^^^^^^^^^^^^^ sharing memory with a native function | - = help: when memory is shared with a native function call, Miri stops tracking initialization and provenance for that memory - = help: in particular, Miri assumes that the native call initializes all memory it has access to + = help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis + = help: in particular, Miri assumes that the native call initializes all memory it has written to = help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory = help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free = note: BACKTRACE: diff --git a/src/tools/miri/tests/native-lib/pass/ptr_write_access.stderr b/src/tools/miri/tests/native-lib/pass/ptr_write_access.stderr index c893b0d9f6561..53cbb05a32184 100644 --- a/src/tools/miri/tests/native-lib/pass/ptr_write_access.stderr +++ b/src/tools/miri/tests/native-lib/pass/ptr_write_access.stderr @@ -4,8 +4,8 @@ warning: sharing memory with a native function called via FFI LL | unsafe { increment_int(&mut x) }; | ^^^^^^^^^^^^^^^^^^^^^ sharing memory with a native function | - = help: when memory is shared with a native function call, Miri stops tracking initialization and provenance for that memory - = help: in particular, Miri assumes that the native call initializes all memory it has access to + = help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis + = help: in particular, Miri assumes that the native call initializes all memory it has written to = help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory = help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free = note: BACKTRACE: From 19c6f75929349d6f1cd2e058dfac25dbb5ee8b43 Mon Sep 17 00:00:00 2001 From: The Miri Cronjob Bot Date: Thu, 19 Jun 2025 04:55:44 +0000 Subject: [PATCH 05/38] Preparing for merge from rustc --- src/tools/miri/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index 6a74011a36ff0..30ba3070e1f44 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -77ec48f5642ee1aa451d270f11f308c297f55f76 +d1d8e386c5e84c4ba857f56c3291f73c27e2d62a From f3383e4942bca64ce523b7736a2bedd1b8426c11 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Thu, 12 Jun 2025 10:56:43 +0000 Subject: [PATCH 06/38] Do not include NUL-terminator in computed length --- .../src/util/caller_location.rs | 12 +++---- library/core/src/panic/location.rs | 35 ++++++++++++------- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/compiler/rustc_const_eval/src/util/caller_location.rs b/compiler/rustc_const_eval/src/util/caller_location.rs index 671214002a0d8..a51a5751dc47c 100644 --- a/compiler/rustc_const_eval/src/util/caller_location.rs +++ b/compiler/rustc_const_eval/src/util/caller_location.rs @@ -22,13 +22,14 @@ fn alloc_caller_location<'tcx>( assert!(!filename.as_str().as_bytes().contains(&0)); let loc_details = ecx.tcx.sess.opts.unstable_opts.location_detail; - let file_wide_ptr = { + let filename = { let filename = if loc_details.file { filename.as_str() } else { "" }; let filename_with_nul = filename.to_owned() + "\0"; // This can fail if rustc runs out of memory right here. Trying to emit an error would be // pointless, since that would require allocating more memory than these short strings. let file_ptr = ecx.allocate_bytes_dedup(filename_with_nul.as_bytes()).unwrap(); - Immediate::new_slice(file_ptr.into(), filename_with_nul.len().try_into().unwrap(), ecx) + let file_len = u64::try_from(filename.len()).unwrap(); + Immediate::new_slice(file_ptr.into(), file_len, ecx) }; let line = if loc_details.line { Scalar::from_u32(line) } else { Scalar::from_u32(0) }; let col = if loc_details.column { Scalar::from_u32(col) } else { Scalar::from_u32(0) }; @@ -42,11 +43,8 @@ fn alloc_caller_location<'tcx>( let location = ecx.allocate(loc_layout, MemoryKind::CallerLocation).unwrap(); // Initialize fields. - ecx.write_immediate( - file_wide_ptr, - &ecx.project_field(&location, FieldIdx::from_u32(0)).unwrap(), - ) - .expect("writing to memory we just allocated cannot fail"); + ecx.write_immediate(filename, &ecx.project_field(&location, FieldIdx::from_u32(0)).unwrap()) + .expect("writing to memory we just allocated cannot fail"); ecx.write_scalar(line, &ecx.project_field(&location, FieldIdx::from_u32(1)).unwrap()) .expect("writing to memory we just allocated cannot fail"); ecx.write_scalar(col, &ecx.project_field(&location, FieldIdx::from_u32(2)).unwrap()) diff --git a/library/core/src/panic/location.rs b/library/core/src/panic/location.rs index f1eedede8aab9..1e950d6e4988b 100644 --- a/library/core/src/panic/location.rs +++ b/library/core/src/panic/location.rs @@ -1,5 +1,6 @@ use crate::ffi::CStr; use crate::fmt; +use crate::marker::PhantomData; /// A struct containing information about the location of a panic. /// @@ -33,14 +34,13 @@ use crate::fmt; #[derive(Copy, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)] #[stable(feature = "panic_hooks", since = "1.10.0")] pub struct Location<'a> { - // Note: this filename will have exactly one nul byte at its end, but otherwise - // it must never contain interior nul bytes. This is relied on for the conversion - // to `CStr` below. - // - // The prefix of the string without the trailing nul byte will be a regular UTF8 `str`. - file_bytes_with_nul: &'a [u8], + // A raw pointer is used rather than a reference because the pointer is valid for one more byte + // than the length stored in this pointer; the additional byte is the NUL-terminator used by + // `Location::file_with_nul`. + filename: *const str, line: u32, col: u32, + _filename: PhantomData<&'a str>, } #[stable(feature = "panic_hooks", since = "1.10.0")] @@ -143,10 +143,8 @@ impl<'a> Location<'a> { #[stable(feature = "panic_hooks", since = "1.10.0")] #[rustc_const_stable(feature = "const_location_fields", since = "1.79.0")] pub const fn file(&self) -> &str { - let str_len = self.file_bytes_with_nul.len() - 1; - // SAFETY: `file_bytes_with_nul` without the trailing nul byte is guaranteed to be - // valid UTF8. - unsafe { crate::str::from_raw_parts(self.file_bytes_with_nul.as_ptr(), str_len) } + // SAFETY: The filename is valid. + unsafe { &*self.filename } } /// Returns the name of the source file as a nul-terminated `CStr`. @@ -157,9 +155,15 @@ impl<'a> Location<'a> { #[unstable(feature = "file_with_nul", issue = "141727")] #[inline] pub const fn file_with_nul(&self) -> &CStr { - // SAFETY: `file_bytes_with_nul` is guaranteed to have a trailing nul byte and no - // interior nul bytes. - unsafe { CStr::from_bytes_with_nul_unchecked(self.file_bytes_with_nul) } + // SAFETY: The filename is valid for `filename_len+1` bytes, so this addition can't + // overflow. + let cstr_len = unsafe { crate::mem::size_of_val_raw(self.filename).unchecked_add(1) }; + + // SAFETY: The filename is valid for `filename_len+1` bytes. + let slice = unsafe { crate::slice::from_raw_parts(self.filename as *const _, cstr_len) }; + + // SAFETY: The filename is guaranteed to have a trailing nul byte and no interior nul bytes. + unsafe { CStr::from_bytes_with_nul_unchecked(slice) } } /// Returns the line number from which the panic originated. @@ -220,3 +224,8 @@ impl fmt::Display for Location<'_> { write!(formatter, "{}:{}:{}", self.file(), self.line, self.col) } } + +#[stable(feature = "panic_hooks", since = "1.10.0")] +unsafe impl Send for Location<'_> {} +#[stable(feature = "panic_hooks", since = "1.10.0")] +unsafe impl Sync for Location<'_> {} From a00961269107703772c4e8f071f0accbe0f1a7e5 Mon Sep 17 00:00:00 2001 From: Stypox Date: Wed, 9 Apr 2025 10:34:03 +0200 Subject: [PATCH 07/38] Allow building Miri with --features from miri-script Otherwise there was no way to pass e.g. `--features tracing` just to the `cargo` commands issued on the root repository: CARGO_EXTRA_FLAGS applies the flags to the "cargo-miri" crate, too, which does not make sense for crate-specific features. Fix install_to_sysroot doing path concatenation twice. Since the second concatenation was against an absolute path, it didn't do anything. This also makes the interface of install_to_sysroot() similar to that of cargo_cmd(). Implement --features for clippy, also fix not passing features to one of the cargo invocations for test --- src/tools/miri/miri-script/src/commands.rs | 83 +++++++++++--------- src/tools/miri/miri-script/src/coverage.rs | 7 +- src/tools/miri/miri-script/src/main.rs | 38 +++++++-- src/tools/miri/miri-script/src/util.rs | 90 +++++++++++++++++----- 4 files changed, 153 insertions(+), 65 deletions(-) diff --git a/src/tools/miri/miri-script/src/commands.rs b/src/tools/miri/miri-script/src/commands.rs index 86362145d4759..d18e9c7791fc2 100644 --- a/src/tools/miri/miri-script/src/commands.rs +++ b/src/tools/miri/miri-script/src/commands.rs @@ -32,6 +32,7 @@ impl MiriEnv { &mut self, quiet: bool, target: Option>, + features: &[String], ) -> Result { if let Some(miri_sysroot) = self.sh.var_os("MIRI_SYSROOT") { // Sysroot already set, use that. @@ -39,8 +40,8 @@ impl MiriEnv { } // Make sure everything is built. Also Miri itself. - self.build(".", &[], quiet)?; - self.build("cargo-miri", &[], quiet)?; + self.build(".", features, &[], quiet)?; + self.build("cargo-miri", &[], &[], quiet)?; let target_flag = if let Some(target) = &target { vec![OsStr::new("--target"), target.as_ref()] @@ -58,7 +59,7 @@ impl MiriEnv { } let mut cmd = self - .cargo_cmd("cargo-miri", "run") + .cargo_cmd("cargo-miri", "run", &[]) .arg("--quiet") .arg("--") .args(&["miri", "setup", "--print-sysroot"]) @@ -90,7 +91,9 @@ impl Command { Self::fmt(vec![])?; } if auto_clippy { - Self::clippy(vec![])?; + // no features for auto actions, see + // https://github.com/rust-lang/miri/pull/4396#discussion_r2149654845 + Self::clippy(vec![], vec![])?; } Ok(()) @@ -175,16 +178,16 @@ impl Command { } // Then run the actual command. match self { - Command::Install { flags } => Self::install(flags), - Command::Build { flags } => Self::build(flags), - Command::Check { flags } => Self::check(flags), - Command::Test { bless, flags, target, coverage } => - Self::test(bless, flags, target, coverage), - Command::Run { dep, verbose, target, edition, flags } => - Self::run(dep, verbose, target, edition, flags), - Command::Doc { flags } => Self::doc(flags), + Command::Install { features, flags } => Self::install(features, flags), + Command::Build { features, flags } => Self::build(features, flags), + Command::Check { features, flags } => Self::check(features, flags), + Command::Test { bless, target, coverage, features, flags } => + Self::test(bless, target, coverage, features, flags), + Command::Run { dep, verbose, target, edition, features, flags } => + Self::run(dep, verbose, target, edition, features, flags), + Command::Doc { features, flags } => Self::doc(features, flags), Command::Fmt { flags } => Self::fmt(flags), - Command::Clippy { flags } => Self::clippy(flags), + Command::Clippy { features, flags } => Self::clippy(features, flags), Command::Bench { target, no_install, save_baseline, load_baseline, benches } => Self::bench(target, no_install, save_baseline, load_baseline, benches), Command::Toolchain { flags } => Self::toolchain(flags), @@ -494,7 +497,7 @@ impl Command { if !no_install { // Make sure we have an up-to-date Miri installed and selected the right toolchain. - Self::install(vec![])?; + Self::install(vec![], vec![])?; } let results_json_dir = if save_baseline.is_some() || load_baseline.is_some() { Some(TempDir::new()?) @@ -601,47 +604,48 @@ impl Command { Ok(()) } - fn install(flags: Vec) -> Result<()> { + fn install(features: Vec, flags: Vec) -> Result<()> { let e = MiriEnv::new()?; - e.install_to_sysroot(e.miri_dir.clone(), &flags)?; - e.install_to_sysroot(path!(e.miri_dir / "cargo-miri"), &flags)?; + e.install_to_sysroot(".", &features, &flags)?; + e.install_to_sysroot("cargo-miri", &[], &flags)?; Ok(()) } - fn build(flags: Vec) -> Result<()> { + fn build(features: Vec, flags: Vec) -> Result<()> { let e = MiriEnv::new()?; - e.build(".", &flags, /* quiet */ false)?; - e.build("cargo-miri", &flags, /* quiet */ false)?; + e.build(".", &features, &flags, /* quiet */ false)?; + e.build("cargo-miri", &[], &flags, /* quiet */ false)?; Ok(()) } - fn check(flags: Vec) -> Result<()> { + fn check(features: Vec, flags: Vec) -> Result<()> { let e = MiriEnv::new()?; - e.check(".", &flags)?; - e.check("cargo-miri", &flags)?; + e.check(".", &features, &flags)?; + e.check("cargo-miri", &[], &flags)?; Ok(()) } - fn doc(flags: Vec) -> Result<()> { + fn doc(features: Vec, flags: Vec) -> Result<()> { let e = MiriEnv::new()?; - e.doc(".", &flags)?; - e.doc("cargo-miri", &flags)?; + e.doc(".", &features, &flags)?; + e.doc("cargo-miri", &[], &flags)?; Ok(()) } - fn clippy(flags: Vec) -> Result<()> { + fn clippy(features: Vec, flags: Vec) -> Result<()> { let e = MiriEnv::new()?; - e.clippy(".", &flags)?; - e.clippy("cargo-miri", &flags)?; - e.clippy("miri-script", &flags)?; + e.clippy(".", &features, &flags)?; + e.clippy("cargo-miri", &[], &flags)?; + e.clippy("miri-script", &[], &flags)?; Ok(()) } fn test( bless: bool, - mut flags: Vec, target: Option, coverage: bool, + features: Vec, + mut flags: Vec, ) -> Result<()> { let mut e = MiriEnv::new()?; @@ -652,7 +656,7 @@ impl Command { } // Prepare a sysroot. (Also builds cargo-miri, which we need.) - e.build_miri_sysroot(/* quiet */ false, target.as_deref())?; + e.build_miri_sysroot(/* quiet */ false, target.as_deref(), &features)?; // Forward information to test harness. if bless { @@ -672,10 +676,10 @@ impl Command { // Then test, and let caller control flags. // Only in root project as `cargo-miri` has no tests. - e.test(".", &flags)?; + e.test(".", &features, &flags)?; if let Some(coverage) = &coverage { - coverage.show_coverage_report(&e)?; + coverage.show_coverage_report(&e, &features)?; } Ok(()) @@ -686,14 +690,17 @@ impl Command { verbose: bool, target: Option, edition: Option, + features: Vec, flags: Vec, ) -> Result<()> { let mut e = MiriEnv::new()?; // Preparation: get a sysroot, and get the miri binary. - let miri_sysroot = e.build_miri_sysroot(/* quiet */ !verbose, target.as_deref())?; - let miri_bin = - e.build_get_binary(".").context("failed to get filename of miri executable")?; + let miri_sysroot = + e.build_miri_sysroot(/* quiet */ !verbose, target.as_deref(), &features)?; + let miri_bin = e + .build_get_binary(".", &features) + .context("failed to get filename of miri executable")?; // More flags that we will pass before `flags` // (because `flags` may contain `--`). @@ -718,7 +725,7 @@ impl Command { // The basic command that executes the Miri driver. let mut cmd = if dep { // We invoke the test suite as that has all the logic for running with dependencies. - e.cargo_cmd(".", "test") + e.cargo_cmd(".", "test", &features) .args(&["--test", "ui"]) .args(quiet_flag) .arg("--") diff --git a/src/tools/miri/miri-script/src/coverage.rs b/src/tools/miri/miri-script/src/coverage.rs index 8cafcea0d16f3..cdf2bbb93304f 100644 --- a/src/tools/miri/miri-script/src/coverage.rs +++ b/src/tools/miri/miri-script/src/coverage.rs @@ -49,7 +49,7 @@ impl CoverageReport { /// show_coverage_report will print coverage information using the artifact /// files in `self.path`. - pub fn show_coverage_report(&self, e: &MiriEnv) -> Result<()> { + pub fn show_coverage_report(&self, e: &MiriEnv, features: &[String]) -> Result<()> { let profraw_files = self.profraw_files()?; let profdata_bin = path!(e.libdir / ".." / "bin" / "llvm-profdata"); @@ -63,8 +63,9 @@ impl CoverageReport { // Create the coverage report. let cov_bin = path!(e.libdir / ".." / "bin" / "llvm-cov"); - let miri_bin = - e.build_get_binary(".").context("failed to get filename of miri executable")?; + let miri_bin = e + .build_get_binary(".", features) + .context("failed to get filename of miri executable")?; cmd!( e.sh, "{cov_bin} report --instr-profile={merged_file} --object {miri_bin} --sources src/" diff --git a/src/tools/miri/miri-script/src/main.rs b/src/tools/miri/miri-script/src/main.rs index 6aab2f79bd78c..673d658cf1d5d 100644 --- a/src/tools/miri/miri-script/src/main.rs +++ b/src/tools/miri/miri-script/src/main.rs @@ -14,24 +14,40 @@ pub enum Command { /// Sets up the rpath such that the installed binary should work in any /// working directory. Install { + /// Pass features to cargo invocations on the "miri" crate in the root. This option does + /// **not** apply to other crates, so e.g. these features won't be used on "cargo-miri". + #[arg(long, value_delimiter = ',', action = clap::ArgAction::Append)] + features: Vec, /// Flags that are passed through to `cargo install`. #[arg(trailing_var_arg = true, allow_hyphen_values = true)] flags: Vec, }, /// Build Miri. Build { + /// Pass features to cargo invocations on the "miri" crate in the root. This option does + /// **not** apply to other crates, so e.g. these features won't be used on "cargo-miri". + #[arg(long, value_delimiter = ',', action = clap::ArgAction::Append)] + features: Vec, /// Flags that are passed through to `cargo build`. #[arg(trailing_var_arg = true, allow_hyphen_values = true)] flags: Vec, }, /// Check Miri. Check { + /// Pass features to cargo invocations on the "miri" crate in the root. This option does + /// **not** apply to other crates, so e.g. these features won't be used on "cargo-miri". + #[arg(long, value_delimiter = ',', action = clap::ArgAction::Append)] + features: Vec, /// Flags that are passed through to `cargo check`. #[arg(trailing_var_arg = true, allow_hyphen_values = true)] flags: Vec, }, /// Check Miri with Clippy. Clippy { + /// Pass features to cargo invocations on the "miri" crate in the root. This option does + /// **not** apply to other crates, so e.g. these features won't be used on "cargo-miri". + #[arg(long, value_delimiter = ',', action = clap::ArgAction::Append)] + features: Vec, /// Flags that are passed through to `cargo clippy`. #[arg(trailing_var_arg = true, allow_hyphen_values = true)] flags: Vec, @@ -47,6 +63,10 @@ pub enum Command { /// Produce coverage report. #[arg(long)] coverage: bool, + /// Pass features to cargo invocations on the "miri" crate in the root. This option does + /// **not** apply to other crates, so e.g. these features won't be used on "cargo-miri". + #[arg(long, value_delimiter = ',', action = clap::ArgAction::Append)] + features: Vec, /// Flags that are passed through to the test harness. #[arg(trailing_var_arg = true, allow_hyphen_values = true)] flags: Vec, @@ -67,6 +87,10 @@ pub enum Command { /// The Rust edition. #[arg(long)] edition: Option, + /// Pass features to cargo invocations on the "miri" crate in the root. This option does + /// **not** apply to other crates, so e.g. these features won't be used on "cargo-miri". + #[arg(long, value_delimiter = ',', action = clap::ArgAction::Append)] + features: Vec, /// Flags that are passed through to `miri`. /// /// The flags set in `MIRIFLAGS` are added in front of these flags. @@ -75,6 +99,10 @@ pub enum Command { }, /// Build documentation. Doc { + /// Pass features to cargo invocations on the "miri" crate in the root. This option does + /// **not** apply to other crates, so e.g. these features won't be used on "cargo-miri". + #[arg(long, value_delimiter = ',', action = clap::ArgAction::Append)] + features: Vec, /// Flags that are passed through to `cargo doc`. #[arg(trailing_var_arg = true, allow_hyphen_values = true)] flags: Vec, @@ -144,13 +172,13 @@ impl Command { } match self { - Self::Install { flags } - | Self::Build { flags } - | Self::Check { flags } - | Self::Doc { flags } + Self::Install { flags, .. } + | Self::Build { flags, .. } + | Self::Check { flags, .. } + | Self::Doc { flags, .. } | Self::Fmt { flags } | Self::Toolchain { flags } - | Self::Clippy { flags } + | Self::Clippy { flags, .. } | Self::Run { flags, .. } | Self::Test { flags, .. } => { flags.extend(remainder); diff --git a/src/tools/miri/miri-script/src/util.rs b/src/tools/miri/miri-script/src/util.rs index 5c2a055990fdd..c100cf195ba49 100644 --- a/src/tools/miri/miri-script/src/util.rs +++ b/src/tools/miri/miri-script/src/util.rs @@ -26,6 +26,12 @@ pub fn flagsplit(flags: &str) -> Vec { flags.split(' ').map(str::trim).filter(|s| !s.is_empty()).map(str::to_string).collect() } +/// Turns a list of features into a list of arguments to pass to cargo invocations. +/// Each feature will go in its own argument, e.g. "--features feat1 --features feat2". +fn features_to_args(features: &[String]) -> impl IntoIterator { + features.iter().flat_map(|feat| ["--features", feat]) +} + /// Some extra state we track for building Miri, such as the right RUSTFLAGS. pub struct MiriEnv { /// miri_dir is the root of the miri repository checkout we are working in. @@ -116,44 +122,70 @@ impl MiriEnv { Ok(MiriEnv { miri_dir, toolchain, sh, sysroot, cargo_extra_flags, libdir }) } - pub fn cargo_cmd(&self, crate_dir: impl AsRef, cmd: &str) -> Cmd<'_> { + /// Make sure the `features` you pass here exist for the specified `crate_dir`. For example, the + /// "--features" parameter of [crate::Command]s is intended only for the "miri" root crate. + pub fn cargo_cmd( + &self, + crate_dir: impl AsRef, + cmd: &str, + features: &[String], + ) -> Cmd<'_> { let MiriEnv { toolchain, cargo_extra_flags, .. } = self; let manifest_path = path!(self.miri_dir / crate_dir.as_ref() / "Cargo.toml"); + let features = features_to_args(features); cmd!( self.sh, - "cargo +{toolchain} {cmd} {cargo_extra_flags...} --manifest-path {manifest_path}" + "cargo +{toolchain} {cmd} {cargo_extra_flags...} --manifest-path {manifest_path} {features...}" ) } + /// Make sure the `features` you pass here exist for the specified `crate_dir`. For example, the + /// "--features" parameter of [crate::Command]s is intended only for the "miri" root crate. pub fn install_to_sysroot( &self, - path: impl AsRef, + crate_dir: impl AsRef, + features: &[String], args: impl IntoIterator>, ) -> Result<()> { let MiriEnv { sysroot, toolchain, cargo_extra_flags, .. } = self; - let path = path!(self.miri_dir / path.as_ref()); + let path = path!(self.miri_dir / crate_dir.as_ref()); + let features = features_to_args(features); // Install binaries to the miri toolchain's `sysroot` so they do not interact with other toolchains. // (Not using `cargo_cmd` as `install` is special and doesn't use `--manifest-path`.) - cmd!(self.sh, "cargo +{toolchain} install {cargo_extra_flags...} --path {path} --force --root {sysroot} {args...}").run()?; + cmd!(self.sh, "cargo +{toolchain} install {cargo_extra_flags...} --path {path} --force --root {sysroot} {features...} {args...}").run()?; Ok(()) } - pub fn build(&self, crate_dir: impl AsRef, args: &[String], quiet: bool) -> Result<()> { + pub fn build( + &self, + crate_dir: impl AsRef, + features: &[String], + args: &[String], + quiet: bool, + ) -> Result<()> { let quiet_flag = if quiet { Some("--quiet") } else { None }; // We build all targets, since building *just* the bin target doesnot include // `dev-dependencies` and that changes feature resolution. This also gets us more // parallelism in `./miri test` as we build Miri and its tests together. - let mut cmd = - self.cargo_cmd(crate_dir, "build").args(&["--all-targets"]).args(quiet_flag).args(args); + let mut cmd = self + .cargo_cmd(crate_dir, "build", features) + .args(&["--all-targets"]) + .args(quiet_flag) + .args(args); cmd.set_quiet(quiet); cmd.run()?; Ok(()) } /// Returns the path to the main crate binary. Assumes that `build` has been called before. - pub fn build_get_binary(&self, crate_dir: impl AsRef) -> Result { - let cmd = - self.cargo_cmd(crate_dir, "build").args(&["--all-targets", "--message-format=json"]); + pub fn build_get_binary( + &self, + crate_dir: impl AsRef, + features: &[String], + ) -> Result { + let cmd = self + .cargo_cmd(crate_dir, "build", features) + .args(&["--all-targets", "--message-format=json"]); let output = cmd.output()?; let mut bin = None; for line in output.stdout.lines() { @@ -174,23 +206,43 @@ impl MiriEnv { bin.ok_or_else(|| anyhow!("found no binary in cargo output")) } - pub fn check(&self, crate_dir: impl AsRef, args: &[String]) -> Result<()> { - self.cargo_cmd(crate_dir, "check").arg("--all-targets").args(args).run()?; + pub fn check( + &self, + crate_dir: impl AsRef, + features: &[String], + args: &[String], + ) -> Result<()> { + self.cargo_cmd(crate_dir, "check", features).arg("--all-targets").args(args).run()?; Ok(()) } - pub fn doc(&self, crate_dir: impl AsRef, args: &[String]) -> Result<()> { - self.cargo_cmd(crate_dir, "doc").args(args).run()?; + pub fn doc( + &self, + crate_dir: impl AsRef, + features: &[String], + args: &[String], + ) -> Result<()> { + self.cargo_cmd(crate_dir, "doc", features).args(args).run()?; Ok(()) } - pub fn clippy(&self, crate_dir: impl AsRef, args: &[String]) -> Result<()> { - self.cargo_cmd(crate_dir, "clippy").arg("--all-targets").args(args).run()?; + pub fn clippy( + &self, + crate_dir: impl AsRef, + features: &[String], + args: &[String], + ) -> Result<()> { + self.cargo_cmd(crate_dir, "clippy", features).arg("--all-targets").args(args).run()?; Ok(()) } - pub fn test(&self, crate_dir: impl AsRef, args: &[String]) -> Result<()> { - self.cargo_cmd(crate_dir, "test").args(args).run()?; + pub fn test( + &self, + crate_dir: impl AsRef, + features: &[String], + args: &[String], + ) -> Result<()> { + self.cargo_cmd(crate_dir, "test", features).args(args).run()?; Ok(()) } From a9cc316954a1076621152e365f992f888a3ac2d0 Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Sat, 31 May 2025 11:00:52 +0200 Subject: [PATCH 08/38] isolated_alloc: directly use mmap for allocations Update src/alloc/isolated_alloc.rs Co-authored-by: Ralf Jung Update src/alloc/isolated_alloc.rs Co-authored-by: Ralf Jung Update src/alloc/isolated_alloc.rs Co-authored-by: Ralf Jung Update src/alloc/isolated_alloc.rs Co-authored-by: Ralf Jung Update src/alloc/isolated_alloc.rs Co-authored-by: Ralf Jung address review Apply suggestions from code review Co-authored-by: Ralf Jung fix comment fix position thing dumb mistake Apply suggestions from code review Co-authored-by: Ralf Jung --- src/tools/miri/src/alloc/isolated_alloc.rs | 108 ++++++++++++++------- 1 file changed, 72 insertions(+), 36 deletions(-) diff --git a/src/tools/miri/src/alloc/isolated_alloc.rs b/src/tools/miri/src/alloc/isolated_alloc.rs index 7b74d17137341..67e967962189f 100644 --- a/src/tools/miri/src/alloc/isolated_alloc.rs +++ b/src/tools/miri/src/alloc/isolated_alloc.rs @@ -1,4 +1,4 @@ -use std::alloc::{self, Layout}; +use std::alloc::Layout; use rustc_index::bit_set::DenseBitSet; @@ -11,7 +11,7 @@ const COMPRESSION_FACTOR: usize = 4; pub struct IsolatedAlloc { /// Pointers to page-aligned memory that has been claimed by the allocator. /// Every pointer here must point to a page-sized allocation claimed via - /// the global allocator. These pointers are used for "small" allocations. + /// mmap. These pointers are used for "small" allocations. page_ptrs: Vec<*mut u8>, /// Metadata about which bytes have been allocated on each page. The length /// of this vector must be the same as that of `page_ptrs`, and the domain @@ -52,20 +52,26 @@ impl IsolatedAlloc { Layout::from_size_align(size, align).unwrap() } - /// Returns the layout used to allocate the pages that hold small allocations. + /// For greater-than-page-sized allocations, returns the allocation size we need to request + /// including the slack we need to satisfy the alignment request. #[inline] - fn page_layout(&self) -> Layout { - Layout::from_size_align(self.page_size, self.page_size).unwrap() - } - - /// If the allocation is greater than a page, then round to the nearest page #. - #[inline] - fn huge_normalized_layout(layout: Layout, page_size: usize) -> Layout { + fn huge_normalized_layout(&self, layout: Layout) -> usize { // Allocate in page-sized chunks - let size = layout.size().next_multiple_of(page_size); + let size = layout.size().next_multiple_of(self.page_size); // And make sure the align is at least one page - let align = std::cmp::max(layout.align(), page_size); - Layout::from_size_align(size, align).unwrap() + let align = std::cmp::max(layout.align(), self.page_size); + // pg_count gives us the # of pages needed to satisfy the size. For + // align > page_size where align = n * page_size, a sufficently-aligned + // address must exist somewhere in the range of + // some_page_aligned_address..some_page_aligned_address + (n-1) * page_size + // (since if some_page_aligned_address + n * page_size is sufficently aligned, + // then so is some_page_aligned_address itself per the definition of n, so we + // can avoid using that 1 extra page). + // Thus we allocate n-1 extra pages + let pg_count = size.div_ceil(self.page_size); + let extra_pages = align.strict_div(self.page_size).saturating_sub(1); + + pg_count.strict_add(extra_pages).strict_mul(self.page_size) } /// Determined whether a given normalized (size, align) should be sent to @@ -78,7 +84,7 @@ impl IsolatedAlloc { /// Allocates memory as described in `Layout`. This memory should be deallocated /// by calling `dealloc` on this same allocator. /// - /// SAFETY: See `alloc::alloc()` + /// SAFETY: See `alloc::alloc()`. pub unsafe fn alloc(&mut self, layout: Layout) -> *mut u8 { // SAFETY: Upheld by caller unsafe { self.allocate(layout, false) } @@ -86,7 +92,7 @@ impl IsolatedAlloc { /// Same as `alloc`, but zeroes out the memory. /// - /// SAFETY: See `alloc::alloc_zeroed()` + /// SAFETY: See `alloc::alloc_zeroed()`. pub unsafe fn alloc_zeroed(&mut self, layout: Layout) -> *mut u8 { // SAFETY: Upheld by caller unsafe { self.allocate(layout, true) } @@ -95,14 +101,13 @@ impl IsolatedAlloc { /// Abstracts over the logic of `alloc_zeroed` vs `alloc`, as determined by /// the `zeroed` argument. /// - /// SAFETY: See `alloc::alloc()`, with the added restriction that `page_size` - /// corresponds to the host pagesize. + /// SAFETY: See `alloc::alloc()`. unsafe fn allocate(&mut self, layout: Layout, zeroed: bool) -> *mut u8 { let layout = IsolatedAlloc::normalized_layout(layout); if self.is_huge_alloc(&layout) { // SAFETY: Validity of `layout` upheld by caller; we checked that // the size and alignment are appropriate for being a huge alloc - unsafe { self.alloc_huge(layout, zeroed) } + unsafe { self.alloc_huge(layout) } } else { for (&mut page, pinfo) in std::iter::zip(&mut self.page_ptrs, &mut self.page_infos) { // SAFETY: The value in `self.page_size` is used to allocate @@ -171,8 +176,19 @@ impl IsolatedAlloc { /// Expands the available memory pool by adding one page. fn add_page(&mut self) -> (*mut u8, &mut DenseBitSet) { - // SAFETY: The system page size, which is the layout size, cannot be 0 - let page_ptr = unsafe { alloc::alloc(self.page_layout()) }; + // SAFETY: mmap is always safe to call when requesting anonymous memory + let page_ptr = unsafe { + libc::mmap( + std::ptr::null_mut(), + self.page_size, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, + -1, + 0, + ) + .cast::() + }; + assert_ne!(page_ptr.addr(), usize::MAX, "mmap failed"); // `page_infos` has to have one bit for each `COMPRESSION_FACTOR`-sized chunk of bytes in the page. assert!(self.page_size % COMPRESSION_FACTOR == 0); self.page_infos.push(DenseBitSet::new_empty(self.page_size / COMPRESSION_FACTOR)); @@ -181,15 +197,28 @@ impl IsolatedAlloc { } /// Allocates in multiples of one page on the host system. + /// Will always be zeroed. /// /// SAFETY: Same as `alloc()`. - unsafe fn alloc_huge(&mut self, layout: Layout, zeroed: bool) -> *mut u8 { - let layout = IsolatedAlloc::huge_normalized_layout(layout, self.page_size); - // SAFETY: Upheld by caller - let ret = - unsafe { if zeroed { alloc::alloc_zeroed(layout) } else { alloc::alloc(layout) } }; - self.huge_ptrs.push((ret, layout.size())); - ret + unsafe fn alloc_huge(&mut self, layout: Layout) -> *mut u8 { + let size = self.huge_normalized_layout(layout); + // SAFETY: mmap is always safe to call when requesting anonymous memory + let ret = unsafe { + libc::mmap( + std::ptr::null_mut(), + size, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, + -1, + 0, + ) + .cast::() + }; + assert_ne!(ret.addr(), usize::MAX, "mmap failed"); + self.huge_ptrs.push((ret, size)); + // huge_normalized_layout ensures that we've overallocated enough space + // for this to be valid. + ret.map_addr(|a| a.next_multiple_of(layout.align())) } /// Deallocates a pointer from this allocator. @@ -218,15 +247,15 @@ impl IsolatedAlloc { let page_ptr = self.page_ptrs.remove(idx); // SAFETY: We checked that there are no outstanding allocations // from us pointing to this page, and we know it was allocated - // with this layout + // in add_page as exactly a single page. unsafe { - alloc::dealloc(page_ptr, self.page_layout()); + assert_eq!(libc::munmap(page_ptr.cast(), self.page_size), 0); } } } } - /// Returns the index of the page that this was deallocated from + /// Returns the index of the page that this was deallocated from. /// /// SAFETY: the pointer must have been allocated with `alloc_small`. unsafe fn dealloc_small(&mut self, ptr: *mut u8, layout: Layout) -> usize { @@ -255,18 +284,22 @@ impl IsolatedAlloc { /// SAFETY: Same as `dealloc()` with the added requirement that `layout` /// must ask for a size larger than the host pagesize. unsafe fn dealloc_huge(&mut self, ptr: *mut u8, layout: Layout) { - let layout = IsolatedAlloc::huge_normalized_layout(layout, self.page_size); - // Find the pointer matching in address with the one we got + let size = self.huge_normalized_layout(layout); + // Find the huge allocation containing `ptr`. let idx = self .huge_ptrs .iter() - .position(|pg| ptr.addr() == pg.0.addr()) + .position(|&(pg, size)| { + pg.addr() <= ptr.addr() && ptr.addr() < pg.addr().strict_add(size) + }) .expect("Freeing unallocated pages"); // And kick it from the list - self.huge_ptrs.remove(idx); - // SAFETY: Caller ensures validity of the layout + let (un_offset_ptr, size2) = self.huge_ptrs.remove(idx); + assert_eq!(size, size2, "got wrong layout in dealloc"); + // SAFETY: huge_ptrs contains allocations made with mmap with the size recorded there. unsafe { - alloc::dealloc(ptr, layout); + let ret = libc::munmap(un_offset_ptr.cast(), size); + assert_eq!(ret, 0); } } } @@ -350,12 +383,15 @@ mod tests { sizes.append(&mut vec![256; 12]); // Give it some multi-page ones too sizes.append(&mut vec![32 * 1024; 4]); + sizes.push(4 * 1024); // Matching aligns for the sizes let mut aligns = vec![16; 12]; aligns.append(&mut vec![256; 2]); aligns.append(&mut vec![64; 12]); aligns.append(&mut vec![4096; 4]); + // And one that requests align > page_size + aligns.push(64 * 1024); // Make sure we didn't mess up in the test itself! assert_eq!(sizes.len(), aligns.len()); From 422cd5d071acd2cdda65e12044f412e2bdf10a23 Mon Sep 17 00:00:00 2001 From: The Miri Cronjob Bot Date: Fri, 20 Jun 2025 04:55:38 +0000 Subject: [PATCH 09/38] Preparing for merge from rustc --- src/tools/miri/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index 30ba3070e1f44..2ef8d717d548b 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -d1d8e386c5e84c4ba857f56c3291f73c27e2d62a +255aa220821c05c3eac7605fce4ea1c9ab2cbdb4 From fdc2d52bc803ca559d6553afc178be0243846dfa Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Tue, 27 May 2025 22:31:23 +0200 Subject: [PATCH 10/38] supervisor bits of ffi ptracing --- src/tools/miri/Cargo.lock | 21 + src/tools/miri/Cargo.toml | 1 + src/tools/miri/src/alloc/isolated_alloc.rs | 46 +- src/tools/miri/src/shims/native_lib.rs | 18 +- src/tools/miri/src/shims/trace/child.rs | 67 +-- src/tools/miri/src/shims/trace/parent.rs | 534 +++++++++++++++++++-- 6 files changed, 606 insertions(+), 81 deletions(-) diff --git a/src/tools/miri/Cargo.lock b/src/tools/miri/Cargo.lock index 95124ac6fef43..d3123caaa4788 100644 --- a/src/tools/miri/Cargo.lock +++ b/src/tools/miri/Cargo.lock @@ -121,6 +121,26 @@ dependencies = [ "serde", ] +[[package]] +name = "capstone" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "015ef5d5ca1743e3f94af9509ba6bd2886523cfee46e48d15c2ef5216fd4ac9a" +dependencies = [ + "capstone-sys", + "libc", +] + +[[package]] +name = "capstone-sys" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2267cb8d16a1e4197863ec4284ffd1aec26fe7e57c58af46b02590a0235809a0" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "cargo-platform" version = "0.1.9" @@ -591,6 +611,7 @@ version = "0.1.0" dependencies = [ "aes", "bitflags", + "capstone", "chrono", "chrono-tz", "colored", diff --git a/src/tools/miri/Cargo.toml b/src/tools/miri/Cargo.toml index 76018efc4a63b..0b4b8e26bb8d0 100644 --- a/src/tools/miri/Cargo.toml +++ b/src/tools/miri/Cargo.toml @@ -44,6 +44,7 @@ libloading = "0.8" nix = { version = "0.30.1", features = ["mman", "ptrace", "signal"] } ipc-channel = "0.19.0" serde = { version = "1.0.219", features = ["derive"] } +capstone = "0.13" [dev-dependencies] ui_test = "0.29.1" diff --git a/src/tools/miri/src/alloc/isolated_alloc.rs b/src/tools/miri/src/alloc/isolated_alloc.rs index 21aead329ae73..0964d69a456bc 100644 --- a/src/tools/miri/src/alloc/isolated_alloc.rs +++ b/src/tools/miri/src/alloc/isolated_alloc.rs @@ -1,5 +1,6 @@ use std::alloc::Layout; +use nix::sys::mman; use rustc_index::bit_set::DenseBitSet; /// How many bytes of memory each bit in the bitset represents. @@ -304,13 +305,54 @@ impl IsolatedAlloc { pub fn pages(&self) -> Vec { let mut pages: Vec<_> = self.page_ptrs.clone().into_iter().map(|p| p.expose_provenance()).collect(); - for (ptr, size) in &self.huge_ptrs { + self.huge_ptrs.iter().for_each(|(ptr, size)| { for i in 0..size / self.page_size { pages.push(ptr.expose_provenance().strict_add(i * self.page_size)); } - } + }); pages } + + /// Protects all owned memory as `PROT_NONE`, preventing accesses. + /// + /// SAFETY: Accessing memory after this point will result in a segfault + /// unless it is first unprotected. + pub unsafe fn prepare_ffi(&mut self) -> Result<(), nix::errno::Errno> { + let prot = mman::ProtFlags::PROT_NONE; + unsafe { self.mprotect(prot) } + } + + /// Deprotects all owned memory by setting it to RW. Erroring here is very + /// likely unrecoverable, so it may panic if applying those permissions + /// fails. + pub fn unprep_ffi(&mut self) { + let prot = mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE; + unsafe { + self.mprotect(prot).unwrap(); + } + } + + /// Applies `prot` to every page managed by the allocator. + /// + /// SAFETY: Accessing memory in violation of the protection flags will + /// trigger a segfault. + unsafe fn mprotect(&mut self, prot: mman::ProtFlags) -> Result<(), nix::errno::Errno> { + for &pg in &self.page_ptrs { + unsafe { + // We already know only non-null ptrs are pushed to self.pages + let addr: std::ptr::NonNull = + std::ptr::NonNull::new_unchecked(pg.cast()); + mman::mprotect(addr, self.page_size, prot)?; + } + } + for &(hpg, size) in &self.huge_ptrs { + unsafe { + let addr = std::ptr::NonNull::new_unchecked(hpg.cast()); + mman::mprotect(addr, size.next_multiple_of(self.page_size), prot)?; + } + } + Ok(()) + } } #[cfg(test)] diff --git a/src/tools/miri/src/shims/native_lib.rs b/src/tools/miri/src/shims/native_lib.rs index 53d1de060f423..3f47b440f7d0c 100644 --- a/src/tools/miri/src/shims/native_lib.rs +++ b/src/tools/miri/src/shims/native_lib.rs @@ -229,7 +229,15 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { .collect::>>(); // Call the function and store output, depending on return type in the function signature. - let (ret, _) = this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?; + let (ret, maybe_memevents) = + this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?; + + #[cfg(target_os = "linux")] + if let Some(events) = maybe_memevents { + trace!("Registered FFI events:\n{events:#0x?}"); + } + #[cfg(not(target_os = "linux"))] + let _ = maybe_memevents; // Suppress the unused warning. this.write_immediate(*ret, dest)?; interp_ok(true) @@ -250,15 +258,15 @@ unsafe fn do_native_call( unsafe { if let Some(alloc) = alloc { - // SAFETY: We don't touch the machine memory past this point + // SAFETY: We don't touch the machine memory past this point. let (guard, stack_ptr) = Supervisor::start_ffi(alloc.clone()); - // SAFETY: Upheld by caller + // SAFETY: Upheld by caller. let ret = ffi::call(ptr, args); // SAFETY: We got the guard and stack pointer from start_ffi, and - // the allocator is the same + // the allocator is the same. (ret, Supervisor::end_ffi(guard, alloc, stack_ptr)) } else { - // SAFETY: Upheld by caller + // SAFETY: Upheld by caller. (ffi::call(ptr, args), None) } } diff --git a/src/tools/miri/src/shims/trace/child.rs b/src/tools/miri/src/shims/trace/child.rs index dcfdaad748e40..98fb1e3b9fe1d 100644 --- a/src/tools/miri/src/shims/trace/child.rs +++ b/src/tools/miri/src/shims/trace/child.rs @@ -52,30 +52,40 @@ impl Supervisor { // If the supervisor is not initialised for whatever reason, fast-fail. // This might be desired behaviour, as even on platforms where ptracing // is not implemented it enables us to enforce that only one FFI call - // happens at a time + // happens at a time. let Some(sv) = sv_guard.take() else { return (sv_guard, None); }; // Get pointers to all the pages the supervisor must allow accesses in - // and prepare the fake stack + // and prepare the fake stack. let page_ptrs = alloc.borrow().pages(); let raw_stack_ptr: *mut [u8; FAKE_STACK_SIZE] = Box::leak(Box::new([0u8; FAKE_STACK_SIZE])).as_mut_ptr().cast(); let stack_ptr = raw_stack_ptr.expose_provenance(); let start_info = StartFfiInfo { page_ptrs, stack_ptr }; + // SAFETY: We do not access machine memory past this point until the + // supervisor is ready to allow it. + unsafe { + if alloc.borrow_mut().prepare_ffi().is_err() { + // Don't mess up unwinding by maybe leaving the memory partly protected + alloc.borrow_mut().unprep_ffi(); + panic!("Cannot protect memory for FFI call!"); + } + } + // Send over the info. // NB: if we do not wait to receive a blank confirmation response, it is // possible that the supervisor is alerted of the SIGSTOP *before* it has // actually received the start_info, thus deadlocking! This way, we can - // enforce an ordering for these events + // enforce an ordering for these events. sv.message_tx.send(TraceRequest::StartFfi(start_info)).unwrap(); sv.confirm_rx.recv().unwrap(); *sv_guard = Some(sv); // We need to be stopped for the supervisor to be able to make certain // modifications to our memory - simply waiting on the recv() doesn't - // count + // count. signal::raise(signal::SIGSTOP).unwrap(); (sv_guard, Some(raw_stack_ptr)) } @@ -90,7 +100,7 @@ impl Supervisor { /// one passed to it also. pub unsafe fn end_ffi( mut sv_guard: std::sync::MutexGuard<'static, Option>, - _alloc: Rc>, + alloc: Rc>, raw_stack_ptr: Option<*mut [u8; FAKE_STACK_SIZE]>, ) -> Option { // We can't use IPC channels here to signal that FFI mode has ended, @@ -99,19 +109,22 @@ impl Supervisor { // simpler and more robust to simply use the signals which are left for // arbitrary usage. Since this will block until we are continued by the // supervisor, we can assume past this point that everything is back to - // normal + // normal. signal::raise(signal::SIGUSR1).unwrap(); + // This is safe! It just sets memory to normal expected permissions. + alloc.borrow_mut().unprep_ffi(); + // If this is `None`, then `raw_stack_ptr` is None and does not need to // be deallocated (and there's no need to worry about the guard, since - // it contains nothing) + // it contains nothing). let sv = sv_guard.take()?; // SAFETY: Caller upholds that this pointer was allocated as a box with - // this type + // this type. unsafe { drop(Box::from_raw(raw_stack_ptr.unwrap())); } - // On the off-chance something really weird happens, don't block forever + // On the off-chance something really weird happens, don't block forever. let ret = sv .event_rx .try_recv_timeout(std::time::Duration::from_secs(5)) @@ -138,33 +151,34 @@ impl Supervisor { /// The invariants for `fork()` must be upheld by the caller. pub unsafe fn init_sv() -> Result<(), SvInitError> { // FIXME: Much of this could be reimplemented via the mitosis crate if we upstream the - // relevant missing bits + // relevant missing bits. // On Linux, this will check whether ptrace is fully disabled by the Yama module. // If Yama isn't running or we're not on Linux, we'll still error later, but - // this saves a very expensive fork call + // this saves a very expensive fork call. let ptrace_status = std::fs::read_to_string("/proc/sys/kernel/yama/ptrace_scope"); if let Ok(stat) = ptrace_status { if let Some(stat) = stat.chars().next() { - // Fast-error if ptrace is fully disabled on the system + // Fast-error if ptrace is fully disabled on the system. if stat == '3' { return Err(SvInitError); } } } - // Initialise the supervisor if it isn't already, placing it into SUPERVISOR + // Initialise the supervisor if it isn't already, placing it into SUPERVISOR. let mut lock = SUPERVISOR.lock().unwrap(); if lock.is_some() { return Ok(()); } - // Prepare the IPC channels we need + // Prepare the IPC channels we need. let (message_tx, message_rx) = ipc::channel().unwrap(); let (confirm_tx, confirm_rx) = ipc::channel().unwrap(); let (event_tx, event_rx) = ipc::channel().unwrap(); - // SAFETY: Calling sysconf(_SC_PAGESIZE) is always safe and cannot error + // SAFETY: Calling sysconf(_SC_PAGESIZE) is always safe and cannot error. let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) }.try_into().unwrap(); + super::parent::PAGE_SIZE.store(page_size, std::sync::atomic::Ordering::Relaxed); unsafe { // TODO: Maybe use clone3() instead for better signalling of when the child exits? @@ -172,37 +186,36 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> { match unistd::fork().unwrap() { unistd::ForkResult::Parent { child } => { // If somehow another thread does exist, prevent it from accessing the lock - // and thus breaking our safety invariants + // and thus breaking our safety invariants. std::mem::forget(lock); // The child process is free to unwind, so we won't to avoid doubly freeing - // system resources + // system resources. let init = std::panic::catch_unwind(|| { let listener = ChildListener { message_rx, attached: false, override_retcode: None }; - // Trace as many things as possible, to be able to handle them as needed + // Trace as many things as possible, to be able to handle them as needed. let options = ptrace::Options::PTRACE_O_TRACESYSGOOD | ptrace::Options::PTRACE_O_TRACECLONE | ptrace::Options::PTRACE_O_TRACEFORK; - // Attach to the child process without stopping it + // Attach to the child process without stopping it. match ptrace::seize(child, options) { // Ptrace works :D Ok(_) => { - let code = sv_loop(listener, child, event_tx, confirm_tx, page_size) - .unwrap_err(); + let code = sv_loop(listener, child, event_tx, confirm_tx).unwrap_err(); // If a return code of 0 is not explicitly given, assume something went - // wrong and return 1 + // wrong and return 1. std::process::exit(code.unwrap_or(1)) } - // Ptrace does not work and we failed to catch that + // Ptrace does not work and we failed to catch that. Err(_) => { - // If we can't ptrace, Miri continues being the parent + // If we can't ptrace, Miri continues being the parent. signal::kill(child, signal::SIGKILL).unwrap(); SvInitError } } }); match init { - // The "Ok" case means that we couldn't ptrace + // The "Ok" case means that we couldn't ptrace. Ok(e) => return Err(e), Err(p) => { eprintln!("Supervisor process panicked!\n{p:?}"); @@ -212,12 +225,12 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> { } unistd::ForkResult::Child => { // Make sure we never get orphaned and stuck in SIGSTOP or similar - // SAFETY: prctl PR_SET_PDEATHSIG is always safe to call + // SAFETY: prctl PR_SET_PDEATHSIG is always safe to call. let ret = libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM); assert_eq!(ret, 0); // First make sure the parent succeeded with ptracing us! signal::raise(signal::SIGSTOP).unwrap(); - // If we're the child process, save the supervisor info + // If we're the child process, save the supervisor info. *lock = Some(Supervisor { message_tx, confirm_rx, event_rx }); } } diff --git a/src/tools/miri/src/shims/trace/parent.rs b/src/tools/miri/src/shims/trace/parent.rs index 5eb8aa8465193..a6c19584ef630 100644 --- a/src/tools/miri/src/shims/trace/parent.rs +++ b/src/tools/miri/src/shims/trace/parent.rs @@ -1,16 +1,108 @@ +use std::sync::atomic::{AtomicPtr, AtomicUsize}; + use ipc_channel::ipc; use nix::sys::{ptrace, signal, wait}; use nix::unistd; -use super::StartFfiInfo; -use super::messages::{Confirmation, MemEvents, TraceRequest}; +use crate::shims::trace::messages::{Confirmation, MemEvents, TraceRequest}; +use crate::shims::trace::{AccessEvent, FAKE_STACK_SIZE, StartFfiInfo}; /// The flags to use when calling `waitid()`. -/// Since bitwise OR on the nix version of these flags is implemented as a trait, -/// we can't use them directly so we do it this way +/// Since bitwise or on the nix version of these flags is implemented as a trait, +/// this cannot be const directly so we do it this way. const WAIT_FLAGS: wait::WaitPidFlag = wait::WaitPidFlag::from_bits_truncate(libc::WUNTRACED | libc::WEXITED); +/// Arch-specific maximum size a single access might perform. x86 value is set +/// assuming nothing bigger than AVX-512 is available. +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +const ARCH_MAX_ACCESS_SIZE: usize = 64; +/// The largest arm64 simd instruction operates on 16 bytes. +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +const ARCH_MAX_ACCESS_SIZE: usize = 16; +/// The max riscv vector instruction can access 8 consecutive 32-bit values. +#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +const ARCH_MAX_ACCESS_SIZE: usize = 32; + +/// The default word size on a given platform, in bytes. +#[cfg(any(target_arch = "x86", target_arch = "arm", target_arch = "riscv32"))] +const ARCH_WORD_SIZE: usize = 4; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "riscv64"))] +const ARCH_WORD_SIZE: usize = 8; + +/// The address of the page set to be edited, initialised to a sentinel null +/// pointer. +static PAGE_ADDR: AtomicPtr = AtomicPtr::new(std::ptr::null_mut()); +/// The host pagesize, initialised to a sentinel zero value. +pub static PAGE_SIZE: AtomicUsize = AtomicUsize::new(0); +/// How many consecutive pages to unprotect. 1 by default, unlikely to be set +/// higher than 2. +static PAGE_COUNT: AtomicUsize = AtomicUsize::new(1); + +/// Allows us to get common arguments from the `user_regs_t` across architectures. +/// Normally this would land us ABI hell, but thankfully all of our usecases +/// consist of functions with a small number of register-sized integer arguments. +/// See for sources. +trait ArchIndependentRegs { + /// Gets the address of the instruction pointer. + fn ip(&self) -> usize; + /// Set the instruction pointer; remember to also set the stack pointer, or + /// else the stack might get messed up! + fn set_ip(&mut self, ip: usize); + /// Set the stack pointer, ideally to a zeroed-out area. + fn set_sp(&mut self, sp: usize); +} + +// It's fine / desirable behaviour for values to wrap here, we care about just +// preserving the bit pattern. +#[cfg(target_arch = "x86_64")] +#[expect(clippy::as_conversions)] +#[rustfmt::skip] +impl ArchIndependentRegs for libc::user_regs_struct { + #[inline] + fn ip(&self) -> usize { self.rip as _ } + #[inline] + fn set_ip(&mut self, ip: usize) { self.rip = ip as _ } + #[inline] + fn set_sp(&mut self, sp: usize) { self.rsp = sp as _ } +} + +#[cfg(target_arch = "x86")] +#[expect(clippy::as_conversions)] +#[rustfmt::skip] +impl ArchIndependentRegs for libc::user_regs_struct { + #[inline] + fn ip(&self) -> usize { self.eip as _ } + #[inline] + fn set_ip(&mut self, ip: usize) { self.eip = ip as _ } + #[inline] + fn set_sp(&mut self, sp: usize) { self.esp = sp as _ } +} + +#[cfg(target_arch = "aarch64")] +#[expect(clippy::as_conversions)] +#[rustfmt::skip] +impl ArchIndependentRegs for libc::user_regs_struct { + #[inline] + fn ip(&self) -> usize { self.pc as _ } + #[inline] + fn set_ip(&mut self, ip: usize) { self.pc = ip as _ } + #[inline] + fn set_sp(&mut self, sp: usize) { self.sp = sp as _ } +} + +#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +#[expect(clippy::as_conversions)] +#[rustfmt::skip] +impl ArchIndependentRegs for libc::user_regs_struct { + #[inline] + fn ip(&self) -> usize { self.pc as _ } + #[inline] + fn set_ip(&mut self, ip: usize) { self.pc = ip as _ } + #[inline] + fn set_sp(&mut self, sp: usize) { self.sp = sp as _ } +} + /// A unified event representing something happening on the child process. Wraps /// `nix`'s `WaitStatus` and our custom signals so it can all be done with one /// `match` statement. @@ -22,7 +114,7 @@ pub enum ExecEvent { End, /// The child process with the specified pid was stopped by the given signal. Status(unistd::Pid, signal::Signal), - /// The child process with the specified pid entered or exited a syscall. + /// The child process with the specified pid entered or existed a syscall. Syscall(unistd::Pid), /// A child process exited or was killed; if we have a return code, it is /// specified. @@ -42,10 +134,10 @@ pub struct ChildListener { impl Iterator for ChildListener { type Item = ExecEvent; - // Allows us to monitor the child process by just iterating over the listener + // Allows us to monitor the child process by just iterating over the listener. // NB: This should never return None! fn next(&mut self) -> Option { - // Do not block if the child has nothing to report for `waitid` + // Do not block if the child has nothing to report for `waitid`. let opts = WAIT_FLAGS | wait::WaitPidFlag::WNOHANG; loop { // Listen to any child, not just the main one. Important if we want @@ -55,17 +147,17 @@ impl Iterator for ChildListener { match wait::waitid(wait::Id::All, opts) { Ok(stat) => match stat { - // Child exited normally with a specific code set + // Child exited normally with a specific code set. wait::WaitStatus::Exited(_, code) => { let code = self.override_retcode.unwrap_or(code); return Some(ExecEvent::Died(Some(code))); } - // Child was killed by a signal, without giving a code + // Child was killed by a signal, without giving a code. wait::WaitStatus::Signaled(_, _, _) => return Some(ExecEvent::Died(self.override_retcode)), // Child entered a syscall. Since we're always technically // tracing, only pass this along if we're actively - // monitoring the child + // monitoring the child. wait::WaitStatus::PtraceSyscall(pid) => if self.attached { return Some(ExecEvent::Syscall(pid)); @@ -84,11 +176,11 @@ impl Iterator for ChildListener { return Some(ExecEvent::Status(pid, signal)); } } else { - // Just pass along the signal + // Just pass along the signal. ptrace::cont(pid, signal).unwrap(); }, // Child was stopped at the given signal. Same logic as for - // WaitStatus::PtraceEvent + // WaitStatus::PtraceEvent. wait::WaitStatus::Stopped(pid, signal) => if self.attached { if signal == signal::SIGUSR1 { @@ -104,11 +196,11 @@ impl Iterator for ChildListener { }, // This case should only trigger if all children died and we // somehow missed that, but it's best we not allow any room - // for deadlocks + // for deadlocks. Err(_) => return Some(ExecEvent::Died(None)), } - // Similarly, do a non-blocking poll of the IPC channel + // Similarly, do a non-blocking poll of the IPC channel. if let Ok(req) = self.message_rx.try_recv() { match req { TraceRequest::StartFfi(info) => @@ -123,7 +215,7 @@ impl Iterator for ChildListener { } } - // Not ideal, but doing anything else might sacrifice performance + // Not ideal, but doing anything else might sacrifice performance. std::thread::yield_now(); } } @@ -134,6 +226,8 @@ impl Iterator for ChildListener { enum ExecError { /// The child process died with this return code, if we have one. Died(Option), + /// Something errored, but we should ignore it and proceed. + Shrug, } /// This is the main loop of the supervisor process. It runs in a separate @@ -144,35 +238,42 @@ pub fn sv_loop( init_pid: unistd::Pid, event_tx: ipc::IpcSender, confirm_tx: ipc::IpcSender, - _page_size: usize, ) -> Result> { - // Things that we return to the child process + // Get the pagesize set and make sure it isn't still on the zero sentinel value! + let page_size = PAGE_SIZE.load(std::sync::atomic::Ordering::Relaxed); + assert_ne!(page_size, 0); + + // Things that we return to the child process. let mut acc_events = Vec::new(); - // Memory allocated on the MiriMachine - let mut _ch_pages = Vec::new(); - let mut _ch_stack = None; + // Memory allocated for the MiriMachine. + let mut ch_pages = Vec::new(); + let mut ch_stack = None; + + // An instance of the Capstone disassembler, so we don't spawn one on every access. + let cs = get_disasm(); // The pid of the last process we interacted with, used by default if we don't have a - // reason to use a different one + // reason to use a different one. let mut curr_pid = init_pid; - // There's an initial sigstop we need to deal with + // There's an initial sigstop we need to deal with. wait_for_signal(Some(curr_pid), signal::SIGSTOP, false).map_err(|e| { match e { ExecError::Died(code) => code, + ExecError::Shrug => None, } })?; ptrace::cont(curr_pid, None).unwrap(); for evt in listener { match evt { - // start_ffi was called by the child, so prep memory + // start_ffi was called by the child, so prep memory. ExecEvent::Start(ch_info) => { - // All the pages that the child process is "allowed to" access - _ch_pages = ch_info.page_ptrs; - // And the fake stack it allocated for us to use later - _ch_stack = Some(ch_info.stack_ptr); + // All the pages that the child process is "allowed to" access. + ch_pages = ch_info.page_ptrs; + // And the fake stack it allocated for us to use later. + ch_stack = Some(ch_info.stack_ptr); // We received the signal and are no longer in the main listener loop, // so we can let the child move on to the end of start_ffi where it will @@ -180,34 +281,56 @@ pub fn sv_loop( // order to do most ptrace operations! confirm_tx.send(Confirmation).unwrap(); // We can't trust simply calling `Pid::this()` in the child process to give the right - // PID for us, so we get it this way + // PID for us, so we get it this way. curr_pid = wait_for_signal(None, signal::SIGSTOP, false).unwrap(); ptrace::syscall(curr_pid, None).unwrap(); } - // end_ffi was called by the child + // end_ffi was called by the child. ExecEvent::End => { - // Hand over the access info we traced + // Hand over the access info we traced. event_tx.send(MemEvents { acc_events }).unwrap(); - // And reset our values + // And reset our values. acc_events = Vec::new(); - _ch_stack = None; + ch_stack = None; - // No need to monitor syscalls anymore, they'd just be ignored + // No need to monitor syscalls anymore, they'd just be ignored. ptrace::cont(curr_pid, None).unwrap(); } // Child process was stopped by a signal - ExecEvent::Status(pid, signal) => { - eprintln!("Process unexpectedly got {signal}; continuing..."); - // In case we're not tracing - if ptrace::syscall(pid, signal).is_err() { - // If *this* fails too, something really weird happened - // and it's probably best to just panic - signal::kill(pid, signal::SIGCONT).unwrap(); - } - } + ExecEvent::Status(pid, signal) => + match signal { + // If it was a segfault, check if it was an artificial one + // caused by it trying to access the MiriMachine memory. + signal::SIGSEGV => + match handle_segfault( + pid, + &ch_pages, + ch_stack.unwrap(), + page_size, + &cs, + &mut acc_events, + ) { + Err(e) => + match e { + ExecError::Died(code) => return Err(code), + ExecError::Shrug => continue, + }, + _ => (), + }, + // Something weird happened. + _ => { + eprintln!("Process unexpectedly got {signal}; continuing..."); + // In case we're not tracing + if ptrace::syscall(pid, None).is_err() { + // If *this* fails too, something really weird happened + // and it's probably best to just panic. + signal::kill(pid, signal::SIGCONT).unwrap(); + } + } + }, // Child entered a syscall; we wait for exits inside of this, so it - // should never trigger on return from a syscall we care about + // should never trigger on return from a syscall we care about. ExecEvent::Syscall(pid) => { ptrace::syscall(pid, None).unwrap(); } @@ -220,6 +343,30 @@ pub fn sv_loop( unreachable!() } +/// Spawns a Capstone disassembler for the host architecture. +#[rustfmt::skip] +fn get_disasm() -> capstone::Capstone { + use capstone::prelude::*; + let cs_pre = Capstone::new(); + { + #[cfg(target_arch = "x86_64")] + {cs_pre.x86().mode(arch::x86::ArchMode::Mode64)} + #[cfg(target_arch = "x86")] + {cs_pre.x86().mode(arch::x86::ArchMode::Mode32)} + #[cfg(target_arch = "aarch64")] + {cs_pre.arm64().mode(arch::arm64::ArchMode::Arm)} + #[cfg(target_arch = "arm")] + {cs_pre.arm().mode(arch::arm::ArchMode::Arm)} + #[cfg(target_arch = "riscv64")] + {cs_pre.riscv().mode(arch::riscv::ArchMode::RiscV64)} + #[cfg(target_arch = "riscv32")] + {cs_pre.riscv().mode(arch::riscv::ArchMode::RiscV32)} + } + .detail(true) + .build() + .unwrap() +} + /// Waits for `wait_signal`. If `init_cont`, it will first do a `ptrace::cont`. /// We want to avoid that in some cases, like at the beginning of FFI. /// @@ -232,7 +379,7 @@ fn wait_for_signal( if init_cont { ptrace::cont(pid.unwrap(), None).unwrap(); } - // Repeatedly call `waitid` until we get the signal we want, or the process dies + // Repeatedly call `waitid` until we get the signal we want, or the process dies. loop { let wait_id = match pid { Some(pid) => wait::Id::Pid(pid), @@ -240,7 +387,7 @@ fn wait_for_signal( }; let stat = wait::waitid(wait_id, WAIT_FLAGS).map_err(|_| ExecError::Died(None))?; let (signal, pid) = match stat { - // Report the cause of death, if we know it + // Report the cause of death, if we know it. wait::WaitStatus::Exited(_, code) => { return Err(ExecError::Died(Some(code))); } @@ -248,7 +395,7 @@ fn wait_for_signal( wait::WaitStatus::Stopped(pid, signal) => (signal, pid), wait::WaitStatus::PtraceEvent(pid, signal, _) => (signal, pid), // This covers PtraceSyscall and variants that are impossible with - // the flags set (e.g. WaitStatus::StillAlive) + // the flags set (e.g. WaitStatus::StillAlive). _ => { ptrace::cont(pid.unwrap(), None).unwrap(); continue; @@ -257,7 +404,300 @@ fn wait_for_signal( if signal == wait_signal { return Ok(pid); } else { - ptrace::cont(pid, None).map_err(|_| ExecError::Died(None))?; + ptrace::cont(pid, signal).map_err(|_| ExecError::Died(None))?; + } + } +} + +/// Grabs the access that caused a segfault and logs it down if it's to our memory, +/// or kills the child and returns the appropriate error otherwise. +fn handle_segfault( + pid: unistd::Pid, + ch_pages: &[usize], + ch_stack: usize, + page_size: usize, + cs: &capstone::Capstone, + acc_events: &mut Vec, +) -> Result<(), ExecError> { + /// This is just here to not pollute the main namespace with `capstone::prelude::*`. + #[inline] + fn capstone_disassemble( + instr: &[u8], + addr: usize, + cs: &capstone::Capstone, + acc_events: &mut Vec, + ) -> capstone::CsResult<()> { + use capstone::prelude::*; + + // The arch_detail is what we care about, but it relies on these temporaries + // that we can't drop. 0x1000 is the default base address for Captsone, and + // we're expecting 1 instruction. + let insns = cs.disasm_count(instr, 0x1000, 1)?; + let ins_detail = cs.insn_detail(&insns[0])?; + let arch_detail = ins_detail.arch_detail(); + + for op in arch_detail.operands() { + match op { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + arch::ArchOperand::X86Operand(x86_operand) => { + match x86_operand.op_type { + // We only care about memory accesses + arch::x86::X86OperandType::Mem(_) => { + let push = addr..addr.strict_add(usize::from(x86_operand.size)); + // It's called a "RegAccessType" but it also applies to memory + let acc_ty = x86_operand.access.unwrap(); + if acc_ty.is_readable() { + acc_events.push(AccessEvent::Read(push.clone())); + } + if acc_ty.is_writable() { + acc_events.push(AccessEvent::Write(push)); + } + } + _ => (), + } + } + #[cfg(target_arch = "aarch64")] + arch::ArchOperand::Arm64Operand(arm64_operand) => { + // Annoyingly, we don't always get the size here, so just be pessimistic for now. + match arm64_operand.op_type { + arch::arm64::Arm64OperandType::Mem(_) => { + // B = 1 byte, H = 2 bytes, S = 4 bytes, D = 8 bytes, Q = 16 bytes. + let size = match arm64_operand.vas { + // Not an fp/simd instruction. + arch::arm64::Arm64Vas::ARM64_VAS_INVALID => ARCH_WORD_SIZE, + // 1 byte. + arch::arm64::Arm64Vas::ARM64_VAS_1B => 1, + // 2 bytes. + arch::arm64::Arm64Vas::ARM64_VAS_1H => 2, + // 4 bytes. + arch::arm64::Arm64Vas::ARM64_VAS_4B + | arch::arm64::Arm64Vas::ARM64_VAS_2H + | arch::arm64::Arm64Vas::ARM64_VAS_1S => 4, + // 8 bytes. + arch::arm64::Arm64Vas::ARM64_VAS_8B + | arch::arm64::Arm64Vas::ARM64_VAS_4H + | arch::arm64::Arm64Vas::ARM64_VAS_2S + | arch::arm64::Arm64Vas::ARM64_VAS_1D => 8, + // 16 bytes. + arch::arm64::Arm64Vas::ARM64_VAS_16B + | arch::arm64::Arm64Vas::ARM64_VAS_8H + | arch::arm64::Arm64Vas::ARM64_VAS_4S + | arch::arm64::Arm64Vas::ARM64_VAS_2D + | arch::arm64::Arm64Vas::ARM64_VAS_1Q => 16, + }; + let push = addr..addr.strict_add(size); + // FIXME: This now has access type info in the latest + // git version of capstone because this pissed me off + // and I added it. Change this when it updates. + acc_events.push(AccessEvent::Read(push.clone())); + acc_events.push(AccessEvent::Write(push)); + } + _ => (), + } + } + #[cfg(target_arch = "arm")] + arch::ArchOperand::ArmOperand(arm_operand) => + match arm_operand.op_type { + arch::arm::ArmOperandType::Mem(_) => { + // We don't get info on the size of the access, but + // we're at least told if it's a vector instruction. + let size = if arm_operand.vector_index.is_some() { + ARCH_MAX_ACCESS_SIZE + } else { + ARCH_WORD_SIZE + }; + let push = addr..addr.strict_add(size); + let acc_ty = arm_operand.access.unwrap(); + if acc_ty.is_readable() { + acc_events.push(AccessEvent::Read(push.clone())); + } + if acc_ty.is_writable() { + acc_events.push(AccessEvent::Write(push)); + } + } + _ => (), + }, + #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] + arch::ArchOperand::RiscVOperand(risc_voperand) => { + match risc_voperand { + arch::riscv::RiscVOperand::Mem(_) => { + // We get basically no info here. + let push = addr..addr.strict_add(size); + acc_events.push(AccessEvent::Read(push.clone())); + acc_events.push(AccessEvent::Write(push)); + } + _ => (), + } + } + _ => unimplemented!(), + } + } + + Ok(()) + } + + // Get information on what caused the segfault. This contains the address + // that triggered it. + let siginfo = ptrace::getsiginfo(pid).map_err(|_| ExecError::Shrug)?; + // All x86, ARM, etc. instructions only have at most one memory operand + // (thankfully!) + // SAFETY: si_addr is safe to call. + let addr = unsafe { siginfo.si_addr().addr() }; + let page_addr = addr.strict_sub(addr.strict_rem(page_size)); + + if ch_pages.iter().any(|pg| (*pg..pg.strict_add(page_size)).contains(&addr)) { + // Overall structure: + // - Get the address that caused the segfault + // - Unprotect the memory + // - Step 1 instruction + // - Parse executed code to estimate size & type of access + // - Reprotect the memory + // - Continue + + // Ensure the stack is properly zeroed out! + for a in (ch_stack..ch_stack.strict_add(page_size)).step_by(ARCH_WORD_SIZE) { + ptrace::write(pid, std::ptr::with_exposed_provenance_mut(a), 0).unwrap(); + } + + // Guard against both architectures with upwards and downwards-growing stacks. + let stack_ptr = ch_stack.strict_add(FAKE_STACK_SIZE / 2); + let regs_bak = ptrace::getregs(pid).unwrap(); + let mut new_regs = regs_bak; + let ip_prestep = regs_bak.ip(); + + // Move the instr ptr into the deprotection code. + #[expect(clippy::as_conversions)] + new_regs.set_ip(mempr_off as usize); + // Don't mess up the stack by accident! + new_regs.set_sp(stack_ptr); + + // Modify the PAGE_ADDR global on the child process to point to the page + // that we want unprotected. + ptrace::write( + pid, + (&raw const PAGE_ADDR).cast_mut().cast(), + libc::c_long::try_from(page_addr).unwrap(), + ) + .unwrap(); + + // Check if we also own the next page, and if so unprotect it in case + // the access spans the page boundary. + if ch_pages.contains(&page_addr.strict_add(page_size)) { + ptrace::write(pid, (&raw const PAGE_COUNT).cast_mut().cast(), 2).unwrap(); + } else { + ptrace::write(pid, (&raw const PAGE_COUNT).cast_mut().cast(), 1).unwrap(); } + + ptrace::setregs(pid, new_regs).unwrap(); + + // Our mempr_* functions end with a raise(SIGSTOP). + wait_for_signal(Some(pid), signal::SIGSTOP, true)?; + + // Step 1 instruction. + ptrace::setregs(pid, regs_bak).unwrap(); + ptrace::step(pid, None).unwrap(); + // Don't use wait_for_signal here since 1 instruction doesn't give room + // for any uncertainty + we don't want it `cont()`ing randomly by accident + // Also, don't let it continue with unprotected memory if something errors! + let _ = wait::waitid(wait::Id::Pid(pid), WAIT_FLAGS).map_err(|_| ExecError::Died(None))?; + + // Save registers and grab the bytes that were executed. This would + // be really nasty if it was a jump or similar but those thankfully + // won't do memory accesses and so can't trigger this! + let regs_bak = ptrace::getregs(pid).unwrap(); + new_regs = regs_bak; + let ip_poststep = regs_bak.ip(); + // We need to do reads/writes in word-sized chunks. + let diff = (ip_poststep.strict_sub(ip_prestep)).div_ceil(ARCH_WORD_SIZE); + let instr = (ip_prestep..ip_prestep.strict_add(diff)).fold(vec![], |mut ret, ip| { + // This only needs to be a valid pointer in the child process, not ours. + ret.append( + &mut ptrace::read(pid, std::ptr::without_provenance_mut(ip)) + .unwrap() + .to_ne_bytes() + .to_vec(), + ); + ret + }); + + // Now figure out the size + type of access and log it down + // This will mark down e.g. the same area being read multiple times, + // since it's more efficient to compress the accesses at the end. + if capstone_disassemble(&instr, addr, cs, acc_events).is_err() { + // Read goes first because we need to be pessimistic. + acc_events.push(AccessEvent::Read(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE))); + acc_events.push(AccessEvent::Write(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE))); + } + + // Reprotect everything and continue. + #[expect(clippy::as_conversions)] + new_regs.set_ip(mempr_on as usize); + new_regs.set_sp(stack_ptr); + ptrace::setregs(pid, new_regs).unwrap(); + wait_for_signal(Some(pid), signal::SIGSTOP, true)?; + + ptrace::setregs(pid, regs_bak).unwrap(); + ptrace::syscall(pid, None).unwrap(); + Ok(()) + } else { + // This was a real segfault, so print some debug info and quit. + let regs = ptrace::getregs(pid).unwrap(); + eprintln!("Segfault occurred during FFI at {addr:#018x}"); + eprintln!("Expected access on pages: {ch_pages:#018x?}"); + eprintln!("Register dump: {regs:#x?}"); + ptrace::kill(pid).unwrap(); + Err(ExecError::Died(None)) + } +} + +// We only get dropped into these functions via offsetting the instr pointer +// manually, so we *must not ever* unwind from them. + +/// Disables protections on the page whose address is currently in `PAGE_ADDR`. +/// +/// SAFETY: `PAGE_ADDR` should be set to a page-aligned pointer to an owned page, +/// `PAGE_SIZE` should be the host pagesize, and the range from `PAGE_ADDR` to +/// `PAGE_SIZE` * `PAGE_COUNT` must be owned and allocated memory. No other threads +/// should be running. +pub unsafe extern "C" fn mempr_off() { + use std::sync::atomic::Ordering; + + // Again, cannot allow unwinds to happen here. + let len = PAGE_SIZE.load(Ordering::Relaxed).saturating_mul(PAGE_COUNT.load(Ordering::Relaxed)); + // SAFETY: Upheld by "caller". + unsafe { + // It's up to the caller to make sure this doesn't actually overflow, but + // we mustn't unwind from here, so... + if libc::mprotect( + PAGE_ADDR.load(Ordering::Relaxed).cast(), + len, + libc::PROT_READ | libc::PROT_WRITE, + ) != 0 + { + // Can't return or unwind, but we can do this. + std::process::exit(-1); + } + } + // If this fails somehow we're doomed. + if signal::raise(signal::SIGSTOP).is_err() { + std::process::exit(-1); + } +} + +/// Reenables protection on the page set by `PAGE_ADDR`. +/// +/// SAFETY: See `mempr_off()`. +pub unsafe extern "C" fn mempr_on() { + use std::sync::atomic::Ordering; + + let len = PAGE_SIZE.load(Ordering::Relaxed).wrapping_mul(PAGE_COUNT.load(Ordering::Relaxed)); + // SAFETY: Upheld by "caller". + unsafe { + if libc::mprotect(PAGE_ADDR.load(Ordering::Relaxed).cast(), len, libc::PROT_NONE) != 0 { + std::process::exit(-1); + } + } + if signal::raise(signal::SIGSTOP).is_err() { + std::process::exit(-1); } } From d20f3a83c2dcd36904a0c45d17be1a9d4747deae Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Thu, 19 Jun 2025 15:47:47 +0200 Subject: [PATCH 11/38] fix dumb mistake --- src/tools/miri/src/shims/trace/parent.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/tools/miri/src/shims/trace/parent.rs b/src/tools/miri/src/shims/trace/parent.rs index a6c19584ef630..d00d77b7b0fcd 100644 --- a/src/tools/miri/src/shims/trace/parent.rs +++ b/src/tools/miri/src/shims/trace/parent.rs @@ -555,7 +555,7 @@ fn handle_segfault( // - Continue // Ensure the stack is properly zeroed out! - for a in (ch_stack..ch_stack.strict_add(page_size)).step_by(ARCH_WORD_SIZE) { + for a in (ch_stack..ch_stack.strict_add(FAKE_STACK_SIZE)).step_by(ARCH_WORD_SIZE) { ptrace::write(pid, std::ptr::with_exposed_provenance_mut(a), 0).unwrap(); } @@ -601,6 +601,11 @@ fn handle_segfault( // Also, don't let it continue with unprotected memory if something errors! let _ = wait::waitid(wait::Id::Pid(pid), WAIT_FLAGS).map_err(|_| ExecError::Died(None))?; + // Zero out again to be safe + for a in (ch_stack..ch_stack.strict_add(FAKE_STACK_SIZE)).step_by(ARCH_WORD_SIZE) { + ptrace::write(pid, std::ptr::with_exposed_provenance_mut(a), 0).unwrap(); + } + // Save registers and grab the bytes that were executed. This would // be really nasty if it was a jump or similar but those thankfully // won't do memory accesses and so can't trigger this! From 6a0976a5211b4132c079475d79e33bf8ebabc832 Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Fri, 20 Jun 2025 22:14:25 +0200 Subject: [PATCH 12/38] cfg if --- src/tools/miri/src/shims/native_lib.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/tools/miri/src/shims/native_lib.rs b/src/tools/miri/src/shims/native_lib.rs index 3f47b440f7d0c..033e4f3671620 100644 --- a/src/tools/miri/src/shims/native_lib.rs +++ b/src/tools/miri/src/shims/native_lib.rs @@ -232,12 +232,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let (ret, maybe_memevents) = this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?; - #[cfg(target_os = "linux")] - if let Some(events) = maybe_memevents { + if cfg!(target_os = "linux") && let Some(events) = maybe_memevents { trace!("Registered FFI events:\n{events:#0x?}"); } - #[cfg(not(target_os = "linux"))] - let _ = maybe_memevents; // Suppress the unused warning. this.write_immediate(*ret, dest)?; interp_ok(true) From a536d89775c46192d4d68f0fa10f4e190b325d51 Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Fri, 20 Jun 2025 22:25:17 +0200 Subject: [PATCH 13/38] nonnulls --- src/tools/miri/src/alloc/isolated_alloc.rs | 40 ++++++++++------------ 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/src/tools/miri/src/alloc/isolated_alloc.rs b/src/tools/miri/src/alloc/isolated_alloc.rs index 0964d69a456bc..0b3218832b61f 100644 --- a/src/tools/miri/src/alloc/isolated_alloc.rs +++ b/src/tools/miri/src/alloc/isolated_alloc.rs @@ -1,4 +1,4 @@ -use std::alloc::Layout; +use std::{alloc::Layout, ptr::NonNull}; use nix::sys::mman; use rustc_index::bit_set::DenseBitSet; @@ -13,7 +13,7 @@ pub struct IsolatedAlloc { /// Pointers to page-aligned memory that has been claimed by the allocator. /// Every pointer here must point to a page-sized allocation claimed via /// mmap. These pointers are used for "small" allocations. - page_ptrs: Vec<*mut u8>, + page_ptrs: Vec>, /// Metadata about which bytes have been allocated on each page. The length /// of this vector must be the same as that of `page_ptrs`, and the domain /// size of the bitset must be exactly `page_size / COMPRESSION_FACTOR`. @@ -25,7 +25,7 @@ pub struct IsolatedAlloc { page_infos: Vec>, /// Pointers to multiple-page-sized allocations. These must also be page-aligned, /// with their size stored as the second element of the vector. - huge_ptrs: Vec<(*mut u8, usize)>, + huge_ptrs: Vec<(NonNull, usize)>, /// The host (not emulated) page size. page_size: usize, } @@ -138,7 +138,7 @@ impl IsolatedAlloc { unsafe fn alloc_small( page_size: usize, layout: Layout, - page: *mut u8, + page: NonNull, pinfo: &mut DenseBitSet, zeroed: bool, ) -> Option<*mut u8> { @@ -165,7 +165,7 @@ impl IsolatedAlloc { // zero out, even if we allocated more ptr.write_bytes(0, layout.size()); } - return Some(ptr); + return Some(ptr.as_ptr()); } } } @@ -173,7 +173,7 @@ impl IsolatedAlloc { } /// Expands the available memory pool by adding one page. - fn add_page(&mut self) -> (*mut u8, &mut DenseBitSet) { + fn add_page(&mut self) -> (NonNull, &mut DenseBitSet) { // SAFETY: mmap is always safe to call when requesting anonymous memory let page_ptr = unsafe { libc::mmap( @@ -190,8 +190,8 @@ impl IsolatedAlloc { // `page_infos` has to have one bit for each `COMPRESSION_FACTOR`-sized chunk of bytes in the page. assert!(self.page_size % COMPRESSION_FACTOR == 0); self.page_infos.push(DenseBitSet::new_empty(self.page_size / COMPRESSION_FACTOR)); - self.page_ptrs.push(page_ptr); - (page_ptr, self.page_infos.last_mut().unwrap()) + self.page_ptrs.push(NonNull::new(page_ptr).unwrap()); + (NonNull::new(page_ptr).unwrap(), self.page_infos.last_mut().unwrap()) } /// Allocates in multiples of one page on the host system. @@ -213,7 +213,7 @@ impl IsolatedAlloc { .cast::() }; assert_ne!(ret.addr(), usize::MAX, "mmap failed"); - self.huge_ptrs.push((ret, size)); + self.huge_ptrs.push((NonNull::new(ret).unwrap(), size)); // huge_normalized_layout ensures that we've overallocated enough space // for this to be valid. ret.map_addr(|a| a.next_multiple_of(layout.align())) @@ -247,7 +247,7 @@ impl IsolatedAlloc { // from us pointing to this page, and we know it was allocated // in add_page as exactly a single page. unsafe { - assert_eq!(libc::munmap(page_ptr.cast(), self.page_size), 0); + assert_eq!(libc::munmap(page_ptr.as_ptr().cast(), self.page_size), 0); } } } @@ -266,7 +266,7 @@ impl IsolatedAlloc { // This could be made faster if the list was sorted -- the allocator isn't fully optimized at the moment. let pinfo = std::iter::zip(&mut self.page_ptrs, &mut self.page_infos) .enumerate() - .find(|(_, (page, _))| page.addr() == page_addr); + .find(|(_, (page, _))| page.addr().get() == page_addr); let Some((idx_of_pinfo, (_, pinfo))) = pinfo else { panic!("Freeing in an unallocated page: {ptr:?}\nHolding pages {:?}", self.page_ptrs) }; @@ -288,7 +288,7 @@ impl IsolatedAlloc { .huge_ptrs .iter() .position(|&(pg, size)| { - pg.addr() <= ptr.addr() && ptr.addr() < pg.addr().strict_add(size) + pg.addr().get() <= ptr.addr() && ptr.addr() < pg.addr().get().strict_add(size) }) .expect("Freeing unallocated pages"); // And kick it from the list @@ -296,18 +296,18 @@ impl IsolatedAlloc { assert_eq!(size, size2, "got wrong layout in dealloc"); // SAFETY: huge_ptrs contains allocations made with mmap with the size recorded there. unsafe { - let ret = libc::munmap(un_offset_ptr.cast(), size); + let ret = libc::munmap(un_offset_ptr.as_ptr().cast(), size); assert_eq!(ret, 0); } } /// Returns a vector of page addresses managed by the allocator. pub fn pages(&self) -> Vec { - let mut pages: Vec<_> = - self.page_ptrs.clone().into_iter().map(|p| p.expose_provenance()).collect(); + let mut pages: Vec = + self.page_ptrs.clone().into_iter().map(|p| p.expose_provenance().get()).collect(); self.huge_ptrs.iter().for_each(|(ptr, size)| { for i in 0..size / self.page_size { - pages.push(ptr.expose_provenance().strict_add(i * self.page_size)); + pages.push(ptr.expose_provenance().get().strict_add(i * self.page_size)); } }); pages @@ -339,16 +339,12 @@ impl IsolatedAlloc { unsafe fn mprotect(&mut self, prot: mman::ProtFlags) -> Result<(), nix::errno::Errno> { for &pg in &self.page_ptrs { unsafe { - // We already know only non-null ptrs are pushed to self.pages - let addr: std::ptr::NonNull = - std::ptr::NonNull::new_unchecked(pg.cast()); - mman::mprotect(addr, self.page_size, prot)?; + mman::mprotect(pg.cast(), self.page_size, prot)?; } } for &(hpg, size) in &self.huge_ptrs { unsafe { - let addr = std::ptr::NonNull::new_unchecked(hpg.cast()); - mman::mprotect(addr, size.next_multiple_of(self.page_size), prot)?; + mman::mprotect(hpg.cast(), size.next_multiple_of(self.page_size), prot)?; } } Ok(()) From d59518d9ba15faf698a41f0235df35e55412b69f Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Fri, 20 Jun 2025 22:31:11 +0200 Subject: [PATCH 14/38] error rework --- src/tools/miri/src/shims/trace/child.rs | 2 +- src/tools/miri/src/shims/trace/parent.rs | 48 ++++++++---------------- 2 files changed, 17 insertions(+), 33 deletions(-) diff --git a/src/tools/miri/src/shims/trace/child.rs b/src/tools/miri/src/shims/trace/child.rs index 98fb1e3b9fe1d..c320537b94495 100644 --- a/src/tools/miri/src/shims/trace/child.rs +++ b/src/tools/miri/src/shims/trace/child.rs @@ -204,7 +204,7 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> { let code = sv_loop(listener, child, event_tx, confirm_tx).unwrap_err(); // If a return code of 0 is not explicitly given, assume something went // wrong and return 1. - std::process::exit(code.unwrap_or(1)) + std::process::exit(code.0.unwrap_or(1)) } // Ptrace does not work and we failed to catch that. Err(_) => { diff --git a/src/tools/miri/src/shims/trace/parent.rs b/src/tools/miri/src/shims/trace/parent.rs index d00d77b7b0fcd..2151a7726b4d7 100644 --- a/src/tools/miri/src/shims/trace/parent.rs +++ b/src/tools/miri/src/shims/trace/parent.rs @@ -222,13 +222,9 @@ impl Iterator for ChildListener { } /// An error came up while waiting on the child process to do something. +/// It likely died, with this return code if we have one. #[derive(Debug)] -enum ExecError { - /// The child process died with this return code, if we have one. - Died(Option), - /// Something errored, but we should ignore it and proceed. - Shrug, -} +pub struct ExecEnd(pub Option); /// This is the main loop of the supervisor process. It runs in a separate /// process from the rest of Miri (but because we fork, addresses for anything @@ -238,7 +234,7 @@ pub fn sv_loop( init_pid: unistd::Pid, event_tx: ipc::IpcSender, confirm_tx: ipc::IpcSender, -) -> Result> { +) -> Result { // Get the pagesize set and make sure it isn't still on the zero sentinel value! let page_size = PAGE_SIZE.load(std::sync::atomic::Ordering::Relaxed); assert_ne!(page_size, 0); @@ -258,12 +254,7 @@ pub fn sv_loop( let mut curr_pid = init_pid; // There's an initial sigstop we need to deal with. - wait_for_signal(Some(curr_pid), signal::SIGSTOP, false).map_err(|e| { - match e { - ExecError::Died(code) => code, - ExecError::Shrug => None, - } - })?; + wait_for_signal(Some(curr_pid), signal::SIGSTOP, false)?; ptrace::cont(curr_pid, None).unwrap(); for evt in listener { @@ -303,21 +294,14 @@ pub fn sv_loop( // If it was a segfault, check if it was an artificial one // caused by it trying to access the MiriMachine memory. signal::SIGSEGV => - match handle_segfault( + handle_segfault( pid, &ch_pages, ch_stack.unwrap(), page_size, &cs, &mut acc_events, - ) { - Err(e) => - match e { - ExecError::Died(code) => return Err(code), - ExecError::Shrug => continue, - }, - _ => (), - }, + )?, // Something weird happened. _ => { eprintln!("Process unexpectedly got {signal}; continuing..."); @@ -335,7 +319,7 @@ pub fn sv_loop( ptrace::syscall(pid, None).unwrap(); } ExecEvent::Died(code) => { - return Err(code); + return Err(ExecEnd(code)); } } } @@ -375,7 +359,7 @@ fn wait_for_signal( pid: Option, wait_signal: signal::Signal, init_cont: bool, -) -> Result { +) -> Result { if init_cont { ptrace::cont(pid.unwrap(), None).unwrap(); } @@ -385,13 +369,13 @@ fn wait_for_signal( Some(pid) => wait::Id::Pid(pid), None => wait::Id::All, }; - let stat = wait::waitid(wait_id, WAIT_FLAGS).map_err(|_| ExecError::Died(None))?; + let stat = wait::waitid(wait_id, WAIT_FLAGS).map_err(|_| ExecEnd(None))?; let (signal, pid) = match stat { // Report the cause of death, if we know it. wait::WaitStatus::Exited(_, code) => { - return Err(ExecError::Died(Some(code))); + return Err(ExecEnd(Some(code))); } - wait::WaitStatus::Signaled(_, _, _) => return Err(ExecError::Died(None)), + wait::WaitStatus::Signaled(_, _, _) => return Err(ExecEnd(None)), wait::WaitStatus::Stopped(pid, signal) => (signal, pid), wait::WaitStatus::PtraceEvent(pid, signal, _) => (signal, pid), // This covers PtraceSyscall and variants that are impossible with @@ -404,7 +388,7 @@ fn wait_for_signal( if signal == wait_signal { return Ok(pid); } else { - ptrace::cont(pid, signal).map_err(|_| ExecError::Died(None))?; + ptrace::cont(pid, signal).map_err(|_| ExecEnd(None))?; } } } @@ -418,7 +402,7 @@ fn handle_segfault( page_size: usize, cs: &capstone::Capstone, acc_events: &mut Vec, -) -> Result<(), ExecError> { +) -> Result<(), ExecEnd> { /// This is just here to not pollute the main namespace with `capstone::prelude::*`. #[inline] fn capstone_disassemble( @@ -538,7 +522,7 @@ fn handle_segfault( // Get information on what caused the segfault. This contains the address // that triggered it. - let siginfo = ptrace::getsiginfo(pid).map_err(|_| ExecError::Shrug)?; + let siginfo = ptrace::getsiginfo(pid).unwrap(); // All x86, ARM, etc. instructions only have at most one memory operand // (thankfully!) // SAFETY: si_addr is safe to call. @@ -599,7 +583,7 @@ fn handle_segfault( // Don't use wait_for_signal here since 1 instruction doesn't give room // for any uncertainty + we don't want it `cont()`ing randomly by accident // Also, don't let it continue with unprotected memory if something errors! - let _ = wait::waitid(wait::Id::Pid(pid), WAIT_FLAGS).map_err(|_| ExecError::Died(None))?; + let _ = wait::waitid(wait::Id::Pid(pid), WAIT_FLAGS).map_err(|_| ExecEnd(None))?; // Zero out again to be safe for a in (ch_stack..ch_stack.strict_add(FAKE_STACK_SIZE)).step_by(ARCH_WORD_SIZE) { @@ -651,7 +635,7 @@ fn handle_segfault( eprintln!("Expected access on pages: {ch_pages:#018x?}"); eprintln!("Register dump: {regs:#x?}"); ptrace::kill(pid).unwrap(); - Err(ExecError::Died(None)) + Err(ExecEnd(None)) } } From 02df2b11b647278067321723ce091d82277943c2 Mon Sep 17 00:00:00 2001 From: Nia Date: Fri, 20 Jun 2025 22:31:30 +0200 Subject: [PATCH 15/38] Update src/shims/trace/parent.rs Co-authored-by: Oli Scherer --- src/tools/miri/src/shims/trace/parent.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/tools/miri/src/shims/trace/parent.rs b/src/tools/miri/src/shims/trace/parent.rs index 2151a7726b4d7..84ea6a10061dc 100644 --- a/src/tools/miri/src/shims/trace/parent.rs +++ b/src/tools/miri/src/shims/trace/parent.rs @@ -566,11 +566,12 @@ fn handle_segfault( // Check if we also own the next page, and if so unprotect it in case // the access spans the page boundary. - if ch_pages.contains(&page_addr.strict_add(page_size)) { - ptrace::write(pid, (&raw const PAGE_COUNT).cast_mut().cast(), 2).unwrap(); + let flag = if ch_pages.contains(&page_addr.strict_add(page_size)) { + 2 } else { - ptrace::write(pid, (&raw const PAGE_COUNT).cast_mut().cast(), 1).unwrap(); - } + 1 + }; + ptrace::write(pid, (&raw const PAGE_COUNT).cast_mut().cast(), flag).unwrap(); ptrace::setregs(pid, new_regs).unwrap(); From 733284fc8788d118fa7cdc6d645652ac54fddd97 Mon Sep 17 00:00:00 2001 From: Nia Espera Date: Fri, 20 Jun 2025 22:38:06 +0200 Subject: [PATCH 16/38] fmt --- src/tools/miri/src/alloc/isolated_alloc.rs | 3 ++- src/tools/miri/src/shims/native_lib.rs | 4 +++- src/tools/miri/src/shims/trace/parent.rs | 6 +----- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/tools/miri/src/alloc/isolated_alloc.rs b/src/tools/miri/src/alloc/isolated_alloc.rs index 0b3218832b61f..ef3dd98233710 100644 --- a/src/tools/miri/src/alloc/isolated_alloc.rs +++ b/src/tools/miri/src/alloc/isolated_alloc.rs @@ -1,4 +1,5 @@ -use std::{alloc::Layout, ptr::NonNull}; +use std::alloc::Layout; +use std::ptr::NonNull; use nix::sys::mman; use rustc_index::bit_set::DenseBitSet; diff --git a/src/tools/miri/src/shims/native_lib.rs b/src/tools/miri/src/shims/native_lib.rs index 033e4f3671620..3e455e17386cf 100644 --- a/src/tools/miri/src/shims/native_lib.rs +++ b/src/tools/miri/src/shims/native_lib.rs @@ -232,7 +232,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let (ret, maybe_memevents) = this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?; - if cfg!(target_os = "linux") && let Some(events) = maybe_memevents { + if cfg!(target_os = "linux") + && let Some(events) = maybe_memevents + { trace!("Registered FFI events:\n{events:#0x?}"); } diff --git a/src/tools/miri/src/shims/trace/parent.rs b/src/tools/miri/src/shims/trace/parent.rs index 84ea6a10061dc..cb9a9fc8da5b8 100644 --- a/src/tools/miri/src/shims/trace/parent.rs +++ b/src/tools/miri/src/shims/trace/parent.rs @@ -566,11 +566,7 @@ fn handle_segfault( // Check if we also own the next page, and if so unprotect it in case // the access spans the page boundary. - let flag = if ch_pages.contains(&page_addr.strict_add(page_size)) { - 2 - } else { - 1 - }; + let flag = if ch_pages.contains(&page_addr.strict_add(page_size)) { 2 } else { 1 }; ptrace::write(pid, (&raw const PAGE_COUNT).cast_mut().cast(), flag).unwrap(); ptrace::setregs(pid, new_regs).unwrap(); From 6a1b7df4854b0023e34b18e2196d3069906da796 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Mon, 23 Jun 2025 13:45:56 +0000 Subject: [PATCH 17/38] Use a NonNull pointer --- library/core/src/panic/location.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/library/core/src/panic/location.rs b/library/core/src/panic/location.rs index 1e950d6e4988b..972270208852a 100644 --- a/library/core/src/panic/location.rs +++ b/library/core/src/panic/location.rs @@ -1,6 +1,7 @@ use crate::ffi::CStr; use crate::fmt; use crate::marker::PhantomData; +use crate::ptr::NonNull; /// A struct containing information about the location of a panic. /// @@ -37,7 +38,7 @@ pub struct Location<'a> { // A raw pointer is used rather than a reference because the pointer is valid for one more byte // than the length stored in this pointer; the additional byte is the NUL-terminator used by // `Location::file_with_nul`. - filename: *const str, + filename: NonNull, line: u32, col: u32, _filename: PhantomData<&'a str>, @@ -144,7 +145,7 @@ impl<'a> Location<'a> { #[rustc_const_stable(feature = "const_location_fields", since = "1.79.0")] pub const fn file(&self) -> &str { // SAFETY: The filename is valid. - unsafe { &*self.filename } + unsafe { self.filename.as_ref() } } /// Returns the name of the source file as a nul-terminated `CStr`. @@ -155,12 +156,14 @@ impl<'a> Location<'a> { #[unstable(feature = "file_with_nul", issue = "141727")] #[inline] pub const fn file_with_nul(&self) -> &CStr { + let filename = self.filename.as_ptr(); + // SAFETY: The filename is valid for `filename_len+1` bytes, so this addition can't // overflow. - let cstr_len = unsafe { crate::mem::size_of_val_raw(self.filename).unchecked_add(1) }; + let cstr_len = unsafe { crate::mem::size_of_val_raw(filename).unchecked_add(1) }; // SAFETY: The filename is valid for `filename_len+1` bytes. - let slice = unsafe { crate::slice::from_raw_parts(self.filename as *const _, cstr_len) }; + let slice = unsafe { crate::slice::from_raw_parts(filename.cast(), cstr_len) }; // SAFETY: The filename is guaranteed to have a trailing nul byte and no interior nul bytes. unsafe { CStr::from_bytes_with_nul_unchecked(slice) } From e0f8e865dbf9f1dc9d1d917efc04320e2eb123f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Tue, 24 Jun 2025 15:51:08 +0200 Subject: [PATCH 18/38] Skip unnecessary components in x64 try builds --- src/tools/opt-dist/src/exec.rs | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/tools/opt-dist/src/exec.rs b/src/tools/opt-dist/src/exec.rs index 64ce5cc377522..75569eacacdea 100644 --- a/src/tools/opt-dist/src/exec.rs +++ b/src/tools/opt-dist/src/exec.rs @@ -113,7 +113,7 @@ impl Bootstrap { "library/std", ]) .env("RUST_BACKTRACE", "full"); - let cmd = add_shared_x_flags(env, cmd); + let mut cmd = add_shared_x_flags(env, cmd); Self { cmd, metrics_path } } @@ -122,7 +122,12 @@ impl Bootstrap { let metrics_path = env.build_root().join("build").join("metrics.json"); let args = dist_args.iter().map(|arg| arg.as_str()).collect::>(); let cmd = cmd(&args).env("RUST_BACKTRACE", "full"); - let cmd = add_shared_x_flags(env, cmd); + let mut cmd = add_shared_x_flags(env, cmd); + if env.is_fast_try_build() { + // We set build.extended=false for fast try builds, but we still need Cargo + cmd = cmd.arg("cargo"); + } + Self { cmd, metrics_path } } @@ -188,6 +193,19 @@ impl Bootstrap { } } -fn add_shared_x_flags(env: &Environment, cmd: CmdBuilder) -> CmdBuilder { - if env.is_fast_try_build() { cmd.arg("--set").arg("rust.deny-warnings=false") } else { cmd } +fn add_shared_x_flags(env: &Environment, mut cmd: CmdBuilder) -> CmdBuilder { + if env.is_fast_try_build() { + // Skip things that cannot be skipped through `x ... --skip` + cmd.arg("--set") + .arg("rust.llvm-bitcode-linker=false") + // Skip wasm-component-ld. This also skips cargo, which we need to re-enable for dist + .arg("--set") + .arg("build.extended=false") + .arg("--set") + .arg("rust.codegen-backends=['llvm']") + .arg("--set") + .arg("rust.deny-warnings=false") + } else { + cmd + } } From 25dee4e56e0ad075044e7e465ae513086c78f45a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Tue, 24 Jun 2025 15:57:25 +0200 Subject: [PATCH 19/38] Skip more dist components --- src/tools/opt-dist/src/main.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/tools/opt-dist/src/main.rs b/src/tools/opt-dist/src/main.rs index d2827ec01ca7d..9c8a6637a3b10 100644 --- a/src/tools/opt-dist/src/main.rs +++ b/src/tools/opt-dist/src/main.rs @@ -407,13 +407,18 @@ fn main() -> anyhow::Result<()> { for target in [ "rust-docs", "rustc-docs", + "rustc-dev", + "rust-dev", "rust-docs-json", "rust-analyzer", "rustc-src", + "extended", "clippy", "miri", "rustfmt", "gcc", + "generate-copyright", + "bootstrap", ] { build_args.extend(["--skip".to_string(), target.to_string()]); } From 803415478cfc69d9e6c02b313560d6a2e433e0f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Tue, 24 Jun 2025 16:04:56 +0200 Subject: [PATCH 20/38] Do not build GCC in fast try builds --- src/ci/docker/host-x86_64/dist-x86_64-linux/dist.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ci/docker/host-x86_64/dist-x86_64-linux/dist.sh b/src/ci/docker/host-x86_64/dist-x86_64-linux/dist.sh index 064ac5b0a5e42..924bdbc761505 100755 --- a/src/ci/docker/host-x86_64/dist-x86_64-linux/dist.sh +++ b/src/ci/docker/host-x86_64/dist-x86_64-linux/dist.sh @@ -10,4 +10,7 @@ python3 ../x.py build --set rust.debug=true opt-dist build-manifest bootstrap # Use GCC for building GCC, as it seems to behave badly when built with Clang -CC=/rustroot/bin/cc CXX=/rustroot/bin/c++ python3 ../x.py dist gcc +# Only build GCC on full builds, not try builds +if [ "${DIST_TRY_BUILD:-0}" == "0" ]; then + CC=/rustroot/bin/cc CXX=/rustroot/bin/c++ python3 ../x.py dist gcc +fi From 58bc1dcb73b75d8fd0117d76e9a9f61ff297cd4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Wed, 25 Jun 2025 16:24:30 +0200 Subject: [PATCH 21/38] Remove `mut` --- src/tools/opt-dist/src/exec.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tools/opt-dist/src/exec.rs b/src/tools/opt-dist/src/exec.rs index 75569eacacdea..0dc6e56b9d5f4 100644 --- a/src/tools/opt-dist/src/exec.rs +++ b/src/tools/opt-dist/src/exec.rs @@ -113,7 +113,7 @@ impl Bootstrap { "library/std", ]) .env("RUST_BACKTRACE", "full"); - let mut cmd = add_shared_x_flags(env, cmd); + let cmd = add_shared_x_flags(env, cmd); Self { cmd, metrics_path } } @@ -193,7 +193,7 @@ impl Bootstrap { } } -fn add_shared_x_flags(env: &Environment, mut cmd: CmdBuilder) -> CmdBuilder { +fn add_shared_x_flags(env: &Environment, cmd: CmdBuilder) -> CmdBuilder { if env.is_fast_try_build() { // Skip things that cannot be skipped through `x ... --skip` cmd.arg("--set") From 508021aa4320f1feb4c7a783ee16c2feebc9249a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Miku=C5=82a?= Date: Thu, 26 Jun 2025 01:31:12 +0200 Subject: [PATCH 22/38] Add windows-gnullvm hosts to the manifest --- src/tools/build-manifest/src/main.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/tools/build-manifest/src/main.rs b/src/tools/build-manifest/src/main.rs index 4c53ea42793cd..0520eff0fa277 100644 --- a/src/tools/build-manifest/src/main.rs +++ b/src/tools/build-manifest/src/main.rs @@ -14,6 +14,7 @@ use crate::versions::{PkgType, Versions}; static HOSTS: &[&str] = &[ "aarch64-apple-darwin", + "aarch64-pc-windows-gnullvm", "aarch64-pc-windows-msvc", "aarch64-unknown-linux-gnu", "aarch64-unknown-linux-musl", @@ -44,6 +45,7 @@ static HOSTS: &[&str] = &[ "x86_64-apple-darwin", "x86_64-pc-solaris", "x86_64-pc-windows-gnu", + "x86_64-pc-windows-gnullvm", "x86_64-pc-windows-msvc", "x86_64-unknown-freebsd", "x86_64-unknown-illumos", @@ -470,7 +472,7 @@ impl Builder { } // so is rust-mingw if it's available for the target PkgType::RustMingw => { - if host.contains("pc-windows-gnu") { + if host.ends_with("pc-windows-gnu") { components.push(host_component(pkg)); } } From bfa933a46aca8a9aaa704227656f43ab163a5d44 Mon Sep 17 00:00:00 2001 From: Patrick-6 Date: Thu, 19 Jun 2025 17:17:29 +0200 Subject: [PATCH 23/38] Delay writing of return value of a join until after the join is successful. --- src/tools/miri/src/concurrency/thread.rs | 182 +++++++++--------- .../miri/src/shims/unix/foreign_items.rs | 3 +- src/tools/miri/src/shims/unix/thread.rs | 14 +- .../miri/src/shims/windows/foreign_items.rs | 3 +- src/tools/miri/src/shims/windows/thread.rs | 13 +- 5 files changed, 111 insertions(+), 104 deletions(-) diff --git a/src/tools/miri/src/concurrency/thread.rs b/src/tools/miri/src/concurrency/thread.rs index 38b5d4c0f06ec..c8a408fd8ace8 100644 --- a/src/tools/miri/src/concurrency/thread.rs +++ b/src/tools/miri/src/concurrency/thread.rs @@ -582,88 +582,6 @@ impl<'tcx> ThreadManager<'tcx> { interp_ok(()) } - /// Mark that the active thread tries to join the thread with `joined_thread_id`. - fn join_thread( - &mut self, - joined_thread_id: ThreadId, - data_race_handler: &mut GlobalDataRaceHandler, - ) -> InterpResult<'tcx> { - if self.threads[joined_thread_id].join_status == ThreadJoinStatus::Detached { - // On Windows this corresponds to joining on a closed handle. - throw_ub_format!("trying to join a detached thread"); - } - - fn after_join<'tcx>( - threads: &mut ThreadManager<'_>, - joined_thread_id: ThreadId, - data_race_handler: &mut GlobalDataRaceHandler, - ) -> InterpResult<'tcx> { - match data_race_handler { - GlobalDataRaceHandler::None => {} - GlobalDataRaceHandler::Vclocks(data_race) => - data_race.thread_joined(threads, joined_thread_id), - GlobalDataRaceHandler::Genmc(genmc_ctx) => - genmc_ctx.handle_thread_join(threads.active_thread, joined_thread_id)?, - } - interp_ok(()) - } - - // Mark the joined thread as being joined so that we detect if other - // threads try to join it. - self.threads[joined_thread_id].join_status = ThreadJoinStatus::Joined; - if !self.threads[joined_thread_id].state.is_terminated() { - trace!( - "{:?} blocked on {:?} when trying to join", - self.active_thread, joined_thread_id - ); - // The joined thread is still running, we need to wait for it. - // Unce we get unblocked, perform the appropriate synchronization. - self.block_thread( - BlockReason::Join(joined_thread_id), - None, - callback!( - @capture<'tcx> { - joined_thread_id: ThreadId, - } - |this, unblock: UnblockKind| { - assert_eq!(unblock, UnblockKind::Ready); - after_join(&mut this.machine.threads, joined_thread_id, &mut this.machine.data_race) - } - ), - ); - } else { - // The thread has already terminated - establish happens-before - after_join(self, joined_thread_id, data_race_handler)?; - } - interp_ok(()) - } - - /// Mark that the active thread tries to exclusively join the thread with `joined_thread_id`. - /// If the thread is already joined by another thread, it will throw UB - fn join_thread_exclusive( - &mut self, - joined_thread_id: ThreadId, - data_race_handler: &mut GlobalDataRaceHandler, - ) -> InterpResult<'tcx> { - if self.threads[joined_thread_id].join_status == ThreadJoinStatus::Joined { - throw_ub_format!("trying to join an already joined thread"); - } - - if joined_thread_id == self.active_thread { - throw_ub_format!("trying to join itself"); - } - - // Sanity check `join_status`. - assert!( - self.threads - .iter() - .all(|thread| { !thread.state.is_blocked_on(BlockReason::Join(joined_thread_id)) }), - "this thread already has threads waiting for its termination" - ); - - self.join_thread(joined_thread_id, data_race_handler) - } - /// Set the name of the given thread. pub fn set_thread_name(&mut self, thread: ThreadId, new_thread_name: Vec) { self.threads[thread].thread_name = Some(new_thread_name); @@ -1114,20 +1032,102 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { this.machine.threads.detach_thread(thread_id, allow_terminated_joined) } - #[inline] - fn join_thread(&mut self, joined_thread_id: ThreadId) -> InterpResult<'tcx> { + /// Mark that the active thread tries to join the thread with `joined_thread_id`. + /// + /// When the join is successful (immediately, or as soon as the joined thread finishes), `success_retval` will be written to `return_dest`. + fn join_thread( + &mut self, + joined_thread_id: ThreadId, + success_retval: Scalar, + return_dest: &MPlaceTy<'tcx>, + ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); - this.machine.threads.join_thread(joined_thread_id, &mut this.machine.data_race)?; + let thread_mgr = &mut this.machine.threads; + if thread_mgr.threads[joined_thread_id].join_status == ThreadJoinStatus::Detached { + // On Windows this corresponds to joining on a closed handle. + throw_ub_format!("trying to join a detached thread"); + } + + fn after_join<'tcx>( + this: &mut InterpCx<'tcx, MiriMachine<'tcx>>, + joined_thread_id: ThreadId, + success_retval: Scalar, + return_dest: &MPlaceTy<'tcx>, + ) -> InterpResult<'tcx> { + let threads = &this.machine.threads; + match &mut this.machine.data_race { + GlobalDataRaceHandler::None => {} + GlobalDataRaceHandler::Vclocks(data_race) => + data_race.thread_joined(threads, joined_thread_id), + GlobalDataRaceHandler::Genmc(genmc_ctx) => + genmc_ctx.handle_thread_join(threads.active_thread, joined_thread_id)?, + } + this.write_scalar(success_retval, return_dest)?; + interp_ok(()) + } + + // Mark the joined thread as being joined so that we detect if other + // threads try to join it. + thread_mgr.threads[joined_thread_id].join_status = ThreadJoinStatus::Joined; + if !thread_mgr.threads[joined_thread_id].state.is_terminated() { + trace!( + "{:?} blocked on {:?} when trying to join", + thread_mgr.active_thread, joined_thread_id + ); + // The joined thread is still running, we need to wait for it. + // Once we get unblocked, perform the appropriate synchronization and write the return value. + let dest = return_dest.clone(); + thread_mgr.block_thread( + BlockReason::Join(joined_thread_id), + None, + callback!( + @capture<'tcx> { + joined_thread_id: ThreadId, + dest: MPlaceTy<'tcx>, + success_retval: Scalar, + } + |this, unblock: UnblockKind| { + assert_eq!(unblock, UnblockKind::Ready); + after_join(this, joined_thread_id, success_retval, &dest) + } + ), + ); + } else { + // The thread has already terminated - establish happens-before and write the return value. + after_join(this, joined_thread_id, success_retval, return_dest)?; + } interp_ok(()) } - #[inline] - fn join_thread_exclusive(&mut self, joined_thread_id: ThreadId) -> InterpResult<'tcx> { + /// Mark that the active thread tries to exclusively join the thread with `joined_thread_id`. + /// If the thread is already joined by another thread, it will throw UB. + /// + /// When the join is successful (immediately, or as soon as the joined thread finishes), `success_retval` will be written to `return_dest`. + fn join_thread_exclusive( + &mut self, + joined_thread_id: ThreadId, + success_retval: Scalar, + return_dest: &MPlaceTy<'tcx>, + ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); - this.machine - .threads - .join_thread_exclusive(joined_thread_id, &mut this.machine.data_race)?; - interp_ok(()) + let threads = &this.machine.threads.threads; + if threads[joined_thread_id].join_status == ThreadJoinStatus::Joined { + throw_ub_format!("trying to join an already joined thread"); + } + + if joined_thread_id == this.machine.threads.active_thread { + throw_ub_format!("trying to join itself"); + } + + // Sanity check `join_status`. + assert!( + threads + .iter() + .all(|thread| { !thread.state.is_blocked_on(BlockReason::Join(joined_thread_id)) }), + "this thread already has threads waiting for its termination" + ); + + this.join_thread(joined_thread_id, success_retval, return_dest) } #[inline] diff --git a/src/tools/miri/src/shims/unix/foreign_items.rs b/src/tools/miri/src/shims/unix/foreign_items.rs index 9106ef94c435b..f34b95e730b11 100644 --- a/src/tools/miri/src/shims/unix/foreign_items.rs +++ b/src/tools/miri/src/shims/unix/foreign_items.rs @@ -946,8 +946,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { } "pthread_join" => { let [thread, retval] = this.check_shim(abi, CanonAbi::C, link_name, args)?; - let res = this.pthread_join(thread, retval)?; - this.write_scalar(res, dest)?; + this.pthread_join(thread, retval, dest)?; } "pthread_detach" => { let [thread] = this.check_shim(abi, CanonAbi::C, link_name, args)?; diff --git a/src/tools/miri/src/shims/unix/thread.rs b/src/tools/miri/src/shims/unix/thread.rs index 4b6615b3ea82c..a438e71a41d91 100644 --- a/src/tools/miri/src/shims/unix/thread.rs +++ b/src/tools/miri/src/shims/unix/thread.rs @@ -41,7 +41,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { &mut self, thread: &OpTy<'tcx>, retval: &OpTy<'tcx>, - ) -> InterpResult<'tcx, Scalar> { + return_dest: &MPlaceTy<'tcx>, + ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); if !this.ptr_is_null(this.read_pointer(retval)?)? { @@ -51,12 +52,15 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let thread = this.read_scalar(thread)?.to_int(this.libc_ty_layout("pthread_t").size)?; let Ok(thread) = this.thread_id_try_from(thread) else { - return interp_ok(this.eval_libc("ESRCH")); + this.write_scalar(this.eval_libc("ESRCH"), return_dest)?; + return interp_ok(()); }; - this.join_thread_exclusive(thread)?; - - interp_ok(Scalar::from_u32(0)) + this.join_thread_exclusive( + thread, + /* success_retval */ Scalar::from_u32(0), + return_dest, + ) } fn pthread_detach(&mut self, thread: &OpTy<'tcx>) -> InterpResult<'tcx, Scalar> { diff --git a/src/tools/miri/src/shims/windows/foreign_items.rs b/src/tools/miri/src/shims/windows/foreign_items.rs index 10f6df67ad47a..de10357f5faee 100644 --- a/src/tools/miri/src/shims/windows/foreign_items.rs +++ b/src/tools/miri/src/shims/windows/foreign_items.rs @@ -573,8 +573,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { "WaitForSingleObject" => { let [handle, timeout] = this.check_shim(abi, sys_conv, link_name, args)?; - let ret = this.WaitForSingleObject(handle, timeout)?; - this.write_scalar(ret, dest)?; + this.WaitForSingleObject(handle, timeout, dest)?; } "GetCurrentProcess" => { let [] = this.check_shim(abi, sys_conv, link_name, args)?; diff --git a/src/tools/miri/src/shims/windows/thread.rs b/src/tools/miri/src/shims/windows/thread.rs index d5f9ed4e968ea..981742391b98e 100644 --- a/src/tools/miri/src/shims/windows/thread.rs +++ b/src/tools/miri/src/shims/windows/thread.rs @@ -59,13 +59,14 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { &mut self, handle_op: &OpTy<'tcx>, timeout_op: &OpTy<'tcx>, - ) -> InterpResult<'tcx, Scalar> { + return_dest: &MPlaceTy<'tcx>, + ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); let handle = this.read_handle(handle_op, "WaitForSingleObject")?; let timeout = this.read_scalar(timeout_op)?.to_u32()?; - let thread = match handle { + let joined_thread_id = match handle { Handle::Thread(thread) => thread, // Unlike on posix, the outcome of joining the current thread is not documented. // On current Windows, it just deadlocks. @@ -77,8 +78,12 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { throw_unsup_format!("`WaitForSingleObject` with non-infinite timeout"); } - this.join_thread(thread)?; + this.join_thread( + joined_thread_id, + /* success_retval */ this.eval_windows("c", "WAIT_OBJECT_0"), + return_dest, + )?; - interp_ok(this.eval_windows("c", "WAIT_OBJECT_0")) + interp_ok(()) } } From b2be01c81567e89142577f9474a810e3a8676cfc Mon Sep 17 00:00:00 2001 From: binarycat Date: Tue, 24 Jun 2025 17:47:24 -0500 Subject: [PATCH 24/38] rustdoc: show attributes on enum variants mostly for #[non_exhaustive] --- src/librustdoc/html/render/print_item.rs | 1 + .../rustdoc/enum/enum-variant-non_exhaustive.rs | 17 +++++++++++++++++ ...-variant-non_exhaustive.type-alias-code.html | 4 ++++ .../enum-variant-non_exhaustive.type-code.html | 4 ++++ 4 files changed, 26 insertions(+) create mode 100644 tests/rustdoc/enum/enum-variant-non_exhaustive.rs create mode 100644 tests/rustdoc/enum/enum-variant-non_exhaustive.type-alias-code.html create mode 100644 tests/rustdoc/enum/enum-variant-non_exhaustive.type-code.html diff --git a/src/librustdoc/html/render/print_item.rs b/src/librustdoc/html/render/print_item.rs index 515424cbef19b..5456a09dce72c 100644 --- a/src/librustdoc/html/render/print_item.rs +++ b/src/librustdoc/html/render/print_item.rs @@ -1702,6 +1702,7 @@ fn render_enum_fields( if v.is_stripped() { continue; } + write!(w, "{}", render_attributes_in_pre(v, TAB, cx))?; w.write_str(TAB)?; match v.kind { clean::VariantItem(ref var) => match var.kind { diff --git a/tests/rustdoc/enum/enum-variant-non_exhaustive.rs b/tests/rustdoc/enum/enum-variant-non_exhaustive.rs new file mode 100644 index 0000000000000..ea0234a49f640 --- /dev/null +++ b/tests/rustdoc/enum/enum-variant-non_exhaustive.rs @@ -0,0 +1,17 @@ +// regression test for https://github.com/rust-lang/rust/issues/142599 + +#![crate_name = "foo"] + +//@ snapshot type-code 'foo/enum.Type.html' '//pre[@class="rust item-decl"]/code' +pub enum Type { + #[non_exhaustive] + // attribute that should not be shown + #[warn(unsafe_code)] + Variant, +} + +// we would love to use the `following-sibling::` axis +// (along with an `h2[@id="aliased-type"]` query), +// but unfortunately python doesn't implement that. +//@ snapshot type-alias-code 'foo/type.TypeAlias.html' '//pre[@class="rust item-decl"][2]/code' +pub type TypeAlias = Type; diff --git a/tests/rustdoc/enum/enum-variant-non_exhaustive.type-alias-code.html b/tests/rustdoc/enum/enum-variant-non_exhaustive.type-alias-code.html new file mode 100644 index 0000000000000..04eea709079d7 --- /dev/null +++ b/tests/rustdoc/enum/enum-variant-non_exhaustive.type-alias-code.html @@ -0,0 +1,4 @@ +pub enum TypeAlias { + #[non_exhaustive] + Variant, +} \ No newline at end of file diff --git a/tests/rustdoc/enum/enum-variant-non_exhaustive.type-code.html b/tests/rustdoc/enum/enum-variant-non_exhaustive.type-code.html new file mode 100644 index 0000000000000..6c8851ea5df47 --- /dev/null +++ b/tests/rustdoc/enum/enum-variant-non_exhaustive.type-code.html @@ -0,0 +1,4 @@ +pub enum Type { + #[non_exhaustive] + Variant, +} \ No newline at end of file From 7a79454de465774f8445a88640ffdc5792fc8a13 Mon Sep 17 00:00:00 2001 From: Deadbeef Date: Fri, 27 Jun 2025 18:24:42 +0800 Subject: [PATCH 25/38] update internal `send_signal` comment --- library/std/src/sys/process/unix/unix.rs | 4 ++-- library/std/src/sys/process/unix/vxworks.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/library/std/src/sys/process/unix/unix.rs b/library/std/src/sys/process/unix/unix.rs index 1fe80c13b81c8..bbd03e2b0c4e9 100644 --- a/library/std/src/sys/process/unix/unix.rs +++ b/library/std/src/sys/process/unix/unix.rs @@ -968,8 +968,8 @@ impl Process { } pub(crate) fn send_signal(&self, signal: i32) -> io::Result<()> { - // If we've already waited on this process then the pid can be recycled - // and used for another process, and we probably shouldn't be signaling + // If we've already waited on this process then the pid can be recycled and + // used for another process, and we probably shouldn't be sending signals to // random processes, so return Ok because the process has exited already. if self.status.is_some() { return Ok(()); diff --git a/library/std/src/sys/process/unix/vxworks.rs b/library/std/src/sys/process/unix/vxworks.rs index 51ae8c56bdb9b..2275cbb946a9c 100644 --- a/library/std/src/sys/process/unix/vxworks.rs +++ b/library/std/src/sys/process/unix/vxworks.rs @@ -151,8 +151,8 @@ impl Process { } pub fn send_signal(&self, signal: i32) -> io::Result<()> { - // If we've already waited on this process then the pid can be recycled - // and used for another process, and we probably shouldn't be killing + // If we've already waited on this process then the pid can be recycled and + // used for another process, and we probably shouldn't be sending signals to // random processes, so return Ok because the process has exited already. if self.status.is_some() { Ok(()) From cd1713ebba83a87e80a7366fdfe6fc28cabe6053 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Fri, 27 Jun 2025 11:01:59 -0700 Subject: [PATCH 26/38] BTreeSet: remove duplicated code by reusing `from_sorted_iter` The method `BTreeSet::from_sorted_iter` was introduced in 49ccb7519f55bd117d2ab50b7a030637f380aec6, but it was not consistently used throughout the codebase. As a result, some code redundantly reimplemented its logic. This commit fixes the problem. --- library/alloc/src/collections/btree/set.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/library/alloc/src/collections/btree/set.rs b/library/alloc/src/collections/btree/set.rs index aa9e5fce1d4cc..d50ce02bda743 100644 --- a/library/alloc/src/collections/btree/set.rs +++ b/library/alloc/src/collections/btree/set.rs @@ -1517,9 +1517,7 @@ impl From<[T; N]> for BTreeSet { // use stable sort to preserve the insertion order. arr.sort(); - let iter = IntoIterator::into_iter(arr).map(|k| (k, SetValZST::default())); - let map = BTreeMap::bulk_build_from_sorted_iter(iter, Global); - BTreeSet { map } + BTreeSet::from_sorted_iter(IntoIterator::into_iter(arr), Global) } } From 9090199f49bf681aaf68322b3beedc37acea7b5f Mon Sep 17 00:00:00 2001 From: leopardracer <136604165+leopardracer@users.noreply.github.com> Date: Fri, 27 Jun 2025 22:38:52 +0300 Subject: [PATCH 27/38] Update ui.rs --- src/tools/test-float-parse/src/ui.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/test-float-parse/src/ui.rs b/src/tools/test-float-parse/src/ui.rs index 73473eef0bfdb..1a9ba0dc1d99c 100644 --- a/src/tools/test-float-parse/src/ui.rs +++ b/src/tools/test-float-parse/src/ui.rs @@ -118,7 +118,7 @@ pub fn finish_all(tests: &[TestInfo], total_elapsed: Duration, cfg: &Config) -> match result { Ok(FinishedAll) => (), Err(EarlyExit::Timeout) => { - println!(" exited early; exceded {:?} timeout", cfg.timeout) + println!(" exited early; exceeded {:?} timeout", cfg.timeout) } Err(EarlyExit::MaxFailures) => { println!(" exited early; exceeded {:?} max failures", cfg.max_failures) From 3a34dac6c36f4e725d7e16022d12c9f58ebc6c2a Mon Sep 17 00:00:00 2001 From: leopardracer <136604165+leopardracer@users.noreply.github.com> Date: Fri, 27 Jun 2025 22:39:12 +0300 Subject: [PATCH 28/38] Update README.md --- src/tools/miri/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/README.md b/src/tools/miri/README.md index 5554c7975ff3b..3031d2c744a34 100644 --- a/src/tools/miri/README.md +++ b/src/tools/miri/README.md @@ -592,7 +592,7 @@ Definite bugs found: * [Occasional memory leak in `std::mpsc` channels](https://github.com/rust-lang/rust/issues/121582) (original code in [crossbeam](https://github.com/crossbeam-rs/crossbeam/pull/1084)) * [Weak-memory-induced memory leak in Windows thread-local storage](https://github.com/rust-lang/rust/pull/124281) * [A bug in the new `RwLock::downgrade` implementation](https://rust-lang.zulipchat.com/#narrow/channel/269128-miri/topic/Miri.20error.20library.20test) (caught by Miri before it landed in the Rust repo) -* [Mockall reading unintialized memory when mocking `std::io::Read::read`, even if all expectations are satisfied](https://github.com/asomers/mockall/issues/647) (caught by Miri running Tokio's test suite) +* [Mockall reading uninitialized memory when mocking `std::io::Read::read`, even if all expectations are satisfied](https://github.com/asomers/mockall/issues/647) (caught by Miri running Tokio's test suite) * [`ReentrantLock` not correctly dealing with reuse of addresses for TLS storage of different threads](https://github.com/rust-lang/rust/pull/141248) Violations of [Stacked Borrows] found that are likely bugs (but Stacked Borrows is currently just an experiment): From 39092cc10b1a9c9395b5876ad40999e1a1eda585 Mon Sep 17 00:00:00 2001 From: leopardracer <136604165+leopardracer@users.noreply.github.com> Date: Fri, 27 Jun 2025 22:39:40 +0300 Subject: [PATCH 29/38] Update dangling_pointer_to_raw_pointer.rs --- .../fail/dangling_pointers/dangling_pointer_to_raw_pointer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/tests/fail/dangling_pointers/dangling_pointer_to_raw_pointer.rs b/src/tools/miri/tests/fail/dangling_pointers/dangling_pointer_to_raw_pointer.rs index c63e926376d45..9646cdb206591 100644 --- a/src/tools/miri/tests/fail/dangling_pointers/dangling_pointer_to_raw_pointer.rs +++ b/src/tools/miri/tests/fail/dangling_pointers/dangling_pointer_to_raw_pointer.rs @@ -6,7 +6,7 @@ fn direct_raw(x: *const (i32, i32)) -> *const i32 { // Ensure that if a raw pointer is created via an intermediate // reference, we catch that. (Just in case someone decides to -// desugar this differenly or so.) +// desugar this differently or so.) fn via_ref(x: *const (i32, i32)) -> *const i32 { unsafe { &(*x).0 as *const i32 } //~ERROR: dangling pointer } From 34949e30d30e1751a070606f87cb0cc8bd478625 Mon Sep 17 00:00:00 2001 From: The Miri Cronjob Bot Date: Sat, 28 Jun 2025 04:55:04 +0000 Subject: [PATCH 30/38] Preparing for merge from rustc --- src/tools/miri/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index 2ef8d717d548b..b2ab17fd76ad0 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -255aa220821c05c3eac7605fce4ea1c9ab2cbdb4 +d41e12f1f4e4884c356f319b881921aa37040de5 From cff5a7cb2974ca7c98073b0b3be263afc4759dbc Mon Sep 17 00:00:00 2001 From: The Miri Cronjob Bot Date: Sat, 28 Jun 2025 05:03:27 +0000 Subject: [PATCH 31/38] fmt --- src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs index 99171b0349ed3..a0761cb07a1da 100644 --- a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs +++ b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs @@ -468,10 +468,8 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // - when `extern type` is involved we use the size of the known prefix, // - if the pointer is not reborrowed (raw pointer) then we override the size // to do a zero-length reborrow. - let reborrow_size = this - .size_and_align_of_val(place)? - .map(|(size, _)| size) - .unwrap_or(place.layout.size); + let reborrow_size = + this.size_and_align_of_val(place)?.map(|(size, _)| size).unwrap_or(place.layout.size); trace!("Creating new permission: {:?} with size {:?}", new_perm, reborrow_size); // This new tag is not guaranteed to actually be used. From 35e6def4879ff3b30862d827665ce9da052ed730 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 28 Jun 2025 09:12:51 +0200 Subject: [PATCH 32/38] clippy --- src/tools/miri/src/alloc/isolated_alloc.rs | 2 +- src/tools/miri/src/alloc_addresses/reuse_pool.rs | 2 +- src/tools/miri/src/borrow_tracker/tree_borrows/unimap.rs | 2 +- src/tools/miri/src/machine.rs | 4 ++-- src/tools/miri/src/shims/unix/linux/mem.rs | 3 +-- src/tools/miri/src/shims/unix/mem.rs | 3 +-- src/tools/miri/src/shims/unix/sync.rs | 2 +- 7 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/tools/miri/src/alloc/isolated_alloc.rs b/src/tools/miri/src/alloc/isolated_alloc.rs index ef3dd98233710..c3ea7270e8a6b 100644 --- a/src/tools/miri/src/alloc/isolated_alloc.rs +++ b/src/tools/miri/src/alloc/isolated_alloc.rs @@ -189,7 +189,7 @@ impl IsolatedAlloc { }; assert_ne!(page_ptr.addr(), usize::MAX, "mmap failed"); // `page_infos` has to have one bit for each `COMPRESSION_FACTOR`-sized chunk of bytes in the page. - assert!(self.page_size % COMPRESSION_FACTOR == 0); + assert!(self.page_size.is_multiple_of(COMPRESSION_FACTOR)); self.page_infos.push(DenseBitSet::new_empty(self.page_size / COMPRESSION_FACTOR)); self.page_ptrs.push(NonNull::new(page_ptr).unwrap()); (NonNull::new(page_ptr).unwrap(), self.page_infos.last_mut().unwrap()) diff --git a/src/tools/miri/src/alloc_addresses/reuse_pool.rs b/src/tools/miri/src/alloc_addresses/reuse_pool.rs index ab6aaed5e3e1c..b6cc017f77230 100644 --- a/src/tools/miri/src/alloc_addresses/reuse_pool.rs +++ b/src/tools/miri/src/alloc_addresses/reuse_pool.rs @@ -129,7 +129,7 @@ impl ReusePool { let idx = rng.random_range(begin..end); // Remove it from the pool and return. let (chosen_addr, chosen_size, chosen_thread, clock) = subpool.remove(idx); - debug_assert!(chosen_size >= size && chosen_addr % align.bytes() == 0); + debug_assert!(chosen_size >= size && chosen_addr.is_multiple_of(align.bytes())); debug_assert!(cross_thread_reuse || chosen_thread == thread); // No synchronization needed if we reused from the current thread. Some((chosen_addr, if chosen_thread == thread { None } else { Some(clock) })) diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/unimap.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/unimap.rs index dcd5a6cb02305..ad0a565dfd857 100644 --- a/src/tools/miri/src/borrow_tracker/tree_borrows/unimap.rs +++ b/src/tools/miri/src/borrow_tracker/tree_borrows/unimap.rs @@ -327,7 +327,7 @@ mod tests { for i in 0..1000 { i.hash(&mut hasher); let rng = hasher.finish(); - let op = rng % 3 == 0; + let op = rng.is_multiple_of(3); let key = (rng / 2) % 50; let val = (rng / 100) % 1000; if op { diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs index b4d7db34efa7b..3a748c4c68726 100644 --- a/src/tools/miri/src/machine.rs +++ b/src/tools/miri/src/machine.rs @@ -1056,7 +1056,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { // What's the offset between us and the promised alignment? let distance = offset.bytes().wrapping_sub(promised_offset.bytes()); // That must also be aligned. - if distance % align.bytes() == 0 { + if distance.is_multiple_of(align.bytes()) { // All looking good! None } else { @@ -1612,7 +1612,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { ecx.machine.since_gc += 1; // Possibly report our progress. This will point at the terminator we are about to execute. if let Some(report_progress) = ecx.machine.report_progress { - if ecx.machine.basic_block_count % u64::from(report_progress) == 0 { + if ecx.machine.basic_block_count.is_multiple_of(u64::from(report_progress)) { ecx.emit_diagnostic(NonHaltingDiagnostic::ProgressReport { block_count: ecx.machine.basic_block_count, }); diff --git a/src/tools/miri/src/shims/unix/linux/mem.rs b/src/tools/miri/src/shims/unix/linux/mem.rs index 8e5a3021b1c03..47732f811f1a3 100644 --- a/src/tools/miri/src/shims/unix/linux/mem.rs +++ b/src/tools/miri/src/shims/unix/linux/mem.rs @@ -22,8 +22,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let flags = this.read_scalar(flags)?.to_i32()?; // old_address must be a multiple of the page size - #[expect(clippy::arithmetic_side_effects)] // PAGE_SIZE is nonzero - if old_address.addr().bytes() % this.machine.page_size != 0 || new_size == 0 { + if !old_address.addr().bytes().is_multiple_of(this.machine.page_size) || new_size == 0 { this.set_last_error(LibcError("EINVAL"))?; return interp_ok(this.eval_libc("MAP_FAILED")); } diff --git a/src/tools/miri/src/shims/unix/mem.rs b/src/tools/miri/src/shims/unix/mem.rs index aefeee6f7a3a3..4bbbbc69c08a3 100644 --- a/src/tools/miri/src/shims/unix/mem.rs +++ b/src/tools/miri/src/shims/unix/mem.rs @@ -130,8 +130,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // addr must be a multiple of the page size, but apart from that munmap is just implemented // as a dealloc. - #[expect(clippy::arithmetic_side_effects)] // PAGE_SIZE is nonzero - if addr.addr().bytes() % this.machine.page_size != 0 { + if !addr.addr().bytes().is_multiple_of(this.machine.page_size) { return this.set_last_error_and_return_i32(LibcError("EINVAL")); } diff --git a/src/tools/miri/src/shims/unix/sync.rs b/src/tools/miri/src/shims/unix/sync.rs index eee2bbcb903da..50eb4d922891b 100644 --- a/src/tools/miri/src/shims/unix/sync.rs +++ b/src/tools/miri/src/shims/unix/sync.rs @@ -17,7 +17,7 @@ fn bytewise_equal_atomic_relaxed<'tcx>( // We do this in chunks of 4, so that we are okay to race with (sufficiently aligned) // 4-byte atomic accesses. - assert!(size.bytes() % 4 == 0); + assert!(size.bytes().is_multiple_of(4)); for i in 0..(size.bytes() / 4) { let offset = Size::from_bytes(i.strict_mul(4)); let load = |place: &MPlaceTy<'tcx>| { From 00b64f8851e9f56239a48e4306efd597d7be972c Mon Sep 17 00:00:00 2001 From: Yotam Ofek Date: Fri, 27 Jun 2025 17:58:36 +0000 Subject: [PATCH 33/38] Use tidy to sort `sym::*` items --- compiler/rustc_macros/src/symbols.rs | 12 -- compiler/rustc_macros/src/symbols/tests.rs | 15 --- compiler/rustc_span/src/symbol.rs | 131 ++++++++++----------- 3 files changed, 64 insertions(+), 94 deletions(-) diff --git a/compiler/rustc_macros/src/symbols.rs b/compiler/rustc_macros/src/symbols.rs index 2b00b7dd27aaf..78a4d47ca3346 100644 --- a/compiler/rustc_macros/src/symbols.rs +++ b/compiler/rustc_macros/src/symbols.rs @@ -190,17 +190,6 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { let mut symbols_stream = quote! {}; let mut prefill_stream = quote! {}; let mut entries = Entries::with_capacity(input.keywords.len() + input.symbols.len() + 10); - let mut prev_key: Option<(Span, String)> = None; - - let mut check_order = |span: Span, s: &str, errors: &mut Errors| { - if let Some((prev_span, ref prev_str)) = prev_key { - if s < prev_str { - errors.error(span, format!("Symbol `{s}` must precede `{prev_str}`")); - errors.error(prev_span, format!("location of previous symbol `{prev_str}`")); - } - } - prev_key = Some((span, s.to_string())); - }; // Generate the listed keywords. for keyword in input.keywords.iter() { @@ -219,7 +208,6 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { // Generate the listed symbols. for symbol in input.symbols.iter() { let name = &symbol.name; - check_order(symbol.name.span(), &name.to_string(), &mut errors); let value = match &symbol.value { Value::SameAsName => name.to_string(), diff --git a/compiler/rustc_macros/src/symbols/tests.rs b/compiler/rustc_macros/src/symbols/tests.rs index 9c53453df5b54..f0a7a2106be49 100644 --- a/compiler/rustc_macros/src/symbols/tests.rs +++ b/compiler/rustc_macros/src/symbols/tests.rs @@ -84,18 +84,3 @@ fn check_dup_symbol_and_keyword() { }; test_symbols_macro(input, &["Symbol `splat` is duplicated", "location of previous definition"]); } - -#[test] -fn check_symbol_order() { - let input = quote! { - Keywords {} - Symbols { - zebra, - aardvark, - } - }; - test_symbols_macro( - input, - &["Symbol `aardvark` must precede `zebra`", "location of previous symbol `zebra`"], - ); -} diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 4be7c7ba51093..c5bc8e85009b6 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -150,14 +150,10 @@ symbols! { // As well as the symbols listed, there are symbols for the strings // "0", "1", ..., "9", which are accessible via `sym::integer`. // - // The proc macro will abort if symbols are not in alphabetical order (as - // defined by `impl Ord for str`) or if any symbols are duplicated. Vim - // users can sort the list by selecting it and executing the command - // `:'<,'>!LC_ALL=C sort`. - // // There is currently no checking that all symbols are used; that would be // nice to have. Symbols { + // tidy-alphabetical-start Abi, AcqRel, Acquire, @@ -175,18 +171,18 @@ symbols! { AsyncGenPending, AsyncGenReady, AtomicBool, - AtomicI128, + AtomicI8, AtomicI16, AtomicI32, AtomicI64, - AtomicI8, + AtomicI128, AtomicIsize, AtomicPtr, - AtomicU128, + AtomicU8, AtomicU16, AtomicU32, AtomicU64, - AtomicU8, + AtomicU128, AtomicUsize, BTreeEntry, BTreeMap, @@ -607,10 +603,10 @@ symbols! { catch_unwind, cause, cdylib, - ceilf128, ceilf16, ceilf32, ceilf64, + ceilf128, cfg, cfg_accessible, cfg_attr, @@ -747,10 +743,10 @@ symbols! { copy, copy_closures, copy_nonoverlapping, - copysignf128, copysignf16, copysignf32, copysignf64, + copysignf128, core, core_panic, core_panic_2015_macro, @@ -763,10 +759,10 @@ symbols! { coroutine_state, coroutine_yield, coroutines, - cosf128, cosf16, cosf32, cosf64, + cosf128, count, coverage, coverage_attribute, @@ -874,8 +870,8 @@ symbols! { dotdot_in_tuple_patterns, dotdoteq_in_patterns, dreg, - dreg_low16, dreg_low8, + dreg_low16, drop, drop_in_place, drop_types_in_const, @@ -928,16 +924,16 @@ symbols! { exhaustive_integer_patterns, exhaustive_patterns, existential_type, - exp2f128, exp2f16, exp2f32, exp2f64, + exp2f128, expect, expected, - expf128, expf16, expf32, expf64, + expf128, explicit_extern_abis, explicit_generic_args_with_impl_trait, explicit_tail_calls, @@ -958,9 +954,6 @@ symbols! { external, external_doc, f, - f128, - f128_epsilon, - f128_nan, f16, f16_epsilon, f16_nan, @@ -999,10 +992,13 @@ symbols! { f64_legacy_const_neg_infinity, f64_legacy_const_radix, f64_nan, - fabsf128, + f128, + f128_epsilon, + f128_nan, fabsf16, fabsf32, fabsf64, + fabsf128, fadd_algebraic, fadd_fast, fake_variadic, @@ -1024,22 +1020,22 @@ symbols! { flags, float, float_to_int_unchecked, - floorf128, floorf16, floorf32, floorf64, - fmaf128, + floorf128, fmaf16, fmaf32, fmaf64, + fmaf128, fmt, fmt_debug, fmul_algebraic, fmul_fast, - fmuladdf128, fmuladdf16, fmuladdf32, fmuladdf64, + fmuladdf128, fn_align, fn_body, fn_delegation, @@ -1140,13 +1136,12 @@ symbols! { html_root_url, hwaddress, i, - i128, - i128_legacy_const_max, - i128_legacy_const_min, - i128_legacy_fn_max_value, - i128_legacy_fn_min_value, - i128_legacy_mod, - i128_type, + i8, + i8_legacy_const_max, + i8_legacy_const_min, + i8_legacy_fn_max_value, + i8_legacy_fn_min_value, + i8_legacy_mod, i16, i16_legacy_const_max, i16_legacy_const_min, @@ -1165,12 +1160,13 @@ symbols! { i64_legacy_fn_max_value, i64_legacy_fn_min_value, i64_legacy_mod, - i8, - i8_legacy_const_max, - i8_legacy_const_min, - i8_legacy_fn_max_value, - i8_legacy_fn_min_value, - i8_legacy_mod, + i128, + i128_legacy_const_max, + i128_legacy_const_min, + i128_legacy_fn_max_value, + i128_legacy_fn_min_value, + i128_legacy_mod, + i128_type, ident, if_let, if_let_guard, @@ -1292,19 +1288,19 @@ symbols! { loaded_from_disk, local, local_inner_macros, - log10f128, - log10f16, - log10f32, - log10f64, - log2f128, log2f16, log2f32, log2f64, + log2f128, + log10f16, + log10f32, + log10f64, + log10f128, log_syntax, - logf128, logf16, logf32, logf64, + logf128, loongarch_target_feature, loop_break_value, loop_match, @@ -1334,14 +1330,14 @@ symbols! { match_beginning_vert, match_default_bindings, matches_macro, - maximumf128, maximumf16, maximumf32, maximumf64, - maxnumf128, + maximumf128, maxnumf16, maxnumf32, maxnumf64, + maxnumf128, may_dangle, may_unwind, maybe_uninit, @@ -1372,14 +1368,14 @@ symbols! { min_generic_const_args, min_specialization, min_type_alias_impl_trait, - minimumf128, minimumf16, minimumf32, minimumf64, - minnumf128, + minimumf128, minnumf16, minnumf32, minnumf64, + minnumf128, mips_target_feature, mir_assume, mir_basic_block, @@ -1633,14 +1629,14 @@ symbols! { post_dash_lto: "post-lto", postfix_match, powerpc_target_feature, - powf128, powf16, powf32, powf64, - powif128, + powf128, powif16, powif32, powif64, + powif128, pre_dash_lto: "pre-lto", precise_capturing, precise_capturing_in_traits, @@ -1785,14 +1781,14 @@ symbols! { ropi_rwpi: "ropi-rwpi", rotate_left, rotate_right, - round_ties_even_f128, round_ties_even_f16, round_ties_even_f32, round_ties_even_f64, - roundf128, + round_ties_even_f128, roundf16, roundf32, roundf64, + roundf128, rt, rtm_target_feature, rust, @@ -1972,8 +1968,8 @@ symbols! { simd_fexp2, simd_ffi, simd_flog, - simd_flog10, simd_flog2, + simd_flog10, simd_floor, simd_fma, simd_fmax, @@ -2021,10 +2017,10 @@ symbols! { simd_with_exposed_provenance, simd_xor, since, - sinf128, sinf16, sinf32, sinf64, + sinf128, size, size_of, size_of_val, @@ -2046,10 +2042,10 @@ symbols! { specialization, speed, spotlight, - sqrtf128, sqrtf16, sqrtf32, sqrtf64, + sqrtf128, sreg, sreg_low16, sse, @@ -2127,10 +2123,10 @@ symbols! { target_has_atomic, target_has_atomic_equal_alignment, target_has_atomic_load_store, - target_has_reliable_f128, - target_has_reliable_f128_math, target_has_reliable_f16, target_has_reliable_f16_math, + target_has_reliable_f128, + target_has_reliable_f128_math, target_os, target_pointer_width, target_thread_local, @@ -2173,10 +2169,10 @@ symbols! { transparent_enums, transparent_unions, trivial_bounds, - truncf128, truncf16, truncf32, truncf64, + truncf128, try_blocks, try_capture, try_from, @@ -2205,12 +2201,12 @@ symbols! { type_name, type_privacy_lints, typed_swap_nonoverlapping, - u128, - u128_legacy_const_max, - u128_legacy_const_min, - u128_legacy_fn_max_value, - u128_legacy_fn_min_value, - u128_legacy_mod, + u8, + u8_legacy_const_max, + u8_legacy_const_min, + u8_legacy_fn_max_value, + u8_legacy_fn_min_value, + u8_legacy_mod, u16, u16_legacy_const_max, u16_legacy_const_min, @@ -2229,12 +2225,12 @@ symbols! { u64_legacy_fn_max_value, u64_legacy_fn_min_value, u64_legacy_mod, - u8, - u8_legacy_const_max, - u8_legacy_const_min, - u8_legacy_fn_max_value, - u8_legacy_fn_min_value, - u8_legacy_mod, + u128, + u128_legacy_const_max, + u128_legacy_const_min, + u128_legacy_fn_max_value, + u128_legacy_fn_min_value, + u128_legacy_mod, ub_checks, unaligned_volatile_load, unaligned_volatile_store, @@ -2387,6 +2383,7 @@ symbols! { zfh, zfhmin, zmm_reg, + // tidy-alphabetical-end } } From 62bb6216ea2a01dc050bed2f40d9cbea631d6d00 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 28 Jun 2025 11:13:11 +0200 Subject: [PATCH 34/38] various minor native-lib-tracing tweaks, and disable naive-lib-tracing mode by default --- src/tools/miri/README.md | 8 +- src/tools/miri/src/alloc/isolated_alloc.rs | 6 +- src/tools/miri/src/bin/miri.rs | 10 +- src/tools/miri/src/diagnostics.rs | 87 ++++--- src/tools/miri/src/eval.rs | 6 +- src/tools/miri/src/lib.rs | 4 +- src/tools/miri/src/shims/mod.rs | 4 +- .../{native_lib.rs => native_lib/mod.rs} | 221 ++++++++---------- .../src/shims/{ => native_lib}/trace/child.rs | 6 +- .../shims/{ => native_lib}/trace/messages.rs | 0 .../src/shims/{ => native_lib}/trace/mod.rs | 0 .../shims/{ => native_lib}/trace/parent.rs | 11 +- .../native-lib/pass/ptr_read_access.stderr | 4 +- .../native-lib/pass/ptr_write_access.stderr | 4 +- 14 files changed, 165 insertions(+), 206 deletions(-) rename src/tools/miri/src/shims/{native_lib.rs => native_lib/mod.rs} (64%) rename src/tools/miri/src/shims/{ => native_lib}/trace/child.rs (98%) rename src/tools/miri/src/shims/{ => native_lib}/trace/messages.rs (100%) rename src/tools/miri/src/shims/{ => native_lib}/trace/mod.rs (100%) rename src/tools/miri/src/shims/{ => native_lib}/trace/parent.rs (98%) diff --git a/src/tools/miri/README.md b/src/tools/miri/README.md index dead4931f2b15..4452442cd4f78 100644 --- a/src/tools/miri/README.md +++ b/src/tools/miri/README.md @@ -419,11 +419,9 @@ to Miri failing to detect cases of undefined behavior in a program. Finally, the flag is **unsound** in the sense that Miri stops tracking details such as initialization and provenance on memory shared with native code, so it is easily possible to write code that has UB which is missed by Miri. -* `-Zmiri-force-old-native-lib-mode` disables the WIP improved native code access tracking. If for - whatever reason enabling native calls leads to odd behaviours or causes Miri to panic, disabling - the tracer *might* fix this. This will likely be removed once the tracer has been adequately - battle-tested. Note that this flag is only meaningful on Linux systems; other Unixes (currently) - exclusively use the old native-lib code. +* `-Zmiri-native-lib-enable-tracing` enables the WIP detailed tracing mode for invoking native code. + Note that this flag is only meaningful on Linux systems; other Unixes (currently) do not support + tracing mode. * `-Zmiri-measureme=` enables `measureme` profiling for the interpreted program. This can be used to find which parts of your program are executing slowly under Miri. The profile is written out to a file inside a directory called ``, and can be processed diff --git a/src/tools/miri/src/alloc/isolated_alloc.rs b/src/tools/miri/src/alloc/isolated_alloc.rs index c3ea7270e8a6b..a7bb9b4da7580 100644 --- a/src/tools/miri/src/alloc/isolated_alloc.rs +++ b/src/tools/miri/src/alloc/isolated_alloc.rs @@ -305,12 +305,12 @@ impl IsolatedAlloc { /// Returns a vector of page addresses managed by the allocator. pub fn pages(&self) -> Vec { let mut pages: Vec = - self.page_ptrs.clone().into_iter().map(|p| p.expose_provenance().get()).collect(); - self.huge_ptrs.iter().for_each(|(ptr, size)| { + self.page_ptrs.iter().map(|p| p.expose_provenance().get()).collect(); + for (ptr, size) in self.huge_ptrs.iter() { for i in 0..size / self.page_size { pages.push(ptr.expose_provenance().get().strict_add(i * self.page_size)); } - }); + } pages } diff --git a/src/tools/miri/src/bin/miri.rs b/src/tools/miri/src/bin/miri.rs index 2e82dbee34d94..b80327d9d1a44 100644 --- a/src/tools/miri/src/bin/miri.rs +++ b/src/tools/miri/src/bin/miri.rs @@ -228,7 +228,7 @@ impl rustc_driver::Callbacks for MiriCompilerCalls { let return_code = miri::eval_entry(tcx, entry_def_id, entry_type, &config, None) .unwrap_or_else(|| { #[cfg(target_os = "linux")] - miri::register_retcode_sv(rustc_driver::EXIT_FAILURE); + miri::native_lib::register_retcode_sv(rustc_driver::EXIT_FAILURE); tcx.dcx().abort_if_errors(); rustc_driver::EXIT_FAILURE }); @@ -724,8 +724,8 @@ fn main() { } else { show_error!("-Zmiri-native-lib `{}` does not exist", filename); } - } else if arg == "-Zmiri-force-old-native-lib-mode" { - miri_config.force_old_native_lib = true; + } else if arg == "-Zmiri-native-lib-enable-tracing" { + miri_config.native_lib_enable_tracing = true; } else if let Some(param) = arg.strip_prefix("-Zmiri-num-cpus=") { let num_cpus = param .parse::() @@ -797,14 +797,14 @@ fn main() { debug!("rustc arguments: {:?}", rustc_args); debug!("crate arguments: {:?}", miri_config.args); #[cfg(target_os = "linux")] - if !miri_config.native_lib.is_empty() && !miri_config.force_old_native_lib { + if !miri_config.native_lib.is_empty() && miri_config.native_lib_enable_tracing { // FIXME: This should display a diagnostic / warning on error // SAFETY: If any other threads exist at this point (namely for the ctrlc // handler), they will not interact with anything on the main rustc/Miri // thread in an async-signal-unsafe way such as by accessing shared // semaphores, etc.; the handler only calls `sleep()` and `exit()`, which // are async-signal-safe, as is accessing atomics - let _ = unsafe { miri::init_sv() }; + let _ = unsafe { miri::native_lib::init_sv() }; } run_compiler_and_exit( &rustc_args, diff --git a/src/tools/miri/src/diagnostics.rs b/src/tools/miri/src/diagnostics.rs index 095f4ba8c86e9..9ecbd31c5b9f9 100644 --- a/src/tools/miri/src/diagnostics.rs +++ b/src/tools/miri/src/diagnostics.rs @@ -132,8 +132,9 @@ pub enum NonHaltingDiagnostic { Int2Ptr { details: bool, }, - NativeCallSharedMem, - NativeCallNoTrace, + NativeCallSharedMem { + tracing: bool, + }, WeakMemoryOutdatedLoad { ptr: Pointer, }, @@ -628,10 +629,8 @@ impl<'tcx> MiriMachine<'tcx> { RejectedIsolatedOp(_) => ("operation rejected by isolation".to_string(), DiagLevel::Warning), Int2Ptr { .. } => ("integer-to-pointer cast".to_string(), DiagLevel::Warning), - NativeCallSharedMem => + NativeCallSharedMem { .. } => ("sharing memory with a native function".to_string(), DiagLevel::Warning), - NativeCallNoTrace => - ("unable to trace native code memory accesses".to_string(), DiagLevel::Warning), ExternTypeReborrow => ("reborrow of reference to `extern type`".to_string(), DiagLevel::Warning), CreatedPointerTag(..) @@ -666,11 +665,8 @@ impl<'tcx> MiriMachine<'tcx> { ProgressReport { .. } => format!("progress report: current operation being executed is here"), Int2Ptr { .. } => format!("integer-to-pointer cast"), - NativeCallSharedMem => format!("sharing memory with a native function called via FFI"), - NativeCallNoTrace => - format!( - "sharing memory with a native function called via FFI, and unable to use ptrace" - ), + NativeCallSharedMem { .. } => + format!("sharing memory with a native function called via FFI"), WeakMemoryOutdatedLoad { ptr } => format!("weak memory emulation: outdated value returned from load at {ptr}"), ExternTypeReborrow => @@ -716,42 +712,41 @@ impl<'tcx> MiriMachine<'tcx> { } v } - NativeCallSharedMem => { - vec![ - note!( - "when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis" - ), - note!( - "in particular, Miri assumes that the native call initializes all memory it has written to" - ), - note!( - "Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory" - ), - note!( - "what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free" - ), - ] - } - NativeCallNoTrace => { - vec![ - note!( - "when memory is shared with a native function call, Miri stops tracking initialization and provenance for that memory" - ), - note!( - "in particular, Miri assumes that the native call initializes all memory it has access to" - ), - note!( - "Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory" - ), - note!( - "what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free" - ), - #[cfg(target_os = "linux")] - note!( - "this is normally partially mitigated, but either -Zmiri-force-old-native-lib-mode was passed or ptrace is disabled on your system" - ), - ] - } + NativeCallSharedMem { tracing } => + if *tracing { + vec![ + note!( + "when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis" + ), + note!( + "in particular, Miri assumes that the native call initializes all memory it has written to" + ), + note!( + "Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory" + ), + note!( + "what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free" + ), + note!( + "tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here" + ), + ] + } else { + vec![ + note!( + "when memory is shared with a native function call, Miri stops tracking initialization and provenance for that memory" + ), + note!( + "in particular, Miri assumes that the native call initializes all memory it has access to" + ), + note!( + "Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory" + ), + note!( + "what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free" + ), + ] + }, ExternTypeReborrow => { assert!(self.borrow_tracker.as_ref().is_some_and(|b| { matches!( diff --git a/src/tools/miri/src/eval.rs b/src/tools/miri/src/eval.rs index c0dc484187a8e..44612dae1ee54 100644 --- a/src/tools/miri/src/eval.rs +++ b/src/tools/miri/src/eval.rs @@ -150,8 +150,8 @@ pub struct MiriConfig { pub retag_fields: RetagFields, /// The location of the shared object files to load when calling external functions pub native_lib: Vec, - /// Whether to force using the old native lib behaviour even if ptrace might be supported. - pub force_old_native_lib: bool, + /// Whether to enable the new native lib tracing system. + pub native_lib_enable_tracing: bool, /// Run a garbage collector for BorTags every N basic blocks. pub gc_interval: u32, /// The number of CPUs to be reported by miri. @@ -201,7 +201,7 @@ impl Default for MiriConfig { report_progress: None, retag_fields: RetagFields::Yes, native_lib: vec![], - force_old_native_lib: false, + native_lib_enable_tracing: false, gc_interval: 10_000, num_cpus: 1, page_size: None, diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs index b1c15b006f702..850478b516c9b 100644 --- a/src/tools/miri/src/lib.rs +++ b/src/tools/miri/src/lib.rs @@ -100,7 +100,9 @@ use rustc_middle::{bug, span_bug}; use tracing::{info, trace}; #[cfg(target_os = "linux")] -pub use crate::shims::trace::{init_sv, register_retcode_sv}; +pub mod native_lib { + pub use crate::shims::{init_sv, register_retcode_sv}; +} // Type aliases that set the provenance parameter. pub type Pointer = interpret::Pointer>; diff --git a/src/tools/miri/src/shims/mod.rs b/src/tools/miri/src/shims/mod.rs index e7b0a784c2701..44e145dd5d7ca 100644 --- a/src/tools/miri/src/shims/mod.rs +++ b/src/tools/miri/src/shims/mod.rs @@ -19,10 +19,10 @@ pub mod os_str; pub mod panic; pub mod time; pub mod tls; -#[cfg(target_os = "linux")] -pub mod trace; pub use self::files::FdTable; +#[cfg(target_os = "linux")] +pub use self::native_lib::trace::{init_sv, register_retcode_sv}; pub use self::unix::{DirTable, EpollInterestTable}; /// What needs to be done after emulating an item (a shim or an intrinsic) is done. diff --git a/src/tools/miri/src/shims/native_lib.rs b/src/tools/miri/src/shims/native_lib/mod.rs similarity index 64% rename from src/tools/miri/src/shims/native_lib.rs rename to src/tools/miri/src/shims/native_lib/mod.rs index 3e455e17386cf..f6f4766144e66 100644 --- a/src/tools/miri/src/shims/native_lib.rs +++ b/src/tools/miri/src/shims/native_lib/mod.rs @@ -1,7 +1,9 @@ //! Implements calling functions from a native library. -use std::ops::Deref; + #[cfg(target_os = "linux")] -use std::{cell::RefCell, rc::Rc}; +pub mod trace; + +use std::ops::Deref; use libffi::high::call as ffi; use libffi::low::CodePtr; @@ -11,12 +13,11 @@ use rustc_middle::ty::{self as ty, IntTy, UintTy}; use rustc_span::Symbol; #[cfg(target_os = "linux")] -use crate::alloc::isolated_alloc::IsolatedAlloc; +use self::trace::Supervisor; use crate::*; #[cfg(target_os = "linux")] -type CallResult<'tcx> = - InterpResult<'tcx, (ImmTy<'tcx>, Option)>; +type CallResult<'tcx> = InterpResult<'tcx, (ImmTy<'tcx>, Option)>; #[cfg(not(target_os = "linux"))] type CallResult<'tcx> = InterpResult<'tcx, (ImmTy<'tcx>, Option)>; @@ -32,84 +33,90 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { ) -> CallResult<'tcx> { let this = self.eval_context_mut(); #[cfg(target_os = "linux")] - let alloc = this.machine.allocator.clone(); - #[cfg(not(target_os = "linux"))] - let alloc = (); - let maybe_memevents; + let alloc = this.machine.allocator.as_ref().unwrap(); + + // SAFETY: We don't touch the machine memory past this point. + #[cfg(target_os = "linux")] + let (guard, stack_ptr) = unsafe { Supervisor::start_ffi(alloc) }; // Call the function (`ptr`) with arguments `libffi_args`, and obtain the return value // as the specified primitive integer type - let scalar = match dest.layout.ty.kind() { - // ints - ty::Int(IntTy::I8) => { - // Unsafe because of the call to native code. - // Because this is calling a C function it is not necessarily sound, - // but there is no way around this and we've checked as much as we can. - let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; - maybe_memevents = x.1; - Scalar::from_i8(x.0) - } - ty::Int(IntTy::I16) => { - let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; - maybe_memevents = x.1; - Scalar::from_i16(x.0) - } - ty::Int(IntTy::I32) => { - let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; - maybe_memevents = x.1; - Scalar::from_i32(x.0) - } - ty::Int(IntTy::I64) => { - let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; - maybe_memevents = x.1; - Scalar::from_i64(x.0) - } - ty::Int(IntTy::Isize) => { - let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; - maybe_memevents = x.1; - Scalar::from_target_isize(x.0.try_into().unwrap(), this) - } - // uints - ty::Uint(UintTy::U8) => { - let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; - maybe_memevents = x.1; - Scalar::from_u8(x.0) - } - ty::Uint(UintTy::U16) => { - let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; - maybe_memevents = x.1; - Scalar::from_u16(x.0) - } - ty::Uint(UintTy::U32) => { - let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; - maybe_memevents = x.1; - Scalar::from_u32(x.0) - } - ty::Uint(UintTy::U64) => { - let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; - maybe_memevents = x.1; - Scalar::from_u64(x.0) - } - ty::Uint(UintTy::Usize) => { - let x = unsafe { do_native_call::(ptr, libffi_args.as_slice(), alloc) }; - maybe_memevents = x.1; - Scalar::from_target_usize(x.0.try_into().unwrap(), this) - } - // Functions with no declared return type (i.e., the default return) - // have the output_type `Tuple([])`. - ty::Tuple(t_list) if (*t_list).deref().is_empty() => { - let (_, mm) = unsafe { do_native_call::<()>(ptr, libffi_args.as_slice(), alloc) }; - return interp_ok((ImmTy::uninit(dest.layout), mm)); - } - ty::RawPtr(..) => { - let x = unsafe { do_native_call::<*const ()>(ptr, libffi_args.as_slice(), alloc) }; - maybe_memevents = x.1; - let ptr = Pointer::new(Provenance::Wildcard, Size::from_bytes(x.0.addr())); - Scalar::from_pointer(ptr, this) - } - _ => throw_unsup_format!("unsupported return type for native call: {:?}", link_name), + let res = 'res: { + let scalar = match dest.layout.ty.kind() { + // ints + ty::Int(IntTy::I8) => { + // Unsafe because of the call to native code. + // Because this is calling a C function it is not necessarily sound, + // but there is no way around this and we've checked as much as we can. + let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + Scalar::from_i8(x) + } + ty::Int(IntTy::I16) => { + let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + Scalar::from_i16(x) + } + ty::Int(IntTy::I32) => { + let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + Scalar::from_i32(x) + } + ty::Int(IntTy::I64) => { + let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + Scalar::from_i64(x) + } + ty::Int(IntTy::Isize) => { + let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + Scalar::from_target_isize(x.try_into().unwrap(), this) + } + // uints + ty::Uint(UintTy::U8) => { + let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + Scalar::from_u8(x) + } + ty::Uint(UintTy::U16) => { + let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + Scalar::from_u16(x) + } + ty::Uint(UintTy::U32) => { + let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + Scalar::from_u32(x) + } + ty::Uint(UintTy::U64) => { + let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + Scalar::from_u64(x) + } + ty::Uint(UintTy::Usize) => { + let x = unsafe { ffi::call::(ptr, libffi_args.as_slice()) }; + Scalar::from_target_usize(x.try_into().unwrap(), this) + } + // Functions with no declared return type (i.e., the default return) + // have the output_type `Tuple([])`. + ty::Tuple(t_list) if (*t_list).deref().is_empty() => { + unsafe { ffi::call::<()>(ptr, libffi_args.as_slice()) }; + break 'res interp_ok(ImmTy::uninit(dest.layout)); + } + ty::RawPtr(..) => { + let x = unsafe { ffi::call::<*const ()>(ptr, libffi_args.as_slice()) }; + let ptr = Pointer::new(Provenance::Wildcard, Size::from_bytes(x.addr())); + Scalar::from_pointer(ptr, this) + } + _ => + break 'res Err(err_unsup_format!( + "unsupported return type for native call: {:?}", + link_name + )) + .into(), + }; + interp_ok(ImmTy::from_scalar(scalar, dest.layout)) }; - interp_ok((ImmTy::from_scalar(scalar, dest.layout), maybe_memevents)) + + // SAFETY: We got the guard and stack pointer from start_ffi, and + // the allocator is the same + #[cfg(target_os = "linux")] + let events = unsafe { Supervisor::end_ffi(alloc, guard, stack_ptr) }; + #[cfg(not(target_os = "linux"))] + let events = None; + + interp_ok((res?, events)) } /// Get the pointer to the function of the specified name in the shared object file, @@ -205,14 +212,12 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // The first time this happens, print a warning. if !this.machine.native_call_mem_warned.replace(true) { // Newly set, so first time we get here. - #[cfg(target_os = "linux")] - if shims::trace::Supervisor::poll() { - this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem); - } else { - this.emit_diagnostic(NonHaltingDiagnostic::NativeCallNoTrace); - } - #[cfg(not(target_os = "linux"))] - this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem); + this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { + #[cfg(target_os = "linux")] + tracing: self::trace::Supervisor::is_enabled(), + #[cfg(not(target_os = "linux"))] + tracing: false, + }); } this.expose_provenance(prov)?; @@ -243,48 +248,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { } } -/// Performs the actual native call, returning the result and the events that -/// the supervisor detected (if any). -/// -/// SAFETY: See `libffi::fii::call`. -#[cfg(target_os = "linux")] -unsafe fn do_native_call( - ptr: CodePtr, - args: &[ffi::Arg<'_>], - alloc: Option>>, -) -> (T, Option) { - use shims::trace::Supervisor; - - unsafe { - if let Some(alloc) = alloc { - // SAFETY: We don't touch the machine memory past this point. - let (guard, stack_ptr) = Supervisor::start_ffi(alloc.clone()); - // SAFETY: Upheld by caller. - let ret = ffi::call(ptr, args); - // SAFETY: We got the guard and stack pointer from start_ffi, and - // the allocator is the same. - (ret, Supervisor::end_ffi(guard, alloc, stack_ptr)) - } else { - // SAFETY: Upheld by caller. - (ffi::call(ptr, args), None) - } - } -} - -/// Performs the actual native call, returning the result and a `None`. -/// Placeholder for platforms that do not support the ptrace supervisor. -/// -/// SAFETY: See `libffi::fii::call`. -#[cfg(not(target_os = "linux"))] -#[inline(always)] -unsafe fn do_native_call( - ptr: CodePtr, - args: &[ffi::Arg<'_>], - _alloc: (), -) -> (T, Option) { - (unsafe { ffi::call(ptr, args) }, None) -} - #[derive(Debug, Clone)] /// Enum of supported arguments to external C functions. // We introduce this enum instead of just calling `ffi::arg` and storing a list diff --git a/src/tools/miri/src/shims/trace/child.rs b/src/tools/miri/src/shims/native_lib/trace/child.rs similarity index 98% rename from src/tools/miri/src/shims/trace/child.rs rename to src/tools/miri/src/shims/native_lib/trace/child.rs index c320537b94495..370d9fa837c70 100644 --- a/src/tools/miri/src/shims/trace/child.rs +++ b/src/tools/miri/src/shims/native_lib/trace/child.rs @@ -30,7 +30,7 @@ pub struct SvInitError; impl Supervisor { /// Returns `true` if the supervisor process exists, and `false` otherwise. - pub fn poll() -> bool { + pub fn is_enabled() -> bool { SUPERVISOR.lock().unwrap().is_some() } @@ -45,7 +45,7 @@ impl Supervisor { /// SAFETY: The resulting guard must be dropped *via `end_ffi`* immediately /// after the desired call has concluded. pub unsafe fn start_ffi( - alloc: Rc>, + alloc: &Rc>, ) -> (std::sync::MutexGuard<'static, Option>, Option<*mut [u8; FAKE_STACK_SIZE]>) { let mut sv_guard = SUPERVISOR.lock().unwrap(); @@ -99,8 +99,8 @@ impl Supervisor { /// received by a prior call to `start_ffi`, and the allocator must be the /// one passed to it also. pub unsafe fn end_ffi( + alloc: &Rc>, mut sv_guard: std::sync::MutexGuard<'static, Option>, - alloc: Rc>, raw_stack_ptr: Option<*mut [u8; FAKE_STACK_SIZE]>, ) -> Option { // We can't use IPC channels here to signal that FFI mode has ended, diff --git a/src/tools/miri/src/shims/trace/messages.rs b/src/tools/miri/src/shims/native_lib/trace/messages.rs similarity index 100% rename from src/tools/miri/src/shims/trace/messages.rs rename to src/tools/miri/src/shims/native_lib/trace/messages.rs diff --git a/src/tools/miri/src/shims/trace/mod.rs b/src/tools/miri/src/shims/native_lib/trace/mod.rs similarity index 100% rename from src/tools/miri/src/shims/trace/mod.rs rename to src/tools/miri/src/shims/native_lib/trace/mod.rs diff --git a/src/tools/miri/src/shims/trace/parent.rs b/src/tools/miri/src/shims/native_lib/trace/parent.rs similarity index 98% rename from src/tools/miri/src/shims/trace/parent.rs rename to src/tools/miri/src/shims/native_lib/trace/parent.rs index cb9a9fc8da5b8..0fec14a129029 100644 --- a/src/tools/miri/src/shims/trace/parent.rs +++ b/src/tools/miri/src/shims/native_lib/trace/parent.rs @@ -4,8 +4,8 @@ use ipc_channel::ipc; use nix::sys::{ptrace, signal, wait}; use nix::unistd; -use crate::shims::trace::messages::{Confirmation, MemEvents, TraceRequest}; -use crate::shims::trace::{AccessEvent, FAKE_STACK_SIZE, StartFfiInfo}; +use super::messages::{Confirmation, MemEvents, TraceRequest}; +use super::{AccessEvent, FAKE_STACK_SIZE, StartFfiInfo}; /// The flags to use when calling `waitid()`. /// Since bitwise or on the nix version of these flags is implemented as a trait, @@ -532,10 +532,11 @@ fn handle_segfault( if ch_pages.iter().any(|pg| (*pg..pg.strict_add(page_size)).contains(&addr)) { // Overall structure: // - Get the address that caused the segfault - // - Unprotect the memory + // - Unprotect the memory: we force the child to execute `mempr_off`, passing + // parameters via global atomic variables. // - Step 1 instruction // - Parse executed code to estimate size & type of access - // - Reprotect the memory + // - Reprotect the memory by executing `mempr_on` in the child. // - Continue // Ensure the stack is properly zeroed out! @@ -606,7 +607,7 @@ fn handle_segfault( ret }); - // Now figure out the size + type of access and log it down + // Now figure out the size + type of access and log it down. // This will mark down e.g. the same area being read multiple times, // since it's more efficient to compress the accesses at the end. if capstone_disassemble(&instr, addr, cs, acc_events).is_err() { diff --git a/src/tools/miri/tests/native-lib/pass/ptr_read_access.stderr b/src/tools/miri/tests/native-lib/pass/ptr_read_access.stderr index e256796e7a7e5..04a3025baefa9 100644 --- a/src/tools/miri/tests/native-lib/pass/ptr_read_access.stderr +++ b/src/tools/miri/tests/native-lib/pass/ptr_read_access.stderr @@ -4,8 +4,8 @@ warning: sharing memory with a native function called via FFI LL | unsafe { print_pointer(&x) }; | ^^^^^^^^^^^^^^^^^ sharing memory with a native function | - = help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis - = help: in particular, Miri assumes that the native call initializes all memory it has written to + = help: when memory is shared with a native function call, Miri stops tracking initialization and provenance for that memory + = help: in particular, Miri assumes that the native call initializes all memory it has access to = help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory = help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free = note: BACKTRACE: diff --git a/src/tools/miri/tests/native-lib/pass/ptr_write_access.stderr b/src/tools/miri/tests/native-lib/pass/ptr_write_access.stderr index 53cbb05a32184..c893b0d9f6561 100644 --- a/src/tools/miri/tests/native-lib/pass/ptr_write_access.stderr +++ b/src/tools/miri/tests/native-lib/pass/ptr_write_access.stderr @@ -4,8 +4,8 @@ warning: sharing memory with a native function called via FFI LL | unsafe { increment_int(&mut x) }; | ^^^^^^^^^^^^^^^^^^^^^ sharing memory with a native function | - = help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis - = help: in particular, Miri assumes that the native call initializes all memory it has written to + = help: when memory is shared with a native function call, Miri stops tracking initialization and provenance for that memory + = help: in particular, Miri assumes that the native call initializes all memory it has access to = help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory = help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free = note: BACKTRACE: From cde10120828ca805af2a5b012a5829d3468d36fd Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 28 Jun 2025 11:36:02 +0200 Subject: [PATCH 35/38] de-intend some code, and extend comments --- .../miri/src/shims/native_lib/trace/child.rs | 12 +- .../miri/src/shims/native_lib/trace/mod.rs | 6 +- .../miri/src/shims/native_lib/trace/parent.rs | 205 +++++++++--------- 3 files changed, 112 insertions(+), 111 deletions(-) diff --git a/src/tools/miri/src/shims/native_lib/trace/child.rs b/src/tools/miri/src/shims/native_lib/trace/child.rs index 370d9fa837c70..f4b0371f8d0a4 100644 --- a/src/tools/miri/src/shims/native_lib/trace/child.rs +++ b/src/tools/miri/src/shims/native_lib/trace/child.rs @@ -7,7 +7,7 @@ use nix::unistd; use super::messages::{Confirmation, MemEvents, TraceRequest}; use super::parent::{ChildListener, sv_loop}; -use super::{FAKE_STACK_SIZE, StartFfiInfo}; +use super::{CALLBACK_STACK_SIZE, StartFfiInfo}; use crate::alloc::isolated_alloc::IsolatedAlloc; static SUPERVISOR: std::sync::Mutex> = std::sync::Mutex::new(None); @@ -46,7 +46,7 @@ impl Supervisor { /// after the desired call has concluded. pub unsafe fn start_ffi( alloc: &Rc>, - ) -> (std::sync::MutexGuard<'static, Option>, Option<*mut [u8; FAKE_STACK_SIZE]>) + ) -> (std::sync::MutexGuard<'static, Option>, Option<*mut [u8; CALLBACK_STACK_SIZE]>) { let mut sv_guard = SUPERVISOR.lock().unwrap(); // If the supervisor is not initialised for whatever reason, fast-fail. @@ -58,10 +58,10 @@ impl Supervisor { }; // Get pointers to all the pages the supervisor must allow accesses in - // and prepare the fake stack. + // and prepare the callback stack. let page_ptrs = alloc.borrow().pages(); - let raw_stack_ptr: *mut [u8; FAKE_STACK_SIZE] = - Box::leak(Box::new([0u8; FAKE_STACK_SIZE])).as_mut_ptr().cast(); + let raw_stack_ptr: *mut [u8; CALLBACK_STACK_SIZE] = + Box::leak(Box::new([0u8; CALLBACK_STACK_SIZE])).as_mut_ptr().cast(); let stack_ptr = raw_stack_ptr.expose_provenance(); let start_info = StartFfiInfo { page_ptrs, stack_ptr }; @@ -101,7 +101,7 @@ impl Supervisor { pub unsafe fn end_ffi( alloc: &Rc>, mut sv_guard: std::sync::MutexGuard<'static, Option>, - raw_stack_ptr: Option<*mut [u8; FAKE_STACK_SIZE]>, + raw_stack_ptr: Option<*mut [u8; CALLBACK_STACK_SIZE]>, ) -> Option { // We can't use IPC channels here to signal that FFI mode has ended, // since they might allocate memory which could get us stuck in a SIGTRAP diff --git a/src/tools/miri/src/shims/native_lib/trace/mod.rs b/src/tools/miri/src/shims/native_lib/trace/mod.rs index a073482044eb0..8ff96151600d5 100644 --- a/src/tools/miri/src/shims/native_lib/trace/mod.rs +++ b/src/tools/miri/src/shims/native_lib/trace/mod.rs @@ -6,8 +6,8 @@ use std::ops::Range; pub use self::child::{Supervisor, init_sv, register_retcode_sv}; -/// The size used for the array into which we can move the stack pointer. -const FAKE_STACK_SIZE: usize = 1024; +/// The size of the temporary stack we use for callbacks that the server executes in the client. +const CALLBACK_STACK_SIZE: usize = 1024; /// Information needed to begin tracing. #[derive(serde::Serialize, serde::Deserialize, Debug, Clone)] @@ -16,7 +16,7 @@ struct StartFfiInfo { /// with `IsolatedAlloc::pages` and prepared with `IsolatedAlloc::prepare_ffi`. page_ptrs: Vec, /// The address of an allocation that can serve as a temporary stack. - /// This should be a leaked `Box<[u8; FAKE_STACK_SIZE]>` cast to an int. + /// This should be a leaked `Box<[u8; CALLBACK_STACK_SIZE]>` cast to an int. stack_ptr: usize, } diff --git a/src/tools/miri/src/shims/native_lib/trace/parent.rs b/src/tools/miri/src/shims/native_lib/trace/parent.rs index 0fec14a129029..0a0415c6e1087 100644 --- a/src/tools/miri/src/shims/native_lib/trace/parent.rs +++ b/src/tools/miri/src/shims/native_lib/trace/parent.rs @@ -5,7 +5,7 @@ use nix::sys::{ptrace, signal, wait}; use nix::unistd; use super::messages::{Confirmation, MemEvents, TraceRequest}; -use super::{AccessEvent, FAKE_STACK_SIZE, StartFfiInfo}; +use super::{AccessEvent, CALLBACK_STACK_SIZE, StartFfiInfo}; /// The flags to use when calling `waitid()`. /// Since bitwise or on the nix version of these flags is implemented as a trait, @@ -263,7 +263,7 @@ pub fn sv_loop( ExecEvent::Start(ch_info) => { // All the pages that the child process is "allowed to" access. ch_pages = ch_info.page_ptrs; - // And the fake stack it allocated for us to use later. + // And the temporary callback stack it allocated for us to use later. ch_stack = Some(ch_info.stack_ptr); // We received the signal and are no longer in the main listener loop, @@ -529,112 +529,113 @@ fn handle_segfault( let addr = unsafe { siginfo.si_addr().addr() }; let page_addr = addr.strict_sub(addr.strict_rem(page_size)); - if ch_pages.iter().any(|pg| (*pg..pg.strict_add(page_size)).contains(&addr)) { - // Overall structure: - // - Get the address that caused the segfault - // - Unprotect the memory: we force the child to execute `mempr_off`, passing - // parameters via global atomic variables. - // - Step 1 instruction - // - Parse executed code to estimate size & type of access - // - Reprotect the memory by executing `mempr_on` in the child. - // - Continue - - // Ensure the stack is properly zeroed out! - for a in (ch_stack..ch_stack.strict_add(FAKE_STACK_SIZE)).step_by(ARCH_WORD_SIZE) { - ptrace::write(pid, std::ptr::with_exposed_provenance_mut(a), 0).unwrap(); - } - - // Guard against both architectures with upwards and downwards-growing stacks. - let stack_ptr = ch_stack.strict_add(FAKE_STACK_SIZE / 2); - let regs_bak = ptrace::getregs(pid).unwrap(); - let mut new_regs = regs_bak; - let ip_prestep = regs_bak.ip(); - - // Move the instr ptr into the deprotection code. - #[expect(clippy::as_conversions)] - new_regs.set_ip(mempr_off as usize); - // Don't mess up the stack by accident! - new_regs.set_sp(stack_ptr); - - // Modify the PAGE_ADDR global on the child process to point to the page - // that we want unprotected. - ptrace::write( - pid, - (&raw const PAGE_ADDR).cast_mut().cast(), - libc::c_long::try_from(page_addr).unwrap(), - ) - .unwrap(); - - // Check if we also own the next page, and if so unprotect it in case - // the access spans the page boundary. - let flag = if ch_pages.contains(&page_addr.strict_add(page_size)) { 2 } else { 1 }; - ptrace::write(pid, (&raw const PAGE_COUNT).cast_mut().cast(), flag).unwrap(); - - ptrace::setregs(pid, new_regs).unwrap(); - - // Our mempr_* functions end with a raise(SIGSTOP). - wait_for_signal(Some(pid), signal::SIGSTOP, true)?; - - // Step 1 instruction. - ptrace::setregs(pid, regs_bak).unwrap(); - ptrace::step(pid, None).unwrap(); - // Don't use wait_for_signal here since 1 instruction doesn't give room - // for any uncertainty + we don't want it `cont()`ing randomly by accident - // Also, don't let it continue with unprotected memory if something errors! - let _ = wait::waitid(wait::Id::Pid(pid), WAIT_FLAGS).map_err(|_| ExecEnd(None))?; - - // Zero out again to be safe - for a in (ch_stack..ch_stack.strict_add(FAKE_STACK_SIZE)).step_by(ARCH_WORD_SIZE) { - ptrace::write(pid, std::ptr::with_exposed_provenance_mut(a), 0).unwrap(); - } - - // Save registers and grab the bytes that were executed. This would - // be really nasty if it was a jump or similar but those thankfully - // won't do memory accesses and so can't trigger this! - let regs_bak = ptrace::getregs(pid).unwrap(); - new_regs = regs_bak; - let ip_poststep = regs_bak.ip(); - // We need to do reads/writes in word-sized chunks. - let diff = (ip_poststep.strict_sub(ip_prestep)).div_ceil(ARCH_WORD_SIZE); - let instr = (ip_prestep..ip_prestep.strict_add(diff)).fold(vec![], |mut ret, ip| { - // This only needs to be a valid pointer in the child process, not ours. - ret.append( - &mut ptrace::read(pid, std::ptr::without_provenance_mut(ip)) - .unwrap() - .to_ne_bytes() - .to_vec(), - ); - ret - }); - - // Now figure out the size + type of access and log it down. - // This will mark down e.g. the same area being read multiple times, - // since it's more efficient to compress the accesses at the end. - if capstone_disassemble(&instr, addr, cs, acc_events).is_err() { - // Read goes first because we need to be pessimistic. - acc_events.push(AccessEvent::Read(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE))); - acc_events.push(AccessEvent::Write(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE))); - } - - // Reprotect everything and continue. - #[expect(clippy::as_conversions)] - new_regs.set_ip(mempr_on as usize); - new_regs.set_sp(stack_ptr); - ptrace::setregs(pid, new_regs).unwrap(); - wait_for_signal(Some(pid), signal::SIGSTOP, true)?; - - ptrace::setregs(pid, regs_bak).unwrap(); - ptrace::syscall(pid, None).unwrap(); - Ok(()) - } else { - // This was a real segfault, so print some debug info and quit. + if !ch_pages.iter().any(|pg| (*pg..pg.strict_add(page_size)).contains(&addr)) { + // This was a real segfault (not one of the Miri memory pages), so print some debug info and + // quit. let regs = ptrace::getregs(pid).unwrap(); eprintln!("Segfault occurred during FFI at {addr:#018x}"); eprintln!("Expected access on pages: {ch_pages:#018x?}"); eprintln!("Register dump: {regs:#x?}"); ptrace::kill(pid).unwrap(); - Err(ExecEnd(None)) + return Err(ExecEnd(None)); } + + // Overall structure: + // - Get the address that caused the segfault + // - Unprotect the memory: we force the child to execute `mempr_off`, passing parameters via + // global atomic variables. This is what we use the temporary callback stack for. + // - Step 1 instruction + // - Parse executed code to estimate size & type of access + // - Reprotect the memory by executing `mempr_on` in the child. + // - Continue + + // Ensure the stack is properly zeroed out! + for a in (ch_stack..ch_stack.strict_add(CALLBACK_STACK_SIZE)).step_by(ARCH_WORD_SIZE) { + ptrace::write(pid, std::ptr::with_exposed_provenance_mut(a), 0).unwrap(); + } + + // Guard against both architectures with upwards and downwards-growing stacks. + let stack_ptr = ch_stack.strict_add(CALLBACK_STACK_SIZE / 2); + let regs_bak = ptrace::getregs(pid).unwrap(); + let mut new_regs = regs_bak; + let ip_prestep = regs_bak.ip(); + + // Move the instr ptr into the deprotection code. + #[expect(clippy::as_conversions)] + new_regs.set_ip(mempr_off as usize); + // Don't mess up the stack by accident! + new_regs.set_sp(stack_ptr); + + // Modify the PAGE_ADDR global on the child process to point to the page + // that we want unprotected. + ptrace::write( + pid, + (&raw const PAGE_ADDR).cast_mut().cast(), + libc::c_long::try_from(page_addr).unwrap(), + ) + .unwrap(); + + // Check if we also own the next page, and if so unprotect it in case + // the access spans the page boundary. + let flag = if ch_pages.contains(&page_addr.strict_add(page_size)) { 2 } else { 1 }; + ptrace::write(pid, (&raw const PAGE_COUNT).cast_mut().cast(), flag).unwrap(); + + ptrace::setregs(pid, new_regs).unwrap(); + + // Our mempr_* functions end with a raise(SIGSTOP). + wait_for_signal(Some(pid), signal::SIGSTOP, true)?; + + // Step 1 instruction. + ptrace::setregs(pid, regs_bak).unwrap(); + ptrace::step(pid, None).unwrap(); + // Don't use wait_for_signal here since 1 instruction doesn't give room + // for any uncertainty + we don't want it `cont()`ing randomly by accident + // Also, don't let it continue with unprotected memory if something errors! + let _ = wait::waitid(wait::Id::Pid(pid), WAIT_FLAGS).map_err(|_| ExecEnd(None))?; + + // Zero out again to be safe + for a in (ch_stack..ch_stack.strict_add(CALLBACK_STACK_SIZE)).step_by(ARCH_WORD_SIZE) { + ptrace::write(pid, std::ptr::with_exposed_provenance_mut(a), 0).unwrap(); + } + + // Save registers and grab the bytes that were executed. This would + // be really nasty if it was a jump or similar but those thankfully + // won't do memory accesses and so can't trigger this! + let regs_bak = ptrace::getregs(pid).unwrap(); + new_regs = regs_bak; + let ip_poststep = regs_bak.ip(); + // We need to do reads/writes in word-sized chunks. + let diff = (ip_poststep.strict_sub(ip_prestep)).div_ceil(ARCH_WORD_SIZE); + let instr = (ip_prestep..ip_prestep.strict_add(diff)).fold(vec![], |mut ret, ip| { + // This only needs to be a valid pointer in the child process, not ours. + ret.append( + &mut ptrace::read(pid, std::ptr::without_provenance_mut(ip)) + .unwrap() + .to_ne_bytes() + .to_vec(), + ); + ret + }); + + // Now figure out the size + type of access and log it down. + // This will mark down e.g. the same area being read multiple times, + // since it's more efficient to compress the accesses at the end. + if capstone_disassemble(&instr, addr, cs, acc_events).is_err() { + // Read goes first because we need to be pessimistic. + acc_events.push(AccessEvent::Read(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE))); + acc_events.push(AccessEvent::Write(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE))); + } + + // Reprotect everything and continue. + #[expect(clippy::as_conversions)] + new_regs.set_ip(mempr_on as usize); + new_regs.set_sp(stack_ptr); + ptrace::setregs(pid, new_regs).unwrap(); + wait_for_signal(Some(pid), signal::SIGSTOP, true)?; + + ptrace::setregs(pid, regs_bak).unwrap(); + ptrace::syscall(pid, None).unwrap(); + Ok(()) } // We only get dropped into these functions via offsetting the instr pointer From 5e14d0f193d59c139bcde2c2c18cd7f5612c3c2b Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 28 Jun 2025 11:55:41 +0200 Subject: [PATCH 36/38] move all the message types into one place --- .../miri/src/shims/native_lib/trace/child.rs | 4 +-- .../src/shims/native_lib/trace/messages.rs | 31 ++++++++++++++++--- .../miri/src/shims/native_lib/trace/mod.rs | 23 -------------- .../miri/src/shims/native_lib/trace/parent.rs | 4 +-- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/tools/miri/src/shims/native_lib/trace/child.rs b/src/tools/miri/src/shims/native_lib/trace/child.rs index f4b0371f8d0a4..4961e875c7751 100644 --- a/src/tools/miri/src/shims/native_lib/trace/child.rs +++ b/src/tools/miri/src/shims/native_lib/trace/child.rs @@ -5,9 +5,9 @@ use ipc_channel::ipc; use nix::sys::{ptrace, signal}; use nix::unistd; -use super::messages::{Confirmation, MemEvents, TraceRequest}; +use super::CALLBACK_STACK_SIZE; +use super::messages::{Confirmation, MemEvents, StartFfiInfo, TraceRequest}; use super::parent::{ChildListener, sv_loop}; -use super::{CALLBACK_STACK_SIZE, StartFfiInfo}; use crate::alloc::isolated_alloc::IsolatedAlloc; static SUPERVISOR: std::sync::Mutex> = std::sync::Mutex::new(None); diff --git a/src/tools/miri/src/shims/native_lib/trace/messages.rs b/src/tools/miri/src/shims/native_lib/trace/messages.rs index 1014ca750c8e0..8a83dab5c09d1 100644 --- a/src/tools/miri/src/shims/native_lib/trace/messages.rs +++ b/src/tools/miri/src/shims/native_lib/trace/messages.rs @@ -18,29 +18,42 @@ //! in `super::child` (namely `start_ffi()` and `end_ffi()`) to handle this. It is //! trivially easy to cause a deadlock or crash by messing this up! +use std::ops::Range; + /// An IPC request sent by the child process to the parent. /// /// The sender for this channel should live on the child process. #[derive(serde::Serialize, serde::Deserialize, Debug, Clone)] -pub(super) enum TraceRequest { +pub enum TraceRequest { /// Requests that tracing begins. Following this being sent, the child must /// wait to receive a `Confirmation` on the respective channel and then /// `raise(SIGSTOP)`. /// /// To avoid possible issues while allocating memory for IPC channels, ending /// the tracing is instead done via `raise(SIGUSR1)`. - StartFfi(super::StartFfiInfo), + StartFfi(StartFfiInfo), /// Manually overrides the code that the supervisor will return upon exiting. /// Once set, it is permanent. This can be called again to change the value. OverrideRetcode(i32), } +/// Information needed to begin tracing. +#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)] +pub struct StartFfiInfo { + /// A vector of page addresses. These should have been automatically obtained + /// with `IsolatedAlloc::pages` and prepared with `IsolatedAlloc::prepare_ffi`. + pub page_ptrs: Vec, + /// The address of an allocation that can serve as a temporary stack. + /// This should be a leaked `Box<[u8; CALLBACK_STACK_SIZE]>` cast to an int. + pub stack_ptr: usize, +} + /// A marker type confirming that the supervisor has received the request to begin /// tracing and is now waiting for a `SIGSTOP`. /// /// The sender for this channel should live on the parent process. #[derive(serde::Serialize, serde::Deserialize, Debug)] -pub(super) struct Confirmation; +pub struct Confirmation; /// The final results of an FFI trace, containing every relevant event detected /// by the tracer. Sent by the supervisor after receiving a `SIGUSR1` signal. @@ -53,5 +66,15 @@ pub struct MemEvents { /// pessimistically rounded up, and if the type (read/write/both) is uncertain /// it is reported as whatever would be safest to assume; i.e. a read + maybe-write /// becomes a read + write, etc. - pub acc_events: Vec, + pub acc_events: Vec, +} + +/// A single memory access, conservatively overestimated +/// in case of ambiguity. +#[derive(serde::Serialize, serde::Deserialize, Debug)] +pub enum AccessEvent { + /// A read may have occurred on no more than the specified address range. + Read(Range), + /// A write may have occurred on no more than the specified address range. + Write(Range), } diff --git a/src/tools/miri/src/shims/native_lib/trace/mod.rs b/src/tools/miri/src/shims/native_lib/trace/mod.rs index 8ff96151600d5..174b06b3ac56c 100644 --- a/src/tools/miri/src/shims/native_lib/trace/mod.rs +++ b/src/tools/miri/src/shims/native_lib/trace/mod.rs @@ -2,30 +2,7 @@ mod child; pub mod messages; mod parent; -use std::ops::Range; - pub use self::child::{Supervisor, init_sv, register_retcode_sv}; /// The size of the temporary stack we use for callbacks that the server executes in the client. const CALLBACK_STACK_SIZE: usize = 1024; - -/// Information needed to begin tracing. -#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)] -struct StartFfiInfo { - /// A vector of page addresses. These should have been automatically obtained - /// with `IsolatedAlloc::pages` and prepared with `IsolatedAlloc::prepare_ffi`. - page_ptrs: Vec, - /// The address of an allocation that can serve as a temporary stack. - /// This should be a leaked `Box<[u8; CALLBACK_STACK_SIZE]>` cast to an int. - stack_ptr: usize, -} - -/// A single memory access, conservatively overestimated -/// in case of ambiguity. -#[derive(serde::Serialize, serde::Deserialize, Debug)] -pub enum AccessEvent { - /// A read may have occurred on no more than the specified address range. - Read(Range), - /// A write may have occurred on no more than the specified address range. - Write(Range), -} diff --git a/src/tools/miri/src/shims/native_lib/trace/parent.rs b/src/tools/miri/src/shims/native_lib/trace/parent.rs index 0a0415c6e1087..1d968b7a974eb 100644 --- a/src/tools/miri/src/shims/native_lib/trace/parent.rs +++ b/src/tools/miri/src/shims/native_lib/trace/parent.rs @@ -4,8 +4,8 @@ use ipc_channel::ipc; use nix::sys::{ptrace, signal, wait}; use nix::unistd; -use super::messages::{Confirmation, MemEvents, TraceRequest}; -use super::{AccessEvent, CALLBACK_STACK_SIZE, StartFfiInfo}; +use super::CALLBACK_STACK_SIZE; +use super::messages::{AccessEvent, Confirmation, MemEvents, StartFfiInfo, TraceRequest}; /// The flags to use when calling `waitid()`. /// Since bitwise or on the nix version of these flags is implemented as a trait, From 85357d1e5cf56d8aca54e7d4f6cc365b006f484a Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 28 Jun 2025 13:33:12 +0200 Subject: [PATCH 37/38] update lockfile --- Cargo.lock | 151 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 135 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e1cf17e2c01ca..110d17142bb3f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -366,6 +366,26 @@ dependencies = [ "serde", ] +[[package]] +name = "capstone" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "015ef5d5ca1743e3f94af9509ba6bd2886523cfee46e48d15c2ef5216fd4ac9a" +dependencies = [ + "capstone-sys", + "libc", +] + +[[package]] +name = "capstone-sys" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2267cb8d16a1e4197863ec4284ffd1aec26fe7e57c58af46b02590a0235809a0" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "cargo-miri" version = "0.1.0" @@ -738,7 +758,7 @@ dependencies = [ "tracing-subscriber", "unified-diff", "walkdir", - "windows", + "windows 0.61.3", ] [[package]] @@ -1592,7 +1612,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core", + "windows-core 0.61.2", ] [[package]] @@ -1916,6 +1936,25 @@ dependencies = [ "unic-langid", ] +[[package]] +name = "ipc-channel" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb8251fb7bcd9ccd3725ed8deae9fe7db8e586495c9eb5b0c52e6233e5e75ea" +dependencies = [ + "bincode", + "crossbeam-channel", + "fnv", + "lazy_static", + "libc", + "mio", + "rand 0.8.5", + "serde", + "tempfile", + "uuid", + "windows 0.58.0", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -2301,6 +2340,18 @@ dependencies = [ "adler2", ] +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "log", + "wasi 0.11.1+wasi-snapshot-preview1", + "windows-sys 0.59.0", +] + [[package]] name = "miow" version = "0.6.0" @@ -2316,18 +2367,22 @@ version = "0.1.0" dependencies = [ "aes", "bitflags", + "capstone", "chrono", "chrono-tz", "colored", "directories", "getrandom 0.3.3", + "ipc-channel", "libc", "libffi", "libloading", "measureme", + "nix", "rand 0.9.1", "regex", "rustc_version", + "serde", "smallvec", "tempfile", "tikv-jemalloc-sys", @@ -3519,7 +3574,7 @@ dependencies = [ "thorin-dwp", "tracing", "wasm-encoder 0.219.2", - "windows", + "windows 0.61.3", ] [[package]] @@ -3578,7 +3633,7 @@ dependencies = [ "tempfile", "thin-vec", "tracing", - "windows", + "windows 0.61.3", ] [[package]] @@ -3641,7 +3696,7 @@ dependencies = [ "shlex", "stable_mir", "tracing", - "windows", + "windows 0.61.3", ] [[package]] @@ -3696,7 +3751,7 @@ dependencies = [ "termcolor", "termize", "tracing", - "windows", + "windows 0.61.3", ] [[package]] @@ -4457,7 +4512,7 @@ dependencies = [ "rustc_target", "termize", "tracing", - "windows", + "windows 0.61.3", ] [[package]] @@ -5142,7 +5197,7 @@ dependencies = [ "libc", "objc2-core-foundation", "objc2-io-kit", - "windows", + "windows 0.61.3", ] [[package]] @@ -6034,6 +6089,16 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" +dependencies = [ + "windows-core 0.58.0", + "windows-targets 0.52.6", +] + [[package]] name = "windows" version = "0.61.3" @@ -6041,7 +6106,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" dependencies = [ "windows-collections", - "windows-core", + "windows-core 0.61.2", "windows-future", "windows-link", "windows-numerics", @@ -6064,7 +6129,20 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" dependencies = [ - "windows-core", + "windows-core 0.61.2", +] + +[[package]] +name = "windows-core" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" +dependencies = [ + "windows-implement 0.58.0", + "windows-interface 0.58.0", + "windows-result 0.2.0", + "windows-strings 0.1.0", + "windows-targets 0.52.6", ] [[package]] @@ -6073,11 +6151,11 @@ version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" dependencies = [ - "windows-implement", - "windows-interface", + "windows-implement 0.60.0", + "windows-interface 0.59.1", "windows-link", - "windows-result", - "windows-strings", + "windows-result 0.3.4", + "windows-strings 0.4.2", ] [[package]] @@ -6086,11 +6164,22 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" dependencies = [ - "windows-core", + "windows-core 0.61.2", "windows-link", "windows-threading", ] +[[package]] +name = "windows-implement" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.103", +] + [[package]] name = "windows-implement" version = "0.60.0" @@ -6102,6 +6191,17 @@ dependencies = [ "syn 2.0.103", ] +[[package]] +name = "windows-interface" +version = "0.58.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.103", +] + [[package]] name = "windows-interface" version = "0.59.1" @@ -6125,10 +6225,19 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" dependencies = [ - "windows-core", + "windows-core 0.61.2", "windows-link", ] +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + [[package]] name = "windows-result" version = "0.3.4" @@ -6138,6 +6247,16 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result 0.2.0", + "windows-targets 0.52.6", +] + [[package]] name = "windows-strings" version = "0.4.2" From 8d7f053a2007bf09d0e4668910f02c95560e164d Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 28 Jun 2025 13:47:52 +0200 Subject: [PATCH 38/38] fix miri build in bootstrap --- src/tools/miri/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs index 850478b516c9b..68f4ba3320ca1 100644 --- a/src/tools/miri/src/lib.rs +++ b/src/tools/miri/src/lib.rs @@ -1,4 +1,5 @@ #![cfg_attr(bootstrap, feature(cfg_match))] +#![cfg_attr(bootstrap, feature(nonnull_provenance))] #![cfg_attr(not(bootstrap), feature(cfg_select))] #![feature(rustc_private)] #![feature(float_gamma)]