From 69e8167eac758fa589592b9f408a5d852a43c35d Mon Sep 17 00:00:00 2001 From: Marcondiro <46560192+Marcondiro@users.noreply.github.com> Date: Mon, 20 Jan 2025 18:05:34 +0100 Subject: [PATCH 1/6] Fix potential invalid pointer to Asid --- src/image/mod.rs | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/src/image/mod.rs b/src/image/mod.rs index fc9851e..c1ee115 100644 --- a/src/image/mod.rs +++ b/src/image/mod.rs @@ -108,7 +108,9 @@ pub struct Image { // Any read data callback set by this `Image` instance. callback: Option, caches: Vec>, - asids: HashSet, + // `HashSet` might grow and move the content around, we cannot use `Asid` directly since we + // share a pointer with libipt and it must be valid for the entire Image (section) filetime. + asids: HashSet>, } impl Image { @@ -233,7 +235,9 @@ impl Image { })?; self.caches.extend_from_slice(&src.caches); - self.asids.extend(&src.asids); + for asid in &src.asids { + self.asids.insert(asid.clone()); + } Ok(res) } @@ -252,7 +256,7 @@ impl Image { ) -> Result<(), PtError> { let asid_ptr = if let Some(a) = asid { // fixme: use get_or_insert once stable (if ever) - self.asids.insert(*a); + self.asids.insert(Rc::new(*a)); &raw const self.asids.get(a).unwrap().0 } else { ptr::null() @@ -275,9 +279,6 @@ impl Image { ); })?; self.caches.push(iscache); - if let Some(a) = asid { - self.asids.insert(*a); - } Ok(()) } @@ -303,7 +304,7 @@ impl Image { let cfilename = str_to_cstring_pterror(filename)?; let asid_ptr = if let Some(a) = asid { // fixme: use get_or_insert once stable (if ever) - self.asids.insert(*a); + self.asids.insert(Rc::new(*a)); &raw const self.asids.get(a).unwrap().0 } else { ptr::null() @@ -318,9 +319,6 @@ impl Image { vaddr, ) })?; - if let Some(a) = asid { - self.asids.insert(*a); - } Ok(()) } } @@ -488,4 +486,29 @@ mod test { i.add_cached(Rc::new(c), isid, Some(&asid)).unwrap(); assert_eq!(i.remove_by_asid(&asid).unwrap(), 1); } + + #[test] + fn img_extend() { + let file: PathBuf = [env!("CARGO_MANIFEST_DIR"), "testfiles", "garbage.txt"] + .iter() + .collect(); + + let mut img = Image::new(None).unwrap(); + { + let mut img2 = Image::new(None).unwrap(); + for i in 0..100 { + let mut cache = SectionCache::new(None).unwrap(); + let asid = Asid::new(Some(i), Some(i)); + let isid = cache.add_file(file.to_str().unwrap(), i, 1, i).unwrap(); + let rc = Rc::new(cache); + img2.add_cached(rc.clone(), isid, Some(&asid)).unwrap() + } + + img.extend(&img2).unwrap(); + } + + for i in 0..100 { + assert_eq!(img.remove_by_asid(&Asid::new(Some(i), Some(i))).unwrap(), 1); + } + } } From bc6d2d70610b5fbee505f914e05dcba28a62c266 Mon Sep 17 00:00:00 2001 From: Marcondiro <46560192+Marcondiro@users.noreply.github.com> Date: Tue, 21 Jan 2025 17:08:20 +0100 Subject: [PATCH 2/6] Image::add_files_cached() API to simplify the creation of a cached image --- CHANGELOG.md | 1 + Cargo.toml | 4 ++-- src/image/mod.rs | 58 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0511f7c..aff3227 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ _This changelog documents only changes relevant to users, internal changes might - This [CHANGELOG](./CHANGELOG.md) 🎉 - Explicit [MSRV](Cargo.toml) +- New `Image::add_files_cached()` API to simplify the creation of a cached image for simple use cases. ### Changed diff --git a/Cargo.toml b/Cargo.toml index 5f46a5f..b0db105 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "libipt" -version = "0.3.0-beta.2" +version = "0.3.0-beta.3" authors = [ "sum_catnip ", "Marcondiro ", @@ -17,6 +17,6 @@ rust-version = "1.82.0" libipt_master = ["libipt-sys/libipt_master"] [dependencies] -libipt-sys = { version = "0.2.1-beta.3", git = "https://github.com/sum-catnip/libipt-sys.git" } +libipt-sys = { version = "0.2.1", git = "https://github.com/sum-catnip/libipt-sys.git" } bitflags = "2.4.1" num_enum = "0.7.1" diff --git a/src/image/mod.rs b/src/image/mod.rs index c1ee115..e41fdff 100644 --- a/src/image/mod.rs +++ b/src/image/mod.rs @@ -97,6 +97,19 @@ impl Drop for BoxedCallback { } } +/// Info of a binary's section that can be used to populate an `Image` +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct SectionInfo { + /// Path of the binary + pub filename: String, + /// Offset of the section in the file + pub offset: u64, + /// Size of the section + pub size: u64, + /// Start virtual address of the section once loaded in memory + pub virtual_address: u64, +} + /// An Image defines the memory image that was traced as a collection /// of file sections and the virtual addresses at which those sections were loaded. #[derive(Debug)] @@ -109,7 +122,7 @@ pub struct Image { callback: Option, caches: Vec>, // `HashSet` might grow and move the content around, we cannot use `Asid` directly since we - // share a pointer with libipt and it must be valid for the entire Image (section) filetime. + // share a pointer with libipt, and it must be valid for the entire Image (section) lifetime. asids: HashSet>, } @@ -321,6 +334,31 @@ impl Image { })?; Ok(()) } + + /// Add multiple file sections to the traced memory image, backed by a cache. + /// + /// This is the same as creating a `SectionCache` and subsequently calling `add_cached()` for + /// each section. + pub fn add_files_cached( + &mut self, + sections_info: &[SectionInfo], + asid: Option<&Asid>, + ) -> Result<(), PtError> { + let mut image_cache = SectionCache::new(None)?; + + let mut isids = Vec::with_capacity(sections_info.len()); + for s in sections_info { + let isid = image_cache.add_file(&s.filename, s.offset, s.size, s.virtual_address)?; + isids.push(isid); + } + + let rc_cache = Rc::new(image_cache); + for isid in isids { + self.add_cached(rc_cache.clone(), isid, asid)?; + } + + Ok(()) + } } impl Drop for Image { @@ -511,4 +549,22 @@ mod test { assert_eq!(img.remove_by_asid(&Asid::new(Some(i), Some(i))).unwrap(), 1); } } + + #[test] + fn img_add_files_cached() { + let file: PathBuf = [env!("CARGO_MANIFEST_DIR"), "testfiles", "garbage.txt"] + .iter() + .collect(); + + let section = SectionInfo { + filename: file.to_string_lossy().to_string(), + offset: 5, + size: 15, + virtual_address: 0x1337, + }; + let mut i = img_with_file(); + let asid = Asid::new(Some(3), Some(4)); + i.add_files_cached(&[section], Some(&asid)).unwrap(); + assert_eq!(i.remove_by_asid(&asid).unwrap(), 1); + } } From 46887bb38ffab9efb9185c58dcf6240e468e29a1 Mon Sep 17 00:00:00 2001 From: Marcondiro Date: Wed, 22 Jan 2025 19:38:41 +0100 Subject: [PATCH 3/6] Start improving docs --- CHANGELOG.md | 2 ++ src/block/mod.rs | 37 ++++++++++++++++++------------------- src/insn/class.rs | 10 +++++----- src/lib.rs | 26 +++++++++++--------------- src/version.rs | 2 +- 5 files changed, 37 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aff3227..f5fbe50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,10 +16,12 @@ _This changelog documents only changes relevant to users, internal changes might - Decoders/Encoder `::new()` have been replaced by `EncoderDecoderBuilder.build()` - Decoders/Encoder `.get_config()` have been replaced by `.used_builder()` - Block/Insn decoders `.image()` now returns `&mut Image` instead of `Result` +- Block `raw()` now returns an `Option::<&[u8]>` - `Image.copy()` has been replaced by `Image.extend()` - `Image.add_cached()` now takes a `Rc` instead of `&mut SectionCache` to ensure that the cache outlives the `Image`. - Many packet/event types methods now take a `&self` instead of consuming `self` - Some simple methods are now `const` +- ~~`Class:Error`~~ -> `Class::Unknown` ### Removed diff --git a/src/block/mod.rs b/src/block/mod.rs index cf4c836..74feb76 100644 --- a/src/block/mod.rs +++ b/src/block/mod.rs @@ -12,7 +12,7 @@ pub use decoder::*; /// A block of instructions. /// /// Instructions in this block are executed sequentially but are not necessarily -/// contiguous in memory. Users are expected to follow direct branches. +/// contiguous in memory. Users are expected to follow direct branches. #[derive(Debug, Clone, Copy)] #[repr(transparent)] pub struct Block(pub(super) pt_block); @@ -31,6 +31,7 @@ impl Block { self.0.end_ip } + // TODO: make this more rusty? at least with an Option? &Image? /// The image section that contains the instructions in this block. /// /// A value of zero means that the section did not have an identifier. @@ -50,7 +51,7 @@ impl Block { /// The instruction class for the last instruction in this block. /// - /// This field may be set to `Class::Error` to indicate that the instruction + /// This field may be set to `Class::Unknown` to indicate that the instruction /// class is not available. The block decoder may choose to not provide /// the instruction class in some cases for performance reasons. #[must_use] @@ -68,31 +69,29 @@ impl Block { /// The raw bytes of the last instruction in this block in case the /// instruction does not fit entirely into this block's section. /// - /// This field is only valid if truncated is set. + /// This field is `Some`only if `truncated()` is true. #[must_use] - pub fn raw(&self) -> &[u8] { - &self.0.raw[..self.0.size as usize] + pub fn raw(&self) -> Option<&[u8]> { + if self.truncated() { + Some(&self.0.raw[..self.0.size as usize]) + } else { + None + } } - /// A collection of flags giving additional information about the - /// instructions in this block. - /// - /// - all instructions in this block were executed speculatively. + /// All instructions in this block were executed speculatively. #[must_use] pub fn speculative(&self) -> bool { self.0.speculative() > 0 } - /// A collection of flags giving additional information about the - /// instructions in this block. - /// - /// - the last instruction in this block is truncated. + /// The last instruction in this block is truncated. /// /// It starts in this block's section but continues in one or more /// other sections depending on how fragmented the memory image is. /// - /// The raw bytes for the last instruction are provided in \@raw and - /// its size in \@size in this case. + /// The raw bytes for the last instruction are provided in @raw and + /// its size in @size in this case. #[must_use] pub fn truncated(&self) -> bool { self.0.truncated() > 0 @@ -125,9 +124,9 @@ mod test { assert_eq!(blk.end_ip(), 2); assert_eq!(blk.isid(), 3); assert_eq!(blk.mode(), ExecModeType::Bit32); - assert_eq!(blk.class(), Class::Error); + assert_eq!(blk.class(), Class::Unknown); assert_eq!(blk.ninsn(), 4); - assert_eq!(blk.raw(), &data[..8]); + assert_eq!(blk.raw(), Some(&data[..8])); assert!(blk.truncated()); assert!(!blk.speculative()); } @@ -153,9 +152,9 @@ mod test { assert_eq!(blk.end_ip(), 2); assert_eq!(blk.isid(), 3); assert_eq!(blk.mode(), ExecModeType::Bit32); - assert_eq!(blk.class(), Class::Error); + assert_eq!(blk.class(), Class::Unknown); assert_eq!(blk.ninsn(), 4); - assert!(blk.raw().len() > 0); + assert!(blk.raw().is_none()); assert!(!blk.truncated()); assert!(!blk.speculative()); } diff --git a/src/insn/class.rs b/src/insn/class.rs index 7cdbb28..9254ea4 100644 --- a/src/insn/class.rs +++ b/src/insn/class.rs @@ -2,10 +2,10 @@ #![allow(clippy::unnecessary_cast)] use libipt_sys::{ - pt_insn_class_ptic_call, pt_insn_class_ptic_cond_jump, pt_insn_class_ptic_error, - pt_insn_class_ptic_far_call, pt_insn_class_ptic_far_jump, pt_insn_class_ptic_far_return, - pt_insn_class_ptic_jump, pt_insn_class_ptic_other, pt_insn_class_ptic_ptwrite, - pt_insn_class_ptic_return, + pt_insn_class_ptic_call, pt_insn_class_ptic_cond_jump, pt_insn_class_ptic_far_call, + pt_insn_class_ptic_far_jump, pt_insn_class_ptic_far_return, pt_insn_class_ptic_jump, + pt_insn_class_ptic_other, pt_insn_class_ptic_ptwrite, pt_insn_class_ptic_return, + pt_insn_class_ptic_unknown, }; use num_enum::TryFromPrimitive; @@ -21,7 +21,7 @@ pub enum Class { /// The instruction is a near conditional jump. CondJump = pt_insn_class_ptic_cond_jump as u32, /// The instruction could not be classified. - Error = pt_insn_class_ptic_error as u32, + Unknown = pt_insn_class_ptic_unknown as u32, /// The instruction is a call-like far transfer. /// E.g. SYSCALL, SYSENTER, or FAR CALL. FarCall = pt_insn_class_ptic_far_call as u32, diff --git a/src/lib.rs b/src/lib.rs index 05ac23a..18a7132 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,44 +1,40 @@ #![warn(clippy::all, clippy::cargo)] -/// The pt_config structure defines an Intel Processor Trace (Intel PT) encoder or decoder configuration. -/// -/// It is required for allocating a trace packet encoder (see pt_alloc_encoder(3)), -/// a trace packet decoder (see pt_pkt_alloc_decoder(3)), -/// a query decoder (see pt_qry_alloc_decoder(3)), -/// or an instruction flow decoder (see pt_insn_alloc_decoder(3)). +/// Intel Processor Trace (Intel PT) encoder or decoder configuration. pub mod enc_dec_builder; -/// The library uses a single error enum for all layers. +/// Unique error enum used across the library. /// -/// Not all errors may occur on every layer. -/// Every API function specifies the errors it may return. (not accurate!) +/// Not all errors may occur in every module's function. +/// Every API function specifies the errors it may return (sometimes not exhaustively!). pub mod error; -/// This layer deals with Intel PT packet encoding and decoding. -/// -/// It can further be split into three sub-layers: opcodes, encoding, and decoding. +/// Intel PT packet encoding and decoding. pub mod packet; -/// The event layer deals with packet combinations that encode higher-level events. +/// Higher-level events often resulting from the combination of different PT packets. /// /// It is used for reconstructing execution flow for users who need finer-grain control not available via the instruction flow layer /// or for users who want to integrate execution flow reconstruction with other functionality more tightly than it would be possible otherwise. pub mod event; -/// The block layer provides a simple API for iterating over blocks of sequential instructions in execution order. +/// Simple API for iterating over blocks of sequential instructions in execution order. /// /// The instructions in a block are sequential in the sense that no trace is required for reconstructing the instructions. /// The IP of the first instruction is given in struct `Block` and the IP of other instructions in the block can be determined by decoding and examining the previous instruction. pub mod block; -/// The instruction flow layer provides a simple API for iterating over instructions in execution order. +/// Simple API for iterating over instructions in execution order. pub mod insn; mod version; pub use version::Version; +/// Module handling the memory images of the binaries beeing traced, used in high level decoding. pub mod image; +/// Address Space Identifier pub mod asid; +/// Info about the decoder status pub mod status; diff --git a/src/version.rs b/src/version.rs index 6cdc7ae..744dddc 100644 --- a/src/version.rs +++ b/src/version.rs @@ -1,7 +1,7 @@ use libipt_sys::{pt_library_version, pt_version}; use std::ffi::CStr; -/// The library version. +/// The underlying C library (libipt) version. #[derive(Clone, Copy, Debug)] #[repr(transparent)] pub struct Version(pt_version); From f905c536ba3eead8bd30b74f395922db205dff67 Mon Sep 17 00:00:00 2001 From: Marcondiro <46560192+Marcondiro@users.noreply.github.com> Date: Thu, 23 Jan 2025 16:35:15 +0100 Subject: [PATCH 4/6] More flexible EncoderDecoderBuilder Clone --- src/enc_dec_builder/mod.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/enc_dec_builder/mod.rs b/src/enc_dec_builder/mod.rs index a7898f0..9089ade 100644 --- a/src/enc_dec_builder/mod.rs +++ b/src/enc_dec_builder/mod.rs @@ -52,7 +52,7 @@ pub trait PtEncoderDecoder { Self: Sized; } -#[derive(Debug, Clone)] +#[derive(Debug)] #[repr(transparent)] pub struct EncoderDecoderBuilder { pub(crate) config: pt_config, @@ -68,6 +68,15 @@ where } } +impl Clone for EncoderDecoderBuilder { + fn clone(&self) -> Self { + Self { + config: self.config, + target: PhantomData, + } + } +} + impl EncoderDecoderBuilder where T: PtEncoderDecoder, From 90d33cfdb91d4d472b072ff9818d41f4c17c02ce Mon Sep 17 00:00:00 2001 From: Marcondiro <46560192+Marcondiro@users.noreply.github.com> Date: Thu, 23 Jan 2025 17:42:17 +0100 Subject: [PATCH 5/6] Improve docs pt.2 --- src/block/decoder.rs | 21 ++++++++++----------- src/enc_dec_builder/mod.rs | 2 +- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/block/decoder.rs b/src/block/decoder.rs index 1d2793e..9e5d3a3 100644 --- a/src/block/decoder.rs +++ b/src/block/decoder.rs @@ -16,7 +16,7 @@ use std::mem::MaybeUninit; use std::ptr; use std::ptr::NonNull; -/// The decoder will work on the buffer defined in a Config, it shall contain +/// The decoder will work on the buffer defined in the builder, it shall contain /// raw trace data and remain valid for the lifetime of the decoder. /// /// The decoder needs to be synchronized before it can be used. @@ -28,9 +28,9 @@ pub struct BlockDecoder<'a> { } impl PtEncoderDecoder for BlockDecoder<'_> { - /// Allocate an Intel PT block decoder. + /// Create an Intel PT block decoder. /// - /// The decoder will work on the buffer defined in @config, + /// The decoder will work on the buffer defined in @builder, /// it shall contain raw trace data and remain valid for the lifetime of the decoder. /// The decoder needs to be synchronized before it can be used. fn new_from_builder(builder: &EncoderDecoderBuilder) -> Result { @@ -51,8 +51,7 @@ impl PtEncoderDecoder for BlockDecoder<'_> { impl BlockDecoder<'_> { /// Return the current address space identifier. /// - /// On success, provides the current address space identifier in @asid. - /// Returns Asid on success, a `PtError` otherwise. + /// On success, provides the current address space identifier a `PtError` otherwise. pub fn asid(&self) -> Result { let mut asid = MaybeUninit::::uninit(); ensure_ptok(unsafe { @@ -76,7 +75,7 @@ impl BlockDecoder<'_> { /// Get the next pending event. /// - /// On success, provides the next event, a `StatusFlag` instance and updates the decoder. + /// On success, provides the next event, a `Status` instance and updates the decoder. /// Returns `BadQuery` if there is no event. pub fn event(&mut self) -> Result<(Event, Status), PtError> { let mut evt = MaybeUninit::::uninit(); @@ -135,16 +134,16 @@ impl BlockDecoder<'_> { /// Determine the next block of instructions. /// /// On success, provides the next block of instructions in execution order. - /// Also Returns a `StatusFlag` instance on success. + /// Also Returns a `Status` instance on success. /// Returns Eos to indicate the end of the trace stream. - /// Subsequent calls to next will continue to return Eos until trace is required to determine the next instruction. + /// Subsequent calls will continue to return Eos until trace is required to determine the next instruction. /// Returns `BadContext` if the decoder encountered an unexpected packet. /// Returns `BadOpc` if the decoder encountered unknown packets. /// Returns `BadPacket` if the decoder encountered unknown packet payloads. /// Returns `BadQuery` if the decoder got out of sync. - /// Returns Eos if decoding reached the end of the Intel PT buffer. - /// Returns Nomap if the memory at the instruction address can't be read. - /// Returns Nosync if the decoder is out of sync. + /// Returns `Eos` if decoding reached the end of the Intel PT buffer. + /// Returns `Nomap` if the memory at the instruction address can't be read. + /// Returns `Nosync` if the decoder is out of sync. pub fn decode_next(&mut self) -> Result<(Block, Status), PtError> { let mut blk = MaybeUninit::::uninit(); let status = extract_status_or_pterr(unsafe { diff --git a/src/enc_dec_builder/mod.rs b/src/enc_dec_builder/mod.rs index 9089ade..2d86ed5 100644 --- a/src/enc_dec_builder/mod.rs +++ b/src/enc_dec_builder/mod.rs @@ -81,7 +81,7 @@ impl EncoderDecoderBuilder where T: PtEncoderDecoder, { - /// Initializes an EncoderDecoderBuilder instance + /// Create an EncoderDecoderBuilder pub const fn new() -> Self { let mut config: pt_config = unsafe { mem::zeroed() }; config.size = size_of::(); From 936030e9241706b653aff93e4d1aa71d1c19f246 Mon Sep 17 00:00:00 2001 From: Marcondiro <46560192+Marcondiro@users.noreply.github.com> Date: Wed, 29 Jan 2025 09:34:24 +0100 Subject: [PATCH 6/6] Bump to 0.3.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b0db105..a8ba0b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "libipt" -version = "0.3.0-beta.3" +version = "0.3.0" authors = [ "sum_catnip ", "Marcondiro ",