diff --git a/src/native_stack_trace.rs b/src/native_stack_trace.rs index 15942ad7..eaf42625 100644 --- a/src/native_stack_trace.rs +++ b/src/native_stack_trace.rs @@ -99,15 +99,12 @@ impl NativeStack { while python_frame_index < frames.len() { merged.push(frames[python_frame_index].clone()); + python_frame_index += 1; if frames[python_frame_index].is_entry - || frames[python_frame_index].is_shim_entry - { + || frames[python_frame_index].is_shim_entry { break; } - - python_frame_index += 1; } - python_frame_index += 1; } } }; diff --git a/src/python_interpreters.rs b/src/python_interpreters.rs index 29d835c9..3efc75b1 100644 --- a/src/python_interpreters.rs +++ b/src/python_interpreters.rs @@ -9,6 +9,8 @@ This means we can't dereference them directly. #![allow(clippy::unnecessary_cast)] +use anyhow::{Error, Result}; + // these bindings are automatically generated by rust bindgen // using the generate_bindings.py script use crate::python_bindings::{ @@ -66,7 +68,7 @@ pub trait CodeObject { fn argcount(&self) -> i32; fn varnames(&self) -> *mut Self::TupleObject; - fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32; + fn get_line_number(&self, lasti: i32, table: &[u8]) -> Result; } pub trait BytesObject { @@ -214,7 +216,7 @@ macro_rules! PythonCodeObjectImpl { self.co_varnames as *mut Self::TupleObject } - fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32 { + fn get_line_number(&self, lasti: i32, table: &[u8]) -> Result { let lasti = lasti as i32; // unpack the line table. format is specified here: @@ -238,34 +240,89 @@ macro_rules! PythonCodeObjectImpl { line_number += increment; i += 2; } - line_number + Ok(line_number) } } }; } -fn read_varint(index: &mut usize, table: &[u8]) -> usize { - let mut ret: usize; - let mut byte = table[*index]; - let mut shift = 0; - *index += 1; - ret = (byte & 63) as usize; +struct ByteStream<'a> { + cursor: usize, + buffer: &'a [u8], +} - while byte & 64 != 0 { - byte = table[*index]; - *index += 1; - shift += 6; - ret += ((byte & 63) as usize) << shift; +impl<'a> ByteStream<'a> { + fn new(buffer: &'a [u8]) -> Self { + Self { cursor: 0, buffer } + } + + fn try_read(&mut self) -> Option { + if self.cursor >= self.buffer.len() { + None + } else { + let byte = self.buffer[self.cursor]; + self.cursor += 1; + Some(byte) + } + } + + fn read(&mut self) -> Result { + match self.try_read() { + Some(byte) => Ok(byte), + None => Err(Error::msg(format!( + "Tried to read after end of buffer {:?}", + self.buffer + ))), + } + } + + fn try_read_header(&mut self) -> Result, Error> { + match self.try_read() { + Some(byte) => { + if byte & 0b10000000 == 0 { + Err(Error::msg(format!( + "Expected header bit at index {} in {:?}", + self.cursor - 1, self.buffer + ))) + } else { + Ok(Some(byte)) + } + } + None => Ok(None), + } + } + + fn read_body(&mut self) -> Result { + let byte = self.read()?; + if byte & 0b10000000 != 0 { + Err(Error::msg(format!( + "Expected non-header bit at index {} in {:?}", + self.cursor - 1, self.buffer + ))) + } else { + Ok(byte) + } + } + + fn read_varint(&mut self) -> Result { + let mut byte = self.read_body()?; + let mut value = (byte & 63) as usize; + let mut shift: usize = 0; + while byte & 64 != 0 { + byte = self.read_body()?; + shift += 6; + value |= ((byte & 63) as usize) << shift; + } + Ok(value) } - ret -} -fn read_signed_varint(index: &mut usize, table: &[u8]) -> isize { - let unsigned_val = read_varint(index, table); - if unsigned_val & 1 != 0 { - -((unsigned_val >> 1) as isize) - } else { - (unsigned_val >> 1) as isize + fn read_signed_varint(&mut self) -> Result { + let value = self.read_varint()?; + Ok(if value & 1 != 0 { + -((value >> 1) as isize) + } else { + (value >> 1) as isize + }) } } @@ -299,49 +356,46 @@ macro_rules! CompactCodeObjectImpl { self.co_localsplusnames as *mut Self::TupleObject } - fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32 { + fn get_line_number(&self, lasti: i32, table: &[u8]) -> Result { // unpack compressed table format from python 3.11 // https://github.com/python/cpython/pull/91666/files let lasti = lasti - offset_of(self, &self.co_code_adaptive) as i32; let mut line_number: i32 = self.first_lineno(); let mut bytecode_address: i32 = 0; + let mut stream = ByteStream::new(table); - let mut index: usize = 0; - loop { - if index >= table.len() { - break; - } - let byte = table[index]; - index += 1; - + while let Some(byte) = stream.try_read_header()? { + let code = (byte >> 3) & 15; let delta = ((byte & 7) as i32) + 1; bytecode_address += delta * 2; - let code = (byte >> 3) & 15; + let line_delta = match code { 15 => 0, 14 => { - let delta = read_signed_varint(&mut index, table); - read_varint(&mut index, table); // end line - read_varint(&mut index, table); // start column - read_varint(&mut index, table); // end column + let delta = stream.read_signed_varint()?; + stream.read_varint()?; // end line + stream.read_varint()?; // start column + stream.read_varint()?; // end column delta } - 13 => read_signed_varint(&mut index, table), + 13 => stream.read_signed_varint()?, 10..=12 => { - index += 2; // start column / end column + stream.read_body()?; // start column + stream.read_body()?; // end column (code - 10).into() } _ => { - index += 1; // column + stream.read_body()?; // column 0 } }; + line_number += line_delta as i32; if bytecode_address >= lasti { break; } } - line_number + Ok(line_number) } } }; @@ -693,7 +747,7 @@ impl CodeObject for v3_10_0::PyCodeObject { fn varnames(&self) -> *mut Self::TupleObject { self.co_varnames as *mut Self::TupleObject } - fn get_line_number(&self, lasti: i32, table: &[u8]) -> i32 { + fn get_line_number(&self, lasti: i32, table: &[u8]) -> Result { // in Python 3.10 we need to double the lasti instruction value here (and no I don't know why) // https://github.com/python/cpython/blob/7b88f63e1dd4006b1a08b9c9f087dd13449ecc76/Python/ceval.c#L5999 // Whereas in python versions up to 3.9 we didn't. @@ -722,7 +776,7 @@ impl CodeObject for v3_10_0::PyCodeObject { } } - line_number + Ok(line_number) } } @@ -819,6 +873,45 @@ mod tests { 128_u8, 0, 221, 4, 8, 132, 74, 136, 118, 209, 4, 22, 212, 4, 22, 208, 4, 22, 208, 4, 22, 208, 4, 22, ]; - assert_eq!(code.get_line_number(214, &table), 5); + assert_eq!(code.get_line_number(214, &table).unwrap(), 5); + } + + #[test] + fn test_py3_12_line_numbers() { + use crate::python_bindings::v3_12_0::PyCodeObject; + let code = PyCodeObject { + co_firstlineno: 4, + ..Default::default() + }; + + let table = [ + 128_u8, 0, 221, 4, 8, 132, 74, 136, 118, 209, 4, 22, 212, 4, 22, 208, 4, 22, 208, 4, + 22, 208, 4, 22, + ]; + assert_eq!(code.get_line_number(214, &table).unwrap(), 5); + } + + #[test] + fn test_py3_12_line_numbers_truncated() { + use crate::python_bindings::v3_12_0::PyCodeObject; + let code = PyCodeObject { + co_firstlineno: 4, + ..Default::default() + }; + + let table = [128_u8]; + assert!(code.get_line_number(214, &table).is_err()); + } + + #[test] + fn test_py3_12_line_numbers_invalid() { + use crate::python_bindings::v3_12_0::PyCodeObject; + let code = PyCodeObject { + co_firstlineno: 4, + ..Default::default() + }; + + let table = [0]; + assert!(code.get_line_number(214, &table).is_err()); } } diff --git a/src/stack_trace.rs b/src/stack_trace.rs index 1318123c..43dd52ed 100644 --- a/src/stack_trace.rs +++ b/src/stack_trace.rs @@ -261,7 +261,7 @@ fn get_line_number( ) -> Result { let table = copy_bytes(code.line_table(), process).context("Failed to copy line number table")?; - Ok(code.get_line_number(lasti, &table)) + code.get_line_number(lasti, &table) } fn get_locals(