Skip to content

Commit 0b1ffb5

Browse files
itsjunetimevinxv
authored andcommitted
Overhaul many parts of unsafe semantics using zerocopy and NonNulls (messense#112)
* Overhaul many parts of unsafe semantics using zerocopy and NonNulls * fmt * Mark FFI analogues as repr(C) to ensure consistent representation * fix double-free when calling PdfPage::try_from(Page) * Fix up from_raw and from_non_null fns for Page to be more accurate to names * Add new FzArray type to ensure fz-allocated Arrays are dropped correctly * Make fzarray fields non-pub * Fix some doc links * Remove unused import * Finally fix ASAN issues * fmt * Fix compilation on older targets * Actually fix compilation this time lmao * Make FzArray default impl invariant over T * Make build error clearer by reporting src and dst * Don't try to recursively copy .git dir when building mupdf-sys * fmt * *Actually* don't copy over .git directory was building
1 parent 72f42bf commit 0b1ffb5

File tree

14 files changed

+514
-179
lines changed

14 files changed

+514
-179
lines changed

.github/workflows/CI.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ jobs:
101101
cargo test -Zbuild-std --target x86_64-unknown-linux-gnu --features serde
102102
env:
103103
RUSTFLAGS: -Zsanitizer=address
104+
LSAN_OPTIONS: report_objects=1
104105

105106
valgrind:
106107
name: Valgrind

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ once_cell = "1.3.1"
5050
num_enum = "0.7.0"
5151
bitflags = "2.0.2"
5252
serde = { version = "1.0.201", features = ["derive"], optional = true }
53+
zerocopy = { version = "0.8.17", features = ["derive"] }
5354

5455
[dependencies.font-kit]
5556
version = "0.14.1"

mupdf-sys/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,3 +93,4 @@ pkg-config = "0.3"
9393
regex = "1.11"
9494

9595
[dependencies]
96+
zerocopy = { version = "0.8.17", features = ["derive"] }

mupdf-sys/build.rs

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use std::env;
2+
use std::ffi::OsStr;
23
use std::fs;
34
use std::path::{Path, PathBuf};
45
use std::process::Stdio;
@@ -15,24 +16,29 @@ const SKIP_FONTS: [&str; 6] = [
1516
];
1617

1718
macro_rules! t {
18-
($e:expr) => {
19+
($e:expr ) => {
1920
match $e {
2021
Ok(n) => n,
2122
Err(e) => panic!("\n{} failed with {}\n", stringify!($e), e),
2223
}
2324
};
2425
}
2526

26-
fn cp_r(dir: &Path, dest: &Path) {
27+
fn cp_r(dir: &Path, dest: &Path, excluding_dir_names: &'static [&'static str]) {
2728
for entry in t!(fs::read_dir(dir)) {
2829
let entry = t!(entry);
2930
let path = entry.path();
3031
let dst = dest.join(path.file_name().expect("Failed to get filename of path"));
3132
if t!(fs::metadata(&path)).is_file() {
32-
t!(fs::copy(path, dst));
33-
} else {
33+
fs::copy(&path, &dst)
34+
.unwrap_or_else(|e| panic!("Couldn't fs::copy {path:?} to {dst:?}: {e}"));
35+
} else if path
36+
.file_name()
37+
.and_then(OsStr::to_str)
38+
.is_none_or(|dst| !excluding_dir_names.contains(&dst))
39+
{
3440
t!(fs::create_dir_all(&dst));
35-
cp_r(&path, &dst);
41+
cp_r(&path, &dst, excluding_dir_names);
3642
}
3743
}
3844
}
@@ -54,7 +60,7 @@ fn build_libmupdf() {
5460

5561
let current_dir = env::current_dir().unwrap();
5662
let mupdf_src_dir = current_dir.join("mupdf");
57-
cp_r(&mupdf_src_dir, &build_dir);
63+
cp_r(&mupdf_src_dir, &build_dir, &[".git"]);
5864

5965
let mut build = cc::Build::new();
6066
#[cfg(not(feature = "xps"))]
@@ -219,7 +225,7 @@ fn build_libmupdf() {
219225

220226
let current_dir = env::current_dir().unwrap();
221227
let mupdf_src_dir = current_dir.join("mupdf");
222-
cp_r(&mupdf_src_dir, &build_dir);
228+
cp_r(&mupdf_src_dir, &build_dir, &[".git"]);
223229

224230
let msbuild = cc::windows_registry::find(target.as_str(), "msbuild.exe");
225231
if let Some(mut msbuild) = msbuild {
@@ -432,6 +438,23 @@ impl bindgen::callbacks::ParseCallbacks for Callback {
432438
}
433439
Some(output)
434440
}
441+
442+
fn add_derives(&self, info: &bindgen::callbacks::DeriveInfo<'_>) -> Vec<String> {
443+
static ZEROCOPY_TYPES: [&str; 2] = ["fz_point", "fz_quad"];
444+
445+
if ZEROCOPY_TYPES.contains(&info.name) {
446+
[
447+
"zerocopy::FromBytes",
448+
"zerocopy::IntoBytes",
449+
"zerocopy::Immutable",
450+
]
451+
.into_iter()
452+
.map(ToString::to_string)
453+
.collect()
454+
} else {
455+
vec![]
456+
}
457+
}
435458
}
436459

437460
fn main() {

src/array.rs

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
use std::{
2+
ffi::c_void,
3+
ops::{Deref, DerefMut},
4+
ptr::{self, NonNull},
5+
slice,
6+
};
7+
8+
use mupdf_sys::fz_free;
9+
10+
use crate::context;
11+
12+
/// Essentially a [`Box`]`<[T], A>` with `fz_calloc` as the allocator. Necessary until the
13+
/// allocator API is stable, as we need to be able to free this with `fz_free` instead of the
14+
/// system allocator.
15+
// An important note about `fz_calloc`: If a function which allocates from `fz_calloc` gives you a
16+
// pointer, allocated from `fz_calloc`, but says the length of items behind it is 0, you still have
17+
// to `fz_free` that pointer. It's probably lying about the length behind it being 0 - it was
18+
// probably part of an allocation bigger than 0, but of which 0 bytes were actually written to.
19+
pub struct FzArray<T> {
20+
ptr: Option<NonNull<T>>,
21+
len: usize,
22+
}
23+
24+
impl<T> Default for FzArray<T> {
25+
fn default() -> Self {
26+
Self { ptr: None, len: 0 }
27+
}
28+
}
29+
30+
impl<T> FzArray<T> {
31+
/// # Safety
32+
///
33+
/// * `ptr` must point to an array of at least `len` instances of `T`. There may be more than
34+
/// `len` instances, but only `len` will be accessed by this struct.
35+
///
36+
/// * If `len > 0`, the memory it points to also must be allocated by `fz_calloc` inside a
37+
/// mupdf FFI call. `ptr` may be dangling or not well-aligned if `len == 0`
38+
pub(crate) unsafe fn from_parts(ptr: NonNull<T>, len: usize) -> Self {
39+
Self {
40+
ptr: Some(ptr),
41+
len,
42+
}
43+
}
44+
45+
/// # Safety
46+
///
47+
/// * It must be valid to call [`FzArray::from_parts`] with the ptr stored inside `self` and
48+
/// the `len` specified in this call
49+
pub(crate) unsafe fn set_len(&mut self, len: usize) {
50+
self.len = len;
51+
}
52+
}
53+
54+
impl<T> Deref for FzArray<T> {
55+
type Target = [T];
56+
fn deref(&self) -> &Self::Target {
57+
match self.ptr {
58+
// SAFETY: `self.ptr.as_ptr()` is not non-null (as it's a NonNull) and the creator has
59+
// promised us that it does point to a valid slice. Also, if it does point to a
60+
Some(ptr) => unsafe { slice::from_raw_parts(ptr.as_ptr(), self.len) },
61+
None => &[],
62+
}
63+
}
64+
}
65+
66+
impl<T> DerefMut for FzArray<T> {
67+
fn deref_mut(&mut self) -> &mut Self::Target {
68+
match self.ptr.as_mut() {
69+
Some(ptr) => {
70+
let ptr = unsafe { ptr.as_mut() };
71+
unsafe { slice::from_raw_parts_mut(ptr, self.len) }
72+
}
73+
None => &mut [],
74+
}
75+
}
76+
}
77+
78+
impl<T> Drop for FzArray<T> {
79+
fn drop(&mut self) {
80+
if let Some(ptr) = self.ptr {
81+
// SAFETY: Upheld by constructor - this must point to something allocated by fz_calloc
82+
unsafe { fz_free(context(), ptr.as_ptr() as *mut c_void) };
83+
}
84+
}
85+
}
86+
87+
impl<T> IntoIterator for FzArray<T> {
88+
type IntoIter = FzIter<T>;
89+
type Item = T;
90+
fn into_iter(self) -> Self::IntoIter {
91+
let next_item_and_end = self.ptr.map(|p| {
92+
// Would be nice to use `.addr()` but it seems CI doesn't have 1.84 yet, and that's
93+
// what stabilized the strict provenance APIs
94+
let end = unsafe { p.add(self.len) }.as_ptr() as usize;
95+
(p, end)
96+
});
97+
FzIter {
98+
_kept_to_be_dropped: self,
99+
next_item_and_end,
100+
}
101+
}
102+
}
103+
104+
pub struct FzIter<T> {
105+
_kept_to_be_dropped: FzArray<T>,
106+
next_item_and_end: Option<(NonNull<T>, usize)>,
107+
}
108+
109+
impl<T> Iterator for FzIter<T> {
110+
type Item = T;
111+
fn next(&mut self) -> Option<Self::Item> {
112+
let (next_item, end_addr) = self.next_item_and_end.as_mut()?;
113+
114+
// Same thing as above with `.addr()`
115+
if next_item.as_ptr() as usize == *end_addr {
116+
return None;
117+
}
118+
119+
let ret = unsafe { ptr::read(next_item.as_ptr()) };
120+
*next_item = unsafe { next_item.add(1) };
121+
Some(ret)
122+
}
123+
}

src/document.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -214,10 +214,9 @@ impl Document {
214214
}
215215

216216
pub fn load_page(&self, page_no: i32) -> Result<Page, Error> {
217-
unsafe {
218-
let inner = ffi_try!(mupdf_load_page(context(), self.inner, page_no));
219-
Ok(Page::from_raw(inner))
220-
}
217+
let fz_page = unsafe { ffi_try!(mupdf_load_page(context(), self.inner, page_no)) };
218+
// SAFETY: We're trusting the FFI layer here to provide a valid pointer
219+
unsafe { Page::from_raw(fz_page) }
221220
}
222221

223222
pub fn pages(&self) -> Result<PageIter, Error> {

src/error.rs

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use std::fmt;
22
use std::io;
33
use std::ffi::NulError;
4+
use std::num::TryFromIntError;
5+
use std::ptr::NonNull;
46

57
use mupdf_sys::*;
68

@@ -24,48 +26,54 @@ impl std::error::Error for MuPdfError {}
2426

2527
/// # Safety
2628
///
27-
/// * `error` must point to a non-null, well-aligned initialized instance of `mupdf_error_t`.
29+
/// * `ptr` must point to a valid, well-aligned instance of [`mupdf_error_t`].
2830
///
29-
/// * `(*error).message` must point to a null-terminated c-string.
30-
pub unsafe fn ffi_error(err: *mut mupdf_error_t) -> MuPdfError {
31+
/// * The pointers stored in this [`mupdf_error_t`] must also be non-null, well-aligned, and point
32+
/// to valid instances of what they claim to represent.
33+
///
34+
/// * The [`field@mupdf_error_t::message`] ptr in `ptr` must point to a null-terminated c-string
35+
pub unsafe fn ffi_error(ptr: NonNull<mupdf_error_t>) -> MuPdfError {
3136
use std::ffi::CStr;
3237

33-
let code = (*err).type_;
34-
let c_msg = (*err).message;
35-
let c_str = CStr::from_ptr(c_msg);
36-
let message = format!("{}", c_str.to_string_lossy());
37-
mupdf_drop_error(err);
38+
// SAFETY: Upheld by caller
39+
let err = unsafe { *ptr.as_ptr() };
40+
let code = err.type_;
41+
let c_msg = err.message;
42+
43+
// SAFETY: Upheld by caller
44+
let c_str = unsafe { CStr::from_ptr(c_msg) };
45+
let message = c_str.to_string_lossy().to_string();
46+
47+
// SAFETY: Upheld by caller; if it's pointing to a valid instance then it can be dropped
48+
unsafe { mupdf_drop_error(ptr.as_ptr()) };
3849
MuPdfError { code, message }
3950
}
4051

4152
macro_rules! ffi_try {
4253
($func:ident($($arg:expr),+)) => ({
4354
use std::ptr;
4455
let mut err = ptr::null_mut();
45-
let res = $func($($arg),+, &mut err);
46-
if !err.is_null() {
56+
// SAFETY: Upheld by the caller of the macro
57+
let res = $func($($arg),+, (&mut err) as *mut *mut ::mupdf_sys::mupdf_error_t);
58+
if let Some(err) = ::core::ptr::NonNull::new(err) {
59+
// SAFETY: We're trusting the FFI call to provide us with a valid ptr if it is not
60+
// null.
4761
return Err($crate::ffi_error(err).into());
4862
}
4963
res
5064
});
51-
($func:ident()) => ({
52-
use std::ptr;
53-
let mut err = ptr::null_mut();
54-
let res = $func(&mut err);
55-
if !err.is_null() {
56-
return Err($crate::ffi_error(err).into());
57-
}
58-
res
59-
})
6065
}
6166

6267
#[derive(Debug)]
68+
#[non_exhaustive]
6369
pub enum Error {
6470
Io(io::Error),
6571
InvalidLanguage(String),
6672
InvalidPdfDocument,
6773
MuPdf(MuPdfError),
6874
Nul(NulError),
75+
IntConversion(TryFromIntError),
76+
UnexpectedNullPtr
6977
}
7078

7179
impl fmt::Display for Error {
@@ -76,6 +84,8 @@ impl fmt::Display for Error {
7684
Error::InvalidPdfDocument => write!(f, "invalid pdf document"),
7785
Error::MuPdf(ref err) => err.fmt(f),
7886
Error::Nul(ref err) => err.fmt(f),
87+
Error::IntConversion(ref err) => err.fmt(f),
88+
Error::UnexpectedNullPtr => write!(f, "An FFI function call returned a null ptr when we expected a non-null ptr")
7989
}
8090
}
8191
}
@@ -99,3 +109,9 @@ impl From<NulError> for Error {
99109
Self::Nul(err)
100110
}
101111
}
112+
113+
impl From<TryFromIntError> for Error {
114+
fn from(value: TryFromIntError) -> Self {
115+
Self::IntConversion(value)
116+
}
117+
}

0 commit comments

Comments
 (0)