Skip to content

Commit 40e55a4

Browse files
committed
Move to_ methods from Page to TextPage
1 parent b153848 commit 40e55a4

File tree

11 files changed

+296
-308
lines changed

11 files changed

+296
-308
lines changed

examples/extract_images.rs

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,21 @@
11
use std::io::Write;
22

3+
use mupdf::{Document, ImageFormat, TextPageFlags};
4+
35
fn main() -> Result<(), Box<dyn std::error::Error>> {
4-
let filename: String = std::env::args()
5-
.collect::<Vec<_>>()
6-
.get(1)
7-
.expect("missing filename")
8-
.to_owned();
9-
let document = mupdf::document::Document::open(&filename)?;
6+
let filename: String = std::env::args().nth(1).expect("missing filename");
7+
let document = Document::open(&filename)?;
108

119
let mut image_num: u32 = 0;
1210

1311
for page in document.pages()? {
14-
let text_page = page?.to_text_page(mupdf::text_page::TextPageOptions::PRESERVE_IMAGES)?;
12+
let text_page = page?.to_text_page(TextPageFlags::PRESERVE_IMAGES)?;
1513

1614
for block in text_page.blocks() {
1715
if let Some(image) = block.image() {
1816
let pixmap = image.to_pixmap()?;
1917
let mut bytes: Vec<u8> = vec![];
20-
pixmap.write_to(&mut bytes, mupdf::pixmap::ImageFormat::PNG)?;
18+
pixmap.write_to(&mut bytes, ImageFormat::PNG)?;
2119

2220
let mut output_file = std::fs::File::create(format!("output_{}.png", image_num))?;
2321
output_file.write_all(&bytes)?;

examples/extract_stext.rs

Lines changed: 18 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,25 @@
11
use std::io;
22

3-
fn main() {
4-
// cargo run --example extract_stext
5-
let mut path_to_doc = String::new();
6-
println!("Enter a path to document: ");
7-
io::stdin()
8-
.read_line(&mut path_to_doc)
9-
.expect("Failed to read line");
10-
let doc = mupdf::document::Document::open(path_to_doc.trim()).unwrap();
11-
let page = doc.load_page(0).unwrap();
12-
match page.stext_page_as_json_from_page(1.0) {
13-
Ok(stext_json) => {
14-
let stext_page: serde_json::Result<mupdf::page::StextPage> =
15-
serde_json::from_str(stext_json.as_str());
16-
match stext_page {
17-
Ok(res) => {
18-
for block in res.blocks {
19-
if block.r#type.eq("text") {
20-
for line in block.lines {
21-
println!("{:?}", &line.text);
22-
}
23-
}
24-
}
25-
}
26-
Err(err) => {
27-
println!("stext_page parsing error: {:?}", &err);
3+
use mupdf::{page::StextPage, Document, TextPageFlags};
4+
5+
fn main() -> Result<(), Box<dyn std::error::Error>> {
6+
let filename: String = std::env::args().nth(1).expect("missing filename");
7+
let document = Document::open(&filename)?;
8+
9+
for page in document.pages()? {
10+
let text_page = page?.to_text_page(TextPageFlags::empty())?;
11+
12+
let json = text_page.to_json(1.0)?;
13+
let stext_page: StextPage = serde_json::from_str(json.as_str())?;
14+
15+
for block in stext_page.blocks {
16+
if block.r#type == "text" {
17+
for line in block.lines {
18+
println!("{:?}", &line.text);
2819
}
2920
}
3021
}
31-
Err(_err) => {}
3222
}
23+
24+
Ok(())
3325
}

examples/extract_text.rs

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
1+
use mupdf::{Document, TextPageFlags};
2+
13
fn main() -> Result<(), Box<dyn std::error::Error>> {
2-
let filename: String = std::env::args()
3-
.collect::<Vec<_>>()
4-
.get(1)
5-
.expect("missing filename")
6-
.to_owned();
7-
let document = mupdf::document::Document::open(&filename)?;
4+
let filename: String = std::env::args().nth(1).expect("missing filename");
5+
let document = Document::open(&filename)?;
86

97
for page in document.pages()? {
10-
let text_page = page?.to_text_page(mupdf::text_page::TextPageOptions::empty())?;
8+
let text_page = page?.to_text_page(TextPageFlags::empty())?;
119

1210
for block in text_page.blocks() {
1311
for line in block.lines() {

mupdf-sys/wrapper.c

Lines changed: 29 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -774,14 +774,12 @@ fz_buffer *mupdf_page_to_svg(fz_context *ctx, fz_page *page, fz_matrix ctm, fz_c
774774
return buf;
775775
}
776776

777-
fz_stext_page *mupdf_page_to_text_page(fz_context *ctx, fz_page *page, int flags, mupdf_error_t **errptr)
777+
fz_stext_page *mupdf_new_stext_page_from_page(fz_context *ctx, fz_page *page, const fz_stext_options *options, mupdf_error_t **errptr)
778778
{
779779
fz_stext_page *text_page = NULL;
780-
fz_stext_options opts = {0};
781-
opts.flags = flags;
782780
fz_try(ctx)
783781
{
784-
text_page = fz_new_stext_page_from_page(ctx, page, &opts);
782+
text_page = fz_new_stext_page_from_page(ctx, page, options);
785783
}
786784
fz_catch(ctx)
787785
{
@@ -859,148 +857,77 @@ void mupdf_run_page_widgets(fz_context *ctx, fz_page *page, fz_device *device, f
859857
}
860858
}
861859

862-
fz_buffer *mupdf_page_to_html(fz_context *ctx, fz_page *page, mupdf_error_t **errptr)
860+
fz_output *mupdf_new_output_with_buffer(fz_context *ctx, fz_buffer *buf, mupdf_error_t **errptr) {
861+
fz_output* output;
862+
fz_try(ctx)
863+
{
864+
output = fz_new_output_with_buffer(ctx, buf);
865+
}
866+
fz_catch(ctx)
867+
{
868+
mupdf_save_error(ctx, errptr);
869+
}
870+
return output;
871+
}
872+
873+
void mupdf_print_stext_page_as_html(fz_context *ctx, fz_output *out, fz_stext_page *page, int id, mupdf_error_t **errptr)
863874
{
864-
fz_buffer *buf = NULL;
865-
fz_output *out = NULL;
866-
fz_stext_page *text = NULL;
867-
fz_var(text);
868-
fz_var(buf);
869-
fz_var(out);
870-
fz_try(ctx)
871-
{
872-
text = fz_new_stext_page_from_page(ctx, page, NULL);
873-
buf = fz_new_buffer(ctx, 8192);
874-
out = fz_new_output_with_buffer(ctx, buf);
875-
fz_print_stext_header_as_html(ctx, out);
876-
fz_print_stext_page_as_html(ctx, out, text, page->number);
877-
fz_print_stext_trailer_as_html(ctx, out);
878-
fz_close_output(ctx, out);
879-
}
880-
fz_always(ctx)
875+
fz_try(ctx)
881876
{
882-
fz_drop_output(ctx, out);
883-
fz_drop_stext_page(ctx, text);
877+
fz_print_stext_page_as_html(ctx, out, page, id);
884878
}
885879
fz_catch(ctx)
886880
{
887881
mupdf_save_error(ctx, errptr);
888882
}
889-
return buf;
890883
}
891884

892-
fz_buffer *mupdf_stext_page_as_json_from_page(fz_context *ctx, fz_page *page, float scale, mupdf_error_t **errptr)
885+
void mupdf_print_stext_page_as_xhtml(fz_context *ctx, fz_output *out, fz_stext_page *page, int id, mupdf_error_t **errptr)
893886
{
894-
fz_buffer *buf = NULL;
895-
fz_output *out = NULL;
896-
fz_stext_page *stext_page = NULL;
897-
fz_var(stext_page);
898-
fz_var(buf);
899-
fz_var(out);
900-
fz_try(ctx)
901-
{
902-
stext_page = fz_new_stext_page_from_page(ctx, page, NULL);
903-
buf = fz_new_buffer(ctx, 8192);
904-
out = fz_new_output_with_buffer(ctx, buf);
905-
fz_print_stext_page_as_json(ctx, out, stext_page, scale);
906-
fz_close_output(ctx, out);
907-
}
908-
fz_always(ctx)
887+
fz_try(ctx)
909888
{
910-
fz_drop_output(ctx, out);
911-
fz_drop_stext_page(ctx, stext_page);
889+
fz_print_stext_page_as_xhtml(ctx, out, page, id);
912890
}
913891
fz_catch(ctx)
914892
{
915893
mupdf_save_error(ctx, errptr);
916894
}
917-
return buf;
918895
}
919896

920-
fz_buffer *mupdf_page_to_xhtml(fz_context *ctx, fz_page *page, mupdf_error_t **errptr)
897+
void mupdf_print_stext_page_as_xml(fz_context *ctx, fz_output *out, fz_stext_page *page, int id, mupdf_error_t **errptr)
921898
{
922-
fz_buffer *buf = NULL;
923-
fz_output *out = NULL;
924-
fz_stext_page *text = NULL;
925-
fz_var(text);
926-
fz_var(buf);
927-
fz_var(out);
928-
fz_try(ctx)
929-
{
930-
text = fz_new_stext_page_from_page(ctx, page, NULL);
931-
buf = fz_new_buffer(ctx, 8192);
932-
out = fz_new_output_with_buffer(ctx, buf);
933-
fz_print_stext_header_as_xhtml(ctx, out);
934-
fz_print_stext_page_as_xhtml(ctx, out, text, page->number);
935-
fz_print_stext_trailer_as_xhtml(ctx, out);
936-
fz_close_output(ctx, out);
937-
}
938-
fz_always(ctx)
899+
fz_try(ctx)
939900
{
940-
fz_drop_output(ctx, out);
941-
fz_drop_stext_page(ctx, text);
901+
fz_print_stext_page_as_xml(ctx, out, page, id);
942902
}
943903
fz_catch(ctx)
944904
{
945905
mupdf_save_error(ctx, errptr);
946906
}
947-
return buf;
948907
}
949908

950-
fz_buffer *mupdf_page_to_xml(fz_context *ctx, fz_page *page, mupdf_error_t **errptr)
909+
void mupdf_print_stext_page_as_text(fz_context *ctx, fz_output *out, fz_stext_page *page, mupdf_error_t **errptr)
951910
{
952-
fz_buffer *buf = NULL;
953-
fz_output *out = NULL;
954-
fz_stext_page *text = NULL;
955-
fz_var(text);
956-
fz_var(buf);
957-
fz_var(out);
958-
fz_try(ctx)
959-
{
960-
text = fz_new_stext_page_from_page(ctx, page, NULL);
961-
buf = fz_new_buffer(ctx, 8192);
962-
out = fz_new_output_with_buffer(ctx, buf);
963-
fz_print_stext_page_as_xml(ctx, out, text, page->number);
964-
fz_close_output(ctx, out);
965-
}
966-
fz_always(ctx)
911+
fz_try(ctx)
967912
{
968-
fz_drop_output(ctx, out);
969-
fz_drop_stext_page(ctx, text);
913+
fz_print_stext_page_as_text(ctx, out, page);
970914
}
971915
fz_catch(ctx)
972916
{
973917
mupdf_save_error(ctx, errptr);
974918
}
975-
return buf;
976919
}
977920

978-
fz_buffer *mupdf_page_to_text(fz_context *ctx, fz_page *page, mupdf_error_t **errptr)
921+
void mupdf_print_stext_page_as_json(fz_context *ctx, fz_output *out, fz_stext_page *page, float scale, mupdf_error_t **errptr)
979922
{
980-
fz_buffer *buf = NULL;
981-
fz_output *out = NULL;
982-
fz_stext_page *text = NULL;
983-
fz_var(text);
984-
fz_var(buf);
985-
fz_var(out);
986923
fz_try(ctx)
987924
{
988-
text = fz_new_stext_page_from_page(ctx, page, NULL);
989-
buf = fz_new_buffer(ctx, 8192);
990-
out = fz_new_output_with_buffer(ctx, buf);
991-
fz_print_stext_page_as_text(ctx, out, text);
992-
fz_close_output(ctx, out);
993-
}
994-
fz_always(ctx)
995-
{
996-
fz_drop_output(ctx, out);
997-
fz_drop_stext_page(ctx, text);
925+
fz_print_stext_page_as_json(ctx, out, page, scale);
998926
}
999927
fz_catch(ctx)
1000928
{
1001929
mupdf_save_error(ctx, errptr);
1002930
}
1003-
return buf;
1004931
}
1005932

1006933
fz_link *mupdf_load_links(fz_context *ctx, fz_page *page, mupdf_error_t **errptr)
@@ -1017,30 +944,6 @@ fz_link *mupdf_load_links(fz_context *ctx, fz_page *page, mupdf_error_t **errptr
1017944
return link;
1018945
}
1019946

1020-
fz_buffer *mupdf_stext_page_to_text(fz_context *ctx, fz_stext_page *page, mupdf_error_t **errptr)
1021-
{
1022-
fz_buffer *buf = NULL;
1023-
fz_output *out = NULL;
1024-
fz_var(buf);
1025-
fz_var(out);
1026-
fz_try(ctx)
1027-
{
1028-
buf = fz_new_buffer(ctx, 8192);
1029-
out = fz_new_output_with_buffer(ctx, buf);
1030-
fz_print_stext_page_as_text(ctx, out, page);
1031-
fz_close_output(ctx, out);
1032-
}
1033-
fz_always(ctx)
1034-
{
1035-
fz_drop_output(ctx, out);
1036-
}
1037-
fz_catch(ctx)
1038-
{
1039-
mupdf_save_error(ctx, errptr);
1040-
}
1041-
return buf;
1042-
}
1043-
1044947
fz_separations *mupdf_page_separations(fz_context *ctx, fz_page *page, mupdf_error_t **errptr)
1045948
{
1046949
fz_separations *seps = NULL;

src/device.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ use mupdf_sys::*;
66
use num_enum::TryFromPrimitive;
77

88
use crate::{
9-
context, ColorParams, Colorspace, DisplayList, Error, IRect, Image, Matrix, Path, Pixmap, Rect,
10-
Shade, StrokeState, Text, TextPage, TextPageOptions,
9+
context, ColorParams, Colorspace, DisplayList, Error, FFIWrapper, IRect, Image, Matrix, Path,
10+
Pixmap, Rect, Shade, StrokeState, Text, TextPage, TextPageFlags,
1111
};
1212

1313
mod native;
@@ -206,11 +206,11 @@ impl Device {
206206
})
207207
}
208208

209-
pub fn from_text_page(page: &TextPage, opts: TextPageOptions) -> Result<Self, Error> {
209+
pub fn from_text_page(page: &TextPage, opts: TextPageFlags) -> Result<Self, Error> {
210210
unsafe {
211211
ffi_try!(mupdf_new_stext_device(
212212
context(),
213-
page.inner,
213+
page.as_ptr().cast_mut(),
214214
opts.bits() as _
215215
))
216216
}

src/display_list.rs

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
use std::ffi::CString;
1+
use std::{ffi::CString, ptr::NonNull};
22

33
use mupdf_sys::*;
44

55
use crate::{
66
array::FzArray, context, rust_vec_from_ffi_ptr, Colorspace, Cookie, Device, Error, Image,
7-
Matrix, Pixmap, Quad, Rect, TextPage, TextPageOptions,
7+
Matrix, Pixmap, Quad, Rect, TextPage, TextPageFlags,
88
};
99

1010
#[derive(Debug)]
@@ -40,15 +40,18 @@ impl DisplayList {
4040
.map(|inner| unsafe { Pixmap::from_raw(inner) })
4141
}
4242

43-
pub fn to_text_page(&self, opts: TextPageOptions) -> Result<TextPage, Error> {
44-
unsafe {
43+
pub fn to_text_page(&self, opts: TextPageFlags) -> Result<TextPage, Error> {
44+
let inner = unsafe {
4545
ffi_try!(mupdf_display_list_to_text_page(
4646
context(),
4747
self.inner,
4848
opts.bits() as _
49-
))
50-
}
51-
.map(|inner| unsafe { TextPage::from_raw(inner) })
49+
))?
50+
};
51+
52+
let inner = unsafe { NonNull::new_unchecked(inner) };
53+
54+
Ok(TextPage { inner })
5255
}
5356

5457
pub fn to_image(&self, width: f32, height: f32) -> Result<Image, Error> {

0 commit comments

Comments
 (0)