From 1dad166e103f30ed5978bc47f951c8c7ddef57c4 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Wed, 1 Jan 2025 10:31:43 +0100 Subject: [PATCH 01/21] draft delayed background rendering --- .../SplashBackgroundRenderer.cc | 94 ++++++++----------- pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h | 2 + pdf2htmlEX/src/HTMLRenderer/general.cc | 30 +++++- pdf2htmlEX/src/Param.h | 1 + 4 files changed, 70 insertions(+), 57 deletions(-) diff --git a/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc b/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc index de965e16..83ee758b 100644 --- a/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc +++ b/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc @@ -111,66 +111,54 @@ bool SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno) (!(param.use_cropbox)), false, false, nullptr, nullptr, &annot_cb, &process_annotation); + + auto * bitmap = getBitmap(); + + auto fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); + if(param.embed_image) + html_renderer->tmp_files.add((const char *)fn); + + SplashImageFileFormat splashImageFileFormat; + if(format == "png") + splashImageFileFormat = splashFormatPng; + else if(format == "jpg") + splashImageFileFormat = splashFormatJpeg; + else + throw string("Image format not supported: ") + format; + + SplashError e = bitmap->writeImgFile(splashImageFileFormat, (const char *)fn, param.actual_dpi, param.actual_dpi); + if (e != splashOk) + throw string("Cannot write background image. 
SplashErrorCode: ") + std::to_string(e); + return true; } void SplashBackgroundRenderer::embed_image(int pageno) { - auto * bitmap = getBitmap(); - // dump the background image only when it is not empty - if(bitmap->getWidth() >= 0 && bitmap->getHeight() >= 0) + auto & f_page = *(html_renderer->f_curpage); + + f_page << "\"\"str_fmt("%s/bg%x.%s", param.tmp_dir.c_str(), pageno, format.c_str()); + ifstream fin((char*)path, ifstream::binary); + if(!fin) + throw string("Cannot read background image ") + (char*)path; + + auto iter = FORMAT_MIME_TYPE_MAP.find(format); + if(iter == FORMAT_MIME_TYPE_MAP.end()) + throw string("Image format not supported: ") + format; + + string mime_type = iter->second; + f_page << "data:" << mime_type << ";base64," << Base64Stream(fin); + } + else { - { - auto fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); - if(param.embed_image) - html_renderer->tmp_files.add((const char *)fn); - - SplashImageFileFormat splashImageFileFormat; - if(format == "png") - splashImageFileFormat = splashFormatPng; - else if(format == "jpg") - splashImageFileFormat = splashFormatJpeg; - else - throw string("Image format not supported: ") + format; - - SplashError e = bitmap->writeImgFile(splashImageFileFormat, (const char *)fn, param.actual_dpi, param.actual_dpi); - if (e != splashOk) - throw string("Cannot write background image. 
SplashErrorCode: ") + std::to_string(e); - } - - double h_scale = html_renderer->text_zoom_factor() * DEFAULT_DPI / param.actual_dpi; - double v_scale = html_renderer->text_zoom_factor() * DEFAULT_DPI / param.actual_dpi; - - auto & f_page = *(html_renderer->f_curpage); - auto & all_manager = html_renderer->all_manager; - - f_page << "getWidth()) - << " " << CSS::HEIGHT_CN << all_manager.height.install(v_scale * bitmap->getHeight()) - << "\" alt=\"\" src=\""; - - if(param.embed_image) - { - auto path = html_renderer->str_fmt("%s/bg%x.%s", param.tmp_dir.c_str(), pageno, format.c_str()); - ifstream fin((char*)path, ifstream::binary); - if(!fin) - throw string("Cannot read background image ") + (char*)path; - - auto iter = FORMAT_MIME_TYPE_MAP.find(format); - if(iter == FORMAT_MIME_TYPE_MAP.end()) - throw string("Image format not supported: ") + format; - - string mime_type = iter->second; - f_page << "data:" << mime_type << ";base64," << Base64Stream(fin); - } - else - { - f_page << (char*)html_renderer->str_fmt("bg%x.%s", pageno, format.c_str()); - } - f_page << "\"/>"; + f_page << (char*)html_renderer->str_fmt("bg%x.%s", pageno, format.c_str()); } + f_page << "\"/>"; } } // namespace pdf2htmlEX diff --git a/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h b/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h index 6f2c24c9..984b1d7d 100644 --- a/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h +++ b/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h @@ -80,6 +80,8 @@ struct HTMLRenderer : OutputDev void process(PDFDoc * doc); + bool renderPage(PDFDoc * doc, int pageno); + //////////////////////////////////////////////////// // OutputDev interface //////////////////////////////////////////////////// diff --git a/pdf2htmlEX/src/HTMLRenderer/general.cc b/pdf2htmlEX/src/HTMLRenderer/general.cc index 7d43d130..62d96e19 100644 --- a/pdf2htmlEX/src/HTMLRenderer/general.cc +++ b/pdf2htmlEX/src/HTMLRenderer/general.cc @@ -190,6 +190,21 @@ void HTMLRenderer::process(PDFDoc *doc) cerr << endl; } +bool 
HTMLRenderer::renderPage(PDFDoc *doc, int pageno) +{ + if (bg_renderer->render_page(cur_doc, pageNum)) + { + return true; + } + else if (fallback_bg_renderer) + { + if (fallback_bg_renderer->render_page(cur_doc, pageNum)) + return true; + } + + return false; +} + void HTMLRenderer::setDefaultCTM(const double *ctm) { memcpy(default_ctm, ctm, sizeof(default_ctm)); @@ -243,14 +258,21 @@ void HTMLRenderer::endPage() { if(param.process_nontext) { - if (bg_renderer->render_page(cur_doc, pageNum)) + if (param.delay_background) { bg_renderer->embed_image(pageNum); } - else if (fallback_bg_renderer) + else { - if (fallback_bg_renderer->render_page(cur_doc, pageNum)) - fallback_bg_renderer->embed_image(pageNum); + if (bg_renderer->render_page(cur_doc, pageNum)) + { + bg_renderer->embed_image(pageNum); + } + else if (fallback_bg_renderer) + { + if (fallback_bg_renderer->render_page(cur_doc, pageNum)) + fallback_bg_renderer->embed_image(pageNum); + } } } diff --git a/pdf2htmlEX/src/Param.h b/pdf2htmlEX/src/Param.h index 859c78b7..b382b96f 100644 --- a/pdf2htmlEX/src/Param.h +++ b/pdf2htmlEX/src/Param.h @@ -46,6 +46,7 @@ struct Param int printing; int fallback; int tmp_file_size_limit; + int delay_background; // fonts int embed_external_font; From 2297a93b7db180532c614be5e8f26705e2bdb0a3 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Wed, 1 Jan 2025 14:53:58 +0100 Subject: [PATCH 02/21] kepp background renderer allocated --- pdf2htmlEX/src/HTMLRenderer/general.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/pdf2htmlEX/src/HTMLRenderer/general.cc b/pdf2htmlEX/src/HTMLRenderer/general.cc index 62d96e19..bdebc0aa 100644 --- a/pdf2htmlEX/src/HTMLRenderer/general.cc +++ b/pdf2htmlEX/src/HTMLRenderer/general.cc @@ -183,9 +183,6 @@ void HTMLRenderer::process(PDFDoc *doc) post_process(); - bg_renderer = nullptr; - fallback_bg_renderer = nullptr; - if(param.quiet == 0) cerr << endl; } From 276ea933cdfd7770fa4734d9248f0f6d07ed3853 Mon Sep 17 00:00:00 2001 From: Andreas Stefl 
Date: Wed, 1 Jan 2025 15:21:26 +0100 Subject: [PATCH 03/21] fix page number --- pdf2htmlEX/src/HTMLRenderer/general.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pdf2htmlEX/src/HTMLRenderer/general.cc b/pdf2htmlEX/src/HTMLRenderer/general.cc index bdebc0aa..07c22fd0 100644 --- a/pdf2htmlEX/src/HTMLRenderer/general.cc +++ b/pdf2htmlEX/src/HTMLRenderer/general.cc @@ -189,13 +189,13 @@ void HTMLRenderer::process(PDFDoc *doc) bool HTMLRenderer::renderPage(PDFDoc *doc, int pageno) { - if (bg_renderer->render_page(cur_doc, pageNum)) + if (bg_renderer->render_page(cur_doc, pageno)) { return true; } else if (fallback_bg_renderer) { - if (fallback_bg_renderer->render_page(cur_doc, pageNum)) + if (fallback_bg_renderer->render_page(cur_doc, pageno)) return true; } From 6e041d07eb0a48de73612ffa3f7c7af1be236531 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Thu, 2 Jan 2025 10:56:01 +0100 Subject: [PATCH 04/21] page cache --- pdf2htmlEX/src/CoveredTextDetector.cc | 14 ++++++------ pdf2htmlEX/src/CoveredTextDetector.h | 2 +- pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h | 5 +++++ pdf2htmlEX/src/HTMLRenderer/general.cc | 26 ++++++++++++++++++++++ 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/pdf2htmlEX/src/CoveredTextDetector.cc b/pdf2htmlEX/src/CoveredTextDetector.cc index 0792c528..f46817cb 100644 --- a/pdf2htmlEX/src/CoveredTextDetector.cc +++ b/pdf2htmlEX/src/CoveredTextDetector.cc @@ -14,7 +14,7 @@ namespace pdf2htmlEX { -CoveredTextDetector::CoveredTextDetector(Param & param): param(param) +CoveredTextDetector::CoveredTextDetector(Param & param): param(&param) { } @@ -41,10 +41,10 @@ void CoveredTextDetector::add_char_bbox_clipped(cairo_t *cairo, double * bbox, i char_pts_visible.push_back(pts_visible); // DCRH: Hide if no points are visible, or if some points are visible and correct_text_visibility == 2 - if (pts_visible == 0 || param.correct_text_visibility == 2) { + if (pts_visible == 0 || param->correct_text_visibility == 2) {
chars_covered.push_back(true); - if (pts_visible > 0 && param.correct_text_visibility == 2) { - param.actual_dpi = std::min(param.text_dpi, param.max_dpi); // Char partially covered so increase background resolution + if (pts_visible > 0 && param->correct_text_visibility == 2) { + param->actual_dpi = std::min(param->text_dpi, param->max_dpi); // Char partially covered so increase background resolution } } else { chars_covered.push_back(false); @@ -98,13 +98,13 @@ printf("pts_visible=%x\n", pts_visible); printf("pts_visible=%x\n", pts_visible); #endif char_pts_visible[i] = pts_visible; - if (pts_visible == 0 || (pts_visible != (1|2|4|8) && param.correct_text_visibility == 2)) { + if (pts_visible == 0 || (pts_visible != (1|2|4|8) && param->correct_text_visibility == 2)) { #ifdef DEBUG printf("Char covered\n"); #endif chars_covered[i] = true; - if (pts_visible > 0 && param.correct_text_visibility == 2) { // Partially visible text => increase rendering DPI - param.actual_dpi = std::min(param.text_dpi, param.max_dpi); + if (pts_visible > 0 && param->correct_text_visibility == 2) { // Partially visible text => increase rendering DPI + param->actual_dpi = std::min(param->text_dpi, param->max_dpi); } } } else { diff --git a/pdf2htmlEX/src/CoveredTextDetector.h b/pdf2htmlEX/src/CoveredTextDetector.h index 0f0506f3..2e664a7f 100644 --- a/pdf2htmlEX/src/CoveredTextDetector.h +++ b/pdf2htmlEX/src/CoveredTextDetector.h @@ -60,7 +60,7 @@ class CoveredTextDetector // x00, y00, x01, y01; x10, y10, x11, y11;... 
std::vector<double> char_bboxes; std::vector<int> char_pts_visible; - Param & param; + Param * param; }; } diff --git a/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h b/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h index 984b1d7d..983962d1 100644 --- a/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h +++ b/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h @@ -381,6 +381,11 @@ struct HTMLRenderer : OutputDev CoveredTextDetector covered_text_detector; DrawingTracer tracer; + + struct PageCache { + CoveredTextDetector covered_text_detector; + }; + std::unordered_map<int, PageCache> page_cache; }; } //namespace pdf2htmlEX diff --git a/pdf2htmlEX/src/HTMLRenderer/general.cc b/pdf2htmlEX/src/HTMLRenderer/general.cc index 07c22fd0..c2811238 100644 --- a/pdf2htmlEX/src/HTMLRenderer/general.cc +++ b/pdf2htmlEX/src/HTMLRenderer/general.cc @@ -183,12 +183,31 @@ void HTMLRenderer::process(PDFDoc *doc) post_process(); + if (param.delay_background == 0) + { + bg_renderer = nullptr; + fallback_bg_renderer = nullptr; + } + if(param.quiet == 0) cerr << endl; } bool HTMLRenderer::renderPage(PDFDoc *doc, int pageno) { + if (param.delay_background == 0) + { + return false; + } + + if (page_cache.find(pageno) != page_cache.end()) + { + cerr << "Page number " << pageno << " not found in page cache" << endl; + return false; + } + + covered_text_detector = page_cache[pageno].covered_text_detector; + if (bg_renderer->render_page(cur_doc, pageno)) { return true; @@ -209,6 +228,13 @@ void HTMLRenderer::setDefaultCTM(const double *ctm) void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref) { + if (param.delay_background && this->pageNum > 0) + { + page_cache[this->pageNum] = { + .covered_text_detector = covered_text_detector, + }; + } + covered_text_detector.reset(); tracer.reset(state); From 6083912607a7dd97f0014fd9a0fe8829f5fd446d Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Thu, 2 Jan 2025 10:58:34 +0100 Subject: [PATCH 05/21] default construct CoveredTextDetector --- pdf2htmlEX/src/CoveredTextDetector.cc | 4 ++++
pdf2htmlEX/src/CoveredTextDetector.h | 1 + 2 files changed, 5 insertions(+) diff --git a/pdf2htmlEX/src/CoveredTextDetector.cc b/pdf2htmlEX/src/CoveredTextDetector.cc index f46817cb..30fb22b8 100644 --- a/pdf2htmlEX/src/CoveredTextDetector.cc +++ b/pdf2htmlEX/src/CoveredTextDetector.cc @@ -14,6 +14,10 @@ namespace pdf2htmlEX { +CoveredTextDetector::CoveredTextDetector() +{ +} + CoveredTextDetector::CoveredTextDetector(Param & param): param(&param) { } diff --git a/pdf2htmlEX/src/CoveredTextDetector.h b/pdf2htmlEX/src/CoveredTextDetector.h index 2e664a7f..d1e6bf34 100644 --- a/pdf2htmlEX/src/CoveredTextDetector.h +++ b/pdf2htmlEX/src/CoveredTextDetector.h @@ -21,6 +21,7 @@ namespace pdf2htmlEX { class CoveredTextDetector { public: + CoveredTextDetector(); CoveredTextDetector(Param & param); From 967b1d0273239732bb0ddd763ba102c5ec824d47 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Thu, 2 Jan 2025 11:06:37 +0100 Subject: [PATCH 06/21] fix silly --- pdf2htmlEX/src/HTMLRenderer/general.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pdf2htmlEX/src/HTMLRenderer/general.cc b/pdf2htmlEX/src/HTMLRenderer/general.cc index c2811238..5b4a30dd 100644 --- a/pdf2htmlEX/src/HTMLRenderer/general.cc +++ b/pdf2htmlEX/src/HTMLRenderer/general.cc @@ -200,7 +200,7 @@ bool HTMLRenderer::renderPage(PDFDoc *doc, int pageno) return false; } - if (page_cache.find(pageno) != page_cache.end()) + if (page_cache.find(pageno) == page_cache.end()) { cerr << "Page number " << pageno << " not found in page cache" << endl; return false; From 961e1f343c995989b91d199b1b3d57e65213eca9 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Thu, 2 Jan 2025 11:12:01 +0100 Subject: [PATCH 07/21] cache at page end --- pdf2htmlEX/src/HTMLRenderer/general.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pdf2htmlEX/src/HTMLRenderer/general.cc b/pdf2htmlEX/src/HTMLRenderer/general.cc index 5b4a30dd..c9a34f8c 100644 --- a/pdf2htmlEX/src/HTMLRenderer/general.cc
+++ b/pdf2htmlEX/src/HTMLRenderer/general.cc @@ -228,13 +228,6 @@ void HTMLRenderer::setDefaultCTM(const double *ctm) void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref) { - if (param.delay_background && this->pageNum > 0) - { - page_cache[this->pageNum] = { - .covered_text_detector = covered_text_detector, - }; - } - covered_text_detector.reset(); tracer.reset(state); @@ -339,6 +332,13 @@ void HTMLRenderer::endPage() { { f_pages.fs << "" << endl; } + + if (param.delay_background) + { + page_cache[this->pageNum] = { + .covered_text_detector = covered_text_detector, + }; + } } void HTMLRenderer::pre_process(PDFDoc * doc) From 641036637e49ab92475c0211b9a18687fc9a84a9 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Fri, 3 Jan 2025 16:04:33 +0100 Subject: [PATCH 08/21] try auto img reload --- pdf2htmlEX/share/pdf2htmlEX.js.in | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pdf2htmlEX/share/pdf2htmlEX.js.in b/pdf2htmlEX/share/pdf2htmlEX.js.in index 40ffde66..065b7c75 100644 --- a/pdf2htmlEX/share/pdf2htmlEX.js.in +++ b/pdf2htmlEX/share/pdf2htmlEX.js.in @@ -335,6 +335,19 @@ Viewer.prototype = { this.initialize_radio_button(); this.render(); + + { + // deal with image reloading + var images = document.getElementsByTagName('img'); + for (var i = 0; i < images.length; i++) { + images[i].addEventListener('error', function() { + setTimeout(function() { + images[i].src = images[i].src; + }, 1000); + }); + images[i].src = images[i].src; + } + } }, /* From ae1859dc66c50ec7de448e5239ad15c30658226b Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Fri, 3 Jan 2025 16:11:40 +0100 Subject: [PATCH 09/21] try again --- pdf2htmlEX/share/pdf2htmlEX.js.in | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pdf2htmlEX/share/pdf2htmlEX.js.in b/pdf2htmlEX/share/pdf2htmlEX.js.in index 065b7c75..c6be6641 100644 --- a/pdf2htmlEX/share/pdf2htmlEX.js.in +++ b/pdf2htmlEX/share/pdf2htmlEX.js.in @@ -340,12 +340,13 @@ Viewer.prototype = { 
// deal with image reloading var images = document.getElementsByTagName('img'); for (var i = 0; i < images.length; i++) { - images[i].addEventListener('error', function() { + var image = images[i]; + image.addEventListener('error', function() { setTimeout(function() { - images[i].src = images[i].src; + image.src = image.src; }, 1000); }); - images[i].src = images[i].src; + image.src = image.src; } } }, From 8a9ec470d40ca1dbaf42fab0973ec69e62569f8b Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Fri, 3 Jan 2025 16:15:12 +0100 Subject: [PATCH 10/21] try again --- pdf2htmlEX/share/pdf2htmlEX.js.in | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pdf2htmlEX/share/pdf2htmlEX.js.in b/pdf2htmlEX/share/pdf2htmlEX.js.in index c6be6641..8a7dec85 100644 --- a/pdf2htmlEX/share/pdf2htmlEX.js.in +++ b/pdf2htmlEX/share/pdf2htmlEX.js.in @@ -343,10 +343,14 @@ Viewer.prototype = { var image = images[i]; image.addEventListener('error', function() { setTimeout(function() { - image.src = image.src; + tmp = image.src; + image.src = ''; + image.src = tmp; }, 1000); }); - image.src = image.src; + tmp = image.src; + image.src = ''; + image.src = tmp; } } }, From 939f6da4af385022067e24782605e2500ea0edb8 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Fri, 3 Jan 2025 16:17:07 +0100 Subject: [PATCH 11/21] try again --- pdf2htmlEX/share/pdf2htmlEX.js.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pdf2htmlEX/share/pdf2htmlEX.js.in b/pdf2htmlEX/share/pdf2htmlEX.js.in index 8a7dec85..64838947 100644 --- a/pdf2htmlEX/share/pdf2htmlEX.js.in +++ b/pdf2htmlEX/share/pdf2htmlEX.js.in @@ -337,18 +337,18 @@ Viewer.prototype = { this.render(); { - // deal with image reloading + // deal with delayed image loading var images = document.getElementsByTagName('img'); for (var i = 0; i < images.length; i++) { var image = images[i]; image.addEventListener('error', function() { setTimeout(function() { - tmp = image.src; + var tmp = image.src; image.src = 
''; image.src = tmp; }, 1000); }); - tmp = image.src; + var tmp = image.src; image.src = ''; image.src = tmp; } From 8034bd80dd285e4fb21bde4cb9da01b0c0a0057a Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Fri, 3 Jan 2025 16:20:00 +0100 Subject: [PATCH 12/21] debug --- pdf2htmlEX/share/pdf2htmlEX.js.in | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pdf2htmlEX/share/pdf2htmlEX.js.in b/pdf2htmlEX/share/pdf2htmlEX.js.in index 64838947..13b7d418 100644 --- a/pdf2htmlEX/share/pdf2htmlEX.js.in +++ b/pdf2htmlEX/share/pdf2htmlEX.js.in @@ -342,12 +342,15 @@ Viewer.prototype = { for (var i = 0; i < images.length; i++) { var image = images[i]; image.addEventListener('error', function() { + console.debug('image load error, retry in 1s'); setTimeout(function() { + console.debug('retrying image load'); var tmp = image.src; image.src = ''; image.src = tmp; }, 1000); }); + console.debug('image reload'); var tmp = image.src; image.src = ''; image.src = tmp; From 354d72fde063ca17b6d6305f775e651af5eb5e43 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Fri, 3 Jan 2025 16:50:14 +0100 Subject: [PATCH 13/21] more debug --- pdf2htmlEX/share/pdf2htmlEX.js.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pdf2htmlEX/share/pdf2htmlEX.js.in b/pdf2htmlEX/share/pdf2htmlEX.js.in index 13b7d418..f5f13954 100644 --- a/pdf2htmlEX/share/pdf2htmlEX.js.in +++ b/pdf2htmlEX/share/pdf2htmlEX.js.in @@ -348,12 +348,14 @@ Viewer.prototype = { var tmp = image.src; image.src = ''; image.src = tmp; + console.debug('image src reset to ' + tmp); }, 1000); }); console.debug('image reload'); var tmp = image.src; image.src = ''; image.src = tmp; + console.debug('image src reset to ' + tmp); } } }, From 74304176fa1840db3531d613f553ca889e998477 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Fri, 3 Jan 2025 17:00:04 +0100 Subject: [PATCH 14/21] more debugging --- pdf2htmlEX/share/pdf2htmlEX.js.in | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git 
a/pdf2htmlEX/share/pdf2htmlEX.js.in b/pdf2htmlEX/share/pdf2htmlEX.js.in index f5f13954..f017e132 100644 --- a/pdf2htmlEX/share/pdf2htmlEX.js.in +++ b/pdf2htmlEX/share/pdf2htmlEX.js.in @@ -342,20 +342,18 @@ Viewer.prototype = { for (var i = 0; i < images.length; i++) { var image = images[i]; image.addEventListener('error', function() { - console.debug('image load error, retry in 1s'); + console.debug('image load error, retry in 1s', image); setTimeout(function() { - console.debug('retrying image load'); + console.debug('retrying image load', image); var tmp = image.src; image.src = ''; image.src = tmp; - console.debug('image src reset to ' + tmp); }, 1000); }); - console.debug('image reload'); + console.debug('image reload', image); var tmp = image.src; image.src = ''; image.src = tmp; - console.debug('image src reset to ' + tmp); } } }, From c1bb81c5d05b7308e2b5b096732f75627269fb9b Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Fri, 3 Jan 2025 17:06:20 +0100 Subject: [PATCH 15/21] use closure --- pdf2htmlEX/share/pdf2htmlEX.js.in | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/pdf2htmlEX/share/pdf2htmlEX.js.in b/pdf2htmlEX/share/pdf2htmlEX.js.in index f017e132..13225f3d 100644 --- a/pdf2htmlEX/share/pdf2htmlEX.js.in +++ b/pdf2htmlEX/share/pdf2htmlEX.js.in @@ -341,19 +341,17 @@ Viewer.prototype = { var images = document.getElementsByTagName('img'); for (var i = 0; i < images.length; i++) { var image = images[i]; - image.addEventListener('error', function() { - console.debug('image load error, retry in 1s', image); - setTimeout(function() { - console.debug('retrying image load', image); - var tmp = image.src; - image.src = ''; - image.src = tmp; - }, 1000); - }); + (function(image) { + image.addEventListener('load', function() { + console.debug('image load error, retry in 1s', image); + setTimeout(function() { + console.debug('retrying image load', image); + image.src = image.src; + }, 1000); + }); + })(image); 
console.debug('image reload', image); - var tmp = image.src; - image.src = ''; - image.src = tmp; + image.src = image.src; } } }, From 5114984214b069dc67bfa2aa254e6ae92cbb9dba Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Fri, 3 Jan 2025 17:09:40 +0100 Subject: [PATCH 16/21] try tmp again --- pdf2htmlEX/share/pdf2htmlEX.js.in | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pdf2htmlEX/share/pdf2htmlEX.js.in b/pdf2htmlEX/share/pdf2htmlEX.js.in index 13225f3d..fc2f6686 100644 --- a/pdf2htmlEX/share/pdf2htmlEX.js.in +++ b/pdf2htmlEX/share/pdf2htmlEX.js.in @@ -346,12 +346,16 @@ Viewer.prototype = { console.debug('image load error, retry in 1s', image); setTimeout(function() { console.debug('retrying image load', image); - image.src = image.src; + var tmp = image.src; + image.src = ''; + image.src = tmp; }, 1000); }); })(image); console.debug('image reload', image); - image.src = image.src; + var tmp = image.src; + image.src = ''; + image.src = tmp; } } }, From b2bfeb5966397cc5ee3f89d707117bda3b184f05 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Fri, 3 Jan 2025 17:17:39 +0100 Subject: [PATCH 17/21] fix silly --- pdf2htmlEX/share/pdf2htmlEX.js.in | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pdf2htmlEX/share/pdf2htmlEX.js.in b/pdf2htmlEX/share/pdf2htmlEX.js.in index fc2f6686..0f79f2e0 100644 --- a/pdf2htmlEX/share/pdf2htmlEX.js.in +++ b/pdf2htmlEX/share/pdf2htmlEX.js.in @@ -342,20 +342,16 @@ Viewer.prototype = { for (var i = 0; i < images.length; i++) { var image = images[i]; (function(image) { - image.addEventListener('load', function() { + image.addEventListener('error', function() { console.debug('image load error, retry in 1s', image); setTimeout(function() { console.debug('retrying image load', image); - var tmp = image.src; - image.src = ''; - image.src = tmp; + image.src = image.src; }, 1000); }); })(image); console.debug('image reload', image); - var tmp = image.src; - image.src = ''; - image.src 
= tmp; + image.src = image.src; } } }, From dc612728425725d16845869b4d2e142b250fc2e7 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Fri, 3 Jan 2025 17:39:06 +0100 Subject: [PATCH 18/21] tmp image then move; remove debug output --- pdf2htmlEX/share/pdf2htmlEX.js.in | 3 --- .../CairoBackgroundRenderer.cc | 16 ++++++++++------ .../SplashBackgroundRenderer.cc | 12 ++++++++---- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/pdf2htmlEX/share/pdf2htmlEX.js.in b/pdf2htmlEX/share/pdf2htmlEX.js.in index 0f79f2e0..c0b04fb2 100644 --- a/pdf2htmlEX/share/pdf2htmlEX.js.in +++ b/pdf2htmlEX/share/pdf2htmlEX.js.in @@ -343,14 +343,11 @@ Viewer.prototype = { var image = images[i]; (function(image) { image.addEventListener('error', function() { - console.debug('image load error, retry in 1s', image); setTimeout(function() { - console.debug('retrying image load', image); image.src = image.src; }, 1000); }); })(image); - console.debug('image reload', image); image.src = image.src; } } diff --git a/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc b/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc index ae5c8091..d77009e1 100644 --- a/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc +++ b/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc @@ -130,11 +130,11 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) if (doc->getPageRotate(pageno) == 90 || doc->getPageRotate(pageno) == 270) std::swap(page_height, page_width); - string fn = (char*)html_renderer->str_fmt("%s/bg%x.svg", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno); - if(param.embed_image) - html_renderer->tmp_files.add(fn); + + std::string tmp_fn = html_renderer->str_fmt("%s/tmp_bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); + std::string fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? 
param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); - surface = cairo_svg_surface_create(fn.c_str(), page_width * param.actual_dpi / DEFAULT_DPI, page_height * param.actual_dpi / DEFAULT_DPI); + surface = cairo_svg_surface_create(tmp_fn.c_str(), page_width * param.actual_dpi / DEFAULT_DPI, page_height * param.actual_dpi / DEFAULT_DPI); cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2); cairo_surface_set_fallback_resolution(surface, param.actual_dpi, param.actual_dpi); @@ -174,7 +174,7 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) { int n = 0; char c; - ifstream svgfile(fn); + ifstream svgfile(tmp_fn); //count of '<' in the file should be an approximation of node count. while(svgfile >> c) { @@ -182,7 +182,6 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) ++n; if (n > param.svg_node_count_limit) { - html_renderer->tmp_files.add(fn); return false; } } @@ -192,6 +191,11 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) for (auto id : bitmaps_in_current_page) ++bitmaps_ref_count[id]; + std::rename(tmp_fn.c_str(), fn.c_str()); + + if(param.embed_image) + html_renderer->tmp_files.add(fn); + return true; } diff --git a/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc b/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc index 83ee758b..9f38a725 100644 --- a/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc +++ b/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc @@ -114,9 +114,8 @@ bool SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno) auto * bitmap = getBitmap(); - auto fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); - if(param.embed_image) - html_renderer->tmp_files.add((const char *)fn); + std::string tmp_fn = html_renderer->str_fmt("%s/tmp_bg%x.%s", (param.embed_image ? 
param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); + std::string fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); SplashImageFileFormat splashImageFileFormat; if(format == "png") @@ -126,10 +125,15 @@ bool SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno) else throw string("Image format not supported: ") + format; - SplashError e = bitmap->writeImgFile(splashImageFileFormat, (const char *)fn, param.actual_dpi, param.actual_dpi); + SplashError e = bitmap->writeImgFile(splashImageFileFormat, tmp_fn.c_str(), param.actual_dpi, param.actual_dpi); if (e != splashOk) throw string("Cannot write background image. SplashErrorCode: ") + std::to_string(e); + std::rename(tmp_fn.c_str(), fn.c_str()); + + if(param.embed_image) + html_renderer->tmp_files.add(fn); + return true; } From 8e76a858d167319ae1c27d94c717ed58819ef315 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Fri, 3 Jan 2025 17:44:40 +0100 Subject: [PATCH 19/21] fix funky string access --- .../BackgroundRenderer/CairoBackgroundRenderer.cc | 13 ++++++------- .../BackgroundRenderer/SplashBackgroundRenderer.cc | 10 +++++----- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc b/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc index d77009e1..8abed701 100644 --- a/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc +++ b/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc @@ -130,11 +130,10 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) if (doc->getPageRotate(pageno) == 90 || doc->getPageRotate(pageno) == 270) std::swap(page_height, page_width); - - std::string tmp_fn = html_renderer->str_fmt("%s/tmp_bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); - std::string fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? 
param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); + auto tmp_fn = html_renderer->str_fmt("%s/tmp_bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); + auto fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); - surface = cairo_svg_surface_create(tmp_fn.c_str(), page_width * param.actual_dpi / DEFAULT_DPI, page_height * param.actual_dpi / DEFAULT_DPI); + surface = cairo_svg_surface_create((const char *)tmp_fn, page_width * param.actual_dpi / DEFAULT_DPI, page_height * param.actual_dpi / DEFAULT_DPI); cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2); cairo_surface_set_fallback_resolution(surface, param.actual_dpi, param.actual_dpi); @@ -174,7 +173,7 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) { int n = 0; char c; - ifstream svgfile(tmp_fn); + ifstream svgfile((const char *)tmp_fn); //count of '<' in the file should be an approximation of node count. while(svgfile >> c) { @@ -191,10 +190,10 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) for (auto id : bitmaps_in_current_page) ++bitmaps_ref_count[id]; - std::rename(tmp_fn.c_str(), fn.c_str()); + std::rename((const char *)tmp_fn, (const char *)fn); if(param.embed_image) - html_renderer->tmp_files.add(fn); + html_renderer->tmp_files.add((const char *)fn); return true; } diff --git a/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc b/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc index 9f38a725..52859de6 100644 --- a/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc +++ b/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc @@ -114,8 +114,8 @@ bool SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno) auto * bitmap = getBitmap(); - std::string tmp_fn = html_renderer->str_fmt("%s/tmp_bg%x.%s", (param.embed_image ? 
param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); - std::string fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); + auto tmp_fn = html_renderer->str_fmt("%s/tmp_bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); + auto fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); SplashImageFileFormat splashImageFileFormat; if(format == "png") @@ -125,14 +125,14 @@ bool SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno) else throw string("Image format not supported: ") + format; - SplashError e = bitmap->writeImgFile(splashImageFileFormat, tmp_fn.c_str(), param.actual_dpi, param.actual_dpi); + SplashError e = bitmap->writeImgFile(splashImageFileFormat, (const char *)tmp_fn, param.actual_dpi, param.actual_dpi); if (e != splashOk) throw string("Cannot write background image. 
SplashErrorCode: ") + std::to_string(e); - std::rename(tmp_fn.c_str(), fn.c_str()); + std::rename((const char *)tmp_fn, (const char *)fn); if(param.embed_image) - html_renderer->tmp_files.add(fn); + html_renderer->tmp_files.add((const char *)fn); return true; } From 41c330005a2c7d43dfa204923a17d1d2084510ff Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Fri, 3 Jan 2025 17:46:31 +0100 Subject: [PATCH 20/21] fix one more silly --- pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc b/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc index 8abed701..b349cb4c 100644 --- a/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc +++ b/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc @@ -130,8 +130,8 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) if (doc->getPageRotate(pageno) == 90 || doc->getPageRotate(pageno) == 270) std::swap(page_height, page_width); - auto tmp_fn = html_renderer->str_fmt("%s/tmp_bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); - auto fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); + auto tmp_fn = html_renderer->str_fmt("%s/tmp_bg%x.svg", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno); + auto fn = html_renderer->str_fmt("%s/bg%x.svg", (param.embed_image ? 
param.tmp_dir : param.dest_dir).c_str(), pageno); surface = cairo_svg_surface_create((const char *)tmp_fn, page_width * param.actual_dpi / DEFAULT_DPI, page_height * param.actual_dpi / DEFAULT_DPI); cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2); From f55e7ecab2a43d1a621a424779e845fd73fbcb30 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Sun, 5 Jan 2025 11:28:12 +0100 Subject: [PATCH 21/21] remove js reload; remove tmp image --- pdf2htmlEX/share/pdf2htmlEX.js.in | 16 ---------------- .../CairoBackgroundRenderer.cc | 7 ++----- .../SplashBackgroundRenderer.cc | 5 +---- 3 files changed, 3 insertions(+), 25 deletions(-) diff --git a/pdf2htmlEX/share/pdf2htmlEX.js.in b/pdf2htmlEX/share/pdf2htmlEX.js.in index c0b04fb2..40ffde66 100644 --- a/pdf2htmlEX/share/pdf2htmlEX.js.in +++ b/pdf2htmlEX/share/pdf2htmlEX.js.in @@ -335,22 +335,6 @@ Viewer.prototype = { this.initialize_radio_button(); this.render(); - - { - // deal with delayed image loading - var images = document.getElementsByTagName('img'); - for (var i = 0; i < images.length; i++) { - var image = images[i]; - (function(image) { - image.addEventListener('error', function() { - setTimeout(function() { - image.src = image.src; - }, 1000); - }); - })(image); - image.src = image.src; - } - } }, /* diff --git a/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc b/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc index b349cb4c..4621db9b 100644 --- a/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc +++ b/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc @@ -130,10 +130,9 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) if (doc->getPageRotate(pageno) == 90 || doc->getPageRotate(pageno) == 270) std::swap(page_height, page_width); - auto tmp_fn = html_renderer->str_fmt("%s/tmp_bg%x.svg", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno); auto fn = html_renderer->str_fmt("%s/bg%x.svg", (param.embed_image ? 
param.tmp_dir : param.dest_dir).c_str(), pageno); - surface = cairo_svg_surface_create((const char *)tmp_fn, page_width * param.actual_dpi / DEFAULT_DPI, page_height * param.actual_dpi / DEFAULT_DPI); + surface = cairo_svg_surface_create((const char *)fn, page_width * param.actual_dpi / DEFAULT_DPI, page_height * param.actual_dpi / DEFAULT_DPI); cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2); cairo_surface_set_fallback_resolution(surface, param.actual_dpi, param.actual_dpi); @@ -173,7 +172,7 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) { int n = 0; char c; - ifstream svgfile((const char *)tmp_fn); + ifstream svgfile((const char *)fn); //count of '<' in the file should be an approximation of node count. while(svgfile >> c) { @@ -190,8 +189,6 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) for (auto id : bitmaps_in_current_page) ++bitmaps_ref_count[id]; - std::rename((const char *)tmp_fn, (const char *)fn); - if(param.embed_image) html_renderer->tmp_files.add((const char *)fn); diff --git a/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc b/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc index 52859de6..780c008a 100644 --- a/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc +++ b/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc @@ -114,7 +114,6 @@ bool SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno) auto * bitmap = getBitmap(); - auto tmp_fn = html_renderer->str_fmt("%s/tmp_bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); auto fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? 
param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); SplashImageFileFormat splashImageFileFormat; @@ -125,12 +124,10 @@ bool SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno) else throw string("Image format not supported: ") + format; - SplashError e = bitmap->writeImgFile(splashImageFileFormat, (const char *)tmp_fn, param.actual_dpi, param.actual_dpi); + SplashError e = bitmap->writeImgFile(splashImageFileFormat, (const char *)fn, param.actual_dpi, param.actual_dpi); if (e != splashOk) throw string("Cannot write background image. SplashErrorCode: ") + std::to_string(e); - std::rename((const char *)tmp_fn, (const char *)fn); - if(param.embed_image) html_renderer->tmp_files.add((const char *)fn);