diff --git a/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc b/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc index ae5c8091..4621db9b 100644 --- a/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc +++ b/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc @@ -130,11 +130,9 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) if (doc->getPageRotate(pageno) == 90 || doc->getPageRotate(pageno) == 270) std::swap(page_height, page_width); - string fn = (char*)html_renderer->str_fmt("%s/bg%x.svg", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno); - if(param.embed_image) - html_renderer->tmp_files.add(fn); + auto fn = html_renderer->str_fmt("%s/bg%x.svg", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno); - surface = cairo_svg_surface_create(fn.c_str(), page_width * param.actual_dpi / DEFAULT_DPI, page_height * param.actual_dpi / DEFAULT_DPI); + surface = cairo_svg_surface_create((const char *)fn, page_width * param.actual_dpi / DEFAULT_DPI, page_height * param.actual_dpi / DEFAULT_DPI); cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2); cairo_surface_set_fallback_resolution(surface, param.actual_dpi, param.actual_dpi); @@ -174,7 +172,7 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) { int n = 0; char c; - ifstream svgfile(fn); + ifstream svgfile((const char *)fn); //count of '<' in the file should be an approximation of node count. 
while(svgfile >> c) { @@ -182,7 +180,6 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) ++n; if (n > param.svg_node_count_limit) { - html_renderer->tmp_files.add(fn); return false; } } @@ -192,6 +189,9 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) for (auto id : bitmaps_in_current_page) ++bitmaps_ref_count[id]; + if(param.embed_image) + html_renderer->tmp_files.add((const char *)fn); + return true; } diff --git a/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc b/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc index de965e16..780c008a 100644 --- a/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc +++ b/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc @@ -111,66 +111,55 @@ bool SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno) (!(param.use_cropbox)), false, false, nullptr, nullptr, &annot_cb, &process_annotation); + + auto * bitmap = getBitmap(); + + auto fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); + + SplashImageFileFormat splashImageFileFormat; + if(format == "png") + splashImageFileFormat = splashFormatPng; + else if(format == "jpg") + splashImageFileFormat = splashFormatJpeg; + else + throw string("Image format not supported: ") + format; + + SplashError e = bitmap->writeImgFile(splashImageFileFormat, (const char *)fn, param.actual_dpi, param.actual_dpi); + if (e != splashOk) + throw string("Cannot write background image. 
SplashErrorCode: ") + std::to_string(e); + + if(param.embed_image) + html_renderer->tmp_files.add((const char *)fn); + return true; } void SplashBackgroundRenderer::embed_image(int pageno) { - auto * bitmap = getBitmap(); - // dump the background image only when it is not empty - if(bitmap->getWidth() >= 0 && bitmap->getHeight() >= 0) + auto & f_page = *(html_renderer->f_curpage); + + f_page << "\"\"str_fmt("%s/bg%x.%s", param.tmp_dir.c_str(), pageno, format.c_str()); + ifstream fin((char*)path, ifstream::binary); + if(!fin) + throw string("Cannot read background image ") + (char*)path; + + auto iter = FORMAT_MIME_TYPE_MAP.find(format); + if(iter == FORMAT_MIME_TYPE_MAP.end()) + throw string("Image format not supported: ") + format; + + string mime_type = iter->second; + f_page << "data:" << mime_type << ";base64," << Base64Stream(fin); + } + else { - { - auto fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); - if(param.embed_image) - html_renderer->tmp_files.add((const char *)fn); - - SplashImageFileFormat splashImageFileFormat; - if(format == "png") - splashImageFileFormat = splashFormatPng; - else if(format == "jpg") - splashImageFileFormat = splashFormatJpeg; - else - throw string("Image format not supported: ") + format; - - SplashError e = bitmap->writeImgFile(splashImageFileFormat, (const char *)fn, param.actual_dpi, param.actual_dpi); - if (e != splashOk) - throw string("Cannot write background image. 
SplashErrorCode: ") + std::to_string(e); - } - - double h_scale = html_renderer->text_zoom_factor() * DEFAULT_DPI / param.actual_dpi; - double v_scale = html_renderer->text_zoom_factor() * DEFAULT_DPI / param.actual_dpi; - - auto & f_page = *(html_renderer->f_curpage); - auto & all_manager = html_renderer->all_manager; - - f_page << "getWidth()) - << " " << CSS::HEIGHT_CN << all_manager.height.install(v_scale * bitmap->getHeight()) - << "\" alt=\"\" src=\""; - - if(param.embed_image) - { - auto path = html_renderer->str_fmt("%s/bg%x.%s", param.tmp_dir.c_str(), pageno, format.c_str()); - ifstream fin((char*)path, ifstream::binary); - if(!fin) - throw string("Cannot read background image ") + (char*)path; - - auto iter = FORMAT_MIME_TYPE_MAP.find(format); - if(iter == FORMAT_MIME_TYPE_MAP.end()) - throw string("Image format not supported: ") + format; - - string mime_type = iter->second; - f_page << "data:" << mime_type << ";base64," << Base64Stream(fin); - } - else - { - f_page << (char*)html_renderer->str_fmt("bg%x.%s", pageno, format.c_str()); - } - f_page << "\"/>"; + f_page << (char*)html_renderer->str_fmt("bg%x.%s", pageno, format.c_str()); } + f_page << "\"/>"; } } // namespace pdf2htmlEX diff --git a/pdf2htmlEX/src/CoveredTextDetector.cc b/pdf2htmlEX/src/CoveredTextDetector.cc index 0792c528..30fb22b8 100644 --- a/pdf2htmlEX/src/CoveredTextDetector.cc +++ b/pdf2htmlEX/src/CoveredTextDetector.cc @@ -14,7 +14,11 @@ namespace pdf2htmlEX { -CoveredTextDetector::CoveredTextDetector(Param & param): param(param) +CoveredTextDetector::CoveredTextDetector() +{ +} + +CoveredTextDetector::CoveredTextDetector(Param & param): param(&param) { } @@ -41,10 +45,10 @@ void CoveredTextDetector::add_char_bbox_clipped(cairo_t *cairo, double * bbox, i char_pts_visible.push_back(pts_visible); // DCRH: Hide if no points are visible, or if some points are visible and correct_text_visibility == 2 - if (pts_visible == 0 || 
param->correct_text_visibility == 2) { chars_covered.push_back(true); - if (pts_visible > 0 && param.correct_text_visibility == 2) { - param.actual_dpi = std::min(param.text_dpi, param.max_dpi); // Char partially covered so increase background resolution + if (pts_visible > 0 && param->correct_text_visibility == 2) { + param->actual_dpi = std::min(param->text_dpi, param->max_dpi); // Char partially covered so increase background resolution } } else { chars_covered.push_back(false); @@ -98,13 +102,13 @@ printf("pts_visible=%x\n", pts_visible); printf("pts_visible=%x\n", pts_visible); #endif char_pts_visible[i] = pts_visible; - if (pts_visible == 0 || (pts_visible != (1|2|4|8) && param.correct_text_visibility == 2)) { + if (pts_visible == 0 || (pts_visible != (1|2|4|8) && param->correct_text_visibility == 2)) { #ifdef DEBUG printf("Char covered\n"); #endif chars_covered[i] = true; - if (pts_visible > 0 && param.correct_text_visibility == 2) { // Partially visible text => increase rendering DPI - param.actual_dpi = std::min(param.text_dpi, param.max_dpi); + if (pts_visible > 0 && param->correct_text_visibility == 2) { // Partially visible text => increase rendering DPI + param->actual_dpi = std::min(param->text_dpi, param->max_dpi); } } } else { diff --git a/pdf2htmlEX/src/CoveredTextDetector.h b/pdf2htmlEX/src/CoveredTextDetector.h index 0f0506f3..d1e6bf34 100644 --- a/pdf2htmlEX/src/CoveredTextDetector.h +++ b/pdf2htmlEX/src/CoveredTextDetector.h @@ -21,6 +21,7 @@ namespace pdf2htmlEX { class CoveredTextDetector { public: + CoveredTextDetector(); CoveredTextDetector(Param & param); @@ -60,7 +61,7 @@ class CoveredTextDetector // x00, y00, x01, y01; x10, y10, x11, y11;... 
std::vector<double> char_bboxes; std::vector<int> char_pts_visible; - Param & param; + Param * param; }; } diff --git a/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h b/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h index 6f2c24c9..983962d1 100644 --- a/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h +++ b/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h @@ -80,6 +80,8 @@ struct HTMLRenderer : OutputDev void process(PDFDoc * doc); + bool renderPage(PDFDoc * doc, int pageno); + //////////////////////////////////////////////////// // OutputDev interface //////////////////////////////////////////////////// @@ -379,6 +381,11 @@ struct HTMLRenderer : OutputDev CoveredTextDetector covered_text_detector; DrawingTracer tracer; + + struct PageCache { + CoveredTextDetector covered_text_detector; + }; + std::unordered_map<int, PageCache> page_cache; }; } //namespace pdf2htmlEX diff --git a/pdf2htmlEX/src/HTMLRenderer/general.cc b/pdf2htmlEX/src/HTMLRenderer/general.cc index 7d43d130..c9a34f8c 100644 --- a/pdf2htmlEX/src/HTMLRenderer/general.cc +++ b/pdf2htmlEX/src/HTMLRenderer/general.cc @@ -183,13 +183,44 @@ void HTMLRenderer::process(PDFDoc *doc) post_process(); - bg_renderer = nullptr; - fallback_bg_renderer = nullptr; + if (param.delay_background == 0) + { + bg_renderer = nullptr; + fallback_bg_renderer = nullptr; + } if(param.quiet == 0) cerr << endl; } +bool HTMLRenderer::renderPage(PDFDoc *doc, int pageno) +{ + if (param.delay_background == 0) + { + return false; + } + + if (page_cache.find(pageno) == page_cache.end()) + { + cerr << "Page number " << pageno << " not found in page cache" << endl; + return false; + } + + covered_text_detector = page_cache[pageno].covered_text_detector; + + if (bg_renderer->render_page(cur_doc, pageno)) + { + return true; + } + else if (fallback_bg_renderer) + { + if (fallback_bg_renderer->render_page(cur_doc, pageno)) + return true; + } + + return false; +} + void HTMLRenderer::setDefaultCTM(const double *ctm) { memcpy(default_ctm, ctm, sizeof(default_ctm)); @@ -243,14 +274,21 @@ void 
HTMLRenderer::endPage() { if(param.process_nontext) { - if (bg_renderer->render_page(cur_doc, pageNum)) + if (param.delay_background) { bg_renderer->embed_image(pageNum); } - else if (fallback_bg_renderer) + else { - if (fallback_bg_renderer->render_page(cur_doc, pageNum)) - fallback_bg_renderer->embed_image(pageNum); + if (bg_renderer->render_page(cur_doc, pageNum)) + { + bg_renderer->embed_image(pageNum); + } + else if (fallback_bg_renderer) + { + if (fallback_bg_renderer->render_page(cur_doc, pageNum)) + fallback_bg_renderer->embed_image(pageNum); + } } } @@ -294,6 +332,13 @@ void HTMLRenderer::endPage() { { f_pages.fs << "" << endl; } + + if (param.delay_background) + { + page_cache[this->pageNum] = { + .covered_text_detector = covered_text_detector, + }; + } } void HTMLRenderer::pre_process(PDFDoc * doc) diff --git a/pdf2htmlEX/src/Param.h b/pdf2htmlEX/src/Param.h index 859c78b7..b382b96f 100644 --- a/pdf2htmlEX/src/Param.h +++ b/pdf2htmlEX/src/Param.h @@ -46,6 +46,7 @@ struct Param int printing; int fallback; int tmp_file_size_limit; + int delay_background; // fonts int embed_external_font;