diff --git a/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc b/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc
index ae5c8091..4621db9b 100644
--- a/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc
+++ b/pdf2htmlEX/src/BackgroundRenderer/CairoBackgroundRenderer.cc
@@ -130,11 +130,9 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
if (doc->getPageRotate(pageno) == 90 || doc->getPageRotate(pageno) == 270)
std::swap(page_height, page_width);
- string fn = (char*)html_renderer->str_fmt("%s/bg%x.svg", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno);
- if(param.embed_image)
- html_renderer->tmp_files.add(fn);
+ auto fn = html_renderer->str_fmt("%s/bg%x.svg", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno);
- surface = cairo_svg_surface_create(fn.c_str(), page_width * param.actual_dpi / DEFAULT_DPI, page_height * param.actual_dpi / DEFAULT_DPI);
+ surface = cairo_svg_surface_create((const char *)fn, page_width * param.actual_dpi / DEFAULT_DPI, page_height * param.actual_dpi / DEFAULT_DPI);
cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2);
cairo_surface_set_fallback_resolution(surface, param.actual_dpi, param.actual_dpi);
@@ -174,7 +172,7 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
{
int n = 0;
char c;
- ifstream svgfile(fn);
+ ifstream svgfile((const char *)fn);
//count of '<' in the file should be an approximation of node count.
while(svgfile >> c)
{
@@ -182,7 +180,6 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
++n;
if (n > param.svg_node_count_limit)
{
- html_renderer->tmp_files.add(fn);
return false;
}
}
@@ -192,6 +189,9 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
for (auto id : bitmaps_in_current_page)
++bitmaps_ref_count[id];
+ if(param.embed_image)
+ html_renderer->tmp_files.add((const char *)fn);
+
return true;
}
diff --git a/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc b/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc
index de965e16..780c008a 100644
--- a/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc
+++ b/pdf2htmlEX/src/BackgroundRenderer/SplashBackgroundRenderer.cc
@@ -111,66 +111,55 @@ bool SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
(!(param.use_cropbox)),
false, false,
nullptr, nullptr, &annot_cb, &process_annotation);
+
+ auto * bitmap = getBitmap();
+
+ auto fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str());
+
+ SplashImageFileFormat splashImageFileFormat;
+ if(format == "png")
+ splashImageFileFormat = splashFormatPng;
+ else if(format == "jpg")
+ splashImageFileFormat = splashFormatJpeg;
+ else
+ throw string("Image format not supported: ") + format;
+
+ SplashError e = bitmap->writeImgFile(splashImageFileFormat, (const char *)fn, param.actual_dpi, param.actual_dpi);
+ if (e != splashOk)
+ throw string("Cannot write background image. SplashErrorCode: ") + std::to_string(e);
+
+ if(param.embed_image)
+ html_renderer->tmp_files.add((const char *)fn);
+
return true;
}
void SplashBackgroundRenderer::embed_image(int pageno)
{
- auto * bitmap = getBitmap();
- // dump the background image only when it is not empty
- if(bitmap->getWidth() >= 0 && bitmap->getHeight() >= 0)
+ auto & f_page = *(html_renderer->f_curpage);
+
+ f_page << "
str_fmt("%s/bg%x.%s", param.tmp_dir.c_str(), pageno, format.c_str());
+ ifstream fin((char*)path, ifstream::binary);
+ if(!fin)
+ throw string("Cannot read background image ") + (char*)path;
+
+ auto iter = FORMAT_MIME_TYPE_MAP.find(format);
+ if(iter == FORMAT_MIME_TYPE_MAP.end())
+ throw string("Image format not supported: ") + format;
+
+ string mime_type = iter->second;
+ f_page << "data:" << mime_type << ";base64," << Base64Stream(fin);
+ }
+ else
{
- {
- auto fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str());
- if(param.embed_image)
- html_renderer->tmp_files.add((const char *)fn);
-
- SplashImageFileFormat splashImageFileFormat;
- if(format == "png")
- splashImageFileFormat = splashFormatPng;
- else if(format == "jpg")
- splashImageFileFormat = splashFormatJpeg;
- else
- throw string("Image format not supported: ") + format;
-
- SplashError e = bitmap->writeImgFile(splashImageFileFormat, (const char *)fn, param.actual_dpi, param.actual_dpi);
- if (e != splashOk)
- throw string("Cannot write background image. SplashErrorCode: ") + std::to_string(e);
- }
-
- double h_scale = html_renderer->text_zoom_factor() * DEFAULT_DPI / param.actual_dpi;
- double v_scale = html_renderer->text_zoom_factor() * DEFAULT_DPI / param.actual_dpi;
-
- auto & f_page = *(html_renderer->f_curpage);
- auto & all_manager = html_renderer->all_manager;
-
- f_page << "
getWidth())
- << " " << CSS::HEIGHT_CN << all_manager.height.install(v_scale * bitmap->getHeight())
- << "\" alt=\"\" src=\"";
-
- if(param.embed_image)
- {
- auto path = html_renderer->str_fmt("%s/bg%x.%s", param.tmp_dir.c_str(), pageno, format.c_str());
- ifstream fin((char*)path, ifstream::binary);
- if(!fin)
- throw string("Cannot read background image ") + (char*)path;
-
- auto iter = FORMAT_MIME_TYPE_MAP.find(format);
- if(iter == FORMAT_MIME_TYPE_MAP.end())
- throw string("Image format not supported: ") + format;
-
- string mime_type = iter->second;
- f_page << "data:" << mime_type << ";base64," << Base64Stream(fin);
- }
- else
- {
- f_page << (char*)html_renderer->str_fmt("bg%x.%s", pageno, format.c_str());
- }
- f_page << "\"/>";
+ f_page << (char*)html_renderer->str_fmt("bg%x.%s", pageno, format.c_str());
}
+ f_page << "\"/>";
}
} // namespace pdf2htmlEX
diff --git a/pdf2htmlEX/src/CoveredTextDetector.cc b/pdf2htmlEX/src/CoveredTextDetector.cc
index 0792c528..30fb22b8 100644
--- a/pdf2htmlEX/src/CoveredTextDetector.cc
+++ b/pdf2htmlEX/src/CoveredTextDetector.cc
@@ -14,7 +14,11 @@
namespace pdf2htmlEX {
-CoveredTextDetector::CoveredTextDetector(Param & param): param(param)
+CoveredTextDetector::CoveredTextDetector(): param(nullptr) // no Param bound yet; avoids an indeterminate pointer member
+{
+}
+
+CoveredTextDetector::CoveredTextDetector(Param & param): param(&param) // member is a Param *, so store the argument's address
{
}
@@ -41,10 +45,10 @@ void CoveredTextDetector::add_char_bbox_clipped(cairo_t *cairo, double * bbox, i
char_pts_visible.push_back(pts_visible);
// DCRH: Hide if no points are visible, or if some points are visible and correct_text_visibility == 2
- if (pts_visible == 0 || param.correct_text_visibility == 2) {
+ if (pts_visible == 0 || param->correct_text_visibility == 2) {
chars_covered.push_back(true);
- if (pts_visible > 0 && param.correct_text_visibility == 2) {
- param.actual_dpi = std::min(param.text_dpi, param.max_dpi); // Char partially covered so increase background resolution
+ if (pts_visible > 0 && param->correct_text_visibility == 2) {
+ param->actual_dpi = std::min(param->text_dpi, param->max_dpi); // Char partially covered so increase background resolution
}
} else {
chars_covered.push_back(false);
@@ -98,13 +102,13 @@ printf("pts_visible=%x\n", pts_visible);
printf("pts_visible=%x\n", pts_visible);
#endif
char_pts_visible[i] = pts_visible;
- if (pts_visible == 0 || (pts_visible != (1|2|4|8) && param.correct_text_visibility == 2)) {
+ if (pts_visible == 0 || (pts_visible != (1|2|4|8) && param->correct_text_visibility == 2)) {
#ifdef DEBUG
printf("Char covered\n");
#endif
chars_covered[i] = true;
- if (pts_visible > 0 && param.correct_text_visibility == 2) { // Partially visible text => increase rendering DPI
- param.actual_dpi = std::min(param.text_dpi, param.max_dpi);
+ if (pts_visible > 0 && param->correct_text_visibility == 2) { // Partially visible text => increase rendering DPI
+ param->actual_dpi = std::min(param->text_dpi, param->max_dpi);
}
}
} else {
diff --git a/pdf2htmlEX/src/CoveredTextDetector.h b/pdf2htmlEX/src/CoveredTextDetector.h
index 0f0506f3..d1e6bf34 100644
--- a/pdf2htmlEX/src/CoveredTextDetector.h
+++ b/pdf2htmlEX/src/CoveredTextDetector.h
@@ -21,6 +21,7 @@ namespace pdf2htmlEX {
class CoveredTextDetector
{
public:
+ CoveredTextDetector();
CoveredTextDetector(Param & param);
@@ -60,7 +61,7 @@ class CoveredTextDetector
// x00, y00, x01, y01; x10, y10, x11, y11;...
std::vector char_bboxes;
std::vector char_pts_visible;
- Param & param;
+ Param * param;
};
}
diff --git a/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h b/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h
index 6f2c24c9..983962d1 100644
--- a/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h
+++ b/pdf2htmlEX/src/HTMLRenderer/HTMLRenderer.h
@@ -80,6 +80,8 @@ struct HTMLRenderer : OutputDev
void process(PDFDoc * doc);
+ bool renderPage(PDFDoc * doc, int pageno);
+
////////////////////////////////////////////////////
// OutputDev interface
////////////////////////////////////////////////////
@@ -379,6 +381,11 @@ struct HTMLRenderer : OutputDev
CoveredTextDetector covered_text_detector;
DrawingTracer tracer;
+
+    struct PageCache { // per-page state that endPage() stores when param.delay_background is set
+        CoveredTextDetector covered_text_detector;
+    };
+    std::unordered_map<int, PageCache> page_cache; // keyed by page number; read back by renderPage()
};
} //namespace pdf2htmlEX
diff --git a/pdf2htmlEX/src/HTMLRenderer/general.cc b/pdf2htmlEX/src/HTMLRenderer/general.cc
index 7d43d130..c9a34f8c 100644
--- a/pdf2htmlEX/src/HTMLRenderer/general.cc
+++ b/pdf2htmlEX/src/HTMLRenderer/general.cc
@@ -183,13 +183,44 @@ void HTMLRenderer::process(PDFDoc *doc)
post_process();
- bg_renderer = nullptr;
- fallback_bg_renderer = nullptr;
+ if (param.delay_background == 0)
+ {
+ bg_renderer = nullptr;
+ fallback_bg_renderer = nullptr;
+ }
if(param.quiet == 0)
cerr << endl;
}
+// Renders the delayed background of page `pageno` from the state endPage() cached.
+// Returns true if bg_renderer or fallback_bg_renderer rendered the page; false when
+// delay_background is 0, the page is not in page_cache, or no renderer succeeded.
+// NOTE(review): `doc` is unused, cur_doc is passed to the renderers instead - confirm.
+bool HTMLRenderer::renderPage(PDFDoc *doc, int pageno)
+{
+    if (param.delay_background == 0)
+        return false;
+
+    const auto cached = page_cache.find(pageno); // single lookup instead of find() + operator[]
+    if (cached == page_cache.end())
+    {
+        cerr << "Page number " << pageno << " not found in page cache" << endl;
+        return false;
+    }
+
+    // Restore the detector state that endPage() saved for this page.
+    covered_text_detector = cached->second.covered_text_detector;
+
+    // endPage() only uses bg_renderer under param.process_nontext, so guard against null here.
+    if (bg_renderer && bg_renderer->render_page(cur_doc, pageno))
+        return true;
+    if (fallback_bg_renderer && fallback_bg_renderer->render_page(cur_doc, pageno))
+        return true;
+
+    return false;
+}
+
void HTMLRenderer::setDefaultCTM(const double *ctm)
{
memcpy(default_ctm, ctm, sizeof(default_ctm));
@@ -243,14 +274,21 @@ void HTMLRenderer::endPage() {
if(param.process_nontext)
{
- if (bg_renderer->render_page(cur_doc, pageNum))
+ if (param.delay_background)
{
bg_renderer->embed_image(pageNum);
}
- else if (fallback_bg_renderer)
+ else
{
- if (fallback_bg_renderer->render_page(cur_doc, pageNum))
- fallback_bg_renderer->embed_image(pageNum);
+ if (bg_renderer->render_page(cur_doc, pageNum))
+ {
+ bg_renderer->embed_image(pageNum);
+ }
+ else if (fallback_bg_renderer)
+ {
+ if (fallback_bg_renderer->render_page(cur_doc, pageNum))
+ fallback_bg_renderer->embed_image(pageNum);
+ }
}
}
@@ -294,6 +332,13 @@ void HTMLRenderer::endPage() {
{
f_pages.fs << "" << endl;
}
+
+ if (param.delay_background)
+ {
+ page_cache[this->pageNum] = {
+ .covered_text_detector = covered_text_detector,
+ };
+ }
}
void HTMLRenderer::pre_process(PDFDoc * doc)
diff --git a/pdf2htmlEX/src/Param.h b/pdf2htmlEX/src/Param.h
index 859c78b7..b382b96f 100644
--- a/pdf2htmlEX/src/Param.h
+++ b/pdf2htmlEX/src/Param.h
@@ -46,6 +46,7 @@ struct Param
int printing;
int fallback;
int tmp_file_size_limit;
+ int delay_background;
// fonts
int embed_external_font;