Skip to content

Commit 208d064

Browse files
committed
When a content stream is compressed with bad flate data, try to extract
as much data as possible and show some results.
1 parent bc85a7f commit 208d064

File tree

2 files changed

+24
-4
lines changed

2 files changed

+24
-4
lines changed

src/Inflate.jl

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ _inflate(stm::z_stream) =
6969
ccall((:inflate, libz), Cint, (Ref{z_stream}, Cint), stm, 0)
7070

7171
function inflate(io::IO)
72-
CHUNK = 16384
72+
CHUNK = 262144
7373

7474
iob = IOBuffer()
7575

@@ -92,10 +92,13 @@ function inflate(io::IO)
9292
strm.next_out = pointer(oub)
9393
ret = _inflate(strm)
9494
ret == Z_STREAM_ERROR && error("zlib stream state clobbered")
95-
ret != Z_OK && ret != Z_STREAM_END && _zerror(ret)
95+
ret == Z_DATA_ERROR && @warn "invalid or incomplete inflate data"
96+
ret != Z_OK && ret != Z_STREAM_END && ret != Z_DATA_ERROR &&
97+
_zerror(ret)
9698
have = CHUNK - strm.avail_out
9799
resize!(oub, have)
98100
write(iob, oub)
101+
(ret == Z_STREAM_END || ret == Z_DATA_ERROR) && break
99102
resize!(oub, CHUNK)
100103
end
101104
end
@@ -108,7 +111,6 @@ end
108111
function _zerror(ret::Cint)
109112
msg =
110113
ret == Z_STREAM_ERROR ? "invalid compression level" :
111-
ret == Z_DATA_ERROR ? "invalid or incomplete deflate data" :
112114
ret == Z_MEM_ERROR ? "out of memory" :
113115
ret == Z_VERSION_ERROR ? "zlib version mismatch!" :
114116
"zlib internal error"

test/runtests.jl

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ using PDFIO.Common: read_pkcs12
1313

1414
include("debugIO.jl")
1515

16-
pdftest_ver = "0.0.7"
16+
pdftest_ver = "0.0.8"
1717
pdftest_link = "https://github.com/sambitdash/PDFTest/archive/v"*pdftest_ver
1818

1919
zipfile = "pdftest-"*pdftest_ver
@@ -273,6 +273,24 @@ end
273273
pdDocClose(doc)
274274
length(utilPrintOpenFiles()) == 0
275275
end
276+
@test begin
277+
# Corrupt deflate stream
278+
filename="00007.pdf"
279+
DEBUG && println(filename)
280+
resfile, template, filename = local_testfiles(filename)
281+
doc = pdDocOpen(filename)
282+
io = util_open(resfile, "w")
283+
@info "Begin testing corrupt flate"
284+
try
285+
extract_text(io, doc)
286+
finally
287+
util_close(io)
288+
pdDocClose(doc)
289+
end
290+
@info "End testing corrupt flate"
291+
@test files_equal(resfile, template)
292+
length(utilPrintOpenFiles()) == 0
293+
end
276294
end
277295
@testset "Document without Info" begin
278296
@test begin

0 commit comments

Comments
 (0)