Skip to content

Commit bc35b7b

Browse files
committed
Handling for corrupt files.
1. In some files, the ObjectStream may not be referenced from the XRefStm. 2. Some objects, such as fonts, may not be available; in that case, PDFDocEncoding is assumed.
1 parent 6cbb3b4 commit bc35b7b

File tree

3 files changed

+15
-10
lines changed

3 files changed

+15
-10
lines changed

src/CosDoc.jl

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,8 @@ end
150150
cosDocGetObject(doc::CosDocImpl, obj::CosObject) = obj
151151

152152
function cosDocGetObject(doc::CosDocImpl, ref::CosIndirectObjectRef)
153-
locObj = doc.xref[ref]
153+
locObj = get(doc.xref, ref, CosObjectLoc(-1))
154+
locObj.loc == -1 && return CosNull
154155
return cosDocGetObject(doc, locObj.stm, ref, locObj)
155156
end
156157

@@ -165,13 +166,14 @@ function cosDocGetObject(doc::CosDocImpl, stm::CosNullType,
165166
end
166167

167168
function cosDocGetObject(doc::CosDocImpl, stmref::CosIndirectObjectRef,
168-
ref::CosIndirectObjectRef, locObj::CosObjectLoc)
169-
objstm = cosDocGetObject(doc, stmref)
170-
if (locObj.obj == CosNull)
171-
locObj.obj = cosObjectStreamGetObject(objstm, ref, locObj.loc)
172-
attach_object(doc, locObj.obj)
173-
end
174-
return locObj.obj
169+
ref::CosIndirectObjectRef, locObj::CosObjectLoc)
170+
objstm = cosDocGetObject(doc, stmref)
171+
(objstm === CosNull) && return CosNull
172+
if (locObj.obj == CosNull)
173+
locObj.obj = cosObjectStreamGetObject(objstm, ref, locObj.loc)
174+
attach_object(doc, locObj.obj)
175+
end
176+
return locObj.obj
175177
end
176178

177179
function read_header(ps)
@@ -234,6 +236,7 @@ function doc_trailer_update(ps::BufferedInputStream, doc::CosDocImpl)
234236

235237
if doc.isPDF
236238
seek(ps, doc.startxref)
239+
chomp_space!(ps)
237240
doc.hasNativeXRefStm = (may_have_xrefstream(doc) && ispdfdigit(peek(ps)))
238241
(doc.hasNativeXRefStm) ? read_xref_streams(ps, doc) : read_xref_tables(ps, doc)
239242
end

src/PDPage.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ function page_find_font(page::PDPageImpl, fontname::CosName)
159159
cosdoc = page.doc.cosDoc
160160
pgnode = page.cospage
161161

162-
while font === CosNull || pgnode !== CosNull
162+
while font === CosNull && pgnode !== CosNull
163163
resref = get(pgnode, cn"Resources")
164164
resources = cosDocGetObject(cosdoc, resref)
165165
if resources !== CosNull
@@ -175,5 +175,7 @@ function page_find_font(page::PDPageImpl, fontname::CosName)
175175
return font
176176
end
177177

178+
get_encoded_string(s::CosString, fontname::CosNullType, page::PDPage) = CDTextString(s)
179+
178180
get_encoded_string(s::CosString, fontname::CosName, page::PDPage) =
179181
get_encoded_string(s, get(page.fums, fontname, nothing))

src/PDPageElement.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ function showtext(io::IO, grp::PDPageObjectGroup, state::Dict=Dict())
472472
end
473473

474474
function showtext(io::IO, tr::PDPageTextRun, state::Dict=Dict())
475-
fontname, font = get(state, :font, CosNull)
475+
fontname, font = get(state, :font, (CosNull, CosNull))
476476
page = get(state, :page, CosNull)
477477
for s in tr.ss
478478
text = get_encoded_string(s, fontname, page)

0 commit comments

Comments
 (0)