Skip to content

Commit 1979381

Browse files
committed
Cleaner handling of Object Streams and XRef tables
1 parent 696fe56 commit 1979381

File tree

2 files changed

+108
-88
lines changed

2 files changed

+108
-88
lines changed

src/CosDoc.jl

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,18 @@ end
168168
function cosDocGetObject(doc::CosDocImpl, stmref::CosIndirectObjectRef,
169169
ref::CosIndirectObjectRef, locObj::CosObjectLoc)
170170
objstm = cosDocGetObject(doc, stmref)
171+
if (objstm === CosNull)
172+
#= This fallback should not normally be needed, but the PDF specification is
173+
ambiguous about whether object streams must be referenced in the XRef stream.
174+
175+
An object stream itself, like any stream, shall be an indirect object, and
176+
therefore, there shall be an entry for it in a cross-reference table or
177+
cross-reference stream (see 7.5.8, "Cross-Reference Streams"), although there
178+
might not be any references to it (of the form 243 0 R).
179+
=#
180+
objstm = scan_object_stream(doc, stmref)
181+
attach_object(doc, objstm)
182+
end
171183
(objstm === CosNull) && return CosNull
172184
if (locObj.obj == CosNull)
173185
locObj.obj = cosObjectStreamGetObject(objstm, ref, locObj.loc)
@@ -176,6 +188,25 @@ function cosDocGetObject(doc::CosDocImpl, stmref::CosIndirectObjectRef,
176188
return locObj.obj
177189
end
178190

191+
"""
    scan_object_stream(doc::CosDocImpl, stmref::CosIndirectObjectRef)

Look for the object referenced by `stmref` by scanning the bytes of `doc.ps` that
immediately precede `doc.startxref` (at most 2048 bytes) for its
`"<num> <gen> obj"` header.

When the header is found and the object parses, the object is recorded in
`doc.xref` under `stmref` and returned. Otherwise `CosNull` is returned and
`doc.xref` is left untouched.
"""
function scan_object_stream(doc::CosDocImpl, stmref::CosIndirectObjectRef)
    # Start of the search window, clamped to the beginning of the file.
    start = max(doc.startxref - 2048, 0)
    # Number of bytes between the window start and startxref.
    window = doc.startxref - start
    seek(doc.ps, start)

    id = get(stmref)
    header = string(id[1], " ", id[2], " obj")
    # NOTE(review): the returned offset is treated as relative to `start` and
    # negative on failure; presumably a plain byte search, so "2 0 obj" could
    # also match inside "12 0 obj" -- confirm against locate_keyword!.
    offset = locate_keyword!(doc.ps, transcode(UInt8, header), window)
    offset < 0 && return CosNull

    objloc = start + offset
    seek(doc.ps, objloc)
    obj = parse_indirect_obj(doc.ps, doc.xref)
    if obj !== CosNull
        # Remember where the object lives so later lookups can use the xref table.
        doc.xref[stmref] = CosObjectLoc(objloc, CosNull, obj)
    end
    return obj
end
209+
179210
function read_header(ps)
180211
skipv(ps,PERCENT)
181212
b = UInt8[]

src/CosObjStream.jl

Lines changed: 77 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,22 @@
11
using BufferedStreams
22

3-
# The xref stream may be accessed later. There is no point encrypting this data
4-
#Ideal will be to remove the filter.
53
"""
    make_number(data, start, nbytes)

Decode an unsigned big-endian integer from `nbytes` consecutive elements of
`data`, namely `data[start+1]` through `data[start+nbytes]` (`start` is a
zero-based offset). Returns `0` when `nbytes` is `0`.
"""
function make_number(data, start, nbytes)
    value = 0
    for pos in (start + 1):(start + nbytes)
        # Shift what has been read so far up by one byte and append the next one.
        value = value * 256 + data[pos]
    end
    return value
end
1311

1412
"""
    get_xref_record(data, start, w)

Decode one cross-reference record from `data`, beginning after the zero-based
offset `start`. `w` holds the byte width of each field; the fields are read
back to back with `make_number` and returned, in order, as a `Vector{Int}`.
"""
function get_xref_record(data, start, w)
    fields = Vector{Int}()
    offset = start
    for width in w
        push!(fields, make_number(data, offset, width))
        # The next field begins right after the one just decoded.
        offset += width
    end
    return fields
end
2321

2422
function createObjectStreams(stm::CosStream)
@@ -31,62 +29,59 @@ function createObjectStreams(stm::CosStream)
3129
end
3230

3331
"""
    read_xref_stream(xrefstm::CosObject, xref::Dict{CosIndirectObjectRef, CosObjectLoc})

Decode the cross-reference stream `xrefstm` and merge its entries into `xref`.

The stream dictionary must have `/Type /XRef`, a `/Size` and a three-element
`/W` array giving the byte width of each record field. `/Index`, when present,
is a flat list of `(first object number, count)` pairs; when absent a single
section `[0 Size]` is assumed.

Each record is interpreted by its type field:
- `0`: free entry, ignored;
- `1`: uncompressed object; field 2 is stored as the location and field 3 as
  the generation number;
- `2`: object inside an object stream; field 2 is the object number of that
  stream and field 3 is stored as the location (index within the stream).

A zero-width type field means the type is absent and defaults to `1`, as the
PDF specification requires. Entries already present in `xref` are not
overwritten. Decoding stops as soon as the stream data is exhausted, even if
`/Index` announces more records.

Returns `xref`.
"""
function read_xref_stream(xrefstm::CosObject,
                          xref::Dict{CosIndirectObjectRef, CosObjectLoc})

    @assert get(xrefstm, cn"Type") == cn"XRef"
    xrefsize = get(xrefstm, cn"Size")
    @assert xrefsize != CosNull

    w = get(xrefstm, cn"W")
    @assert w != CosNull
    @assert length(w) == 3

    index = get(xrefstm, cn"Index")
    if (index == CosNull)
        index = CosArray([CosInt(0), xrefsize])
    end

    # The xref stream may be accessed again later, so its filters are removed
    # once here instead of keeping the data in its encoded form.
    cosStreamRemoveFilters(xrefstm)

    input = get(xrefstm)
    data = try
        read(input)
    finally
        # Release the stream even when reading fails.
        close(input)
    end
    datasize = length(data)

    w_n = get(w, true)   # three field widths
    recsize = sum(w_n)

    lenidx = length(index)
    @assert rem(lenidx, 2) == 0
    idx_int = get(index, true)

    it = 0   # zero-based offset of the next record in data
    # A `break` inside a combined `for` leaves the whole loop nest.
    for i = 1:div(lenidx, 2), j = 0:idx_int[2*i] - 1
        # Stop before reading a record that the data cannot fully supply.
        it + recsize > datasize && break

        oid = idx_int[2*i-1] + j
        record = get_xref_record(data, it, w_n)
        it += recsize
        @assert length(record) == 3

        # A zero-width type field means "not present": the type defaults to 1.
        rectype = (w_n[1] == 0) ? 1 : record[1]
        @assert rectype in 0:2

        if rectype != 0
            loc = (rectype == 1) ? record[2] : record[3]
            stm = (rectype == 2) ? CosIndirectObjectRef(record[2], 0) : CosNull
            ref = (rectype == 1) ? CosIndirectObjectRef(oid, record[3]) :
                                   CosIndirectObjectRef(oid, 0)
            # Keep an entry that is already in the table.
            !haskey(xref, ref) && (xref[ref] = CosObjectLoc(loc, stm))
        end

        it >= datasize && break
    end
    return xref
end
9186

9287
function read_object_info_from_stm(stm::CosStream,
@@ -105,31 +100,25 @@ function read_object_info_from_stm(stm::CosStream,
105100
end
106101
end
107102

108-
"""
    cosObjectStreamGetObject(stm::CosIndirectObject{CosObjectStream},
                             ref::CosIndirectObjectRef, loc::Int)

Unwrap the indirect object `stm` and forward the lookup to the method that
works on the wrapped `CosObjectStream` (`stm.obj`).
"""
function cosObjectStreamGetObject(stm::CosIndirectObject{CosObjectStream},
                                  ref::CosIndirectObjectRef, loc::Int)
    return cosObjectStreamGetObject(stm.obj, ref, loc)
end
112105

113-
"""
    cosObjectStreamGetObject(stm::CosObjectStream, ref::CosIndirectObjectRef, loc::Int)

Fetch the object at zero-based position `loc` of the object stream `stm` and
wrap it as a `CosIndirectObject` carrying the identifiers obtained from `ref`.

Returns `CosNull` when the object number stored at that position
(`stm.oids[loc+1]`) differs from the one in `ref`.
"""
function cosObjectStreamGetObject(stm::CosObjectStream, ref::CosIndirectObjectRef, loc::Int)
    idpair = get(ref)
    if stm.oids[loc + 1] == idpair[1]
        # The slot holds the requested object: parse it and attach its identifiers.
        direct = cosObjectStreamGetObject(stm, CosNull, loc)
        return CosIndirectObject(idpair[1], idpair[2], direct)
    end
    return CosNull
end
122112

123-
"""
    cosObjectStreamGetObject(stm::CosObjectStream, ref::CosNullType, loc::Int)

Parse and return the direct object at zero-based position `loc` of the object
stream `stm`.

The file named by the stream's `F` entry is opened for reading, the parser is
positioned at `stm.oloc[loc+1]`, and a single value is parsed. The buffered
stream is closed before returning, whether or not parsing succeeds. `ref` is
only used to select this method.
"""
function cosObjectStreamGetObject(stm::CosObjectStream, ref::CosNullType, loc::Int)
    path = String(get(stm, CosName("F")))
    stream = BufferedInputStream(util_open(path, "r"))
    try
        seek(stream, stm.oloc[loc + 1])
        return parse_value(stream)
    finally
        close(stream)
    end
end

0 commit comments

Comments
 (0)