Skip to content

Commit 9c8f210

Browse files
committed
Adding common data structures for PDF
1 parent 94f3bf1 commit 9c8f210

File tree

7 files changed

+67
-21
lines changed

7 files changed

+67
-21
lines changed

REQUIRE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ Compat
33
BufferedStreams 0.3
44
Libz 0.2
55
StringEncodings
6+
TimeZones
67
Documenter

src/CDObject.jl

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,38 @@
1-
export CDTextString, CDDate
1+
export CDTextString, CDDate, CDRect
22

33
const CDTextString = String
4-
const CDDate = DateTime
4+
5+
using TimeZones
6+
7+
"""
8+
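PDF files store dates as text strings of the form (D:YYYYMMDDHHmmSSOHH'mm), where
O is the sign of the offset from UTC (`+`, `-` or `Z`) and HH'mm is that offset.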
9+
10+
"""
11+
@compat struct CDDate
    # The wrapped zoned timestamp.
    d::ZonedDateTime

    # Inner constructor: wraps an already-built ZonedDateTime as-is.
    function CDDate(d::ZonedDateTime)
        return new(d)
    end
end
15+
16+
"""
    CDDate(s::CDTextString)

Parse a PDF date string such as `D:20170626205135+05'30'` into a `CDDate`.

The optional `D:` prefix and the apostrophes around the offset fields are
removed before parsing. A `Z` (UTC) marker, whether it ends the string or is
followed by a zero offset as in `Z00'00'`, is rewritten to the numeric offset
`+0000`, so every input is parsed with the single format `yyyymmddHHMMSSzzzz`.

Errors raised by `ascii` (non-ASCII input) or by the `ZonedDateTime` parser
(text that does not match the format) propagate to the caller.
"""
function CDDate(s::CDTextString)
    s = ascii(s)
    if startswith(s, "D:")
        s = s[3:end]
    end
    # Drop the apostrophes PDF places in the offset: +05'30' -> +0530.
    s = join(split(s, '\''))
    # Normalise the UTC marker to a numeric offset. Whatever follows the `Z`
    # is discarded, so both "...Z" and "...Z0000" end up with an explicit zone.
    pieces = split(s, 'Z')
    if length(pieces) > 1
        s = string(pieces[1], "+0000")
    end
    return CDDate(ZonedDateTime(s, "yyyymmddHHMMSSzzzz"))
end
30+
31+
# Display a CDDate exactly as its wrapped ZonedDateTime is displayed.
function Base.show(io::IO, dt::CDDate)
    return show(io, dt.d)
end
32+
33+
# Rectangle described by four numbers that all share one numeric type `T`.
# NOTE(review): the field names suggest lower-left (llx, lly) and upper-right
# (urx, ury) corner coordinates -- confirm against the callers.
@compat struct CDRect{T <: Number}
    llx::T
    lly::T

    urx::T
    ury::T
end

src/CosObject.jl

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@ export CosDict, CosString, CosNumeric, CosBoolean, CosTrue, CosFalse,
44
CosObject, CosNull, CosNullType,CosFloat, CosInt, CosArray, CosName,
55
CosDict, CosIndirectObjectRef, CosStream, get, set!
66

7-
using StringEncodings
8-
97
@compat abstract type CosObject end
108

119
# Generic accessor: return the `val` field of any CosObject.
@inline function get{T<:CosObject}(o::T)
    return o.val
end
@@ -191,7 +189,7 @@ showref(io::IO, o::CosObject) = show(io, o)
191189

192190
# The null object is always written as the literal text `null`.
function show(io::IO, o::CosNullType)
    return print(io, "null")
end
193191

194-
show(io::IO, o::CosName) = @printf io "/%s" split(String(o.val),'_')[2]
192+
# Write a name object as a slash followed by its text form.
function show(io::IO, o::CosName)
    return @printf io "/%s" String(o)
end
195193

196194
# Write a hex string object as its content wrapped in angle brackets.
# The output goes to the supplied `io`; the earlier form called `@printf`
# without a stream argument, so it always wrote to standard output.
function show(io::IO, o::CosXString)
    # Convert a copy so that `o.val` itself is never handed to `String`.
    # NOTE(review): assumes `o.val` is a byte vector -- confirm.
    return print(io, "<", String(copy(o.val)), ">")
end
197195

src/CosObjectHelpers.jl

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
using ..Common
22

3-
function Base.convert(::Type{CDTextString}, xstr::CosXString)
3+
import Base: convert
4+
5+
using StringEncodings
6+
7+
function convert(::Type{CDTextString}, xstr::CosXString)
48
const feff = [LATIN_F, LATIN_E, LATIN_F, LATIN_F]
59
const FEFF = [LATIN_UPPER_F, LATIN_UPPER_E, LATIN_UPPER_F, LATIN_UPPER_F]
610
prefix = xstr.val[1:4]
@@ -13,7 +17,7 @@ function Base.convert(::Type{CDTextString}, xstr::CosXString)
1317
(buffer[2i-1], buffer[2i]) = (buffer[2i], buffer[2i-1])
1418
end
1519
end
16-
utf_16_data = reinterpret(UInt16, buffer)
20+
utf_16_data = reinterpret(UInt16, buffer[3:end])
1721
str = transcode(String, utf_16_data)
1822
else
1923
# Assume PDFDocEncoding (ISO-8859-1)
@@ -22,5 +26,15 @@ function Base.convert(::Type{CDTextString}, xstr::CosXString)
2226
return CDTextString(str)
2327
end
2428

25-
Base.convert(::Type{CDTextString}, lstr::CosLiteralString) =
29+
# Decode the raw content of a literal string as ISO 8859-1 text.
function convert(::Type{CDTextString}, lstr::CosLiteralString)
    decoded = StringEncodings.decode(lstr.val, "ISO_8859-1")
    return CDTextString(decoded)
end
31+
32+
# Convert a COS integer to the requested numeric type via its wrapped value.
function convert{T <: Number}(::Type{T}, i::CosInt)
    return T(get(i))
end
33+
34+
# Convert a COS float to the requested numeric type via its wrapped value.
function convert{T <: Number}(::Type{T}, f::CosFloat)
    return T(get(f))
end
35+
36+
# Build a CDRect by splatting the array's elements into the constructor.
# NOTE(review): relies on CosArray being iterable and yielding values that
# CDRect accepts -- confirm.
function convert(::Type{CDRect}, a::CosArray)
    return CDRect(a...)
end
37+
38+
# Turn any COS string into a CDDate: first to text, then parse the text.
function convert{T <: CosString}(::Type{CDDate}, ls::T)
    text = CDTextString(ls)
    return CDDate(text)
end
39+
40+
# Text of a name object: everything after the first underscore of `name.val`.
# NOTE(review): assumes `name.val` is stored as `<prefix>_<name>` with no
# underscore inside the prefix -- confirm against CosName's constructor.
function convert(::Type{CDTextString}, name::CosName)
    # limit=2 splits at the first '_' only, so a name that itself contains
    # underscores (e.g. `Foo_Bar`) is returned whole instead of truncated.
    parts = split(String(name.val), '_'; limit=2)
    return CDTextString(parts[2])
end

src/PDDoc.jl

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ export PDDoc,
22
pdDocOpen,
33
pdDocClose,
44
pdDocGetCatalog,
5-
pdDocGetInfo, pdDocGetProducers,
5+
pdDocGetInfo,
66
pdDocGetCosDoc,
77
pdDocGetPageCount,
88
pdDocGetPage
@@ -43,14 +43,13 @@ end
4343
"""
    pdDocGetInfo(doc::PDDoc)

Fetch the object referenced by the `Info` entry of the first trailer of the
document and return its entries as a
`Dict{CDTextString, Union{CDTextString, CDDate}}`.

The values of the `CreationDate` and `ModDate` keys are converted to `CDDate`;
every other value is converted to `CDTextString`.
"""
function pdDocGetInfo(doc::PDDoc)
    infoRef = get(doc.cosDoc.trailer[1], CosName("Info"))
    infoObj = cosDocGetObject(doc.cosDoc, infoRef)
    dInfo = Dict{CDTextString, Union{CDTextString, CDDate}}()
    for entry in get(infoObj)
        skey = CDTextString(entry[1])
        if skey == "CreationDate" || skey == "ModDate"
            dInfo[skey] = CDDate(entry[2])
        else
            dInfo[skey] = CDTextString(entry[2])
        end
    end
    return dInfo
end
5554

5655
@compat mutable struct PDDocImpl <: PDDoc

test/debugIO.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ IODebug=[0,Vector{Tuple{AbstractString,IOStream}}()]
3333
# Bump the counter held in IODebug[1], derive a path from it inside the
# directory given by get_tempdir(), and open that path for writing.
# Returns the tuple (path, handle returned by util_open).
function get_tempfilepath()
    global IODebug
    IODebug[1] += 1
    filepath = joinpath(get_tempdir(), string(IODebug[1]))
    handle = util_open(filepath, "w")
    return (filepath, handle)
end
3939

4040
function util_open(filename, mode)

test/runtests.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ include("debugIO.jl")
1616
println(pdDocGetCatalog(doc))
1717
cosDoc = pdDocGetCosDoc(doc)
1818
map(println, cosDoc.trailer)
19-
println(pdDocGetInfo(doc))
20-
println(pdDocGetProducers(doc))
19+
info = pdDocGetInfo(doc)
20+
@assert info["Producer"] == "LibreOffice 5.3" && info["Creator"] == "Writer"
2121
@assert pdDocGetPageCount(doc) == 2
2222
page = pdDocGetPage(doc, 1)
2323
@assert pdPageIsEmpty(page) == false

0 commit comments

Comments
 (0)