Skip to content

Commit 94f3bf1

Browse files
committed
Adding additional common datastructures for PDF translation.
1 parent 32e0363 commit 94f3bf1

File tree

8 files changed

+53
-13
lines changed

8 files changed

+53
-13
lines changed

REQUIRE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ julia 0.6
22
Compat
33
BufferedStreams 0.3
44
Libz 0.2
5+
StringEncodings
56
Documenter

src/CDObject.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Common data type aliases made available to the other modules.
export CDTextString, CDDate
2+
3+
# Text strings are represented by the built-in `String` type.
const CDTextString = String
4+
# Dates are represented by `DateTime`.
const CDDate = DateTime

src/Common.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,6 @@ include("bytes.jl")
99
include("errors.jl")
1010
include("Utils.jl")
1111
include("BufferParser.jl")
12+
include("CDObject.jl")
1213

1314
end

src/Cos.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ using ..Common
1010
using Compat
1111

1212
include("CosObject.jl")
13+
include("CosObjectHelpers.jl")
1314
include("CosStream.jl")
1415
include("CosReader.jl")
1516
include("CosObjStream.jl")

src/CosObject.jl

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ export CosDict, CosString, CosNumeric, CosBoolean, CosTrue, CosFalse,
44
CosObject, CosNull, CosNullType,CosFloat, CosInt, CosArray, CosName,
55
CosDict, CosIndirectObjectRef, CosStream, get, set!
66

7+
using StringEncodings
8+
79
@compat abstract type CosObject end
810

911
# Return the value wrapped by a COS object (its `val` field).
# Uses `where` for the type parameter, matching the `where` syntax used
# elsewhere in this file; the older `get{T}(...)` form is no longer accepted
# by current Julia versions.
@inline get(o::T) where {T<:CosObject} = o.val
@@ -61,25 +63,13 @@ end
6163
CosXString(arr::Vector{UInt8})=new(arr)
6264
end
6365

64-
Base.convert(::Type{Vector{UInt8}}, xstr::CosXString)=
65-
(xstr.val |> String |> hex2bytes)
66-
67-
Base.convert(::Type{String}, xstr::CosXString)=
68-
String(convert(Vector{UInt8},xstr))
69-
70-
7166
# A `CosString` subtype whose `val` field holds the string as a byte vector.
@compat struct CosLiteralString <: CosString
    val::Vector{UInt8}

    # Wrap an existing byte vector as-is; the vector is stored, not copied.
    function CosLiteralString(arr::Vector{UInt8})
        return new(arr)
    end
end

# Convenience constructor: transcode `str` to bytes and wrap the result.
function CosLiteralString(str::AbstractString)
    bytes = transcode(UInt8, str)
    return CosLiteralString(bytes)
end
7772

78-
Base.convert(::Type{Vector{UInt8}}, str::CosLiteralString)=copy(str.val)
79-
80-
Base.convert(::Type{String}, str::CosLiteralString)=
81-
String(convert(Vector{UInt8},str))
82-
8373
@compat mutable struct CosArray <: CosObject
8474
val::Array{CosObject,1}
8575
function CosArray(arr::Array{T,1} where {T<:CosObject})

src/CosObjectHelpers.jl

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
using ..Common
2+
3+
# Decode the raw bytes of a PDF text string into a Julia `String`.
#
# A leading 0xFE 0xFF byte-order mark means the remaining bytes are UTF-16BE;
# the mark itself is not part of the text and is dropped. Anything else is
# assumed to be PDFDocEncoding, which is approximated here by ISO-8859-1.
function _cdtextstring_from_bytes(bytes::Vector{UInt8})
    if length(bytes) >= 2 && bytes[1] == 0xfe && bytes[2] == 0xff
        # Assemble the code units big-endian by hand so the result does not
        # depend on the host byte order. A dangling trailing byte is ignored.
        units = UInt16[(UInt16(bytes[i]) << 8) | bytes[i + 1]
                       for i in 3:2:(length(bytes) - 1)]
        return transcode(String, units)
    end
    return StringEncodings.decode(bytes, "ISO_8859-1")
end

"""
    convert(::Type{CDTextString}, xstr::CosXString)

Convert a hexadecimal string object to a `CDTextString`.

`xstr.val` holds the hexadecimal digits. They are turned into bytes and then
decoded as UTF-16BE when the bytes start with the 0xFE 0xFF byte-order mark,
or as ISO-8859-1 otherwise. The byte-order mark is not included in the result.
Strings of any length (including empty ones) are accepted, and `xstr` itself
is never modified.
"""
function Base.convert(::Type{CDTextString}, xstr::CosXString)
    # Copy first: `String(::Vector{UInt8})` may take ownership of its argument
    # on newer Julia versions and must not be allowed to disturb `xstr.val`.
    hex = String(copy(xstr.val))
    # PDF rule: with an odd number of hex digits the final digit is an implied 0.
    if isodd(length(hex))
        hex = string(hex, '0')
    end
    return CDTextString(_cdtextstring_from_bytes(hex2bytes(hex)))
end

"""
    convert(::Type{CDTextString}, lstr::CosLiteralString)

Convert a literal string object to a `CDTextString`, using the same decoding
rules as for hexadecimal strings: UTF-16BE when the bytes start with the
0xFE 0xFF byte-order mark, ISO-8859-1 otherwise.
"""
Base.convert(::Type{CDTextString}, lstr::CosLiteralString) =
    CDTextString(_cdtextstring_from_bytes(lstr.val))

src/PDDoc.jl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@ export PDDoc,
22
pdDocOpen,
33
pdDocClose,
44
pdDocGetCatalog,
5+
pdDocGetInfo, pdDocGetProducers,
56
pdDocGetCosDoc,
67
pdDocGetPageCount,
78
pdDocGetPage
89

10+
using ..Common
11+
912
@compat abstract type PDDoc end
1013

1114
function pdDocOpen(fp::String)
@@ -37,7 +40,18 @@ end
3740
# Placeholder method for looking up a page by name: there is no
# implementation yet, so every call evaluates to `nothing`.
function pdDocGetPage(doc::PDDoc, name::String)
    return nothing
end
3942

43+
"""
    pdDocGetInfo(doc::PDDoc)

Look up the `Info` entry in the first trailer of the document's COS document
and return the object obtained by passing that entry to `cosDocGetObject`.
"""
function pdDocGetInfo(doc::PDDoc)
    cosdoc = doc.cosDoc
    inforef = get(cosdoc.trailer[1], CosName("Info"))
    return cosDocGetObject(cosdoc, inforef)
end
4048

49+
"""
    pdDocGetProducers(doc::PDDoc)

Return a `Dict` with the keys `"creator"` and `"producer"`, holding the
`Creator` and `Producer` entries of the object returned by `pdDocGetInfo`,
each converted to a `CDTextString`.
"""
function pdDocGetProducers(doc::PDDoc)
    infodict = pdDocGetInfo(doc)
    # Fetch one named entry and turn it into a text string.
    textof(key) = CDTextString(get(infodict, CosName(key)))
    return Dict("creator" => textof("Creator"), "producer" => textof("Producer"))
end
4155

4256
@compat mutable struct PDDocImpl <: PDDoc
4357
cosDoc::CosDoc

test/runtests.jl

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,11 @@ include("debugIO.jl")
1313
filename="files/1.pdf"
1414
println(filename)
1515
doc = pdDocOpen(filename)
16+
println(pdDocGetCatalog(doc))
1617
cosDoc = pdDocGetCosDoc(doc)
17-
println(cosDocGetRoot(cosDoc))
18+
map(println, cosDoc.trailer)
19+
println(pdDocGetInfo(doc))
20+
println(pdDocGetProducers(doc))
1821
@assert pdDocGetPageCount(doc) == 2
1922
page = pdDocGetPage(doc, 1)
2023
@assert pdPageIsEmpty(page) == false

0 commit comments

Comments
 (0)