Skip to content

Commit 826552f

Browse files
committed
Merge branch 'master' of https://github.com/sambitdash/PDFIO.jl
2 parents abc7747 + d64c258 commit 826552f

File tree

11 files changed

+173
-44
lines changed

11 files changed

+173
-44
lines changed

.github/workflows/Build.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,14 @@ jobs:
1111
test:
1212
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
1313
runs-on: ${{ matrix.os }}
14+
continue-on-error: ${{ matrix.version == 'nightly' }}
1415
strategy:
1516
fail-fast: false
1617
matrix:
1718
version:
1819
- '1.6'
1920
- '1' # automatically expands to the latest stable 1.x release of Julia
20-
- nightly
21+
- 'nightly'
2122
os:
2223
- ubuntu-latest
2324
arch:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ test/*.pdf
3131
test/*.res
3232
test/pvt
3333
test/PDFTest*
34+
test/pdftest*
3435
file.txt
3536
data/fonts/Arial.afm
3637
test/sample01.pem

Project.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ julia = "1.6"
2727
[extras]
2828
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
2929
ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
30+
Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
3031

3132
[targets]
32-
test = ["Test", "ZipFile"]
33+
test = ["Test", "ZipFile", "Downloads"]

src/CosDoc.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ function read_trailer(ps::IOStream, lookahead::Int)
391391
end
392392

393393
function doc_trailer_update(ps::IOStream, doc::CosDocImpl)
394-
TRAILER_REWIND = 50
394+
TRAILER_REWIND = 256
395395

396396
seek(ps, doc.size-TRAILER_REWIND)
397397

src/Inflate.jl

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,7 @@ const Z_MEM_ERROR = -4
4545
const Z_BUF_ERROR = -5
4646
const Z_VERSION_ERROR = -6
4747

48-
@static if Base.VERSION > v"1.3-"
49-
using Zlib_jll: libz
50-
else
51-
isfile(joinpath(dirname(@__FILE__),"..","deps","deps.jl")) ||
52-
error("PDFIO not properly installed. Please run Pkg.build(\"PDFIO\")")
53-
include("../deps/deps.jl")
54-
end
48+
using Zlib_jll: libz
5549

5650
_zlibVersion() = ccall((:zlibVersion, libz), Ptr{Cstring}, ())
5751

src/LibCrypto.jl

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,4 @@
1-
@static if Base.VERSION > v"1.3-"
2-
using OpenSSL_jll: libcrypto
3-
else
4-
isfile(joinpath(dirname(@__FILE__), "..", "deps", "deps.jl")) ||
5-
error("PDFIO not properly installed. Please run Pkg.build(\"PDFIO\")")
6-
include("../deps/deps.jl")
7-
end
1+
using OpenSSL_jll: libcrypto
82

93
using Base: SecretBuffer, SecretBuffer!
104
import Base: copy

src/PDFontTables.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,17 @@ const GlyphName_to_ZAPEncoding = reverse_dict(ZAPEncoding_to_GlyphName)
5555

5656
using AdobeGlyphList
5757

58-
function agl_mapping_to_dict(m)
58+
# Build a glyph-name => character dictionary from the table `m`.
# By default column 1 supplies the names and column 2 the characters; with
# `fn=true` the two columns are read the other way round. Names are stripped
# of surrounding whitespace before being wrapped in a CosName.
function agl_mapping_to_dict(m; fn=false)
    namecol, charcol = fn ? (2, 1) : (1, 2)
    dict = Dict{CosName, Char}()
    for (name, ch) in zip((@view m[:, namecol]), (@view m[:, charcol]))
        dict[CosName(strip(name))] = ch
    end
    return dict
end
6566

6667
const AGL_Glyph_to_Unicode = agl_mapping_to_dict(agl())
68+
const AGLFN_Glyph_to_Unicode = agl_mapping_to_dict(aglfn(), fn=true)
6769
const AGL_ZAP_to_Unicode = agl_mapping_to_dict(zapfdingbats())
6870
const AGL_Unicode_to_Glyph = reverse_dict(AGL_Glyph_to_Unicode)
6971
const AGL_Unicode_to_ZAP = reverse_dict(AGL_ZAP_to_Unicode)

src/PDFonts.jl

Lines changed: 115 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ function show(io::IO, cmap::CMap)
4545
end
4646

4747

48-
const FontUnicodeMapping = Union{Dict{UInt8, Char}, CMap, Nothing}
48+
const FontUnicodeMapping = Union{Dict{UInt8, Vector{Char}}, CMap, Nothing}
4949

5050
#=
5151
mutable struct FontUnicodeMapping
@@ -56,7 +56,56 @@ mutable struct FontUnicodeMapping
5656
end
5757
=#
5858

59-
function merge_encoding!(fum::Dict{UInt8, Char}, encoding::CosName,
59+
# Merge a single-character encoding table into a font Unicode mapping whose
# values are character sequences. Every `Char` of `enc` is stored as a
# one-element vector, overwriting any entry already present under that code.
# Returns `fum`, like the other `merge!` methods, so the call can be used as
# the value of an expression.
#
# NOTE(review): the function and both argument types belong to Base, which
# makes this method type piracy. A package-local name would be safer; the
# signature is kept here because existing callers use `merge!`.
function Base.merge!(fum::Dict{UInt8, Vector{Char}}, enc::Dict{UInt8, Char})
    for (code, ch) in enc
        fum[code] = [ch]
    end
    return fum
end
64+
65+
# Resolve one glyph-name component (no '.' suffix, no '_' separators) to
# Unicode, following the Adobe Glyph List naming rules:
#   * "uni" + hex digits whose count is a multiple of four: every group of
#     four digits is one 16-bit value outside the surrogate range
#     D800-DFFF; the result is the vector of those characters.
#   * "u" + four to six hex digits: a single scalar value outside the
#     surrogate range and not above 10FFFF; the result is that character.
#   * anything else, including "uni"/"u" names that fail the checks above,
#     is looked up in AGL_Glyph_to_Unicode and then AGLFN_Glyph_to_Unicode,
#     defaulting to zero(Char).
# Lower-case hex digits are accepted too, as the previous pattern did.
function get_agl_unicode(g::AbstractString)::Union{Vector{Char}, Char}
    # Anchored at both ends so a stray 'u' in the middle of an ordinary
    # glyph name is never mistaken for a "u"/"uni" prefix.
    m = match(r"^uni(?'uni'[[:xdigit:]]+)$|^u(?'u'[[:xdigit:]]{4,6})$", g)
    if m !== nothing
        uni, u = m["uni"], m["u"]
        if uni !== nothing
            l = length(uni)
            if mod(l, 4) == 0
                ret = Char[]
                for i = 1:4:l
                    c = parse(UInt16, SubString(uni, i, i+3), base=16)
                    # A surrogate value invalidates the whole name.
                    0xD7FF < c < 0xE000 && break
                    push!(ret, Char(c))
                end
                # Only accept the name when every group was valid.
                length(ret)*4 == l && return ret
            end
        else
            c = parse(UInt32, u, base=16)
            # The two ranges are disjoint, so they must be joined with `||`.
            (c <= 0xD7FF || 0xE000 <= c <= 0x10FFFF) && return Char(c)
        end
    end
    cg = CosName(g)
    return get(AGL_Glyph_to_Unicode, cg, get(AGLFN_Glyph_to_Unicode, cg, zero(Char)))
end
89+
90+
# Translate a full glyph name into its sequence of Unicode characters.
# Everything from the first '.' onward is ignored; the remaining text is
# split on '_' and each component is resolved with get_agl_unicode, the
# results being concatenated in order. A name with nothing before the first
# '.' yields a single zero(Char).
function get_unicodes_from_glyph_name(s::String)
    base = first(split(s, "."))
    isempty(base) && return [zero(Char)]
    chars = Char[]
    foreach(split(base, "_")) do component
        append!(chars, get_agl_unicode(component))
    end
    return chars
end
101+
102+
# For every (code => glyph name) pair in `d`, store under the same code in
# `fum` the characters obtained from get_unicodes_from_glyph_name,
# replacing any entry already present. Returns nothing.
function merge_agl!(fum::Dict{UInt8, Vector{Char}}, d::Dict{UInt8, CosName})
    foreach(pairs(d)) do (code, glyph)
        fum[code] = get_unicodes_from_glyph_name(String(glyph))
    end
end
107+
108+
function merge_encoding!(fum::Dict{UInt8, Vector{Char}}, encoding::CosName,
60109
doc::CosDoc, font::IDDRef{CosDict})
61110
encoding_mapping =
62111
encoding == cn"WinAnsiEncoding" ? WINEncoding_to_Unicode :
@@ -82,10 +131,12 @@ function FontType(subtype::CosName)
82131
return FontDefType()
83132
end
84133

134+
# Entry point if someone wants to handle encoding based on subtype
135+
# By default maps to the default font unicode mapping.
85136
# Generic fallback: the mapping is handed back untouched.
function merge_encoding!(fum::FontUnicodeMapping, ftype::FontType,
                         doc::CosDoc, font::IDDRef{CosDict})
    return fum
end
87138

88-
function merge_encoding!(fum::Dict{UInt8, Char},
139+
function merge_encoding!(fum::Dict{UInt8, Vector{Char}},
89140
ftype::Union{FontType1, FontMMType1},
90141
doc::CosDoc, font::IDDRef{CosDict})
91142
basefont = cosDocGetObject(doc, font, cn"BaseFont")
@@ -104,14 +155,14 @@ end
104155
# Reading encoding from the font files in case of Symbolic fonts are not
105156
# supported.
106157
# Font subset is addressed with font name identification.
107-
function merge_encoding!(fum::Dict{UInt8, Char}, encoding::CosNullType,
158+
# Method for a null encoding argument: the font's /Subtype entry decides
# which subtype-specific merge_encoding! method runs. When /Subtype is
# CosNull as well, `fum` is returned unchanged.
function merge_encoding!(fum::Dict{UInt8, Vector{Char}}, encoding::CosNullType,
                         doc::CosDoc, font::IDDRef{CosDict})
    subtype = cosDocGetObject(doc, font, cn"Subtype")
    if subtype === CosNull
        return fum
    end
    ftype = FontType(subtype)
    return merge_encoding!(fum, ftype, doc, font)
end
113164

114-
function merge_encoding!(fum::Dict{UInt8, Char},
165+
function merge_encoding!(fum::Dict{UInt8, Vector{Char}},
115166
encoding::IDD{CosDict},
116167
doc::CosDoc, font::IDDRef{CosDict})
117168
baseenc = cosDocGetObject(doc, encoding, cn"BaseEncoding")
@@ -133,8 +184,7 @@ function merge_encoding!(fum::Dict{UInt8, Char},
133184
end
134185
end
135186

136-
dict_to_unicode = dict_remap(d, AGL_Glyph_to_Unicode)
137-
merge!(fum, dict_to_unicode)
187+
merge_agl!(fum, d)
138188
return fum
139189
end
140190

@@ -143,7 +193,7 @@ function get_unicode_mapping(doc::CosDoc, font::IDDRef{CosDict})
143193
toUnicode !== CosNull &&
144194
return get_unicode_mapping(toUnicode)
145195
encoding = cosDocGetObject(doc, font, cn"Encoding")
146-
d = merge_encoding!(Dict{UInt8, Char}(), encoding, doc, font)
196+
d = merge_encoding!(Dict{UInt8, Vector{Char}}(), encoding, doc, font)
147197
return length(d) == 0 ? nothing : d
148198
end
149199

@@ -218,11 +268,11 @@ function get_glyph_id_mapping(cosdoc::CosDoc, cosfont::IDD{CosDict})
218268
return glyph_name_to_cid, cid_to_glyph_name
219269
end
220270

221-
get_encoded_string(s::CosString, fum::Union{Dict{UInt8, Char}, CMap}) =
271+
# Convert the CosString to its raw bytes and delegate to the byte-based
# get_encoded_string method selected by the type of `fum`.
function get_encoded_string(s::CosString, fum::FontUnicodeMapping)
    bytes = Vector{UInt8}(s)
    return get_encoded_string(bytes, fum)
end
223273

224274
# Decode a byte sequence through a byte => characters table and return the
# result as a String. Empty input yields "" without consulting the table.
function get_encoded_string(v::Union{Vector{UInt8}, NTuple{N, UInt8}},
                            fum::Dict{UInt8, Vector{Char}}) where N
    if length(v) == 0
        return ""
    end
    chars = NativeEncodingToUnicode(v, fum)
    return String(chars)
end
@@ -334,8 +384,17 @@ cmap_command(b::Vector{UInt8}) =
334384
length(b), b != beginbfchar && b != beginbfrange && b != begincodespacerange ?
335385
nothing : Symbol(String(b))
336386

387+
# Return a new CosXString built from the bytes of `obj` after advancing its
# first two bytes, read as a big-endian 16-bit value, by `offset`. Any
# further bytes are carried over unchanged. The CMap repair code uses this
# to compute the destination for the later pieces of a range it splits.
#
# The UInt16 conversion throws when the advanced value does not fit in
# 16 bits, and indexing throws when `obj` has fewer than two bytes.
function _offset(obj::CosXString, offset)
    da = Vector{UInt8}(obj)
    db = UInt16(da[1]*256 + da[2] + offset)
    da[1], da[2] = UInt8(div(db, 256)), UInt8(mod(db, 256))
    # bytes2hex(io, bytes) is only available from Julia 1.7 while the package
    # still supports 1.6, so the hex text is produced with the
    # String-returning method and converted to bytes.
    return CosXString(Vector{UInt8}(bytes2hex(da)))
end
395+
337396
function on_cmap_command!(stm::IO, command::Symbol,
338-
params::Vector{CosInt}, cmap::CMap)
397+
params::Vector{CosInt}, cmap::CMap)
339398
n = get(pop!(params))
340399
o1, o2, o3 = CosNull, CosNull, CosNull
341400
for i = 1:n
@@ -352,18 +411,57 @@ function on_cmap_command!(stm::IO, command::Symbol,
352411
if l == 1
353412
cmap.range_map[Interval(d1[1], d2[1])] = o3
354413
else
355-
imap = get!(cmap.range_map, Interval(d1[1], d2[1]),
356-
IntervalTree{UInt8, CosObject}())
357-
imap[Interval(d1[2], d2[2])] = o3
414+
if d1[2] <= d2[2]
415+
imap = get!(cmap.range_map, Interval(d1[1], d2[1]),
416+
IntervalTree{UInt8, CosObject}())
417+
imap[Interval(d1[2], d2[2])] = o3
418+
else
419+
@warn "Corrupt CMap file. Repairing... Some encodings may not map properly."
420+
imap = get!(cmap.range_map, Interval(d1[1], d1[1]),
421+
IntervalTree{UInt8, CosObject}())
422+
imap[Interval(d1[2], 0xff)] = o3
423+
o3 = _offset(o3, 0xff - d1[2] + 1)
424+
425+
if d2[1] - d1[1] > 1
426+
i1, i2 = d1[1]+0x1, d2[1]-0x1
427+
imap = get!(cmap.range_map, Interval(i1, i2),
428+
IntervalTree{UInt8, CosObject}())
429+
imap[Interval(0x00, 0xff)] = o3
430+
o3 = _offset(o3, (d2[1] - d1[1] - 1)*0x100)
431+
end
432+
imap = get!(cmap.range_map, Interval(d2[1], d2[1]),
433+
IntervalTree{UInt8, CosObject}())
434+
imap[Interval(0x00, d2[2])] = o3
435+
end
358436
end
359437
else
360438
l = length(d1)
439+
@assert (d1[1] <= d2[1]) E_INVALID_CODESPACERANGE
361440
if l == 1
362441
cmap.code_space[Interval(d1[1], d2[1])] = CosNull
363442
else
364-
imap = IntervalTree{UInt8, CosNullType}()
365-
imap[Interval(d1[2], d2[2])] = CosNull
366-
cmap.code_space[Interval(d1[1], d2[1])] = imap
443+
if d1[2] <= d2[2]
444+
imap = IntervalTree{UInt8, CosNullType}()
445+
imap[Interval(d1[2], d2[2])] = CosNull
446+
cmap.code_space[Interval(d1[1], d2[1])] = imap
447+
else
448+
@warn "Corrupt CMap file. Repairing... Some encodings may not map properly."
449+
imap = IntervalTree{UInt8, CosNullType}()
450+
imap[Interval(d1[2], 0xff)] = CosNull
451+
cmap.code_space[Interval(d1[1], d1[1])] = imap
452+
453+
imap = get!(cmap.code_space, Interval(d1[1], d1[1]), IntervalTree{UInt8, CosNullType}())
454+
imap[Interval(d1[2], 0xff)] = CosNull
455+
456+
imap = get!(cmap.code_space, Interval(d2[1], d2[1]), IntervalTree{UInt8, CosNullType}())
457+
imap[Interval(0x00, d2[2])] = CosNull
458+
459+
if d2[1] - d1[1] > 1
460+
i1, i2 = d1[1]+0x1, d2[1]-0x1
461+
imap = get!(cmap.code_space, Interval(i1, i2), IntervalTree{UInt8, CosNullType}())
462+
imap[Interval(0x00, 0xff)] = CosNull
463+
end
464+
end
367465
end
368466
end
369467
end

src/Utils.jl

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,9 @@ end
8080
const Unicode_to_PDFEncoding = reverse_dict(PDFEncoding_to_Unicode)
8181

8282
# Translate each element of `barr` through `mapping` and collect the
# resulting characters in order. A mapped value may contribute several
# characters; an element missing from `mapping` contributes zero(Char).
function NativeEncodingToUnicode(barr, mapping::Dict)
    carr = Char[]
    foreach(barr) do code
        append!(carr, get(mapping, code, zero(Char)))
    end
    return carr
end

src/errors.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ export E_EXPECTED_EOF, E_UNEXPECTED_EOF, E_UNEXPECTED_CHAR, E_BAD_KEY,
22
E_BAD_ESCAPE, E_BAD_CONTROL, E_LEADING_ZERO, E_BAD_NUMBER, E_BAD_HEADER,
33
E_BAD_TRAILER, E_BAD_TYPE, E_NOT_IMPLEMENTED,
44
E_INVALID_OBJECT, E_INVALID_PAGE_NUMBER, E_INVALID_PAGE_LABEL,
5-
E_NOT_TAGGED_PDF, E_INVALID_PASSWORD
5+
E_NOT_TAGGED_PDF, E_INVALID_PASSWORD, E_INVALID_CODESPACERANGE
66

77
# The following errors may be thrown by the reader
88
const E_EXPECTED_EOF = "Expected end of input"
@@ -28,3 +28,4 @@ const E_INVALID_PASSWORD = "The password supplied to open the document is inv
2828
const E_INVALID_CRYPT = "The crypt handler is not supported"
2929
const E_NOT_TAGGED_PDF = "PDF file is not tagged"
3030
const E_NOT_IMPLEMENTED = "Not Implemented"
31+
const E_INVALID_CODESPACERANGE = "Invalid code space range in CMap"

0 commit comments

Comments
 (0)