@@ -45,7 +45,7 @@ function show(io::IO, cmap::CMap)
45
45
end
46
46
47
47
48
- const FontUnicodeMapping = Union{Dict{UInt8, Char}, CMap, Nothing}
48
# Font code -> Unicode mapping: `nothing`, a `CMap`, or a table keyed by
# single-byte character code whose values are sequences of characters.
const FontUnicodeMapping = Union{Nothing, CMap, Dict{UInt8, Vector{Char}}}
49
49
50
50
#=
51
51
mutable struct FontUnicodeMapping
@@ -56,7 +56,56 @@ mutable struct FontUnicodeMapping
56
56
end
57
57
=#
58
58
59
- function merge_encoding! (fum:: Dict{UInt8, Char} , encoding:: CosName ,
59
"""
    merge!(fum::Dict{UInt8, Vector{Char}}, enc::Dict{UInt8, Char})

Copy every entry of the single-character table `enc` into `fum`, wrapping each
character in a one-element vector. Keys already present in `fum` are
overwritten.

Returns `fum`, so the call can be chained like other `merge!` methods.
"""
function Base.merge!(fum::Dict{UInt8, Vector{Char}}, enc::Dict{UInt8, Char})
    for (k, v) in enc
        fum[k] = [v]
    end
    return fum
end
64
+
65
"""
    get_agl_unicode(g::AbstractString) -> Union{Vector{Char}, Char}

Translate one glyph-name component `g` into Unicode.

- `u` followed by hex digits up to the end of the name: when the digit count
  is a multiple of four (and at least four) and no four-digit group is a
  surrogate, returns the `Vector{Char}` of those groups.
- `uni` followed by 4 to 6 hex digits up to the end of the name: when the value
  is a Unicode scalar value (`0x0000`-`0xD7FF` or `0xE000`-`0x10FFFF`),
  returns that `Char`.
- Anything else is looked up in `AGL_Glyph_to_Unicode`, then in
  `AGLFN_Glyph_to_Unicode`, with `zero(Char)` as the final default.
"""
function get_agl_unicode(g::AbstractString)::Union{Vector{Char}, Char}
    # NOTE(review): the Adobe Glyph List specification appears to assign the
    # multiple-of-four form to the `uni` prefix and the 4-6 digit form to the
    # `u` prefix, i.e. the reverse of the group names used here - confirm
    # before relying on either prefix.
    r = r"u(?'u'[[:xdigit:]]+$)|uni(?'uni'[[:xdigit:]]{4,6}$)"
    m = match(r, g)
    if m !== nothing
        u, uni = m["u"], m["uni"]
        if u !== nothing
            l = length(u)
            if l > 3 && mod(l, 4) == 0
                ret = Char[]
                for i = 1:4:l
                    c = parse(UInt16, SubString(u, i, i + 3), base=16)
                    # A surrogate code point invalidates the whole sequence.
                    0xE000 > c > 0xD7FF && break
                    push!(ret, Char(c))
                end
                # Only accept the result when every group was converted.
                length(ret) * 4 == l && return ret
            end
        else
            c = parse(UInt32, uni, base=16)
            # The two valid ranges are disjoint, so they must be OR-ed; the
            # previous `&&` could never be true and rejected every name.
            (c <= 0xD7FF || 0xE000 <= c <= 0x10FFFF) && return Char(c)
        end
    end
    cg = CosName(g)
    return get(AGL_Glyph_to_Unicode, cg, get(AGLFN_Glyph_to_Unicode, cg, zero(Char)))
end
89
+
90
"""
    get_unicodes_from_glyph_name(s::String) -> Vector{Char}

Convert a glyph name into its Unicode characters.

Only the part of `s` before the first `.` is used. When that part is empty the
result is `[zero(Char)]`. Otherwise it is split on `_` and the result of
`get_agl_unicode` for each piece is appended, in order, to the returned vector.
"""
function get_unicodes_from_glyph_name(s::String)
    base = first(split(s, "."))
    if isempty(base)
        return [zero(Char)]
    end
    codepoints = Char[]
    foreach(split(base, "_")) do component
        append!(codepoints, get_agl_unicode(component))
    end
    return codepoints
end
101
+
102
"""
    merge_agl!(fum::Dict{UInt8, Vector{Char}}, d::Dict{UInt8, CosName})

For every character code in `d`, store in `fum` the characters obtained by
passing the glyph name (converted to `String`) to
`get_unicodes_from_glyph_name`. Existing entries of `fum` with the same code
are replaced. Returns `nothing`.
"""
function merge_agl!(fum::Dict{UInt8, Vector{Char}}, d::Dict{UInt8, CosName})
    for code in keys(d)
        glyph_name = String(d[code])
        fum[code] = get_unicodes_from_glyph_name(glyph_name)
    end
    return nothing
end
107
+
108
+ function merge_encoding! (fum:: Dict{UInt8, Vector{Char}} , encoding:: CosName ,
60
109
doc:: CosDoc , font:: IDDRef{CosDict} )
61
110
encoding_mapping =
62
111
encoding == cn " WinAnsiEncoding" ? WINEncoding_to_Unicode :
@@ -82,10 +131,12 @@ function FontType(subtype::CosName)
82
131
return FontDefType ()
83
132
end
84
133
134
"""
    merge_encoding!(fum::FontUnicodeMapping, ftype::FontType, doc, font)

Extension point for encoding handling that depends on the font subtype.
This fallback method does nothing and returns `fum` unchanged.
"""
function merge_encoding!(fum::FontUnicodeMapping, ftype::FontType,
                         doc::CosDoc, font::IDDRef{CosDict})
    return fum
end
87
138
88
- function merge_encoding! (fum:: Dict{UInt8, Char} ,
139
+ function merge_encoding! (fum:: Dict{UInt8, Vector{ Char} } ,
89
140
ftype:: Union{FontType1, FontMMType1} ,
90
141
doc:: CosDoc , font:: IDDRef{CosDict} )
91
142
basefont = cosDocGetObject (doc, font, cn " BaseFont" )
@@ -104,14 +155,14 @@ end
104
155
# Reading the encoding from the font file is not supported for symbolic
# fonts.
# Font subsets are handled by identifying the font name.
107
- function merge_encoding! (fum:: Dict{UInt8, Char} , encoding:: CosNullType ,
158
"""
    merge_encoding!(fum, encoding::CosNullType, doc, font)

Handle a font with a null `encoding` object. The font's `Subtype` entry is
fetched from `doc`: when it is `CosNull`, `fum` is returned as is; otherwise
the work is delegated to the `merge_encoding!` method for `FontType(subtype)`.
"""
function merge_encoding!(fum::Dict{UInt8, Vector{Char}}, encoding::CosNullType,
                         doc::CosDoc, font::IDDRef{CosDict})
    subtype = cosDocGetObject(doc, font, cn"Subtype")
    if subtype !== CosNull
        return merge_encoding!(fum, FontType(subtype), doc, font)
    end
    return fum
end
113
164
114
- function merge_encoding! (fum:: Dict{UInt8, Char} ,
165
+ function merge_encoding! (fum:: Dict{UInt8, Vector{ Char} } ,
115
166
encoding:: IDD{CosDict} ,
116
167
doc:: CosDoc , font:: IDDRef{CosDict} )
117
168
baseenc = cosDocGetObject (doc, encoding, cn " BaseEncoding" )
@@ -133,8 +184,7 @@ function merge_encoding!(fum::Dict{UInt8, Char},
133
184
end
134
185
end
135
186
136
- dict_to_unicode = dict_remap (d, AGL_Glyph_to_Unicode)
137
- merge! (fum, dict_to_unicode)
187
+ merge_agl! (fum, d)
138
188
return fum
139
189
end
140
190
@@ -143,7 +193,7 @@ function get_unicode_mapping(doc::CosDoc, font::IDDRef{CosDict})
143
193
toUnicode != = CosNull &&
144
194
return get_unicode_mapping (toUnicode)
145
195
encoding = cosDocGetObject (doc, font, cn " Encoding" )
146
- d = merge_encoding! (Dict {UInt8, Char} (), encoding, doc, font)
196
+ d = merge_encoding! (Dict {UInt8, Vector{ Char} } (), encoding, doc, font)
147
197
return length (d) == 0 ? nothing : d
148
198
end
149
199
@@ -218,11 +268,11 @@ function get_glyph_id_mapping(cosdoc::CosDoc, cosfont::IDD{CosDict})
218
268
return glyph_name_to_cid, cid_to_glyph_name
219
269
end
220
270
221
- get_encoded_string (s:: CosString , fum:: Union{Dict{UInt8, Char}, CMap} ) =
271
"""
    get_encoded_string(s::CosString, fum::FontUnicodeMapping)

Convert `s` to its bytes with `Vector{UInt8}(s)` and forward them, together
with `fum`, to the byte-based `get_encoded_string` method.
"""
function get_encoded_string(s::CosString, fum::FontUnicodeMapping)
    raw = Vector{UInt8}(s)
    return get_encoded_string(raw, fum)
end
223
273
224
274
"""
    get_encoded_string(v, fum::Dict{UInt8, Vector{Char}})

Decode the bytes `v` (a `Vector{UInt8}` or a tuple of `UInt8`) using the
byte-to-characters table `fum`. Empty input yields an empty string; otherwise
the result is `String(NativeEncodingToUnicode(v, fum))`.
"""
function get_encoded_string(v::Union{Vector{UInt8}, NTuple{N, UInt8}},
                            fum::Dict{UInt8, Vector{Char}}) where N
    if isempty(v)
        return ""
    end
    decoded = NativeEncodingToUnicode(v, fum)
    return String(decoded)
end
@@ -334,8 +384,17 @@ cmap_command(b::Vector{UInt8}) =
334
384
length (b), b != beginbfchar && b != beginbfrange && b != begincodespacerange ?
335
385
nothing : Symbol (String (b))
336
386
387
"""
    _offset(obj::CosXString, offset)

Return a new `CosXString` built from the bytes of `obj` after adding `offset`
to the 16-bit value formed by the first two bytes (first byte is the high
byte). Any further bytes are carried over unchanged. The bytes are re-encoded
as hexadecimal text before constructing the result.

Throws if `obj` yields fewer than two bytes or if the shifted value does not
fit in a `UInt16`.
"""
function _offset(obj::CosXString, offset)
    bytes = Vector{UInt8}(obj)
    shifted = UInt16(256 * bytes[1] + bytes[2] + offset)
    bytes[1] = UInt8(shifted >> 8)       # high byte
    bytes[2] = UInt8(shifted & 0x00ff)   # low byte
    return CosXString(Vector{UInt8}(bytes2hex(bytes)))
end
395
+
337
396
function on_cmap_command! (stm:: IO , command:: Symbol ,
338
- params:: Vector{CosInt} , cmap:: CMap )
397
+ params:: Vector{CosInt} , cmap:: CMap )
339
398
n = get (pop! (params))
340
399
o1, o2, o3 = CosNull, CosNull, CosNull
341
400
for i = 1 : n
@@ -352,18 +411,57 @@ function on_cmap_command!(stm::IO, command::Symbol,
352
411
if l == 1
353
412
cmap. range_map[Interval (d1[1 ], d2[1 ])] = o3
354
413
else
355
- imap = get! (cmap. range_map, Interval (d1[1 ], d2[1 ]),
356
- IntervalTree {UInt8, CosObject} ())
357
- imap[Interval (d1[2 ], d2[2 ])] = o3
414
+ if d1[2 ] <= d2[2 ]
415
+ imap = get! (cmap. range_map, Interval (d1[1 ], d2[1 ]),
416
+ IntervalTree {UInt8, CosObject} ())
417
+ imap[Interval (d1[2 ], d2[2 ])] = o3
418
+ else
419
+ @warn " Corrupt CMap file. Repairing... Some encodings may not map properly."
420
+ imap = get! (cmap. range_map, Interval (d1[1 ], d1[1 ]),
421
+ IntervalTree {UInt8, CosObject} ())
422
+ imap[Interval (d1[2 ], 0xff )] = o3
423
+ o3 = _offset (o3, 0xff - d1[2 ] + 1 )
424
+
425
+ if d2[1 ] - d1[1 ] > 1
426
+ i1, i2 = d1[1 ]+ 0x1 , d2[1 ]- 0x1
427
+ imap = get! (cmap. range_map, Interval (i1, i2),
428
+ IntervalTree {UInt8, CosObject} ())
429
+ imap[Interval (0x00 , 0xff )] = o3
430
+ o3 = _offset (o3, (d2[1 ] - d1[1 ] - 1 )* 0x100 )
431
+ end
432
+ imap = get! (cmap. range_map, Interval (d2[1 ], d2[1 ]),
433
+ IntervalTree {UInt8, CosObject} ())
434
+ imap[Interval (0x00 , d2[2 ])] = o3
435
+ end
358
436
end
359
437
else
360
438
l = length (d1)
439
+ @assert (d1[1 ] <= d2[1 ]) E_INVALID_CODESPACERANGE
361
440
if l == 1
362
441
cmap. code_space[Interval (d1[1 ], d2[1 ])] = CosNull
363
442
else
364
- imap = IntervalTree {UInt8, CosNullType} ()
365
- imap[Interval (d1[2 ], d2[2 ])] = CosNull
366
- cmap. code_space[Interval (d1[1 ], d2[1 ])] = imap
443
+ if d1[2 ] <= d2[2 ]
444
+ imap = IntervalTree {UInt8, CosNullType} ()
445
+ imap[Interval (d1[2 ], d2[2 ])] = CosNull
446
+ cmap. code_space[Interval (d1[1 ], d2[1 ])] = imap
447
+ else
448
+ @warn " Corrupt CMap file. Repairing... Some encodings may not map properly."
449
+ imap = IntervalTree {UInt8, CosNullType} ()
450
+ imap[Interval (d1[2 ], 0xff )] = CosNull
451
+ cmap. code_space[Interval (d1[1 ], d1[1 ])] = imap
452
+
453
+ imap = get! (cmap. code_space, Interval (d1[1 ], d1[1 ]), IntervalTree {UInt8, CosNullType} ())
454
+ imap[Interval (d1[2 ], 0xff )] = CosNull
455
+
456
+ imap = get! (cmap. code_space, Interval (d2[1 ], d2[1 ]), IntervalTree {UInt8, CosNullType} ())
457
+ imap[Interval (0x00 , d2[2 ])] = CosNull
458
+
459
+ if d2[1 ] - d1[1 ] > 1
460
+ i1, i2 = d1[1 ]+ 0x1 , d2[1 ]- 0x1
461
+ imap = get! (cmap. code_space, Interval (i1, i2), IntervalTree {UInt8, CosNullType} ())
462
+ imap[Interval (0x00 , 0xff )] = CosNull
463
+ end
464
+ end
367
465
end
368
466
end
369
467
end
0 commit comments