@@ -15,13 +15,28 @@ const endbfrange = b"endbfrange"
15
15
const begincodespacerange = b " begincodespacerange"
16
16
const endcodespacerange = b " endcodespacerange"
17
17
18
+
18
19
"""
    CMap

Lookup tables for a character map, keyed by code bytes (`UInt8` intervals).

- `code_space`: each entry holds either `CosNull` or a nested interval map for a
  second byte.
- `range_map`: each entry holds either a `CosObject` or a nested interval map from a
  second byte to a `CosObject`.

`CMap()` creates both tables empty.
"""
mutable struct CMap
    code_space::IntervalMap{UInt8, Union{CosNullType, IntervalMap{UInt8, CosNullType}}}
    range_map::IntervalMap{UInt8, Union{CosObject, IntervalMap{UInt8, CosObject}}}
    function CMap()
        code_space =
            IntervalMap{UInt8, Union{CosNullType, IntervalMap{UInt8, CosNullType}}}()
        range_map =
            IntervalMap{UInt8, Union{CosObject, IntervalMap{UInt8, CosObject}}}()
        return new(code_space, range_map)
    end
end
24
25
26
+ #=
27
+ mutable struct CMap
28
+ code_space::Array{UInt8,2}
29
+ CMap() = new(zeros(UInt8, 256, 3))
30
+ end
31
+ =#
32
+
33
"""
    show(io::IO, cmap::CMap)

Write the two tables of `cmap` to `io`: a "Code Space:" heading followed by
`cmap.code_space`, then a "Range Map:" heading followed by `cmap.range_map`.
"""
function show(io::IO, cmap::CMap)
    # Headings are plain text, so they go through `print`; `show` on a String writes
    # its quoted, escaped literal form (`"Code Space:\n"`) instead of the text itself.
    print(io, "Code Space:\n")
    show(io, cmap.code_space)
    print(io, "Range Map:\n")
    show(io, cmap.range_map)
    return nothing
end
39
+
25
40
mutable struct FontUnicodeMapping
26
41
encoding:: Dict
27
42
cmap:: CMap
@@ -52,7 +67,7 @@ function merge_encoding!(fum::FontUnicodeMapping, encoding::CosNullType,
52
67
basefont_with_subset = CDTextString (basefont)
53
68
basefont_str = rsplit (basefont_with_subset, ' +' ;limit= 2 )[end ]
54
69
enc = (basefont_str == " Symbol" ) ? SYMEncoding_to_Unicode :
55
- (basefont_str == " ZapfDigbats " ) ? ZAPEncoding_to_Unicode :
70
+ (basefont_str == " ZapfDingbats " ) ? ZAPEncoding_to_Unicode :
56
71
STDEncoding_to_Unicode
57
72
merge! (fum. encoding, enc)
58
73
return fum
86
101
"""
    merge_encoding!(fum::FontUnicodeMapping, doc::CosDoc, font::CosObject)

Merge the `Encoding` entry of `font` into `fum`, then the `ToUnicode` entry as well
when that entry is not `CosNull`. Both entries are looked up through `doc`.
"""
function merge_encoding!(fum::FontUnicodeMapping, doc::CosDoc, font::CosObject)
    merge_encoding!(fum, cosDocGetObject(doc, font, cn"Encoding"), doc, font)

    to_unicode = cosDocGetObject(doc, font, cn"ToUnicode")
    if to_unicode == CosNull
        # No ToUnicode entry: the Encoding-based mapping is all there is.
        return fum
    end
    return merge_encoding!(fum, to_unicode, doc, font)
end
93
108
94
109
"""
    merge_encoding!(fum::FontUnicodeMapping, cmap::CosIndirectObject{CosStream},
                    doc::CosDoc, font::CosObject)

Parse the stream behind `cmap` with `read_cmap`, store the result in `fum.cmap` and
flag its presence through `fum.hasCMap`. `doc` and `font` are not used by this method.
Returns `fum`.
"""
function merge_encoding!(fum::FontUnicodeMapping, cmap::CosIndirectObject{CosStream},
                         doc::CosDoc, font::CosObject)
    stream = get(cmap)
    fum.cmap = read_cmap(stream)
    fum.hasCMap = true
    return fum
end
99
115
@@ -105,18 +121,95 @@ function get_encoded_string(s::CosString, fum::FontUnicodeMapping)
105
121
return String (carr)
106
122
end
107
123
124
"""
    get_unicode_chars(b::UInt8, itv::IntervalValue{UInt8})

Return the characters that code byte `b` maps to through the range entry `itv`.

The value stored in `itv` is expected to be one of:

- a `CosXString`: its bytes are decoded and the first decoded character is advanced
  by the distance of `b` from the start of the interval;
- a `CosArray`: the element at that distance (1-based) must be a `CosXString`, whose
  bytes are decoded as-is.

The value type of `itv` is deliberately left open: entries taken directly from a
`CMap` range map have a `Union` value type, which a `IntervalValue{UInt8, CosObject}`
annotation would reject because type parameters are invariant.
"""
function get_unicode_chars(b::UInt8, itv::IntervalValue{UInt8})
    offset = b - first(itv)
    v = value(itv)
    if v isa CosXString
        carr = get_unicode_chars(Vector{UInt8}(v))
        carr[1] += offset # Only one char should be generated here
        return carr
    end
    @assert v isa CosArray
    xstr = get(v)[offset + 1]
    @assert xstr isa CosXString
    return get_unicode_chars(Vector{UInt8}(xstr))
end
142
+
143
"""
    get_unicode_chars(barr::Vector{UInt8})

Decode `barr` as a sequence of big-endian UTF-16 code units and return the resulting
characters as a `Vector{Char}`.

A high surrogate (`0xD800`-`0xDBFF`) immediately followed by a low surrogate
(`0xDC00`-`0xDFFF`) is combined into one supplementary-plane character. A surrogate
without its partner is passed through as its own code point. A trailing odd byte
cannot form a code unit and is ignored. Empty input yields an empty vector.
"""
function get_unicode_chars(barr::Vector{UInt8})
    l = length(barr)
    retarr = Vector{Char}()
    nb = 0 # number of bytes consumed so far
    while nb + 2 <= l
        # Widen before shifting: shifting a UInt8 left by 8 would discard every bit.
        unit = (UInt32(barr[nb + 1]) << 8) | barr[nb + 2]
        nb += 2
        c = unit
        if 0xD800 <= unit <= 0xDBFF && nb + 2 <= l
            low = (UInt32(barr[nb + 1]) << 8) | barr[nb + 2]
            if 0xDC00 <= low <= 0xDFFF
                # UTF-16 supplementary plane = 4 bytes
                c = 0x00010000 + ((unit - 0xD800) << 10) + (low - 0xDC00)
                nb += 2
            end
        end
        push!(retarr, Char(c))
    end
    return retarr
end
175
+
108
176
"""
    get_encoded_string(s::CosString, cmap::CMap)

Translate the bytes of `s` into characters using `cmap` and return them as a
`Vector{Char}`.

Each leading byte is looked up in `cmap.code_space`. When the entry found there is
`CosNull` the byte is a complete one-byte code and is resolved directly in
`cmap.range_map`; otherwise a second byte is consumed and resolved in the nested map
stored in `cmap.range_map` for the leading byte.

Input that cannot be mapped is skipped rather than raising: bytes outside the code
space, codes with no range-map entry, and a two-byte code cut off by the end of `s`.
"""
function get_encoded_string(s::CosString, cmap::CMap)
    cs = cmap.code_space
    rm = cmap.range_map
    barr = Vector{UInt8}(s)
    l = length(barr)
    retarr = Vector{Char}()
    i = 0
    while i < l
        b1 = barr[i += 1]
        hasintersection(cs, b1) || continue
        itree = value(collect(intersect(cs, (b1, b1)))[1])
        if itree === CosNull
            # One-byte code.
            hasintersection(rm, b1) || continue
            itv = collect(intersect(rm, (b1, b1)))[1]
            append!(retarr, get_unicode_chars(b1, itv))
        else
            # Two-byte code: stop if the string ends before the second byte.
            i < l || break
            b2 = barr[i += 1]
            hasintersection(rm, b1) || continue
            itree1 = value(collect(intersect(rm, (b1, b1)))[1])
            # Only a nested map can resolve the second byte.
            itree1 isa IntervalMap || continue
            hasintersection(itree1, b2) || continue
            itv = collect(intersect(itree1, (b2, b2)))[1]
            append!(retarr, get_unicode_chars(b2, itv))
        end
    end
    return retarr
end
110
204
111
205
"""
    cmap_command(b::Vector{UInt8})

Return `b` as a `Symbol` when it equals one of `beginbfchar`, `beginbfrange` or
`begincodespacerange`; return `nothing` for any other token. `b` is left unmodified.
"""
function cmap_command(b::Vector{UInt8})
    if b != beginbfchar && b != beginbfrange && b != begincodespacerange
        return nothing
    end
    # `String(::Vector{UInt8})` takes ownership of the vector and truncates it, so
    # convert a copy to keep the caller's buffer intact.
    return Symbol(String(copy(b)))
end
115
209
116
- function on_cmap_command (stm:: BufferedInputStream , command:: Symbol ,
210
+ function on_cmap_command! (stm:: BufferedInputStream , command:: Symbol ,
117
211
params:: Vector{CosInt} , cmap:: CMap )
118
212
n = get (pop! (params))
119
- println (n)
120
213
o1, o2, o3 = CosNull, CosNull, CosNull
121
214
for i = 1 : n
122
215
o1 = parse_value (stm)
@@ -128,14 +221,17 @@ function on_cmap_command(stm::BufferedInputStream, command::Symbol,
128
221
if (command != :begincodespacerange )
129
222
o3 = parse_value (stm)
130
223
@assert isa (o3, CosXString) || isa (o3, CosArray)
131
- println (d1)
132
224
l = length (d1)
133
225
if l == 1
134
226
cmap. range_map[(d1[1 ],d2[1 ])] = o3
135
227
else
136
- imap = IntervalMap {UInt8, CosObject} ()
228
+ if hasintersection (cmap. range_map, d1[1 ])
229
+ imap = value (collect (intersect (cmap. range_map, (d1[1 ], d2[1 ])))[1 ])
230
+ else
231
+ imap = IntervalMap {UInt8, CosObject} ()
232
+ cmap. range_map[(d1[1 ],d2[1 ])] = imap
233
+ end
137
234
imap[(d1[2 ], d2[2 ])] = o3
138
- cmap. range_map[(d1[1 ],d2[1 ])] = imap
139
235
end
140
236
else
141
237
l = length (d1)
@@ -148,9 +244,10 @@ function on_cmap_command(stm::BufferedInputStream, command::Symbol,
148
244
end
149
245
end
150
246
end
247
+ return cmap
151
248
end
152
249
153
- on_cmap_command (stm:: BufferedInputStream , command:: CosObject ,
250
# Fallback method for a `command` that is a `CosObject` rather than a `Symbol`:
# none of the arguments are used and `nothing` is returned.
function on_cmap_command!(stm::BufferedInputStream, command::CosObject,
                          params::Vector{CosInt}, cmap::CMap)
    return nothing
end
155
252
156
253
function read_cmap (stm:: BufferedInputStream )
@@ -162,62 +259,7 @@ function read_cmap(stm::BufferedInputStream)
162
259
push! (params, obj)
163
260
end
164
261
(obj == :beginbfchar || obj == :beginbfrange || obj == :begincodespacerange ) &&
165
- on_cmap_command (stm, obj, params, tcmap)
262
+ on_cmap_command! (stm, obj, params, tcmap)
166
263
end
167
264
return tcmap
168
265
end
169
-
170
- #=
171
- function get_encoded_string(s::CosXString, cmap::CosObject)
172
- cmap_vec = read_cmap(cmap)
173
- hexbytes = get(s)
174
- data = hexbytes |> String |> hex2bytes
175
-
176
- cmap_len = length(cmap_vec)
177
-
178
- for i = 1:cmap_len
179
- nb = cmap_vec[i][1]
180
- end
181
-
182
- for b in data
183
- #if b in
184
- end
185
- state = start(cmap_vec)
186
- nbytes = []
187
- while !done(cmap_vec, state)
188
- (r, state) = next(cmap_vec, state)
189
- isa(r[2], CosInt) && push!(nbytes, Int(r[2]))
190
- end
191
- for r in cmap_vec
192
- if isa(r[1], CosInt)
193
- end
194
- i = 1
195
- len = length(data)
196
- retval = UInt16[]
197
- while i < len
198
- c = parse(UInt16, String(data[i:i+3]), 16)
199
- for r in cmap_range
200
- range = r[1]
201
- if c in range
202
- incr = c - range[1]
203
- v = r[2]
204
- if isa(v, CosXString)
205
- data2 = get(v)
206
- c2 = parse(UInt16, String(data2), 16)
207
- c2 += incr
208
- push!(retval, c2)
209
- elseif isa(v, CosArray)
210
- data2 = get(v)[incr+1]
211
- j = 1
212
- while j < length(data2)
213
- c2 = parse(UInt16, String(data2[j:j+3]), 16)
214
- push!(retval, c2)
215
- j += 4
216
- end
217
- end
218
- end
219
- end
220
- i += 4
221
- end
222
- end
223
- =#
0 commit comments