Skip to content

Commit f98d449

Browse files
committed
PDFs generated by Google Docs specify CID widths as floating-point values, unlike
most PDFs, where the widths are integers.
1 parent 6c6460e commit f98d449

File tree

3 files changed

+29
-12
lines changed

3 files changed

+29
-12
lines changed

src/PDFontMetrics.jl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -179,15 +179,15 @@ end
179179
get_font_widths(basefonts::CosName) = read_afm(convert(CDTextString, basefonts))
180180

181181
function get_cid_font_widths(cosDoc::CosDoc, font::IDDRef{CosDict})
182-
m = IntervalTree{UInt16, Int}()
182+
m = IntervalTree{UInt16, Float32}()
183183
encoding = cosDocGetObject(cosDoc, font, cn"Encoding")
184184
desc = cosDocGetObject(cosDoc, font, cn"DescendantFonts") |> get
185185
w = cosDocGetObject(cosDoc, desc[1], cn"W")
186186
dw = cosDocGetObject(cosDoc, desc[1], cn"DW")
187187
# If widths are not specified or the font encoding is not Identity-H
188188
# widths cannot be extracted.
189189
if w === CosNull || encoding != cn"Identity-H"
190-
return (dw === CosNull) ? CIDWidth() : CIDWidth(get(dw))
190+
return (dw === CosNull) ? CIDWidth() : CIDWidth(Float32(get(dw)))
191191
end
192192
w = get(w)
193193
next = iterate(w)
@@ -201,16 +201,16 @@ function get_cid_font_widths(cosDoc::CosDoc, font::IDDRef{CosDict})
201201
if ecid isa Vector
202202
for wdo in ecid
203203
width = get(wdo)
204-
m[Interval(UInt16(ccid), UInt16(ccid))] = width
204+
m[Interval(UInt16(ccid), UInt16(ccid))] = Float32(width)
205205
ccid += 1
206206
end
207207
else
208208
(width, state) = iterate(w, state)
209-
m[Interval(UInt16(bcid), UInt16(ecid))] = get(width)
209+
m[Interval(UInt16(bcid), UInt16(ecid))] = Float32(get(width))
210210
end
211211
next = iterate(w, state)
212212
end
213-
return (dw === CosNull) ? CIDWidth(m) : CIDWidth(m, get(dw))
213+
return (dw === CosNull) ? CIDWidth(m) : CIDWidth(m, Float32(get(dw)))
214214
end
215215

216216
get_character_width(n::CosName, afm::AdobeFontMetrics) =

src/PDFonts.jl

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -389,14 +389,13 @@ function read_cmap(stm::IO)
389389
end
390390

391391
struct CIDWidth
392-
imap::IntervalTree{UInt16, Int}
393-
dw::Int
394-
CIDWidth(m::IntervalTree{UInt16, Int}, tdw::Int) = new(m, tdw)
392+
imap::IntervalTree{UInt16, Float32}
393+
dw::Float32
395394
end
396395

397-
CIDWidth(m::IntervalTree{UInt16, Int}) = CIDWidth(m, 1000)
398-
CIDWidth(tdw::Int) = CIDWidth(IntervalTree{UInt16, Int}(), tdw)
399-
CIDWidth() = CIDWidth(1000)
396+
CIDWidth(m::IntervalTree{UInt16, Float32}) = CIDWidth(m, 1000f0)
397+
CIDWidth(tdw::Float32) = CIDWidth(IntervalTree{UInt16, Float32}(), tdw)
398+
CIDWidth() = CIDWidth(1000f0)
400399

401400
mutable struct PDFont
402401
doc::PDDoc

test/runtests.jl

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ using PDFIO.Common: read_pkcs12
1313

1414
include("debugIO.jl")
1515

16-
pdftest_ver = "0.0.6"
16+
pdftest_ver = "0.0.7"
1717
pdftest_link = "https://github.com/sambitdash/PDFTest/archive/v"*pdftest_ver
1818

1919
zipfile = "pdftest-"*pdftest_ver
@@ -364,6 +364,24 @@ end
364364
end
365365
end
366366

367+
@testset "Floating point CIDWidth" begin
368+
@test begin
369+
filename="sample-google-doc.pdf"
370+
DEBUG && println(filename)
371+
resfile, template, filename = local_testfiles(filename)
372+
doc = pdDocOpen(filename)
373+
io = util_open(resfile, "w")
374+
try
375+
extract_text(io, doc)
376+
finally
377+
util_close(io)
378+
pdDocClose(doc)
379+
end
380+
@test files_equal(resfile, template)
381+
length(utilPrintOpenFiles()) == 0
382+
end
383+
end
384+
367385
@testset "Non-standard CMap" begin
368386
@test begin
369387
filename="16-969_o7jp.pdf"

0 commit comments

Comments
 (0)