Skip to content

Commit 82f4e00

Browse files
committed
PageLabel support
Also includes lots of style fixes. Common dataobjects used etc.
1 parent e855efc commit 82f4e00

File tree

12 files changed

+722
-437
lines changed

12 files changed

+722
-437
lines changed

REQUIRE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ Libz 0.2
55
StringEncodings
66
TimeZones
77
Documenter
8+
LabelNumerals

src/BufferParser.jl

Lines changed: 22 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,28 @@
11
using BufferedStreams
22

3-
export skipv,
4-
advance!,
5-
locate_keyword!,
6-
chomp_space!,
7-
chomp_eol!
3+
export skipv,
4+
advance!,
5+
locate_keyword!,
6+
chomp_space!,
7+
chomp_eol!
88

9-
@inline function chomp_space!(ps::BufferedInputStream)
10-
while !eof(ps) && ispdfspace(peek(ps))
11-
skip(ps,1)
12-
end
13-
end
9+
"""
    chomp_space!(ps::BufferedInputStream)

Advance `ps` past every leading byte for which `ispdfspace` returns `true`.
Stops at the first byte that does not satisfy `ispdfspace`, or at end of stream.
"""
@inline function chomp_space!(ps::BufferedInputStream)
    while true
        # Leave as soon as the stream is exhausted or the next byte is not PDF whitespace.
        (eof(ps) || !ispdfspace(peek(ps))) && break
        skip(ps, 1)
    end
    return nothing
end
1411

15-
@inline function chomp_eol!(ps::BufferedInputStream)
16-
@inbounds while !eof(ps) && is_crorlf(peek(ps))
17-
skip(ps,1)
18-
end
19-
end
12+
"""
    chomp_eol!(ps::BufferedInputStream)

Advance `ps` past every leading byte for which `is_crorlf` returns `true`.
Stops at the first byte that does not satisfy `is_crorlf`, or at end of stream.
"""
@inline function chomp_eol!(ps::BufferedInputStream)
    @inbounds while true
        # Leave as soon as the stream is exhausted or the next byte is not an EOL byte.
        (eof(ps) || !is_crorlf(peek(ps))) && break
        skip(ps, 1)
    end
    return nothing
end
2014

21-
function skipv(ps::BufferedInputStream, c::UInt8)
22-
if !eof(ps) && (peek(ps) == c)
23-
skip(ps,1)
24-
else
25-
error("Expected '$(Char(c))' here")
26-
end
15+
"""
    skipv(ps::BufferedInputStream, c::UInt8)

Consume the next byte of `ps` if it equals `c` and return the result of `skip(ps, 1)`.

# Throws
- `ErrorException` if the stream is at end of input, or if the next byte differs
  from `c`. The message names both the byte that was found and the byte expected.
"""
@inline function skipv(ps::BufferedInputStream, c::UInt8)
    # Report end of stream explicitly instead of a made-up placeholder byte.
    eof(ps) && error("Found end of stream Expected '$(Char(c))' here")
    ch = peek(ps)
    ch == c && return skip(ps, 1)
    # Show the found byte as a character, the same way the expected byte is shown.
    error("Found '$(Char(ch))' Expected '$(Char(c))' here")
end
2820

29-
function skipv(ps::BufferedInputStream, cs::UInt8...)
30-
for c in cs
31-
skipv(ps, c)
32-
end
33-
end
21+
"""
    skipv(ps::BufferedInputStream, cs::UInt8...)

Apply the single-byte `skipv` to each byte of `cs` in order; the first mismatch raises
the error thrown by that method.
"""
@inline function skipv(ps::BufferedInputStream, cs::UInt8...)
    for expected in cs
        skipv(ps, expected)
    end
    return nothing
end
3422

35-
function skipv(ps::BufferedInputStream, cs::Array{UInt8,1})
36-
for c in cs
37-
skipv(ps, c)
38-
end
39-
end
23+
"""
    skipv(ps::BufferedInputStream, cs::Vector{UInt8})

Apply the single-byte `skipv` to each element of `cs` in order; the first mismatch raises
the error thrown by that method.
"""
@inline function skipv(ps::BufferedInputStream, cs::Vector{UInt8})
    for expected in cs
        skipv(ps, expected)
    end
    return nothing
end
4024

41-
@inline advance!(ps::BufferedInputStream)=read(ps,UInt8)
25+
"""
    advance!(ps::BufferedInputStream)

Read one `UInt8` from `ps` and return it.
"""
@inline function advance!(ps::BufferedInputStream)
    return read(ps, UInt8)
end
4226

4327
function kmp_preprocess(P)
4428
m = length(P)
@@ -82,15 +66,15 @@ function locate_keyword!(ps::BufferedInputStream, keyword, maxoffset=length(keyw
8266
break
8367
end
8468
if (offset >= maxoffset)
85-
break
69+
break
8670
end
8771
end
8872
if found
89-
unmark(ps)
90-
return (offset - length(keyword))
73+
unmark(ps)
74+
return (offset - length(keyword))
9175
else
92-
reset(ps)
93-
peek(ps)
94-
return -1
76+
reset(ps)
77+
peek(ps)
78+
return -1
9579
end
9680
end

src/CDObject.jl

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,49 @@
11
export CDTextString, CDDate, CDRect
22

3+
"""
4+
```
5+
CDTextString
6+
```
7+
The PDF file format provides two primary string types: hexadecimal string `CosXString`
8+
and literal string `CosLiteralString`. However, these are mere binary representations of
9+
strings without any associated encoding for semantic interpretation.
10+
Determination of encoding is carried out mostly by associated fonts and character maps in
11+
the content stream. There are also strings used in descriptions and other attributes of a
12+
PDF file where no font or mapping information is provided. This represents the string type
13+
in such situations. Typically, strings in PDFs are of 3 types.
14+
15+
1. Text string
16+
a. PDFDocEncoding string - Similar to ISO_8859-1
17+
b. UTF-16BE strings
18+
2. ASCII string
19+
3. Byte string - Pure binary data, no interpretation
20+
21+
1 and 2 can be represented by the `CDTextString`. `convert` methods are provided to
22+
translate the `CosString` to `CDTextString`
23+
"""
324
const CDTextString = String
425

526
using TimeZones
627

728
"""
8-
PDF files support the string format: (D:YYYYMMDDHHmmSSOHH'mm)
29+
```
30+
CDDate
31+
```
32+
Internally represented as string objects, these are timezone enabled date and time objects.
933
34+
PDF files support the string format: (D:YYYYMMDDHHmmSSOHH'mm)
1035
"""
1136
struct CDDate
    # Timezone-aware timestamp wrapped by this type.
    d::ZonedDateTime

    # Inner constructor: accepts an already-built `ZonedDateTime` and stores it as is.
    function CDDate(d::ZonedDateTime)
        return new(d)
    end
end
1540

41+
"""
42+
```
43+
CDDate(s::CDTextString)
44+
```
45+
Construct a `CDDate` from a PDF date string of the format: (D:YYYYMMDDHHmmSSOHH'mm)
46+
"""
1647
function CDDate(s::CDTextString)
1748
s = ascii(s)
1849
if startswith(s, "D:")
@@ -30,6 +61,12 @@ end
3061

3162
# Display a `CDDate` by delegating to `show` on the wrapped `ZonedDateTime`.
function Base.show(io::IO, dt::CDDate)
    return show(io, dt.d)
end
3263

64+
"""
65+
```
66+
CDRect
67+
```
68+
A `CosArray` representation of a rectangle in the lower left and upper right point format
69+
"""
3370
struct CDRect{T <: Number}
3471
llx::T
3572
lly::T

0 commit comments

Comments
 (0)