@@ -9,7 +9,7 @@ export PDPageObject,
9
9
PDPage_EndGroup
10
10
11
11
using BufferedStreams
12
- import Base: show
12
+ import Base: show, isless
13
13
14
14
"""
15
15
```
@@ -51,15 +51,15 @@ abstract type PDPageObject end
51
51
A representation of a content object with operator and operand. See [`PDPageObject`](@ref)
52
52
for more details.
53
53
"""
54
mutable struct PDPageElement{S} <: PDPageObject
    # Operator name. The string constructor also uses this symbol as the type
    # parameter `S`, which lets methods dispatch on the operator.
    t::Symbol
    # NOTE(review): presumably the (major, minor) PDF version of the operator - confirm.
    version::Tuple{Int,Int}
    # NOTE(review): presumably the number of operands the operator expects - confirm.
    noperand::Int
    # Operands attached to this operator.
    operands::Vector{CosObject}
end
60
60
61
61
# Convenience constructor: the operator name `ts` becomes both the `t` field and the
# type parameter, and the operand list starts out empty.
function PDPageElement(ts::AbstractString, ver::Tuple{Int,Int}, nop::Int=0)
    opsym = Symbol(ts)
    return PDPageElement{opsym}(opsym, ver, nop, Vector{CosObject}())
end
63
63
64
64
function show (io:: IO , e:: PDPageElement )
65
65
for op in e. operands
@@ -219,8 +219,6 @@ function collect_object(grp::PDPageObjectGroup, tr::PDPageTextRun,
219
219
return tr
220
220
end
221
221
222
-
223
-
224
222
function collect_inline_image (img:: PDPageInlineImage , name:: CosName ,
225
223
bis:: BufferedInputStream )
226
224
value = parse_value (bis, get_pdfcontentops)
@@ -231,13 +229,13 @@ function collect_inline_image(img::PDPageInlineImage, elem::PDPageElement,
231
229
bis:: BufferedInputStream )
232
230
if (elem. t == Symbol (" ID" ))
233
231
while (! img. isRead && ! eof (bis))
234
- b1 = peek (bis)
232
+ b1 = BufferedStreams . peek (bis)
235
233
if (b1 == LATIN_UPPER_E)
236
234
mark (bis)
237
235
skip (bis,1 );
238
- b2 = peek (bis)
236
+ b2 = BufferedStreams . peek (bis)
239
237
if (b2 == LATIN_UPPER_I)
240
- skip (bis,1 );b3 = peek (bis)
238
+ skip (bis,1 );b3 = BufferedStreams . peek (bis)
241
239
if (ispdfspace (b3))
242
240
skip (bis,1 )
243
241
img. isRead= true
@@ -458,70 +456,236 @@ const PD_CONTENT_OPERATORS = Dict(
458
456
"""
    get_pdfcontentops(b::Vector{UInt8})

Look up the token bytes `b` in `PD_CONTENT_OPERATORS`. Return `CosNull` when the token
is not registered; otherwise return the result of evaluating the registered entry as a
call expression.
"""
function get_pdfcontentops(b::Vector{UInt8})
    callspec = get(PD_CONTENT_OPERATORS, String(b), CosNull)
    if callspec == CosNull
        return CosNull
    end
    # The table entry is splatted into a call: first item is the callee, the rest
    # are its arguments.
    return eval(Expr(:call, callspec...))
end
461
+
462
"""
    TextLayout

A piece of text together with six coefficients of the matrix it is rendered with:
`a`, `b`, `c`, `d` are the upper-left 2x2 entries and `x`, `y` the translation.
"""
struct TextLayout
    a::Float32
    b::Float32
    c::Float32
    d::Float32
    x::Float32
    y::Float32
    text::String
end

"""
    isless(tl1::TextLayout, tl2::TextLayout)

Reading-order comparison intended for a max-heap: `tl1` is "less" than `tl2` when it
sits lower on the page, or on the same line but further to the right.

Two layouts count as being on the same line when their `y` values differ by no more
than half of the larger `|d|` of the two. The tolerance uses absolute values and both
operands, so that a negative `d` cannot make an element compare less than itself, and
so that `isless(a, b)` and `isless(b, a)` are never both true.
"""
function Base.isless(tl1::TextLayout, tl2::TextLayout)
    dy = tl1.y - tl2.y
    dx = tl1.x - tl2.x
    # Symmetric, non-negative line tolerance.
    ytol = max(abs(tl1.d), abs(tl2.d)) / 2

    dy < -ytol && return true
    dy > ytol && return false
    return dx > 0
end
481
+
482
+ using DataStructures
483
+
484
"""
    init_graphics_state()

Create the state stack used by `evalContent!`: a `Vector{Dict}` holding one dictionary
with an empty text-layout max-heap, an identity `:CTM` and the default text-state
entries (`:Tc`, `:Tw`, `:Tz`, `:TL`, `:Tr`, `:Ts`).
"""
function init_graphics_state()
    base = Dict()

    # Collected text fragments, ordered by `isless(::TextLayout, ::TextLayout)`.
    base[:text_layout] = mutable_binary_maxheap(TextLayout)

    # Graphics state
    base[:CTM] = eye(3)

    # Text state
    base[:Tc] = 0.0
    base[:Tw] = 0.0
    base[:Tz] = 100.0
    base[:TL] = 0.0
    base[:Tr] = 0
    base[:Ts] = 0.0

    stack = Vector{Dict}()
    push!(stack, base)
    return stack
end
502
+
503
"""
    show_text_layout!(io::IO, state::Vector{Dict})

Drain the `:text_layout` heap of the top state entry and print its fragments to `io`,
approximating their page position with newlines and spaces.

For each popped fragment a cell width `w` and height `h` are derived from its matrix
coefficients. A newline is printed for every `h` the cursor is above the fragment, and
one space for every `w` the cursor is to its left. The heap is empty afterwards.
Returns `nothing`.
"""
function show_text_layout!(io::IO, state::Vector{Dict})
    heap = state[end][:text_layout]
    x = 0.0
    y = -1.0
    while !isempty(heap)
        tlayout = pop!(heap)

        if abs(tlayout.b) < 0.001 && abs(tlayout.c) < 0.001
            # Horizontal text
            w = abs(tlayout.a)
            h = abs(tlayout.d)
        else
            # Vertical or any other angle text. The determinant is negative for a
            # reflected matrix, so take its magnitude before the square root
            # instead of raising a DomainError.
            w = h = sqrt(abs(tlayout.a*tlayout.d - tlayout.b*tlayout.c))
            y = -1.0 # Reset the old positions.
        end
        # A non-positive step would make the loops below spin forever.
        @assert w > 0.1
        @assert h > 0.1

        while y > tlayout.y + h
            print(io, '\n')
            y -= h
            x = 0.0
        end
        y = tlayout.y

        if x > tlayout.x
            x = tlayout.x
        end
        while x < tlayout.x
            print(io, ' ')
            x += w
        end

        print(io, tlayout.text)
        x += w*length(tlayout.text)
    end
    return nothing
end
463
540
464
# Evaluate every object of the group in order against the shared state stack.
function evalContent!(grp::PDPageObjectGroup, state::Vector{Dict}=Vector{Dict}())
    foreach(obj -> evalContent!(obj, state), grp.objs)
    return state
end
470
547
471
# Evaluate a text run: apply its operator to the state, then queue the decoded text
# together with its rendering matrix on the `:text_layout` heap. Runs seen while
# `:in_artifact` is set are not queued.
function evalContent!(tr::PDPageTextRun, state::Vector{Dict}=Vector{Dict}())
    evalContent!(tr.elem, state)
    gs = state[end]

    fontsize = get(gs, :fontsize, 0)
    hscale = gs[:Tz]/100.0
    rise = gs[:Ts]

    # Text-state matrix; identity while no font size has been recorded.
    if fontsize == 0
        tsm = eye(3)
    else
        tsm = [fontsize*hscale 0.0 0.0; 0.0 fontsize 0.0; 0.0 rise 1.0]
    end

    tm = gs[:Tm]
    ctm = gs[:CTM]
    trm = tsm*tm*ctm

    fontname = get(gs, :font, (CosNull, CosNull))[1]
    page = get(gs, :page, CosNull)
    text = join(String(get_encoded_string(s, fontname, page)) for s in tr.ss)

    a, b, c = trm[1, 1], trm[1, 2], trm[2, 1]
    d, e, f = trm[2, 2], trm[3, 1], trm[3, 2]

    heap = gs[:text_layout]
    if !get(gs, :in_artifact, false)
        push!(heap, TextLayout(a, b, c, d, e, f, text))
    end
    return state
end
481
581
482
# Evaluate a text object: install identity `:Tm`, `:Tlm` and `:Trm` matrices on the
# top state entry, evaluate the contained group, then remove the three entries again.
function evalContent!(pdo::PDPageTextObject, state::Vector{Dict}=Vector{Dict}())
    matrixkeys = (:Tm, :Tlm, :Trm)
    for key in matrixkeys
        state[end][key] = eye(3)
    end
    evalContent!(pdo.group, state)
    for key in matrixkeys
        delete!(state[end], key)
    end
    return state
end
484
592
485
# Evaluate a marked-content sequence. Content tagged `Artifact` is evaluated with
# `:in_artifact` set on the top state entry, so its text runs are not collected.
#
# The flag that was in effect on entry is restored on exit. Clearing it
# unconditionally would let a nested Artifact switch collection back on for the
# remainder of the enclosing Artifact.
function evalContent!(pdo::PDPageMarkedContent, state::Vector{Dict})
    tag = pdo.group.objs[1].operands[1] # can be used for XML tagging.
    tag == cn"Artifact" || return evalContent!(pdo.group, state)

    already_in_artifact = get(state[end], :in_artifact, false)
    state[end][:in_artifact] = true
    evalContent!(pdo.group, state)
    if !already_in_artifact
        delete!(state[end], :in_artifact)
    end
    return state
end
603
+
604
# Fallback: operators without a dedicated method leave the state untouched.
function evalContent!(pdo::PDPageElement{S}, state::Vector{Dict}) where S
    return state
end
605
+
606
# `q`: push a shallow copy of the current entry onto the state stack.
function evalContent!(pdo::PDPageElement{:q}, state::Vector{Dict})
    snapshot = copy(state[end])
    push!(state, snapshot)
    return state
end
610
+
611
# `Q`: drop the top entry of the state stack.
#
# The base entry is never removed: a content stream with more `Q` than `q` would
# otherwise empty the stack and make every later `state[end]` access throw.
function evalContent!(pdo::PDPageElement{:Q}, state::Vector{Dict})
    if length(state) > 1
        pop!(state)
    end
    return state
end
615
+
616
# `Tm`: build a 3x3 matrix from the six operands and store it, as two separate
# arrays, under both `:Tm` and `:Tlm` of the top state entry.
function evalContent!(pdo::PDPageElement{:Tm}, state::Vector{Dict})
    a, b, c, d, e, f = (get(pdo.operands[i]) for i in 1:6)
    state[end][:Tm] = [a b 0.0; c d 0.0; e f 1.0]
    state[end][:Tlm] = [a b 0.0; c d 0.0; e f 1.0]
    return state
end
490
629
491
# `Tf`: resolve the font named by the first operand on the current page and record
# `(fontname, font)` under `:font` and the second operand under `:fontsize`.
# Nothing is recorded when no page is set or the font cannot be found.
function evalContent!(pdo::PDPageElement{:Tf}, state::Vector{Dict})
    page = get(state[end], :page, CosNull)
    if page === CosNull
        return state
    end

    fontname = pdo.operands[1]
    font = page_find_font(page, fontname)
    if font === CosNull
        return state
    end

    state[end][:font] = (fontname, font)
    state[end][:fontsize] = get(pdo.operands[2])
    return state
end
641
+
642
# Single-operand text-state operators: each generated method stores its first operand
# in the top state entry under the key named after the operator.
for opname in ("Tc", "Tw", "Tz", "TL", "Tr", "Ts")
    @eval function evalContent!(pdo::PDPageElement{Symbol($opname)}, state::Vector{Dict})
        state[end][Symbol($opname)] = get(pdo.operands[1])
        return state
    end
end
646
+
647
"""
    set_text_pos!(tx, ty, state::Vector{Dict})

Pre-multiply the `:Tlm` matrix of the top state entry by a translation of `(tx, ty)`
and store the product, as one shared array, under both `:Tm` and `:Tlm`.
Returns `state`.
"""
function set_text_pos!(tx, ty, state::Vector{Dict})
    shift = [1.0 0.0 0.0; 0.0 1.0 0.0; tx ty 1.0]
    gs = state[end]
    moved = shift*gs[:Tlm]

    gs[:Tm] = moved
    gs[:Tlm] = moved
    return state
end
656
+
657
# Move the text position by (0, -TL), where TL is the `:TL` entry of the top state.
offset_text_leading!(state::Vector{Dict}) = set_text_pos!(0, -state[end][:TL], state)
661
+
662
# `TD`: like `Td`, but first records the negated second operand as `:TL`.
function evalContent!(pdo::PDPageElement{:TD}, state::Vector{Dict})
    tx = get(pdo.operands[1])
    ty = get(pdo.operands[2])
    state[end][:TL] = -ty
    return set_text_pos!(tx, ty, state)
end
669
+
670
# `Td`: move the text position by the offsets given in the first two operands.
function evalContent!(pdo::PDPageElement{:Td}, state::Vector{Dict})
    tx = get(pdo.operands[1])
    ty = get(pdo.operands[2])
    return set_text_pos!(tx, ty, state)
end
676
+
677
# `T*`: advance the text position by the current leading.
function evalContent!(pdo::PDPageElement{Symbol("T*")}, state::Vector{Dict})
    return offset_text_leading!(state)
end
679
+
680
# Single-quote operator: advance the text position by the current leading.
function evalContent!(pdo::PDPageElement{Symbol("\'")}, state::Vector{Dict})
    return offset_text_leading!(state)
end
682
+
683
# Double-quote operator: store the first operand as `:Tw` and the second as `:Tc`,
# then advance the text position by the current leading.
function evalContent!(pdo::PDPageElement{Symbol("\"")}, state::Vector{Dict})
    gs = state[end]
    gs[:Tw] = get(pdo.operands[1])
    gs[:Tc] = get(pdo.operands[2])
    return offset_text_leading!(state)
end
524
688
525
# Inline images do not affect the state.
function evalContent!(pdo::PDPageInlineImage, state::Vector{Dict}=Vector{Dict}())
    return state
end
526
690
527
# Bare COS objects in the content do not affect the state.
function evalContent!(pdo::CosObject, state::Vector{Dict}=Vector{Dict}())
    return state
end
0 commit comments