@@ -457,48 +457,69 @@ const PD_CONTENT_OPERATORS = Dict(
457
457
458
458
"""
    get_pdfcontentops(b::Vector{UInt8})

Look up the content-stream operator whose token bytes are `b` in
`PD_CONTENT_OPERATORS` and build the corresponding object.

Returns `CosNull` when the token is not present in the table; otherwise returns
the result of evaluating the table entry as a call expression (first element is
the callee, the remaining elements are its arguments).
"""
function get_pdfcontentops(b::Vector{UInt8})
    # `String(::Vector{UInt8})` may take ownership of the buffer and truncate it;
    # build the key from a copy so the caller's byte vector is left untouched.
    key = String(copy(b))
    arr = get(PD_CONTENT_OPERATORS, key, CosNull)
    # `CosNull` is used as a sentinel, so compare by identity.
    arr === CosNull && return CosNull
    # NOTE(review): `eval` is applied only to entries of the constant operator
    # table; `b` is used purely as a lookup key and is never evaluated.
    return eval(Expr(:call, arr...))
end
466
463
467
"""
    showtext(io::IO, grp::PDPageObjectGroup, state::Vector{Dict}=Vector{Dict}())

Call `showtext` on every object held in `grp.objs`, in order, passing along the
same `io` and `state`. Returns `io`.
"""
function showtext(io::IO, grp::PDPageObjectGroup, state::Vector{Dict}=Vector{Dict}())
    foreach(child -> showtext(io, child, state), grp.objs)
    return io
end
473
470
474
"""
    showtext(io::IO, tr::PDPageTextRun, state::Vector{Dict}=Vector{Dict}())

Write the strings of the text run `tr` to `io`.

The font name (`:font`) and page (`:page`) are read from the last frame of
`state`; when `state` is empty both default to `CosNull`. A newline is written
first when the run's operator is `'` or `"`. Returns `io`.
"""
function showtext(io::IO, tr::PDPageTextRun, state::Vector{Dict}=Vector{Dict}())
    # The default `state` is empty, so `state[end]` would throw a BoundsError;
    # fall back to an empty frame in that case.
    frame = isempty(state) ? Dict() : state[end]
    fontname, _ = get(frame, :font, (CosNull, CosNull))
    page = get(frame, :page, CosNull)

    # The `'` and `"` operators start a new line before showing text. Emit a bare
    # newline, as the other line-break operators handled by `showtext` do.
    op = tr.elem.t
    (op == Symbol("\'") || op == Symbol("\"")) && print(io, "\n")

    for s in tr.ss
        write(io, String(get_encoded_string(s, fontname, page)))
    end
    return io
end
483
481
484
"""
    showtext(io::IO, pdo::PDPageTextObject, state::Vector{Dict}=Vector{Dict}())

Forward to the `showtext` method for the text object's `group`, using the same
`io` and `state`, and return its result.
"""
function showtext(io::IO, pdo::PDPageTextObject, state::Vector{Dict}=Vector{Dict}())
    return showtext(io, pdo.group, state)
end
485
484
486
"""
    showtext(io::IO, pdo::PDPageMarkedContent, state::Vector{Dict})

Show the text of a marked-content section. The tag is taken from the first
operand of the first object in `pdo.group`; sections tagged `Artifact` produce
no output. Otherwise the call is forwarded to the group. Returns `io`.
"""
function showtext(io::IO, pdo::PDPageMarkedContent, state::Vector{Dict})
    group = pdo.group
    tag = group.objs[1].operands[1] # can be used for XML tagging.
    if tag == cn"Artifact"
        # Do not print Artifact types
        return io
    end
    return showtext(io, group, state)
end
492
490
493
"""
    showtext(io::IO, pdo::PDPageElement, state::Vector{Dict}=Vector{Dict}())

Apply the effect of a single content-stream operator `pdo` on text output.

`state` is used as a stack of frames; the last frame is the current one.
Nothing is done when `state` is empty or the current frame has no `:page`.

Handled operators (`pdo.t`):
- `q` pushes a copy of the current frame; `Q` pops it (the last remaining frame
  is never popped).
- `T*` writes a newline; `Td`/`TD` write a newline when the second operand is
  greater than zero.
- `Tm` writes a newline when its sixth operand is more than one unit below the
  value recorded for the previous `Tm`, then records the new value under `:yloc`.
- `Tf` resolves the font through `page_find_font` and stores
  `(fontname, font)` under `:font` when a font is found.

All other operators are ignored. Always returns `io`.
"""
function showtext(io::IO, pdo::PDPageElement, state::Vector{Dict}=Vector{Dict}())
    # The default `state` is empty; without a frame there is no page to work with.
    isempty(state) && return io
    frame = state[end]
    page = get(frame, :page, CosNull)
    page === CosNull && return io

    op = pdo.t
    if op == :q
        push!(state, copy(frame))
    elseif op == :Q
        # Keep the base frame so an unbalanced `Q` cannot empty the stack and
        # break later `state[end]` lookups.
        length(state) > 1 && pop!(state)
    elseif op == Symbol("T*")
        print(io, "\n")
    elseif op == :Td || op == :TD
        # NOTE(review): a line break is emitted for a positive second operand;
        # confirm the intended sign against the PDF text-positioning rules.
        get(pdo.operands[2]) > 0 && print(io, "\n")
    elseif op == :Tm
        # If the previous text matrix was at a value higher than the current in
        # y-axis by 1-unit enter a line-break.
        nyloc = get(pdo.operands[6])
        # `get!` stores the history vector in the frame; a plain `get` would hand
        # back a fresh vector on every call and the comparison would never fire.
        yloc = get!(frame, :yloc, Vector{Float32}())
        oyloc = isempty(yloc) ? -10000 : yloc[end]
        nyloc < oyloc - 1 && print(io, '\n')
        push!(yloc, nyloc)
    elseif op == :Tf
        fontname = pdo.operands[1]
        font = page_find_font(page, fontname)
        font === CosNull || (frame[:font] = (fontname, font))
    end
    return io
end
503
524
504
"""
    showtext(io::IO, pdo::CosObject, state::Vector{Dict}=Vector{Dict}())

Write `pdo` to `io` using `show`; `state` is not used. Returns `io`.
"""
function showtext(io::IO, pdo::CosObject, state::Vector{Dict}=Vector{Dict}())
    show(io, pdo)
    return io
end
0 commit comments