Skip to content

Commit 1a2137c

Browse files
authored
Another approach to remove the type instability (#105)
To be reviewed along with #104. Pre-create the objects and `deepcopy` when needed.
1 parent 3d31927 commit 1a2137c

File tree

1 file changed

+84
-85
lines changed

1 file changed

+84
-85
lines changed

src/PDPageElement.jl

Lines changed: 84 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -403,98 +403,97 @@ end
403403
|MP||(PDF 1.2) Define marked-content point|320|
404404
=#
405405
# Lookup table from a PDF content-stream operator keyword to a pre-built
# prototype object for that operator. `get_pdfcontentops` looks a keyword up
# here and returns a `deepcopy` of the matching prototype, so the entries are
# constructed exactly once when this constant is defined.
#
# Every constructor receives the keyword itself, a 2-tuple and an integer; the
# group/inline-image openers additionally receive the container type they start.
# NOTE(review): the tuple is presumably the (major, minor) PDF version that
# introduced the operator and the integer presumably the operand count (with -1
# for a variable count) -- confirm against the constructors.
const PD_CONTENT_OPERATORS = Dict(
    # Text-showing operators.
    "\'" => PDPageTextRun("\'", (1, 0), 1),
    "\"" => PDPageTextRun("\"", (1, 0), 3),
    "Tj" => PDPageTextRun("Tj", (1, 0), 1),
    "TJ" => PDPageTextRun("TJ", (1, 0), 1),

    # Operators that open a group.
    "BDC" => PDPage_BeginGroup("BDC", (1, 2), 2, PDPageMarkedContent),
    "BMC" => PDPage_BeginGroup("BMC", (1, 2), 1, PDPageMarkedContent),
    "BT" => PDPage_BeginGroup("BT", (1, 0), 0, PDPageTextObject),

    # Operators that close a group.
    "EMC" => PDPage_EndGroup("EMC", (1, 2), 0),
    "ET" => PDPage_EndGroup("ET", (1, 0), 0),

    # Inline image opener.
    "BI" => PDPage_BeginInlineImage("BI", (1, 0), 0, PDPageInlineImage),

    # All remaining operators are plain page elements.
    "b" => PDPageElement("b", (1, 0), 0),
    "b*" => PDPageElement("b*", (1, 0), 0),
    "B" => PDPageElement("B", (1, 0), 0),
    "B*" => PDPageElement("B*", (1, 0), 0),
    "BX" => PDPageElement("BX", (1, 1), 0),
    "c" => PDPageElement("c", (1, 0), 6),
    "cm" => PDPageElement("cm", (1, 0), 6),
    "cs" => PDPageElement("cs", (1, 1), 1),
    "CS" => PDPageElement("CS", (1, 1), 1),
    "d" => PDPageElement("d", (1, 0), 2),
    "d0" => PDPageElement("d0", (1, 0), 2),
    "d1" => PDPageElement("d1", (1, 0), 6),
    "Do" => PDPageElement("Do", (1, 0), 1),
    "DP" => PDPageElement("DP", (1, 2), 0),
    "EI" => PDPageElement("EI", (1, 0), 0),
    "EX" => PDPageElement("EX", (1, 1), 0),
    "f" => PDPageElement("f", (1, 0), 0),
    "f*" => PDPageElement("f*", (1, 0), 0),
    "F" => PDPageElement("F", (1, 0), 0),
    "g" => PDPageElement("g", (1, 0), 1),
    "G" => PDPageElement("G", (1, 0), 1),
    "gs" => PDPageElement("gs", (1, 2), 1),
    "h" => PDPageElement("h", (1, 0), 0),
    "i" => PDPageElement("i", (1, 0), 1),
    "ID" => PDPageElement("ID", (1, 0), 0),
    "j" => PDPageElement("j", (1, 0), 1),
    "J" => PDPageElement("J", (1, 0), 1),
    "k" => PDPageElement("k", (1, 0), 4),
    "K" => PDPageElement("K", (1, 0), 4),
    "l" => PDPageElement("l", (1, 0), 2),
    "m" => PDPageElement("m", (1, 0), 2),
    "M" => PDPageElement("M", (1, 0), 1),
    "MP" => PDPageElement("MP", (1, 2), 0),
    "n" => PDPageElement("n", (1, 0), 0),
    "q" => PDPageElement("q", (1, 0), 0),
    "Q" => PDPageElement("Q", (1, 0), 0),
    "re" => PDPageElement("re", (1, 0), 4),
    "rg" => PDPageElement("rg", (1, 0), 3),
    "RG" => PDPageElement("RG", (1, 0), 3),
    "ri" => PDPageElement("ri", (1, 0), 1),
    "s" => PDPageElement("s", (1, 0), 0),
    "S" => PDPageElement("S", (1, 0), 0),
    "sc" => PDPageElement("sc", (1, 1), -1),
    "SC" => PDPageElement("SC", (1, 1), -1),
    "scn" => PDPageElement("scn", (1, 2), -1),
    "SCN" => PDPageElement("SCN", (1, 2), -1),
    "sh" => PDPageElement("sh", (1, 3), 1),
    "T*" => PDPageElement("T*", (1, 0), 0),
    "Tc" => PDPageElement("Tc", (1, 0), 1),
    "Td" => PDPageElement("Td", (1, 0), 2),
    "TD" => PDPageElement("TD", (1, 0), 2),
    "Tf" => PDPageElement("Tf", (1, 0), 2),
    "TL" => PDPageElement("TL", (1, 0), 1),
    "Tm" => PDPageElement("Tm", (1, 0), 6),
    "Tr" => PDPageElement("Tr", (1, 0), 1),
    "Ts" => PDPageElement("Ts", (1, 0), 1),
    "Tw" => PDPageElement("Tw", (1, 0), 1),
    "Tz" => PDPageElement("Tz", (1, 0), 1),
    "v" => PDPageElement("v", (1, 0), 4),
    "w" => PDPageElement("w", (1, 0), 1),
    "W" => PDPageElement("W", (1, 0), 0),
    "W*" => PDPageElement("W*", (1, 0), 0),
    "y" => PDPageElement("y", (1, 0), 4),
)
480484

481485
"""
    get_pdfcontentops(b::Vector{UInt8}, ops::AbstractDict=PD_CONTENT_OPERATORS)

Match the longest content-operator keyword found at the start of `b`.

# Arguments
- `b`: raw bytes whose leading 1 to 3 bytes are probed; `b` is not modified.
- `ops`: lookup table from keyword string to prototype object. Defaults to
  `PD_CONTENT_OPERATORS`.

# Returns
A tuple `(n, obj)` where `n` is the number of leading bytes that formed the
keyword and `obj` is a `deepcopy` of the table entry, so the caller can mutate
it without touching the shared prototype.

# Throws
- `ErrorException` when no prefix of `b` (including the case of an empty `b`)
  is a key of `ops`.
"""
function get_pdfcontentops(b::Vector{UInt8},
                           ops::AbstractDict=PD_CONTENT_OPERATORS)
    # PDF content operators are never longer than 3 bytes and may not be
    # delimited. Hence, probe the 3-byte prefix first, then 2 bytes and
    # lastly 1 byte, so that e.g. "BDC" wins over "B".
    maxlen = min(length(b), 3)
    for n in maxlen:-1:1
        # `b[1:n]` is a fresh copy, so handing it to `String` leaves `b` intact.
        proto = get(ops, String(b[1:n]), nothing)
        if proto !== nothing
            return (n, deepcopy(proto))
        end
    end
    error("No content operators found in: $(String(b[1:maxlen]))")
end
499498

500499
struct TextLayout

0 commit comments

Comments
 (0)