@@ -81,55 +81,60 @@ def __init__(
81
81
self .page_obj = page_obj # Reference to the PageObject for font width maps
82
82
self .obj = obj
83
83
self .pdf = pdf
84
- self . orientations = orientations
84
+
85
85
self .space_width = space_width
86
86
self .content_key = content_key
87
87
self .visitor_operand_before = visitor_operand_before
88
88
self .visitor_operand_after = visitor_operand_after
89
- self .visitor_text = visitor_text
90
-
91
- # Text state
92
- self .text : str = ""
93
- self .output : str = ""
94
- self .rtl_dir : bool = False # right-to-left
95
89
96
90
# Matrix state
97
91
self .cm_matrix : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
98
92
self .tm_matrix : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
99
- self .cm_stack : List [Tuple [Any , ...]] = []
100
-
101
- # Previous matrices for tracking changes
93
+ self .cm_stack : List [
94
+ Tuple [
95
+ List [float ],
96
+ Tuple [Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]],
97
+ float ,
98
+ float ,
99
+ float ,
100
+ float ,
101
+ float ,
102
+ ]
103
+ ] = []
104
+
105
+ # Store the last modified matrices; can be an intermediate position
102
106
self .cm_prev : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
103
107
self .tm_prev : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
104
108
105
- # Memo matrices for visitor callbacks
109
+ # Store the position at the beginning of building the text
106
110
self .memo_cm : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
107
111
self .memo_tm : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
108
112
109
113
# Font and text scaling state
110
- self .char_scale : float = 1.0
111
- self .space_scale : float = 1.0
114
+ self .char_scale = 1.0
115
+ self .space_scale = 1.0
112
116
self ._space_width : float = 500.0 # will be set correctly at first Tf
113
- self .TL : float = 0.0
114
- self .font_size : float = 12.0 # init just in case
117
+ self .TL = 0.0
118
+ self .font_size = 12.0 # init just in case
119
+
120
+ # Text state
121
+ self .text : str = ""
122
+ self .output : str = ""
123
+ self .rtl_dir : bool = False # right-to-left
115
124
116
- # Character map state
117
125
self .cmap : Tuple [Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]] = (
118
126
"charmap" ,
119
127
{},
120
128
"NotInitialized" ,
121
129
None ,
122
130
) # (encoding, CMAP, font resource name, font)
131
+ self .orientations : Tuple [int , ...] = orientations
132
+ self .visitor_text : Optional [Callable [[Any , Any , Any , Any , Any ], None ]] = None
133
+ self .cmaps : Dict [str , Tuple [str , float , Union [str , Dict [int , str ]], Dict [str , str ], DictionaryObject ]] = {}
123
134
124
135
# Actual string size tracking
125
136
self ._actual_str_size : Dict [str , float ] = {"str_widths" : 0.0 , "space_width" : 0.0 , "str_height" : 0.0 }
126
137
127
- # Character maps for fonts
128
- self .cmaps : Dict [
129
- str ,
130
- Tuple [str , float , Union [str , Dict [int , str ]], Dict [str , str ], DictionaryObject ],
131
- ] = {}
132
-
133
138
# Resources dictionary
134
139
self .resources_dict : Optional [DictionaryObject ] = None
135
140
@@ -231,8 +236,7 @@ def _process_operation(self, operator: bytes, operands: List[Any]) -> None:
231
236
if self .visitor_operand_after is not None :
232
237
self .visitor_operand_after (operator , operands , self .cm_matrix , self .tm_matrix )
233
238
234
def compute_str_widths(self, str_widths: float) -> float:
    """
    Scale a width value down by a factor of 1000.

    Args:
        str_widths: The width value to convert.

    Returns:
        ``str_widths`` divided by 1000.
    """
    # NOTE(review): presumably converts from 1/1000 glyph-space units to
    # text-space units -- confirm against how the font width maps are built.
    scaled_width = str_widths / 1000
    return scaled_width
237
241
238
242
def _flush_text (self ) -> None :
@@ -355,22 +359,22 @@ def _handle_operation_move_text_position(self, operands: List[Any]) -> None:
355
359
tx , ty = float (operands [0 ]), float (operands [1 ])
356
360
self .tm_matrix [4 ] += tx * self .tm_matrix [0 ] + ty * self .tm_matrix [2 ]
357
361
self .tm_matrix [5 ] += tx * self .tm_matrix [1 ] + ty * self .tm_matrix [3 ]
358
- str_widths = self ._compute_str_widths (self ._actual_str_size ["str_widths" ])
362
+ str_widths = self .compute_str_widths (self ._actual_str_size ["str_widths" ])
359
363
self ._actual_str_size ["str_widths" ] = 0.0
360
364
self ._handle_position_change (str_widths )
361
365
362
366
def _handle_operation_set_text_matrix (self , operands : List [Any ]) -> None :
363
367
"""Handle Tm (Set text matrix) operation."""
364
368
self .tm_matrix = [float (operand ) for operand in operands [:6 ]]
365
- str_widths = self ._compute_str_widths (self ._actual_str_size ["str_widths" ])
369
+ str_widths = self .compute_str_widths (self ._actual_str_size ["str_widths" ])
366
370
self ._actual_str_size ["str_widths" ] = 0.0
367
371
self ._handle_position_change (str_widths )
368
372
369
373
def _handle_operation_move_to_next_line (self , operands : List [Any ]) -> None :
370
374
"""Handle T* (Move to next line) operation."""
371
375
self .tm_matrix [4 ] -= self .TL * self .tm_matrix [2 ]
372
376
self .tm_matrix [5 ] -= self .TL * self .tm_matrix [3 ]
373
- str_widths = self ._compute_str_widths (self ._actual_str_size ["str_widths" ])
377
+ str_widths = self .compute_str_widths (self ._actual_str_size ["str_widths" ])
374
378
self ._actual_str_size ["str_widths" ] = 0.0
375
379
self ._handle_position_change (str_widths )
376
380
@@ -389,7 +393,7 @@ def _handle_operation_show_text(self, operands: List[Any]) -> None:
389
393
self ._space_width ,
390
394
self ._actual_str_size ,
391
395
)
392
- str_widths = self ._compute_str_widths (self ._actual_str_size ["str_widths" ])
396
+ str_widths = self .compute_str_widths (self ._actual_str_size ["str_widths" ])
393
397
self ._handle_position_change (str_widths )
394
398
395
399
def _handle_operation_show_text_with_positioning (self , operands : List [Any ]) -> None :
@@ -471,7 +475,7 @@ def _handle_position_change(self, str_widths: float) -> None:
471
475
self .font_size ,
472
476
self .visitor_text ,
473
477
str_widths ,
474
- self ._compute_str_widths (self ._actual_str_size ["space_width" ]),
478
+ self .compute_str_widths (self ._actual_str_size ["space_width" ]),
475
479
self ._actual_str_size ["str_height" ],
476
480
)
477
481
if self .text == "" :
@@ -482,16 +486,15 @@ def _handle_position_change(self, str_widths: float) -> None:
482
486
483
487
def _get_actual_font_widths (
484
488
self ,
485
- cmap : Tuple [Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]],
489
+ cmap : Tuple [
490
+ Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]
491
+ ],
486
492
text_operands : str ,
487
493
font_size : float ,
488
494
space_width : float ,
489
495
) -> Tuple [float , float , float ]:
490
- """Get actual font widths for text operands."""
491
496
font_widths : float = 0
492
497
font_name : str = cmap [2 ]
493
-
494
- # Use the page object's font width maps
495
498
if font_name not in self .page_obj ._font_width_maps :
496
499
if cmap [3 ] is None :
497
500
font_width_map : Dict [Any , float ] = {}
@@ -505,7 +508,6 @@ def _get_actual_font_widths(
505
508
if actual_space_width == 0 :
506
509
actual_space_width = space_width
507
510
self .page_obj ._font_width_maps [font_name ] = (font_width_map , space_char , actual_space_width )
508
-
509
511
font_width_map = self .page_obj ._font_width_maps [font_name ][0 ]
510
512
space_char = self .page_obj ._font_width_maps [font_name ][1 ]
511
513
actual_space_width = self .page_obj ._font_width_maps [font_name ][2 ]
@@ -516,27 +518,26 @@ def _get_actual_font_widths(
516
518
font_widths += actual_space_width
517
519
continue
518
520
font_widths += compute_font_width (font_width_map , char )
519
-
520
521
return (font_widths * font_size , space_width * font_size , font_size )
521
522
522
-
523
-
524
523
def _handle_tj (
525
524
self ,
526
525
text : str ,
527
526
operands : List [Union [str , TextStringObject ]],
528
527
cm_matrix : List [float ],
529
528
tm_matrix : List [float ],
530
- cmap : Tuple [Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]],
529
+ cmap : Tuple [
530
+ Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]
531
+ ],
531
532
orientations : Tuple [int , ...],
532
533
font_size : float ,
533
534
rtl_dir : bool ,
534
535
visitor_text : Optional [Callable [[Any , Any , Any , Any , Any ], None ]],
535
536
space_width : float ,
536
537
actual_str_size : Dict [str , float ],
537
538
) -> Tuple [str , bool , Dict [str , float ]]:
538
- """Handle text showing operations."""
539
- text_operands , is_str_operands = get_text_operands ( operands , cm_matrix , tm_matrix , cmap , orientations )
539
+ text_operands , is_str_operands = get_text_operands (
540
+ operands , cm_matrix , tm_matrix , cmap , orientations )
540
541
if is_str_operands :
541
542
text += text_operands
542
543
else :
@@ -550,13 +551,8 @@ def _handle_tj(
550
551
rtl_dir ,
551
552
visitor_text ,
552
553
)
553
-
554
- font_widths , actual_str_size ["space_width" ], actual_str_size ["str_height" ] = self ._get_actual_font_widths (
555
- cmap ,
556
- text_operands ,
557
- font_size ,
558
- space_width ,
559
- )
554
+ font_widths , actual_str_size ["space_width" ], actual_str_size ["str_height" ] = (
555
+ self ._get_actual_font_widths (cmap , text_operands , font_size , space_width ))
560
556
actual_str_size ["str_widths" ] += font_widths
561
557
562
558
return text , rtl_dir , actual_str_size
0 commit comments