@@ -81,12 +81,11 @@ def __init__(
81
81
self .page_obj = page_obj # Reference to the PageObject for font width maps
82
82
self .obj = obj
83
83
self .pdf = pdf
84
- self . orientations = orientations
84
+
85
85
self .space_width = space_width
86
86
self .content_key = content_key
87
87
self .visitor_operand_before = visitor_operand_before
88
88
self .visitor_operand_after = visitor_operand_after
89
- self .visitor_text = visitor_text
90
89
91
90
# Text state
92
91
self .text : str = ""
@@ -96,7 +95,17 @@ def __init__(
96
95
# Matrix state
97
96
self .cm_matrix : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
98
97
self .tm_matrix : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
99
- self .cm_stack : List [Tuple [Any , ...]] = []
98
+ self .cm_stack : List [
99
+ Tuple [
100
+ List [float ],
101
+ Tuple [Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]],
102
+ float ,
103
+ float ,
104
+ float ,
105
+ float ,
106
+ float ,
107
+ ]
108
+ ] = []
100
109
101
110
# Previous matrices for tracking changes
102
111
self .cm_prev : List [float ] = [1.0 , 0.0 , 0.0 , 1.0 , 0.0 , 0.0 ]
@@ -120,16 +129,16 @@ def __init__(
120
129
"NotInitialized" ,
121
130
None ,
122
131
) # (encoding, CMAP, font resource name, font)
123
-
124
- # Actual string size tracking
125
- self ._actual_str_size : Dict [str , float ] = {"str_widths" : 0.0 , "space_width" : 0.0 , "str_height" : 0.0 }
126
-
127
- # Character maps for fonts
132
+ self .orientations = orientations
133
+ self .visitor_text : Optional [Callable [[Any , Any , Any , Any , Any ], None ]] = None
128
134
self .cmaps : Dict [
129
135
str ,
130
136
Tuple [str , float , Union [str , Dict [int , str ]], Dict [str , str ], DictionaryObject ],
131
137
] = {}
132
138
139
+ # Actual string size tracking
140
+ self ._actual_str_size : Dict [str , float ] = {"str_widths" : 0.0 , "space_width" : 0.0 , "str_height" : 0.0 }
141
+
133
142
# Resources dictionary
134
143
self .resources_dict : Optional [DictionaryObject ] = None
135
144
@@ -231,8 +240,7 @@ def _process_operation(self, operator: bytes, operands: List[Any]) -> None:
231
240
if self .visitor_operand_after is not None :
232
241
self .visitor_operand_after (operator , operands , self .cm_matrix , self .tm_matrix )
233
242
234
def compute_str_widths(self, str_widths: float) -> float:
    """Scale an accumulated string width down by a factor of 1000.

    Args:
        str_widths: Raw width value; the callers in this file pass
            entries read from ``self._actual_str_size``.

    Returns:
        ``str_widths / 1000``.
    """
    # NOTE(review): the divisor presumably converts font metrics expressed
    # in thousandths of a unit into text-space units -- confirm against the
    # code that fills ``_actual_str_size``.
    return str_widths / 1000
237
245
238
246
def _flush_text (self ) -> None :
@@ -355,22 +363,22 @@ def _handle_operation_move_text_position(self, operands: List[Any]) -> None:
355
363
tx , ty = float (operands [0 ]), float (operands [1 ])
356
364
self .tm_matrix [4 ] += tx * self .tm_matrix [0 ] + ty * self .tm_matrix [2 ]
357
365
self .tm_matrix [5 ] += tx * self .tm_matrix [1 ] + ty * self .tm_matrix [3 ]
358
- str_widths = self ._compute_str_widths (self ._actual_str_size ["str_widths" ])
366
+ str_widths = self .compute_str_widths (self ._actual_str_size ["str_widths" ])
359
367
self ._actual_str_size ["str_widths" ] = 0.0
360
368
self ._handle_position_change (str_widths )
361
369
362
370
def _handle_operation_set_text_matrix(self, operands: List[Any]) -> None:
    """Handle Tm (Set text matrix) operation.

    Replaces ``self.tm_matrix`` with the first six operands converted to
    ``float``, then reports the position change together with the string
    width accumulated so far.

    Args:
        operands: Operands of the operator; only the first six are read.
    """
    self.tm_matrix = [float(operand) for operand in operands[:6]]

    # Convert the pending width and clear the accumulator *before* the
    # position change is handled, so the handler sees a reset counter.
    str_widths = self.compute_str_widths(self._actual_str_size["str_widths"])
    self._actual_str_size["str_widths"] = 0.0
    self._handle_position_change(str_widths)
368
376
369
377
def _handle_operation_move_to_next_line(self, operands: List[Any]) -> None:
    """Handle T* (Move to next line) operation.

    Shifts the translation components of ``self.tm_matrix`` (indices 4 and
    5) by ``-self.TL`` scaled with matrix entries 2 and 3 respectively,
    then reports the position change together with the string width
    accumulated so far.

    Args:
        operands: Operands of the operator; not read by this handler.
    """
    self.tm_matrix[4] -= self.TL * self.tm_matrix[2]
    self.tm_matrix[5] -= self.TL * self.tm_matrix[3]

    # Convert the pending width and clear the accumulator *before* the
    # position change is handled, so the handler sees a reset counter.
    str_widths = self.compute_str_widths(self._actual_str_size["str_widths"])
    self._actual_str_size["str_widths"] = 0.0
    self._handle_position_change(str_widths)
376
384
@@ -389,7 +397,7 @@ def _handle_operation_show_text(self, operands: List[Any]) -> None:
389
397
self ._space_width ,
390
398
self ._actual_str_size ,
391
399
)
392
- str_widths = self ._compute_str_widths (self ._actual_str_size ["str_widths" ])
400
+ str_widths = self .compute_str_widths (self ._actual_str_size ["str_widths" ])
393
401
self ._handle_position_change (str_widths )
394
402
395
403
def _handle_operation_show_text_with_positioning (self , operands : List [Any ]) -> None :
@@ -471,7 +479,7 @@ def _handle_position_change(self, str_widths: float) -> None:
471
479
self .font_size ,
472
480
self .visitor_text ,
473
481
str_widths ,
474
- self ._compute_str_widths (self ._actual_str_size ["space_width" ]),
482
+ self .compute_str_widths (self ._actual_str_size ["space_width" ]),
475
483
self ._actual_str_size ["str_height" ],
476
484
)
477
485
if self .text == "" :
@@ -482,16 +490,15 @@ def _handle_position_change(self, str_widths: float) -> None:
482
490
483
491
def _get_actual_font_widths (
484
492
self ,
485
- cmap : Tuple [Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]],
493
+ cmap : Tuple [
494
+ Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]
495
+ ],
486
496
text_operands : str ,
487
497
font_size : float ,
488
498
space_width : float ,
489
499
) -> Tuple [float , float , float ]:
490
- """Get actual font widths for text operands."""
491
500
font_widths : float = 0
492
501
font_name : str = cmap [2 ]
493
-
494
- # Use the page object's font width maps
495
502
if font_name not in self .page_obj ._font_width_maps :
496
503
if cmap [3 ] is None :
497
504
font_width_map : Dict [Any , float ] = {}
@@ -505,7 +512,6 @@ def _get_actual_font_widths(
505
512
if actual_space_width == 0 :
506
513
actual_space_width = space_width
507
514
self .page_obj ._font_width_maps [font_name ] = (font_width_map , space_char , actual_space_width )
508
-
509
515
font_width_map = self .page_obj ._font_width_maps [font_name ][0 ]
510
516
space_char = self .page_obj ._font_width_maps [font_name ][1 ]
511
517
actual_space_width = self .page_obj ._font_width_maps [font_name ][2 ]
@@ -516,27 +522,26 @@ def _get_actual_font_widths(
516
522
font_widths += actual_space_width
517
523
continue
518
524
font_widths += compute_font_width (font_width_map , char )
519
-
520
525
return (font_widths * font_size , space_width * font_size , font_size )
521
526
522
-
523
-
524
527
def _handle_tj (
525
528
self ,
526
529
text : str ,
527
530
operands : List [Union [str , TextStringObject ]],
528
531
cm_matrix : List [float ],
529
532
tm_matrix : List [float ],
530
- cmap : Tuple [Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]],
533
+ cmap : Tuple [
534
+ Union [str , Dict [int , str ]], Dict [str , str ], str , Optional [DictionaryObject ]
535
+ ],
531
536
orientations : Tuple [int , ...],
532
537
font_size : float ,
533
538
rtl_dir : bool ,
534
539
visitor_text : Optional [Callable [[Any , Any , Any , Any , Any ], None ]],
535
540
space_width : float ,
536
541
actual_str_size : Dict [str , float ],
537
542
) -> Tuple [str , bool , Dict [str , float ]]:
538
- """Handle text showing operations."""
539
- text_operands , is_str_operands = get_text_operands ( operands , cm_matrix , tm_matrix , cmap , orientations )
543
+ text_operands , is_str_operands = get_text_operands (
544
+ operands , cm_matrix , tm_matrix , cmap , orientations )
540
545
if is_str_operands :
541
546
text += text_operands
542
547
else :
@@ -550,13 +555,8 @@ def _handle_tj(
550
555
rtl_dir ,
551
556
visitor_text ,
552
557
)
553
-
554
- font_widths , actual_str_size ["space_width" ], actual_str_size ["str_height" ] = self ._get_actual_font_widths (
555
- cmap ,
556
- text_operands ,
557
- font_size ,
558
- space_width ,
559
- )
558
+ font_widths , actual_str_size ["space_width" ], actual_str_size ["str_height" ] = (
559
+ self ._get_actual_font_widths (cmap , text_operands , font_size , space_width ))
560
560
actual_str_size ["str_widths" ] += font_widths
561
561
562
562
return text , rtl_dir , actual_str_size
0 commit comments