@@ -575,21 +575,32 @@ def get_single_block_parsing_res(
575
575
single_block_layout_parsing_res = []
576
576
input_img = overall_ocr_res ["doc_preprocessor_res" ]["output_img" ]
577
577
seal_index = 0
578
+ with_doc_title = False
579
+ max_block_area = 0.0
580
+ paragraph_title_indexs = []
578
581
579
582
layout_det_res_list , _ = _remove_overlap_blocks (
580
583
deepcopy (layout_det_res ["boxes" ]),
581
584
threshold = 0.5 ,
582
585
smaller = True ,
583
586
)
584
587
585
- for box_info in layout_det_res_list :
588
+ for box_idx , box_info in enumerate ( layout_det_res_list ) :
586
589
block_bbox = box_info ["coordinate" ]
587
590
label = box_info ["label" ]
588
591
rec_res = {"boxes" : [], "rec_texts" : [], "rec_labels" : [], "flag" : False }
589
592
seg_start_coordinate = float ("inf" )
590
593
seg_end_coordinate = float ("-inf" )
591
594
num_of_lines = 1
592
595
596
+ if label == "doc_title" :
597
+ with_doc_title = True
598
+ elif label == "paragraph_title" :
599
+ paragraph_title_indexs .append (box_idx )
600
+
601
+ block_area = (block_bbox [2 ] - block_bbox [0 ]) * (block_bbox [3 ] - block_bbox [1 ])
602
+ max_block_area = max (max_block_area , block_area )
603
+
593
604
if label == "table" :
594
605
for table_res in table_res_list :
595
606
if len (table_res ["cell_box_list" ]) == 0 :
@@ -679,9 +690,22 @@ def get_single_block_parsing_res(
679
690
"seg_start_coordinate" : seg_start_coordinate ,
680
691
"seg_end_coordinate" : seg_end_coordinate ,
681
692
"num_of_lines" : num_of_lines ,
693
+ "block_area" : block_area ,
682
694
},
683
695
)
684
696
697
+ if (
698
+ not with_doc_title
699
+ and len (paragraph_title_indexs ) == 1
700
+ and single_block_layout_parsing_res [paragraph_title_indexs [0 ]].get (
701
+ "block_area" , 0
702
+ )
703
+ > max_block_area * 0.3
704
+ ):
705
+ single_block_layout_parsing_res [paragraph_title_indexs [0 ]][
706
+ "block_label"
707
+ ] = "doc_title"
708
+
685
709
if len (layout_det_res_list ) == 0 :
686
710
for ocr_rec_box , ocr_rec_text in zip (
687
711
overall_ocr_res ["rec_boxes" ], overall_ocr_res ["rec_texts" ]
0 commit comments