File tree: 1 file changed, +20 -8 lines changed
paddlex/inference/pipelines/layout_parsing: 1 file changed, +20 -8 lines changed
Original file line number | Diff line number | Diff line change
@@ -253,24 +253,36 @@ def _to_markdown(self) -> dict:
253
253
254
254
def _format_data (obj ):
255
255
256
- def format_title (content_value ):
256
+ def format_title (title ):
257
257
"""
258
- Normalize chapter title by ensuring one space between numbering and title content.
258
+ Normalize chapter title.
259
+ Add the '#' to indicate the level of the title.
259
260
If numbering exists, ensure there's exactly one space between it and the title content.
260
261
If numbering does not exist, return the original title unchanged.
261
262
262
- :param content_value : Original chapter title string.
263
+ :param title : Original chapter title string.
263
264
:return: Normalized chapter title string.
264
265
"""
265
- match = self .title_pattern .match (content_value )
266
+ match = self .title_pattern .match (title )
266
267
if match :
267
268
numbering = match .group (1 ).strip ()
268
269
title_content = match .group (3 ).lstrip ()
269
270
# Return numbering and title content separated by one space
270
- return numbering + " " + title_content
271
- else :
272
- # No numbering detected; return original title
273
- return content_value
271
+ title = numbering + " " + title_content
272
+
273
+ title = title .rstrip ("." )
274
+ level = (
275
+ title .count (
276
+ "." ,
277
+ )
278
+ + 1
279
+ if "." in title
280
+ else 1
281
+ )
282
+ return f"#{ '#' * level } { title } " .replace ("-\n " , "" ).replace (
283
+ "\n " ,
284
+ " " ,
285
+ )
274
286
275
287
def format_centered_text (key ):
276
288
return (
You can’t perform that action at this time.
0 commit comments