Skip to content

Commit 9447a5e

Browse files
committed
bugfix: add # to start of paragraph title
1 parent 7e5b4eb commit 9447a5e

File tree

1 file changed

+20
-8
lines changed

1 file changed

+20
-8
lines changed

paddlex/inference/pipelines/layout_parsing/result_v2.py

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -253,24 +253,36 @@ def _to_markdown(self) -> dict:
253253

254254
def _format_data(obj):
255255

256-
def format_title(content_value):
256+
def format_title(title):
257257
"""
258-
Normalize chapter title by ensuring one space between numbering and title content.
258+
Normalize chapter title.
259+
Add the '#' to indicate the level of the title.
259260
If numbering exists, ensure there's exactly one space between it and the title content.
260261
If numbering does not exist, return the original title unchanged.
261262
262-
:param content_value: Original chapter title string.
263+
:param title: Original chapter title string.
263264
:return: Normalized chapter title string.
264265
"""
265-
match = self.title_pattern.match(content_value)
266+
match = self.title_pattern.match(title)
266267
if match:
267268
numbering = match.group(1).strip()
268269
title_content = match.group(3).lstrip()
269270
# Return numbering and title content separated by one space
270-
return numbering + " " + title_content
271-
else:
272-
# No numbering detected; return original title
273-
return content_value
271+
title = numbering + " " + title_content
272+
273+
title = title.rstrip(".")
274+
level = (
275+
title.count(
276+
".",
277+
)
278+
+ 1
279+
if "." in title
280+
else 1
281+
)
282+
return f"#{'#' * level} {title}".replace("-\n", "").replace(
283+
"\n",
284+
" ",
285+
)
274286

275287
def format_centered_text(key):
276288
return (

0 commit comments

Comments
 (0)