Skip to content

Commit 452a023

Browse files
committed
🐛 fix: filter bad chars in comment content
1 parent e922e71 commit 452a023

File tree

3 files changed

+9
-3
lines changed

3 files changed

+9
-3
lines changed

packages/biliass/rust/src/reader/xml.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -152,6 +152,8 @@ fn filter_bad_chars(string: &str) -> String {
152152
if ('\u{00}'..='\u{08}').contains(&c)
153153
|| c == '\u{0b}'
154154
|| c == '\u{0c}'
155+
|| c == '\u{2028}'
156+
|| c == '\u{2029}'
155157
|| ('\u{0e}'..='\u{1f}').contains(&c)
156158
{
157159
'\u{fffd}'

packages/biliass/src/biliass/biliass.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,7 @@ def read_comments_bilibili_protobuf(protobuf: bytes | str, fontsize: float) -> G
8585
elem.progress / 1000, # 视频内出现的时间
8686
elem.ctime, # 弹幕的发送时间(时间戳)
8787
i,
88-
c,
88+
filter_bad_chars(c),
8989
{1: 0, 4: 2, 5: 1, 6: 3}[elem.mode],
9090
elem.color,
9191
size,
@@ -98,7 +98,7 @@ def read_comments_bilibili_protobuf(protobuf: bytes | str, fontsize: float) -> G
9898
elem.progress / 1000,
9999
elem.ctime,
100100
i,
101-
c,
101+
filter_bad_chars(c),
102102
"bilipos",
103103
elem.color,
104104
elem.fontsize,
@@ -589,6 +589,10 @@ def convert_type2(row, height, bottom_reserved):
589589
return height - bottom_reserved - row
590590

591591

592+
def filter_bad_chars(string: str) -> str:
593+
return re.sub("[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\u2028\u2029]", "\ufffd", string)
594+
595+
592596
class safe_list(list):
593597
def get(self, index, default=None):
594598
def is_empty(value):

tests/test_biliass/test_corpus

0 commit comments

Comments
 (0)