File tree Expand file tree Collapse file tree 1 file changed +5
-4
lines changed Expand file tree Collapse file tree 1 file changed +5
-4
lines changed Original file line number Diff line number Diff line change @@ -214,19 +214,20 @@ def clean_mayhem_title_text(text):
214
214
if not text :
215
215
return ""
216
216
217
- # Remove links
218
- link_regex = r"\[( [^\]]+) \]\(( [^)]+) \)"
217
+ # Remove links (and add limit to avoid catastrophic backtracking)
218
+ link_regex = r"\[[^\]]{1,100}? \]\([^)]{1,200}? \)"
219
219
text = re .sub (link_regex , "" , text )
220
220
221
221
# Remove URL encoded characters
222
222
url_encoding_regex = r"&#x\d+;"
223
223
text = re .sub (url_encoding_regex , "" , text )
224
224
225
225
# Remove single or double quotes
226
- text = text .replace ('"' , "" ).replace ("'" , "" )
226
+ quotes_regex = r"[\"']"
227
+ text = re .sub (quotes_regex , "" , text )
227
228
228
229
# Remove TDID
229
- tdid_regex = r"TDID-\d+\s-\s"
230
+ tdid_regex = r"TDID-\d+\s* -\s*|TDID-\d+- "
230
231
text = re .sub (tdid_regex , "" , text )
231
232
232
233
return text .strip ()
You can’t perform that action at this time.
0 commit comments