From 86b843d259c63d717b4fe5162cc98b5304c62240 Mon Sep 17 00:00:00 2001 From: Beau Horenberger <36315656+horenbergerb@users.noreply.github.com> Date: Fri, 3 Jan 2025 13:11:38 -0800 Subject: [PATCH] Fix bug processing duplicate punctuation in TextPreprocessor.py Strings like 'then, he' were being compressed to 'then,he' which reduced audio quality. --- GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py b/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py index b90bd929e..1a97f1ec3 100644 --- a/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py +++ b/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py @@ -20,7 +20,7 @@ language=os.environ.get("language","Auto") language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language i18n = I18nAuto(language=language) -punctuation = set(['!', '?', '…', ',', '.', '-'," "]) +punctuation = set(['!', '?', '…', ',', '.', '-']) def get_first(text:str) -> str: pattern = "[" + "".join(re.escape(sep) for sep in splits) + "]" @@ -234,11 +234,13 @@ def filter_text(self,texts): return _text - def replace_consecutive_punctuation(self,text): + def replace_consecutive_punctuation(self, text): + # Collapse consecutive punctuation marks punctuations = ''.join(re.escape(p) for p in punctuation) pattern = f'([{punctuations}])([{punctuations}])+' - result = re.sub(pattern, r'\1', text) - return result - + text = re.sub(pattern, r'\1', text) + # Collapse multiple spaces to a single space + text = re.sub(r'\s+', ' ', text) + return text