From 86b843d259c63d717b4fe5162cc98b5304c62240 Mon Sep 17 00:00:00 2001
From: Beau Horenberger <36315656+horenbergerb@users.noreply.github.com>
Date: Fri, 3 Jan 2025 13:11:38 -0800
Subject: [PATCH] Fix bug processing duplicate punctuation in
 TextPreprocessor.py

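Strings like 'then, he' were being compressed to 'then,he', which reduced audio quality.
The space character is no longer treated as punctuation; runs of whitespace are
instead collapsed to a single space after consecutive punctuation is collapsed.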
---
 GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py b/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py
index b90bd929e..1a97f1ec3 100644
--- a/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py
+++ b/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py
@@ -20,7 +20,7 @@
 language=os.environ.get("language","Auto")
 language=sys.argv[-1] if sys.argv[-1] in scan_language_list() else language
 i18n = I18nAuto(language=language)
-punctuation = set(['!', '?', '…', ',', '.', '-'," "])
+punctuation = set(['!', '?', '…', ',', '.', '-'])
 
 def get_first(text:str) -> str:
     pattern = "[" + "".join(re.escape(sep) for sep in splits) + "]"
@@ -234,11 +234,13 @@ def filter_text(self,texts):
         return _text
     
 
-    def replace_consecutive_punctuation(self,text):
+    def replace_consecutive_punctuation(self, text):
+        # Collapse consecutive punctuation marks
         punctuations = ''.join(re.escape(p) for p in punctuation)
         pattern = f'([{punctuations}])([{punctuations}])+'
-        result = re.sub(pattern, r'\1', text)
-        return result
-
+        text = re.sub(pattern, r'\1', text)
 
+        # Collapse any run of whitespace (spaces, tabs, newlines) to a single space
+        text = re.sub(r'\s+', ' ', text)
 
+        return text