Skip to content

Commit d67570a

Browse files
Merge pull request #210 from fireblade2534/preserve-custom-phenomes
This fix allows for inputting custom pronunciations through text. For example: "This is a test of a [bla bla](/ðɪs ɪz ˈoʊnli ɐ tˈɛst/) system." It ensures that normalization does not affect custom pronunciations.
2 parents 2a54140 + 43576c4 commit d67570a

File tree

2 files changed

+23
-5
lines changed

2 files changed

+23
-5
lines changed

api/src/services/text_processing/normalizer.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,6 @@ def normalize_text(text: str,normalization_options: NormalizationOptions) -> str
270270
text = text.replace(chr(8216), "'").replace(chr(8217), "'")
271271
text = text.replace("«", chr(8220)).replace("»", chr(8221))
272272
text = text.replace(chr(8220), '"').replace(chr(8221), '"')
273-
text = text.replace("(", "«").replace(")", "»")
274273

275274
# Handle CJK punctuation and some non standard chars
276275
for a, b in zip("、。!,:;?–", ",.!,:;?-"):

api/src/services/text_processing/text_processor.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import re
44
import time
5-
from typing import AsyncGenerator, List, Tuple
5+
from typing import AsyncGenerator, Dict, List, Tuple
66

77
from loguru import logger
88

@@ -12,6 +12,9 @@
1212
from .vocabulary import tokenize
1313
from ...structures.schemas import NormalizationOptions
1414

15+
# Pre-compiled regex patterns for performance
16+
CUSTOM_PHONEMES = re.compile(r"(\[([^\]]|\n)*?\])(\(\/([^\/)]|\n)*?\/\))")
17+
1518
def process_text_chunk(
1619
text: str, language: str = "a", skip_phonemize: bool = False
1720
) -> List[int]:
@@ -85,12 +88,21 @@ def process_text(text: str, language: str = "a") -> List[int]:
8588
return process_text_chunk(text, language)
8689

8790

88-
def get_sentence_info(text: str) -> List[Tuple[str, List[int], int]]:
91+
def get_sentence_info(text: str, custom_phenomes_list: Dict[str, str]) -> List[Tuple[str, List[int], int]]:
8992
"""Process all sentences and return info."""
9093
sentences = re.split(r"([.!?;:])(?=\s|$)", text)
94+
phoneme_length, min_value = len(custom_phenomes_list), 0
95+
9196
results = []
9297
for i in range(0, len(sentences), 2):
9398
sentence = sentences[i].strip()
99+
for replaced in range(min_value, phoneme_length):
100+
current_id = f"</|custom_phonemes_{replaced}|/>"
101+
if current_id in sentence:
102+
sentence = sentence.replace(current_id, custom_phenomes_list.pop(current_id))
103+
min_value += 1
104+
105+
94106
punct = sentences[i + 1] if i + 1 < len(sentences) else ""
95107

96108
if not sentence:
@@ -102,6 +114,10 @@ def get_sentence_info(text: str) -> List[Tuple[str, List[int], int]]:
102114

103115
return results
104116

117+
def handle_custom_phonemes(s: re.Match[str], phenomes_list: Dict[str,str]) -> str:
118+
latest_id = f"</|custom_phonemes_{len(phenomes_list)}|/>"
119+
phenomes_list[latest_id] = s.group(0).strip()
120+
return latest_id
105121

106122
async def smart_split(
107123
text: str,
@@ -114,15 +130,18 @@ async def smart_split(
114130
chunk_count = 0
115131
logger.info(f"Starting smart split for {len(text)} chars")
116132

133+
custom_phoneme_list = {}
134+
117135
# Normalize text
118136
if settings.advanced_text_normalization and normalization_options.normalize:
119137
if lang_code in ["a","b","en-us","en-gb"]:
138+
text = CUSTOM_PHONEMES.sub(lambda s: handle_custom_phonemes(s, custom_phoneme_list), text)
120139
text=normalize_text(text,normalization_options)
121140
else:
122141
logger.info("Skipping text normalization as it is only supported for english")
123142

124143
# Process all sentences
125-
sentences = get_sentence_info(text)
144+
sentences = get_sentence_info(text, custom_phoneme_list)
126145

127146
current_chunk = []
128147
current_tokens = []
@@ -245,4 +264,4 @@ async def smart_split(
245264
total_time = time.time() - start_time
246265
logger.info(
247266
f"Split completed in {total_time * 1000:.2f}ms, produced {chunk_count} chunks"
248-
)
267+
)

0 commit comments

Comments
 (0)