From 31b99d6173685ff3e6ac0f9610d9f3c28dd9b712 Mon Sep 17 00:00:00 2001 From: fangzhehao Date: Mon, 28 Oct 2024 11:09:41 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=E6=94=AF=E6=8C=81=E8=87=AA=E5=AE=9A?= =?UTF-8?q?=E4=B9=89=E8=AF=BB=E9=9F=B3=EF=BC=88=E5=A4=9A=E9=9F=B3=E5=AD=97?= =?UTF-8?q?=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GPT_SoVITS/text/chinese2.py | 47 ++++++++++++++++++++++++++++++------- GPT_SoVITS/text/cleaner.py | 2 ++ 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/GPT_SoVITS/text/chinese2.py b/GPT_SoVITS/text/chinese2.py index f716b4107..b98a6ee09 100644 --- a/GPT_SoVITS/text/chinese2.py +++ b/GPT_SoVITS/text/chinese2.py @@ -131,13 +131,32 @@ def _merge_erhua(initials: list[str], new_finals.append(phn) return new_initials, new_finals - +def remove_brackets(s): + if s.startswith('(') and s.endswith(')'): + return s[1:-1] + return s +def custom_pinyin(seg:str): + ''' + 自定义拼音,格式:这个字的读音是角(jue2)色,而不是角(jiao3)色 + 拼音格式符合 pinyin.Style.TON3(声调风格3,即拼音声调在各个拼音之后,用数字 [1-4] 进行表示。如: 中国 -> ``zhong1 guo2``) + 逻辑:使用正则截取原字符串中指定的拼音,符合截取拼音后的纯汉字字符串和自定义拼音列表,无自定义拼音的返回空: return:这个字的读音是角色,而不是角色 ["","","","","","","","jue2","","","","","","jiao3",""] + ''' + reg=re.compile('\([a-zA-Z]+\d\)') + custom_pys=reg.findall(seg) + result=["" for s in range(len(seg))] + for custom_py in custom_pys: + index=seg.index(custom_py) + result[index-1]=remove_brackets(custom_py) + seg=seg.replace(custom_py,"",1) + result=result[:len(seg)] + return seg,result def _g2p(segments): phones_list = [] word2ph = [] for seg in segments: pinyins = [] + seg, customer_pinyin = custom_pinyin(seg) # Replace all English words in the sentence seg = re.sub("[a-zA-Z]+", "", seg) seg_cut = psg.lcut(seg) @@ -162,7 +181,8 @@ def _g2p(segments): else: # g2pw采用整句推理 pinyins = g2pw.lazy_pinyin(seg, neutral_tone_with_five=True, style=Style.TONE3) - + pinyins = [customer_pinyin[index] if customer_pinyin[index] != "" else item for index,item in enumerate(pinyins) ] + print(f"g2pw seg:{seg} pinyins:{pinyins}") pre_word_length = 0 for word, pos in seg_cut: sub_initials = [] @@ -176,7 +196,7 @@ def _g2p(segments): word_pinyins = pinyins[pre_word_length:now_word_length] # 多音字消歧 - word_pinyins = correct_pronunciation(word,word_pinyins) + # word_pinyins = correct_pronunciation(word,word_pinyins) # 拿缓存 for pinyin in word_pinyins: if pinyin[0].isalpha(): @@ -272,6 +292,7 @@ def replace_consecutive_punctuation(text): def text_normalize(text): # https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization tx = TextNormalizer() + text, custom_py = custom_pinyin(text) sentences = tx.normalize(text) dest_text = "" for sentence in sentences: @@ -279,8 +300,19 @@ def text_normalize(text): # 避免重复标点引起的参考泄露 dest_text = replace_consecutive_punctuation(dest_text) + if len(dest_text)==len(text): + result="" + for index,rune in enumerate(dest_text): + result=result+rune + if text[index]==dest_text[index] and custom_py[index]!="": + result=result+"("+custom_py[index]+")" + dest_text=result + else: + print("text_normalize 后长度不一致") return dest_text - +def clean_custom_pinyin(text): + text, _ = custom_pinyin(text) + return text # 不排除英文的文本格式化 def mix_text_normalize(text): # https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization @@ -296,11 +328,8 @@ def mix_text_normalize(text): if __name__ == "__main__": - text = "啊——但是《原神》是由,米哈\游自主,研发的一款全.新开放世界.冒险游戏" - text = "呣呣呣~就是…大人的鼹鼠党吧?" - text = "你好" - text = text_normalize(text) - print(g2p(text)) + text = "这个字的读音是角(jue2)色,而不是角(jiao3)色" + print(custom_pinyin(text)) # # 示例用法 diff --git a/GPT_SoVITS/text/cleaner.py b/GPT_SoVITS/text/cleaner.py index 98535f273..7b0f9ff28 100644 --- a/GPT_SoVITS/text/cleaner.py +++ b/GPT_SoVITS/text/cleaner.py @@ -40,6 +40,8 @@ def clean_text(text, language, version=None): norm_text=text if language == "zh" or language=="yue":########## phones, word2ph = language_module.g2p(norm_text) + if hasattr(language_module,"clean_custom_pinyin"): + norm_text=language_module.clean_custom_pinyin(norm_text) assert len(phones) == sum(word2ph) assert len(norm_text) == len(word2ph) elif language == "en": From 5202a7793d2629a5a6eac97e2663448cbe8ce1d9 Mon Sep 17 00:00:00 2001 From: fangzhehao Date: Wed, 20 Nov 2024 18:16:37 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E5=A4=9A=E9=9F=B3=E5=AD=97=E5=AE=9E?= =?UTF-8?q?=E7=8E=B0=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/api.py b/api.py index c5f7024fe..e1f6f6515 100644 --- a/api.py +++ b/api.py @@ -314,6 +314,7 @@ def get_bert_inf(phones, word2ph, norm_text, language): from text import chinese def get_phones_and_bert(text,language,version,final=False): + LangSegment.setKeepPinyin(True) #用于保留切分后的中文拼音 if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}: language = language.replace("all_","") if language == "en": From c32813913bf11446d65ec8f0fbdfe9c9acd05a59 Mon Sep 17 00:00:00 2001 From: fangzhehao Date: Mon, 9 Dec 2024 11:36:33 +0800 Subject: [PATCH 3/3] =?UTF-8?q?api=5Fv2=20=E6=94=AF=E6=8C=81=E5=A4=9A?= =?UTF-8?q?=E9=9F=B3=E5=AD=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py | 1 + GPT_SoVITS/text/chinese2.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py b/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py index b90bd929e..ddd176b05 100644 --- a/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py +++ b/GPT_SoVITS/TTS_infer_pack/TextPreprocessor.py @@ -117,6 +117,7 @@ def segment_and_extract_feature_for_text(self, text:str, language:str, version:s return self.get_phones_and_bert(text, language, version) def get_phones_and_bert(self, text:str, language:str, version:str, final:bool=False): + LangSegment.setKeepPinyin(True) if language in {"en", "all_zh", "all_ja", "all_ko", "all_yue"}: language = language.replace("all_","") if language == "en": diff --git a/GPT_SoVITS/text/chinese2.py b/GPT_SoVITS/text/chinese2.py index b98a6ee09..cc359c48e 100644 --- a/GPT_SoVITS/text/chinese2.py +++ b/GPT_SoVITS/text/chinese2.py @@ -177,6 +177,9 @@ def _g2p(segments): # assert len(sub_initials) == len(sub_finals) == len(word) initials = sum(initials, []) finals = sum(finals, []) + initials=[to_initials(customer_pinyin[index]) if customer_pinyin[index] != "" else item for index,item in enumerate(initials)] + finals=[to_finals_tone3(customer_pinyin[index]) if customer_pinyin[index] != "" else item for index,item in enumerate(finals)] + print("customer_pinyin:",customer_pinyin) print("pypinyin结果",initials,finals) else: # g2pw采用整句推理 @@ -215,7 +218,7 @@ def _g2p(segments): initials = sum(initials, []) finals = sum(finals, []) - # print("g2pw结果",initials,finals) + print("g2pw结果",initials,finals) for c, v in zip(initials, finals): raw_pinyin = c + v