diff --git a/.github/workflows/export-kokoro.yaml b/.github/workflows/export-kokoro.yaml index 360a14b7b9..94e9fe121d 100644 --- a/.github/workflows/export-kokoro.yaml +++ b/.github/workflows/export-kokoro.yaml @@ -3,7 +3,7 @@ name: export-kokoro-to-onnx on: push: branches: - - fix-export-kokoro-1.0-2 + - refactor-kokoro-2 workflow_dispatch: @@ -34,24 +34,94 @@ jobs: - name: Install Python dependencies shell: bash run: | - pip install kokoro "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 librosa soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html misaki[en] misaki[zh] torch==2.6.0+cpu -f https://download.pytorch.org/whl/torch + pip install kokoro "numpy<=1.26.4" onnx==1.16.0 onnxruntime==1.17.1 librosa soundfile piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html misaki[en] misaki[zh] torch==2.6.0+cpu -f https://download.pytorch.org/whl/torch sherpa-onnx - name: Run + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} shell: bash run: | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2 tar xf espeak-ng-data.tar.bz2 rm espeak-ng-data.tar.bz2 + cp -a ./espeak-ng-data ./scripts/kokoro/v0.19 + cp -a ./espeak-ng-data ./scripts/kokoro/v1.0 + cp -a ./espeak-ng-data ./scripts/kokoro/v1.1-zh + + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + cd scripts/kokoro v=${{ matrix.version }} if [[ $v = "0.19" ]]; then + cd v0.19 ./run.sh + + if false; then + # generate samples + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf + mkdir -p hf/kokoro/v0.19/mp3 + ./generate_samples.py + pushd hf + git pull + git add . 
+ git commit -m 'add kokoro samples for v0.19' + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main + popd + rm -rf hf + fi + elif [[ $v == "1.0" ]]; then cd v1.0 ./run.sh + + if false; then + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf + mkdir -p hf/kokoro/v1.0/mp3 + + curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2 + tar xvf dict.tar.bz2 + rm dict.tar.bz2 + + curl -SL -o date-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst + curl -SL -o number-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst + curl -SL -o phone-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst + + ./generate_samples.py + pushd hf + git pull + git add . + git commit -m 'add kokoro samples for v1.0' + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main + popd + rm -rf hf + fi + elif [[ $v == "1.1-zh" ]]; then cd v1.1-zh ./run.sh + + if false; then + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf + mkdir -p hf/kokoro/v1.1-zh/mp3 + + curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2 + tar xvf dict.tar.bz2 + rm dict.tar.bz2 + + curl -SL -o date-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst + curl -SL -o number-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst + curl -SL -o phone-zh.fst https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst + + ./generate_samples.py + pushd hf + git pull + git add . 
+ git commit -m 'add kokoro samples for v1.1-zh' + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main + popd + rm -rf hf + fi else echo "Unknown version $v" exit 1 @@ -61,19 +131,39 @@ jobs: if: matrix.version == '0.19' shell: bash run: | - src=scripts/kokoro + src=scripts/kokoro/v0.19 d=kokoro-en-v0_19 + mkdir $d cp -a LICENSE $d/LICENSE cp -a espeak-ng-data $d/ - cp -v $src/kokoro-v0_19.onnx $d/model.onnx + cp -v $src/model.onnx $d/model.onnx cp -v $src/voices.bin $d/ cp -v $src/tokens.txt $d/ - cp -v $src/README-new.md $d/README.md + cp -v $src/../README.md $d/README.md + ls -lh $d/ + tar cjfv $d.tar.bz2 $d + + ls -lh $d.tar.bz2 + + - name: Collect results 0.19 (int8) + if: matrix.version == '0.19' + shell: bash + run: | + src=scripts/kokoro/v0.19 + + d=kokoro-int8-en-v0_19 + + mkdir $d + cp -a LICENSE $d/LICENSE + cp -a espeak-ng-data $d/ + cp -v $src/model.int8.onnx $d/model.int8.onnx + cp -v $src/voices.bin $d/ + cp -v $src/tokens.txt $d/ + cp -v $src/../README.md $d/README.md ls -lh $d/ tar cjfv $d.tar.bz2 $d - rm -rf $d ls -lh $d.tar.bz2 @@ -219,41 +309,41 @@ jobs: git config --global user.email "csukuangfj@gmail.com" git config --global user.name "Fangjun Kuang" - rm -rf huggingface + dirs=( + kokoro-en-v0_19 + # kokoro-int8-en-v0_19 + ) + export GIT_LFS_SKIP_SMUDGE=1 export GIT_CLONE_PROTECTION_ACTIVE=false + for d in "${dirs[@]}"; do + # Each entry of dirs names both the local result directory and the Hugging Face repo; pushd/popd returns to the starting directory so every iteration starts from the same place. + rm -rf huggingface - git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 huggingface - cd huggingface - rm -rf ./* - git fetch - git pull + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface + pushd huggingface + rm -rf ./* - git lfs track "cmn_dict" - git lfs track "ru_dict" - git lfs track "*.wav" + git lfs track "*.onnx" + git lfs track af_dict + git lfs track ar_dict + git lfs track cmn_dict + git lfs track da_dict en_dict fa_dict hu_dict ia_dict it_dict lb_dict phondata ru_dict ta_dict + git lfs track ur_dict 
yue_dict - cp -a ../espeak-ng-data ./ - mkdir -p test_wavs - cp -v ../scripts/kokoro/kokoro-v0_19.onnx ./model.onnx + cp -a ../$d ./ - cp -v ../scripts/kokoro/kokoro-v0_19-*.wav ./test_wavs/ + git add . - cp -v ../scripts/kokoro/tokens.txt . - cp -v ../scripts/kokoro/voices.bin . - cp -v ../scripts/kokoro/README-new.md ./README.md - cp -v ../LICENSE ./ + ls -lh - git lfs track "*.onnx" - git add . + git status - ls -lh - - git status - - git commit -m "add models" - git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-en-v0_19 main || true + git commit -m "add models" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main || true + popd + done - name: Publish to huggingface 1.0 float32 if: matrix.version == '1.0' diff --git a/scripts/kokoro/.gitignore b/scripts/kokoro/.gitignore index a4343f2f1d..9e3cdcf10a 100644 --- a/scripts/kokoro/.gitignore +++ b/scripts/kokoro/.gitignore @@ -1,3 +1,4 @@ +espeak-ng-data voices.json voices.bin README-new.md diff --git a/scripts/kokoro/README.md b/scripts/kokoro/README.md index 5a0e09c29a..2628230d2e 100644 --- a/scripts/kokoro/README.md +++ b/scripts/kokoro/README.md @@ -1,10 +1,6 @@ # Introduction -This folder contains scripts for adding meta data to models -from https://github.com/thewh1teagle/kokoro-onnx/releases/tag/model-files - -See also -https://huggingface.co/hexgrad/Kokoro-82M/tree/main +Please see also +https://huggingface.co/hexgrad/Kokoro-82M and -https://huggingface.co/spaces/hexgrad/Kokoro-TTS - +https://huggingface.co/hexgrad/Kokoro-82M/discussions/14 diff --git a/scripts/kokoro/add_meta_data.py b/scripts/kokoro/add_meta_data.py deleted file mode 100755 index 5dfa74d898..0000000000 --- a/scripts/kokoro/add_meta_data.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2025 Xiaomi Corp. 
(authors: Fangjun Kuang) - - -import argparse -import json -from pathlib import Path - -import numpy as np -import onnx - - -def get_args(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--model", type=str, required=True, help="input and output onnx model" - ) - - parser.add_argument("--voices", type=str, required=True, help="Path to voices.json") - return parser.parse_args() - - -def load_voices(filename): - with open(filename) as f: - voices = json.load(f) - for key in voices: - voices[key] = np.array(voices[key], dtype=np.float32) - return voices - - -def get_vocab(): - _pad = "$" - _punctuation = ';:,.!?¡¿—…"«»“” ' - _letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" - _letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ" - symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa) - dicts = {} - for i in range(len((symbols))): - dicts[symbols[i]] = i - return dicts - - -def generate_tokens(): - token2id = get_vocab() - with open("tokens.txt", "w", encoding="utf-8") as f: - for s, i in token2id.items(): - f.write(f"{s} {i}\n") - - -def main(): - args = get_args() - print(args.model, args.voices) - - model = onnx.load(args.model) - voices = load_voices(args.voices) - - if Path("./tokens.txt").is_file(): - print("./tokens.txt exist, skip generating it") - else: - generate_tokens() - - keys = list(voices.keys()) - print(",".join(keys)) - - if Path("./voices.bin").is_file(): - print("./voices.bin exists, skip generating it") - else: - with open("voices.bin", "wb") as f: - for k in keys: - f.write(voices[k].tobytes()) - - speaker2id_str = "" - id2speaker_str = "" - sep = "" - for i, s in enumerate(keys): - speaker2id_str += f"{sep}{s}->{i}" - id2speaker_str += f"{sep}{i}->{s}" - sep = "," - - meta_data = { - "model_type": "kokoro", - "language": "English", - "has_espeak": 1, - "sample_rate": 24000, - "version": 1, - "voice": "en-us", - 
"style_dim": ",".join(map(str, voices[keys[0]].shape)), - "n_speakers": len(keys), - "speaker2id": speaker2id_str, - "id2speaker": id2speaker_str, - "speaker_names": ",".join(keys), - "model_url": "https://github.com/thewh1teagle/kokoro-onnx/releases/tag/model-files", - "see_also": "https://huggingface.co/spaces/hexgrad/Kokoro-TTS", - "see_also_2": "https://huggingface.co/hexgrad/Kokoro-82M", - "maintainer": "k2-fsa", - } - - print(model.metadata_props) - - while len(model.metadata_props): - model.metadata_props.pop() - - for key, value in meta_data.items(): - meta = model.metadata_props.add() - meta.key = key - meta.value = str(value) - print("--------------------") - - print(model.metadata_props) - - onnx.save(model, args.model) - - print(f"Please see {args.model}, ./voices.bin, and ./tokens.txt") - - -if __name__ == "__main__": - main() diff --git a/scripts/kokoro/run.sh b/scripts/kokoro/run.sh deleted file mode 100755 index e1fae471f8..0000000000 --- a/scripts/kokoro/run.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2025 Xiaomi Corp. 
(authors: Fangjun Kuang) - -set -ex - -cat > README-new.md <{i}" + id2speaker_str += f"{sep}{i}->{s}" + sep = "," + + meta_data = { + "model_type": "kokoro", + "language": "English", + "has_espeak": 1, + "sample_rate": 24000, + "version": 1, + "voice": "en-us", + "style_dim": ",".join(map(str, style.shape)), + "n_speakers": len(speaker2id), + "speaker2id": speaker2id_str, + "id2speaker": id2speaker_str, + "speaker_names": ",".join(map(str, speaker2id.keys())), + "model_url": "https://huggingface.co/hexgrad/kLegacy/", + "see_also": "https://huggingface.co/spaces/hexgrad/Kokoro-TTS", + "maintainer": "k2-fsa", + "comment": "This is kokoro v0.19 and supports only English", + } + + print(model.metadata_props) + + while len(model.metadata_props): + model.metadata_props.pop() + + for key, value in meta_data.items(): + meta = model.metadata_props.add() + meta.key = key + meta.value = str(value) + print("--------------------") + + print(model.metadata_props) + + onnx.save(model, args.model) + + print(f"Please see {args.model}, ./voices.bin, and ./tokens.txt") + + +if __name__ == "__main__": + main() diff --git a/scripts/kokoro/v0.19/dynamic_quantization.py b/scripts/kokoro/v0.19/dynamic_quantization.py new file mode 100755 index 0000000000..77428f4f25 --- /dev/null +++ b/scripts/kokoro/v0.19/dynamic_quantization.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# Copyright 2025 Xiaomi Corp. 
(authors: Fangjun Kuang) + +from pathlib import Path + +import onnxruntime +from onnxruntime.quantization import QuantType, quantize_dynamic + + +def show(filename): + session_opts = onnxruntime.SessionOptions() + session_opts.log_severity_level = 3 + sess = onnxruntime.InferenceSession(filename, session_opts) + for i in sess.get_inputs(): + print(i) + + print("-----") + + for i in sess.get_outputs(): + print(i) + + +""" +NodeArg(name='tokens', type='tensor(int64)', shape=[1, 'tokens1']) +NodeArg(name='style', type='tensor(float)', shape=[1, 256]) +NodeArg(name='speed', type='tensor(float)', shape=[1]) +----- +NodeArg(name='audio', type='tensor(float)', shape=['audio0']) +""" + + +def main(): + show("./model.onnx") + + if not Path("./model.int8.onnx").is_file(): + quantize_dynamic( + model_input="model.onnx", + model_output="model.int8.onnx", + # op_types_to_quantize=["MatMul"], + weight_type=QuantType.QUInt8, + ) + else: + print("./model.int8.onnx exists - skip") + + +if __name__ == "__main__": + main() diff --git a/scripts/kokoro/v0.19/generate_samples.py b/scripts/kokoro/v0.19/generate_samples.py new file mode 100755 index 0000000000..f4adde2d4a --- /dev/null +++ b/scripts/kokoro/v0.19/generate_samples.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang) +""" +Generate samples for +https://k2-fsa.github.io/sherpa/onnx/tts/all/ +""" + +import sherpa_onnx +import soundfile as sf + +from generate_voices_bin import speaker2id + +config = sherpa_onnx.OfflineTtsConfig( + model=sherpa_onnx.OfflineTtsModelConfig( + kokoro=sherpa_onnx.OfflineTtsKokoroModelConfig( + model="./model.onnx", + voices="./voices.bin", + tokens="./tokens.txt", + data_dir="./espeak-ng-data", + ), + num_threads=2, + ), + max_num_sentences=1, +) + +if not config.validate(): + raise ValueError("Please check your config") + +tts = sherpa_onnx.OfflineTts(config) +text = "Friends fell out often because life was changing so fast. 
The easiest thing in the world was to lose touch with someone." + +for s, i in speaker2id.items(): + print(s, i, len(speaker2id)) + audio = tts.generate(text, sid=i, speed=1.0) + + sf.write( + f"./hf/kokoro/v0.19/mp3/{i}-{s}.mp3", + audio.samples, + samplerate=audio.sample_rate, + ) diff --git a/scripts/kokoro/v0.19/generate_tokens.py b/scripts/kokoro/v0.19/generate_tokens.py new file mode 100755 index 0000000000..9caa6edccd --- /dev/null +++ b/scripts/kokoro/v0.19/generate_tokens.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang) + + +def get_vocab(): + # https://huggingface.co/hexgrad/kLegacy/blob/main/v0.19/kokoro.py#L75 + _pad = "$" + _punctuation = ';:,.!?¡¿—…"«»“” ' + _letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + _letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ" + symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa) + dicts = {} + for i in range(len((symbols))): + dicts[symbols[i]] = i + return dicts + + +def main(): + token2id = get_vocab() + with open("tokens.txt", "w", encoding="utf-8") as f: + for s, i in token2id.items(): + f.write(f"{s} {i}\n") + + +if __name__ == "__main__": + main() diff --git a/scripts/kokoro/v0.19/generate_voices_bin.py b/scripts/kokoro/v0.19/generate_voices_bin.py new file mode 100755 index 0000000000..cc4d2f3a9f --- /dev/null +++ b/scripts/kokoro/v0.19/generate_voices_bin.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +# Copyright 2025 Xiaomi Corp. 
(authors: Fangjun Kuang) +import torch +from pathlib import Path + + +id2speaker = { + 0: "af", + 1: "af_bella", + 2: "af_nicole", + 3: "af_sarah", + 4: "af_sky", + 5: "am_adam", + 6: "am_michael", + 7: "bf_emma", + 8: "bf_isabella", + 9: "bm_george", + 10: "bm_lewis", +} + +speaker2id = {speaker: idx for idx, speaker in id2speaker.items()} + + +def main(): + if Path("./voices.bin").is_file(): + print("./voices.bin exists - skip") + return + + with open("voices.bin", "wb") as f: + for _, speaker in id2speaker.items(): + m = torch.load( + f"kLegacy/v0.19/voices/{speaker}.pt", + weights_only=True, + map_location="cpu", + ).numpy() + # m.shape (511, 1, 256) + + f.write(m.tobytes()) + + +if __name__ == "__main__": + main() diff --git a/scripts/kokoro/v0.19/run.sh b/scripts/kokoro/v0.19/run.sh new file mode 100755 index 0000000000..0bd059d757 --- /dev/null +++ b/scripts/kokoro/v0.19/run.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang) + +set -ex + +cat > README-new.md < Dict[str, int]: @@ -171,10 +173,6 @@ def __call__(self, text: str, voice): return audio -def test(model, voice, text) -> np.ndarray: - pass - - def main(): args = get_args() print(vars(args)) diff --git a/scripts/kokoro/v1.0/generate_samples.py b/scripts/kokoro/v1.0/generate_samples.py new file mode 100755 index 0000000000..8f864ed416 --- /dev/null +++ b/scripts/kokoro/v1.0/generate_samples.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +# Copyright 2025 Xiaomi Corp. 
(authors: Fangjun Kuang) +""" +Generate samples for +https://k2-fsa.github.io/sherpa/onnx/tts/all/ +""" + +import sherpa_onnx +import soundfile as sf + +from generate_voices_bin import speaker2id + +config = sherpa_onnx.OfflineTtsConfig( + model=sherpa_onnx.OfflineTtsModelConfig( + kokoro=sherpa_onnx.OfflineTtsKokoroModelConfig( + model="./kokoro.onnx", + voices="./voices.bin", + tokens="./tokens.txt", + data_dir="./espeak-ng-data", + dict_dir="./dict", + lexicon="./lexicon-zh.txt,./lexicon-us-en.txt", + ), + num_threads=2, + debug=True, + ), + rule_fsts="./phone-zh.fst,./date-zh.fst,./number-zh.fst", + max_num_sentences=1, +) + +if not config.validate(): + raise ValueError("Please check your config") + +tts = sherpa_onnx.OfflineTts(config) +text = "This model supports both Chinese and English. 小米的核心价值观是什么?答案是真诚热爱!有困难,请拨打110 或者18601200909。I am learning 机器学习. 我在研究 machine learning。What do you think 中英文说的如何呢? 今天是 2025年6月18号." + +print("text", text) + +for s, i in speaker2id.items(): + print(s, i, len(speaker2id)) + audio = tts.generate(text, sid=i, speed=1.0) + + sf.write( + f"./hf/kokoro/v1.0/mp3/{i}-{s}.mp3", + audio.samples, + samplerate=audio.sample_rate, + ) diff --git a/scripts/kokoro/v1.1-zh/generate_samples.py b/scripts/kokoro/v1.1-zh/generate_samples.py new file mode 100755 index 0000000000..b0f9a815d3 --- /dev/null +++ b/scripts/kokoro/v1.1-zh/generate_samples.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +# Copyright 2025 Xiaomi Corp. 
(authors: Fangjun Kuang) +""" +Generate samples for +https://k2-fsa.github.io/sherpa/onnx/tts/all/ +""" + +import sherpa_onnx +import soundfile as sf + +from generate_voices_bin import speaker2id + +config = sherpa_onnx.OfflineTtsConfig( + model=sherpa_onnx.OfflineTtsModelConfig( + kokoro=sherpa_onnx.OfflineTtsKokoroModelConfig( + model="./kokoro.onnx", + voices="./voices.bin", + tokens="./tokens.txt", + data_dir="./espeak-ng-data", + dict_dir="./dict", + lexicon="./lexicon-zh.txt,./lexicon-us-en.txt", + ), + num_threads=2, + debug=True, + ), + rule_fsts="./phone-zh.fst,./date-zh.fst,./number-zh.fst", + max_num_sentences=1, +) + +if not config.validate(): + raise ValueError("Please check your config") + +tts = sherpa_onnx.OfflineTts(config) +text = "This model supports both Chinese and English. 小米的核心价值观是什么?答案是真诚热爱!有困难,请拨打110 或者18601200909。I am learning 机器学习. 我在研究 machine learning。What do you think 中英文说的如何呢? 今天是 2025年6月18号." + +print("text", text) + +for s, i in speaker2id.items(): + print(s, i, len(speaker2id)) + audio = tts.generate(text, sid=i, speed=1.0) + + sf.write( + f"./hf/kokoro/v1.1-zh/mp3/{i}-{s}.mp3", + audio.samples, + samplerate=audio.sample_rate, + ) diff --git a/scripts/kokoro/v1.1-zh/run.sh b/scripts/kokoro/v1.1-zh/run.sh index 09ea2112e5..8d439de7c0 100755 --- a/scripts/kokoro/v1.1-zh/run.sh +++ b/scripts/kokoro/v1.1-zh/run.sh @@ -11,6 +11,8 @@ fi if [ ! 
-f config.json ]; then # see https://huggingface.co/hexgrad/Kokoro-82M/blob/main/config.json curl -SL -O https://huggingface.co/hexgrad/Kokoro-82M-v1.1-zh/resolve/main/config.json + mkdir -p Kokoro-82M + cp ./config.json ./Kokoro-82M fi voices=( diff --git a/sherpa-onnx/python/csrc/offline-tts.cc b/sherpa-onnx/python/csrc/offline-tts.cc index e811bc4b50..5958096119 100644 --- a/sherpa-onnx/python/csrc/offline-tts.cc +++ b/sherpa-onnx/python/csrc/offline-tts.cc @@ -34,7 +34,7 @@ static void PybindOfflineTtsConfig(py::module *m) { .def(py::init(), py::arg("model"), py::arg("rule_fsts") = "", - py::arg("rule_fars") = "", py::arg("max_num_sentences") = 2, + py::arg("rule_fars") = "", py::arg("max_num_sentences") = 1, py::arg("silence_scale") = 0.2) .def_readwrite("model", &PyClass::model) .def_readwrite("rule_fsts", &PyClass::rule_fsts)