Skip to content

Commit fa2af5d

Browse files
authored
Add TTS demo for C# API (#557)
1 parent 035a82d commit fa2af5d

File tree

13 files changed

+500
-9
lines changed

13 files changed

+500
-9
lines changed

.github/workflows/test-dot-net-nuget.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,19 @@ jobs:
5555
./run-zipformer.sh
5656
./run-whisper.sh
5757
./run-tdnn-yesno.sh
58+
59+
cd ../offline-tts
60+
./run-aishell3.sh
61+
./run-piper.sh
62+
ls -lh
63+
64+
cd ../..
65+
66+
mkdir tts
67+
68+
cp dotnet-examples/offline-tts/*.wav ./tts
69+
70+
- uses: actions/upload-artifact@v3
71+
with:
72+
name: dot-net-tts-generated-test-files-${{ matrix.os }}
73+
path: tts

.github/workflows/test-dot-net.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ jobs:
131131
- name: Copy files
132132
shell: bash
133133
run: |
134+
cp -v scripts/dotnet/examples/offline-tts.csproj dotnet-examples/offline-tts/
134135
cp -v scripts/dotnet/examples/offline-decode-files.csproj dotnet-examples/offline-decode-files/
135136
cp -v scripts/dotnet/examples/online-decode-files.csproj dotnet-examples/online-decode-files/
136137
cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/
@@ -153,3 +154,19 @@ jobs:
153154
./run-zipformer.sh
154155
./run-whisper.sh
155156
./run-tdnn-yesno.sh
157+
158+
cd ../offline-tts
159+
./run-aishell3.sh
160+
./run-piper.sh
161+
ls -lh
162+
163+
cd ../..
164+
165+
mkdir tts
166+
167+
cp dotnet-examples/offline-tts/*.wav ./tts
168+
169+
- uses: actions/upload-artifact@v3
170+
with:
171+
name: dot-net-tts-generated-test-files-${{ matrix.os }}
172+
path: tts

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
22
project(sherpa-onnx)
33

4-
set(SHERPA_ONNX_VERSION "1.9.9")
4+
set(SHERPA_ONNX_VERSION "1.9.10")
55

66
# Disable warning about
77
#

dotnet-examples/.notes

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# How to create a new project in this folder
2+
3+
```bash
4+
mkdir offline-tts
5+
cd offline-tts
6+
dotnet new console
7+
cd ..
8+
dotnet sln ./sherpa-onnx.sln add ./offline-tts
9+
```
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
2+
//
3+
// This file shows how to use a non-streaming TTS model for text-to-speech
4+
// Please refer to
5+
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
6+
// and
7+
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
8+
// to download pre-trained models
9+
using CommandLine.Text;
10+
using CommandLine;
11+
using SherpaOnnx;
12+
using System.Collections.Generic;
13+
using System;
14+
15+
/// <summary>
/// Command-line demo: synthesizes speech from text with a non-streaming VITS
/// model and saves the generated audio to a wave file.
/// </summary>
class OfflineTtsDemo
{
  /// <summary>
  /// Command-line options. Populated by CommandLineParser from the
  /// <c>[Option]</c> attributes below.
  /// </summary>
  class Options
  {
    [Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
    public string RuleFsts { get; set; } = string.Empty;

    [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
    public string DataDir { get; set; } = string.Empty;

    // Float literal so the default has the same type as the property, like
    // the other float options below.
    [Option("vits-length-scale", Required = false, Default = 1.0f, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
    public float LengthScale { get; set; }

    [Option("vits-noise-scale", Required = false, Default = 0.667f, HelpText = "noise_scale for VITS models")]
    public float NoiseScale { get; set; }

    [Option("vits-noise-scale-w", Required = false, Default = 0.8f, HelpText = "noise_scale_w for VITS models")]
    public float NoiseScaleW { get; set; }

    [Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
    public string Lexicon { get; set; } = string.Empty;

    [Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")]
    public string Tokens { get; set; } = string.Empty;

    [Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")]
    public int MaxNumSentences { get; set; }

    // The long name is spelled out for consistency with the other options;
    // it matches the "--debug=1" flag used in the usage examples.
    [Option("debug", Required = false, Default = 0, HelpText = "1 to show debug messages.")]
    public int Debug { get; set; }

    [Option("vits-model", Required = true, HelpText = "Path to VITS model")]
    public string Model { get; set; } = string.Empty;

    [Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")]
    public int SpeakerId { get; set; }

    [Option("text", Required = true, HelpText = "Text to synthesize")]
    public string Text { get; set; } = string.Empty;

    // Optional: a required option can never fall back to its default, so
    // "Required = true" together with "Default" was contradictory.
    [Option("output-filename", Required = false, Default = "./generated.wav", HelpText = "Path to save the generated audio")]
    public string OutputFilename { get; set; } = string.Empty;
  }

  /// <summary>
  /// Entry point. Parses <paramref name="args"/> into <see cref="Options"/>
  /// and either runs the synthesis or prints the help text.
  /// </summary>
  /// <param name="args">Raw command-line arguments.</param>
  static void Main(string[] args)
  {
    // The built-in help writer is disabled so that DisplayHelp() decides
    // what is printed when parsing fails.
    var parser = new CommandLine.Parser(with => with.HelpWriter = null);
    var parserResult = parser.ParseArguments<Options>(args);

    parserResult
      .WithParsed<Options>(options => Run(options))
      .WithNotParsed(errs => DisplayHelp(parserResult, errs));
  }

  /// <summary>
  /// Prints usage examples followed by the auto-generated option list.
  /// </summary>
  /// <param name="result">The parser result the help text is built from.</param>
  /// <param name="errs">The parsing errors reported by the parser.</param>
  private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
  {
    string usage = @"
# vits-aishell3

wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
tar xf vits-zh-aishell3.tar.bz2

dotnet run \
--vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
--vits-tokens=./vits-zh-aishell3/tokens.txt \
--vits-lexicon=./vits-zh-aishell3/lexicon.txt \
--tts-rule-fsts=./vits-zh-aishell3/rule.fst \
--sid=66 \
--debug=1 \
--output-filename=./aishell3-66.wav \
--text=这是一个语音合成测试

# Piper models

wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2

dotnet run \
--vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
--vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
--vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
--debug=1 \
--output-filename=./amy.wav \
--text='This is a text to speech application in dotnet with Next Generation Kaldi'

Please refer to
https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
to download more models.
";

    var helpText = HelpText.AutoBuild(result, h =>
    {
      h.AdditionalNewLineAfterOption = false;
      h.Heading = usage;
      h.Copyright = "Copyright (c) 2024 Xiaomi Corporation";
      return HelpText.DefaultParsingErrorsHandler(result, h);
    }, e => e);
    Console.WriteLine(helpText);
  }

  /// <summary>
  /// Builds the TTS configuration from <paramref name="options"/>, generates
  /// audio for the requested text and saves it to the output file.
  /// Sets a non-zero process exit code when the length scale is invalid or
  /// the file cannot be written.
  /// </summary>
  /// <param name="options">Parsed command-line options.</param>
  private static void Run(Options options)
  {
    // speed is computed as 1 / LengthScale below, so zero, negative and NaN
    // values are rejected up front ("!(x > 0)" is also true for NaN).
    if (!(options.LengthScale > 0))
    {
      Console.Error.WriteLine($"--vits-length-scale must be positive. Given: {options.LengthScale}");
      Environment.ExitCode = 1;
      return;
    }

    OfflineTtsConfig config = new OfflineTtsConfig();
    config.Model.Vits.Model = options.Model;
    config.Model.Vits.Lexicon = options.Lexicon;
    config.Model.Vits.Tokens = options.Tokens;
    config.Model.Vits.DataDir = options.DataDir;
    config.Model.Vits.NoiseScale = options.NoiseScale;
    config.Model.Vits.NoiseScaleW = options.NoiseScaleW;
    config.Model.Vits.LengthScale = options.LengthScale;
    config.Model.NumThreads = 1;
    config.Model.Debug = options.Debug;
    config.Model.Provider = "cpu";
    config.RuleFsts = options.RuleFsts;
    config.MaxNumSentences = options.MaxNumSentences;

    OfflineTts tts = new OfflineTts(config);
    float speed = 1.0f / options.LengthScale;
    int sid = options.SpeakerId;
    OfflineTtsGeneratedAudio audio = tts.Generate(options.Text, speed, sid);
    bool ok = audio.SaveToWaveFile(options.OutputFilename);

    if (ok)
    {
      Console.WriteLine($"Wrote to {options.OutputFilename} succeeded!");
    }
    else
    {
      Console.WriteLine($"Failed to write {options.OutputFilename}");
      // Report the failure to the calling shell/CI script as well.
      Environment.ExitCode = 1;
    }
  }
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net6.0</TargetFramework>
6+
<RootNamespace>offline_tts</RootNamespace>
7+
<ImplicitUsings>enable</ImplicitUsings>
8+
<Nullable>enable</Nullable>
9+
</PropertyGroup>
10+
11+
<ItemGroup>
12+
<PackageReference Include="CommandLineParser" Version="2.9.1" />
13+
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
14+
</ItemGroup>
15+
16+
</Project>
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/usr/bin/env bash
#
# Downloads the vits-zh-aishell3 model if it is not present yet and then runs
# the offline TTS demo with it, writing the audio to ./aishell3-66.wav.

# Stop at the first failing command so that a failed download or extraction
# does not fall through to "dotnet run" with a missing model.
set -e

if [ ! -f ./vits-zh-aishell3/vits-aishell3.onnx ]; then
  wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
  tar xf vits-zh-aishell3.tar.bz2
  rm vits-zh-aishell3.tar.bz2
fi

dotnet run \
  --vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
  --vits-tokens=./vits-zh-aishell3/tokens.txt \
  --vits-lexicon=./vits-zh-aishell3/lexicon.txt \
  --tts-rule-fsts=./vits-zh-aishell3/rule.fst \
  --sid=66 \
  --debug=1 \
  --output-filename=./aishell3-66.wav \
  --text="这是一个语音合成测试, 写于公元 2024 年 1 月 28 号, 23点27分,星期天。"
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/usr/bin/env bash
#
# Downloads the vits-piper-en_US-amy-low model if it is not present yet and
# then runs the offline TTS demo with it, writing the audio to ./amy.wav.

# Stop at the first failing command so that a failed download or extraction
# does not fall through to "dotnet run" with a missing model.
set -e

if [ ! -f ./vits-piper-en_US-amy-low/en_US-amy-low.onnx ]; then
  wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
  tar xf vits-piper-en_US-amy-low.tar.bz2
  rm vits-piper-en_US-amy-low.tar.bz2
fi

dotnet run \
  --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
  --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \
  --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
  --debug=1 \
  --output-filename=./amy.wav \
  --text="This is a text to speech application in dotnet with Next Generation Kaldi"
16+

dotnet-examples/sherpa-onnx.sln

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-decode-files", "off
99
EndProject
1010
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-recognition-from-microphone", "speech-recognition-from-microphone\speech-recognition-from-microphone.csproj", "{FE4EA1FF-062A-46B3-B78D-C828FED7B82E}"
1111
EndProject
12+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts", "offline-tts\offline-tts.csproj", "{72196886-7143-4043-96E2-BCACEC6C79EB}"
13+
EndProject
1214
Global
1315
GlobalSection(SolutionConfigurationPlatforms) = preSolution
1416
Debug|Any CPU = Debug|Any CPU
@@ -30,5 +32,9 @@ Global
3032
{FE4EA1FF-062A-46B3-B78D-C828FED7B82E}.Debug|Any CPU.Build.0 = Debug|Any CPU
3133
{FE4EA1FF-062A-46B3-B78D-C828FED7B82E}.Release|Any CPU.ActiveCfg = Release|Any CPU
3234
{FE4EA1FF-062A-46B3-B78D-C828FED7B82E}.Release|Any CPU.Build.0 = Release|Any CPU
35+
{72196886-7143-4043-96E2-BCACEC6C79EB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
36+
{72196886-7143-4043-96E2-BCACEC6C79EB}.Debug|Any CPU.Build.0 = Debug|Any CPU
37+
{72196886-7143-4043-96E2-BCACEC6C79EB}.Release|Any CPU.ActiveCfg = Release|Any CPU
38+
{72196886-7143-4043-96E2-BCACEC6C79EB}.Release|Any CPU.Build.0 = Release|Any CPU
3339
EndGlobalSection
3440
EndGlobal
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net6.0</TargetFramework>
6+
<RootNamespace>offline_tts</RootNamespace>
7+
<ImplicitUsings>enable</ImplicitUsings>
8+
<Nullable>enable</Nullable>
9+
</PropertyGroup>
10+
11+
<PropertyGroup>
12+
<RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
13+
</PropertyGroup>
14+
15+
<ItemGroup>
16+
<PackageReference Include="CommandLineParser" Version="2.9.1" />
17+
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
18+
</ItemGroup>
19+
20+
</Project>

0 commit comments

Comments
 (0)