Skip to content

Add TTS for node-addon-api #871

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions .github/scripts/test-nodejs-addon-npm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ d=nodejs-addon-examples
echo "dir: $d"
cd $d

echo "----------streaming asr----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
Expand All @@ -31,6 +33,8 @@ rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
node ./test_asr_streaming_paraformer.js
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en

echo "----------non-streaming asr----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
Expand Down Expand Up @@ -58,3 +62,35 @@ rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2

node ./test_asr_non_streaming_paraformer.js
rm -rf sherpa-onnx-paraformer-zh-2023-03-28

echo "----------tts----------"

# Download one TTS model archive, unpack it, run a single example against it,
# and delete the extracted model directory afterwards.
#   $1: model name; used as the archive basename and the extracted directory
#   $2: example script handed to node
run_tts_example() {
  curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$1.tar.bz2"
  tar xvf "$1.tar.bz2"
  rm "$1.tar.bz2"

  node "./$2"
  rm -rf "$1"
}

run_tts_example vits-piper-en_GB-cori-medium test_tts_non_streaming_vits_piper_en.js
run_tts_example vits-coqui-de-css10 test_tts_non_streaming_vits_coqui_de.js
run_tts_example sherpa-onnx-vits-zh-ll test_tts_non_streaming_vits_zh_ll.js
run_tts_example vits-icefall-zh-aishell3 test_tts_non_streaming_vits_zh_aishell3.js

# Show what is left in the examples directory after all tests.
ls -lh
2 changes: 1 addition & 1 deletion .github/workflows/npm-addon-linux-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ jobs:
-DSHERPA_ONNX_ENABLE_BINARY=OFF \
..

make -j
make -j2
make install
cd ..

Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,4 @@ sherpa-onnx-ced-*
node_modules
package-lock.json
sherpa-onnx-nemo-*
sherpa-onnx-vits-*
40 changes: 40 additions & 0 deletions nodejs-addon-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,43 @@ node ./test_asr_non_streaming_paraformer.js
npm install naudiodon2
node ./test_vad_asr_non_streaming_paraformer_microphone.js
```

## Text-to-speech with piper VITS models (TTS)

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-cori-medium.tar.bz2
tar xvf vits-piper-en_GB-cori-medium.tar.bz2
rm vits-piper-en_GB-cori-medium.tar.bz2

node ./test_tts_non_streaming_vits_piper_en.js
```

## Text-to-speech with Coqui-ai/TTS models (TTS)

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
tar xvf vits-coqui-de-css10.tar.bz2
rm vits-coqui-de-css10.tar.bz2

node ./test_tts_non_streaming_vits_coqui_de.js
```

## Text-to-speech with vits Chinese models (1/2)

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2
tar xvf sherpa-onnx-vits-zh-ll.tar.bz2
rm sherpa-onnx-vits-zh-ll.tar.bz2

node ./test_tts_non_streaming_vits_zh_ll.js
```

## Text-to-speech with vits Chinese models (2/2)

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
rm vits-icefall-zh-aishell3.tar.bz2

node ./test_tts_non_streaming_vits_zh_aishell3.js
```
43 changes: 43 additions & 0 deletions nodejs-addon-examples/test_tts_non_streaming_vits_coqui_de.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;

// please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
/**
 * Build an offline TTS engine configured for the files under
 * ./vits-coqui-de-css10.
 *
 * @returns {object} A new `sherpa_onnx.OfflineTts` instance.
 */
function createOfflineTts() {
  const vits = {
    model: './vits-coqui-de-css10/model.onnx',
    tokens: './vits-coqui-de-css10/tokens.txt',
  };
  const model = {vits, debug: true, numThreads: 1, provider: 'cpu'};

  // NOTE(review): the key is spelled 'maxNumStences' — possibly a typo for
  // 'maxNumSentences'; confirm against the native addon before renaming.
  return new sherpa_onnx.OfflineTts({model, maxNumStences: 1});
}

// Synthesize one German sentence, report timing statistics, and save the
// result as a wave file.
const tts = createOfflineTts();

const text = 'Alles hat ein Ende, nur die Wurst hat zwei.';

// Time only the generate() call so the real-time factor can be reported.
const start = performance.now();
const audio = tts.generate({text, sid: 0, speed: 1.0});
const stop = performance.now();

// performance.now() is in milliseconds; convert to seconds.
const elapsed_seconds = (stop - start) / 1000;
// Length of the generated audio in seconds (samples / samples-per-second).
const duration = audio.samples.length / audio.sampleRate;
// Processing time divided by audio length.
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-coqui-de.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);
46 changes: 46 additions & 0 deletions nodejs-addon-examples/test_tts_non_streaming_vits_piper_en.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;

// please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
/**
 * Build an offline TTS engine configured for the files under
 * ./vits-piper-en_GB-cori-medium.
 *
 * @returns {object} A new `sherpa_onnx.OfflineTts` instance.
 */
function createOfflineTts() {
  const vits = {
    model: './vits-piper-en_GB-cori-medium/en_GB-cori-medium.onnx',
    tokens: './vits-piper-en_GB-cori-medium/tokens.txt',
    dataDir: './vits-piper-en_GB-cori-medium/espeak-ng-data',
  };
  const model = {vits, debug: true, numThreads: 1, provider: 'cpu'};

  // NOTE(review): the key is spelled 'maxNumStences' — possibly a typo for
  // 'maxNumSentences'; confirm against the native addon before renaming.
  return new sherpa_onnx.OfflineTts({model, maxNumStences: 1});
}

// Synthesize one English passage, report timing statistics, and save the
// result as a wave file.
const tts = createOfflineTts();

const text =
    'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

// Time only the generate() call so the real-time factor can be reported.
const start = performance.now();
const audio = tts.generate({text, sid: 0, speed: 1.0});
const stop = performance.now();

// performance.now() is in milliseconds; convert to seconds.
const elapsed_seconds = (stop - start) / 1000;
// Length of the generated audio in seconds (samples / samples-per-second).
const duration = audio.samples.length / audio.sampleRate;
// Processing time divided by audio length.
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-piper-en.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);
48 changes: 48 additions & 0 deletions nodejs-addon-examples/test_tts_non_streaming_vits_zh_aishell3.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;

// please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
/**
 * Build an offline TTS engine configured for the files under
 * ./vits-icefall-zh-aishell3, including its rule FST and FAR files.
 *
 * @returns {object} A new `sherpa_onnx.OfflineTts` instance.
 */
function createOfflineTts() {
  const vits = {
    model: './vits-icefall-zh-aishell3/model.onnx',
    tokens: './vits-icefall-zh-aishell3/tokens.txt',
    lexicon: './vits-icefall-zh-aishell3/lexicon.txt',
  };
  const model = {vits, debug: true, numThreads: 1, provider: 'cpu'};

  // The rule FSTs are passed as one comma-separated string, in this order.
  const ruleFsts = [
    './vits-icefall-zh-aishell3/date.fst',
    './vits-icefall-zh-aishell3/phone.fst',
    './vits-icefall-zh-aishell3/number.fst',
    './vits-icefall-zh-aishell3/new_heteronym.fst',
  ].join(',');

  // NOTE(review): the key is spelled 'maxNumStences' — possibly a typo for
  // 'maxNumSentences'; confirm against the native addon before renaming.
  return new sherpa_onnx.OfflineTts({
    model,
    maxNumStences: 1,
    ruleFsts,
    ruleFars: './vits-icefall-zh-aishell3/rule.far',
  });
}

// Synthesize one Chinese passage with speaker id 88, report timing statistics,
// and save the result as a wave file.
const tts = createOfflineTts();

const text =
    '他在长沙出生,长白山长大,去过长江,现在他是一个银行的行长,主管行政工作。有困难,请拨110,或者13020240513。今天是2024年5月13号, 他上个月的工资是12345块钱。';

// Time only the generate() call so the real-time factor can be reported.
const start = performance.now();
const audio = tts.generate({text, sid: 88, speed: 1.0});
const stop = performance.now();

// performance.now() is in milliseconds; convert to seconds.
const elapsed_seconds = (stop - start) / 1000;
// Length of the generated audio in seconds (samples / samples-per-second).
const duration = audio.samples.length / audio.sampleRate;
// Processing time divided by audio length.
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-zh-aishell3.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);
48 changes: 48 additions & 0 deletions nodejs-addon-examples/test_tts_non_streaming_vits_zh_ll.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;

// please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
/**
 * Build an offline TTS engine configured for the files under
 * ./sherpa-onnx-vits-zh-ll, including its rule FST files.
 *
 * @returns {object} A new `sherpa_onnx.OfflineTts` instance.
 */
function createOfflineTts() {
  const vits = {
    model: './sherpa-onnx-vits-zh-ll/model.onnx',
    tokens: './sherpa-onnx-vits-zh-ll/tokens.txt',
    lexicon: './sherpa-onnx-vits-zh-ll/lexicon.txt',
    dictDir: './sherpa-onnx-vits-zh-ll/dict',
  };
  const model = {vits, debug: true, numThreads: 1, provider: 'cpu'};

  // The rule FSTs are passed as one comma-separated string, in this order.
  const ruleFsts = [
    './sherpa-onnx-vits-zh-ll/date.fst',
    './sherpa-onnx-vits-zh-ll/phone.fst',
    './sherpa-onnx-vits-zh-ll/number.fst',
  ].join(',');

  // NOTE(review): the key is spelled 'maxNumStences' — possibly a typo for
  // 'maxNumSentences'; confirm against the native addon before renaming.
  return new sherpa_onnx.OfflineTts({model, maxNumStences: 1, ruleFsts});
}

// Synthesize one Chinese passage with speaker id 2, report timing statistics,
// and save the result as a wave file.
const tts = createOfflineTts();

const text =
    '当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。2024年5月13号,拨打110或者18920240513。123456块钱。';

// Time only the generate() call so the real-time factor can be reported.
const start = performance.now();
const audio = tts.generate({text, sid: 2, speed: 1.0});
const stop = performance.now();

// performance.now() is in milliseconds; convert to seconds.
const elapsed_seconds = (stop - start) / 1000;
// Length of the generated audio in seconds (samples / samples-per-second).
const duration = audio.samples.length / audio.sampleRate;
// Processing time divided by audio length.
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-zh-ll.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ ai.on('data', data => {
.split(' ')[0]}.wav`;
sherpa_onnx.writeWave(
filename,
{samples: segment.samples, sampleRate: vad.config.sampleRate})
{samples: segment.samples, sampleRate: vad.config.sampleRate});

index += 1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ ai.on('data', data => {
.split(' ')[0]}.wav`;
sherpa_onnx.writeWave(
filename,
{samples: segment.samples, sampleRate: vad.config.sampleRate})
{samples: segment.samples, sampleRate: vad.config.sampleRate});

index += 1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ ai.on('data', data => {
.split(' ')[0]}.wav`;
sherpa_onnx.writeWave(
filename,
{samples: segment.samples, sampleRate: vad.config.sampleRate})
{samples: segment.samples, sampleRate: vad.config.sampleRate});

index += 1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ ai.on('data', data => {
.split(' ')[0]}.wav`;
sherpa_onnx.writeWave(
filename,
{samples: segment.samples, sampleRate: vad.config.sampleRate})
{samples: segment.samples, sampleRate: vad.config.sampleRate});

index += 1;
}
Expand Down
2 changes: 1 addition & 1 deletion nodejs-addon-examples/test_vad_microphone.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ ai.on('data', data => {
.split(' ')[0]}.wav`;
sherpa_onnx.writeWave(
filename,
{samples: segment.samples, sampleRate: vad.config.sampleRate})
{samples: segment.samples, sampleRate: vad.config.sampleRate});
const duration = segment.samples.length / vad.config.sampleRate;
console.log(`${index} End of speech. Duration: ${duration} seconds`);
console.log(`Saved to ${filename}`);
Expand Down
1 change: 1 addition & 0 deletions scripts/node-addon-api/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ include_directories(${CMAKE_JS_INC})

set(srcs
src/non-streaming-asr.cc
src/non-streaming-tts.cc
src/sherpa-onnx-node-addon-api.cc
src/streaming-asr.cc
src/vad.cc
Expand Down
4 changes: 2 additions & 2 deletions scripts/node-addon-api/lib/addon.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ for (const p of possible_paths) {
}

if (!found) {
let msg =
`Could not find sherpa-onnx. Tried\n\n ${possible_paths.join('\n ')}\n`
let msg = `Could not find sherpa-onnx-node. Tried\n\n ${
possible_paths.join('\n ')}\n`
if (os.platform() == 'darwin' && process.env.DYLD_LIBRARY_PATH &&
!process.env.DYLD_LIBRARY_PATH.includes(
`node_modules/sherpa-onnx-${platform_arch}`)) {
Expand Down
25 changes: 25 additions & 0 deletions scripts/node-addon-api/lib/non-streaming-tts.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
const addon = require('./addon.js');

/**
 * JavaScript wrapper around the native offline (non-streaming) TTS handle
 * exposed by the addon.
 *
 * After construction the instance exposes `handle`, `config`, `numSpeakers`
 * and `sampleRate`; the last two are queried from the addon for the handle.
 */
class OfflineTts {
  /**
   * @param {object} config Configuration forwarded unchanged to
   *     `addon.createOfflineTts`; also kept on the instance as `this.config`.
   */
  constructor(config) {
    const handle = addon.createOfflineTts(config);

    this.handle = handle;
    this.config = config;

    this.numSpeakers = addon.getOfflineTtsNumSpeakers(handle);
    this.sampleRate = addon.getOfflineTtsSampleRate(handle);
  }

  /**
   * Generate audio for a piece of text.
   *
   * @param {object} obj Request of the form
   *     `{text: "xxxx", sid: 0, speed: 1.0}`, where `text` is a string,
   *     `sid` is an int32 and `speed` is a float.
   * @returns {object} An object `{samples: Float32Array, sampleRate: <a number>}`.
   */
  generate(obj) {
    const {handle} = this;
    return addon.offlineTtsGenerate(handle, obj);
  }
}

module.exports = {
OfflineTts,
}
Loading
Loading