Skip to content

Add keyword spotting API for node-addon-api #877

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/scripts/node-addon/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fi
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

# SHERPA_ONNX_VERSION=1.0.23
# SHERPA_ONNX_VERSION=1.0.24

if [ -z $owner ]; then
owner=k2-fsa
Expand Down
9 changes: 9 additions & 0 deletions .github/scripts/test-nodejs-addon-npm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@ d=nodejs-addon-examples
echo "dir: $d"
cd $d

# Keyword spotting example: fetch the model archive, run the file-based
# example with it, then delete the extracted model directory.
echo "----------keyword spotting----------"

# Download the archive into the current directory, unpack it, and remove the
# archive itself once it has been extracted.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2

# Run the example, then remove the extracted model directory.
node ./test_keyword_spotter_transducer.js
rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01

echo "----------add punctuations----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/npm-addon.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ jobs:

SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
# SHERPA_ONNX_VERSION=1.0.23
# SHERPA_ONNX_VERSION=1.0.24

src_dir=.github/scripts/node-addon
sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g $src_dir/package.json
Expand Down
21 changes: 21 additions & 0 deletions nodejs-addon-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ The following tables list the examples in this folder.
|[./test_audio_tagging_zipformer.js](./test_audio_tagging_zipformer.js)| Audio tagging with a Zipformer model|
|[./test_audio_tagging_ced.js](./test_audio_tagging_ced.js)| Audio tagging with a [CED](https://github.com/RicherMans/CED) model|

## Keyword spotting

|File| Description|
|---|---|
|[./test_keyword_spotter_transducer.js](./test_keyword_spotter_transducer.js)| Keyword spotting from a file using a Zipformer model|
|[./test_keyword_spotter_transducer_microphone.js](./test_keyword_spotter_transducer_microphone.js)| Keyword spotting from a microphone using a Zipformer model|

## Streaming speech-to-text from files

|File| Description|
Expand Down Expand Up @@ -325,3 +332,17 @@ rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2

node ./test_punctuation.js
```

## Keyword spotting

```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2

node ./test_keyword_spotter_transducer.js

# To run keyword spotting using a microphone
npm install naudiodon2
node ./test_keyword_spotter_transducer_microphone.js
```
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,5 @@ ai.on('data', data => {
}
});

ai.on('close', () => {
console.log('Free resources');
stream.free();
recognizer.free();
});

ai.start();
console.log('Started! Please speak')
5 changes: 0 additions & 5 deletions nodejs-addon-examples/test_asr_streaming_ctc_microphone.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,6 @@ ai.on('data', data => {
}
});

ai.on('close', () => {
console.log('Free resources');
stream.free();
recognizer.free();
});

ai.start();
console.log('Started! Please speak')
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,5 @@ ai.on('data', data => {
}
});

ai.on('close', () => {
console.log('Free resources');
stream.free();
recognizer.free();
});

ai.start();
console.log('Started! Please speak')
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,5 @@ ai.on('data', data => {
}
});

ai.on('close', () => {
console.log('Free resources');
stream.free();
recognizer.free();
});

ai.start();
console.log('Started! Please speak')
66 changes: 66 additions & 0 deletions nodejs-addon-examples/test_keyword_spotter_transducer.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;


// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
// Configuration handed to sherpa_onnx.KeywordSpotter: feature settings, the
// transducer model files, and the list of keywords to look for.
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'transducer': {
      'encoder':
          './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx',
      'decoder':
          './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx',
      'joiner':
          './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx',
    },
    'tokens':
        './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt',
    'numThreads': 1,
    'provider': 'cpu',
    'debug': 1,
  },
  'keywordsFile':
      './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt',
};

const waveFilename =
    './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav';

const kws = new sherpa_onnx.KeywordSpotter(config);
console.log('Started');
const start = performance.now();
const stream = kws.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

// Append 0.4 seconds of zero-valued samples after the real audio
// (presumably so the last frames of the wave can still be decoded).
const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

const detectedKeywords = [];
while (kws.isReady(stream)) {
  // Decode first and then look at the result, so that the result produced by
  // the final decode step is inspected before the loop terminates.
  kws.decode(stream);
  const keyword = kws.getResult(stream).keyword;
  if (keyword !== '') {
    detectedKeywords.push(keyword);
  }
}
const stop = performance.now();

console.log('Done');

// performance.now() reports milliseconds; convert to seconds for reporting.
const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', detectedKeywords);
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

/**
 * Creates the keyword spotter used by this example.
 *
 * All model files, the token table and the keyword list are read from the
 * `sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01` directory relative
 * to the current working directory.
 *
 * @returns {object} A new `sherpa_onnx.KeywordSpotter` built from the config.
 */
function createKeywordSpotter() {
  const modelDir = './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01';
  const modelSuffix = 'epoch-12-avg-2-chunk-16-left-64.onnx';

  const featConfig = {
    'sampleRate': 16000,
    'featureDim': 80,
  };

  const modelConfig = {
    'transducer': {
      'encoder': `${modelDir}/encoder-${modelSuffix}`,
      'decoder': `${modelDir}/decoder-${modelSuffix}`,
      'joiner': `${modelDir}/joiner-${modelSuffix}`,
    },
    'tokens': `${modelDir}/tokens.txt`,
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  };

  return new sherpa_onnx.KeywordSpotter({
    'featConfig': featConfig,
    'modelConfig': modelConfig,
    'keywordsFile': `${modelDir}/keywords.txt`,
  });
}

const kws = createKeywordSpotter();
const stream = kws.createStream();

// Number of keywords reported so far; passed to display.print() as the index
// of the next line to print.
let segmentIndex = 0;

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: kws.config.featConfig.sampleRate
  }
});

const display = new sherpa_onnx.Display(50);

/**
 * Reinterprets the bytes of an audio chunk as 32-bit float samples.
 *
 * A Node.js Buffer may be a window into a larger ArrayBuffer, so the view is
 * restricted to the chunk's own byteOffset/byteLength instead of covering the
 * whole underlying ArrayBuffer.
 *
 * @param {Buffer} chunk Raw bytes delivered by the 'data' event.
 * @returns {Float32Array} The samples contained in `chunk`; trailing bytes
 *     that do not form a complete sample are ignored.
 */
function toFloat32Samples(chunk) {
  const bytesPerSample = Float32Array.BYTES_PER_ELEMENT;
  const numSamples = Math.floor(chunk.byteLength / bytesPerSample);

  if (chunk.byteOffset % bytesPerSample === 0) {
    return new Float32Array(chunk.buffer, chunk.byteOffset, numSamples);
  }

  // A Float32Array view must start on a 4-byte boundary; copy the bytes into
  // a fresh ArrayBuffer when the chunk is not aligned.
  const aligned = chunk.buffer.slice(
      chunk.byteOffset, chunk.byteOffset + numSamples * bytesPerSample);
  return new Float32Array(aligned);
}

// For every chunk of captured audio: feed it to the stream, decode while the
// spotter reports the stream as ready, then print the keyword if there is one.
ai.on('data', data => {
  const samples = toFloat32Samples(data);

  stream.acceptWaveform(
      {sampleRate: kws.config.featConfig.sampleRate, samples: samples});

  while (kws.isReady(stream)) {
    kws.decode(stream);
  }

  const keyword = kws.getResult(stream).keyword;
  if (keyword !== '') {
    display.print(segmentIndex, keyword);
    segmentIndex += 1;
  }
});

ai.start();
console.log('Started! Please speak.');
console.log(`Only words from ${kws.config.keywordsFile} can be recognized`);
1 change: 1 addition & 0 deletions scripts/node-addon-api/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ include_directories(${CMAKE_JS_INC})

set(srcs
src/audio-tagging.cc
src/keyword-spotting.cc
src/non-streaming-asr.cc
src/non-streaming-tts.cc
src/punctuation.cc
Expand Down
32 changes: 32 additions & 0 deletions scripts/node-addon-api/lib/keyword-spotter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
const addon = require('./addon.js');
const streaming_asr = require('./streaming-asr.js');

/**
 * Thin JavaScript wrapper over the keyword spotting functions exposed by the
 * native addon. Every method forwards to the matching `addon.*` function,
 * passing the spotter handle created in the constructor.
 */
class KeywordSpotter {
  /**
   * @param {object} config Configuration object forwarded unchanged to
   *     `addon.createKeywordSpotter`; it is also kept as `this.config`.
   */
  constructor(config) {
    this.handle = addon.createKeywordSpotter(config);
    this.config = config;
  }

  /**
   * Creates a stream bound to this spotter.
   *
   * @returns {object} A `streaming_asr.OnlineStream` wrapping the handle
   *     returned by `addon.createKeywordStream`.
   */
  createStream() {
    return new streaming_asr.OnlineStream(
        addon.createKeywordStream(this.handle));
  }

  /**
   * @param {object} stream A stream object exposing a `handle` property.
   * @returns {*} Whatever `addon.isKeywordStreamReady` returns for the stream.
   */
  isReady(stream) {
    const ready = addon.isKeywordStreamReady(this.handle, stream.handle);
    return ready;
  }

  /**
   * Forwards the stream to `addon.decodeKeywordStream`. Returns nothing.
   *
   * @param {object} stream A stream object exposing a `handle` property.
   */
  decode(stream) {
    addon.decodeKeywordStream(this.handle, stream.handle);
  }

  /**
   * @param {object} stream A stream object exposing a `handle` property.
   * @returns {*} The value obtained by JSON-parsing the string returned by
   *     `addon.getKeywordResultAsJson`.
   */
  getResult(stream) {
    return JSON.parse(
        addon.getKeywordResultAsJson(this.handle, stream.handle));
  }
}

module.exports = {
KeywordSpotter,
}
2 changes: 2 additions & 0 deletions scripts/node-addon-api/lib/sherpa-onnx.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ const slid = require('./spoken-language-identification.js');
const sid = require('./speaker-identification.js');
const at = require('./audio-tagg.js');
const punct = require('./punctuation.js');
const kws = require('./keyword-spotter.js');

module.exports = {
OnlineRecognizer: streaming_asr.OnlineRecognizer,
Expand All @@ -22,4 +23,5 @@ module.exports = {
SpeakerEmbeddingManager: sid.SpeakerEmbeddingManager,
AudioTagging: at.AudioTagging,
Punctuation: punct.Punctuation,
KeywordSpotter: kws.KeywordSpotter,
}
Loading
Loading