Skip to content

Add JavaScript API (WASM) for homophone replacer #2157

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .github/scripts/test-nodejs-npm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,18 @@ tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2

node ./test-offline-sense-voice.js

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt

node ./test-offline-sense-voice-with-hr.js

rm -rf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
rm -rf dict replace.fst test-hr.wav lexicon.txt

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
ls -lh
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-wheels-linux-cuda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04]
os: [ubuntu-22.04]
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]

steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-wheels-linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
os: [ubuntu-22.04]
python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312", "cp313"]
manylinux: [manylinux2014] #, manylinux_2_28]

Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/test-build-wheel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ jobs:
matrix:
# See https://github.com/actions/runner-images
include:
- os: ubuntu-20.04
- os: ubuntu-22.04
python-version: "3.7"
- os: ubuntu-20.04
- os: ubuntu-22.04
python-version: "3.8"
- os: ubuntu-20.04
- os: ubuntu-22.04
python-version: "3.9"
- os: ubuntu-22.04
python-version: "3.10"
Expand All @@ -48,7 +48,7 @@ jobs:
- os: ubuntu-22.04
python-version: "3.12"

- os: macos-12
- os: macos-13
python-version: "3.8"

- os: macos-13
Expand Down Expand Up @@ -137,8 +137,8 @@ jobs:
export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.12.9/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.13.2/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.12.10/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.13.3/x64/bin:$PATH

which sherpa-onnx
sherpa-onnx --help
2 changes: 1 addition & 1 deletion .github/workflows/test-nodejs-addon-npm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [macos-latest, macos-14, ubuntu-20.04, ubuntu-22.04, windows-latest]
os: [macos-latest, macos-14, ubuntu-latest, ubuntu-22.04, windows-latest]
node-version: ["16", "17", "18", "19", "21", "22"]

steps:
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/test-pip-install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ jobs:
matrix:
# See https://github.com/actions/runner-images
include:
- os: ubuntu-20.04
- os: ubuntu-22.04
python-version: "3.7"
- os: ubuntu-20.04
- os: ubuntu-22.04
python-version: "3.8"
- os: ubuntu-20.04
- os: ubuntu-22.04
python-version: "3.9"
- os: ubuntu-22.04
python-version: "3.10"
Expand All @@ -45,7 +45,7 @@ jobs:
- os: ubuntu-22.04
python-version: "3.13"

- os: macos-12
- os: macos-13
python-version: "3.8"

- os: macos-13
Expand Down Expand Up @@ -110,8 +110,8 @@ jobs:
export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.12.9/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.13.2/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.12.10/x64/bin:$PATH
export PATH=/c/hostedtoolcache/windows/Python/3.13.3/x64/bin:$PATH

sherpa-onnx --help
sherpa-onnx-keyword-spotter --help
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04, ubuntu-22.04, windows-latest, macos-latest, macos-14]
os: [ubuntu-latest, ubuntu-22.04, windows-latest, macos-latest, macos-14]
python-version: ["3.10"]
model_type: ["transducer", "paraformer", "nemo_ctc", "whisper", "tdnn"]

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test-python-online-websocket-server.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04, ubuntu-22.04, windows-latest, macos-latest, macos-14]
os: [ubuntu-latest, ubuntu-22.04, windows-latest, macos-latest, macos-14]
python-version: ["3.10"]
model_type: ["transducer", "paraformer", "zipformer2-ctc"]

Expand Down
24 changes: 23 additions & 1 deletion nodejs-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -182,10 +182,32 @@ tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
node ./test-offline-paraformer.js
```

## ./test-offline-sense-voice-with-hr.js

[./test-offline-sense-voice-with-hr.js](./test-offline-sense-voice-with-hr.js) demonstrates
how to decode a file with a non-streaming SenseVoice model and a homophone replacer.

You can use the following command to run it:

```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt

node ./test-offline-sense-voice-with-hr.js
```

## ./test-offline-sense-voice.js

[./test-offline-sense-voice.js](./test-offline-sense-voice.js) demonstrates
how to decode a file with a non-streaming Paraformer model.
how to decode a file with a non-streaming SenseVoice model.

You can use the following command to run it:

Expand Down
40 changes: 40 additions & 0 deletions nodejs-examples/test-offline-sense-voice-with-hr.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Copyright (c) 2024-2025 Xiaomi Corporation (authors: Fangjun Kuang)

const sherpa_onnx = require('sherpa-onnx');

/**
 * Create the recognizer used by this example: a SenseVoice model
 * configuration plus an `hr` (homophone replacer) section.
 *
 * All paths are relative ('./...'), so the model directory, ./dict,
 * ./lexicon.txt and ./replace.fst are expected next to where the script
 * is launched.
 *
 * @returns whatever sherpa_onnx.createOfflineRecognizer() returns for
 *          this configuration.
 */
function createOfflineRecognizer() {
  return sherpa_onnx.createOfflineRecognizer({
    modelConfig: {
      senseVoice: {
        model:
            './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx',
        language: '',
        useInverseTextNormalization: 1,
      },
      tokens: './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
    },
    // Homophone replacer resources.
    hr: {
      dictDir: './dict',
      lexicon: './lexicon.txt',
      ruleFsts: './replace.fst',
    },
  });
}

// Driver: decode ./test-hr.wav once and print the recognized text.
const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

// Feed the whole wave file to the stream in a single call.
const {sampleRate, samples} = sherpa_onnx.readWave('./test-hr.wav');
stream.acceptWaveform(sampleRate, samples);

recognizer.decode(stream);
console.log(recognizer.getResult(stream).text);

// Both objects expose free(); call it once we are done with them.
stream.free();
recognizer.free();
65 changes: 61 additions & 4 deletions wasm/asr/sherpa-onnx-asr.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ function freeConfig(config, Module) {
freeConfig(config.ctcFstDecoder, Module)
}

if ('hr' in config) {
freeConfig(config.hr, Module)
}

Module._free(config.ptr);
}

Expand Down Expand Up @@ -281,6 +285,34 @@ function initSherpaOnnxFeatureConfig(config, Module) {
return {ptr: ptr, len: len};
}

/**
 * Write a homophone-replacer config into the heap managed by `Module`.
 *
 * Two allocations are made with Module._malloc():
 *   - a 12-byte block (`ptr`) holding three 'i8*' values, one per string,
 *     at offsets 0, 4 and 8;
 *   - one buffer (`buffer`) holding the three strings back to back, each
 *     followed by one extra byte (the `+ 1` on its UTF-8 length).
 *
 * The strings, in order, are config.dictDir, config.lexicon and
 * config.ruleFsts; a missing or falsy value is stored as ''.
 *
 * @param config object that may carry dictDir, lexicon and ruleFsts
 * @param Module object providing _malloc, lengthBytesUTF8, stringToUTF8
 *               and setValue
 * @returns {{ptr, len, buffer}} the pointer block, its size in bytes (12),
 *          and the string buffer
 */
function initSherpaOnnxHomophoneReplacerConfig(config, Module) {
  const len = 3 * 4;
  const ptr = Module._malloc(len);

  // Field order fixes the slot order inside the pointer block.
  const strings = [config.dictDir, config.lexicon, config.ruleFsts].map(
      (value) => value || '');
  const sizes = strings.map((s) => Module.lengthBytesUTF8(s) + 1);

  const buffer = Module._malloc(sizes.reduce((sum, n) => sum + n, 0));

  // Copy the strings contiguously, remembering where each one starts.
  const addresses = [];
  let cursor = buffer;
  strings.forEach((s, i) => {
    Module.stringToUTF8(s, cursor, sizes[i]);
    addresses.push(cursor);
    cursor += sizes[i];
  });

  // One 4-byte pointer slot per string.
  addresses.forEach((address, i) => {
    Module.setValue(ptr + 4 * i, address, 'i8*');
  });

  return {ptr: ptr, len: len, buffer: buffer};
}

function initSherpaOnnxOnlineCtcFstDecoderConfig(config, Module) {
const len = 2 * 4;
const ptr = Module._malloc(len);
Expand Down Expand Up @@ -317,12 +349,21 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
config.hotwordsBufSize = 0;
}

if (!('hr' in config)) {
config.hr = {
dictDir: '',
lexicon: '',
ruleFsts: '',
};
}

const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module);
const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig(
config.ctcFstDecoderConfig, Module)
const hr = initSherpaOnnxHomophoneReplacerConfig(config.hr, Module);

const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 5 * 4;
const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 5 * 4 + hr.len;
const ptr = Module._malloc(len);

let offset = 0;
Expand Down Expand Up @@ -411,9 +452,12 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
Module.setValue(ptr + offset, config.hotwordsBufSize || 0, 'i32');
offset += 4;

Module._CopyHeap(hr.ptr, hr.len, ptr + offset);
offset += hr.len;

return {
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model,
ctcFstDecoder: ctcFstDecoder
ctcFstDecoder: ctcFstDecoder, hr: hr,
}
}

Expand Down Expand Up @@ -989,11 +1033,20 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
};
}

if (!('hr' in config)) {
config.hr = {
dictDir: '',
lexicon: '',
ruleFsts: '',
};
}

const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);
const hr = initSherpaOnnxHomophoneReplacerConfig(config.hr, Module);

const len = feat.len + model.len + lm.len + 7 * 4;
const len = feat.len + model.len + lm.len + 7 * 4 + hr.len;
const ptr = Module._malloc(len);

let offset = 0;
Expand Down Expand Up @@ -1056,8 +1109,12 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
Module.setValue(ptr + offset, config.blankPenalty || 0, 'float');
offset += 4;

Module._CopyHeap(hr.ptr, hr.len, ptr + offset);
offset += hr.len;

return {
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm,
hr: hr,
}
}

Expand Down
8 changes: 7 additions & 1 deletion wasm/asr/sherpa-onnx-wasm-main-asr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOnlineRecognizerConfig) ==
sizeof(SherpaOnnxFeatureConfig) +
sizeof(SherpaOnnxOnlineModelConfig) + 8 * 4 +
sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) + 5 * 4,
sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) + 5 * 4 +
sizeof(SherpaOnnxHomophoneReplacerConfig),
"");

void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
Expand Down Expand Up @@ -82,6 +83,11 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
fprintf(stdout, "graph: %s\n", config->ctc_fst_decoder_config.graph);
fprintf(stdout, "max_active: %d\n",
config->ctc_fst_decoder_config.max_active);

fprintf(stdout, "----------hr config----------\n");
fprintf(stdout, "dict_dir: %s\n", config->hr.dict_dir);
fprintf(stdout, "lexicon: %s\n", config->hr.lexicon);
fprintf(stdout, "rule_fsts: %s\n", config->hr.rule_fsts);
}

void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
Expand Down
7 changes: 6 additions & 1 deletion wasm/nodejs/sherpa-onnx-wasm-nodejs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
sizeof(SherpaOnnxFeatureConfig) +
sizeof(SherpaOnnxOfflineLMConfig) +
sizeof(SherpaOnnxOfflineModelConfig) + 7 * 4,
sizeof(SherpaOnnxOfflineModelConfig) + 7 * 4 +
sizeof(SherpaOnnxHomophoneReplacerConfig),
"");

void PrintOfflineTtsConfig(SherpaOnnxOfflineTtsConfig *tts_config) {
Expand Down Expand Up @@ -137,6 +138,10 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
fprintf(stdout, "rule_fsts: %s\n", config->rule_fsts);
fprintf(stdout, "rule_fars: %s\n", config->rule_fars);
fprintf(stdout, "blank_penalty: %f\n", config->blank_penalty);
fprintf(stdout, "----------hr config----------\n");
fprintf(stdout, "dict_dir: %s\n", config->hr.dict_dir);
fprintf(stdout, "lexicon: %s\n", config->hr.lexicon);
fprintf(stdout, "rule_fsts: %s\n", config->hr.rule_fsts);
}

void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
Expand Down
Loading