Skip to content

Support replacing homophonic phrases #2153

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions .github/scripts/test-offline-ctc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,29 @@ for m in model.onnx model.int8.onnx; do
done
done

# Download the assets used by the --hr-* options below: the dict directory
# (shipped as a tarball), the rule FST, a test wave and the lexicon.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2
rm dict.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt

# Decode test-hr.wav with each model file, once with --sense-voice-use-itn=0
# and once with --sense-voice-use-itn=1, always passing the --hr-* options.
for m in model.onnx model.int8.onnx; do
  for use_itn in 0 1; do
    # This loop does not set $w, so log the wave that is actually decoded
    # instead of printing an empty or stale value.
    echo "$m test-hr.wav $use_itn"
    time $EXE \
      --tokens=$repo/tokens.txt \
      --sense-voice-model=$repo/$m \
      --sense-voice-use-itn=$use_itn \
      --hr-lexicon=./lexicon.txt \
      --hr-dict-dir=./dict \
      --hr-rule-fsts=./replace.fst \
      ./test-hr.wav
  done
done

# Remove the downloaded assets so later tests start from a clean directory.
rm -rf dict replace.fst test-hr.wav lexicon.txt

# test wav reader for non-standard wav files
waves=(
Expand Down
12 changes: 12 additions & 0 deletions .github/scripts/test-python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,18 @@ rm $name
ls -lh $repo
python3 ./python-api-examples/offline-sense-voice-ctc-decode-files.py

# The dict directory is published as a tarball: fetch it, unpack it, and
# drop the archive.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2
rm dict.tar.bz2

# The remaining assets are plain files downloaded from the same release.
for hr_file in replace.fst test-hr.wav lexicon.txt; do
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/$hr_file
done

# Run the Python example that passes the hr_* options to the recognizer.
python3 ./python-api-examples/offline-sense-voice-ctc-decode-files-with-hr.py

# Clean up everything downloaded above.
rm -rf lexicon.txt test-hr.wav replace.fst dict

if [[ $(uname) == Linux ]]; then
# It needs ffmpeg
log "generate subtitles (Chinese)"
Expand Down
16 changes: 8 additions & 8 deletions cmake/kaldifst.cmake
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
function(download_kaldifst)
include(FetchContent)

set(kaldifst_URL "https://github.com/k2-fsa/kaldifst/archive/refs/tags/v1.7.11.tar.gz")
set(kaldifst_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldifst-1.7.11.tar.gz")
set(kaldifst_HASH "SHA256=b43b3332faa2961edc730e47995a58cd4e22ead21905d55b0c4a41375b4a525f")
set(kaldifst_URL "https://github.com/k2-fsa/kaldifst/archive/refs/tags/v1.7.13.tar.gz")
set(kaldifst_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldifst-1.7.13.tar.gz")
set(kaldifst_HASH "SHA256=f8dc15fdaf314d7c9c3551ad8c11ed15da0f34de36446798bbd1b90fa7946eb2")

# If you don't have access to the Internet,
# please pre-download kaldifst
set(possible_file_locations
$ENV{HOME}/Downloads/kaldifst-1.7.11.tar.gz
${CMAKE_SOURCE_DIR}/kaldifst-1.7.11.tar.gz
${CMAKE_BINARY_DIR}/kaldifst-1.7.11.tar.gz
/tmp/kaldifst-1.7.11.tar.gz
/star-fj/fangjun/download/github/kaldifst-1.7.11.tar.gz
$ENV{HOME}/Downloads/kaldifst-1.7.13.tar.gz
${CMAKE_SOURCE_DIR}/kaldifst-1.7.13.tar.gz
${CMAKE_BINARY_DIR}/kaldifst-1.7.13.tar.gz
/tmp/kaldifst-1.7.13.tar.gz
/star-fj/fangjun/download/github/kaldifst-1.7.13.tar.gz
)

foreach(f IN LISTS possible_file_locations)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming SenseVoice CTC model from
https://github.com/FunAudioLLM/SenseVoice
to decode files with homophone replacement (the hr_* options) enabled.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models

For instance,

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
tar xf dict.tar.bz2

wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
"""

from pathlib import Path

import sherpa_onnx
import soundfile as sf


def create_recognizer():
    """Build an offline SenseVoice recognizer with the hr_* options set.

    All paths are relative to the current working directory.

    Returns:
      A tuple ``(recognizer, test_wav)``: the object returned by
      ``sherpa_onnx.OfflineRecognizer.from_sense_voice`` and the path of the
      wave file to decode.

    Raises:
      ValueError: If any required file, or the dict directory, is missing.
    """
    model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.onnx"
    tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"
    test_wav = "./test-hr.wav"
    hr_lexicon = "./lexicon.txt"
    hr_dict_dir = "./dict"
    hr_rule_fsts = "./replace.fst"

    # Check every input up front so that a missing file is reported by name
    # here instead of surfacing later while the recognizer is being built.
    missing = [
        p
        for p in (model, tokens, test_wav, hr_lexicon, hr_rule_fsts)
        if not Path(p).is_file()
    ]
    if not Path(hr_dict_dir).is_dir():
        missing.append(hr_dict_dir)

    if missing:
        raise ValueError(
            """Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
and
https://github.com/k2-fsa/sherpa-onnx/releases/tag/hr-files
"""
            + "Missing: "
            + ", ".join(missing)
        )

    recognizer = sherpa_onnx.OfflineRecognizer.from_sense_voice(
        model=model,
        tokens=tokens,
        use_itn=True,
        debug=True,
        hr_lexicon=hr_lexicon,
        hr_dict_dir=hr_dict_dir,
        hr_rule_fsts=hr_rule_fsts,
    )
    return recognizer, test_wav


def main():
    """Decode the test wave with the recognizer and print the result."""
    recognizer, wave_filename = create_recognizer()

    # Read as float32 and always as a 2-D array, then keep only the first
    # channel so the recognizer receives a 1-D array.
    samples, sample_rate = sf.read(
        wave_filename,
        dtype="float32",
        always_2d=True,
    )
    first_channel = samples[:, 0]

    # first_channel is a 1-D float32 numpy array normalized to the range [-1, 1]
    # sample_rate does not need to be 16000 Hz

    stream = recognizer.create_stream()
    stream.accept_waveform(sample_rate, first_channel)
    recognizer.decode_stream(stream)

    print(wave_filename)
    print(stream.result)


if __name__ == "__main__":
    main()
2 changes: 2 additions & 0 deletions sherpa-onnx/csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ set(sources
features.cc
file-utils.cc
fst-utils.cc
homophone-replacer.cc
hypothesis.cc
jieba.cc
keyword-spotter-impl.cc
keyword-spotter.cc
offline-ctc-fst-decoder-config.cc
Expand Down
Loading
Loading