k2-fsa · csukuangfj · Apr 27, 2025 · Apr 25, 2025 · Apr 27, 2025
diff --git a/.github/scripts/test-offline-ctc.sh b/.github/scripts/test-offline-ctc.sh
@@ -98,6 +98,29 @@ for m in model.onnx model.int8.onnx; do
   done
 done
 
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
+tar xf dict.tar.bz2
+rm dict.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
+
+for m in model.onnx model.int8.onnx; do  # exercise the --hr-* options with both model files
+  for use_itn in 0 1; do
+    echo "$m $use_itn"
+    time $EXE \
+      --tokens=$repo/tokens.txt \
+      --sense-voice-model=$repo/$m \
+      --sense-voice-use-itn=$use_itn \
+      --hr-lexicon=./lexicon.txt \
+      --hr-dict-dir=./dict \
+      --hr-rule-fsts=./replace.fst \
+      ./test-hr.wav
+  done
+done
+
+rm -rf dict replace.fst test-hr.wav lexicon.txt
 
 # test wav reader for non-standard wav files
 waves=(

diff --git a/.github/scripts/test-python.sh b/.github/scripts/test-python.sh
@@ -95,6 +95,18 @@ rm $name
 ls -lh $repo
 python3 ./python-api-examples/offline-sense-voice-ctc-decode-files.py
 
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
+tar xf dict.tar.bz2
+rm dict.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
+
+python3 ./python-api-examples/offline-sense-voice-ctc-decode-files-with-hr.py
+
+rm -rf dict replace.fst test-hr.wav lexicon.txt
+
 if [[ $(uname) == Linux ]]; then
   # It needs ffmpeg
   log  "generate subtitles (Chinese)"

diff --git a/cmake/kaldifst.cmake b/cmake/kaldifst.cmake
@@ -1,18 +1,18 @@
 function(download_kaldifst)
   include(FetchContent)
 
-  set(kaldifst_URL  "https://github.com/k2-fsa/kaldifst/archive/refs/tags/v1.7.11.tar.gz")
-  set(kaldifst_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldifst-1.7.11.tar.gz")
-  set(kaldifst_HASH "SHA256=b43b3332faa2961edc730e47995a58cd4e22ead21905d55b0c4a41375b4a525f")
+  set(kaldifst_URL  "https://github.com/k2-fsa/kaldifst/archive/refs/tags/v1.7.13.tar.gz")
+  set(kaldifst_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldifst-1.7.13.tar.gz")
+  set(kaldifst_HASH "SHA256=f8dc15fdaf314d7c9c3551ad8c11ed15da0f34de36446798bbd1b90fa7946eb2")
 
   # If you don't have access to the Internet,
   # please pre-download kaldifst
   set(possible_file_locations
-    $ENV{HOME}/Downloads/kaldifst-1.7.11.tar.gz
-    ${CMAKE_SOURCE_DIR}/kaldifst-1.7.11.tar.gz
-    ${CMAKE_BINARY_DIR}/kaldifst-1.7.11.tar.gz
-    /tmp/kaldifst-1.7.11.tar.gz
-    /star-fj/fangjun/download/github/kaldifst-1.7.11.tar.gz
+    $ENV{HOME}/Downloads/kaldifst-1.7.13.tar.gz
+    ${CMAKE_SOURCE_DIR}/kaldifst-1.7.13.tar.gz
+    ${CMAKE_BINARY_DIR}/kaldifst-1.7.13.tar.gz
+    /tmp/kaldifst-1.7.13.tar.gz
+    /star-fj/fangjun/download/github/kaldifst-1.7.13.tar.gz
   )
 
   foreach(f IN LISTS possible_file_locations)

diff --git a/python-api-examples/offline-sense-voice-ctc-decode-files-with-hr.py b/python-api-examples/offline-sense-voice-ctc-decode-files-with-hr.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+
+"""
+This file shows how to use a non-streaming SenseVoice CTC model from
+https://github.com/FunAudioLLM/SenseVoice
+to decode files.
+
+Please download model files from
+https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+
+For instance,
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
+tar xf dict.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
+"""
+
+from pathlib import Path
+
+import sherpa_onnx
+import soundfile as sf
+
+
+def create_recognizer():
+    model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.onnx"
+    tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"
+    test_wav = "./test-hr.wav"
+    # NOTE: only the model and the test wave are checked; tokens and the hr_* paths are not.
+    if not Path(model).is_file() or not Path(test_wav).is_file():
+        raise ValueError(
+            """Please download model files from
+            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+            and
+            https://github.com/k2-fsa/sherpa-onnx/releases/tag/hr-files
+            """
+        )
+    return (
+        sherpa_onnx.OfflineRecognizer.from_sense_voice(
+            model=model,
+            tokens=tokens,
+            use_itn=True,
+            debug=True,
+            hr_lexicon="./lexicon.txt",
+            hr_dict_dir="./dict",
+            hr_rule_fsts="./replace.fst",
+        ),
+        test_wav,
+    )
+
+
+def main():
+    recognizer, wave_filename = create_recognizer()
+    # always_2d=True keeps a channel axis even for mono files, so [:, 0] below is always valid.
+    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
+    audio = audio[:, 0]  # only use the first channel
+
+    # audio is a 1-D float32 numpy array normalized to the range [-1, 1]
+    # sample_rate does not need to be 16000 Hz
+
+    stream = recognizer.create_stream()
+    stream.accept_waveform(sample_rate, audio)
+    recognizer.decode_stream(stream)
+    print(wave_filename)
+    print(stream.result)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/sherpa-onnx/csrc/CMakeLists.txt b/sherpa-onnx/csrc/CMakeLists.txt
@@ -20,7 +20,9 @@ set(sources
   features.cc
   file-utils.cc
   fst-utils.cc
+  homophone-replacer.cc
   hypothesis.cc
+  jieba.cc
   keyword-spotter-impl.cc
   keyword-spotter.cc
   offline-ctc-fst-decoder-config.cc