Add C++ and Python API for Dolphin CTC models #2085

Merged (3 commits, Apr 2, 2025)
33 changes: 33 additions & 0 deletions .github/scripts/test-offline-ctc.sh
@@ -15,6 +15,39 @@ echo "PATH: $PATH"

which $EXE

for type in base small; do
log "------------------------------------------------------------"
log "Run Dolphin CTC models ($type int8)"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2
tar xvf sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2
rm sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2

$EXE \
--dolphin-model=./sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02/model.int8.onnx \
--tokens=./sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02/tokens.txt \
--debug=1 \
./sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav

rm -rf sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02

log "------------------------------------------------------------"
log "Run Dolphin CTC models ($type)"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2
tar xvf sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2
rm sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2

$EXE \
--dolphin-model=./sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02/model.onnx \
--tokens=./sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02/tokens.txt \
--debug=1 \
./sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02/test_wavs/0.wav

rm -rf sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02
done


log "------------------------------------------------------------"
log "Run NeMo GigaAM Russian models"
log "------------------------------------------------------------"
9 changes: 9 additions & 0 deletions .github/scripts/test-python.sh
@@ -8,6 +8,15 @@ log() {
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

log "test offline dolphin ctc"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2

python3 ./python-api-examples/offline-dolphin-ctc-decode-files.py

rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02

log "test offline speech enhancement (GTCRN)"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
48 changes: 48 additions & 0 deletions .github/workflows/export-dophin-ctc-to-onnx.yaml
@@ -0,0 +1,48 @@
name: export-dolphin-ctc-to-onnx

on:
workflow_dispatch:

concurrency:
group: export-dolphin-ctc-to-onnx-${{ github.ref }}
cancel-in-progress: true

jobs:
export-dolphin-ctc-to-onnx:
if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
name: ${{ matrix.model_type }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [macos-latest]
model_type: [small, base]

steps:
- uses: actions/checkout@v4

- name: Download ${{ matrix.model_type }}
shell: bash
run: |
git lfs install
type=${{ matrix.model_type }}

git clone https://huggingface.co/csukuangfj/sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02
git clone https://huggingface.co/csukuangfj/sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02

rm -rf sherpa-onnx-dolphin-*/.git*

ls -lha sherpa-onnx-dolphin-*/

tar cjfv sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2 sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02
tar cjfv sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2 sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02

- name: Release
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
file: ./*.tar.bz2
overwrite: true
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: asr-models
20 changes: 10 additions & 10 deletions .github/workflows/linux.yaml
@@ -205,6 +205,16 @@ jobs:
overwrite: true
file: sherpa-onnx-*.tar.bz2

- name: Test offline CTC
shell: bash
run: |
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline

.github/scripts/test-offline-ctc.sh
du -h -d1 .

- name: Test offline speech denoiser
shell: bash
run: |
@@ -249,16 +259,6 @@ jobs:
.github/scripts/test-offline-moonshine.sh
du -h -d1 .

- name: Test offline CTC
shell: bash
run: |
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline

.github/scripts/test-offline-ctc.sh
du -h -d1 .

- name: Test C++ API
shell: bash
run: |
16 changes: 8 additions & 8 deletions .github/workflows/macos.yaml
@@ -162,6 +162,14 @@ jobs:
overwrite: true
file: sherpa-onnx-*osx-universal2*.tar.bz2

- name: Test offline CTC
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline

.github/scripts/test-offline-ctc.sh

- name: Test offline speech denoiser
shell: bash
run: |
@@ -226,14 +234,6 @@ jobs:

.github/scripts/test-online-punctuation.sh

- name: Test offline CTC
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline

.github/scripts/test-offline-ctc.sh

- name: Test online CTC
shell: bash
run: |
4 changes: 4 additions & 0 deletions CMakeLists.txt
@@ -1,3 +1,7 @@
if (CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
endif()

cmake_minimum_required(VERSION 3.13 FATAL_ERROR)

set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14" CACHE STRING "Minimum OS X deployment version. Used only for macOS")
69 changes: 69 additions & 0 deletions python-api-examples/offline-dolphin-ctc-decode-files.py
@@ -0,0 +1,69 @@
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming CTC model from Dolphin
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
"""

from pathlib import Path
import time

import sherpa_onnx
import soundfile as sf


def create_recognizer():
model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx"
tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt"
test_wav = (
"./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav"
)

if not Path(model).is_file() or not Path(test_wav).is_file():
raise ValueError(
"""Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
"""
)
return (
sherpa_onnx.OfflineRecognizer.from_dolphin_ctc(
model=model,
tokens=tokens,
debug=True,
),
test_wav,
)


def main():
recognizer, wave_filename = create_recognizer()

audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
audio = audio[:, 0] # only use the first channel

# audio is a 1-D float32 numpy array normalized to the range [-1, 1]
# sample_rate does not need to be 16000 Hz

start = time.time()
stream = recognizer.create_stream()
stream.accept_waveform(sample_rate, audio)
recognizer.decode_stream(stream)
end = time.time()

print(wave_filename)
print(stream.result)

elapsed_seconds = end - start
audio_duration = len(audio) / sample_rate
real_time_factor = elapsed_seconds / audio_duration

print(f"Elapsed seconds: {elapsed_seconds:.3f}")
print(f"Audio duration in seconds: {audio_duration:.3f}")
print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")


if __name__ == "__main__":
main()
2 changes: 2 additions & 0 deletions sherpa-onnx/csrc/CMakeLists.txt
@@ -27,6 +27,8 @@ set(sources
offline-ctc-fst-decoder.cc
offline-ctc-greedy-search-decoder.cc
offline-ctc-model.cc
offline-dolphin-model-config.cc
offline-dolphin-model.cc
offline-fire-red-asr-greedy-search-decoder.cc
offline-fire-red-asr-model-config.cc
offline-fire-red-asr-model.cc
9 changes: 9 additions & 0 deletions sherpa-onnx/csrc/offline-ctc-model.cc
@@ -20,6 +20,7 @@

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-dolphin-model.h"
#include "sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h"
#include "sherpa-onnx/csrc/offline-tdnn-ctc-model.h"
#include "sherpa-onnx/csrc/offline-telespeech-ctc-model.h"
@@ -110,6 +111,10 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,

std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
const OfflineModelConfig &config) {
if (!config.dolphin.model.empty()) {
return std::make_unique<OfflineDolphinModel>(config);
}

// TODO(fangjun): Refactor it. We don't need to use model_type here
ModelType model_type = ModelType::kUnknown;

@@ -160,6 +165,10 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
template <typename Manager>
std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
Manager *mgr, const OfflineModelConfig &config) {
if (!config.dolphin.model.empty()) {
return std::make_unique<OfflineDolphinModel>(mgr, config);
}

// TODO(fangjun): Refactor it. We don't need to use model_type here
ModelType model_type = ModelType::kUnknown;

4 changes: 4 additions & 0 deletions sherpa-onnx/csrc/offline-ctc-model.h
@@ -64,6 +64,10 @@ class OfflineCtcModel {
// return true for models from https://github.com/salute-developers/GigaAM
// return false otherwise
virtual bool IsGigaAM() const { return false; }

// For Dolphin models, they use global CMVN
virtual void NormalizeFeatures(float *features, int32_t num_frames,
int32_t feat_dim) const {}
};

} // namespace sherpa_onnx
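The new NormalizeFeatures() hook is documented only by the comment above (Dolphin models use global CMVN). As a rough illustration, a global-CMVN pass over a feature matrix could look like the sketch below; the per-dimension (x - mean) * inv_stddev form and the use of the mean/inv_stddev vectors from OfflineDolphinModelMetaData (added later in this PR) are assumptions, since offline-dolphin-model.cc itself is not shown in this view.

#include <cstdint>
#include <vector>

// Sketch only: apply global CMVN in place to a (num_frames x feat_dim)
// feature matrix, the way a Dolphin NormalizeFeatures() override presumably
// works. mean/inv_stddev mirror OfflineDolphinModelMetaData from this PR.
void ApplyGlobalCmvn(float *features, int32_t num_frames, int32_t feat_dim,
                     const std::vector<float> &mean,
                     const std::vector<float> &inv_stddev) {
  for (int32_t i = 0; i != num_frames; ++i) {
    float *frame = features + i * feat_dim;
    for (int32_t k = 0; k != feat_dim; ++k) {
      frame[k] = (frame[k] - mean[k]) * inv_stddev[k];
    }
  }
}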
35 changes: 35 additions & 0 deletions sherpa-onnx/csrc/offline-dolphin-model-config.cc
@@ -0,0 +1,35 @@
// sherpa-onnx/csrc/offline-dolphin-model-config.cc
//
// Copyright (c) 2025 Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-dolphin-model-config.h"

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineDolphinModelConfig::Register(ParseOptions *po) {
po->Register("dolphin-model", &model,
"Path to model.onnx of Dolphin CTC branch.");
}

bool OfflineDolphinModelConfig::Validate() const {
if (!FileExists(model)) {
SHERPA_ONNX_LOGE("Dolphin model '%s' does not exist", model.c_str());
return false;
}

return true;
}

std::string OfflineDolphinModelConfig::ToString() const {
std::ostringstream os;

os << "OfflineDolphinModelConfig(";
os << "model=\"" << model << "\")";

return os.str();
}

} // namespace sherpa_onnx
27 changes: 27 additions & 0 deletions sherpa-onnx/csrc/offline-dolphin-model-config.h
@@ -0,0 +1,27 @@
// sherpa-onnx/csrc/offline-dolphin-model-config.h
//
// Copyright (c) 2025 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineDolphinModelConfig {
std::string model;

OfflineDolphinModelConfig() = default;
explicit OfflineDolphinModelConfig(const std::string &model) : model(model) {}

void Register(ParseOptions *po);
bool Validate() const;

std::string ToString() const;
};

} // namespace sherpa_onnx

#endif // SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
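Since the PR title also advertises a C++ API, here is a minimal usage sketch mirroring the Python example above. Only the dolphin.model field (wired into OfflineCtcModel::Create() above) and the --dolphin-model/--tokens options are confirmed by this diff; the OfflineRecognizer / OfflineRecognizerConfig / OfflineStream names follow the existing sherpa-onnx offline API and should be read as assumptions in this sketch.

// Hypothetical C++ usage sketch; not part of this PR's diff.
#include <cstdint>
#include <vector>

#include "sherpa-onnx/csrc/offline-recognizer.h"

int main() {
  sherpa_onnx::OfflineRecognizerConfig config;
  config.model_config.dolphin.model =
      "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx";
  config.model_config.tokens =
      "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt";

  sherpa_onnx::OfflineRecognizer recognizer(config);

  // 1-D float32 audio in [-1, 1]; the sample rate need not be 16000 Hz.
  std::vector<float> samples;  // fill this from a wave file
  int32_t sample_rate = 16000;

  auto stream = recognizer.CreateStream();
  stream->AcceptWaveform(sample_rate, samples.data(),
                         static_cast<int32_t>(samples.size()));
  recognizer.DecodeStream(stream.get());
  // The recognized text is then available from the stream's result, e.g.
  // stream->GetResult().text (name assumed from the existing offline API).
  return 0;
}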
21 changes: 21 additions & 0 deletions sherpa-onnx/csrc/offline-dolphin-model-meta-data.h
@@ -0,0 +1,21 @@
// sherpa-onnx/csrc/offline-dolphin-model-meta-data.h
//
// Copyright (c) 2024 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_META_DATA_H_

#include <string>
#include <vector>

namespace sherpa_onnx {

struct OfflineDolphinModelMetaData {
int32_t vocab_size;
int32_t subsampling_factor = 4;
std::vector<float> mean;
std::vector<float> inv_stddev;
};

} // namespace sherpa_onnx

#endif // SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_META_DATA_H_
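The mean and inv_stddev vectors in this struct presumably come from custom metadata embedded in the exported ONNX models. This diff does not show how they are populated, so the sketch below is only a plausible reconstruction using onnxruntime's C++ API; the metadata key names ("mean", "inv_stddev") and the comma-separated encoding are assumptions, not something this PR confirms.

// Hypothetical sketch: read comma-separated float vectors from the custom
// metadata of an ONNX model. Key names and encoding are assumptions.
#include <cstdint>
#include <sstream>
#include <string>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT

static std::vector<float> ParseFloats(const std::string &s) {
  std::vector<float> ans;
  std::istringstream iss(s);
  std::string token;
  while (std::getline(iss, token, ',')) {
    ans.push_back(std::stof(token));
  }
  return ans;
}

int main() {
  Ort::Env env;
  Ort::SessionOptions opts;
  Ort::Session session(env, "model.int8.onnx", opts);

  Ort::ModelMetadata meta = session.GetModelMetadata();
  Ort::AllocatorWithDefaultOptions allocator;

  // Hypothetical keys; the actual exporter may use different names.
  auto mean_str = meta.LookupCustomMetadataMapAllocated("mean", allocator);
  auto inv_str = meta.LookupCustomMetadataMapAllocated("inv_stddev", allocator);

  std::vector<float> mean, inv_stddev;
  if (mean_str) mean = ParseFloats(mean_str.get());
  if (inv_str) inv_stddev = ParseFloats(inv_str.get());
  return 0;
}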