Add C++ and Python API for Dolphin CTC models #2085

Merged (3 commits, Apr 2, 2025)
33 changes: 33 additions & 0 deletions .github/scripts/test-offline-ctc.sh
@@ -15,6 +15,39 @@ echo "PATH: $PATH"

which $EXE

for type in base small; do
log "------------------------------------------------------------"
log "Run Dolphin CTC models ($type int8)"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2
tar xvf sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2
rm sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2

$EXE \
--dolphin-model=./sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02/model.int8.onnx \
--tokens=./sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02/tokens.txt \
--debug=1 \
./sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav

rm -rf sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02

log "------------------------------------------------------------"
log "Run Dolphin CTC models ($type)"
log "------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2
tar xvf sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2
rm sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2

$EXE \
--dolphin-model=./sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02/model.onnx \
--tokens=./sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02/tokens.txt \
--debug=1 \
./sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02/test_wavs/0.wav

rm -rf sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02
done


log "------------------------------------------------------------"
log "Run NeMo GigaAM Russian models"
log "------------------------------------------------------------"
9 changes: 9 additions & 0 deletions .github/scripts/test-python.sh
@@ -8,6 +8,15 @@ log() {
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

log "test offline dolphin ctc"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2

python3 ./python-api-examples/offline-dolphin-ctc-decode-files.py

rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02

log "test offline speech enhancement (GTCRN)"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
48 changes: 48 additions & 0 deletions .github/workflows/export-dophin-ctc-to-onnx.yaml
@@ -0,0 +1,48 @@
name: export-dolphin-ctc-to-onnx

on:
workflow_dispatch:

concurrency:
group: export-dolphin-ctc-to-onnx-${{ github.ref }}
cancel-in-progress: true

jobs:
export-dolphin-ctc-to-onnx:
if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
name: ${{ matrix.model_type }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [macos-latest]
model_type: [small, base]

steps:
- uses: actions/checkout@v4

- name: Download ${{ matrix.model_type }}
shell: bash
run: |
git lfs install
type=${{ matrix.model_type }}

git clone https://huggingface.co/csukuangfj/sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02
git clone https://huggingface.co/csukuangfj/sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02

rm -rf sherpa-onnx-dolphin-*/.git*

ls -lha sherpa-onnx-dolphin-*/

tar cjfv sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2 sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02
tar cjfv sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2 sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02

- name: Release
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
file: ./*.tar.bz2
overwrite: true
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: asr-models
20 changes: 10 additions & 10 deletions .github/workflows/linux.yaml
@@ -205,6 +205,16 @@ jobs:
overwrite: true
file: sherpa-onnx-*.tar.bz2

- name: Test offline CTC
shell: bash
run: |
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline

.github/scripts/test-offline-ctc.sh
du -h -d1 .

- name: Test offline speech denoiser
shell: bash
run: |
@@ -249,16 +259,6 @@ jobs:
.github/scripts/test-offline-moonshine.sh
du -h -d1 .

- name: Test offline CTC
shell: bash
run: |
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline

.github/scripts/test-offline-ctc.sh
du -h -d1 .

- name: Test C++ API
shell: bash
run: |
16 changes: 8 additions & 8 deletions .github/workflows/macos.yaml
@@ -162,6 +162,14 @@ jobs:
overwrite: true
file: sherpa-onnx-*osx-universal2*.tar.bz2

- name: Test offline CTC
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline

.github/scripts/test-offline-ctc.sh

- name: Test offline speech denoiser
shell: bash
run: |
@@ -226,14 +234,6 @@ jobs:

.github/scripts/test-online-punctuation.sh

- name: Test offline CTC
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline

.github/scripts/test-offline-ctc.sh

- name: Test online CTC
shell: bash
run: |
4 changes: 4 additions & 0 deletions CMakeLists.txt
@@ -1,3 +1,7 @@
if (CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
endif()

cmake_minimum_required(VERSION 3.13 FATAL_ERROR)

set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14" CACHE STRING "Minimum OS X deployment version. Used only for macOS")
69 changes: 69 additions & 0 deletions python-api-examples/offline-dolphin-ctc-decode-files.py
@@ -0,0 +1,69 @@
#!/usr/bin/env python3

"""
This file shows how to use a non-streaming CTC model from Dolphin
to decode files.

Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
"""

from pathlib import Path
import time

import sherpa_onnx
import soundfile as sf


def create_recognizer():
model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx"
tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt"
test_wav = (
"./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav"
)

if not Path(model).is_file() or not Path(test_wav).is_file():
raise ValueError(
"""Please download model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
"""
)
return (
sherpa_onnx.OfflineRecognizer.from_dolphin_ctc(
model=model,
tokens=tokens,
debug=True,
),
test_wav,
)


def main():
recognizer, wave_filename = create_recognizer()

audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
audio = audio[:, 0] # only use the first channel

# audio is a 1-D float32 numpy array normalized to the range [-1, 1]
# sample_rate does not need to be 16000 Hz

start = time.time()
stream = recognizer.create_stream()
stream.accept_waveform(sample_rate, audio)
recognizer.decode_stream(stream)
end = time.time()

print(wave_filename)
print(stream.result)

elapsed_seconds = end - start
audio_duration = len(audio) / sample_rate
real_time_factor = elapsed_seconds / audio_duration

print(f"Elapsed seconds: {elapsed_seconds:.3f}")
print(f"Audio duration in seconds: {audio_duration:.3f}")
print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")


if __name__ == "__main__":
main()
2 changes: 2 additions & 0 deletions sherpa-onnx/csrc/CMakeLists.txt
@@ -27,6 +27,8 @@ set(sources
offline-ctc-fst-decoder.cc
offline-ctc-greedy-search-decoder.cc
offline-ctc-model.cc
offline-dolphin-model-config.cc
offline-dolphin-model.cc
offline-fire-red-asr-greedy-search-decoder.cc
offline-fire-red-asr-model-config.cc
offline-fire-red-asr-model.cc
9 changes: 9 additions & 0 deletions sherpa-onnx/csrc/offline-ctc-model.cc
@@ -20,6 +20,7 @@

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-dolphin-model.h"
#include "sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h"
#include "sherpa-onnx/csrc/offline-tdnn-ctc-model.h"
#include "sherpa-onnx/csrc/offline-telespeech-ctc-model.h"
@@ -110,6 +111,10 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,

std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
const OfflineModelConfig &config) {
if (!config.dolphin.model.empty()) {
return std::make_unique<OfflineDolphinModel>(config);
}

// TODO(fangjun): Refactor it. We don't need to use model_type here
ModelType model_type = ModelType::kUnknown;

@@ -160,6 +165,10 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
template <typename Manager>
std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
Manager *mgr, const OfflineModelConfig &config) {
if (!config.dolphin.model.empty()) {
return std::make_unique<OfflineDolphinModel>(mgr, config);
}

// TODO(fangjun): Refactor it. We don't need to use model_type here
ModelType model_type = ModelType::kUnknown;

4 changes: 4 additions & 0 deletions sherpa-onnx/csrc/offline-ctc-model.h
@@ -64,6 +64,10 @@ class OfflineCtcModel {
// return true for models from https://github.com/salute-developers/GigaAM
// return false otherwise
virtual bool IsGigaAM() const { return false; }

// For Dolphin models, they use global CMVN
virtual void NormalizeFeatures(float *features, int32_t num_frames,
int32_t feat_dim) const {}
};

} // namespace sherpa_onnx
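The new NormalizeFeatures() hook is documented only by the comment above (Dolphin models use global CMVN). As a rough illustration, a global-CMVN pass over a feature matrix could look like the sketch below; the per-dimension (x - mean) * inv_stddev form and the use of the mean/inv_stddev vectors from OfflineDolphinModelMetaData (added later in this PR) are assumptions, since offline-dolphin-model.cc itself is not shown in this view.

#include <cstdint>
#include <vector>

// Sketch only: apply global CMVN in place to a (num_frames x feat_dim)
// feature matrix, the way a Dolphin NormalizeFeatures() override presumably
// works. mean/inv_stddev mirror OfflineDolphinModelMetaData from this PR.
void ApplyGlobalCmvn(float *features, int32_t num_frames, int32_t feat_dim,
                     const std::vector<float> &mean,
                     const std::vector<float> &inv_stddev) {
  for (int32_t i = 0; i != num_frames; ++i) {
    float *frame = features + i * feat_dim;
    for (int32_t k = 0; k != feat_dim; ++k) {
      frame[k] = (frame[k] - mean[k]) * inv_stddev[k];
    }
  }
}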
35 changes: 35 additions & 0 deletions sherpa-onnx/csrc/offline-dolphin-model-config.cc
@@ -0,0 +1,35 @@
// sherpa-onnx/csrc/offline-dolphin-model-config.cc
//
// Copyright (c) 2025 Xiaomi Corporation

#include "sherpa-onnx/csrc/offline-dolphin-model-config.h"

#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

void OfflineDolphinModelConfig::Register(ParseOptions *po) {
po->Register("dolphin-model", &model,
"Path to model.onnx of Dolphin CTC branch.");
}

bool OfflineDolphinModelConfig::Validate() const {
if (!FileExists(model)) {
SHERPA_ONNX_LOGE("Dolphin model '%s' does not exist", model.c_str());
return false;
}

return true;
}

std::string OfflineDolphinModelConfig::ToString() const {
std::ostringstream os;

os << "OfflineDolphinModelConfig(";
os << "model=\"" << model << "\")";

return os.str();
}

} // namespace sherpa_onnx
27 changes: 27 additions & 0 deletions sherpa-onnx/csrc/offline-dolphin-model-config.h
@@ -0,0 +1,27 @@
// sherpa-onnx/csrc/offline-dolphin-model-config.h
//
// Copyright (c) 2025 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
#define SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_

#include <string>

#include "sherpa-onnx/csrc/parse-options.h"

namespace sherpa_onnx {

struct OfflineDolphinModelConfig {
std::string model;

OfflineDolphinModelConfig() = default;
explicit OfflineDolphinModelConfig(const std::string &model) : model(model) {}

void Register(ParseOptions *po);
bool Validate() const;

std::string ToString() const;
};

} // namespace sherpa_onnx

#endif // SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_CONFIG_H_
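Since the PR title also advertises a C++ API, here is a minimal usage sketch mirroring the Python example above. Only the dolphin.model field (wired into OfflineCtcModel::Create() above) and the --dolphin-model/--tokens options are confirmed by this diff; the OfflineRecognizer / OfflineRecognizerConfig / OfflineStream names follow the existing sherpa-onnx offline API and should be read as assumptions in this sketch.

// Hypothetical C++ usage sketch; not part of this PR's diff.
#include <cstdint>
#include <vector>

#include "sherpa-onnx/csrc/offline-recognizer.h"

int main() {
  sherpa_onnx::OfflineRecognizerConfig config;
  config.model_config.dolphin.model =
      "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx";
  config.model_config.tokens =
      "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt";

  sherpa_onnx::OfflineRecognizer recognizer(config);

  // 1-D float32 audio in [-1, 1]; the sample rate need not be 16000 Hz.
  std::vector<float> samples;  // fill this from a wave file
  int32_t sample_rate = 16000;

  auto stream = recognizer.CreateStream();
  stream->AcceptWaveform(sample_rate, samples.data(),
                         static_cast<int32_t>(samples.size()));
  recognizer.DecodeStream(stream.get());
  // The recognized text is then available from the stream's result, e.g.
  // stream->GetResult().text (name assumed from the existing offline API).
  return 0;
}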
21 changes: 21 additions & 0 deletions sherpa-onnx/csrc/offline-dolphin-model-meta-data.h
@@ -0,0 +1,21 @@
// sherpa-onnx/csrc/offline-dolphin-model-meta-data.h
//
// Copyright (c) 2024 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_META_DATA_H_
#define SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_META_DATA_H_

#include <string>
#include <vector>

namespace sherpa_onnx {

struct OfflineDolphinModelMetaData {
int32_t vocab_size;
int32_t subsampling_factor = 4;
std::vector<float> mean;
std::vector<float> inv_stddev;
};

} // namespace sherpa_onnx

#endif // SHERPA_ONNX_CSRC_OFFLINE_DOLPHIN_MODEL_META_DATA_H_
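The mean and inv_stddev vectors in this struct presumably come from custom metadata embedded in the exported ONNX models. This diff does not show how they are populated, so the sketch below is only a plausible reconstruction using onnxruntime's C++ API; the metadata key names ("mean", "inv_stddev") and the comma-separated encoding are assumptions, not something this PR confirms.

// Hypothetical sketch: read comma-separated float vectors from the custom
// metadata of an ONNX model. Key names and encoding are assumptions.
#include <cstdint>
#include <sstream>
#include <string>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT

static std::vector<float> ParseFloats(const std::string &s) {
  std::vector<float> ans;
  std::istringstream iss(s);
  std::string token;
  while (std::getline(iss, token, ',')) {
    ans.push_back(std::stof(token));
  }
  return ans;
}

int main() {
  Ort::Env env;
  Ort::SessionOptions opts;
  Ort::Session session(env, "model.int8.onnx", opts);

  Ort::ModelMetadata meta = session.GetModelMetadata();
  Ort::AllocatorWithDefaultOptions allocator;

  // Hypothetical keys; the actual exporter may use different names.
  auto mean_str = meta.LookupCustomMetadataMapAllocated("mean", allocator);
  auto inv_str = meta.LookupCustomMetadataMapAllocated("inv_stddev", allocator);

  std::vector<float> mean, inv_stddev;
  if (mean_str) mean = ParseFloats(mean_str.get());
  if (inv_str) inv_stddev = ParseFloats(inv_str.get());
  return 0;
}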