Skip to content

Commit dbff2ea

Browse files
authored
Add C API for streaming HLG decoding (#734)
1 parent db67e00 commit dbff2ea

39 files changed

+839
-8
lines changed

.github/scripts/test-dot-net.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22

33
cd dotnet-examples/
44

5-
cd spoken-language-identification
5+
cd streaming-hlg-decoding/
6+
./run.sh
7+
8+
cd ../spoken-language-identification
69
./run.sh
710

811
cd ../online-decode-files

.github/scripts/test-nodejs-npm.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,13 @@ rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
5858
node ./test-online-zipformer2-ctc.js
5959
rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
6060

61+
62+
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
63+
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
64+
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
65+
node ./test-online-zipformer2-ctc-hlg.js
66+
rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
67+
6168
# offline tts
6269

6370
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2

.github/scripts/test-swift.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ echo "pwd: $PWD"
77
cd swift-api-examples
88
ls -lh
99

10+
./run-streaming-hlg-decode-file.sh
11+
rm ./streaming-hlg-decode-file
12+
rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
13+
1014
./run-spoken-language-identification.sh
1115
rm -rf sherpa-onnx-whisper*
1216

@@ -31,4 +35,5 @@ sed -i.bak '20d' ./decode-file.swift
3135

3236
./run-decode-file-non-streaming.sh
3337

38+
3439
ls -lh

.github/workflows/test-dot-net.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ jobs:
178178
cp -v scripts/dotnet/examples/online-decode-files.csproj dotnet-examples/online-decode-files/
179179
cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/
180180
cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/
181+
cp -v scripts/dotnet/examples/streaming-hlg-decoding.csproj dotnet-examples/streaming-hlg-decoding
181182
182183
ls -lh /tmp
183184

.github/workflows/test-go-package.yaml

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,77 @@ jobs:
6666
run: |
6767
gcc --version
6868
69-
- name: Test speaker identification
69+
- name: Test streaming HLG decoding (Linux/macOS)
70+
if: matrix.os != 'windows-latest'
71+
shell: bash
72+
run: |
73+
cd go-api-examples/streaming-hlg-decoding/
74+
./run.sh
75+
76+
- name: Test speaker identification (Linux/macOS)
77+
if: matrix.os != 'windows-latest'
7078
shell: bash
7179
run: |
7280
cd go-api-examples/speaker-identification
7381
./run.sh
7482
83+
- name: Test speaker identification (Win64)
84+
if: matrix.os == 'windows-latest' && matrix.arch == 'x64'
85+
shell: bash
86+
run: |
87+
cd go-api-examples/speaker-identification
88+
go mod tidy
89+
cat go.mod
90+
go build
91+
92+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
93+
git clone https://github.com/csukuangfj/sr-data
94+
ls -lh
95+
echo $PWD
96+
ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
97+
ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
98+
cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
99+
ls -lh
100+
go mod tidy
101+
go build
102+
go run ./main.go
103+
104+
- name: Test speaker identification (Win32)
105+
if: matrix.os == 'windows-latest' && matrix.arch == 'x86'
106+
shell: bash
107+
run: |
108+
cd go-api-examples/speaker-identification
109+
go mod tidy
110+
cat go.mod
111+
ls -lh
112+
113+
go env GOARCH
114+
go env
115+
echo "------------------------------"
116+
go env -w GOARCH=386
117+
go env -w CGO_ENABLED=1
118+
go env
119+
120+
go clean
121+
go build
122+
123+
echo $PWD
124+
125+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
126+
git clone https://github.com/csukuangfj/sr-data
127+
ls -lh
128+
echo $PWD
129+
ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
130+
ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
131+
cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
132+
ls -lh
133+
go mod tidy
134+
go build
135+
go run ./main.go
136+
137+
rm -rf sr-data
138+
rm -rf *.onnx
139+
75140
- name: Test non-streaming TTS (Linux/macOS)
76141
if: matrix.os != 'windows-latest'
77142
shell: bash

.github/workflows/test-go.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,12 @@ jobs:
7474
go mod tidy
7575
go build
7676
77+
- name: Test streaming HLG decoding
78+
shell: bash
79+
run: |
80+
cd scripts/go/_internal/streaming-hlg-decoding/
81+
./run.sh
82+
7783
- name: Test speaker identification
7884
shell: bash
7985
run: |

c-api-examples/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ target_link_libraries(spoken-language-identification-c-api sherpa-onnx-c-api)
1515
add_executable(speaker-identification-c-api speaker-identification-c-api.c)
1616
target_link_libraries(speaker-identification-c-api sherpa-onnx-c-api)
1717

18+
add_executable(streaming-hlg-decode-file-c-api streaming-hlg-decode-file-c-api.c)
19+
target_link_libraries(streaming-hlg-decode-file-c-api sherpa-onnx-c-api)
20+
1821
if(SHERPA_ONNX_HAS_ALSA)
1922
add_subdirectory(./asr-microphone-example)
2023
elseif((UNIX AND NOT APPLE) OR LINUX)
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
// c-api-examples/streaming-hlg-decode-file-c-api.c
2+
//
3+
// Copyright (c) 2024 Xiaomi Corporation
4+
/*
5+
We use the following model as an example
6+
7+
// clang-format off
8+
9+
Download the model from
10+
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
11+
12+
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
13+
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
14+
15+
build/bin/streaming-hlg-decode-file-c-api
16+
17+
(The above model is from https://github.com/k2-fsa/icefall/pull/1557)
18+
*/
19+
#include <stdio.h>
20+
#include <stdlib.h>
21+
#include <string.h>
22+
23+
#include "sherpa-onnx/c-api/c-api.h"
24+
25+
int32_t main() {
26+
// clang-format off
27+
//
28+
// Please download the model from
29+
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
30+
const char *model = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx";
31+
const char *tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt";
32+
const char *graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst";
33+
const char *wav_filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";
34+
// clang-format on
35+
36+
SherpaOnnxOnlineRecognizerConfig config;
37+
38+
memset(&config, 0, sizeof(config));
39+
config.feat_config.sample_rate = 16000;
40+
config.feat_config.feature_dim = 80;
41+
config.model_config.zipformer2_ctc.model = model;
42+
config.model_config.tokens = tokens;
43+
config.model_config.num_threads = 1;
44+
config.model_config.provider = "cpu";
45+
config.model_config.debug = 0;
46+
config.ctc_fst_decoder_config.graph = graph;
47+
const SherpaOnnxOnlineRecognizer *recognizer =
48+
CreateOnlineRecognizer(&config);
49+
if (!recognizer) {
50+
fprintf(stderr, "Failed to create recognizer");
51+
exit(-1);
52+
}
53+
54+
const SherpaOnnxOnlineStream *stream = CreateOnlineStream(recognizer);
55+
56+
const SherpaOnnxDisplay *display = CreateDisplay(50);
57+
int32_t segment_id = 0;
58+
59+
const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
60+
if (wave == NULL) {
61+
fprintf(stderr, "Failed to read %s\n", wav_filename);
62+
exit(-1);
63+
}
64+
65+
// simulate streaming. You can choose an arbitrary N
66+
#define N 3200
67+
68+
int16_t buffer[N];
69+
float samples[N];
70+
fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
71+
wave->sample_rate, wave->num_samples,
72+
(float)wave->num_samples / wave->sample_rate);
73+
74+
int32_t k = 0;
75+
while (k < wave->num_samples) {
76+
int32_t start = k;
77+
int32_t end =
78+
(start + N > wave->num_samples) ? wave->num_samples : (start + N);
79+
k += N;
80+
81+
AcceptWaveform(stream, wave->sample_rate, wave->samples + start,
82+
end - start);
83+
while (IsOnlineStreamReady(recognizer, stream)) {
84+
DecodeOnlineStream(recognizer, stream);
85+
}
86+
87+
const SherpaOnnxOnlineRecognizerResult *r =
88+
GetOnlineStreamResult(recognizer, stream);
89+
90+
if (strlen(r->text)) {
91+
SherpaOnnxPrint(display, segment_id, r->text);
92+
}
93+
94+
if (IsEndpoint(recognizer, stream)) {
95+
if (strlen(r->text)) {
96+
++segment_id;
97+
}
98+
Reset(recognizer, stream);
99+
}
100+
101+
DestroyOnlineRecognizerResult(r);
102+
}
103+
104+
// add some tail padding
105+
float tail_paddings[4800] = {0}; // 0.3 seconds at 16 kHz sample rate
106+
AcceptWaveform(stream, wave->sample_rate, tail_paddings, 4800);
107+
108+
SherpaOnnxFreeWave(wave);
109+
110+
InputFinished(stream);
111+
while (IsOnlineStreamReady(recognizer, stream)) {
112+
DecodeOnlineStream(recognizer, stream);
113+
}
114+
115+
const SherpaOnnxOnlineRecognizerResult *r =
116+
GetOnlineStreamResult(recognizer, stream);
117+
118+
if (strlen(r->text)) {
119+
SherpaOnnxPrint(display, segment_id, r->text);
120+
}
121+
122+
DestroyOnlineRecognizerResult(r);
123+
124+
DestroyDisplay(display);
125+
DestroyOnlineStream(stream);
126+
DestroyOnlineRecognizer(recognizer);
127+
fprintf(stderr, "\n");
128+
129+
return 0;
130+
}

cmake/onnxruntime.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ function(download_onnxruntime)
55
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
66
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
77
if(SHERPA_ONNX_ENABLE_WASM)
8-
include(onnxruntime-wasm-simd)
8+
include(onnxruntime-wasm-simd)
99
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL riscv64)
1010
if(BUILD_SHARED_LIBS)
1111
include(onnxruntime-linux-riscv64)

dotnet-examples/sherpa-onnx.sln

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts-play", "offline
1515
EndProject
1616
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identification", "spoken-language-identification\spoken-language-identification.csproj", "{3D7CF3D6-AC45-4D50-9619-5687B1443E94}"
1717
EndProject
18+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-hlg-decoding", "streaming-hlg-decoding\streaming-hlg-decoding.csproj", "{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}"
19+
EndProject
1820
Global
1921
GlobalSection(SolutionConfigurationPlatforms) = preSolution
2022
Debug|Any CPU = Debug|Any CPU
@@ -48,5 +50,9 @@ Global
4850
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.Build.0 = Debug|Any CPU
4951
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.ActiveCfg = Release|Any CPU
5052
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.Build.0 = Release|Any CPU
53+
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
54+
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.Build.0 = Debug|Any CPU
55+
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.ActiveCfg = Release|Any CPU
56+
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.Build.0 = Release|Any CPU
5157
EndGlobalSection
5258
EndGlobal
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
2+
//
3+
// This file shows how to do streaming HLG decoding.
4+
//
5+
// 1. Download the model for testing
6+
//
7+
// curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
8+
// tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
9+
// rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
10+
//
11+
// 2. Now run it
12+
//
13+
// dotnet run
14+
15+
using SherpaOnnx;
16+
using System.Collections.Generic;
17+
using System;
18+
19+
// Demonstrates streaming HLG decoding: a streaming zipformer2 CTC model is
// combined with an HLG graph to decode one test wave file, and the recognized
// text, tokens and timestamps are printed to the console.
class StreamingHlgDecodingDemo
{

  static void Main(string[] args)
  {
    var config = new OnlineRecognizerConfig();
    config.FeatConfig.SampleRate = 16000;
    config.FeatConfig.FeatureDim = 80;
    config.ModelConfig.Zipformer2Ctc.Model = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx";

    config.ModelConfig.Tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt";
    config.ModelConfig.Provider = "cpu";
    config.ModelConfig.NumThreads = 1;
    config.ModelConfig.Debug = 0;
    // Setting the graph is what selects HLG decoding for this example.
    config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst";

    OnlineRecognizer recognizer = new OnlineRecognizer(config);

    var filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";

    // Feed the whole file in one call, followed by 0.3 seconds of zeros
    // (at the file's own sample rate) as tail padding.
    WaveReader waveReader = new WaveReader(filename);
    OnlineStream s = recognizer.CreateStream();
    s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);

    float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
    s.AcceptWaveform(waveReader.SampleRate, tailPadding);
    s.InputFinished();

    // Decode until the recognizer reports nothing more is ready.
    while (recognizer.IsReady(s))
    {
      recognizer.Decode(s);
    }

    OnlineRecognizerResult r = recognizer.GetResult(s);
    var text = r.Text;
    var tokens = r.Tokens;
    Console.WriteLine("--------------------");
    Console.WriteLine(filename);
    Console.WriteLine("text: {0}", text);
    Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens));
    Console.Write("timestamps: [");
    // A plain foreach prints exactly what ToList().ForEach(...) printed
    // (including the trailing ", ") without relying on the System.Linq
    // extension method, which this file does not import explicitly.
    foreach (var i in r.Timestamps)
    {
      Console.Write(String.Format("{0:0.00}", i) + ", ");
    }
    Console.WriteLine("]");
    Console.WriteLine("--------------------");
  }
}
65+
66+
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../online-decode-files/WaveReader.cs
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/usr/bin/env bash
#
# Runs the streaming HLG decoding example in Release mode, downloading and
# unpacking the test model first if it is not already present.

set -ex

# HLG.fst is used as the marker that the model archive was already unpacked.
if ! test -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  # The archive is no longer needed once it has been extracted.
  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
fi

dotnet run -c Release

0 commit comments

Comments
 (0)