k2-fsa
diff --git a/‎.github/scripts/test-dot-net.sh
Lines changed: 4 additions & 1 deletion b/‎.github/scripts/test-dot-net.sh
Lines changed: 4 additions & 1 deletion
diff --git a/‎.github/scripts/test-nodejs-npm.sh
Lines changed: 7 additions & 0 deletions b/‎.github/scripts/test-nodejs-npm.sh
Lines changed: 7 additions & 0 deletions
diff --git a/‎.github/scripts/test-swift.sh
Lines changed: 5 additions & 0 deletions b/‎.github/scripts/test-swift.sh
Lines changed: 5 additions & 0 deletions
diff --git a/‎.github/workflows/test-dot-net.yaml
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/test-dot-net.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/test-go-package.yaml
Lines changed: 66 additions & 1 deletion b/‎.github/workflows/test-go-package.yaml
Lines changed: 66 additions & 1 deletion
diff --git a/‎.github/workflows/test-go.yaml
Lines changed: 6 additions & 0 deletions b/‎.github/workflows/test-go.yaml
Lines changed: 6 additions & 0 deletions
diff --git a/‎c-api-examples/CMakeLists.txt
Lines changed: 3 additions & 0 deletions b/‎c-api-examples/CMakeLists.txt
Lines changed: 3 additions & 0 deletions
diff --git a/‎c-api-examples/streaming-hlg-decode-file-c-api.c
Lines changed: 130 additions & 0 deletions b/‎c-api-examples/streaming-hlg-decode-file-c-api.c
Lines changed: 130 additions & 0 deletions
diff --git a/‎cmake/onnxruntime.cmake
Lines changed: 1 addition & 1 deletion b/‎cmake/onnxruntime.cmake
Lines changed: 1 addition & 1 deletion
diff --git a/‎dotnet-examples/sherpa-onnx.sln
Lines changed: 6 additions & 0 deletions b/‎dotnet-examples/sherpa-onnx.sln
Lines changed: 6 additions & 0 deletions
diff --git a/‎dotnet-examples/streaming-hlg-decoding/Program.cs
Lines changed: 66 additions & 0 deletions b/‎dotnet-examples/streaming-hlg-decoding/Program.cs
Lines changed: 66 additions & 0 deletions
diff --git a/‎dotnet-examples/streaming-hlg-decoding/WaveReader.cs
Lines changed: 1 addition & 0 deletions b/‎dotnet-examples/streaming-hlg-decoding/WaveReader.cs
Lines changed: 1 addition & 0 deletions
diff --git a/‎dotnet-examples/streaming-hlg-decoding/run.sh
Lines changed: 11 additions & 0 deletions b/‎dotnet-examples/streaming-hlg-decoding/run.sh
Lines changed: 11 additions & 0 deletions
@@ -2,7 +2,10 @@
 
 cd dotnet-examples/
 
-cd spoken-language-identification
+cd streaming-hlg-decoding/
+./run.sh
+
+cd ../spoken-language-identification
 ./run.sh
 
 cd ../online-decode-files
 
@@ -58,6 +58,13 @@ rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
 node ./test-online-zipformer2-ctc.js
 rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
 
+
+curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+node ./test-online-zipformer2-ctc-hlg.js
+rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
+
 # offline tts
 
 curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
 
@@ -7,6 +7,10 @@ echo "pwd: $PWD"
 cd swift-api-examples
 ls -lh
 
+./run-streaming-hlg-decode-file.sh
+rm ./streaming-hlg-decode-file
+rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
+
 ./run-spoken-language-identification.sh
 rm -rf sherpa-onnx-whisper*
 
@@ -31,4 +35,5 @@ sed -i.bak  '20d' ./decode-file.swift
 
 ./run-decode-file-non-streaming.sh
 
+
 ls -lh
@@ -178,6 +178,7 @@ jobs:
           cp -v scripts/dotnet/examples/online-decode-files.csproj dotnet-examples/online-decode-files/
           cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/
           cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/
+          cp -v scripts/dotnet/examples/streaming-hlg-decoding.csproj dotnet-examples/streaming-hlg-decoding/
 
           ls -lh /tmp
 
 
@@ -66,12 +66,77 @@ jobs:
         run: |
           gcc --version
 
-      - name: Test speaker identification
+      - name: Test streaming HLG decoding (Linux/macOS)
+        if: matrix.os != 'windows-latest'
+        shell: bash
+        run: |
+          cd go-api-examples/streaming-hlg-decoding/
+          ./run.sh
+
+      - name: Test speaker identification (Linux/macOS)
+        if: matrix.os != 'windows-latest'
         shell: bash
         run: |
           cd go-api-examples/speaker-identification
           ./run.sh
 
+      - name: Test speaker identification (Win64)
+        if: matrix.os == 'windows-latest' && matrix.arch == 'x64'
+        shell: bash
+        run: |
+          cd go-api-examples/speaker-identification
+          go mod tidy
+          cat go.mod
+          go build
+
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
+          git clone https://github.com/csukuangfj/sr-data
+          ls -lh
+          echo $PWD
+          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
+          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
+          cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
+          ls -lh
+          go mod tidy
+          go build
+          go run ./main.go
+
+      - name: Test speaker identification (Win32)
+        if: matrix.os == 'windows-latest' && matrix.arch == 'x86'
+        shell: bash
+        run: |
+          cd go-api-examples/speaker-identification
+          go mod tidy
+          cat go.mod
+          ls -lh
+
+          go env GOARCH
+          go env
+          echo "------------------------------"
+          go env -w GOARCH=386
+          go env -w CGO_ENABLED=1
+          go env
+
+          go clean
+          go build
+
+          echo $PWD
+
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
+          git clone https://github.com/csukuangfj/sr-data
+          ls -lh
+          echo $PWD
+          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
+          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
+          cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
+          ls -lh
+          go mod tidy
+          go build
+          go run ./main.go
+
+          rm -rf sr-data
+          rm -rf *.onnx
+
       - name: Test non-streaming TTS (Linux/macOS)
         if: matrix.os != 'windows-latest'
         shell: bash
 
@@ -74,6 +74,12 @@ jobs:
           go mod tidy
           go build
 
+      - name: Test streaming HLG decoding
+        shell: bash
+        run: |
+          cd scripts/go/_internal/streaming-hlg-decoding/
+          ./run.sh
+
       - name: Test speaker identification
         shell: bash
         run: |
 
@@ -15,6 +15,9 @@ target_link_libraries(spoken-language-identification-c-api sherpa-onnx-c-api)
 add_executable(speaker-identification-c-api speaker-identification-c-api.c)
 target_link_libraries(speaker-identification-c-api sherpa-onnx-c-api)
 
+add_executable(streaming-hlg-decode-file-c-api streaming-hlg-decode-file-c-api.c)
+target_link_libraries(streaming-hlg-decode-file-c-api sherpa-onnx-c-api)
+
 if(SHERPA_ONNX_HAS_ALSA)
   add_subdirectory(./asr-microphone-example)
 elseif((UNIX AND NOT APPLE) OR LINUX)
 
@@ -0,0 +1,130 @@
+// c-api-examples/streaming-hlg-decode-file-c-api.c
+//
+// Copyright (c)  2024  Xiaomi Corporation
+/*
+We use the following model as an example
+
+// clang-format off
+
+Download the model from
+https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+
+tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+
+build/bin/streaming-hlg-decode-file-c-api
+
+(The above model is from https://github.com/k2-fsa/icefall/pull/1557)
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+// Decodes one wave file with a streaming zipformer2 CTC model and an HLG
+// graph, feeding the audio in chunks of N samples to simulate streaming and
+// passing the intermediate results to SherpaOnnxPrint().
+//
+// The model, token table, HLG graph and wave file are read from the
+// hard-coded paths below, relative to the current working directory.
+//
+// Returns 0 on success. Exits with status -1 if the recognizer cannot be
+// created or the wave file cannot be read.
+int32_t main() {
+  // clang-format off
+  //
+  // Please download the model from
+  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+  const char *model = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx";
+  const char *tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt";
+  const char *graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst";
+  const char *wav_filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";
+  // clang-format on
+
+  SherpaOnnxOnlineRecognizerConfig config;
+
+  // Zero the whole struct first so every field not assigned below is 0/NULL.
+  memset(&config, 0, sizeof(config));
+  config.feat_config.sample_rate = 16000;
+  config.feat_config.feature_dim = 80;
+  config.model_config.zipformer2_ctc.model = model;
+  config.model_config.tokens = tokens;
+  config.model_config.num_threads = 1;
+  config.model_config.provider = "cpu";
+  config.model_config.debug = 0;
+  config.ctc_fst_decoder_config.graph = graph;
+  const SherpaOnnxOnlineRecognizer *recognizer =
+      CreateOnlineRecognizer(&config);
+  if (!recognizer) {
+    fprintf(stderr, "Failed to create recognizer\n");
+    exit(-1);
+  }
+
+  const SherpaOnnxOnlineStream *stream = CreateOnlineStream(recognizer);
+
+  const SherpaOnnxDisplay *display = CreateDisplay(50);
+  int32_t segment_id = 0;
+
+  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
+  if (wave == NULL) {
+    fprintf(stderr, "Failed to read %s\n", wav_filename);
+    // Release everything created above before bailing out.
+    DestroyDisplay(display);
+    DestroyOnlineStream(stream);
+    DestroyOnlineRecognizer(recognizer);
+    exit(-1);
+  }
+
+// simulate streaming. You can choose an arbitrary N
+#define N 3200
+
+  fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
+          wave->sample_rate, wave->num_samples,
+          (float)wave->num_samples / wave->sample_rate);
+
+  // k is the offset of the next chunk; each iteration feeds samples
+  // [start, end), where the last chunk may be shorter than N.
+  int32_t k = 0;
+  while (k < wave->num_samples) {
+    int32_t start = k;
+    int32_t end =
+        (start + N > wave->num_samples) ? wave->num_samples : (start + N);
+    k += N;
+
+    AcceptWaveform(stream, wave->sample_rate, wave->samples + start,
+                   end - start);
+    // Decode for as long as the stream reports that it is ready.
+    while (IsOnlineStreamReady(recognizer, stream)) {
+      DecodeOnlineStream(recognizer, stream);
+    }
+
+    const SherpaOnnxOnlineRecognizerResult *r =
+        GetOnlineStreamResult(recognizer, stream);
+
+    if (strlen(r->text)) {
+      SherpaOnnxPrint(display, segment_id, r->text);
+    }
+
+    // At an endpoint, move on to a new segment (only if the current one
+    // produced text) and reset the stream.
+    if (IsEndpoint(recognizer, stream)) {
+      if (strlen(r->text)) {
+        ++segment_id;
+      }
+      Reset(recognizer, stream);
+    }
+
+    // Each result returned by GetOnlineStreamResult() is destroyed here.
+    DestroyOnlineRecognizerResult(r);
+  }
+
+  // add some tail padding: 4800 zero samples, i.e. 0.3 seconds at 16 kHz.
+  // They are submitted with the wave's own sample rate, so the padded
+  // duration is 4800 / wave->sample_rate seconds.
+  float tail_paddings[4800] = {0};
+  AcceptWaveform(stream, wave->sample_rate, tail_paddings, 4800);
+
+  // The wave is no longer read after this point.
+  SherpaOnnxFreeWave(wave);
+
+  // Signal the end of input, then decode whatever is still pending.
+  InputFinished(stream);
+  while (IsOnlineStreamReady(recognizer, stream)) {
+    DecodeOnlineStream(recognizer, stream);
+  }
+
+  const SherpaOnnxOnlineRecognizerResult *r =
+      GetOnlineStreamResult(recognizer, stream);
+
+  if (strlen(r->text)) {
+    SherpaOnnxPrint(display, segment_id, r->text);
+  }
+
+  DestroyOnlineRecognizerResult(r);
+
+  DestroyDisplay(display);
+  DestroyOnlineStream(stream);
+  DestroyOnlineRecognizer(recognizer);
+  fprintf(stderr, "\n");
+
+  return 0;
+}
@@ -5,7 +5,7 @@ function(download_onnxruntime)
   message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
   message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
   if(SHERPA_ONNX_ENABLE_WASM)
-      include(onnxruntime-wasm-simd)
+    include(onnxruntime-wasm-simd)
   elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL riscv64)
     if(BUILD_SHARED_LIBS)
       include(onnxruntime-linux-riscv64)
 
@@ -15,6 +15,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts-play", "offline
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identification", "spoken-language-identification\spoken-language-identification.csproj", "{3D7CF3D6-AC45-4D50-9619-5687B1443E94}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-hlg-decoding", "streaming-hlg-decoding\streaming-hlg-decoding.csproj", "{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -48,5 +50,9 @@ Global
 		{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.Build.0 = Release|Any CPU
+		{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 EndGlobal
@@ -0,0 +1,66 @@
+// Copyright (c)  2024  Xiaomi Corporation
+//
+// This file shows how to do streaming HLG decoding.
+//
+// 1. Download the model for testing
+//
+//  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+//  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+//  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+//
+// 2. Now run it
+//
+// dotnet run
+
+using SherpaOnnx;
+using System.Collections.Generic;
+using System;
+
+// Demo program: decodes one wave file with a streaming zipformer2 CTC model
+// and an HLG graph, then prints the recognized text, tokens and timestamps.
+class StreamingHlgDecodingDemo
+{
+
+  // Entry point. Command-line arguments are not used; all paths below are
+  // hard-coded relative to the current working directory.
+  static void Main(string[] args)
+  {
+    var config = new OnlineRecognizerConfig();
+    config.FeatConfig.SampleRate = 16000;
+    config.FeatConfig.FeatureDim = 80;
+    config.ModelConfig.Zipformer2Ctc.Model = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx";
+
+    config.ModelConfig.Tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt";
+    config.ModelConfig.Provider = "cpu";
+    config.ModelConfig.NumThreads = 1;
+    config.ModelConfig.Debug = 0;
+    config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst";
+
+    OnlineRecognizer recognizer = new OnlineRecognizer(config);
+
+    var filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";
+
+    // Feed the whole file at once, at the sample rate reported by the reader.
+    WaveReader waveReader = new WaveReader(filename);
+    OnlineStream s = recognizer.CreateStream();
+    s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
+
+    // Append 0.3 seconds of zero samples as tail padding, then mark the end of input.
+    float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
+    s.AcceptWaveform(waveReader.SampleRate, tailPadding);
+    s.InputFinished();
+
+    // Decode for as long as the recognizer reports the stream as ready.
+    while (recognizer.IsReady(s))
+    {
+      recognizer.Decode(s);
+    }
+
+    OnlineRecognizerResult r = recognizer.GetResult(s);
+    var text = r.Text;
+    var tokens = r.Tokens;
+    Console.WriteLine("--------------------");
+    Console.WriteLine(filename);
+    Console.WriteLine("text: {0}", text);
+    Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens));
+    Console.Write("timestamps: [");
+    // A plain foreach keeps this file independent of System.Linq (which it
+    // does not import) and avoids building a temporary list.
+    foreach (var t in r.Timestamps)
+    {
+      Console.Write(String.Format("{0:0.00}", t) + ", ");
+    }
+    Console.WriteLine("]");
+    Console.WriteLine("--------------------");
+  }
+}
+
+
@@ -0,0 +1 @@
+../online-decode-files/WaveReader.cs
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+#
+# Runs the streaming HLG decoding example with `dotnet run`.
+# The test model is downloaded and unpacked into the current directory
+# unless its HLG.fst is already present.
+
+# Assigned before tracing is switched on so the -x output is unaffected.
+model_name=sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
+archive=$model_name.tar.bz2
+
+set -ex
+
+if [ ! -f "./$model_name/HLG.fst" ]; then
+  curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$archive"
+  tar xvf "$archive"
+  rm "$archive"
+fi
+
+dotnet run -c Release