From 293fc5247290f1fa4df18d7a5b418b3e1eeee5a3 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Thu, 16 May 2024 19:05:35 +0800 Subject: [PATCH] Add tail_paddings to Whisper C API. --- CMakeLists.txt | 2 +- nodejs-examples/test-offline-nemo-ctc.js | 1 + nodejs-examples/test-offline-paraformer.js | 1 + nodejs-examples/test-offline-transducer.js | 1 + nodejs-examples/test-offline-whisper.js | 1 + scripts/dotnet/offline.cs | 3 +++ scripts/go/sherpa_onnx.go | 11 +++++++---- scripts/node-addon-api/src/non-streaming-asr.cc | 3 ++- sherpa-onnx/c-api/c-api.cc | 3 +++ sherpa-onnx/c-api/c-api.h | 1 + swift-api-examples/SherpaOnnx.swift | 6 ++++-- wasm/asr/sherpa-onnx-asr.js | 2 ++ wasm/nodejs/sherpa-onnx-wasm-nodejs.cc | 3 ++- 13 files changed, 29 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index baaef5afaa..999448726f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR) project(sherpa-onnx) -set(SHERPA_ONNX_VERSION "1.9.24") +set(SHERPA_ONNX_VERSION "1.9.25") # Disable warning about # diff --git a/nodejs-examples/test-offline-nemo-ctc.js b/nodejs-examples/test-offline-nemo-ctc.js index e71f43152c..7ed15b0095 100644 --- a/nodejs-examples/test-offline-nemo-ctc.js +++ b/nodejs-examples/test-offline-nemo-ctc.js @@ -29,6 +29,7 @@ function createOfflineRecognizer() { decoder: '', language: '', task: '', + tailPaddings: -1, }, tdnn: { model: '', diff --git a/nodejs-examples/test-offline-paraformer.js b/nodejs-examples/test-offline-paraformer.js index f1f55bc374..6329f007ca 100644 --- a/nodejs-examples/test-offline-paraformer.js +++ b/nodejs-examples/test-offline-paraformer.js @@ -29,6 +29,7 @@ function createOfflineRecognizer() { decoder: '', language: '', task: '', + tailPaddings: -1, }, tdnn: { model: '', diff --git a/nodejs-examples/test-offline-transducer.js b/nodejs-examples/test-offline-transducer.js index bb5d4e8454..9ae6daabe6 100644 --- a/nodejs-examples/test-offline-transducer.js +++ b/nodejs-examples/test-offline-transducer.js @@ -32,6 +32,7 @@ function createOfflineRecognizer() { decoder: '', language: '', task: '', + tailPaddings: -1, }, tdnn: { model: '', diff --git a/nodejs-examples/test-offline-whisper.js b/nodejs-examples/test-offline-whisper.js index ab84e6ccfc..5a2147e3e2 100644 --- a/nodejs-examples/test-offline-whisper.js +++ b/nodejs-examples/test-offline-whisper.js @@ -29,6 +29,7 @@ function createOfflineRecognizer() { decoder: './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx', language: '', task: 'transcribe', + tailPaddings: -1, }, tdnn: { model: '', diff --git a/scripts/dotnet/offline.cs b/scripts/dotnet/offline.cs index 39de7526fe..59f1d66352 100644 --- a/scripts/dotnet/offline.cs +++ b/scripts/dotnet/offline.cs @@ -301,6 +301,7 @@ public OfflineWhisperModelConfig() Decoder = ""; Language = ""; Task = "transcribe"; + TailPaddings = -1; } [MarshalAs(UnmanagedType.LPStr)] public string Encoder; @@ -313,6 +314,8 @@ public OfflineWhisperModelConfig() [MarshalAs(UnmanagedType.LPStr)] public string Task; + + public int TailPaddings; } [StructLayout(LayoutKind.Sequential)] diff --git a/scripts/go/sherpa_onnx.go b/scripts/go/sherpa_onnx.go index 1feb3c2ad2..af60d959f0 100644 --- a/scripts/go/sherpa_onnx.go +++ b/scripts/go/sherpa_onnx.go @@ -336,10 +336,11 @@ type OfflineNemoEncDecCtcModelConfig struct { } type OfflineWhisperModelConfig struct { - Encoder string - Decoder string - Language string - Task string + Encoder string + Decoder string + Language string + Task string + TailPaddings int } type OfflineTdnnModelConfig struct { @@ -441,6 +442,8 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer { c.model_config.whisper.task = C.CString(config.ModelConfig.Whisper.Task) defer C.free(unsafe.Pointer(c.model_config.whisper.task)) + c.model_config.whisper.tail_paddings = C.int(config.ModelConfig.Whisper.TailPaddings) + c.model_config.tdnn.model = C.CString(config.ModelConfig.Tdnn.Model) defer C.free(unsafe.Pointer(c.model_config.tdnn.model)) diff --git a/scripts/node-addon-api/src/non-streaming-asr.cc b/scripts/node-addon-api/src/non-streaming-asr.cc index f45ca2f0ee..a1749a47ec 100644 --- a/scripts/node-addon-api/src/non-streaming-asr.cc +++ b/scripts/node-addon-api/src/non-streaming-asr.cc @@ -74,7 +74,8 @@ static SherpaOnnxOfflineWhisperModelConfig GetOfflineWhisperModelConfig( SHERPA_ONNX_ASSIGN_ATTR_STR(encoder, encoder); SHERPA_ONNX_ASSIGN_ATTR_STR(decoder, decoder); SHERPA_ONNX_ASSIGN_ATTR_STR(language, language); - SHERPA_ONNX_ASSIGN_ATTR_STR(task, languagek); + SHERPA_ONNX_ASSIGN_ATTR_STR(task, task); + SHERPA_ONNX_ASSIGN_ATTR_INT32(tail_paddings, tailPaddings); return c; } diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index 4407493425..615e86da8c 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -341,6 +341,9 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( recognizer_config.model_config.whisper.task = "transcribe"; } + recognizer_config.model_config.whisper.tail_paddings = + SHERPA_ONNX_OR(config->model_config.whisper.tail_paddings, -1); + recognizer_config.model_config.tdnn.model = SHERPA_ONNX_OR(config->model_config.tdnn.model, ""); diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index b70d5bccc8..87cdfc5ca8 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -359,6 +359,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineWhisperModelConfig { const char *decoder; const char *language; const char *task; + int32_t tail_paddings; } SherpaOnnxOfflineWhisperModelConfig; SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTdnnModelConfig { diff --git a/swift-api-examples/SherpaOnnx.swift b/swift-api-examples/SherpaOnnx.swift index 97b4e55662..686f2bbd84 100644 --- a/swift-api-examples/SherpaOnnx.swift +++ b/swift-api-examples/SherpaOnnx.swift @@ -314,13 +314,15 @@ func sherpaOnnxOfflineWhisperModelConfig( encoder: String = "", decoder: String = "", language: String = "", - task: String = "transcribe" + task: String = "transcribe", + tailPaddings: Int = -1 ) -> SherpaOnnxOfflineWhisperModelConfig { return SherpaOnnxOfflineWhisperModelConfig( encoder: toCPointer(encoder), decoder: toCPointer(decoder), language: toCPointer(language), - task: toCPointer(task) + task: toCPointer(task), + tail_paddings: Int32(tailPaddings) ) } diff --git a/wasm/asr/sherpa-onnx-asr.js b/wasm/asr/sherpa-onnx-asr.js index 6b66b73b58..24e31fc742 100644 --- a/wasm/asr/sherpa-onnx-asr.js +++ b/wasm/asr/sherpa-onnx-asr.js @@ -453,6 +453,8 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) { Module.setValue(ptr + 12, buffer + offset, 'i8*'); offset += taskLen; + Module.setValue(ptr + 16, config.tailPaddings || -1, 'i32'); + return { buffer: buffer, ptr: ptr, len: len, } diff --git a/wasm/nodejs/sherpa-onnx-wasm-nodejs.cc b/wasm/nodejs/sherpa-onnx-wasm-nodejs.cc index cd7e0c50f7..539699cc4c 100644 --- a/wasm/nodejs/sherpa-onnx-wasm-nodejs.cc +++ b/wasm/nodejs/sherpa-onnx-wasm-nodejs.cc @@ -14,7 +14,7 @@ static_assert(sizeof(SherpaOnnxOfflineTransducerModelConfig) == 3 * 4, ""); static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, ""); static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, ""); -static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 4 * 4, ""); +static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, ""); static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, ""); static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, ""); @@ -80,6 +80,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { fprintf(stdout, "decoder: %s\n", whisper->decoder); fprintf(stdout, "language: %s\n", whisper->language); fprintf(stdout, "task: %s\n", whisper->task); + fprintf(stdout, "tail_paddings: %d\n", whisper->tail_paddings); fprintf(stdout, "----------offline tdnn model config----------\n"); fprintf(stdout, "model: %s\n", tdnn->model);