@@ -5,28 +5,28 @@ using namespace stream_components;
 
 // -- WhisperService --
 
-WhisperService::WhisperService(const struct service_params &server_params,
+WhisperService::WhisperService(const struct service_params &service_params,
                                const struct audio_params &audio_params,
                                const struct whisper_context_params &cparams)
-        : server_params(server_params),
+        : service_params(service_params),
           audio_params(audio_params),
-          ctx(whisper_init_from_file_with_params(server_params.model.c_str(), cparams)) {
+          ctx(whisper_init_from_file_with_params(service_params.model.c_str(), cparams)) {
     {
         fprintf(stderr, "\n");
         if (!whisper_is_multilingual(ctx)) {
-            if (server_params.language != "en" || server_params.translate) {
-                this->server_params.language = "en";
-                this->server_params.translate = false;
+            if (service_params.language != "en" || service_params.translate) {
+                this->service_params.language = "en";
+                this->service_params.translate = false;
                 fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n",
                         __func__);
             }
         }
         fprintf(stderr, "%s: serving with %d threads, lang = %s, task = %s, timestamps = %d ...\n",
                 __func__,
-                server_params.n_threads,
-                server_params.language.c_str(),
-                server_params.translate ? "translate" : "transcribe",
-                server_params.no_timestamps ? 0 : 1);
+                service_params.n_threads,
+                service_params.language.c_str(),
+                service_params.translate ? "translate" : "transcribe",
+                service_params.no_timestamps ? 0 : 1);
 
         // if (!params.use_vad) {
         //     fprintf(stderr, "%s: n_new_line = %d, no_context = %d\n", __func__, n_new_line, params.no_context);
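
Note: the guard in this hunk matters for English-only models. A minimal usage sketch, assuming only the service_params/audio_params fields visible in this diff and the stock whisper.cpp helper whisper_context_default_params(); the model path is hypothetical:

    using namespace stream_components;

    service_params sparams;
    sparams.model     = "models/ggml-base.en.bin"; // hypothetical path
    sparams.language  = "de";                      // deliberately non-"en"
    sparams.translate = true;

    audio_params aparams;

    // With an English-only model, whisper_is_multilingual(ctx) returns 0, so
    // the constructor logs a warning and resets language to "en" and
    // translate to false before serving.
    WhisperService service(sparams, aparams, whisper_context_default_params());
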
@@ -51,25 +51,26 @@ bool WhisperService::process(const float *samples, int sample_count) {
     wparams.max_tokens = 0;
     wparams.token_timestamps = true;
 
-    wparams.translate = server_params.translate;
+    wparams.translate = service_params.translate;
     wparams.single_segment = !audio_params.use_vad;
-    wparams.language = server_params.language.c_str();
-    wparams.n_threads = server_params.n_threads;
+    wparams.language = service_params.language.c_str();
+    wparams.n_threads = service_params.n_threads;
 
     wparams.audio_ctx = audio_params.audio_ctx;
-    wparams.speed_up = server_params.speed_up;
+    wparams.speed_up = service_params.speed_up;
 
-    wparams.tdrz_enable = server_params.tinydiarize; // [TDRZ]
+    wparams.tdrz_enable = service_params.tinydiarize; // [TDRZ]
 
     // disable temperature fallback
     // wparams.temperature_inc = -1.0f;
-    wparams.temperature_inc = server_params.no_fallback ? 0.0f : wparams.temperature_inc;
+    wparams.temperature_inc = service_params.no_fallback ? 0.0f : wparams.temperature_inc;
 
-    wparams.prompt_tokens = server_params.no_context ? nullptr : prompt_tokens.data();
-    wparams.prompt_n_tokens = server_params.no_context ? 0 : prompt_tokens.size();
+    wparams.prompt_tokens = service_params.no_context ? nullptr : prompt_tokens.data();
+    wparams.prompt_n_tokens = service_params.no_context ? 0 : prompt_tokens.size();
 
     // *** Run the actual inference!!! ***
-    if (whisper_full(ctx, wparams, samples, sample_count) != 0) {
+    // if (whisper_full(ctx, wparams, samples, sample_count) != 0) {
+    if (whisper_full_parallel(ctx, wparams, samples, sample_count, service_params.n_processors) != 0) {
         return false;
     }
 
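
The functional change in this hunk is the switch from whisper_full() to whisper_full_parallel(), which splits the input audio into n_processors chunks and transcribes them concurrently (n_processors == 1 behaves like the plain call; accuracy can degrade near chunk boundaries). A minimal sketch of the same call outside the class, assuming a loaded context and 16 kHz mono float samples in a pcmf32 vector:

    // Sketch: assumes ctx came from whisper_init_from_file_with_params()
    // and pcmf32 holds 16 kHz mono float samples.
    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
    wparams.n_threads = 4;

    const int n_processors = 2; // 1 is equivalent to whisper_full()
    if (whisper_full_parallel(ctx, wparams, pcmf32.data(), (int) pcmf32.size(), n_processors) != 0) {
        fprintf(stderr, "inference failed\n");
    }
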
@@ -84,7 +85,7 @@ bool WhisperService::process(const float *samples, int sample_count) {
     // pcmf32_old = std::vector<float>(pcmf32.end() - n_samples_keep, pcmf32.end());
 
     // Add tokens of the last full length segment as the prompt
-    if (!server_params.no_context) {
+    if (!service_params.no_context) {
         prompt_tokens.clear();
 
         const int n_segments = whisper_full_n_segments(ctx);
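
The diff truncates just after n_segments is computed. For context, a hedged sketch of the token-collection loop this branch typically guards, modeled on the stock whisper.cpp stream example rather than this file's exact body; the collected tokens feed back through wparams.prompt_tokens on the next process() call so decoding keeps rolling context:

    // Sketch of the loop this branch likely guards (modeled on whisper.cpp's
    // stream example; the diff cuts off before the body).
    const int n_segments = whisper_full_n_segments(ctx);
    for (int i = 0; i < n_segments; ++i) {
        const int token_count = whisper_full_n_tokens(ctx, i);
        for (int j = 0; j < token_count; ++j) {
            prompt_tokens.push_back(whisper_full_get_token_id(ctx, i, j));
        }
    }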