move utils to common; add audio_format request parameter; comment out debug logging

litongmacos · litongmacos · commit 76ef40f0b889 · 2023-11-23T20:49:41.000-10:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -62,7 +62,7 @@ target_link_libraries(stream_local whisper ${SDL2_LIBRARIES})
 add_executable(whisper_http_server_base_httplib whisper_http_server_base_httplib.cpp common/common.cpp httplib/httplib.h nlohmann/json.hpp handler/inference_handler.cpp params/whisper_params.cpp)
 target_link_libraries(whisper_http_server_base_httplib whisper)
 
-add_executable(whisper_server_base_on_uwebsockets whisper_server_base_on_uwebsockets.cpp common/common.cpp stream/stream_components_service.cpp utils/utils.cpp)
+add_executable(whisper_server_base_on_uwebsockets whisper_server_base_on_uwebsockets.cpp common/common.cpp stream/stream_components_service.cpp common/utils.cpp)
 #add uwebsockets head files
 target_include_directories(whisper_server_base_on_uwebsockets PRIVATE ${UWEBSOCKETS_INCLUDE_DIRS})
 # linked uWebSockets、zlib、libuv 和 uSockets libs
diff --git a/common/utils.cpp b/common/utils.cpp
diff --git a/common/utils.h b/common/utils.h
diff --git a/handler/inference_handler.cpp b/handler/inference_handler.cpp
@@ -3,6 +3,7 @@
 #include "../common/common.h"
 #include "../params/whisper_params.h"
 #include "../nlohmann/json.hpp"
+#include "common/utils.h"
 
 using json = nlohmann::json;
 
@@ -201,6 +202,9 @@ void getReqParameters(const Request &req, whisper_params &params) {
   if (req.has_file("temerature")) {
     params.userdef_temp = std::stof(req.get_file_value("temperature").content);
   }
+  if(req.has_file("audio_format")){ // audio_format is compared against names such as "mp3"/"m4a" later on
+    params.audio_format=req.get_file_value("audio_format").content; // copy the text; std::stof would throw on "mp3" and a float cannot populate a std::string
+  }
 }
 
 
@@ -225,7 +229,7 @@ void handleInference(const Request &req, Response &res, std::mutex &whisper_mute
   getReqParameters(req, params);
 
   std::string filename{audio_file.filename};
-  printf("Received request: %s\n", filename.c_str());
+  printf("%s: Received filename: %s,audio_format: %s\n",get_current_time().c_str(),filename.c_str(),params.audio_format.c_str()); // one %s per argument: timestamp, filename, audio format
 
   // audio arrays
   std::vector<float> pcmf32;               // mono-channel F32 PCM
@@ -236,13 +240,20 @@ void handleInference(const Request &req, Response &res, std::mutex &whisper_mute
   temp_file << audio_file.content;
 
   // read wav content into pcmf32
-  if (!::read_wav(filename, pcmf32, pcmf32s, params.diarize)) {
-    fprintf(stderr, "error: failed to read WAV file '%s'\n", filename.c_str());
-    const std::string error_resp = "{\"error\":\"failed to read WAV file\"}";
-    res.set_content(error_resp, "application/json");
-    whisper_mutex.unlock();
-    return;
+  if(params.audio_format=="mp3"){
+
+  }else if(params.audio_format=="m4a"){
+
+  }else{
+    if (!::read_wav(filename, pcmf32, pcmf32s, params.diarize)) {
+      fprintf(stderr, "error: failed to read WAV file '%s'\n", filename.c_str());
+      const std::string error_resp = "{\"error\":\"failed to read WAV file\"}";
+      res.set_content(error_resp, "application/json");
+      whisper_mutex.unlock();
+      return;
+    }
   }
+
   // remove temp file
   std::remove(filename.c_str());
 
diff --git a/params/whisper_params.h b/params/whisper_params.h
@@ -58,6 +58,7 @@ struct whisper_params {
   std::string tdrz_speaker_turn = " [SPEAKER_TURN]"; // TODO: set from command line
 
   std::string openvino_encode_device = "CPU";
+  std::string audio_format="wav";
 };
 
 struct server_params {
diff --git a/whisper_server_base_on_uwebsockets.cpp b/whisper_server_base_on_uwebsockets.cpp
@@ -1,7 +1,7 @@
 #include "nlohmann/json.hpp"
 #include "stream/stream_components_service.h"
 #include "stream/stream_components.h"
-#include "utils/utils.h"
+#include "common/utils.h"
 #include "common/common.h"
 #include <uwebsockets/App.h>
 #include <iostream>
@@ -61,14 +61,13 @@ int main(int argc, char **argv) {
   //Save Audio
   auto ws_save_handler=[](auto *ws,std::string_view message,uWS::OpCode opCode){
     auto* userData = (std::string*)ws->getUserData();
-    printf("%s: User Data: %s\n", get_current_time().c_str(), userData->c_str());
+    // printf("%s: User Data: %s\n", get_current_time().c_str(), userData->c_str());
     thread_local wav_writer wavWriter;
     thread_local std::string filename;
 
     nlohmann::json response;
     if (opCode == uWS::OpCode::TEXT) {
-      printf("%s: Received message on /streaming/save: %s\n", get_current_time().c_str(),
-             std::string(message).c_str());
+      // printf("%s: Received message on /streaming/save: %s\n", get_current_time().c_str(),std::string(message).c_str());
       auto jsonMsg = nlohmann::json::parse(message);
       std::string signal = jsonMsg["signal"];
       if (signal == "start") {
@@ -94,12 +93,13 @@ int main(int argc, char **argv) {
       // process binary message（PCM16 data）
       auto size = message.size();
       std::basic_string_view<char, std::char_traits<char>>::const_pointer data = message.data();
-      printf("%s: Received message size on /streaming/save: %zu\n", get_current_time().c_str(), size);
+      // printf("%s: Received message size on /streaming/save: %zu\n", get_current_time().c_str(), size);
       // add received PCM16 to audio cache
       std::vector<int16_t> pcm16(size / 2);
       std::memcpy(pcm16.data(), data, size);
       //write to file
       wavWriter.write(pcm16.data(), size / 2);
+      ws->send(response.dump(), uWS::OpCode::TEXT);
     }
   };
 
@@ -111,11 +111,10 @@ int main(int argc, char **argv) {
     //std::unique_ptr<nlohmann::json> results(new nlohmann::json(nlohmann::json::array()));
     thread_local nlohmann::json final_results;
     auto thread_id = std::this_thread::get_id();
-    std::cout << get_current_time().c_str() << ": Handling a message in thread: " << thread_id << std::endl;
+    // std::cout << get_current_time().c_str() << ": Handling a message in thread: " << thread_id << std::endl;
     nlohmann::json response;
     if (opCode == uWS::OpCode::TEXT) {
-      printf("%s: Received message on /paddlespeech/asr/streaming: %s\n", get_current_time().c_str(),
-             std::string(message).c_str());
+      // printf("%s: Received message on /paddlespeech/asr/streaming: %s\n", get_current_time().c_str(),std::string(message).c_str());
       // process text message
       try {
         auto jsonMsg = nlohmann::json::parse(message);
@@ -147,7 +146,7 @@ int main(int argc, char **argv) {
           // 如果开启了VAD
           bool isOk;
           if (params.audio.use_vad) {
-            printf("%s: vad: %d \n", get_current_time().c_str(), params.audio.use_vad);
+            // printf("%s: vad: %d \n", get_current_time().c_str(), params.audio.use_vad);
             // TODO: 实现VAD处理，
             //bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
             isOk=whisperService.process(pcm32.data(), pcm32.size());
@@ -167,9 +166,10 @@ int main(int argc, char **argv) {
         auto size = message.size();
       }
     } else if (opCode == uWS::OpCode::BINARY) {
+      int size=message.size();
       // process binary message（PCM16 data）
       std::basic_string_view<char, std::char_traits<char>>::const_pointer data = message.data();
-      printf("%s: Received message size on /paddlespeech/asr/streaming: %zu\n", get_current_time().c_str(), size);
+      // printf("%s: Received message size on /paddlespeech/asr/streaming: %zu\n", get_current_time().c_str(), size);
       // add received PCM16 to audio cache
       std::vector<int16_t> pcm16(size / 2);
 
@@ -188,7 +188,7 @@ int main(int argc, char **argv) {
         // 如果开启了VAD
         bool isOk;
         if (params.audio.use_vad) {
-          printf("%s: vad: %d \n", get_current_time().c_str(), params.audio.use_vad);
+          // printf("%s: vad: %d \n", get_current_time().c_str(), params.audio.use_vad);
           // TODO: 实现VAD处理，
           //bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
           isOk=whisperService.process(pcm32.data(), pcm32.size());
@@ -226,7 +226,7 @@ int main(int argc, char **argv) {
 
 bool processAudio(WhisperService whisperService, std::vector<float> pcm32, const whisper_local_stream_params& params) {
   if (params.audio.use_vad) {
-    printf("%s: vad: %d \n", get_current_time().c_str(), params.audio.use_vad);
+    // printf("%s: vad: %d \n", get_current_time().c_str(), params.audio.use_vad);
     // TODO: 实现VAD处理，
     //bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
     return whisperService.process(pcm32.data(), pcm32.size());