Skip to content

Commit 76ef40f

Browse files
author
litongmacos
committed
move utils to common
1 parent fbca847 commit 76ef40f

File tree

6 files changed

+32
-20
lines changed

6 files changed

+32
-20
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ target_link_libraries(stream_local whisper ${SDL2_LIBRARIES})
6262
add_executable(whisper_http_server_base_httplib whisper_http_server_base_httplib.cpp common/common.cpp httplib/httplib.h nlohmann/json.hpp handler/inference_handler.cpp params/whisper_params.cpp)
6363
target_link_libraries(whisper_http_server_base_httplib whisper)
6464

65-
add_executable(whisper_server_base_on_uwebsockets whisper_server_base_on_uwebsockets.cpp common/common.cpp stream/stream_components_service.cpp utils/utils.cpp)
65+
add_executable(whisper_server_base_on_uwebsockets whisper_server_base_on_uwebsockets.cpp common/common.cpp stream/stream_components_service.cpp common/utils.cpp)
6666
# add the uWebSockets header files
6767
target_include_directories(whisper_server_base_on_uwebsockets PRIVATE ${UWEBSOCKETS_INCLUDE_DIRS})
6868
# link the uWebSockets, zlib, libuv and uSockets libraries
File renamed without changes.
File renamed without changes.

handler/inference_handler.cpp

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "../common/common.h"
44
#include "../params/whisper_params.h"
55
#include "../nlohmann/json.hpp"
6+
#include "common/utils.h"
67

78
using json = nlohmann::json;
89

@@ -201,6 +202,9 @@ void getReqParameters(const Request &req, whisper_params &params) {
201202
if (req.has_file("temerature")) {
202203
params.userdef_temp = std::stof(req.get_file_value("temperature").content);
203204
}
205+
if(req.has_file("audio_format")){
206+
params.audio_format=std::stof(req.get_file_value("audio_format").content);
207+
}
204208
}
205209

206210

@@ -225,7 +229,7 @@ void handleInference(const Request &req, Response &res, std::mutex &whisper_mute
225229
getReqParameters(req, params);
226230

227231
std::string filename{audio_file.filename};
228-
printf("Received request: %s\n", filename.c_str());
232+
printf("%s: Received filename: %s,audio_format\n",get_current_time().c_str(),filename.c_str(),params.audio_format.c_str());
229233

230234
// audio arrays
231235
std::vector<float> pcmf32; // mono-channel F32 PCM
@@ -236,13 +240,20 @@ void handleInference(const Request &req, Response &res, std::mutex &whisper_mute
236240
temp_file << audio_file.content;
237241

238242
// read wav content into pcmf32
239-
if (!::read_wav(filename, pcmf32, pcmf32s, params.diarize)) {
240-
fprintf(stderr, "error: failed to read WAV file '%s'\n", filename.c_str());
241-
const std::string error_resp = "{\"error\":\"failed to read WAV file\"}";
242-
res.set_content(error_resp, "application/json");
243-
whisper_mutex.unlock();
244-
return;
243+
if(params.audio_format=="mp3"){
244+
245+
}else if(params.audio_format=="m4a"){
246+
247+
}else{
248+
if (!::read_wav(filename, pcmf32, pcmf32s, params.diarize)) {
249+
fprintf(stderr, "error: failed to read WAV file '%s'\n", filename.c_str());
250+
const std::string error_resp = "{\"error\":\"failed to read WAV file\"}";
251+
res.set_content(error_resp, "application/json");
252+
whisper_mutex.unlock();
253+
return;
254+
}
245255
}
256+
246257
// remove temp file
247258
std::remove(filename.c_str());
248259

params/whisper_params.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ struct whisper_params {
5858
std::string tdrz_speaker_turn = " [SPEAKER_TURN]"; // TODO: set from command line
5959

6060
std::string openvino_encode_device = "CPU";
61+
std::string audio_format="wav";
6162
};
6263

6364
struct server_params {

whisper_server_base_on_uwebsockets.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#include "nlohmann/json.hpp"
22
#include "stream/stream_components_service.h"
33
#include "stream/stream_components.h"
4-
#include "utils/utils.h"
4+
#include "common/utils.h"
55
#include "common/common.h"
66
#include <uwebsockets/App.h>
77
#include <iostream>
@@ -61,14 +61,13 @@ int main(int argc, char **argv) {
6161
//Save Audio
6262
auto ws_save_handler=[](auto *ws,std::string_view message,uWS::OpCode opCode){
6363
auto* userData = (std::string*)ws->getUserData();
64-
printf("%s: User Data: %s\n", get_current_time().c_str(), userData->c_str());
64+
// printf("%s: User Data: %s\n", get_current_time().c_str(), userData->c_str());
6565
thread_local wav_writer wavWriter;
6666
thread_local std::string filename;
6767

6868
nlohmann::json response;
6969
if (opCode == uWS::OpCode::TEXT) {
70-
printf("%s: Received message on /streaming/save: %s\n", get_current_time().c_str(),
71-
std::string(message).c_str());
70+
// printf("%s: Received message on /streaming/save: %s\n", get_current_time().c_str(),std::string(message).c_str());
7271
auto jsonMsg = nlohmann::json::parse(message);
7372
std::string signal = jsonMsg["signal"];
7473
if (signal == "start") {
@@ -94,12 +93,13 @@ int main(int argc, char **argv) {
9493
// process binary message(PCM16 data)
9594
auto size = message.size();
9695
std::basic_string_view<char, std::char_traits<char>>::const_pointer data = message.data();
97-
printf("%s: Received message size on /streaming/save: %zu\n", get_current_time().c_str(), size);
96+
// printf("%s: Received message size on /streaming/save: %zu\n", get_current_time().c_str(), size);
9897
// add received PCM16 to audio cache
9998
std::vector<int16_t> pcm16(size / 2);
10099
std::memcpy(pcm16.data(), data, size);
101100
//write to file
102101
wavWriter.write(pcm16.data(), size / 2);
102+
ws->send(response.dump(), uWS::OpCode::TEXT);
103103
}
104104
};
105105

@@ -111,11 +111,10 @@ int main(int argc, char **argv) {
111111
//std::unique_ptr<nlohmann::json> results(new nlohmann::json(nlohmann::json::array()));
112112
thread_local nlohmann::json final_results;
113113
auto thread_id = std::this_thread::get_id();
114-
std::cout << get_current_time().c_str() << ": Handling a message in thread: " << thread_id << std::endl;
114+
// std::cout << get_current_time().c_str() << ": Handling a message in thread: " << thread_id << std::endl;
115115
nlohmann::json response;
116116
if (opCode == uWS::OpCode::TEXT) {
117-
printf("%s: Received message on /paddlespeech/asr/streaming: %s\n", get_current_time().c_str(),
118-
std::string(message).c_str());
117+
// printf("%s: Received message on /paddlespeech/asr/streaming: %s\n", get_current_time().c_str(),std::string(message).c_str());
119118
// process text message
120119
try {
121120
auto jsonMsg = nlohmann::json::parse(message);
@@ -147,7 +146,7 @@ int main(int argc, char **argv) {
147146
// If VAD is enabled
148147
bool isOk;
149148
if (params.audio.use_vad) {
150-
printf("%s: vad: %d \n", get_current_time().c_str(), params.audio.use_vad);
149+
// printf("%s: vad: %d \n", get_current_time().c_str(), params.audio.use_vad);
151150
// TODO: implement VAD processing
152151
//bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
153152
isOk=whisperService.process(pcm32.data(), pcm32.size());
@@ -167,9 +166,10 @@ int main(int argc, char **argv) {
167166
auto size = message.size();
168167
}
169168
} else if (opCode == uWS::OpCode::BINARY) {
169+
int size=message.size();
170170
// process binary message(PCM16 data)
171171
std::basic_string_view<char, std::char_traits<char>>::const_pointer data = message.data();
172-
printf("%s: Received message size on /paddlespeech/asr/streaming: %zu\n", get_current_time().c_str(), size);
172+
// printf("%s: Received message size on /paddlespeech/asr/streaming: %zu\n", get_current_time().c_str(), size);
173173
// add received PCM16 to audio cache
174174
std::vector<int16_t> pcm16(size / 2);
175175

@@ -188,7 +188,7 @@ int main(int argc, char **argv) {
188188
// If VAD is enabled
189189
bool isOk;
190190
if (params.audio.use_vad) {
191-
printf("%s: vad: %d \n", get_current_time().c_str(), params.audio.use_vad);
191+
// printf("%s: vad: %d \n", get_current_time().c_str(), params.audio.use_vad);
192192
// TODO: implement VAD processing
193193
//bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
194194
isOk=whisperService.process(pcm32.data(), pcm32.size());
@@ -226,7 +226,7 @@ int main(int argc, char **argv) {
226226

227227
bool processAudio(WhisperService whisperService, std::vector<float> pcm32, const whisper_local_stream_params& params) {
228228
if (params.audio.use_vad) {
229-
printf("%s: vad: %d \n", get_current_time().c_str(), params.audio.use_vad);
229+
// printf("%s: vad: %d \n", get_current_time().c_str(), params.audio.use_vad);
230230
// TODO: implement VAD processing
231231
//bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
232232
return whisperService.process(pcm32.data(), pcm32.size());

0 commit comments

Comments
 (0)