Skip to content

Commit b97d0ad

Browse files
author
litongmacos
committed
add /streaming/save
1 parent d0b59ef commit b97d0ad

File tree

3 files changed

+67
-18
lines changed

3 files changed

+67
-18
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ target_link_libraries(stream_local whisper ${SDL2_LIBRARIES})
6262
add_executable(whisper_http_server_base_httplib whisper_http_server_base_httplib.cpp common/common.cpp httplib/httplib.h nlohmann/json.hpp handler/inference_handler.cpp params/whisper_params.cpp)
6363
target_link_libraries(whisper_http_server_base_httplib whisper)
6464

65-
add_executable(whisper_server_base_on_uwebsockets whisper_server_base_on_uwebsockets.cpp stream/stream_components_service.cpp utils/utils.cpp)
65+
add_executable(whisper_server_base_on_uwebsockets whisper_server_base_on_uwebsockets.cpp common/common.cpp stream/stream_components_service.cpp utils/utils.cpp)
6666
# add uWebSockets header files
6767
target_include_directories(whisper_server_base_on_uwebsockets PRIVATE ${UWEBSOCKETS_INCLUDE_DIRS})
6868
# link the uWebSockets, zlib, libuv and uSockets libs

common/common.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -179,12 +179,10 @@ class wav_writer {
179179
}
180180

181181
// PCM data is expected as signed 16-bit (int16_t) samples
182-
bool write_audio(const float *data, size_t length) {
182+
bool write_audio(const int16_t *data, size_t length) {
183183
for (size_t i = 0; i < length; ++i) {
184184
// Write each 16-bit sample as-is (no clamping or scaling is applied)
185-
float normalizedSample = std::max(-1.0f, std::min(1.0f, data[i]));
186-
const int16_t intSample = static_cast<int16_t>(normalizedSample * 32767);
187-
fstream.write(reinterpret_cast<const char *>(&intSample), sizeof(int16_t));
185+
fstream.write(reinterpret_cast<const char *>(&data[i]), sizeof(int16_t));
188186
dataSize += sizeof(int16_t);
189187

190188
// Check if write was successful
@@ -238,7 +236,7 @@ class wav_writer {
238236
return true;
239237
}
240238

241-
bool write(const float *data, size_t length) {
239+
bool write(const int16_t *data, size_t length) {
242240
return write_audio(data, length);
243241
}
244242

whisper_server_base_on_uwebsockets.cpp

Lines changed: 63 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
#include <whisper.h>
1010
#include <sstream>
1111

12+
struct PerSocketData {
13+
wav_writer wavWriter;
14+
};
15+
1216
bool process_vad(float *pDouble, unsigned long size);
1317

1418
std::vector<float> extract_first_voice_segment(std::vector<float> vector1);
@@ -60,9 +64,53 @@ int main(int argc, char **argv) {
6064
auto ws_echo_handler = [](auto *ws, std::string_view message, uWS::OpCode opCode) {
6165
ws->send(message, opCode);
6266
};
67+
//Save Audio
68+
auto ws_save_handler=[](auto *ws,std::string_view message,uWS::OpCode opCode){
69+
std::string* userData = (std::string*)ws->getUserData();
70+
printf("%s: User Data: %s\n", get_current_time().c_str(), userData->c_str());
71+
thread_local wav_writer wavWriter;
72+
thread_local std::string filename;
73+
74+
nlohmann::json response;
75+
if (opCode == uWS::OpCode::TEXT) {
76+
printf("%s: Received message on /streaming/save: %s\n", get_current_time().c_str(),
77+
std::string(message).c_str());
78+
auto jsonMsg = nlohmann::json::parse(message);
79+
std::string signal = jsonMsg["signal"];
80+
if (signal == "start") {
81+
if (jsonMsg["name"].is_string()) {
82+
filename = jsonMsg["name"];
83+
} else {
84+
filename = std::to_string(get_current_time_millis()) + ".wav";
85+
}
86+
// 发送服务器准备好的消息
87+
response = {{"status", "ok"},
88+
{"signal", "server_ready"}};
89+
ws->send(response.dump(), uWS::OpCode::TEXT);
90+
wavWriter.open(filename, WHISPER_SAMPLE_RATE, 16, 1);
91+
}
92+
if (signal == "end") {
93+
wavWriter.close();
94+
response = {{"name", filename},
95+
{"signal", signal}};
96+
ws->send(response.dump(), uWS::OpCode::TEXT);
97+
}
98+
99+
}else if (opCode == uWS::OpCode::BINARY) {
100+
// process binary message(PCM16 data)
101+
auto size = message.size();
102+
std::basic_string_view<char, std::char_traits<char>>::const_pointer data = message.data();
103+
printf("%s: Received message size on /streaming/save: %zu\n", get_current_time().c_str(), size);
104+
// add received PCM16 to audio cache
105+
std::vector<int16_t> pcm16(size / 2);
106+
std::memcpy(pcm16.data(), data, size);
107+
//write to file
108+
wavWriter.write(pcm16.data(), size / 2);
109+
}
110+
};
63111

64112
// WebSocket /paddlespeech/asr/streaming handler
65-
auto item = [&whisperService, &params](auto *ws, std::string_view message, uWS::OpCode opCode) {
113+
auto ws_streaming_handler = [&whisperService, &params](auto *ws, std::string_view message, uWS::OpCode opCode) {
66114
thread_local std::vector<float> audioBuffer; //thread-localized variable
67115
thread_local wav_writer wavWriter;
68116
thread_local std::string filename;
@@ -77,13 +125,13 @@ int main(int argc, char **argv) {
77125
// process text message
78126
try {
79127
auto jsonMsg = nlohmann::json::parse(message);
80-
if (jsonMsg["name"].is_string()) {
81-
filename = jsonMsg["name"];
82-
} else {
83-
filename = std::to_string(get_current_time_millis()) + ".wav";
84-
}
85128
std::string signal = jsonMsg["signal"];
86129
if (signal == "start") {
130+
if (jsonMsg["name"].is_string()) {
131+
filename = jsonMsg["name"];
132+
} else {
133+
filename = std::to_string(get_current_time_millis()) + ".wav";
134+
}
87135
final_results = nlohmann::json(nlohmann::json::array());
88136
// 发送服务器准备好的消息
89137
response = {{"status", "ok"},
@@ -118,7 +166,7 @@ int main(int argc, char **argv) {
118166
return static_cast<float>(sample) / 32768.0f;
119167
});
120168
//write to file
121-
wavWriter.write(temp.data(), size / 2);
169+
//wavWriter.write(temp.data(), size / 2);
122170
audioBuffer.insert(audioBuffer.end(), temp.begin(), temp.end());
123171
// If VAD is enabled
124172
bool isOk = false;
@@ -161,15 +209,21 @@ int main(int argc, char **argv) {
161209
ws->send(response.dump(), uWS::OpCode::TEXT);
162210
}
163211
};
164-
auto ws_streaming_handler = item;
212+
165213

166214
// config uWebSockets app
167215
uWS::App()
168216
//hello
169217
.get("/hello", hello_action)
170218
//echo
171219
.ws<std::string>("/echo", {.message = ws_echo_handler})
172-
//streaming
220+
//only_save_audio
221+
.ws<std::string>("/streaming/save", {.open=[](auto *ws){
222+
// Initialize the user data
223+
std::string* userData = (std::string*)ws->getUserData();
224+
*userData = "Create User Id"; // 设置初始值
225+
},.message = ws_save_handler})
226+
//streaming asr
173227
.ws<std::string>("/paddlespeech/asr/streaming", {.message = ws_streaming_handler})
174228
//listen
175229
.listen(port, started_handler).run();
@@ -179,6 +233,3 @@ std::vector<float> extract_first_voice_segment(std::vector<float> vector1) {
179233
return std::vector<float>();
180234
}
181235

182-
bool process_vad(float *pDouble, unsigned long size) {
183-
return false;
184-
}

0 commit comments

Comments
 (0)