9
9
#include < whisper.h>
10
10
#include < sstream>
11
11
12
+ struct PerSocketData {
13
+ wav_writer wavWriter;
14
+ };
15
+
12
16
bool process_vad (float *pDouble, unsigned long size);
13
17
14
18
std::vector<float > extract_first_voice_segment (std::vector<float > vector1);
@@ -60,9 +64,53 @@ int main(int argc, char **argv) {
60
64
auto ws_echo_handler = [](auto *ws, std::string_view message, uWS::OpCode opCode) {
61
65
ws->send (message, opCode);
62
66
};
67
+ // Save Audio
68
+ auto ws_save_handler=[](auto *ws,std::string_view message,uWS::OpCode opCode){
69
+ std::string* userData = (std::string*)ws->getUserData ();
70
+ printf (" %s: User Data: %s\n " , get_current_time ().c_str (), userData->c_str ());
71
+ thread_local wav_writer wavWriter;
72
+ thread_local std::string filename;
73
+
74
+ nlohmann::json response;
75
+ if (opCode == uWS::OpCode::TEXT) {
76
+ printf (" %s: Received message on /streaming/save: %s\n " , get_current_time ().c_str (),
77
+ std::string (message).c_str ());
78
+ auto jsonMsg = nlohmann::json::parse (message);
79
+ std::string signal = jsonMsg[" signal" ];
80
+ if (signal == " start" ) {
81
+ if (jsonMsg[" name" ].is_string ()) {
82
+ filename = jsonMsg[" name" ];
83
+ } else {
84
+ filename = std::to_string (get_current_time_millis ()) + " .wav" ;
85
+ }
86
+ // 发送服务器准备好的消息
87
+ response = {{" status" , " ok" },
88
+ {" signal" , " server_ready" }};
89
+ ws->send (response.dump (), uWS::OpCode::TEXT);
90
+ wavWriter.open (filename, WHISPER_SAMPLE_RATE, 16 , 1 );
91
+ }
92
+ if (signal == " end" ) {
93
+ wavWriter.close ();
94
+ response = {{" name" , filename},
95
+ {" signal" , signal}};
96
+ ws->send (response.dump (), uWS::OpCode::TEXT);
97
+ }
98
+
99
+ }else if (opCode == uWS::OpCode::BINARY) {
100
+ // process binary message(PCM16 data)
101
+ auto size = message.size ();
102
+ std::basic_string_view<char , std::char_traits<char >>::const_pointer data = message.data ();
103
+ printf (" %s: Received message size on /streaming/save: %zu\n " , get_current_time ().c_str (), size);
104
+ // add received PCM16 to audio cache
105
+ std::vector<int16_t > pcm16 (size / 2 );
106
+ std::memcpy (pcm16.data (), data, size);
107
+ // write to file
108
+ wavWriter.write (pcm16.data (), size / 2 );
109
+ }
110
+ };
63
111
64
112
// WebSocket /paddlespeech/asr/streaming handler
65
- auto item = [&whisperService, ¶ms](auto *ws, std::string_view message, uWS::OpCode opCode) {
113
+ auto ws_streaming_handler = [&whisperService, ¶ms](auto *ws, std::string_view message, uWS::OpCode opCode) {
66
114
thread_local std::vector<float > audioBuffer; // thread-localized variable
67
115
thread_local wav_writer wavWriter;
68
116
thread_local std::string filename;
@@ -77,13 +125,13 @@ int main(int argc, char **argv) {
77
125
// process text message
78
126
try {
79
127
auto jsonMsg = nlohmann::json::parse (message);
80
- if (jsonMsg[" name" ].is_string ()) {
81
- filename = jsonMsg[" name" ];
82
- } else {
83
- filename = std::to_string (get_current_time_millis ()) + " .wav" ;
84
- }
85
128
std::string signal = jsonMsg[" signal" ];
86
129
if (signal == " start" ) {
130
+ if (jsonMsg[" name" ].is_string ()) {
131
+ filename = jsonMsg[" name" ];
132
+ } else {
133
+ filename = std::to_string (get_current_time_millis ()) + " .wav" ;
134
+ }
87
135
final_results = nlohmann::json (nlohmann::json::array ());
88
136
// 发送服务器准备好的消息
89
137
response = {{" status" , " ok" },
@@ -118,7 +166,7 @@ int main(int argc, char **argv) {
118
166
return static_cast <float >(sample) / 32768 .0f ;
119
167
});
120
168
// write to file
121
- wavWriter.write (temp.data (), size / 2 );
169
+ // wavWriter.write(temp.data(), size / 2);
122
170
audioBuffer.insert (audioBuffer.end (), temp.begin (), temp.end ());
123
171
// 如果开启了VAD
124
172
bool isOk = false ;
@@ -161,15 +209,21 @@ int main(int argc, char **argv) {
161
209
ws->send (response.dump (), uWS::OpCode::TEXT);
162
210
}
163
211
};
164
- auto ws_streaming_handler = item;
212
+
165
213
166
214
// config uWebSockets app
167
215
uWS::App ()
168
216
// hello
169
217
.get (" /hello" , hello_action)
170
218
// echo
171
219
.ws <std::string>(" /echo" , {.message = ws_echo_handler})
172
- // streaming
220
+ // only_save_audio
221
+ .ws <std::string>(" /streaming/save" , {.open =[](auto *ws){
222
+ // 初始化用户数据
223
+ std::string* userData = (std::string*)ws->getUserData ();
224
+ *userData = " Create User Id" ; // 设置初始值
225
+ },.message = ws_save_handler})
226
+ // streaming asr
173
227
.ws <std::string>(" /paddlespeech/asr/streaming" , {.message = ws_streaming_handler})
174
228
// listen
175
229
.listen (port, started_handler).run ();
@@ -179,6 +233,3 @@ std::vector<float> extract_first_voice_segment(std::vector<float> vector1) {
179
233
return std::vector<float >();
180
234
}
181
235
182
- bool process_vad (float *pDouble, unsigned long size) {
183
- return false ;
184
- }
0 commit comments