9
9
#include < whisper.h>
10
10
#include < sstream>
11
11
12
- struct PerSocketData {
13
- wav_writer wavWriter;
14
- };
15
-
16
- bool process_vad (float *pDouble, unsigned long size);
17
-
18
- std::vector<float > extract_first_voice_segment (std::vector<float > vector1);
19
-
20
12
using namespace stream_components ;
13
+ nlohmann::json getResult (whisper_context *ctx);
14
+ bool processAudio (WhisperService service, std::vector<float > pcm32, const whisper_local_stream_params& params);
21
15
22
16
int main (int argc, char **argv) {
23
17
// Read parameters...
24
18
whisper_local_stream_params params;
25
19
26
- if (whisper_params_parse (argc, argv, params) == false ) {
20
+ if (! whisper_params_parse (argc, argv, params)) {
27
21
return 1 ;
28
22
}
29
23
@@ -40,7 +34,7 @@ int main(int argc, char **argv) {
40
34
}
41
35
42
36
// Instantiate the service
43
- struct whisper_context_params cparams;
37
+ struct whisper_context_params cparams{} ;
44
38
cparams.use_gpu = params.service .use_gpu ;
45
39
stream_components::WhisperService whisperService (params.service , params.audio , cparams);
46
40
@@ -66,7 +60,7 @@ int main(int argc, char **argv) {
66
60
};
67
61
// Save Audio
68
62
auto ws_save_handler=[](auto *ws,std::string_view message,uWS::OpCode opCode){
69
- std::string * userData = (std::string*)ws->getUserData ();
63
+ auto * userData = (std::string*)ws->getUserData ();
70
64
printf (" %s: User Data: %s\n " , get_current_time ().c_str (), userData->c_str ());
71
65
thread_local wav_writer wavWriter;
72
66
thread_local std::string filename;
@@ -144,16 +138,36 @@ int main(int argc, char **argv) {
144
138
// nlohmann::json response = {{"name",filename},{"signal", signal}};
145
139
response = {{" name" , filename},
146
140
{" signal" , signal}};
147
- response[" result" ] = final_results;
141
+
142
+ std::vector<float > pcm32 (audioBuffer.size ());
143
+ std::transform (audioBuffer.begin (), audioBuffer.end (), pcm32.begin (), [](int16_t sample) {
144
+ return static_cast <float >(sample) / 32768 .0f ;
145
+ });
146
+ audioBuffer.clear ();
147
+ // 如果开启了VAD
148
+ bool isOk;
149
+ if (params.audio .use_vad ) {
150
+ printf (" %s: vad: %d \n " , get_current_time ().c_str (), params.audio .use_vad );
151
+ // TODO: 实现VAD处理,
152
+ // bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
153
+ isOk=whisperService.process (pcm32.data (), pcm32.size ());
154
+ } else {
155
+ // asr
156
+ isOk= whisperService.process (pcm32.data (), pcm32.size ());
157
+ }
158
+ if (isOk) {
159
+ final_results = getResult (whisperService.ctx );
160
+ response[" result" ] = final_results;
161
+ }
148
162
ws->send (response.dump (), uWS::OpCode::TEXT);
149
163
}
150
164
// other process logic...
151
165
} catch (const std::exception &e) {
152
166
std::cerr << " JSON parse error: " << e.what () << std::endl;
167
+ auto size = message.size ();
153
168
}
154
169
} else if (opCode == uWS::OpCode::BINARY) {
155
170
// process binary message(PCM16 data)
156
- auto size = message.size ();
157
171
std::basic_string_view<char , std::char_traits<char >>::const_pointer data = message.data ();
158
172
printf (" %s: Received message size on /paddlespeech/asr/streaming: %zu\n " , get_current_time ().c_str (), size);
159
173
// add received PCM16 to audio cache
@@ -172,32 +186,18 @@ int main(int argc, char **argv) {
172
186
});
173
187
audioBuffer.clear ();
174
188
// 如果开启了VAD
175
- bool isOk = false ;
189
+ bool isOk;
176
190
if (params.audio .use_vad ) {
177
- printf (" %s: vad: %n \n " , get_current_time ().c_str (), params.audio .use_vad );
191
+ printf (" %s: vad: %d \n " , get_current_time ().c_str (), params.audio .use_vad );
178
192
// TODO: 实现VAD处理,
179
193
// bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
180
- isOk = whisperService.process (pcm32.data (), pcm32.size ());
194
+ isOk= whisperService.process (pcm32.data (), pcm32.size ());
181
195
} else {
182
196
// asr
183
- isOk = whisperService.process (pcm32.data (), pcm32.size ());
197
+ isOk= whisperService.process (pcm32.data (), pcm32.size ());
184
198
}
185
199
if (isOk) {
186
- const int n_segments = whisper_full_n_segments (whisperService.ctx );
187
- nlohmann::json results = nlohmann::json (nlohmann::json::array ());
188
- for (int i = 0 ; i < n_segments; ++i) {
189
- nlohmann::json segment;
190
- int64_t t0 = whisper_full_get_segment_t0 (whisperService.ctx , i);
191
- int64_t t1 = whisper_full_get_segment_t1 (whisperService.ctx , i);
192
- const char *sentence = whisper_full_get_segment_text (whisperService.ctx , i);
193
- auto result = std::to_string (t0) + " -->" + std::to_string (t1) + " :" + sentence + " \n " ;
194
- printf (" %s: result:%s\n " , get_current_time ().c_str (), result.c_str ());
195
- segment[" t0" ] = t0;
196
- segment[" t1" ] = t1;
197
- segment[" sentence" ] = sentence;
198
- results.push_back (segment);
199
- }
200
- final_results = results;
200
+ final_results = getResult (whisperService.ctx );
201
201
response[" result" ] = final_results;
202
202
}
203
203
}
@@ -215,7 +215,7 @@ int main(int argc, char **argv) {
215
215
// only_save_audio
216
216
.ws <std::string>(" /streaming/save" , {.open =[](auto *ws){
217
217
// 初始化用户数据
218
- std::string * userData = (std::string*)ws->getUserData ();
218
+ auto * userData = (std::string*)ws->getUserData ();
219
219
*userData = " Create User Id" ; // 设置初始值
220
220
},.message = ws_save_handler})
221
221
// streaming asr
@@ -224,7 +224,34 @@ int main(int argc, char **argv) {
224
224
.listen (port, started_handler).run ();
225
225
}
226
226
227
- std::vector<float > extract_first_voice_segment (std::vector<float > vector1) {
228
- return std::vector<float >();
227
+ bool processAudio (WhisperService whisperService, std::vector<float > pcm32, const whisper_local_stream_params& params) {
228
+ if (params.audio .use_vad ) {
229
+ printf (" %s: vad: %d \n " , get_current_time ().c_str (), params.audio .use_vad );
230
+ // TODO: 实现VAD处理,
231
+ // bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
232
+ return whisperService.process (pcm32.data (), pcm32.size ());
233
+ } else {
234
+ // asr
235
+ return whisperService.process (pcm32.data (), pcm32.size ());
236
+ }
229
237
}
230
238
239
+ nlohmann::json getResult (whisper_context *ctx) {
240
+ nlohmann::json results = nlohmann::json (nlohmann::json::array ());
241
+ const int n_segments = whisper_full_n_segments (ctx);
242
+ for (int i = 0 ; i < n_segments; ++i) {
243
+ nlohmann::json segment;
244
+ int64_t t0 = whisper_full_get_segment_t0 (ctx, i);
245
+ int64_t t1 = whisper_full_get_segment_t1 (ctx, i);
246
+ const char *sentence = whisper_full_get_segment_text (ctx, i);
247
+ auto result = std::to_string (t0) + " -->" + std::to_string (t1) + " :" + sentence + " \n " ;
248
+ printf (" %s: result:%s\n " , get_current_time ().c_str (), result.c_str ());
249
+ segment[" t0" ] = t0;
250
+ segment[" t1" ] = t1;
251
+ segment[" sentence" ] = sentence;
252
+ results.push_back (segment);
253
+ }
254
+ return results;
255
+ }
256
+
257
+
0 commit comments