2
2
#include " stream/stream_components_service.h"
3
3
#include " stream/stream_components.h"
4
4
#include " utils/utils.h"
5
+ #include " common/common.h"
5
6
#include < uwebsockets/App.h>
6
7
#include < iostream>
7
8
#include < string>
@@ -30,20 +31,14 @@ int main(int argc, char **argv) {
30
31
exit (0 );
31
32
}
32
33
33
- // Instantiate the microphone input
34
- // stream_components::LocalSDLMicrophone microphone(params.audio);
35
-
36
34
// Instantiate the service
37
35
struct whisper_context_params cparams;
38
36
cparams.use_gpu = params.service .use_gpu ;
39
37
stream_components::WhisperService whisperService (params.service , params.audio , cparams);
40
38
41
- // Print the 'header'...
42
- // WhisperStreamOutput::to_json(std::cout, params.service, whisperService.ctx);
43
-
44
39
const int port = 8090 ;
45
40
46
- // 开始处理器
41
+ // started handler
47
42
auto started_handler = [](auto *token) {
48
43
if (token) {
49
44
std::cout << " Server started on port " << port << std::endl;
@@ -52,7 +47,7 @@ int main(int argc, char **argv) {
52
47
}
53
48
};
54
49
55
- // HTTP GET /hello 处理器
50
+ // HTTP GET /hello handler
56
51
auto hello_action = [](auto *res, auto *req) {
57
52
res->end (" Hello World!" );
58
53
};
@@ -63,20 +58,31 @@ int main(int argc, char **argv) {
63
58
};
64
59
65
60
// WebSocket /paddlespeech/asr/streaming handler
66
- std::vector<float > audioBuffer; // global audio data buffer
67
- auto ws_streaming_handler = [&whisperService, &audioBuffer](auto *ws, std::string_view message, uWS::OpCode opCode) {
61
+ std::vector<float > * audioBuffer; // global audio data buffer
62
+ wav_writer * wavWriter;
63
+
64
+ auto ws_streaming_handler = [&whisperService, &audioBuffer, &wavWriter](auto *ws, std::string_view message,
65
+ uWS::OpCode opCode) {
68
66
if (opCode == uWS::OpCode::TEXT) {
69
67
printf (" %s: Received message on /paddlespeech/asr/streaming: %s\n " , get_current_time ().c_str (),
70
68
std::string (message).c_str ());
71
69
// process text message
72
70
try {
73
71
auto jsonMsg = nlohmann::json::parse (message);
72
+ std::string filename = jsonMsg[" name" ];
74
73
std::string signal = jsonMsg[" signal" ];
75
74
if (signal == " start" ) {
76
75
// Send the "server ready" message
77
76
nlohmann::json response = {{" status" , " ok" },
78
77
{" signal" , " server_ready" }};
79
78
ws->send (response.dump (), uWS::OpCode::TEXT);
79
+ wavWriter = new wav_writer ();
80
+ audioBuffer = new std::vector<float >();
81
+ wavWriter->open (filename, WHISPER_SAMPLE_RATE, 16 , 1 );
82
+ }
83
+ if (signal == " end" ) {
84
+ delete wavWriter;
85
+ delete audioBuffer;
80
86
}
81
87
// other process logic...
82
88
} catch (const std::exception &e) {
@@ -88,17 +94,22 @@ int main(int argc, char **argv) {
88
94
printf (" %s: Received message size on /paddlespeech/asr/streaming: %zu\n " , get_current_time ().c_str (), size);
89
95
// add received PCM16 to audio cache
90
96
std::vector<int16_t > pcm16 (size / 2 );
91
- std::memcpy (pcm16.data (), message.data (), size);
97
+ std::basic_string_view<char , std::char_traits<char >>::const_pointer data = message.data ();
98
+ std::memcpy (pcm16.data (), data, size);
92
99
93
- std::transform (pcm16.begin (), pcm16.end (), std::back_inserter (audioBuffer), [](int16_t sample) {
94
- return static_cast <float >(sample) / 32768 .0f ; // convert to [-1.0, 1.0] float
100
+ std::vector<float > temp (size/2 );
101
+ std::transform (pcm16.begin (), pcm16.end (), temp.begin (), [](int16_t sample) {
102
+ return static_cast <float >(sample) / 32768 .0f ;
95
103
});
104
+ wavWriter->write (temp.data (), size/2 );
105
+
106
+ audioBuffer->insert (audioBuffer->end (), temp.begin (), temp.end ());
107
+
96
108
97
109
// asr
98
- bool isOk = whisperService. process (audioBuffer. data (), audioBuffer. size ()) ;
99
- printf ( " %s: isOk:%d \n " , get_current_time (). c_str ( ), isOk );
110
+ nlohmann::json response ;
111
+ bool isOk = whisperService. process (audioBuffer-> data ( ), audioBuffer-> size () );
100
112
if (isOk) {
101
- nlohmann::json response;
102
113
nlohmann::json results = nlohmann::json::array (); // create JSON Array
103
114
104
115
const int n_segments = whisper_full_n_segments (whisperService.ctx );
@@ -114,10 +125,10 @@ int main(int argc, char **argv) {
114
125
segment[" sentence" ] = sentence;
115
126
results.push_back (segment);
116
127
}
117
-
118
128
response[" result" ] = results;
119
- ws->send (response.dump (), uWS::OpCode::TEXT);
120
129
}
130
+
131
+ ws->send (response.dump (), uWS::OpCode::TEXT);
121
132
}
122
133
};
123
134
0 commit comments