Skip to content

Commit ec698e7

Browse files
author
litongmacos
committed
save audio file
1 parent 9729379 commit ec698e7

File tree

1 file changed

+29
-18
lines changed

1 file changed

+29
-18
lines changed

whisper_server_base_on_uwebsockets.cpp

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include "stream/stream_components_service.h"
33
#include "stream/stream_components.h"
44
#include "utils/utils.h"
5+
#include "common/common.h"
56
#include <uwebsockets/App.h>
67
#include <iostream>
78
#include <string>
@@ -30,20 +31,14 @@ int main(int argc, char **argv) {
3031
exit(0);
3132
}
3233

33-
// Instantiate the microphone input
34-
// stream_components::LocalSDLMicrophone microphone(params.audio);
35-
3634
// Instantiate the service
3735
struct whisper_context_params cparams;
3836
cparams.use_gpu = params.service.use_gpu;
3937
stream_components::WhisperService whisperService(params.service, params.audio, cparams);
4038

41-
// Print the 'header'...
42-
//WhisperStreamOutput::to_json(std::cout, params.service, whisperService.ctx);
43-
4439
const int port = 8090;
4540

46-
// 开始处理器
41+
// started handler
4742
auto started_handler = [](auto *token) {
4843
if (token) {
4944
std::cout << "Server started on port " << port << std::endl;
@@ -52,7 +47,7 @@ int main(int argc, char **argv) {
5247
}
5348
};
5449

55-
// HTTP GET /hello 处理器
50+
// HTTP GET /hello handler
5651
auto hello_action = [](auto *res, auto *req) {
5752
res->end("Hello World!");
5853
};
@@ -63,20 +58,31 @@ int main(int argc, char **argv) {
6358
};
6459

6560
// WebSocket /paddlespeech/asr/streaming handler
66-
std::vector<float> audioBuffer; // global audio data buffer
67-
auto ws_streaming_handler = [&whisperService, &audioBuffer](auto *ws, std::string_view message, uWS::OpCode opCode) {
61+
std::vector<float> * audioBuffer; // global audio data buffer
62+
wav_writer * wavWriter;
63+
64+
auto ws_streaming_handler = [&whisperService, &audioBuffer, &wavWriter](auto *ws, std::string_view message,
65+
uWS::OpCode opCode) {
6866
if (opCode == uWS::OpCode::TEXT) {
6967
printf("%s: Received message on /paddlespeech/asr/streaming: %s\n", get_current_time().c_str(),
7068
std::string(message).c_str());
7169
// process text message
7270
try {
7371
auto jsonMsg = nlohmann::json::parse(message);
72+
std::string filename = jsonMsg["name"];
7473
std::string signal = jsonMsg["signal"];
7574
if (signal == "start") {
7675
// send the server-ready message
7776
nlohmann::json response = {{"status", "ok"},
7877
{"signal", "server_ready"}};
7978
ws->send(response.dump(), uWS::OpCode::TEXT);
79+
wavWriter = new wav_writer();
80+
audioBuffer = new std::vector<float>();
81+
wavWriter->open(filename, WHISPER_SAMPLE_RATE, 16, 1);
82+
}
83+
if (signal == "end") {
84+
delete wavWriter;
85+
delete audioBuffer;
8086
}
8187
// other process logic...
8288
} catch (const std::exception &e) {
@@ -88,17 +94,22 @@ int main(int argc, char **argv) {
8894
printf("%s: Received message size on /paddlespeech/asr/streaming: %zu\n", get_current_time().c_str(), size);
8995
// add received PCM16 to audio cache
9096
std::vector<int16_t> pcm16(size / 2);
91-
std::memcpy(pcm16.data(), message.data(), size);
97+
std::basic_string_view<char, std::char_traits<char>>::const_pointer data = message.data();
98+
std::memcpy(pcm16.data(), data, size);
9299

93-
std::transform(pcm16.begin(), pcm16.end(), std::back_inserter(audioBuffer), [](int16_t sample) {
94-
return static_cast<float>(sample) / 32768.0f; // convert to [-1.0, 1.0] float
100+
std::vector<float> temp(size/2);
101+
std::transform(pcm16.begin(), pcm16.end(), temp.begin(), [](int16_t sample) {
102+
return static_cast<float>(sample) / 32768.0f;
95103
});
104+
wavWriter->write(temp.data(), size/2);
105+
106+
audioBuffer->insert(audioBuffer->end(), temp.begin(), temp.end());
107+
96108

97109
// asr
98-
bool isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
99-
printf("%s: isOk:%d\n", get_current_time().c_str(), isOk);
110+
nlohmann::json response;
111+
bool isOk = whisperService.process(audioBuffer->data(), audioBuffer->size());
100112
if (isOk) {
101-
nlohmann::json response;
102113
nlohmann::json results = nlohmann::json::array(); // create JSON Array
103114

104115
const int n_segments = whisper_full_n_segments(whisperService.ctx);
@@ -114,10 +125,10 @@ int main(int argc, char **argv) {
114125
segment["sentence"] = sentence;
115126
results.push_back(segment);
116127
}
117-
118128
response["result"] = results;
119-
ws->send(response.dump(), uWS::OpCode::TEXT);
120129
}
130+
131+
ws->send(response.dump(), uWS::OpCode::TEXT);
121132
}
122133
};
123134

0 commit comments

Comments
 (0)