Skip to content

Commit 95c987d

Browse files
author
litongmacos
committed
Encapsulate getResult method
1 parent 7751bcc commit 95c987d

File tree

3 files changed

+64
-37
lines changed

3 files changed

+64
-37
lines changed

examples/simplest.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,7 @@ struct whisper_local_params {
8686
std::string language = "en";
8787
std::string prompt;
8888
std::string font_path = "/System/Library/Fonts/Supplemental/Courier New Bold.ttf";
89-
std::string model = "models/ggml-base.en.bin";
89+
std::string model = "../models/ggml-base.en.bin";
9090

9191
// [TDRZ] speaker turn string
9292
std::string tdrz_speaker_turn = " [SPEAKER_TURN]"; // TODO: set from command line

stream/stream_components_params.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ namespace stream_components {
5050
bool use_gpu = true;
5151

5252
std::string language = "en";
53-
std::string model = "models/ggml-base.en.bin";
53+
std::string model = "../models/ggml-base.en.bin";
5454

5555
void initialize() {}
5656
};

whisper_server_base_on_uwebsockets.cpp

Lines changed: 62 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -9,21 +9,15 @@
99
#include <whisper.h>
1010
#include <sstream>
1111

12-
struct PerSocketData {
13-
wav_writer wavWriter;
14-
};
15-
16-
bool process_vad(float *pDouble, unsigned long size);
17-
18-
std::vector<float> extract_first_voice_segment(std::vector<float> vector1);
19-
2012
using namespace stream_components;
13+
nlohmann::json getResult(whisper_context *ctx);
14+
bool processAudio(WhisperService service, std::vector<float> pcm32, const whisper_local_stream_params& params);
2115

2216
int main(int argc, char **argv) {
2317
// Read parameters...
2418
whisper_local_stream_params params;
2519

26-
if (whisper_params_parse(argc, argv, params) == false) {
20+
if (!whisper_params_parse(argc, argv, params)) {
2721
return 1;
2822
}
2923

@@ -40,7 +34,7 @@ int main(int argc, char **argv) {
4034
}
4135

4236
// Instantiate the service
43-
struct whisper_context_params cparams;
37+
struct whisper_context_params cparams{};
4438
cparams.use_gpu = params.service.use_gpu;
4539
stream_components::WhisperService whisperService(params.service, params.audio, cparams);
4640

@@ -66,7 +60,7 @@ int main(int argc, char **argv) {
6660
};
6761
//Save Audio
6862
auto ws_save_handler=[](auto *ws,std::string_view message,uWS::OpCode opCode){
69-
std::string* userData = (std::string*)ws->getUserData();
63+
auto* userData = (std::string*)ws->getUserData();
7064
printf("%s: User Data: %s\n", get_current_time().c_str(), userData->c_str());
7165
thread_local wav_writer wavWriter;
7266
thread_local std::string filename;
@@ -144,16 +138,36 @@ int main(int argc, char **argv) {
144138
// nlohmann::json response = {{"name",filename},{"signal", signal}};
145139
response = {{"name", filename},
146140
{"signal", signal}};
147-
response["result"] = final_results;
141+
142+
std::vector<float> pcm32(audioBuffer.size());
143+
std::transform(audioBuffer.begin(), audioBuffer.end(), pcm32.begin(), [](int16_t sample) {
144+
return static_cast<float>(sample) / 32768.0f;
145+
});
146+
audioBuffer.clear();
147+
// 如果开启了VAD
148+
bool isOk;
149+
if (params.audio.use_vad) {
150+
printf("%s: vad: %d \n", get_current_time().c_str(), params.audio.use_vad);
151+
// TODO: 实现VAD处理,
152+
//bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
153+
isOk=whisperService.process(pcm32.data(), pcm32.size());
154+
} else {
155+
// asr
156+
isOk= whisperService.process(pcm32.data(), pcm32.size());
157+
}
158+
if (isOk) {
159+
final_results = getResult(whisperService.ctx);
160+
response["result"] = final_results;
161+
}
148162
ws->send(response.dump(), uWS::OpCode::TEXT);
149163
}
150164
// other process logic...
151165
} catch (const std::exception &e) {
152166
std::cerr << "JSON parse error: " << e.what() << std::endl;
167+
auto size = message.size();
153168
}
154169
} else if (opCode == uWS::OpCode::BINARY) {
155170
// process binary message(PCM16 data)
156-
auto size = message.size();
157171
std::basic_string_view<char, std::char_traits<char>>::const_pointer data = message.data();
158172
printf("%s: Received message size on /paddlespeech/asr/streaming: %zu\n", get_current_time().c_str(), size);
159173
// add received PCM16 to audio cache
@@ -172,32 +186,18 @@ int main(int argc, char **argv) {
172186
});
173187
audioBuffer.clear();
174188
// 如果开启了VAD
175-
bool isOk = false;
189+
bool isOk;
176190
if (params.audio.use_vad) {
177-
printf("%s: vad: %n\n", get_current_time().c_str(), params.audio.use_vad);
191+
printf("%s: vad: %d \n", get_current_time().c_str(), params.audio.use_vad);
178192
// TODO: 实现VAD处理,
179193
//bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
180-
isOk = whisperService.process(pcm32.data(), pcm32.size());
194+
isOk=whisperService.process(pcm32.data(), pcm32.size());
181195
} else {
182196
// asr
183-
isOk = whisperService.process(pcm32.data(), pcm32.size());
197+
isOk=whisperService.process(pcm32.data(), pcm32.size());
184198
}
185199
if (isOk) {
186-
const int n_segments = whisper_full_n_segments(whisperService.ctx);
187-
nlohmann::json results = nlohmann::json(nlohmann::json::array());
188-
for (int i = 0; i < n_segments; ++i) {
189-
nlohmann::json segment;
190-
int64_t t0 = whisper_full_get_segment_t0(whisperService.ctx, i);
191-
int64_t t1 = whisper_full_get_segment_t1(whisperService.ctx, i);
192-
const char *sentence = whisper_full_get_segment_text(whisperService.ctx, i);
193-
auto result = std::to_string(t0) + "-->" + std::to_string(t1) + ":" + sentence + "\n";
194-
printf("%s: result:%s\n", get_current_time().c_str(), result.c_str());
195-
segment["t0"] = t0;
196-
segment["t1"] = t1;
197-
segment["sentence"] = sentence;
198-
results.push_back(segment);
199-
}
200-
final_results = results;
200+
final_results = getResult(whisperService.ctx);
201201
response["result"] = final_results;
202202
}
203203
}
@@ -215,7 +215,7 @@ int main(int argc, char **argv) {
215215
//only_save_audio
216216
.ws<std::string>("/streaming/save", {.open=[](auto *ws){
217217
// 初始化用户数据
218-
std::string* userData = (std::string*)ws->getUserData();
218+
auto* userData = (std::string*)ws->getUserData();
219219
*userData = "Create User Id"; // 设置初始值
220220
},.message = ws_save_handler})
221221
//streaming asr
@@ -224,7 +224,34 @@ int main(int argc, char **argv) {
224224
.listen(port, started_handler).run();
225225
}
226226

227-
std::vector<float> extract_first_voice_segment(std::vector<float> vector1) {
228-
return std::vector<float>();
227+
/**
 * Feeds a buffer of float PCM samples to the whisper service.
 *
 * When params.audio.use_vad is set, the flag is logged first. The VAD step
 * itself is not implemented yet, so with or without VAD the whole buffer is
 * handed unchanged to whisperService.process().
 *
 * NOTE(review): whisperService and pcm32 are taken by value, so every call
 * works on copies of both - confirm that copying WhisperService is intended.
 *
 * @param whisperService service that performs the recognition.
 * @param pcm32          audio samples to process.
 * @param params         stream parameters; only audio.use_vad is read here.
 * @return the result of whisperService.process() for the buffer.
 */
bool processAudio(WhisperService whisperService, std::vector<float> pcm32, const whisper_local_stream_params& params) {
    if (params.audio.use_vad) {
        printf("%s: vad: %d \n", get_current_time().c_str(), params.audio.use_vad);
        // TODO: implement VAD handling, e.g.
        //   bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000,
        //                                   params.audio.vad_thold, params.audio.freq_thold, false);
    }

    // Plain ASR: the complete buffer is passed through as-is in both modes.
    float *samples = pcm32.data();
    const auto sample_count = pcm32.size();
    return whisperService.process(samples, sample_count);
}
230238

239+
/**
 * Collects the segments currently held by a whisper context into a JSON array.
 *
 * Each element is an object with the keys "t0" and "t1" (the values returned
 * by whisper_full_get_segment_t0 / whisper_full_get_segment_t1) and
 * "sentence" (the segment text). Every segment is also printed to stdout,
 * prefixed with the current time.
 *
 * @param ctx whisper context to read the segments from.
 * @return JSON array with one object per segment; an empty array when the
 *         context reports no segments.
 */
nlohmann::json getResult(whisper_context *ctx) {
    nlohmann::json segments = nlohmann::json::array();

    const int segment_count = whisper_full_n_segments(ctx);
    for (int idx = 0; idx < segment_count; ++idx) {
        const int64_t start = whisper_full_get_segment_t0(ctx, idx);
        const int64_t stop = whisper_full_get_segment_t1(ctx, idx);
        const char *text = whisper_full_get_segment_text(ctx, idx);

        // Human-readable "<t0>--><t1>:<text>" line for the log.
        std::string line = std::to_string(start);
        line += "-->";
        line += std::to_string(stop);
        line += ":";
        line += text;
        line += "\n";
        printf("%s: result:%s\n", get_current_time().c_str(), line.c_str());

        nlohmann::json entry;
        entry["t0"] = start;
        entry["t1"] = stop;
        entry["sentence"] = text;
        segments.push_back(entry);
    }

    return segments;
}
256+
257+

0 commit comments

Comments
 (0)