Skip to content

Commit d0b59ef

Browse files
author
litongmacos
committed
change audio_vad
1 parent a35af07 commit d0b59ef

File tree

4 files changed

+43
-11
lines changed

4 files changed

+43
-11
lines changed

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@ include_directories(${SDL2_INCLUDE_DIRS})
2323
include_directories(webrtc)
2424
include_directories(.)
2525
# find cpp files
26-
file(GLOB SRC_FILES simplevad/*.c simplevad/*.h
26+
file(GLOB VAD_FILES simplevad/*.c simplevad/*.h
2727
webrtc/common_audio/*/*.c webrtc/rtc_base/*.c*)
2828

29-
add_executable(audio_vad examples/audio_vad.cpp ${SRC_FILES})
29+
add_executable(audio_vad examples/audio_vad.cpp ${VAD_FILES})
3030
target_link_libraries(audio_vad pthread)
3131

3232

common/common.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -727,7 +727,8 @@ void high_pass_filter(std::vector<float> &data, float cutoff, float sample_rate)
727727
}
728728

729729
bool
730-
vad_simple(std::vector<float> &pcmf32, int sample_rate, int last_ms, float vad_thold, float freq_thold, bool verbose) {
730+
vad_simple(std::vector<float> &pcmf32, int sample_rate, int last_ms,
731+
float vad_thold, float freq_thold, bool verbose) {
731732
const int n_samples = pcmf32.size();
732733
const int n_samples_last = (sample_rate * last_ms) / 1000;
733734

examples/audio_vad.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,17 @@ int main(int argc,char** argv) {
2828
return 3;
2929
}
3030

31-
std::unique_ptr<simple_vad, decltype(&simple_vad_free)> vad(simple_vad_create(), &simple_vad_free);
31+
std::unique_ptr<simple_vad, decltype(&simple_vad_free)> vad(
32+
simple_vad_create(), &simple_vad_free);
3233
if (!vad) {
3334
return 4;
3435
}
3536

3637
std::unique_ptr<FILE, decltype(&fclose)> fp2(fopen(filename, "rb"), &fclose);
3738
std::unique_ptr<struct cut_info, decltype(&cut_info_free)> cut(cut_info_create(fp2.get()), &cut_info_free);
3839

39-
snprintf(cut->output_filename_prefix, sizeof(cut->output_filename_prefix), "%s", output_filename_prefix);
40+
snprintf(cut->output_filename_prefix, sizeof(cut->output_filename_prefix), "%s",
41+
output_filename_prefix);
4042
snprintf(cut->output_file_dir, sizeof(cut->output_file_dir), "%s", output_dir);
4143

4244
int res = run(fp.get(), vad.get(), cut.get());

whisper_server_base_on_uwebsockets.cpp

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
#include <whisper.h>
1010
#include <sstream>
1111

12+
bool process_vad(float *pDouble, unsigned long size);
13+
14+
std::vector<float> extract_first_voice_segment(std::vector<float> vector1);
15+
1216
using namespace stream_components;
1317

1418
int main(int argc, char **argv) {
@@ -22,7 +26,7 @@ int main(int argc, char **argv) {
2226
// Compute derived parameters
2327
params.initialize();
2428
//output params
25-
printf("vad:%d\n", params.audio.use_vad);
29+
2630

2731
// Check parameters
2832
if (params.service.language != "auto" && whisper_lang_id(params.service.language.c_str()) == -1) {
@@ -58,7 +62,7 @@ int main(int argc, char **argv) {
5862
};
5963

6064
// WebSocket /paddlespeech/asr/streaming handler
61-
auto item = [&whisperService](auto *ws, std::string_view message, uWS::OpCode opCode) {
65+
auto item = [&whisperService, &params](auto *ws, std::string_view message, uWS::OpCode opCode) {
6266
thread_local std::vector<float> audioBuffer; //thread-localized variable
6367
thread_local wav_writer wavWriter;
6468
thread_local std::string filename;
@@ -102,11 +106,11 @@ int main(int argc, char **argv) {
102106
} else if (opCode == uWS::OpCode::BINARY) {
103107
// process binary message(PCM16 data)
104108
auto size = message.size();
105-
109+
std::basic_string_view<char, std::char_traits<char>>::const_pointer data = message.data();
106110
printf("%s: Received message size on /paddlespeech/asr/streaming: %zu\n", get_current_time().c_str(), size);
107111
// add received PCM16 to audio cache
108112
std::vector<int16_t> pcm16(size / 2);
109-
std::basic_string_view<char, std::char_traits<char>>::const_pointer data = message.data();
113+
110114
std::memcpy(pcm16.data(), data, size);
111115

112116
std::vector<float> temp(size / 2);
@@ -116,8 +120,24 @@ int main(int argc, char **argv) {
116120
//write to file
117121
wavWriter.write(temp.data(), size / 2);
118122
audioBuffer.insert(audioBuffer.end(), temp.begin(), temp.end());
119-
// asr
120-
bool isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
123+
// If VAD is enabled
124+
bool isOk = false;
125+
if (params.audio.use_vad) {
126+
printf("%s: vad: %n\n", get_current_time().c_str(), params.audio.use_vad);
127+
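// TODO: implement VAD processing. This note originally assumed a process_vad helper that processes the audio and reports whether it contains valid speech; the call below uses vad_simple instead.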
128+
bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
129+
130+
if (containsVoice) {
131+
// Extract the first valid audio segment
132+
// TODO: implement the logic that extracts the first valid audio segment; this assumes extract_first_voice_segment is the function that provides it
133+
std::vector<float> firstSegment = extract_first_voice_segment(audioBuffer);
134+
// Remove the corresponding data from audioBuffer (intent only; no removal is performed in the code shown here)
135+
isOk = whisperService.process(firstSegment.data(), firstSegment.size());
136+
}
137+
} else {
138+
// asr
139+
isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
140+
}
121141
if (isOk) {
122142
const int n_segments = whisper_full_n_segments(whisperService.ctx);
123143
nlohmann::json results = nlohmann::json(nlohmann::json::array());
@@ -137,6 +157,7 @@ int main(int argc, char **argv) {
137157
response["result"] = final_results;
138158
}
139159

160+
140161
ws->send(response.dump(), uWS::OpCode::TEXT);
141162
}
142163
};
@@ -153,3 +174,11 @@ int main(int argc, char **argv) {
153174
//listen
154175
.listen(port, started_handler).run();
155176
}
177+
178+
// Placeholder for extracting the first voiced segment from a buffer of audio samples.
// The current body ignores `vector1` (which is taken by value, i.e. copied on every call)
// and always returns an empty vector, so callers receive zero samples.
// NOTE(review): the WebSocket BINARY handler passes this result straight to
// whisperService.process(); until this is implemented that call gets size 0 — confirm
// that is handled or implement the extraction.
std::vector<float> extract_first_voice_segment(std::vector<float> vector1) {
179+
return std::vector<float>();  // stub: no segment extraction is performed yet
180+
}
181+
182+
// Placeholder VAD entry point: takes a pointer to samples and a sample count and
// unconditionally returns false; neither argument is read.
// The parameter is named `pDouble` although its type is float*.
// NOTE(review): no call site is visible in this diff (the handler calls vad_simple) —
// presumably this stub can be removed or implemented; verify against the rest of the file.
bool process_vad(float *pDouble, unsigned long size) {
183+
return false;  // stub: never reports voice
184+
}

0 commit comments

Comments
 (0)