Skip to content

Commit 7751bcc

Browse files
author
litongmacos
committed
add time length judgment
1 parent b97d0ad commit 7751bcc

File tree

1 file changed

+38
-43
lines changed

1 file changed

+38
-43
lines changed

whisper_server_base_on_uwebsockets.cpp

Lines changed: 38 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ int main(int argc, char **argv) {
111111

112112
// WebSocket /paddlespeech/asr/streaming handler
113113
auto ws_streaming_handler = [&whisperService, &params](auto *ws, std::string_view message, uWS::OpCode opCode) {
114-
thread_local std::vector<float> audioBuffer; //thread-localized variable
114+
thread_local std::vector<int16_t> audioBuffer; //thread-localized variable
115115
thread_local wav_writer wavWriter;
116116
thread_local std::string filename;
117117
//std::unique_ptr<nlohmann::json> results(new nlohmann::json(nlohmann::json::array()));
@@ -160,52 +160,47 @@ int main(int argc, char **argv) {
160160
std::vector<int16_t> pcm16(size / 2);
161161

162162
std::memcpy(pcm16.data(), data, size);
163-
164-
std::vector<float> temp(size / 2);
165-
std::transform(pcm16.begin(), pcm16.end(), temp.begin(), [](int16_t sample) {
166-
return static_cast<float>(sample) / 32768.0f;
167-
});
168163
//write to file
169-
//wavWriter.write(temp.data(), size / 2);
170-
audioBuffer.insert(audioBuffer.end(), temp.begin(), temp.end());
171-
// 如果开启了VAD
172-
bool isOk = false;
173-
if (params.audio.use_vad) {
174-
printf("%s: vad: %n\n", get_current_time().c_str(), params.audio.use_vad);
175-
// TODO: 实现VAD处理,这里假设process_vad是一个可以处理音频并返回是否包含有效语音的函数
176-
bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
177-
178-
if (containsVoice) {
179-
// 提取第一个有效音频段
180-
// TODO: 实现提取第一个有效音频段的逻辑,这里假设extract_first_voice_segment是实现这一功能的函数
181-
std::vector<float> firstSegment = extract_first_voice_segment(audioBuffer);
182-
// 清除audioBuffer中对应的字节
183-
isOk = whisperService.process(firstSegment.data(), firstSegment.size());
164+
wavWriter.write(pcm16.data(), size / 2);
165+
166+
audioBuffer.insert(audioBuffer.end(), pcm16.begin(), pcm16.end());
167+
unsigned long bufferSize = audioBuffer.size();
168+
if(bufferSize>16000*10){
169+
std::vector<float> pcm32(bufferSize);
170+
std::transform(audioBuffer.begin(), audioBuffer.end(), pcm32.begin(), [](int16_t sample) {
171+
return static_cast<float>(sample) / 32768.0f;
172+
});
173+
audioBuffer.clear();
174+
// 如果开启了VAD
175+
bool isOk = false;
176+
if (params.audio.use_vad) {
177+
// Log whether VAD is enabled. "%d" prints the flag as 0/1; the previous "%n" is a
// store-count conversion that expects an int* argument, so passing the flag itself
// was undefined behavior.
printf("%s: vad: %d\n", get_current_time().c_str(), static_cast<int>(params.audio.use_vad));
178+
// TODO: implement VAD processing (for now the whole buffer is transcribed, same as the non-VAD branch)
179+
//bool containsVoice = vad_simple(audioBuffer, WHISPER_SAMPLE_RATE, 1000, params.audio.vad_thold, params.audio.freq_thold, false);
180+
isOk = whisperService.process(pcm32.data(), pcm32.size());
181+
} else {
182+
// asr
183+
isOk = whisperService.process(pcm32.data(), pcm32.size());
184184
}
185-
} else {
186-
// asr
187-
isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
188-
}
189-
if (isOk) {
190-
const int n_segments = whisper_full_n_segments(whisperService.ctx);
191-
nlohmann::json results = nlohmann::json(nlohmann::json::array());
192-
for (int i = 0; i < n_segments; ++i) {
193-
nlohmann::json segment;
194-
int64_t t0 = whisper_full_get_segment_t0(whisperService.ctx, i);
195-
int64_t t1 = whisper_full_get_segment_t1(whisperService.ctx, i);
196-
const char *sentence = whisper_full_get_segment_text(whisperService.ctx, i);
197-
auto result = std::to_string(t0) + "-->" + std::to_string(t1) + ":" + sentence + "\n";
198-
printf("%s: result:%s\n", get_current_time().c_str(), result.c_str());
199-
segment["t0"] = t0;
200-
segment["t1"] = t1;
201-
segment["sentence"] = sentence;
202-
results.push_back(segment);
185+
if (isOk) {
186+
const int n_segments = whisper_full_n_segments(whisperService.ctx);
187+
nlohmann::json results = nlohmann::json(nlohmann::json::array());
188+
for (int i = 0; i < n_segments; ++i) {
189+
nlohmann::json segment;
190+
int64_t t0 = whisper_full_get_segment_t0(whisperService.ctx, i);
191+
int64_t t1 = whisper_full_get_segment_t1(whisperService.ctx, i);
192+
const char *sentence = whisper_full_get_segment_text(whisperService.ctx, i);
193+
auto result = std::to_string(t0) + "-->" + std::to_string(t1) + ":" + sentence + "\n";
194+
printf("%s: result:%s\n", get_current_time().c_str(), result.c_str());
195+
segment["t0"] = t0;
196+
segment["t1"] = t1;
197+
segment["sentence"] = sentence;
198+
results.push_back(segment);
199+
}
200+
final_results = results;
201+
response["result"] = final_results;
203202
}
204-
final_results = results;
205-
response["result"] = final_results;
206203
}
207-
208-
209204
ws->send(response.dump(), uWS::OpCode::TEXT);
210205
}
211206
};

0 commit comments

Comments
 (0)