Skip to content

Commit 089924c

Browse files
author
litongmacos
committed
add audio_vad on macos
1 parent d7a3292 commit 089924c

File tree

5 files changed

+81
-26
lines changed

5 files changed

+81
-26
lines changed

CMakeLists.txt

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,10 @@ include_directories(${SDL2_INCLUDE_DIRS})
2121

2222
find_package(SampleRate CONFIG REQUIRED)
2323
find_package(FFMPEG REQUIRED)
24-
# webrtc
25-
include_directories(webrtc)
26-
include_directories(.)
27-
# find cpp files
28-
file(GLOB VAD_FILES simplevad/*.c simplevad/*.h
29-
webrtc/common_audio/*/*.c webrtc/rtc_base/*.c*)
30-
31-
add_executable(audio_vad examples/audio_vad.cpp ${VAD_FILES})
32-
target_link_libraries(audio_vad pthread)
33-
24+
# 查找 SpeexDSP 库
25+
find_library(SPEEXDSP_LIBRARY NAMES speexdsp)
26+
# 查找头文件
27+
find_path(SPEEXDSP_INCLUDE_DIRS "speex/speex_preprocess.h")
3428

3529
# Detecting Operating Systems
3630
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
@@ -48,12 +42,17 @@ elseif (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
4842
link_directories(E:\\code\\cpp\\project-ping\\whisper.cpp\\cmake-build-release\\bin)
4943
endif ()
5044

45+
add_executable(audio_vad examples/audio_vad.cpp common/common.cpp
46+
stream/stream_components_service.cpp common/utils.cpp)
47+
target_link_libraries(audio_vad PRIVATE whisper SampleRate::samplerate ${SPEEXDSP_LIBRARY})
48+
# 链接头文件
49+
target_include_directories(audio_vad PRIVATE ${SPEEXDSP_INCLUDE_DIRS})
5150

5251
add_executable(sdl_version examples/sdl_version.cpp)
53-
target_link_libraries(sdl_version ${SDL2_LIBRARIES})
52+
target_link_libraries(sdl_version PRIVATE ${SDL2_LIBRARIES})
5453

5554
add_executable(simplest examples/simplest.cpp common/common.cpp common/utils.cpp)
56-
target_link_libraries(simplest whisper SampleRate::samplerate)
55+
target_link_libraries(simplest PRIVATE whisper SampleRate::samplerate)
5756

5857
add_executable(stream_local examples/stream_local.cpp common/common.cpp common/common-sdl.cpp common/utils.cpp
5958
stream/stream_components_service.cpp stream/stream_components_audio.cpp

common/common.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
#define DR_MP3_IMPLEMENTATION
1212

13-
#include "dr_libs/dr_mp3.h"
13+
#include "../dr_libs/dr_mp3.h"
1414
#include <samplerate.h>
1515
#include <cmath>
1616
#include <cstring>

examples/audio_vad.cpp

Lines changed: 65 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,68 @@
1-
#include <cstdio>
2-
#include <cstdlib>
31
#include <iostream>
4-
#include <memory>
5-
#include <filesystem>
6-
7-
int main(int argc, char **argv) {
8-
//default cmake-build-debug/main
9-
const char filename[] = "../pcm/16k_1.pcm";
10-
const char output_dir[] = "output_pcm";
11-
const char output_filename_prefix[] = "16k_1.pcm";
12-
if (!std::filesystem::exists(output_dir)) {
13-
std::filesystem::create_directories(output_dir);
2+
#include <vector>
3+
#include <cstdint>
4+
#include <whisper.h>
5+
6+
#include "../stream/stream_components_service.h"
7+
#include "../stream/stream_components.h"
8+
#include "../common/utils.h"
9+
#include "../common/common.h"
10+
#include <speex/speex_preprocess.h>
11+
12+
using namespace stream_components;
13+
14+
15+
int main() {
16+
std::string wav_file_path = "../samples/jfk.wav"; // 替换为您的 WAV 文件路径
17+
// audio arrays
18+
std::vector<float> pcmf32; // mono-channel F32 PCM
19+
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
20+
::read_wav(wav_file_path, pcmf32, pcmf32s, false);
21+
22+
printf("size of samples %lu\n", pcmf32.size());
23+
24+
25+
whisper_local_stream_params params;
26+
struct whisper_context_params cparams{};
27+
cparams.use_gpu = params.service.use_gpu;
28+
//Instantiate the service
29+
stream_components::WhisperService whisperService(params.service, params.audio, cparams);
30+
31+
//Simulate websokcet by adding 1500 data each time.
32+
std::vector<float> audio_buffer;
33+
int chunk_size = 160; // 适用于 16 kHz 采样率的 100 毫秒帧
34+
SpeexPreprocessState *st = speex_preprocess_state_init(chunk_size, WHISPER_SAMPLE_RATE);
35+
36+
int vad = 1;
37+
speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_VAD, &vad);
38+
39+
bool last_is_speech = false;
40+
// 处理音频帧
41+
for (size_t i = 0; i < pcmf32.size(); i += chunk_size) {
42+
spx_int16_t frame[chunk_size];
43+
for (int j = 0; j < chunk_size; ++j) {
44+
if (i + j < pcmf32.size()) {
45+
frame[j] = (spx_int16_t)(pcmf32[i + j] * 32768);
46+
} else {
47+
frame[j] = 0; // 对于超出范围的部分填充 0
48+
}
49+
}
50+
int is_speech = speex_preprocess_run(st, frame);
51+
52+
// 将当前帧添加到 audio_buffer
53+
audio_buffer.insert(audio_buffer.end(), pcmf32.begin() + i, pcmf32.begin() + std::min(i + chunk_size, pcmf32.size()));
54+
printf("is_speech %d \n",is_speech);
55+
if (!is_speech && last_is_speech) {
56+
bool b = whisperService.process(pcmf32.data(), pcmf32.size());
57+
const nlohmann::json &json_array = get_result(whisperService.ctx);
58+
const std::basic_string<char, std::char_traits<char>, std::allocator<char>> &string = json_array.dump();
59+
printf("%s\n",string.c_str());
60+
return 0;
61+
audio_buffer.clear();
62+
}
63+
64+
last_is_speech = is_speech != 0;
1465
}
66+
67+
speex_preprocess_state_destroy(st);
1568
}

vcpkg.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,8 @@
1616
}, {
1717
"name" : "ffmpeg",
1818
"version>=" : "6.1"
19+
}, {
20+
"name" : "speexdsp",
21+
"version>=" : "1.2.1#1"
1922
} ]
2023
}

whisper_server_base_on_uwebsockets.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ int main(int argc, char **argv) {
191191
} else {
192192
// asr
193193
isOk = whisperService.process(audioBuffer.data(), audioBuffer.size());
194-
// audioBuffer.clear();
194+
audioBuffer.clear();
195195
}
196196
printf("%s: is_ok: %d \n", get_current_time().c_str(), isOk);
197197
if (isOk) {

0 commit comments

Comments
 (0)