|
1 | 1 | #include <ruby.h>
|
2 | 2 | #include "ruby_whisper.h"
|
3 |
| -#define DR_WAV_IMPLEMENTATION |
4 |
| -#include "dr_wav.h" |
| 3 | +#include "common-whisper.h" |
5 | 4 | #include <string>
|
6 | 5 | #include <vector>
|
7 | 6 |
|
@@ -47,84 +46,9 @@ ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
|
47 | 46 | std::vector<float> pcmf32; // mono-channel F32 PCM
|
48 | 47 | std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
49 | 48 |
|
50 |
| - // WAV input - this is directly from main.cpp example |
51 |
| - { |
52 |
| - drwav wav; |
53 |
| - std::vector<uint8_t> wav_data; // used for pipe input from stdin |
54 |
| - |
55 |
| - if (fname_inp == "-") { |
56 |
| - { |
57 |
| - uint8_t buf[1024]; |
58 |
| - while (true) { |
59 |
| - const size_t n = fread(buf, 1, sizeof(buf), stdin); |
60 |
| - if (n == 0) { |
61 |
| - break; |
62 |
| - } |
63 |
| - wav_data.insert(wav_data.end(), buf, buf + n); |
64 |
| - } |
65 |
| - } |
66 |
| - |
67 |
| - if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) { |
68 |
| - fprintf(stderr, "error: failed to open WAV file from stdin\n"); |
69 |
| - return self; |
70 |
| - } |
71 |
| - |
72 |
| - fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size()); |
73 |
| - } else if (drwav_init_file(&wav, fname_inp.c_str(), nullptr) == false) { |
74 |
| - fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str()); |
75 |
| - return self; |
76 |
| - } |
77 |
| - |
78 |
| - if (wav.channels != 1 && wav.channels != 2) { |
79 |
| - fprintf(stderr, "WAV file '%s' must be mono or stereo\n", fname_inp.c_str()); |
80 |
| - return self; |
81 |
| - } |
82 |
| - |
83 |
| - if (rwp->diarize && wav.channels != 2 && rwp->params.print_timestamps == false) { |
84 |
| - fprintf(stderr, "WAV file '%s' must be stereo for diarization and timestamps have to be enabled\n", fname_inp.c_str()); |
85 |
| - return self; |
86 |
| - } |
87 |
| - |
88 |
| - if (wav.sampleRate != WHISPER_SAMPLE_RATE) { |
89 |
| - fprintf(stderr, "WAV file '%s' must be %i kHz\n", fname_inp.c_str(), WHISPER_SAMPLE_RATE/1000); |
90 |
| - return self; |
91 |
| - } |
92 |
| - |
93 |
| - if (wav.bitsPerSample != 16) { |
94 |
| - fprintf(stderr, "WAV file '%s' must be 16-bit\n", fname_inp.c_str()); |
95 |
| - return self; |
96 |
| - } |
97 |
| - |
98 |
| - const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8); |
99 |
| - |
100 |
| - std::vector<int16_t> pcm16; |
101 |
| - pcm16.resize(n*wav.channels); |
102 |
| - drwav_read_pcm_frames_s16(&wav, n, pcm16.data()); |
103 |
| - drwav_uninit(&wav); |
104 |
| - |
105 |
| - // convert to mono, float |
106 |
| - pcmf32.resize(n); |
107 |
| - if (wav.channels == 1) { |
108 |
| - for (uint64_t i = 0; i < n; i++) { |
109 |
| - pcmf32[i] = float(pcm16[i])/32768.0f; |
110 |
| - } |
111 |
| - } else { |
112 |
| - for (uint64_t i = 0; i < n; i++) { |
113 |
| - pcmf32[i] = float((int32_t)pcm16[2*i] + pcm16[2*i + 1])/65536.0f; |
114 |
| - } |
115 |
| - } |
116 |
| - |
117 |
| - if (rwp->diarize) { |
118 |
| - // convert to stereo, float |
119 |
| - pcmf32s.resize(2); |
120 |
| - |
121 |
| - pcmf32s[0].resize(n); |
122 |
| - pcmf32s[1].resize(n); |
123 |
| - for (uint64_t i = 0; i < n; i++) { |
124 |
| - pcmf32s[0][i] = float(pcm16[2*i])/32768.0f; |
125 |
| - pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f; |
126 |
| - } |
127 |
| - } |
| 49 | + if (!read_audio_data(fname_inp, pcmf32, pcmf32s, rwp->diarize)) { |
| 50 | + fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str()); |
| 51 | + return self; |
128 | 52 | }
|
129 | 53 | {
|
130 | 54 | static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
|
|
0 commit comments