Skip to content

Commit c42f67e

Browse files
ruby : follow audio library change (#2851)
* Enable CPU
* Follow audio lib change
1 parent 339a1cb commit c42f67e

File tree

3 files changed

+20
-84
lines changed

3 files changed

+20
-84
lines changed

.github/workflows/bindings-ruby.yml

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,12 @@ on:
1919
- ggml/**/*.m
2020
- ggml/**/*.metal
2121
- scripts/get-flags.mk
22-
- examples/dr_wav.h
22+
- examples/common.h
23+
- examples/common.cpp
24+
- examples/common-whisper.h
25+
- examples/common-whisper.cpp
26+
- examples/stb_vorbis.c
27+
- examples/miniaudio.h
2328
pull_request:
2429
paths:
2530
- bindings/ruby/**
@@ -39,7 +44,12 @@ on:
3944
- ggml/**/*.m
4045
- ggml/**/*.metal
4146
- scripts/get-flags.mk
42-
- examples/dr_wav.h
47+
- examples/common.h
48+
- examples/common.cpp
49+
- examples/common-whisper.h
50+
- examples/common-whisper.cpp
51+
- examples/stb_vorbis.c
52+
- examples/miniaudio.h
4353

4454
jobs:
4555
ubuntu-22:

bindings/ruby/ext/extconf.rb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
$GGML_METAL_EMBED_LIBRARY = true
3636
end
3737

38-
$MK_CPPFLAGS = '-Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -Iexamples'
38+
$MK_CPPFLAGS = '-Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -Iexamples -DGGML_USE_CPU'
3939
$MK_CFLAGS = '-std=c11 -fPIC'
4040
$MK_CXXFLAGS = '-std=c++17 -fPIC'
4141
$MK_NVCCFLAGS = '-std=c++17'
@@ -171,7 +171,9 @@
171171
'ggml/src/ggml-cpu/ggml-cpu-traits.o'
172172

173173
$OBJ_WHISPER <<
174-
'src/whisper.o'
174+
'src/whisper.o' <<
175+
'examples/common.o' <<
176+
'examples/common-whisper.o'
175177

176178
$objs = $OBJ_GGML + $OBJ_WHISPER + $OBJ_COMMON + $OBJ_SDL
177179
$objs <<

bindings/ruby/ext/ruby_whisper_transcribe.cpp

Lines changed: 4 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#include <ruby.h>
22
#include "ruby_whisper.h"
3-
#define DR_WAV_IMPLEMENTATION
4-
#include "dr_wav.h"
3+
#include "common-whisper.h"
54
#include <string>
65
#include <vector>
76

@@ -47,84 +46,9 @@ ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
4746
std::vector<float> pcmf32; // mono-channel F32 PCM
4847
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
4948

50-
// WAV input - this is directly from main.cpp example
51-
{
52-
drwav wav;
53-
std::vector<uint8_t> wav_data; // used for pipe input from stdin
54-
55-
if (fname_inp == "-") {
56-
{
57-
uint8_t buf[1024];
58-
while (true) {
59-
const size_t n = fread(buf, 1, sizeof(buf), stdin);
60-
if (n == 0) {
61-
break;
62-
}
63-
wav_data.insert(wav_data.end(), buf, buf + n);
64-
}
65-
}
66-
67-
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
68-
fprintf(stderr, "error: failed to open WAV file from stdin\n");
69-
return self;
70-
}
71-
72-
fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
73-
} else if (drwav_init_file(&wav, fname_inp.c_str(), nullptr) == false) {
74-
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
75-
return self;
76-
}
77-
78-
if (wav.channels != 1 && wav.channels != 2) {
79-
fprintf(stderr, "WAV file '%s' must be mono or stereo\n", fname_inp.c_str());
80-
return self;
81-
}
82-
83-
if (rwp->diarize && wav.channels != 2 && rwp->params.print_timestamps == false) {
84-
fprintf(stderr, "WAV file '%s' must be stereo for diarization and timestamps have to be enabled\n", fname_inp.c_str());
85-
return self;
86-
}
87-
88-
if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
89-
fprintf(stderr, "WAV file '%s' must be %i kHz\n", fname_inp.c_str(), WHISPER_SAMPLE_RATE/1000);
90-
return self;
91-
}
92-
93-
if (wav.bitsPerSample != 16) {
94-
fprintf(stderr, "WAV file '%s' must be 16-bit\n", fname_inp.c_str());
95-
return self;
96-
}
97-
98-
const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8);
99-
100-
std::vector<int16_t> pcm16;
101-
pcm16.resize(n*wav.channels);
102-
drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
103-
drwav_uninit(&wav);
104-
105-
// convert to mono, float
106-
pcmf32.resize(n);
107-
if (wav.channels == 1) {
108-
for (uint64_t i = 0; i < n; i++) {
109-
pcmf32[i] = float(pcm16[i])/32768.0f;
110-
}
111-
} else {
112-
for (uint64_t i = 0; i < n; i++) {
113-
pcmf32[i] = float((int32_t)pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
114-
}
115-
}
116-
117-
if (rwp->diarize) {
118-
// convert to stereo, float
119-
pcmf32s.resize(2);
120-
121-
pcmf32s[0].resize(n);
122-
pcmf32s[1].resize(n);
123-
for (uint64_t i = 0; i < n; i++) {
124-
pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
125-
pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
126-
}
127-
}
49+
if (!read_audio_data(fname_inp, pcmf32, pcmf32s, rwp->diarize)) {
50+
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
51+
return self;
12852
}
12953
{
13054
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race

0 commit comments

Comments
 (0)