Skip to content

Commit c9e59b1

Browse files
authored
Merge branch 'ggml-org:master' into flat-fix
2 parents 0cc4f5e + 09846f4 commit c9e59b1

File tree

16 files changed

+99
-231
lines changed

16 files changed

+99
-231
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ if (MSVC)
236236
endfunction()
237237

238238
if (WHISPER_BUILD_EXAMPLES)
239+
disable_msvc_warnings(whisper)
239240
disable_msvc_warnings(common)
240241
disable_msvc_warnings(common-sdl)
241242
disable_msvc_warnings(lsp)

examples/cli/cli.cpp

Lines changed: 80 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <cstdio>
1414
#include <string>
1515
#include <thread>
16+
#include <utility>
1617
#include <vector>
1718
#include <cstring>
1819

@@ -23,10 +24,6 @@
2324
#include <windows.h>
2425
#endif
2526

26-
#if defined(_MSC_VER)
27-
#pragma warning(disable: 4244 4267) // possible loss of data
28-
#endif
29-
3027
// helper function to replace substrings
3128
static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
3229
for (size_t pos = 0; ; pos += replace.length()) {
@@ -383,15 +380,7 @@ static void whisper_print_segment_callback(struct whisper_context * ctx, struct
383380
}
384381
}
385382

386-
static bool output_txt(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
387-
std::ofstream fout(fname);
388-
if (!fout.is_open()) {
389-
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
390-
return false;
391-
}
392-
393-
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
394-
383+
static void output_txt(struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
395384
const int n_segments = whisper_full_n_segments(ctx);
396385
for (int i = 0; i < n_segments; ++i) {
397386
const char * text = whisper_full_get_segment_text(ctx, i);
@@ -406,19 +395,9 @@ static bool output_txt(struct whisper_context * ctx, const char * fname, const w
406395

407396
fout << speaker << text << "\n";
408397
}
409-
410-
return true;
411398
}
412399

413-
static bool output_vtt(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
414-
std::ofstream fout(fname);
415-
if (!fout.is_open()) {
416-
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
417-
return false;
418-
}
419-
420-
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
421-
400+
static void output_vtt(struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
422401
fout << "WEBVTT\n\n";
423402

424403
const int n_segments = whisper_full_n_segments(ctx);
@@ -438,19 +417,9 @@ static bool output_vtt(struct whisper_context * ctx, const char * fname, const w
438417
fout << to_timestamp(t0) << " --> " << to_timestamp(t1) << "\n";
439418
fout << speaker << text << "\n\n";
440419
}
441-
442-
return true;
443420
}
444421

445-
static bool output_srt(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
446-
std::ofstream fout(fname);
447-
if (!fout.is_open()) {
448-
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
449-
return false;
450-
}
451-
452-
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
453-
422+
static void output_srt(struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
454423
const int n_segments = whisper_full_n_segments(ctx);
455424
for (int i = 0; i < n_segments; ++i) {
456425
const char * text = whisper_full_get_segment_text(ctx, i);
@@ -467,8 +436,6 @@ static bool output_srt(struct whisper_context * ctx, const char * fname, const w
467436
fout << to_timestamp(t0, true) << " --> " << to_timestamp(t1, true) << "\n";
468437
fout << speaker << text << "\n\n";
469438
}
470-
471-
return true;
472439
}
473440

474441
static char * escape_double_quotes_and_backslashes(const char * str) {
@@ -534,15 +501,7 @@ static char * escape_double_quotes_in_csv(const char * str) {
534501
return escaped;
535502
}
536503

537-
static bool output_csv(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
538-
std::ofstream fout(fname);
539-
if (!fout.is_open()) {
540-
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
541-
return false;
542-
}
543-
544-
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
545-
504+
static void output_csv(struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
546505
const int n_segments = whisper_full_n_segments(ctx);
547506
fout << "start,end,";
548507
if (params.diarize && pcmf32s.size() == 2)
@@ -565,14 +524,9 @@ static bool output_csv(struct whisper_context * ctx, const char * fname, const w
565524
}
566525
fout << "\"" << text_escaped << "\"\n";
567526
}
568-
569-
return true;
570527
}
571528

572-
static bool output_score(struct whisper_context * ctx, const char * fname, const whisper_params & /*params*/, std::vector<std::vector<float>> /*pcmf32s*/) {
573-
std::ofstream fout(fname);
574-
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
575-
529+
static void output_score(struct whisper_context * ctx, std::ofstream & fout, const whisper_params & /*params*/, std::vector<std::vector<float>> /*pcmf32s*/) {
576530
const int n_segments = whisper_full_n_segments(ctx);
577531
// fprintf(stderr,"segments: %d\n",n_segments);
578532
for (int i = 0; i < n_segments; ++i) {
@@ -585,16 +539,14 @@ static bool output_score(struct whisper_context * ctx, const char * fname, const
585539
// fprintf(stderr,"token: %s %f\n",token,probability);
586540
}
587541
}
588-
return true;
589542
}
590543

591-
static bool output_json(
544+
static void output_json(
592545
struct whisper_context * ctx,
593-
const char * fname,
546+
std::ofstream & fout,
594547
const whisper_params & params,
595-
std::vector<std::vector<float>> pcmf32s,
596-
bool full) {
597-
std::ofstream fout(fname);
548+
std::vector<std::vector<float>> pcmf32s) {
549+
const bool full = params.output_jsn_full;
598550
int indent = 0;
599551

600552
auto doindent = [&]() {
@@ -674,12 +626,6 @@ static bool output_json(
674626
end_obj(end);
675627
};
676628

677-
if (!fout.is_open()) {
678-
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
679-
return false;
680-
}
681-
682-
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
683629
start_obj(nullptr);
684630
value_s("systeminfo", whisper_print_system_info(), false);
685631
start_obj("model");
@@ -753,17 +699,12 @@ static bool output_json(
753699

754700
end_arr(true);
755701
end_obj(true);
756-
return true;
757702
}
758703

759704
// karaoke video generation
760705
// outputs a bash script that uses ffmpeg to generate a video with the subtitles
761706
// TODO: font parameter adjustments
762-
static bool output_wts(struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & params, float t_sec, std::vector<std::vector<float>> pcmf32s) {
763-
std::ofstream fout(fname);
764-
765-
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
766-
707+
static bool output_wts(struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float>> pcmf32s, const char * fname_inp, float t_sec, const char * fname_out) {
767708
static const char * font = params.font_path.c_str();
768709

769710
std::ifstream fin(font);
@@ -879,20 +820,12 @@ static bool output_wts(struct whisper_context * ctx, const char * fname, const c
879820

880821
fout.close();
881822

882-
fprintf(stderr, "%s: run 'source %s' to generate karaoke video\n", __func__, fname);
823+
fprintf(stderr, "# %s: run 'source %s' to generate karaoke video\n", __func__, fname_out);
883824

884825
return true;
885826
}
886827

887-
static bool output_lrc(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
888-
std::ofstream fout(fname);
889-
if (!fout.is_open()) {
890-
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
891-
return false;
892-
}
893-
894-
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
895-
828+
static void output_lrc(struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
896829
fout << "[by:whisper.cpp]\n";
897830

898831
const int n_segments = whisper_full_n_segments(ctx);
@@ -920,8 +853,6 @@ static bool output_lrc(struct whisper_context * ctx, const char * fname, const w
920853

921854
fout << '[' << timestamp_lrc << ']' << speaker << text << "\n";
922855
}
923-
924-
return true;
925856
}
926857

927858

@@ -1079,8 +1010,52 @@ int main(int argc, char ** argv) {
10791010
}
10801011

10811012
for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
1082-
const auto fname_inp = params.fname_inp[f];
1083-
const auto fname_out = f < (int) params.fname_out.size() && !params.fname_out[f].empty() ? params.fname_out[f] : params.fname_inp[f];
1013+
const auto & fname_inp = params.fname_inp[f];
1014+
struct fout_factory {
1015+
std::string fname_out;
1016+
const size_t basename_length;
1017+
const bool is_stdout;
1018+
bool used_stdout;
1019+
decltype(whisper_print_segment_callback) * const print_segment_callback;
1020+
std::ofstream fout;
1021+
1022+
fout_factory (const std::string & fname_out_, const std::string & fname_inp, whisper_params & params) :
1023+
fname_out{!fname_out_.empty() ? fname_out_ : fname_inp},
1024+
basename_length{fname_out.size()},
1025+
is_stdout{fname_out == "-"},
1026+
used_stdout{},
1027+
print_segment_callback{is_stdout ? nullptr : whisper_print_segment_callback} {
1028+
if (!print_segment_callback) {
1029+
params.print_progress = false;
1030+
}
1031+
}
1032+
1033+
bool open(const char * ext, const char * function) {
1034+
if (is_stdout) {
1035+
if (std::exchange(used_stdout, true)) {
1036+
fprintf(stderr, "warning: Not appending multiple file formats to stdout\n");
1037+
return false;
1038+
}
1039+
#ifdef _WIN32
1040+
fout = std::ofstream{"CON"};
1041+
#else
1042+
fout = std::ofstream{"/dev/stdout"};
1043+
#endif
1044+
// Not using fprintf stderr here because it might equal stdout
1045+
// Also assuming /dev is mounted
1046+
return true;
1047+
}
1048+
fname_out.resize(basename_length);
1049+
fname_out += ext;
1050+
fout = std::ofstream{fname_out};
1051+
if (!fout.is_open()) {
1052+
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname_out.c_str());
1053+
return false;
1054+
}
1055+
fprintf(stderr, "%s: saving output to '%s'\n", function, fname_out.c_str());
1056+
return true;
1057+
}
1058+
} fout_factory{f < (int) params.fname_out.size() ? params.fname_out[f] : "", fname_inp, params};
10841059

10851060
std::vector<float> pcmf32; // mono-channel F32 PCM
10861061
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
@@ -1185,7 +1160,7 @@ int main(int argc, char ** argv) {
11851160

11861161
// this callback is called on each new segment
11871162
if (!wparams.print_realtime) {
1188-
wparams.new_segment_callback = whisper_print_segment_callback;
1163+
wparams.new_segment_callback = fout_factory.print_segment_callback;
11891164
wparams.new_segment_callback_user_data = &user_data;
11901165
}
11911166

@@ -1227,54 +1202,26 @@ int main(int argc, char ** argv) {
12271202

12281203
// output stuff
12291204
{
1230-
printf("\n");
1231-
1232-
// output to text file
1233-
if (params.output_txt) {
1234-
const auto fname_txt = fname_out + ".txt";
1235-
output_txt(ctx, fname_txt.c_str(), params, pcmf32s);
1236-
}
1237-
1238-
// output to VTT file
1239-
if (params.output_vtt) {
1240-
const auto fname_vtt = fname_out + ".vtt";
1241-
output_vtt(ctx, fname_vtt.c_str(), params, pcmf32s);
1242-
}
1243-
1244-
// output to SRT file
1245-
if (params.output_srt) {
1246-
const auto fname_srt = fname_out + ".srt";
1247-
output_srt(ctx, fname_srt.c_str(), params, pcmf32s);
1248-
}
1249-
1250-
// output to WTS file
1251-
if (params.output_wts) {
1252-
const auto fname_wts = fname_out + ".wts";
1253-
output_wts(ctx, fname_wts.c_str(), fname_inp.c_str(), params, float(pcmf32.size() + 1000)/WHISPER_SAMPLE_RATE, pcmf32s);
1254-
}
1255-
1256-
// output to CSV file
1257-
if (params.output_csv) {
1258-
const auto fname_csv = fname_out + ".csv";
1259-
output_csv(ctx, fname_csv.c_str(), params, pcmf32s);
1260-
}
1261-
1262-
// output to JSON file
1263-
if (params.output_jsn) {
1264-
const auto fname_jsn = fname_out + ".json";
1265-
output_json(ctx, fname_jsn.c_str(), params, pcmf32s, params.output_jsn_full);
1266-
}
1267-
1268-
// output to LRC file
1269-
if (params.output_lrc) {
1270-
const auto fname_lrc = fname_out + ".lrc";
1271-
output_lrc(ctx, fname_lrc.c_str(), params, pcmf32s);
1272-
}
1273-
1274-
// output to score file
1275-
if (params.log_score) {
1276-
const auto fname_score = fname_out + ".score.txt";
1277-
output_score(ctx, fname_score.c_str(), params, pcmf32s);
1205+
// macros to stringify function name
1206+
#define output_func(func, ext, param, ...) if (param && fout_factory.open(ext, #func)) {\
1207+
func(ctx, fout_factory.fout, params, __VA_ARGS__); \
1208+
}
1209+
#define output_ext(ext, ...) output_func(output_##ext, "." #ext, params.output_##ext, __VA_ARGS__)
1210+
1211+
output_ext(txt, pcmf32s);
1212+
output_ext(vtt, pcmf32s);
1213+
output_ext(srt, pcmf32s);
1214+
output_ext(wts, pcmf32s, fname_inp.c_str(), float(pcmf32.size() + 1000)/WHISPER_SAMPLE_RATE, fout_factory.fname_out.c_str());
1215+
output_ext(csv, pcmf32s);
1216+
output_func(output_json, ".json", params.output_jsn, pcmf32s);
1217+
output_ext(lrc, pcmf32s);
1218+
output_func(output_score, ".score.txt", params.log_score, pcmf32s);
1219+
1220+
#undef output_ext
1221+
#undef output_func
1222+
1223+
// stdout was selected as the destination but no output format claimed it;
// terminate the warning with a newline like the other diagnostics
if (fout_factory.is_stdout && !fout_factory.used_stdout) {
    fprintf(stderr, "warning: '--output-file -' used without any other '--output-*'\n");
}
12791226
}
12801227
}

examples/common-whisper.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,6 @@
2626
#define MINIAUDIO_IMPLEMENTATION
2727
#include "miniaudio.h"
2828

29-
#if defined(_MSC_VER)
30-
#pragma warning(disable: 4244 4267) // possible loss of data
31-
#endif
32-
3329
#ifdef _WIN32
3430
#include <fcntl.h>
3531
#include <io.h>

examples/common.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,6 @@
1010
#include <regex>
1111
#include <sstream>
1212

13-
#if defined(_MSC_VER)
14-
#pragma warning(disable: 4244 4267) // possible loss of data
15-
#endif
16-
1713
// Function to check if the next argument exists
1814
static std::string get_next_arg(int& i, int argc, char** argv, const std::string& flag, gpt_params& params) {
1915
if (i + 1 < argc && argv[i + 1][0] != '-') {

0 commit comments

Comments
 (0)