13
13
#include < cstdio>
14
14
#include < string>
15
15
#include < thread>
16
+ #include < utility>
16
17
#include < vector>
17
18
#include < cstring>
18
19
23
24
#include < windows.h>
24
25
#endif
25
26
26
- #if defined(_MSC_VER)
27
- #pragma warning(disable: 4244 4267) // possible loss of data
28
- #endif
29
-
30
27
// helper function to replace substrings
31
28
static void replace_all (std::string & s, const std::string & search, const std::string & replace) {
32
29
for (size_t pos = 0 ; ; pos += replace.length ()) {
@@ -383,15 +380,7 @@ static void whisper_print_segment_callback(struct whisper_context * ctx, struct
383
380
}
384
381
}
385
382
386
- static bool output_txt (struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
387
- std::ofstream fout (fname);
388
- if (!fout.is_open ()) {
389
- fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname);
390
- return false ;
391
- }
392
-
393
- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
394
-
383
+ static void output_txt (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
395
384
const int n_segments = whisper_full_n_segments (ctx);
396
385
for (int i = 0 ; i < n_segments; ++i) {
397
386
const char * text = whisper_full_get_segment_text (ctx, i);
@@ -406,19 +395,9 @@ static bool output_txt(struct whisper_context * ctx, const char * fname, const w
406
395
407
396
fout << speaker << text << " \n " ;
408
397
}
409
-
410
- return true ;
411
398
}
412
399
413
- static bool output_vtt (struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
414
- std::ofstream fout (fname);
415
- if (!fout.is_open ()) {
416
- fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname);
417
- return false ;
418
- }
419
-
420
- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
421
-
400
+ static void output_vtt (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
422
401
fout << " WEBVTT\n\n " ;
423
402
424
403
const int n_segments = whisper_full_n_segments (ctx);
@@ -438,19 +417,9 @@ static bool output_vtt(struct whisper_context * ctx, const char * fname, const w
438
417
fout << to_timestamp (t0) << " --> " << to_timestamp (t1) << " \n " ;
439
418
fout << speaker << text << " \n\n " ;
440
419
}
441
-
442
- return true ;
443
420
}
444
421
445
- static bool output_srt (struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
446
- std::ofstream fout (fname);
447
- if (!fout.is_open ()) {
448
- fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname);
449
- return false ;
450
- }
451
-
452
- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
453
-
422
+ static void output_srt (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
454
423
const int n_segments = whisper_full_n_segments (ctx);
455
424
for (int i = 0 ; i < n_segments; ++i) {
456
425
const char * text = whisper_full_get_segment_text (ctx, i);
@@ -467,8 +436,6 @@ static bool output_srt(struct whisper_context * ctx, const char * fname, const w
467
436
fout << to_timestamp (t0, true ) << " --> " << to_timestamp (t1, true ) << " \n " ;
468
437
fout << speaker << text << " \n\n " ;
469
438
}
470
-
471
- return true ;
472
439
}
473
440
474
441
static char * escape_double_quotes_and_backslashes (const char * str) {
@@ -534,15 +501,7 @@ static char * escape_double_quotes_in_csv(const char * str) {
534
501
return escaped;
535
502
}
536
503
537
- static bool output_csv (struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
538
- std::ofstream fout (fname);
539
- if (!fout.is_open ()) {
540
- fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname);
541
- return false ;
542
- }
543
-
544
- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
545
-
504
+ static void output_csv (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
546
505
const int n_segments = whisper_full_n_segments (ctx);
547
506
fout << " start,end," ;
548
507
if (params.diarize && pcmf32s.size () == 2 )
@@ -565,14 +524,9 @@ static bool output_csv(struct whisper_context * ctx, const char * fname, const w
565
524
}
566
525
fout << " \" " << text_escaped << " \"\n " ;
567
526
}
568
-
569
- return true ;
570
527
}
571
528
572
- static bool output_score (struct whisper_context * ctx, const char * fname, const whisper_params & /* params*/ , std::vector<std::vector<float >> /* pcmf32s*/ ) {
573
- std::ofstream fout (fname);
574
- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
575
-
529
+ static void output_score (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & /* params*/ , std::vector<std::vector<float >> /* pcmf32s*/ ) {
576
530
const int n_segments = whisper_full_n_segments (ctx);
577
531
// fprintf(stderr,"segments: %d\n",n_segments);
578
532
for (int i = 0 ; i < n_segments; ++i) {
@@ -585,16 +539,14 @@ static bool output_score(struct whisper_context * ctx, const char * fname, const
585
539
// fprintf(stderr,"token: %s %f\n",token,probability);
586
540
}
587
541
}
588
- return true ;
589
542
}
590
543
591
- static bool output_json (
544
+ static void output_json (
592
545
struct whisper_context * ctx,
593
- const char * fname ,
546
+ std::ofstream & fout ,
594
547
const whisper_params & params,
595
- std::vector<std::vector<float >> pcmf32s,
596
- bool full) {
597
- std::ofstream fout (fname);
548
+ std::vector<std::vector<float >> pcmf32s) {
549
+ const bool full = params.output_jsn_full ;
598
550
int indent = 0 ;
599
551
600
552
auto doindent = [&]() {
@@ -674,12 +626,6 @@ static bool output_json(
674
626
end_obj (end);
675
627
};
676
628
677
- if (!fout.is_open ()) {
678
- fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname);
679
- return false ;
680
- }
681
-
682
- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
683
629
start_obj (nullptr );
684
630
value_s (" systeminfo" , whisper_print_system_info (), false );
685
631
start_obj (" model" );
@@ -753,17 +699,12 @@ static bool output_json(
753
699
754
700
end_arr (true );
755
701
end_obj (true );
756
- return true ;
757
702
}
758
703
759
704
// karaoke video generation
760
705
// outputs a bash script that uses ffmpeg to generate a video with the subtitles
761
706
// TODO: font parameter adjustments
762
- static bool output_wts (struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & params, float t_sec, std::vector<std::vector<float >> pcmf32s) {
763
- std::ofstream fout (fname);
764
-
765
- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
766
-
707
+ static bool output_wts (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float >> pcmf32s, const char * fname_inp, float t_sec, const char * fname_out) {
767
708
static const char * font = params.font_path .c_str ();
768
709
769
710
std::ifstream fin (font);
@@ -879,20 +820,12 @@ static bool output_wts(struct whisper_context * ctx, const char * fname, const c
879
820
880
821
fout.close ();
881
822
882
- fprintf (stderr, " %s: run 'source %s' to generate karaoke video\n " , __func__, fname );
823
+ fprintf (stderr, " # %s: run 'source %s' to generate karaoke video\n " , __func__, fname_out );
883
824
884
825
return true ;
885
826
}
886
827
887
- static bool output_lrc (struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
888
- std::ofstream fout (fname);
889
- if (!fout.is_open ()) {
890
- fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname);
891
- return false ;
892
- }
893
-
894
- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
895
-
828
+ static void output_lrc (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
896
829
fout << " [by:whisper.cpp]\n " ;
897
830
898
831
const int n_segments = whisper_full_n_segments (ctx);
@@ -920,8 +853,6 @@ static bool output_lrc(struct whisper_context * ctx, const char * fname, const w
920
853
921
854
fout << ' [' << timestamp_lrc << ' ]' << speaker << text << " \n " ;
922
855
}
923
-
924
- return true ;
925
856
}
926
857
927
858
@@ -1079,8 +1010,52 @@ int main(int argc, char ** argv) {
1079
1010
}
1080
1011
1081
1012
for (int f = 0 ; f < (int ) params.fname_inp .size (); ++f) {
1082
- const auto fname_inp = params.fname_inp [f];
1083
- const auto fname_out = f < (int ) params.fname_out .size () && !params.fname_out [f].empty () ? params.fname_out [f] : params.fname_inp [f];
1013
+ const auto & fname_inp = params.fname_inp [f];
1014
+ struct fout_factory {
1015
+ std::string fname_out;
1016
+ const size_t basename_length;
1017
+ const bool is_stdout;
1018
+ bool used_stdout;
1019
+ decltype (whisper_print_segment_callback) * const print_segment_callback;
1020
+ std::ofstream fout;
1021
+
1022
+ fout_factory (const std::string & fname_out_, const std::string & fname_inp, whisper_params & params) :
1023
+ fname_out{!fname_out_.empty () ? fname_out_ : fname_inp},
1024
+ basename_length{fname_out.size ()},
1025
+ is_stdout{fname_out == " -" },
1026
+ used_stdout{},
1027
+ print_segment_callback{is_stdout ? nullptr : whisper_print_segment_callback} {
1028
+ if (!print_segment_callback) {
1029
+ params.print_progress = false ;
1030
+ }
1031
+ }
1032
+
1033
+ bool open (const char * ext, const char * function) {
1034
+ if (is_stdout) {
1035
+ if (std::exchange (used_stdout, true )) {
1036
+ fprintf (stderr, " warning: Not appending multiple file formats to stdout\n " );
1037
+ return false ;
1038
+ }
1039
+ #ifdef _WIN32
1040
+ fout = std::ofstream{" CON" };
1041
+ #else
1042
+ fout = std::ofstream{" /dev/stdout" };
1043
+ #endif
1044
+ // Not using fprintf stderr here because it might equal stdout
1045
+ // Also assuming /dev is mounted
1046
+ return true ;
1047
+ }
1048
+ fname_out.resize (basename_length);
1049
+ fname_out += ext;
1050
+ fout = std::ofstream{fname_out};
1051
+ if (!fout.is_open ()) {
1052
+ fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname_out.c_str ());
1053
+ return false ;
1054
+ }
1055
+ fprintf (stderr, " %s: saving output to '%s'\n " , function, fname_out.c_str ());
1056
+ return true ;
1057
+ }
1058
+ } fout_factory{f < (int ) params.fname_out .size () ? params.fname_out [f] : " " , fname_inp, params};
1084
1059
1085
1060
std::vector<float > pcmf32; // mono-channel F32 PCM
1086
1061
std::vector<std::vector<float >> pcmf32s; // stereo-channel F32 PCM
@@ -1185,7 +1160,7 @@ int main(int argc, char ** argv) {
1185
1160
1186
1161
// this callback is called on each new segment
1187
1162
if (!wparams.print_realtime ) {
1188
- wparams.new_segment_callback = whisper_print_segment_callback ;
1163
+ wparams.new_segment_callback = fout_factory. print_segment_callback ;
1189
1164
wparams.new_segment_callback_user_data = &user_data;
1190
1165
}
1191
1166
@@ -1227,54 +1202,26 @@ int main(int argc, char ** argv) {
1227
1202
1228
1203
// output stuff
1229
1204
{
1230
- printf (" \n " );
1231
-
1232
- // output to text file
1233
- if (params.output_txt ) {
1234
- const auto fname_txt = fname_out + " .txt" ;
1235
- output_txt (ctx, fname_txt.c_str (), params, pcmf32s);
1236
- }
1237
-
1238
- // output to VTT file
1239
- if (params.output_vtt ) {
1240
- const auto fname_vtt = fname_out + " .vtt" ;
1241
- output_vtt (ctx, fname_vtt.c_str (), params, pcmf32s);
1242
- }
1243
-
1244
- // output to SRT file
1245
- if (params.output_srt ) {
1246
- const auto fname_srt = fname_out + " .srt" ;
1247
- output_srt (ctx, fname_srt.c_str (), params, pcmf32s);
1248
- }
1249
-
1250
- // output to WTS file
1251
- if (params.output_wts ) {
1252
- const auto fname_wts = fname_out + " .wts" ;
1253
- output_wts (ctx, fname_wts.c_str (), fname_inp.c_str (), params, float (pcmf32.size () + 1000 )/WHISPER_SAMPLE_RATE, pcmf32s);
1254
- }
1255
-
1256
- // output to CSV file
1257
- if (params.output_csv ) {
1258
- const auto fname_csv = fname_out + " .csv" ;
1259
- output_csv (ctx, fname_csv.c_str (), params, pcmf32s);
1260
- }
1261
-
1262
- // output to JSON file
1263
- if (params.output_jsn ) {
1264
- const auto fname_jsn = fname_out + " .json" ;
1265
- output_json (ctx, fname_jsn.c_str (), params, pcmf32s, params.output_jsn_full );
1266
- }
1267
-
1268
- // output to LRC file
1269
- if (params.output_lrc ) {
1270
- const auto fname_lrc = fname_out + " .lrc" ;
1271
- output_lrc (ctx, fname_lrc.c_str (), params, pcmf32s);
1272
- }
1273
-
1274
- // output to score file
1275
- if (params.log_score ) {
1276
- const auto fname_score = fname_out + " .score.txt" ;
1277
- output_score (ctx, fname_score.c_str (), params, pcmf32s);
1205
+ // macros to stringify function name
1206
+ #define output_func (func, ext, param, ...) if (param && fout_factory.open(ext, #func)) {\
1207
+ func (ctx, fout_factory.fout , params, __VA_ARGS__); \
1208
+ }
1209
+ #define output_ext (ext, ...) output_func(output_##ext, " ." #ext, params.output_##ext, __VA_ARGS__)
1210
+
1211
+ output_ext (txt, pcmf32s);
1212
+ output_ext (vtt, pcmf32s);
1213
+ output_ext (srt, pcmf32s);
1214
+ output_ext (wts, pcmf32s, fname_inp.c_str (), float (pcmf32.size () + 1000 )/WHISPER_SAMPLE_RATE, fout_factory.fname_out .c_str ());
1215
+ output_ext (csv, pcmf32s);
1216
+ output_func (output_json, " .json" , params.output_jsn , pcmf32s);
1217
+ output_ext (lrc, pcmf32s);
1218
+ output_func (output_score, " .score.txt" , params.log_score , pcmf32s);
1219
+
1220
+ #undef output_ext
1221
+ #undef output_func
1222
+
1223
+ if (fout_factory.is_stdout && !fout_factory.used_stdout ) {
1224
+ fprintf (stderr, " warning: '--output-file -' used without any other '--output-*'" );
1278
1225
}
1279
1226
}
1280
1227
}
0 commit comments