@@ -38,6 +38,7 @@ struct whisper_params {
38
38
bool print_progress = false ;
39
39
bool no_timestamps = false ;
40
40
bool no_prints = false ;
41
+ bool detect_language= false ;
41
42
bool use_gpu = true ;
42
43
bool flash_attn = false ;
43
44
bool comma_in_time = true ;
@@ -130,6 +131,11 @@ void whisper_print_segment_callback(struct whisper_context * ctx, struct whisper
130
131
131
132
// Log sink that discards everything it is given. It is handed to
// whisper_log_set() when params.no_prints is set, so library logging is dropped.
void cb_log_disable(enum ggml_log_level, const char *, void *) {
    // intentionally empty
}
132
133
134
// Output of one transcription run, filled in by run_with_progress() and
// converted to a JS value in ProgressWorker::OnOK().
struct whisper_result {
    // One row per segment. Each row is filled with three strings, in order:
    // start timestamp, end timestamp, segment text.
    std::vector<std::vector<std::string>> segments;

    // Language name reported by whisper for the processed audio. Only written
    // when detection was requested (detect_language, or language == "auto");
    // left empty otherwise.
    std::string language;
};
138
+
133
139
class ProgressWorker : public Napi ::AsyncWorker {
134
140
public:
135
141
ProgressWorker (Napi::Function& callback, whisper_params params, Napi::Function progress_callback, Napi::Env env)
@@ -160,15 +166,27 @@ class ProgressWorker : public Napi::AsyncWorker {
160
166
161
167
void OnOK () override {
162
168
Napi::HandleScope scope (Env ());
163
- Napi::Object res = Napi::Array::New (Env (), result.size ());
164
- for (uint64_t i = 0 ; i < result.size (); ++i) {
169
+
170
+ if (params.detect_language ) {
171
+ Napi::Object resultObj = Napi::Object::New (Env ());
172
+ resultObj.Set (" language" , Napi::String::New (Env (), result.language ));
173
+ Callback ().Call ({Env ().Null (), resultObj});
174
+ }
175
+
176
+ Napi::Object returnObj = Napi::Object::New (Env ());
177
+ if (!result.language .empty ()) {
178
+ returnObj.Set (" language" , Napi::String::New (Env (), result.language ));
179
+ }
180
+ Napi::Array transcriptionArray = Napi::Array::New (Env (), result.segments .size ());
181
+ for (uint64_t i = 0 ; i < result.segments .size (); ++i) {
165
182
Napi::Object tmp = Napi::Array::New (Env (), 3 );
166
183
for (uint64_t j = 0 ; j < 3 ; ++j) {
167
- tmp[j] = Napi::String::New (Env (), result[i][j]);
184
+ tmp[j] = Napi::String::New (Env (), result. segments [i][j]);
168
185
}
169
- res[i] = tmp;
170
- }
171
- Callback ().Call ({Env ().Null (), res});
186
+ transcriptionArray[i] = tmp;
187
+ }
188
+ returnObj.Set (" transcription" , transcriptionArray);
189
+ Callback ().Call ({Env ().Null (), returnObj});
172
190
}
173
191
174
192
// Progress callback function - using thread-safe function
@@ -185,12 +203,12 @@ class ProgressWorker : public Napi::AsyncWorker {
185
203
186
204
private:
187
205
whisper_params params;
188
- std::vector<std::vector<std::string>> result;
206
+ whisper_result result;
189
207
Napi::Env env;
190
208
Napi::ThreadSafeFunction tsfn;
191
209
192
210
// Custom run function with progress callback support
193
- int run_with_progress (whisper_params ¶ms, std::vector<std::vector<std::string>> & result) {
211
+ int run_with_progress (whisper_params ¶ms, whisper_result & result) {
194
212
if (params.no_prints ) {
195
213
whisper_log_set (cb_log_disable, NULL );
196
214
}
@@ -279,7 +297,8 @@ class ProgressWorker : public Napi::AsyncWorker {
279
297
wparams.print_timestamps = !params.no_timestamps ;
280
298
wparams.print_special = params.print_special ;
281
299
wparams.translate = params.translate ;
282
- wparams.language = params.language .c_str ();
300
+ wparams.language = params.detect_language ? " auto" : params.language .c_str ();
301
+ wparams.detect_language = params.detect_language ;
283
302
wparams.n_threads = params.n_threads ;
284
303
wparams.n_max_text_ctx = params.max_context >= 0 ? params.max_context : wparams.n_max_text_ctx ;
285
304
wparams.offset_ms = params.offset_t_ms ;
@@ -330,18 +349,22 @@ class ProgressWorker : public Napi::AsyncWorker {
330
349
return 10 ;
331
350
}
332
351
}
333
- }
352
+ }
334
353
354
+ if (params.detect_language || params.language == " auto" ) {
355
+ result.language = whisper_lang_str (whisper_full_lang_id (ctx));
356
+ }
335
357
const int n_segments = whisper_full_n_segments (ctx);
336
- result.resize (n_segments);
358
+ result.segments .resize (n_segments);
359
+
337
360
for (int i = 0 ; i < n_segments; ++i) {
338
361
const char * text = whisper_full_get_segment_text (ctx, i);
339
362
const int64_t t0 = whisper_full_get_segment_t0 (ctx, i);
340
363
const int64_t t1 = whisper_full_get_segment_t1 (ctx, i);
341
364
342
- result[i].emplace_back (to_timestamp (t0, params.comma_in_time ));
343
- result[i].emplace_back (to_timestamp (t1, params.comma_in_time ));
344
- result[i].emplace_back (text);
365
+ result. segments [i].emplace_back (to_timestamp (t0, params.comma_in_time ));
366
+ result. segments [i].emplace_back (to_timestamp (t1, params.comma_in_time ));
367
+ result. segments [i].emplace_back (text);
345
368
}
346
369
347
370
whisper_print_timings (ctx);
@@ -366,6 +389,7 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
366
389
bool flash_attn = whisper_params.Get (" flash_attn" ).As <Napi::Boolean>();
367
390
bool no_prints = whisper_params.Get (" no_prints" ).As <Napi::Boolean>();
368
391
bool no_timestamps = whisper_params.Get (" no_timestamps" ).As <Napi::Boolean>();
392
+ bool detect_language = whisper_params.Get (" detect_language" ).As <Napi::Boolean>();
369
393
int32_t audio_ctx = whisper_params.Get (" audio_ctx" ).As <Napi::Number>();
370
394
bool comma_in_time = whisper_params.Get (" comma_in_time" ).As <Napi::Boolean>();
371
395
int32_t max_len = whisper_params.Get (" max_len" ).As <Napi::Number>();
@@ -418,6 +442,7 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
418
442
params.max_context = max_context;
419
443
params.print_progress = print_progress;
420
444
params.prompt = prompt;
445
+ params.detect_language = detect_language;
421
446
422
447
Napi::Function callback = info[1 ].As <Napi::Function>();
423
448
// Create a new Worker class with progress callback support
0 commit comments