@@ -5,28 +5,28 @@ using namespace stream_components;
 
 // -- WhisperService --
 
-WhisperService::WhisperService(const struct service_params &server_params,
+WhisperService::WhisperService(const struct service_params &service_params,
                                const struct audio_params &audio_params,
                                const struct whisper_context_params &cparams)
-        : server_params(server_params),
+        : service_params(service_params),
           audio_params(audio_params),
-          ctx(whisper_init_from_file_with_params(server_params.model.c_str(), cparams)) {
+          ctx(whisper_init_from_file_with_params(service_params.model.c_str(), cparams)) {
     {
         fprintf(stderr, "\n");
         if (!whisper_is_multilingual(ctx)) {
-            if (server_params.language != "en" || server_params.translate) {
-                this->server_params.language = "en";
-                this->server_params.translate = false;
+            if (service_params.language != "en" || service_params.translate) {
+                this->service_params.language = "en";
+                this->service_params.translate = false;
                 fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n",
                         __func__);
             }
         }
         fprintf(stderr, "%s: serving with %d threads, lang = %s, task = %s, timestamps = %d ...\n",
                 __func__,
-                server_params.n_threads,
-                server_params.language.c_str(),
-                server_params.translate ? "translate" : "transcribe",
-                server_params.no_timestamps ? 0 : 1);
+                service_params.n_threads,
+                service_params.language.c_str(),
+                service_params.translate ? "translate" : "transcribe",
+                service_params.no_timestamps ? 0 : 1);
 
         // if (!params.use_vad) {
         //     fprintf(stderr, "%s: n_new_line = %d, no_context = %d\n", __func__, n_new_line, params.no_context);
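
Note: the guard in this hunk matters for English-only models. A minimal usage sketch, assuming only the service_params/audio_params fields visible in this diff and the stock whisper.cpp helper whisper_context_default_params(); the model path is hypothetical:

    using namespace stream_components;

    service_params sparams;
    sparams.model     = "models/ggml-base.en.bin"; // hypothetical path
    sparams.language  = "de";                      // deliberately non-"en"
    sparams.translate = true;

    audio_params aparams;

    // With an English-only model, whisper_is_multilingual(ctx) returns 0, so
    // the constructor logs a warning and resets language to "en" and
    // translate to false before serving.
    WhisperService service(sparams, aparams, whisper_context_default_params());
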
@@ -51,25 +51,26 @@ bool WhisperService::process(const float *samples, int sample_count) {
     wparams.max_tokens = 0;
     wparams.token_timestamps = true;
 
-    wparams.translate = server_params.translate;
+    wparams.translate = service_params.translate;
     wparams.single_segment = !audio_params.use_vad;
-    wparams.language = server_params.language.c_str();
-    wparams.n_threads = server_params.n_threads;
+    wparams.language = service_params.language.c_str();
+    wparams.n_threads = service_params.n_threads;
 
     wparams.audio_ctx = audio_params.audio_ctx;
-    wparams.speed_up = server_params.speed_up;
+    wparams.speed_up = service_params.speed_up;
 
-    wparams.tdrz_enable = server_params.tinydiarize; // [TDRZ]
+    wparams.tdrz_enable = service_params.tinydiarize; // [TDRZ]
 
     // disable temperature fallback
     // wparams.temperature_inc = -1.0f;
-    wparams.temperature_inc = server_params.no_fallback ? 0.0f : wparams.temperature_inc;
+    wparams.temperature_inc = service_params.no_fallback ? 0.0f : wparams.temperature_inc;
 
-    wparams.prompt_tokens = server_params.no_context ? nullptr : prompt_tokens.data();
-    wparams.prompt_n_tokens = server_params.no_context ? 0 : prompt_tokens.size();
+    wparams.prompt_tokens = service_params.no_context ? nullptr : prompt_tokens.data();
+    wparams.prompt_n_tokens = service_params.no_context ? 0 : prompt_tokens.size();
 
     // *** Run the actual inference!!! ***
-    if (whisper_full(ctx, wparams, samples, sample_count) != 0) {
+    // if (whisper_full(ctx, wparams, samples, sample_count) != 0) {
+    if (whisper_full_parallel(ctx, wparams, samples, sample_count, service_params.n_processors) != 0) {
         return false;
     }
 
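
The functional change in this hunk is the switch from whisper_full() to whisper_full_parallel(), which splits the input audio into n_processors chunks and transcribes them concurrently (n_processors == 1 behaves like the plain call; accuracy can degrade near chunk boundaries). A minimal sketch of the same call outside the class, assuming a loaded context and 16 kHz mono float samples in a pcmf32 vector:

    // Sketch: assumes ctx came from whisper_init_from_file_with_params()
    // and pcmf32 holds 16 kHz mono float samples.
    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
    wparams.n_threads = 4;

    const int n_processors = 2; // 1 is equivalent to whisper_full()
    if (whisper_full_parallel(ctx, wparams, pcmf32.data(), (int) pcmf32.size(), n_processors) != 0) {
        fprintf(stderr, "inference failed\n");
    }
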
@@ -84,7 +85,7 @@ bool WhisperService::process(const float *samples, int sample_count) {
     // pcmf32_old = std::vector<float>(pcmf32.end() - n_samples_keep, pcmf32.end());
 
     // Add tokens of the last full length segment as the prompt
-    if (!server_params.no_context) {
+    if (!service_params.no_context) {
         prompt_tokens.clear();
 
         const int n_segments = whisper_full_n_segments(ctx);
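
The diff truncates just after n_segments is computed. For context, a hedged sketch of the token-collection loop this branch typically guards, modeled on the stock whisper.cpp stream example rather than this file's exact body; the collected tokens feed back through wparams.prompt_tokens on the next process() call so decoding keeps rolling context:

    // Sketch of the loop this branch likely guards (modeled on whisper.cpp's
    // stream example; the diff cuts off before the body).
    const int n_segments = whisper_full_n_segments(ctx);
    for (int i = 0; i < n_segments; ++i) {
        const int token_count = whisper_full_n_tokens(ctx, i);
        for (int j = 0; j < token_count; ++j) {
            prompt_tokens.push_back(whisper_full_get_token_id(ctx, i, j));
        }
    }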