|
3 | 3 | #include "../common/common.h"
|
4 | 4 | #include "../params/whisper_params.h"
|
5 | 5 | #include "../nlohmann/json.hpp"
|
6 |
| -#include "common/utils.h" |
| 6 | +#include "../common/utils.h" |
7 | 7 |
|
8 | 8 | using json = nlohmann::json;
|
9 | 9 |
|
@@ -210,55 +210,31 @@ void getReqParameters(const Request &req, whisper_params &params) {
|
210 | 210 |
|
211 | 211 | void getReqParameters(const Request &request, whisper_params ¶ms);
|
212 | 212 |
|
213 |
| -void handleInference(const Request &req, Response &res, std::mutex &whisper_mutex, whisper_params ¶ms, |
214 |
| - whisper_context *ctx, char *arg_audio_file) { |
215 |
| -// aquire whisper model mutex lock |
216 |
| - whisper_mutex.lock(); |
217 |
| - |
218 |
| - // first check user requested fields of the request |
219 |
| - if (!req.has_file("file")) { |
220 |
| - fprintf(stderr, "error: no 'file' field in the request\n"); |
221 |
| - const std::string error_resp = "{\"error\":\"no 'file' field in the request\"}"; |
222 |
| - res.set_content(error_resp, "application/json"); |
223 |
| - whisper_mutex.unlock(); |
224 |
| - return; |
225 |
| - } |
226 |
| - auto audio_file = req.get_file_value("file"); |
227 |
| - |
228 |
| - // check non-required fields |
229 |
| - getReqParameters(req, params); |
| 213 | +bool read_audio_file(std::string audio_format, std::string filename, std::vector<float> & pcmf32, |
| 214 | + std::vector<std::vector<float>> & pcmf32s, bool diarize) { |
230 | 215 |
|
231 |
| - std::string filename{audio_file.filename}; |
232 |
| - printf("%s: Received filename: %s,audio_format\n",get_current_time().c_str(),filename.c_str(),params.audio_format.c_str()); |
233 |
| - |
234 |
| - // audio arrays |
235 |
| - std::vector<float> pcmf32; // mono-channel F32 PCM |
236 |
| - std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM |
237 |
| - |
238 |
| - // write file to temporary file |
239 |
| - std::ofstream temp_file{filename, std::ios::binary}; |
240 |
| - temp_file << audio_file.content; |
241 |
| - |
242 |
| - // read wav content into pcmf32 |
243 |
| - if(params.audio_format=="mp3"){ |
244 |
| - |
245 |
| - }else if(params.audio_format=="m4a"){ |
246 |
| - |
247 |
| - }else{ |
248 |
| - if (!::read_wav(filename, pcmf32, pcmf32s, params.diarize)) { |
| 216 | + // read audio content into pcmf32 |
| 217 | + if (audio_format == "mp3") { |
| 218 | + if (!::read_mp3(filename, pcmf32, pcmf32s, diarize)) { |
| 219 | + fprintf(stderr, "error: failed to read mp3 file '%s'\n", filename.c_str()); |
| 220 | + return false; |
| 221 | + } |
| 222 | + } else if (audio_format == "m4a") { |
| 223 | + if (!::read_m4a(filename, pcmf32, pcmf32s, diarize)) { |
| 224 | + fprintf(stderr, "error: failed to read m4a file '%s'\n", filename.c_str()); |
| 225 | + return false; |
| 226 | + } |
| 227 | + } else { |
| 228 | + if (!::read_wav(filename, pcmf32, pcmf32s, diarize)) { |
249 | 229 | fprintf(stderr, "error: failed to read WAV file '%s'\n", filename.c_str());
|
250 |
| - const std::string error_resp = "{\"error\":\"failed to read WAV file\"}"; |
251 |
| - res.set_content(error_resp, "application/json"); |
252 |
| - whisper_mutex.unlock(); |
253 |
| - return; |
| 230 | + return false; |
254 | 231 | }
|
255 | 232 | }
|
| 233 | + return true; |
| 234 | +} |
256 | 235 |
|
257 |
| - // remove temp file |
258 |
| - std::remove(filename.c_str()); |
259 |
| - |
260 |
| - printf("Successfully loaded %s\n", filename.c_str()); |
261 |
| - |
| 236 | +bool run(std::mutex &whisper_mutex, whisper_params ¶ms, whisper_context *ctx, std::string filename, |
| 237 | + const std::vector<std::vector<float>>& pcmf32s, std::vector<float> pcmf32) { |
262 | 238 | // print system information
|
263 | 239 | {
|
264 | 240 | fprintf(stderr, "\n");
|
@@ -368,31 +344,87 @@ void handleInference(const Request &req, Response &res, std::mutex &whisper_mute
|
368 | 344 | wparams.abort_callback_user_data = &is_aborted;
|
369 | 345 | }
|
370 | 346 |
|
| 347 | + // acquire whisper model mutex lock |
| 348 | + whisper_mutex.lock(); |
371 | 349 | if (whisper_full_parallel(ctx, wparams, pcmf32.data(), pcmf32.size(), params.n_processors) != 0) {
|
372 |
| - fprintf(stderr, "%s: failed to process audio\n", arg_audio_file); |
373 |
| - const std::string error_resp = "{\"error\":\"failed to process audio\"}"; |
374 |
| - res.set_content(error_resp, "application/json"); |
| 350 | + fprintf(stderr, "%s: failed to process audio\n", filename.c_str()); |
375 | 351 | whisper_mutex.unlock();
|
376 |
| - return; |
| 352 | + return false; |
377 | 353 | }
|
| 354 | + whisper_mutex.unlock(); |
| 355 | + return true; |
378 | 356 | }
|
| 357 | +} |
379 | 358 |
|
380 |
| - // return results to user |
381 |
| - if (params.response_format == text_format) { |
382 |
| - std::string results = output_str(ctx, params, pcmf32s); |
383 |
| - res.set_content(results.c_str(), "text/html"); |
384 |
| - } |
385 |
| - // TODO add more output formats |
386 |
| - else { |
387 |
| - std::string results = output_str(ctx, params, pcmf32s); |
| 359 | + |
| 360 | +void handleInference(const Request &request, Response &response, std::mutex &whisper_mutex, whisper_params ¶ms, |
| 361 | + whisper_context *ctx, char *arg_audio_file) { |
| 362 | + // first check user requested fields of the request |
| 363 | + if (!request.has_file("file")) { |
| 364 | + fprintf(stderr, "error: no 'file' field in the request\n"); |
388 | 365 | json jres = json{
|
389 |
| - {"text", results} |
| 366 | + {"code",-1}, |
| 367 | + {"msg", "no 'file' field in the request"} |
390 | 368 | };
|
391 |
| - res.set_content(jres.dump(-1, ' ', false, json::error_handler_t::replace), |
392 |
| - "application/json"); |
| 369 | + auto json_string = jres.dump(-1, ' ', false,json::error_handler_t::replace); |
| 370 | + response.set_content(json_string,"application/json"); |
| 371 | + return; |
393 | 372 | }
|
| 373 | + auto audio_file = request.get_file_value("file"); |
394 | 374 |
|
395 |
| - // return whisper model mutex lock |
396 |
| - whisper_mutex.unlock(); |
397 |
| -} |
| 375 | + // check non-required fields |
| 376 | + getReqParameters(request, params); |
| 377 | + |
| 378 | + std::string filename{audio_file.filename}; |
| 379 | + printf("%s: Received filename: %s,audio_format:%s \n",get_current_time().c_str(),filename.c_str(),params.audio_format.c_str()); |
| 380 | + |
| 381 | + // audio arrays |
| 382 | + std::vector<float> pcmf32; // mono-channel F32 PCM |
| 383 | + std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM |
| 384 | + |
| 385 | + // write file to temporary file |
| 386 | + std::ofstream temp_file{filename, std::ios::binary}; |
| 387 | + temp_file << audio_file.content; |
398 | 388 |
|
| 389 | + bool isOK=read_audio_file(params.audio_format,filename,pcmf32,pcmf32s,params.diarize); |
| 390 | + if(!isOK){ |
| 391 | + json json_obj={ |
| 392 | + {"code",-1}, |
| 393 | + {"msg","error: failed to read WAV file "} |
| 394 | + }; |
| 395 | + auto json_string = json_obj.dump(-1, ' ', false,json::error_handler_t::replace); |
| 396 | + response.set_content(json_string, "application/json"); |
| 397 | + return; |
| 398 | + } |
| 399 | + |
| 400 | + // remove temp file |
| 401 | + std::remove(filename.c_str()); |
| 402 | + |
| 403 | + printf("Successfully loaded %s\n", filename.c_str()); |
| 404 | + |
| 405 | + bool isOk= run(whisper_mutex, params, ctx, filename, pcmf32s, pcmf32); |
| 406 | + if(isOk){ |
| 407 | + // return results to user |
| 408 | + if (params.response_format == text_format) { |
| 409 | + std::string results = output_str(ctx, params, pcmf32s); |
| 410 | + response.set_content(results.c_str(), "text/html"); |
| 411 | + } |
| 412 | + // TODO add more output formats |
| 413 | + else { |
| 414 | + std::string results = output_str(ctx, params, pcmf32s); |
| 415 | + json jres = json{ |
| 416 | + {"code",0}, |
| 417 | + {"text", results} |
| 418 | + }; |
| 419 | + response.set_content(jres.dump(-1, ' ', false, json::error_handler_t::replace), |
| 420 | + "application/json"); |
| 421 | + } |
| 422 | + }else{ |
| 423 | + json jres = json{ |
| 424 | + {"code",-1}, |
| 425 | + {"msg", "run error"} |
| 426 | + }; |
| 427 | + auto json_string = jres.dump(-1, ' ', false,json::error_handler_t::replace); |
| 428 | + response.set_content(json_string,"application/json"); |
| 429 | + } |
| 430 | +} |
0 commit comments