Skip to content

Commit 8a36f86

Browse files
authored
Merge pull request #74 from mmwillet/fix-phonemization-race-condition
Addresses race condition when asynchronously calling espeak-ng.
2 parents dd09dec + fe0fc0a commit 8a36f86

File tree

2 files changed

+82
-7
lines changed

2 files changed

+82
-7
lines changed

include/phonemizer.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <unordered_set>
1515
#include "tokenizer.h"
1616
#include <algorithm>
17+
#include <mutex>
1718

1819
static const std::string ALPHABET = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
1920
static const std::string ACCENTED_A = "àãâäáåÀÃÂÄÁÅ";
@@ -289,6 +290,38 @@ static const std::map<std::string, std::string> CONTRACTION_PHONEMES = {
289290
// characters that Espeak-ng treats as stopping tokens.
290291
static std::string STOPPING_TOKENS = ".,:;!?";
291292

293+
#ifdef ESPEAK_INSTALL
294+
/**
295+
* espeak-ng uses globals to persist and manage its state so it is not compatible with
296+
* threaded parallelism (https://github.com/espeak-ng/espeak-ng/issues/1527).
297+
* This singleton acts as a mutex wrapped provider for all espeak phonemization methods such
298+
* that multiple instances of the kokoro_runner can be initialized and called in parallel.
299+
*/
300+
class espeak_wrapper {
301+
private:
302+
static espeak_wrapper * instance;
303+
static std::mutex mutex;
304+
305+
protected:
306+
espeak_wrapper() {};
307+
~espeak_wrapper() {};
308+
bool espeak_initialized = false;
309+
310+
public:
311+
// singletons aren't copyable
312+
espeak_wrapper(espeak_wrapper &other) = delete;
313+
314+
// singletons aren't assignable
315+
void operator=(const espeak_wrapper &) = delete;
316+
317+
static espeak_wrapper * get_instance();
318+
const espeak_VOICE ** list_voices();
319+
espeak_ERROR set_voice(const char * voice_code);
320+
const char * text_to_phonemes(const void ** textptr, int textmode, int phonememode);
321+
void initialize(espeak_AUDIO_OUTPUT output, int buflength, const char * path, int options);
322+
};
323+
#endif
324+
292325
enum lookup_code {
293326
SUCCESS = 100,
294327
SUCCESS_PARTIAL = 101,

src/phonemizer.cpp

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,48 @@
11
#include "phonemizer.h"
22

3-
/*
3+
#ifdef ESPEAK_INSTALL
4+
/**
5+
* espeak_wrapper functions and assignments
6+
*
7+
* The espeak_wrapper is a singleton which wraps threaded calls to espeak-ng with a shared mutex
8+
*/
9+
10+
// non-const static members must be initialized out of line
11+
espeak_wrapper* espeak_wrapper::instance{nullptr};
12+
std::mutex espeak_wrapper::mutex;
13+
14+
/**
 * Returns the process-wide espeak_wrapper, creating it on first use.
 *
 * The lazy creation happens while holding the class mutex so that two threads
 * making their first call at the same time cannot both observe a null
 * instance and each construct a wrapper. The lock is released on return, so
 * the caller can immediately invoke the member functions (which take the same
 * non-recursive std::mutex themselves) without deadlocking.
 *
 * @return pointer to the shared wrapper; this function never deletes it.
 */
espeak_wrapper * espeak_wrapper::get_instance() {
    std::lock_guard<std::mutex> lock(mutex);
    if (!instance) {
        instance = new espeak_wrapper;
    }
    return instance;
}
20+
21+
/**
 * Calls espeak_ListVoices with a null voice spec while holding the shared mutex.
 *
 * @return the pointer returned by espeak_ListVoices, unchanged.
 */
const espeak_VOICE ** espeak_wrapper::list_voices() {
    const std::lock_guard<std::mutex> guard(mutex);
    const espeak_VOICE ** voices = espeak_ListVoices(nullptr);
    return voices;
}
25+
26+
/**
 * Calls espeak_SetVoiceByName while holding the shared mutex.
 *
 * @param voice_code the voice name handed to espeak_SetVoiceByName.
 * @return the espeak_ERROR reported by espeak_SetVoiceByName, unchanged.
 */
espeak_ERROR espeak_wrapper::set_voice(const char * voice_code) {
    const std::lock_guard<std::mutex> guard(mutex);
    const espeak_ERROR status = espeak_SetVoiceByName(voice_code);
    return status;
}
30+
31+
const char * espeak_wrapper::text_to_phonemes(const void ** textptr, int textmode, int phonememode) {
32+
std::lock_guard<std::mutex> lock(mutex);
33+
return espeak_TextToPhonemes(textptr, textmode, phonememode);
34+
}
35+
36+
/**
 * Initializes espeak-ng at most once for this wrapper.
 *
 * The first call marks the wrapper as initialized and forwards the arguments to
 * espeak_Initialize; every later call returns without touching espeak-ng. The
 * check and the call both happen under the shared mutex.
 * NOTE(review): the value returned by espeak_Initialize is discarded, so a failed
 * initialization is not reported to the caller and is not retried.
 */
void espeak_wrapper::initialize(espeak_AUDIO_OUTPUT output, int buflength, const char * path, int options) {
    const std::lock_guard<std::mutex> guard(mutex);
    if (espeak_initialized) {
        // an earlier call already went through; nothing left to do
        return;
    }
    espeak_initialized = true;
    espeak_Initialize(output, buflength, path, options);
}
43+
#endif
44+
45+
/**
446
* Helper functions for string parsing
547
*/
648
const std::unordered_set<std::string> inline_combine_sets(const std::vector<std::unordered_set<std::string>> sets) {
@@ -131,7 +173,7 @@ std::string parse_voice_code(std::string voice_code) {
131173
if (search_by_id || search_by_lcc) {
132174
voice_code = replace(voice_code, '_', '-');
133175
}
134-
const espeak_VOICE** espeak_voices = espeak_ListVoices(nullptr);
176+
const espeak_VOICE** espeak_voices = espeak_wrapper::get_instance()->list_voices();
135177
// ideally we'd use the espeak voice scores which order voices by preference, but they are only returned when a voice_spec is passed to the list api and
136178
// the voice spec isn't compatible with partials (e.g. country codes, language family code, etc)
137179
int i = 0;
@@ -194,10 +236,10 @@ std::string parse_voice_code(std::string voice_code) {
194236

195237
void update_voice(std::string voice_code) {
196238
#ifdef ESPEAK_INSTALL
197-
espeak_ERROR e = espeak_SetVoiceByName(voice_code.c_str());
239+
espeak_ERROR e = espeak_wrapper::get_instance()->set_voice(voice_code.c_str());
198240
if (e != EE_OK) {
199241
voice_code = parse_voice_code(voice_code);
200-
espeak_SetVoiceByName(voice_code.c_str());
242+
espeak_wrapper::get_instance()->set_voice(voice_code.c_str());
201243
}
202244
#else
203245
TTS_ABORT("Attempted to set voice without espeak-ng installed.");
@@ -951,7 +993,7 @@ bool phonemizer::route(corpus * text, std::string* output, conditions * flags) {
951993
std::string phonemizer::espeak_text_to_phonemes(const char * text) {
952994
int mode = phoneme_mode == IPA ? (0 << 8 | 0x02) : (0 << 8 | 0x01);
953995
const void ** txt_ptr = (const void**)&text;
954-
const char * resp = espeak_TextToPhonemes(txt_ptr, espeakCHARS_UTF8, mode);
996+
const char * resp = espeak_wrapper::get_instance()->text_to_phonemes(txt_ptr, espeakCHARS_UTF8, mode);
955997
return strip(std::string(resp));
956998
}
957999
#endif
@@ -1083,7 +1125,7 @@ struct phonemizer * phonemizer_from_gguf(gguf_context * meta, const std::string
10831125

10841126
if ((phonemizer_type) ph_type == ESPEAK) {
10851127
#ifdef ESPEAK_INSTALL
1086-
espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 0, ESPEAK_DATA_PATH, 0);
1128+
espeak_wrapper::get_instance()->initialize(AUDIO_OUTPUT_SYNCHRONOUS, 0, ESPEAK_DATA_PATH, 0);
10871129

10881130
update_voice(espeak_voice_code);
10891131

@@ -1109,7 +1151,7 @@ struct phonemizer * phonemizer_from_gguf(gguf_context * meta, const std::string
11091151

11101152
struct phonemizer * espeak_phonemizer(bool use_espeak_phonemes, std::string espeak_voice_code) {
11111153
#ifdef ESPEAK_INSTALL
1112-
espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 0, ESPEAK_DATA_PATH, 0);
1154+
espeak_wrapper::get_instance()->initialize(AUDIO_OUTPUT_SYNCHRONOUS, 0, ESPEAK_DATA_PATH, 0);
11131155

11141156
update_voice(espeak_voice_code);
11151157

0 commit comments

Comments
 (0)