Skip to content

Commit 7eeabb7

Browse files
committed
Properly preprocess input text, and re-do streaming
1 parent 477b143 commit 7eeabb7

File tree

3 files changed

+300
-45
lines changed

3 files changed

+300
-45
lines changed

README.md

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1-
# Thai TTS (TH TTS)
1+
# THTTS (Thai TTS)
2+
3+
This project is the first implementation of Text-to-Speech (TTS) in Thai using the Wyoming protocol, making it fully compatible with Home Assistant. It enables local, streaming Thai voice synthesis for smarter automations and AI assistants—no cloud required.
4+
5+
Bring your local AI to life in the Thai language with seamless integration, low latency, and a privacy-first design.
26

37
## Model Attribution
48

@@ -71,21 +75,24 @@ You can override other parameters via environment variables (see below).
7175

7276
## Environment Variables
7377

74-
| Variable | Default Value | Description |
75-
|-----------------------|-----------------------------------------------|--------------------------------------------------|
76-
| `THTTS_BACKEND` | `VITS` | Model backend: `VITS`, `F5_V1`, or `F5_V2` |
77-
| `THTTS_HOST` | `0.0.0.0` | Bind address |
78-
| `THTTS_PORT` | `10200` | Port to listen on |
79-
| `THTTS_LOG_LEVEL` | `INFO` | Log level (`DEBUG`, `INFO`, etc.) |
80-
| `THTTS_MODEL` | `VIZINTZOR/MMS-TTS-THAI-FEMALEV2` | VITS model ID |
81-
| `THTTS_REF_AUDIO` | `hf_sample` | F5 reference audio path |
82-
| `THTTS_REF_TEXT` | *(empty)* | F5 reference transcript |
83-
| `THTTS_DEVICE` | `auto` | `auto`, `cpu`, or `cuda` |
84-
| `THTTS_SPEED` | `1.0` | F5 speech speed multiplier |
85-
| `THTTS_NFE_STEPS` | `32` | F5 denoising steps |
86-
| `THTTS_MAX_CONCURRENT`| `1` | Max concurrent synth requests |
87-
| `THTTS_CKPT_FILE` | *(auto-selected by backend)* | F5 checkpoint file path |
88-
| `THTTS_VOCAB_FILE` | *(auto-selected by backend)* | F5 vocab file path |
78+
| Variable | Default Value | Description |
79+
|-------------------------|-----------------------------------------------|--------------------------------------------------|
80+
| `THTTS_BACKEND` | `VITS` | Model backend: `VITS`, `F5_V1`, or `F5_V2` |
81+
| `THTTS_HOST` | `0.0.0.0` | Bind address |
82+
| `THTTS_PORT` | `10200` | Port to listen on |
83+
| `THTTS_LOG_LEVEL` | `INFO` | Log level (`DEBUG`, `INFO`, etc.) |
84+
| `THTTS_MODEL` | `VIZINTZOR/MMS-TTS-THAI-FEMALEV2` | VITS model ID |
85+
| `THTTS_REF_AUDIO` | `hf_sample` | F5 reference audio path |
86+
| `THTTS_REF_TEXT` | *(empty)* | F5 reference transcript |
87+
| `THTTS_DEVICE` | `auto` | `auto`, `cpu`, or `cuda` |
88+
| `THTTS_SPEED` | `1.0` | F5 speech speed multiplier |
89+
| `THTTS_NFE_STEPS` | `32` | F5 denoising steps |
90+
| `THTTS_MAX_CONCURRENT` | `1` | Max concurrent synth requests |
91+
| `THTTS_CKPT_FILE` | *(auto-selected by backend)* | F5 checkpoint file path |
92+
| `THTTS_VOCAB_FILE` | *(auto-selected by backend)* | F5 vocab file path |
93+
| `THTTS_SPEAK_SPEED` | | |
94+
| `THTTS_MAX_WAIT_MS` | | |
95+
| `THTTS_MIN_SENT_CHARS` | | |
8996

9097

9198
## 3. Docker Compose (NVIDIA GPU)

src/util/cleantext.py

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
# src: https://github.com/VYNCX/F5-TTS-THAI/blob/99b8314f66a14fc2f0a6b53e5122829fbdf9c59c/src/f5_tts/cleantext/th_repeat.py
2+
# src: https://github.com/VYNCX/F5-TTS-THAI/blob/99b8314f66a14fc2f0a6b53e5122829fbdf9c59c/src/f5_tts/cleantext/number_tha.py
3+
import re
4+
5+
from pythainlp.tokenize import syllable_tokenize
6+
7+
8+
def remove_symbol(text: str):
    """Delete punctuation-like symbols and attach the Thai repeat mark to its word.

    Every character that appears in ``symbols`` is removed from ``text``.
    Afterwards a space directly in front of the repetition mark (ๆ) is dropped,
    so the mark sits right after the text it repeats.
    """
    symbols = r"{}[]()-_?/\\|!*%$&@#^<>+-\";:~\`=“”"
    # A single translate pass deletes the same set of characters that the
    # symbol list names (duplicates in the list are harmless).
    without_symbols = text.translate(str.maketrans("", "", symbols))
    return without_symbols.replace(" ๆ", "ๆ")
14+
15+
16+
def process_thai_repeat(text: str):
    """Expand the Thai repetition mark (ๆ) by doubling the syllable before it.

    The text is first passed through ``remove_symbol`` and then split with
    ``syllable_tokenize``. Whenever a token is immediately followed by a "ๆ"
    token, the token is written twice and the mark is dropped. All other
    tokens are copied unchanged. The pieces are concatenated without
    separators.
    """
    syllables = syllable_tokenize(remove_symbol(text))
    total = len(syllables)

    expanded = []
    pos = 0
    while pos < total:
        current = syllables[pos]
        followed_by_mark = pos + 1 < total and syllables[pos + 1] == "ๆ"
        expanded.append(current)
        if followed_by_mark:
            # Emit the syllable a second time and consume the mark as well.
            expanded.append(current)
            pos += 2
        else:
            pos += 1

    return "".join(expanded)
34+
35+
36+
def number_to_thai_text(num, digit_by_digit=False):
    """Spell out a non-negative integer in Thai words.

    Args:
        num: Non-negative integer to read.
        digit_by_digit: When true, every digit of ``num`` is read on its own
            and the digit words are joined with single spaces.

    Returns:
        The Thai reading of ``num``. Zero always reads as "ศูนย์".

    Raises:
        ValueError: If ``num`` is negative.
    """
    thai_digits = {
        0: "ศูนย์", 1: "หนึ่ง", 2: "สอง", 3: "สาม", 4: "สี่",
        5: "ห้า", 6: "หก", 7: "เจ็ด", 8: "แปด", 9: "เก้า",
    }
    thai_places = ["", "สิบ", "ร้อย", "พัน", "หมื่น", "แสน", "ล้าน"]

    if num == 0:
        return thai_digits[0]

    if num < 0:
        # Negative input previously failed deep inside int('-'); fail clearly.
        raise ValueError("number_to_thai_text expects a non-negative number")

    if digit_by_digit:
        return " ".join(thai_digits[int(d)] for d in str(num))

    def spell_group(value, has_higher_digits):
        """Spell 1..999999; ``has_higher_digits`` marks a preceding millions part."""
        digits = [int(d) for d in reversed(str(value))]  # units first
        parts = []
        for place, digit in enumerate(digits):
            if digit == 0:
                continue  # zeros are silent inside a number
            if place == 0 and digit == 1 and (len(digits) > 1 or has_higher_digits):
                # A trailing 1 reads "เอ็ด" in every number above ten
                # (11, 21, 31, 101, 1,000,001, ...), not only after 1x/2x.
                parts.append("เอ็ด")
            elif place == 1 and digit == 1:
                parts.append(thai_places[place])  # "สิบ", never "หนึ่งสิบ"
            elif place == 1 and digit == 2:
                parts.append("ยี่" + thai_places[place])  # "ยี่สิบ"
            else:
                parts.append(thai_digits[digit] + thai_places[place])
        parts.reverse()
        return "".join(parts)

    if num >= 1000000:
        # Read the millions count recursively, then the remaining six digits.
        millions, remainder = divmod(num, 1000000)
        result = number_to_thai_text(millions) + "ล้าน"
        if remainder > 0:
            result += spell_group(remainder, True)
        return result

    return spell_group(num, False)
91+
92+
93+
def replace_numbers_with_thai(text: str):
    """Replace the numbers in ``text`` with their Thai reading.

    The text is split on whitespace and each token is handled on its own:

    * A token made only of digits and commas, with an optional ``.digits``
      tail, is read as a number. Commas are ignored. An integer part longer
      than seven digits is read digit by digit, and decimals are always read
      digit by digit after "จุด".
    * Any other token that contains digits has each digit run read digit by
      digit, while the surrounding non-digit runs are kept intact.
    * Tokens without digits are copied unchanged.

    Returns:
        The processed tokens joined with single spaces.
    """

    def read_digits(digits: str) -> str:
        # Work on the raw string so leading zeros (e.g. phone numbers) survive.
        return " ".join(number_to_thai_text(int(d)) for d in digits)

    def read_number(token: str) -> str:
        num_str = token.replace(",", "")
        if not num_str:
            return token  # the token was nothing but commas

        integer_part, _, decimal_part = num_str.partition(".")

        if len(integer_part) > 7:
            spoken = read_digits(integer_part)
        else:
            # An empty integer part (e.g. ",.5") is treated as zero.
            spoken = number_to_thai_text(int(integer_part) if integer_part else 0)

        if decimal_part:
            spoken += "จุด " + read_digits(decimal_part)
        return spoken

    spoken_words = []
    for word in text.split():
        if re.fullmatch(r"[\d,]+(\.\d+)?", word):
            spoken_words.append(read_number(word))
        elif re.search(r"\d", word):
            # \d matches exactly the decimal digits int() accepts, unlike
            # str.isdigit(), which also accepts characters such as "²".
            # Non-digit runs stay whole so Thai words and their combining
            # marks are not split character by character.
            runs = re.findall(r"\d+|\D+", word)
            spoken_words.append(
                " ".join(read_digits(run) if run[0].isdecimal() else run for run in runs)
            )
        else:
            spoken_words.append(word)

    return " ".join(spoken_words)

0 commit comments

Comments
 (0)