Skip to content

Commit d934e0a

Browse files
authored
Merge pull request #1348 from xinnan-tech/custom_tts_api
Custom tts api
2 parents 997e171 + 2ecce64 commit d934e0a

File tree

6 files changed

+62
-15
lines changed

6 files changed

+62
-15
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
-- 修改自定义TTS接口请求定义
2+
update `ai_model_provider` set `fields` =
3+
'[{"key":"url","label":"服务地址","type":"string"},{"key":"method","label":"请求方式","type":"string"},{"key":"params","label":"请求参数","type":"dict","dict_name":"params"},{"key":"headers","label":"请求头","type":"dict","dict_name":"headers"},{"key":"format","label":"音频格式","type":"string"},{"key":"output_dir","label":"输出目录","type":"string"}]'
4+
where `id` = 'SYSTEM_TTS_custom';
5+
6+
-- 修改自定义TTS配置说明
7+
UPDATE `ai_model_config` SET
8+
`doc_link` = NULL,
9+
`remark` = '自定义TTS配置说明:
10+
1. 自定义的TTS接口服务,请求参数可自定义,可接入众多TTS服务
11+
2. 以本地部署的KokoroTTS为例
12+
3. 如果只有cpu运行:docker run -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-cpu:latest
13+
4. 如果只有gpu运行:docker run --gpus all -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-gpu:latest
14+
配置说明:
15+
1. 在params中配置请求参数,使用JSON格式
16+
例如KokoroTTS:{ "input": "{prompt_text}", "speed": 1, "voice": "zm_yunxi", "stream": true, "download_format": "mp3", "response_format": "mp3", "return_download_link": true }
17+
2. 在headers中配置请求头
18+
3. 设置返回音频格式' WHERE `id` = 'TTS_CustomTTS';

main/manager-api/src/main/resources/db/changelog/db.changelog-master.yaml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,4 +155,11 @@ databaseChangeLog:
155155
changes:
156156
- sqlFile:
157157
encoding: utf8
158-
path: classpath:db/changelog/202505201744.sql
158+
path: classpath:db/changelog/202505201744.sql
159+
- changeSet:
160+
id: 202505151451
161+
author: hsoftxl
162+
changes:
163+
- sqlFile:
164+
encoding: utf8
165+
path: classpath:db/changelog/202505151451.sql

main/manager-web/src/components/DeviceItem.vue

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
</div>
2929
<div class="settings-btn" @click="handleChatHistory"
3030
:class="{ 'disabled-btn': device.memModelId === 'Memory_nomem' }">
31-
<el-tooltip v-if="device.memModelId === 'Memory_nomem'" content="未开启记忆" placement="top">
31+
<el-tooltip v-if="device.memModelId === 'Memory_nomem'" content="请先在“配置角色”界面开启记忆" placement="top">
3232
<span>聊天记录</span>
3333
</el-tooltip>
3434
<span v-else>聊天记录</span>

main/manager-web/src/components/HeaderBar.vue

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,11 @@
3535
OTA管理
3636
</div>
3737
<el-dropdown v-if="isSuperAdmin" trigger="click" class="equipment-management more-dropdown"
38-
:class="{ 'active-tab': $route.path === '/dict-management' || $route.path === '/params-management' || $route.path === '/provider-management' }" @visible-change="handleParamDropdownVisibleChange">
38+
:class="{ 'active-tab': $route.path === '/dict-management' || $route.path === '/params-management' || $route.path === '/provider-management' || $route.path === '/server-side-management' }"
39+
@visible-change="handleParamDropdownVisibleChange">
3940
<span class="el-dropdown-link">
4041
<img loading="lazy" alt="" src="@/assets/header/param_management.png"
41-
:style="{ filter: $route.path === '/dict-management' || $route.path === '/params-management' || $route.path === '/provider-management' ? 'brightness(0) invert(1)' : 'None' }" />
42+
:style="{ filter: $route.path === '/dict-management' || $route.path === '/params-management' || $route.path === '/provider-management' || $route.path === '/server-side-management' ? 'brightness(0) invert(1)' : 'None' }" />
4243
参数字典
4344
<i class="el-icon-arrow-down el-icon--right" :class="{ 'rotate-down': paramDropdownVisible }"></i>
4445
</span>

main/xiaozhi-server/config.yaml

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -675,17 +675,24 @@ TTS:
675675
speed: 1
676676
output_dir: tmp/
677677
CustomTTS:
678-
# 自定义的TTS接口服务,请求参数可自定义
679-
# 要求接口使用GET方式请求,并返回音频文件
678+
# 自定义的TTS接口服务,请求参数可自定义,可接入众多TTS服务
679+
# 以本地部署的KokoroTTS为例
680+
# 如果只有cpu运行:docker run -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-cpu:latest
681+
# 如果有gpu可用,使用gpu运行:docker run --gpus all -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-gpu:latest
682+
# 请求方式由method指定(支持GET或POST,未配置时默认GET;本示例的KokoroTTS使用POST),接口需返回音频文件
680683
type: custom
681-
url: "http://127.0.0.1:9880/tts"
684+
method: POST
685+
url: "http://127.0.0.1:8880/v1/audio/speech"
682686
params: # 自定义请求参数
683-
# text: "{prompt_text}" # {prompt_text}会被替换为实际的提示词内容
684-
# speaker: jok老师
685-
# speed: 1
686-
# foo: bar
687-
# testabc: 123456
687+
input: "{prompt_text}"
688+
response_format: "mp3"
689+
download_format: "mp3"
690+
voice: "zf_xiaoxiao"
691+
lang_code: "z"
692+
return_download_link: true
693+
speed: 1
694+
stream: false
688695
headers: # 自定义请求头
689696
# Authorization: Bearer xxxx
690-
format: wav # 接口返回的音频格式
697+
format: mp3 # 接口返回的音频格式
691698
output_dir: tmp/

main/xiaozhi-server/core/providers/tts/custom.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import json
23
import uuid
34
import requests
45
from config.logger import setup_logging
@@ -12,11 +13,21 @@ class TTSProvider(TTSProviderBase):
1213
def __init__(self, config, delete_audio_file):
1314
super().__init__(config, delete_audio_file)
1415
self.url = config.get("url")
16+
self.method = config.get("method", "GET")
1517
self.headers = config.get("headers", {})
16-
self.params = config.get("params")
1718
self.format = config.get("format", "wav")
1819
self.output_file = config.get("output_dir", "tmp/")
1920

21+
self.params = config.get("params")
22+
23+
if isinstance(self.params, str):
24+
try:
25+
self.params = json.loads(self.params)
26+
except json.JSONDecodeError:
27+
raise ValueError("Custom TTS配置参数出错,无法将字符串解析为对象")
28+
elif not isinstance(self.params, dict):
29+
raise TypeError("Custom TTS配置参数出错, 请参考配置说明")
30+
2031
def generate_filename(self):
2132
return os.path.join(self.output_file, f"tts-{datetime.now().date()}@{uuid.uuid4().hex}.{self.format}")
2233

@@ -27,7 +38,10 @@ async def text_to_speak(self, text, output_file):
2738
v = v.replace("{prompt_text}", text)
2839
request_params[k] = v
2940

30-
resp = requests.get(self.url, params=request_params, headers=self.headers)
41+
if self.method.upper() == "POST":
42+
resp = requests.post(self.url, json=request_params, headers=self.headers)
43+
else:
44+
resp = requests.get(self.url, params=request_params, headers=self.headers)
3145
if resp.status_code == 200:
3246
with open(output_file, "wb") as file:
3347
file.write(resp.content)

0 commit comments

Comments
 (0)