2 changes: 1 addition & 1 deletion modules/api/Api.py
@@ -98,7 +98,7 @@ async def _set_cors_headers(response: Response, origin: str = None):
if allow_credentials:
response.headers["Access-Control-Allow-Credentials"] = "true"

@self.app.middleware("http")
# @self.app.middleware("http")
Member

This feature matters to users who need to call the API from a web frontend; it should not simply be removed.

Contributor Author

Thanks for the reply. I initially suspected a FastAPI version mismatch, but the installed version matches the recommended one. The error appears to come from the initialization logic, not from upgrading the environment to torch 2.7. The container terminates as soon as it hits this error, so I need a workaround to get it running.
Command-line arguments: python3 webui.py --api --webui_experimental
Full error output:
```
2025-07-30 18:34:12,421 - datasets - INFO - PyTorch version 2.7.1+cu128 available.
2025-07-30 18:34:13,653 - modules.core.models.tts.CosyVoiceModel - INFO - Found CosyVoice model: [PosixPath('models/CosyVoice2-0.5B')]
2025-07-30 18:34:13,723 - modules.devices.devices - INFO - Using half precision: torch.float16
2025-07-30 18:34:13,753 - modules.devices.devices - INFO - Using device: cuda
2025-07-30 18:34:13,754 - modules.webui.app - INFO - WebUI module initialized
2025-07-30 18:34:13,754 - modules.webui.localization - INFO - Loaded localization file /app/Speech-AI-Forge/language/zh-CN.json
2025-07-30 18:34:14,071 - httpx - INFO - HTTP Request: GET https://checkip.amazonaws.com/ "HTTP/1.1 200 "
Running on local URL: http://0.0.0.0:7860
2025-07-30 18:34:14,118 - httpx - INFO - HTTP Request: GET http://localhost:7860/startup-events "HTTP/1.1 200 OK"
2025-07-30 18:34:14,153 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"

To create a public link, set share=True in launch().
2025-07-30 18:34:14,155 - modules.api.Api - INFO - Registered API: GET /v1/ping
2025-07-30 18:34:14,155 - modules.api.Api - INFO - Registered API: GET /v1/versions
2025-07-30 18:34:14,155 - modules.api.Api - INFO - Registered API: GET /v1/audio_formats
2025-07-30 18:34:14,156 - modules.api.Api - INFO - Registered API: GET /v1/models/reload
2025-07-30 18:34:14,156 - modules.api.Api - INFO - Registered API: GET /v1/models/unload
2025-07-30 18:34:14,156 - modules.api.Api - INFO - Registered API: GET /v1/models/list
2025-07-30 18:34:14,156 - modules.api.Api - INFO - Registered API: GET /v1/styles/list
2025-07-30 18:34:14,157 - modules.api.Api - INFO - Registered API: GET /v1/speakers/list
2025-07-30 18:34:14,157 - modules.api.Api - INFO - Registered API: POST /v1/speakers/refresh
2025-07-30 18:34:14,158 - modules.api.Api - INFO - Registered API: POST /v1/speakers/update
2025-07-30 18:34:14,158 - modules.api.Api - INFO - Registered API: POST /v1/speaker/create
2025-07-30 18:34:14,159 - modules.api.Api - INFO - Registered API: POST /v1/speaker/update
2025-07-30 18:34:14,159 - modules.api.Api - INFO - Registered API: POST /v1/speaker/detail
2025-07-30 18:34:14,162 - modules.api.Api - INFO - Registered API: GET /v1/tts
2025-07-30 18:34:14,162 - modules.api.Api - INFO - Registered API: POST /v1/ssml
2025-07-30 18:34:14,163 - modules.api.Api - INFO - Registered API: POST /v1/text:synthesize
2025-07-30 18:34:14,163 - modules.api.Api - INFO - Registered API: POST /v1/speech:recognize
2025-07-30 18:34:14,163 - modules.api.Api - INFO - Registered API: POST /v1/speech:longrunningrecognize
2025-07-30 18:34:14,163 - modules.api.Api - INFO - Registered API: POST /v1/audio/speech
2025-07-30 18:34:14,166 - modules.api.Api - INFO - Registered API: POST /v1/audio/transcriptions
2025-07-30 18:34:14,166 - modules.api.Api - INFO - Registered API: POST /v1/prompt/refine
2025-07-30 18:34:14,167 - modules.api.Api - INFO - Registered API: POST /v1/text/normalize
2025-07-30 18:34:14,167 - modules.api.impl.xtts_v2_api - INFO - Loaded xttsv2 config: {'stream_chunk_size': 100, 'temperature': 0.3, 'speed': 1, 'length_penalty': 0.5, 'repetition_penalty': 1.2, 'top_k': 20, 'top_p': 0.7, 'enable_text_splitting': True, 'batch_size': 4, 'eos': ' 。 ', 'infer_seed': 42, 'spliter_threshold': 100, 'model_name': 'cosy-voice', 'enable_enhancer': False}
2025-07-30 18:34:14,167 - modules.api.Api - INFO - Registered API: GET /v1/xtts_v2/speakers
2025-07-30 18:34:14,167 - modules.api.Api - INFO - Registered API: POST /v1/xtts_v2/tts_to_audio
2025-07-30 18:34:14,168 - modules.api.Api - INFO - Registered API: GET /v1/xtts_v2/tts_stream
2025-07-30 18:34:14,168 - modules.api.Api - INFO - Registered API: POST /v1/xtts_v2/set_tts_settings
2025-07-30 18:34:14,173 - modules.api.Api - INFO - Registered API: POST /v1/stt/transcribe
2025-07-30 18:34:14,176 - modules.api.Api - INFO - Registered API: POST /v1/stt/stream
2025-07-30 18:34:14,178 - modules.api.Api - INFO - Registered API: POST /v1/vc
2025-07-30 18:34:14,179 - modules.api.Api - INFO - Registered API: POST /v2/tts
2025-07-30 18:34:14,179 - modules.api.Api - INFO - Registered API: POST /v2/stt
Traceback (most recent call last):
  File "/app/Speech-AI-Forge/webui.py", line 178, in <module>
    process_webui_args(args)
  File "/app/Speech-AI-Forge/webui.py", line 156, in process_webui_args
    process_api_args(args, app)
  File "/app/Speech-AI-Forge/modules/api/api_setup.py", line 87, in process_api_args
    api.set_cors(allow_origins=[cors_origin])
  File "/app/Speech-AI-Forge/modules/api/Api.py", line 101, in set_cors
    @self.app.middleware("http")
     ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.11/site-packages/fastapi/applications.py", line 4535, in decorator
    self.add_middleware(BaseHTTPMiddleware, dispatch=func)
  File "/opt/conda/lib/python3.11/site-packages/starlette/applications.py", line 141, in add_middleware
    raise RuntimeError("Cannot add middleware after an application has started")
RuntimeError: Cannot add middleware after an application has started
2025-07-30 18:34:14,889 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
2025-07-30 18:34:14,893 - httpx - INFO - HTTP Request: GET https://api.gradio.app/gradio-messaging/en "HTTP/1.1 200 OK"
```
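For context, FastAPI/Starlette only allow middleware to be registered before the ASGI app starts serving, which is exactly what set_cors trips over here. A minimal sketch of the usual workaround, registering the CORS middleware at setup time (illustrative only, not the project's actual set_cors implementation):

```python
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI()

# Middleware must be registered before the application starts serving;
# calling add_middleware() afterwards raises
# "Cannot add middleware after an application has started".
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # replace with the configured cors_origin
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
```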

async def cors_handler(request: Request, call_next):
response: Response = await call_next(request)

7 changes: 4 additions & 3 deletions modules/config.py
@@ -1,3 +1,4 @@
import os
import sys

import torch
@@ -17,9 +18,9 @@
"python_version": ".".join([str(x) for x in sys.version_info[0:3]]),
"torch_version": getattr(torch, "__long_version__", torch.__version__),
# "gradio_version":gr.__version__,
"git_tag": git.git_tag(),
"git_branch": git.branch_name(),
"git_commit": git.commit_hash(),
"git_tag": os.environ.get("V_GIT_TAG") or git.git_tag(),
"git_branch": os.environ.get("V_GIT_BRANCH") or git.branch_name(),
"git_commit": os.environ.get("V_GIT_COMMIT") or git.commit_hash(),
"ffmpeg_version": ffmpeg.ffmpeg_version(),
}
)
2 changes: 1 addition & 1 deletion modules/core/models/tts/CosyVoiceModel.py
@@ -100,7 +100,7 @@ def load(
# instruct = True if "-Instruct" in str(model_dir) else False
instruct = True

with open(model_dir / "cosyvoice.yaml", "r") as f:
with open(model_dir / "cosyvoice2.yaml", "r") as f:
Member

We need to stay compatible with the old path. The rename comes from the upstream model provider changing the file name, so previously downloaded models will still use the old name, cosyvoice.yaml.

Also, the download script needs to be updated in sync:
scripts/downloader/cosyvoice2.py
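A minimal backward-compatible sketch, assuming the loader simply probes both file names (resolve_config_path is an illustrative helper, not an existing function in the project):

```python
from pathlib import Path


def resolve_config_path(model_dir: Path) -> Path:
    # Prefer the new upstream file name, but fall back to the legacy one
    # so previously downloaded model directories keep working.
    for name in ("cosyvoice2.yaml", "cosyvoice.yaml"):
        candidate = model_dir / name
        if candidate.exists():
            return candidate
    raise FileNotFoundError(f"No cosyvoice config found in {model_dir}")
```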

configs = load_hyperpyyaml(f, overrides=self.hp_overrides)

frontend = CosyVoiceFrontEnd(
2 changes: 1 addition & 1 deletion modules/core/pipeline/factory.py
@@ -120,7 +120,7 @@ def create(cls, ctx: TTSPipelineContext) -> TTSPipeline:
return cls.create_fishspeech_pipeline(ctx)
elif model_id == "cosyvoice":
return cls.create_cosyvoice_pipeline(ctx)
elif model_id == "firered":
elif model_id == "fireredtts":
return cls.create_fire_red_tts_pipeline(ctx)
elif model_id == "f5" or model_id == "f5tts":
return cls.create_f5_tts_pipeline(ctx)
11 changes: 8 additions & 3 deletions modules/devices/devices.py
@@ -201,9 +201,14 @@ def get_cpu_memory():


def get_gpu_memory():
total_memory = torch.cuda.get_device_properties(0).total_memory
reserved_memory = torch.cuda.memory_reserved(0)
allocated_memory = torch.cuda.memory_allocated(0)
if torch.cuda.is_available():
total_memory = torch.cuda.get_device_properties(0).total_memory
reserved_memory = torch.cuda.memory_reserved(0)
allocated_memory = torch.cuda.memory_allocated(0)
else:
total_memory = 0
reserved_memory = 0
allocated_memory = 0
free_memory = total_memory - reserved_memory
return MemUsage(
device=cuda,
4 changes: 2 additions & 2 deletions modules/repos_static/ChatTTS/ChatTTS/core.py
@@ -63,7 +63,7 @@ def download_models(
self,
source: Literal["huggingface", "local", "custom"] = "local",
force_redownload=False,
custom_path: Optional[torch.serialization.FILE_LIKE] = None,
custom_path: Optional[torch.serialization.FileLike] = None,
Member

First, this is most likely caused by the torch version difference. I'd rather not bump the torch version this library is pinned to; it could bring unexpected problems, and many defaults might turn into settings that have to be configured explicitly.
All code under repos_static comes from the upstream model repositories and should not be modified except when syncing with upstream.
The main purpose of vendoring this code is to pin it and keep the dependencies under control.

Member

So if you really do need to upgrade torch, please bring the FileLike type into our own code, or use Any instead.
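A minimal sketch of such a shim kept outside repos_static (the module location and the fallback alias are assumptions, not existing project code):

```python
# e.g. modules/utils/torch_compat.py (hypothetical location)
import os
from typing import IO, Union

import torch

# Newer torch releases expose torch.serialization.FileLike instead of the
# old FILE_LIKE alias; resolve whichever exists so callers can import one
# stable name regardless of the installed torch version.
FileLike = getattr(
    torch.serialization,
    "FileLike",
    getattr(torch.serialization, "FILE_LIKE", Union[str, os.PathLike, IO[bytes]]),
)
```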

Contributor Author

This isn't a big problem, and it is indeed caused by the torch version difference. NVIDIA 50xx-series GPUs use a new architecture that requires CUDA 12.8 or later, and official torch support for CUDA 12.8 starts at 2.7.0, so I had no choice but to go with 2.7.1.
As for introducing the FileLike type under repos_static: as you said, that changes upstream code, which seems more complicated than a plain find-and-replace. When building the Docker image I can simply patch it with sed. torch 2.7.1+cu128 seems to affect only this spot in the code anyway.

) -> Optional[str]:
if source == "local":
download_path = os.getcwd()
@@ -121,7 +121,7 @@ def load(
source: Literal["huggingface", "local", "custom"] = "local",
force_redownload=False,
compile: bool = False,
custom_path: Optional[torch.serialization.FILE_LIKE] = None,
custom_path: Optional[torch.serialization.FileLike] = None,
device: Optional[torch.device] = None,
coef: Optional[torch.Tensor] = None,
use_flash_attn=False,
2 changes: 1 addition & 1 deletion modules/repos_static/ChatTTS/ChatTTS/model/tokenizer.py
@@ -16,7 +16,7 @@
class Tokenizer:
def __init__(
self,
tokenizer_path: torch.serialization.FILE_LIKE,
tokenizer_path: torch.serialization.FileLike,
):
"""
tokenizer: BertTokenizerFast = torch.load(
@@ -1037,7 +1037,7 @@ def __init__(self, ckpt_path, device="cuda"):
global_context=True,
batch_norm=True,
)
model.load_state_dict(torch.load(ckpt_path), strict=True)
model.load_state_dict(torch.load(ckpt_path, map_location=torch.device(device)), strict=True)
model.eval()
self.model = model
self.model.to(device)
@@ -12,7 +12,7 @@

fast_langdetect.infer._default_detector = fast_langdetect.infer.LangDetector(
fast_langdetect.infer.LangDetectConfig(
cache_dir=Path(__file__).parent.parent.parent / "pretrained_models" / "fast_langdetect"
cache_dir="models/gpt_sovits_v4/fast_langdetect"
Member

Same as above: I'd rather not change the code logic under repos_static; it only adds maintenance burden and makes future syncs with upstream harder.
My suggestion is to keep this as it is; for a 125 MB model, both downloading and pre-packaging are just about acceptable.

)
)
