#!/usr/bin/env bash
# Entrypoint preamble: enables strict mode, fills in defaults for the THTTS_*
# environment variables, and normalises the requested backend name.
#
# Environment (all optional; the defaults are the values assigned below):
#   THTTS_HOST, THTTS_PORT, THTTS_LOG_LEVEL   common server settings
#   THTTS_MODEL                               model id passed to the VITS server
#   THTTS_REF_AUDIO, THTTS_REF_TEXT, THTTS_DEVICE, THTTS_SPEED,
#   THTTS_NFE_STEPS, THTTS_MAX_CONCURRENT,
#   THTTS_CKPT_FILE, THTTS_VOCAB_FILE         settings passed to the F5 server
#   THTTS_BACKEND                             backend selector (default: VITS)
set -Eeuo pipefail

# -------- Common env (with defaults) --------
# ':=' assigns the default when the variable is unset OR empty, so every
# variable below is defined afterwards and safe to expand under 'set -u'.
: "${THTTS_HOST:=0.0.0.0}"
: "${THTTS_PORT:=10200}"
: "${THTTS_LOG_LEVEL:=INFO}"

# VITS-specific
: "${THTTS_MODEL:=VIZINTZOR/MMS-TTS-THAI-FEMALEV2}"  # can be overridden

# F5-specific (optional overrides)
: "${THTTS_REF_AUDIO:=hf_sample}"
: "${THTTS_REF_TEXT:=}"            # empty = let backend decide/ASR if supported
: "${THTTS_DEVICE:=auto}"          # auto|cpu|cuda
: "${THTTS_SPEED:=1.0}"
: "${THTTS_NFE_STEPS:=32}"
: "${THTTS_MAX_CONCURRENT:=2}"
: "${THTTS_CKPT_FILE:=}"           # optional override
: "${THTTS_VOCAB_FILE:=}"          # optional override

BACKEND="${THTTS_BACKEND:-VITS}"
# Upper-case the selector so it can be matched case-insensitively.
# printf is used instead of echo: echo would interpret a value such as "-n"
# or "-e" as an option and emit nothing, hiding the real value.
BACKEND_UPPER="$(printf '%s' "$BACKEND" | tr '[:lower:]' '[:upper:]')"

# Start the VITS server by replacing the current shell process (exec) with
# `uv run python src/wyoming_thai_vits.py`.
# Globals (read): THTTS_LOG_LEVEL, THTTS_HOST, THTTS_PORT, THTTS_MODEL
# Arguments:      none
# Returns:        does not return when exec succeeds.
run_vits () {
  # Assemble the full command line as an array so each value stays one word.
  local -a vits_cmd=(
    uv run python src/wyoming_thai_vits.py
    --log-level "${THTTS_LOG_LEVEL}"
    --host "${THTTS_HOST}"
    --port "${THTTS_PORT}"
    --model-id "${THTTS_MODEL}"
  )

  exec "${vits_cmd[@]}"
}

# Start the F5 server by replacing the current shell process (exec) with
# `uv run python src/wyoming_thai_f5.py`.
# Globals (read): THTTS_LOG_LEVEL, THTTS_HOST, THTTS_PORT, THTTS_REF_AUDIO,
#                 THTTS_REF_TEXT, THTTS_DEVICE, THTTS_SPEED, THTTS_NFE_STEPS,
#                 THTTS_MAX_CONCURRENT, THTTS_CKPT_FILE, THTTS_VOCAB_FILE
# Arguments:      $1 - model version, "v1" or "v2"
# Outputs:        an error message on stderr when the version is not recognised
# Returns:        2 for an unrecognised version; otherwise does not return
#                 when exec succeeds.
run_f5 () {
  local version="${1:-}"
  local ckpt="${THTTS_CKPT_FILE:-}"
  local vocab="${THTTS_VOCAB_FILE:-}"
  local default_ckpt default_vocab

  # Pick the per-version default checkpoint/vocab locations. Anything other
  # than v1/v2 is rejected instead of silently being treated as v2.
  case "$version" in
    v1)
      default_ckpt="hf://VIZINTZOR/F5-TTS-THAI/model_1000000.pt"
      default_vocab="hf://VIZINTZOR/F5-TTS-THAI/vocab.txt"
      ;;
    v2)
      default_ckpt="hf://VIZINTZOR/F5-TTS-TH-V2/model_250000.pt"
      default_vocab="hf://VIZINTZOR/F5-TTS-TH-V2/vocab.txt"
      ;;
    *)
      echo "[entrypoint] ERROR: Unsupported F5 model version '${version}'. Use v1 or v2." >&2
      return 2
      ;;
  esac

  # Each override is honoured independently; only empty values fall back.
  ckpt="${ckpt:-$default_ckpt}"
  vocab="${vocab:-$default_vocab}"

  # Build args safely as an array (local, so it does not leak as a global).
  local -a args=(
    --log-level "${THTTS_LOG_LEVEL}"
    --host "${THTTS_HOST}"
    --port "${THTTS_PORT}"
    --model-version "${version}"
    --ckpt-file "${ckpt}"
    --vocab-file "${vocab}"
    --ref-audio "${THTTS_REF_AUDIO}"
    --device "${THTTS_DEVICE}"
    --speed "${THTTS_SPEED}"
    --nfe-steps "${THTTS_NFE_STEPS}"
    --max-concurrent "${THTTS_MAX_CONCURRENT}"
  )
  # Only pass --ref-text if provided (avoid empty string ambiguity)
  if [[ -n "${THTTS_REF_TEXT:-}" ]]; then
    args+=( --ref-text "${THTTS_REF_TEXT}" )
  fi

  exec uv run python src/wyoming_thai_f5.py "${args[@]}"
}

# Select and launch the backend named by BACKEND_UPPER. Each known branch
# announces its choice on stdout and then hands off to the launcher function;
# an unrecognised name is reported on stderr and the script exits with 1.
case "${BACKEND_UPPER}" in
  VITS)
    printf '%s\n' "[entrypoint] Using backend: VITS"
    run_vits
    ;;
  F5_V1 | F5-THV1 | F5TH | V1)
    printf '%s\n' "[entrypoint] Using backend: F5 v1"
    run_f5 "v1"
    ;;
  F5_V2 | F5-THV2 | V2)
    printf '%s\n' "[entrypoint] Using backend: F5 v2"
    run_f5 "v2"
    ;;
  *)
    printf '%s\n' "[entrypoint] ERROR: Unknown THTTS_BACKEND='$BACKEND'. Use VITS, F5_V1, or F5_V2." >&2
    exit 1
    ;;
esac