Skip to content

Commit 0d258dd

Browse files
authored
Support spoken language identification with whisper (#694)
1 parent 3cdad9b commit 0d258dd

36 files changed

+1173
-200
lines changed
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#!/usr/bin/env bash
2+
3+
set -e
4+
5+
#######################################
# Print a timestamped log line tagged with the caller's location.
# This function is from espnet.
# Arguments: $* - message words; joined with spaces. Backslash escapes in
#            the message are interpreted because the line goes through
#            `echo -e`.
# Outputs:   "<date> <time> (<file>:<line>:<caller>) <message>" on stdout
#######################################
log() {
  # BASH_SOURCE[1] is the file of whoever called log(); keep only its basename.
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
10+
11+
# Show which executable is under test and the search path used to find it.
echo "EXE is $EXE"
echo "PATH: $PATH"

# Print where $EXE resolves from. `command -v` is a shell builtin, so this
# does not depend on an external `which` being installed; it returns
# non-zero when $EXE cannot be found.
command -v "$EXE"
15+
16+
# Whisper model sizes to test, one run of the model loop per entry.
names=(
  tiny
  base
  small
  medium
)
22+
23+
# all_language_codes=bo,ml,tt,fa,sl,bg,sn,sr,tl,km,ln,mr,hr,eu,ro,ba,bs,pl,as,nn,sk,ko,oc,ar,uz,pa,tg,mk,kk,hi,ha,uk,is,de,el,ja,yo,be,so,tk,id,sa,ru,yi,en,am,cs,ne,la,sv,su,pt,mi,ca,sd,hy,haw,fi,et,kn,da,lt,it,nl,he,mg,ur,tr,af,br,bn,ta,no,my,si,mt,th,gl,sw,mn,jw,ms,ps,fo,ka,hu,zh,ht,az,fr,lo,sq,gu,cy,lv,es,lb,te,vi
24+
25+
log "Download test waves"
26+
# Test recordings, named <prefix>-<language>.wav. Each entry is used verbatim
# as the remote file name when downloading and as the input file when testing.
# NOTE(review): "po-polish.wav" uses "po" although the language-code list
# elsewhere in this script spells Polish "pl"; kept unchanged because the name
# has to match the file on the server - confirm before renaming.
waves=(
  ar-arabic.wav
  bg-bulgarian.wav
  cs-czech.wav
  da-danish.wav
  de-german.wav
  el-greek.wav
  en-english.wav
  es-spanish.wav
  fa-persian.wav
  fi-finnish.wav
  fr-french.wav
  hi-hindi.wav
  hr-croatian.wav
  id-indonesian.wav
  it-italian.wav
  ja-japanese.wav
  ko-korean.wav
  nl-dutch.wav
  no-norwegian.wav
  po-polish.wav
  pt-portuguese.wav
  ro-romanian.wav
  ru-russian.wav
  sk-slovak.wav
  sv-swedish.wav
  ta-tamil.wav
  tl-tagalog.wav
  tr-turkish.wav
  uk-ukrainian.wav
  zh-chinese.wav
)
58+
59+
# Fetch every test recording into the current directory.
for wav in "${waves[@]}"; do
  echo "Downloading $wav"
  # --fail makes curl exit non-zero on an HTTP error instead of saving the
  # server's error page under the .wav name, so a broken download stops the
  # script (it runs under `set -e`) rather than surfacing later as a bad input.
  curl --fail -SL -O "https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/resolve/main/test_wavs/$wav"
  # Show what has been downloaded so far.
  ls -lh -- *.wav
done
64+
65+
# For each model size: fetch the model repo, run $EXE on every test recording
# with the fp32 and then the int8 model pair, and delete the repo afterwards.
for name in "${names[@]}"; do
  log "------------------------------------------------------------"
  log "Run $name"
  log "------------------------------------------------------------"

  repo_url=https://huggingface.co/csukuangfj/sherpa-onnx-whisper-$name
  log "Start testing ${repo_url}"
  # Last path component of the URL, i.e. the directory `git clone` creates.
  repo=${repo_url##*/}
  log "Download pretrained model and test-data from $repo_url"

  # Clone with LFS smudging disabled, then pull only the *.onnx LFS objects.
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  pushd "$repo"
  git lfs pull --include "*.onnx"
  # git lfs pull --include "*.ort"
  ls -lh -- *.onnx
  popd

  for wav in "${waves[@]}"; do
    log "test fp32 onnx"

    time "$EXE" \
      --whisper-encoder="$repo/${name}-encoder.onnx" \
      --whisper-decoder="$repo/${name}-decoder.onnx" \
      "$wav"

    log "test int8 onnx"

    time "$EXE" \
      --whisper-encoder="$repo/${name}-encoder.int8.onnx" \
      --whisper-decoder="$repo/${name}-decoder.int8.onnx" \
      "$wav"
  done

  # Remove the model directory before moving to the next size.
  # ${repo:?} aborts instead of running `rm -rf` on an empty path.
  rm -rf -- "${repo:?}"
done

.github/workflows/build-wheels-linux.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ jobs:
8282
env:
8383
HF_TOKEN: ${{ secrets.HF_TOKEN }}
8484
uses: nick-fields/retry@v3
85-
shell: bash
8685
with:
8786
max_attempts: 20
8887
timeout_seconds: 200

.github/workflows/build-wheels-macos-arm64.yaml

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,27 +21,12 @@ jobs:
2121
fail-fast: false
2222
matrix:
2323
os: [macos-latest]
24-
python-version: ["cp37", "cp38", "cp39", "cp310", "cp311", "cp312"]
24+
python-version: ["cp38", "cp39", "cp310", "cp311", "cp312"]
2525

2626
steps:
2727
- uses: actions/checkout@v4
2828

29-
# see https://cibuildwheel.readthedocs.io/en/stable/changelog/
30-
# for a list of versions
3129
- name: Build wheels
32-
if: matrix.python-version == 'cp37'
33-
uses: pypa/cibuildwheel@v2.11.4
34-
env:
35-
CIBW_BUILD: "${{ matrix.python-version}}-* "
36-
CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='arm64'"
37-
CIBW_ARCHS: "arm64"
38-
CIBW_BUILD_VERBOSITY: 3
39-
40-
# Don't repair macOS wheels
41-
CIBW_REPAIR_WHEEL_COMMAND_MACOS: ""
42-
43-
- name: Build wheels
44-
if: matrix.python-version != 'cp37'
4530
uses: pypa/cibuildwheel@v2.15.0
4631
env:
4732
CIBW_BUILD: "${{ matrix.python-version}}-* "

.github/workflows/linux-gpu.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,14 @@ jobs:
9292
file build/bin/sherpa-onnx
9393
readelf -d build/bin/sherpa-onnx
9494
95+
- name: Test spoken language identification
96+
shell: bash
97+
run: |
98+
export PATH=$PWD/build/bin:$PATH
99+
export EXE=sherpa-onnx-offline-language-identification
100+
101+
.github/scripts/test-spoken-language-identification.sh
102+
95103
- name: Test online CTC
96104
shell: bash
97105
run: |
@@ -116,6 +124,7 @@ jobs:
116124
117125
.github/scripts/test-online-paraformer.sh
118126
127+
119128
- name: Test offline Whisper
120129
shell: bash
121130
run: |

.github/workflows/linux.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,15 @@ jobs:
123123
name: release-${{ matrix.build_type }}-${{ matrix.shared_lib }}
124124
path: build/bin/*
125125

126+
- name: Test spoken language identification
127+
if: matrix.build_type != 'Debug'
128+
shell: bash
129+
run: |
130+
export PATH=$PWD/build/bin:$PATH
131+
export EXE=sherpa-onnx-offline-language-identification
132+
133+
.github/scripts/test-spoken-language-identification.sh
134+
126135
- name: Test transducer kws
127136
shell: bash
128137
run: |
@@ -140,6 +149,7 @@ jobs:
140149
.github/scripts/test-online-ctc.sh
141150
142151
- name: Test offline Whisper
152+
if: matrix.build_type != 'Debug'
143153
shell: bash
144154
run: |
145155
export PATH=$PWD/build/bin:$PATH

.github/workflows/macos.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,15 @@ jobs:
102102
otool -L build/bin/sherpa-onnx
103103
otool -l build/bin/sherpa-onnx
104104
105+
- name: Test spoken language identification
106+
if: matrix.build_type != 'Debug'
107+
shell: bash
108+
run: |
109+
export PATH=$PWD/build/bin:$PATH
110+
export EXE=sherpa-onnx-offline-language-identification
111+
112+
.github/scripts/test-spoken-language-identification.sh
113+
105114
- name: Test transducer kws
106115
shell: bash
107116
run: |
@@ -135,6 +144,7 @@ jobs:
135144
.github/scripts/test-online-paraformer.sh
136145
137146
- name: Test offline Whisper
147+
if: matrix.build_type != 'Debug'
138148
shell: bash
139149
run: |
140150
export PATH=$PWD/build/bin:$PATH

.github/workflows/windows-x64-cuda.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,14 @@ jobs:
6868
6969
ls -lh ./bin/Release/sherpa-onnx.exe
7070
71+
- name: Test spoken language identification
72+
shell: bash
73+
run: |
74+
export PATH=$PWD/build/bin/Release:$PATH
75+
export EXE=sherpa-onnx-offline-language-identification.exe
76+
77+
.github/scripts/test-spoken-language-identification.sh
78+
7179
- name: Test online CTC
7280
shell: bash
7381
run: |

.github/workflows/windows-x64.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,14 @@ jobs:
6868
6969
ls -lh ./bin/Release/sherpa-onnx.exe
7070
71+
- name: Test spoken language identification
72+
shell: bash
73+
run: |
74+
export PATH=$PWD/build/bin/Release:$PATH
75+
export EXE=sherpa-onnx-offline-language-identification.exe
76+
77+
.github/scripts/test-spoken-language-identification.sh
78+
7179
- name: Test online CTC
7280
shell: bash
7381
run: |

.github/workflows/windows-x86.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,14 @@ jobs:
6969
7070
ls -lh ./bin/Release/sherpa-onnx.exe
7171
72+
# - name: Test spoken language identification
73+
# shell: bash
74+
# run: |
75+
# export PATH=$PWD/build/bin/Release:$PATH
76+
# export EXE=sherpa-onnx-offline-language-identification.exe
77+
#
78+
# .github/scripts/test-spoken-language-identification.sh
79+
7280
- name: Test online CTC
7381
shell: bash
7482
run: |

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
22
project(sherpa-onnx)
33

4-
set(SHERPA_ONNX_VERSION "1.9.13")
4+
set(SHERPA_ONNX_VERSION "1.9.14")
55

66
# Disable warning about
77
#

cmake/cmake_extension.py

Lines changed: 45 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,50 @@ def enable_alsa():
4343
return build_alsa and is_linux() and (is_arm64() or is_x86())
4444

4545

46+
def get_binaries():
    """Return the list of binary file names for the current platform.

    The base list of command-line tools is always included. The ALSA tools
    are appended when ``enable_alsa()`` is true, and the Windows ``.dll`` /
    ``.lib`` file names are appended when ``is_windows()`` is true.

    Executable names are listed without a platform suffix such as ``.exe``.

    Returns:
        list[str]: A new list on every call; callers may modify it freely.
    """
    binaries = [
        "sherpa-onnx",
        "sherpa-onnx-keyword-spotter",
        "sherpa-onnx-microphone",
        "sherpa-onnx-microphone-offline",
        "sherpa-onnx-microphone-offline-speaker-identification",
        "sherpa-onnx-offline",
        "sherpa-onnx-offline-language-identification",
        "sherpa-onnx-offline-tts",
        "sherpa-onnx-offline-tts-play",
        "sherpa-onnx-offline-websocket-server",
        "sherpa-onnx-online-websocket-client",
        "sherpa-onnx-online-websocket-server",
        "sherpa-onnx-vad-microphone",
        "sherpa-onnx-vad-microphone-offline-asr",
    ]

    if enable_alsa():
        binaries += [
            "sherpa-onnx-alsa",
            "sherpa-onnx-alsa-offline",
            "sherpa-onnx-alsa-offline-speaker-identification",
            "sherpa-onnx-offline-tts-play-alsa",
        ]

    if is_windows():
        binaries += [
            "espeak-ng.dll",
            "kaldi-decoder-core.dll",
            "kaldi-native-fbank-core.dll",
            "onnxruntime.dll",
            "piper_phonemize.dll",
            "sherpa-onnx-c-api.dll",
            "sherpa-onnx-core.dll",
            "sherpa-onnx-fst.lib",
            "sherpa-onnx-kaldifst-core.lib",
            "sherpa-onnx-portaudio.dll",
            "ucd.dll",
        ]

    return binaries
88+
89+
4690
try:
4791
from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
4892

@@ -150,38 +194,7 @@ def build_extension(self, ext: setuptools.extension.Extension):
150194
suffix = ".exe" if is_windows() else ""
151195
# Remember to also change setup.py
152196

153-
binaries = ["sherpa-onnx"]
154-
binaries += ["sherpa-onnx-keyword-spotter"]
155-
binaries += ["sherpa-onnx-offline"]
156-
binaries += ["sherpa-onnx-microphone"]
157-
binaries += ["sherpa-onnx-microphone-offline"]
158-
binaries += ["sherpa-onnx-microphone-offline-speaker-identification"]
159-
binaries += ["sherpa-onnx-online-websocket-server"]
160-
binaries += ["sherpa-onnx-offline-websocket-server"]
161-
binaries += ["sherpa-onnx-online-websocket-client"]
162-
binaries += ["sherpa-onnx-vad-microphone"]
163-
binaries += ["sherpa-onnx-vad-microphone-offline-asr"]
164-
binaries += ["sherpa-onnx-offline-tts"]
165-
binaries += ["sherpa-onnx-offline-tts-play"]
166-
167-
if enable_alsa():
168-
binaries += ["sherpa-onnx-alsa"]
169-
binaries += ["sherpa-onnx-alsa-offline"]
170-
binaries += ["sherpa-onnx-offline-tts-play-alsa"]
171-
binaries += ["sherpa-onnx-alsa-offline-speaker-identification"]
172-
173-
if is_windows():
174-
binaries += ["kaldi-native-fbank-core.dll"]
175-
binaries += ["sherpa-onnx-c-api.dll"]
176-
binaries += ["sherpa-onnx-core.dll"]
177-
binaries += ["sherpa-onnx-portaudio.dll"]
178-
binaries += ["onnxruntime.dll"]
179-
binaries += ["piper_phonemize.dll"]
180-
binaries += ["espeak-ng.dll"]
181-
binaries += ["ucd.dll"]
182-
binaries += ["kaldi-decoder-core.dll"]
183-
binaries += ["sherpa-onnx-fst.lib"]
184-
binaries += ["sherpa-onnx-kaldifst-core.lib"]
197+
binaries = get_binaries()
185198

186199
for f in binaries:
187200
suffix = "" if (".dll" in f or ".lib" in f) else suffix

0 commit comments

Comments
 (0)