Skip to content

Commit fd5a0d1

Browse files
authored
Add C++ runtime for Tele-AI/TeleSpeech-ASR (#970)
1 parent f8dbc10 commit fd5a0d1

File tree

52 files changed

+1050
-143
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

52 files changed

+1050
-143
lines changed

.github/scripts/test-dot-net.sh

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,16 @@
22

33
cd dotnet-examples/
44

5-
cd vad-non-streaming-asr-paraformer
5+
cd ./offline-decode-files
6+
./run-telespeech-ctc.sh
7+
./run-nemo-ctc.sh
8+
./run-paraformer.sh
9+
./run-zipformer.sh
10+
./run-hotwords.sh
11+
./run-whisper.sh
12+
./run-tdnn-yesno.sh
13+
14+
cd ../vad-non-streaming-asr-paraformer
615
./run.sh
716

817
cd ../offline-punctuation
@@ -22,14 +31,6 @@ cd ../online-decode-files
2231
./run-transducer.sh
2332
./run-paraformer.sh
2433

25-
cd ../offline-decode-files
26-
./run-nemo-ctc.sh
27-
./run-paraformer.sh
28-
./run-zipformer.sh
29-
./run-hotwords.sh
30-
./run-whisper.sh
31-
./run-tdnn-yesno.sh
32-
3334
cd ../offline-tts
3435
./run-aishell3.sh
3536
./run-piper.sh

.github/scripts/test-offline-ctc.sh

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,39 @@ echo "PATH: $PATH"
1515

1616
which $EXE
1717

18+
log "test offline TeleSpeech CTC"
19+
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
20+
name=$(basename $url)
21+
repo=$(basename -s .tar.bz2 $name)
22+
23+
curl -SL -O $url
24+
tar xvf $name
25+
rm $name
26+
ls -lh $repo
27+
28+
test_wavs=(
29+
3-sichuan.wav
30+
4-tianjin.wav
31+
5-henan.wav
32+
)
33+
for w in ${test_wavs[@]}; do
34+
time $EXE \
35+
--tokens=$repo/tokens.txt \
36+
--telespeech-ctc=$repo/model.int8.onnx \
37+
--debug=1 \
38+
$repo/test_wavs/$w
39+
done
40+
41+
time $EXE \
42+
--tokens=$repo/tokens.txt \
43+
--telespeech-ctc=$repo/model.int8.onnx \
44+
--debug=1 \
45+
$repo/test_wavs/3-sichuan.wav \
46+
$repo/test_wavs/4-tianjin.wav \
47+
$repo/test_wavs/5-henan.wav
48+
49+
rm -rf $repo
50+
1851
log "-----------------------------------------------------------------"
1952
log "Run Nemo fast conformer hybrid transducer ctc models (CTC branch)"
2053
log "-----------------------------------------------------------------"

.github/scripts/test-python.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,18 @@ log() {
1010

1111
export GIT_CLONE_PROTECTION_ACTIVE=false
1212

13+
log "test offline TeleSpeech CTC"
14+
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
15+
name=$(basename $url)
16+
repo=$(basename -s .tar.bz2 $name)
17+
18+
curl -SL -O $url
19+
tar xvf $name
20+
rm $name
21+
ls -lh $repo
22+
python3 ./python-api-examples/offline-telespeech-ctc-decode-files.py
23+
rm -rf $repo
24+
1325
log "test online NeMo CTC"
1426

1527
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms.tar.bz2

.github/workflows/build-wheels-macos-arm64.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ jobs:
8282
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
8383
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
8484
run: |
85-
python3 -m pip install --upgrade pip
86-
python3 -m pip install wheel twine setuptools
85+
python3 -m pip install --break-system-packages --upgrade pip
86+
python3 -m pip install --break-system-packages wheel twine setuptools
8787
8888
twine upload ./wheelhouse/*.whl
.github/workflows/build-wheels-macos-universal2.yaml

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
name: build-wheels-macos-universal2
2+
3+
on:
4+
push:
5+
branches:
6+
- wheel
7+
tags:
8+
- '*'
9+
workflow_dispatch:
10+
11+
env:
12+
SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1
13+
14+
concurrency:
15+
group: build-wheels-macos-universal2-${{ github.ref }}
16+
cancel-in-progress: true
17+
18+
jobs:
19+
build_wheels_macos_universal2:
20+
name: ${{ matrix.python-version }}
21+
runs-on: ${{ matrix.os }}
22+
strategy:
23+
fail-fast: false
24+
matrix:
25+
os: [macos-latest]
26+
python-version: ["cp38", "cp39", "cp310", "cp311", "cp312"]
27+
28+
steps:
29+
- uses: actions/checkout@v4
30+
31+
- name: Build wheels
32+
uses: pypa/cibuildwheel@v2.15.0
33+
env:
34+
CIBW_BUILD: "${{ matrix.python-version}}-* "
35+
CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='arm64;x86_64'"
36+
CIBW_ARCHS: "universal2"
37+
CIBW_BUILD_VERBOSITY: 3
38+
39+
# Don't repair macOS wheels
40+
CIBW_REPAIR_WHEEL_COMMAND_MACOS: ""
41+
42+
- name: Display wheels
43+
shell: bash
44+
run: |
45+
ls -lh ./wheelhouse/
46+
47+
- uses: actions/upload-artifact@v4
48+
with:
49+
name: wheel-${{ matrix.python-version }}
50+
path: ./wheelhouse/*.whl
51+
52+
- name: Publish to huggingface
53+
if: matrix.python-version == 'cp38'
54+
env:
55+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
56+
uses: nick-fields/retry@v3
57+
with:
58+
max_attempts: 20
59+
timeout_seconds: 200
60+
shell: bash
61+
command: |
62+
git config --global user.email "csukuangfj@gmail.com"
63+
git config --global user.name "Fangjun Kuang"
64+
65+
rm -rf huggingface
66+
export GIT_LFS_SKIP_SMUDGE=1
67+
export GIT_CLONE_PROTECTION_ACTIVE=false
68+
69+
git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
70+
cd huggingface
71+
git fetch
72+
git pull
73+
git merge -m "merge remote" --ff origin main
74+
75+
cp -v ../wheelhouse/*.whl .
76+
77+
git status
78+
git add .
79+
git commit -m "add more wheels"
80+
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels main
81+
82+
- name: Publish wheels to PyPI
83+
env:
84+
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
85+
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
86+
run: |
87+
python3 -m pip install --break-system-packages --upgrade pip
88+
python3 -m pip install --break-system-packages wheel twine setuptools
89+
90+
twine upload ./wheelhouse/*.whl

.github/workflows/build-wheels-macos-x64.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ jobs:
9999
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
100100
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
101101
run: |
102-
python3 -m pip install --upgrade pip
103-
python3 -m pip install wheel twine setuptools
102+
python3 -m pip install --break-system-packages --upgrade pip
103+
python3 -m pip install --break-system-packages wheel twine setuptools
104104
105105
twine upload ./wheelhouse/*.whl

.github/workflows/export-telespeech-ctc.yaml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,49 @@ jobs:
4848
repo_name: k2-fsa/sherpa-onnx
4949
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
5050
tag: asr-models
51+
52+
- name: Publish float32 model to huggingface
53+
shell: bash
54+
env:
55+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
56+
run: |
57+
src=scripts/tele-speech/sherpa-onnx-telespeech-ctc-zh-2024-06-04
58+
git config --global user.email "csukuangfj@gmail.com"
59+
git config --global user.name "Fangjun Kuang"
60+
61+
export GIT_CLONE_PROTECTION_ACTIVE=false
62+
63+
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-zh-2024-06-04 hf
64+
cp -a $src/* hf/
65+
cd hf
66+
git lfs track "*.pdf"
67+
git lfs track "*.onnx"
68+
git add .
69+
git commit -m 'add model files' || true
70+
git status
71+
ls -lh
72+
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-zh-2024-06-04 main || true
73+
rm -rf hf
74+
75+
- name: Publish int8 model to huggingface
76+
shell: bash
77+
env:
78+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
79+
run: |
80+
src=scripts/tele-speech/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04
81+
git config --global user.email "csukuangfj@gmail.com"
82+
git config --global user.name "Fangjun Kuang"
83+
84+
export GIT_CLONE_PROTECTION_ACTIVE=false
85+
86+
rm -rf hf
87+
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04 hf
88+
cp -a $src/* hf/
89+
cd hf
90+
git lfs track "*.pdf"
91+
git lfs track "*.onnx"
92+
git add .
93+
git commit -m 'add model files' || true
94+
git status
95+
ls -lh
96+
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04 main || true

.github/workflows/linux.yaml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -130,34 +130,34 @@ jobs:
130130
name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
131131
path: install/*
132132

133-
- name: Test online transducer
133+
- name: Test offline CTC
134134
shell: bash
135135
run: |
136136
du -h -d1 .
137137
export PATH=$PWD/build/bin:$PATH
138-
export EXE=sherpa-onnx
138+
export EXE=sherpa-onnx-offline
139139
140-
.github/scripts/test-online-transducer.sh
140+
.github/scripts/test-offline-ctc.sh
141141
du -h -d1 .
142142
143-
- name: Test online transducer (C API)
143+
- name: Test online transducer
144144
shell: bash
145145
run: |
146146
du -h -d1 .
147147
export PATH=$PWD/build/bin:$PATH
148-
export EXE=decode-file-c-api
148+
export EXE=sherpa-onnx
149149
150150
.github/scripts/test-online-transducer.sh
151151
du -h -d1 .
152152
153-
- name: Test offline CTC
153+
- name: Test online transducer (C API)
154154
shell: bash
155155
run: |
156156
du -h -d1 .
157157
export PATH=$PWD/build/bin:$PATH
158-
export EXE=sherpa-onnx-offline
158+
export EXE=decode-file-c-api
159159
160-
.github/scripts/test-offline-ctc.sh
160+
.github/scripts/test-online-transducer.sh
161161
du -h -d1 .
162162
163163
- name: Test spoken language identification (C++ API)

.github/workflows/macos.yaml

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,14 @@ jobs:
107107
otool -L build/bin/sherpa-onnx
108108
otool -l build/bin/sherpa-onnx
109109
110+
- name: Test offline CTC
111+
shell: bash
112+
run: |
113+
export PATH=$PWD/build/bin:$PATH
114+
export EXE=sherpa-onnx-offline
115+
116+
.github/scripts/test-offline-ctc.sh
117+
110118
- name: Test offline transducer
111119
shell: bash
112120
run: |
@@ -192,13 +200,7 @@ jobs:
192200
193201
.github/scripts/test-offline-whisper.sh
194202
195-
- name: Test offline CTC
196-
shell: bash
197-
run: |
198-
export PATH=$PWD/build/bin:$PATH
199-
export EXE=sherpa-onnx-offline
200203
201-
.github/scripts/test-offline-ctc.sh
202204
203205
- name: Test online transducer
204206
shell: bash

.github/workflows/swift.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ jobs:
3939
strategy:
4040
fail-fast: false
4141
matrix:
42-
os: [macos-13]
42+
os: [macos-latest, macos-14]
4343

4444
steps:
4545
- uses: actions/checkout@v4

.github/workflows/test-go.yaml

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,12 @@ concurrency:
3030

3131
jobs:
3232
test-go:
33-
name: ${{ matrix.os }} ${{matrix.arch }}
33+
name: ${{ matrix.os }}
3434
runs-on: ${{ matrix.os }}
3535
strategy:
3636
fail-fast: false
3737
matrix:
38-
include:
39-
- os: macos-latest
40-
arch: amd64
38+
os: [macos-latest, macos-14]
4139

4240
steps:
4341
- uses: actions/checkout@v4
@@ -47,7 +45,7 @@ jobs:
4745
- name: ccache
4846
uses: hendrikmuhs/ccache-action@v1.2
4947
with:
50-
key: ${{ matrix.os }}-${{ matrix.arch }}
48+
key: ${{ matrix.os }}-go
5149

5250
- uses: actions/setup-go@v5
5351
with:
@@ -109,8 +107,6 @@ jobs:
109107
go build
110108
ls -lh
111109
112-
git lfs install
113-
114110
echo "Test vits-ljs"
115111
./run-vits-ljs.sh
116112
rm -rf vits-ljs
@@ -144,7 +140,13 @@ jobs:
144140
go build
145141
ls -lh
146142
147-
git lfs install
143+
echo "Test telespeech ctc"
144+
./run-telespeech-ctc.sh
145+
rm -rf sherpa-onnx-telespeech-ctc-*
146+
147+
echo "Test transducer"
148+
./run-transducer.sh
149+
rm -rf sherpa-onnx-zipformer-en-2023-06-26
148150
149151
echo "Test transducer"
150152
./run-transducer.sh

.github/workflows/test-piper-phonemize.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ jobs:
5757
5858
mkdir build
5959
cd build
60-
cmake -DCMAKE_VERBOSE_MAKEFILE=ON -D SHERPA_ONNX_ENABLE_TESTS=ON -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -DCMAKE_INSTALL_PREFIX=./install ..
60+
cmake -DSHERPA_ONNX_ENABLE_EPSEAK_NG_EXE=ON -DBUILD_ESPEAK_NG_EXE=ON -DCMAKE_VERBOSE_MAKEFILE=ON -D SHERPA_ONNX_ENABLE_TESTS=ON -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -DCMAKE_INSTALL_PREFIX=./install ..
6161
6262
- name: Build
6363
shell: bash

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,4 @@ node_modules
106106
package-lock.json
107107
sherpa-onnx-nemo-*
108108
sherpa-onnx-vits-*
109+
sherpa-onnx-telespeech-ctc-*

0 commit comments

Comments
 (0)