Skip to content

Commit 677bc1d

Browse files
authored
Add Speaker ID demo for C# (#862)
1 parent a88b3ba commit 677bc1d

File tree

10 files changed

+511
-2
lines changed

10 files changed

+511
-2
lines changed

.github/scripts/test-dot-net.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22

33
cd dotnet-examples/
44

5-
cd streaming-hlg-decoding/
5+
cd speaker-identification
6+
./run.sh
7+
8+
cd ../streaming-hlg-decoding/
69
./run.sh
710

811
cd ../spoken-language-identification

.github/workflows/test-dot-net.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ jobs:
179179
cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/
180180
cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/
181181
cp -v scripts/dotnet/examples/streaming-hlg-decoding.csproj dotnet-examples/streaming-hlg-decoding
182+
cp -v scripts/dotnet/examples/speaker-identification.csproj dotnet-examples/speaker-identification
182183
183184
ls -lh /tmp
184185

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
22
project(sherpa-onnx)
33

4-
set(SHERPA_ONNX_VERSION "1.9.23")
4+
set(SHERPA_ONNX_VERSION "1.9.24")
55

66
# Disable warning about
77
#

dotnet-examples/sherpa-onnx.sln

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identificat
1717
EndProject
1818
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-hlg-decoding", "streaming-hlg-decoding\streaming-hlg-decoding.csproj", "{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}"
1919
EndProject
20+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speaker-identification", "speaker-identification\speaker-identification.csproj", "{2B1B140E-A92F-426B-B0DF-5D916B67304F}"
21+
EndProject
2022
Global
2123
GlobalSection(SolutionConfigurationPlatforms) = preSolution
2224
Debug|Any CPU = Debug|Any CPU
@@ -54,5 +56,9 @@ Global
5456
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.Build.0 = Debug|Any CPU
5557
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.ActiveCfg = Release|Any CPU
5658
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.Build.0 = Release|Any CPU
59+
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
60+
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Debug|Any CPU.Build.0 = Debug|Any CPU
61+
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.ActiveCfg = Release|Any CPU
62+
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.Build.0 = Release|Any CPU
5763
EndGlobalSection
5864
EndGlobal
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
2+
//
3+
// This file shows how to do speaker identification with sherpa-onnx.
4+
//
5+
// 1. Download a model from
6+
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
7+
//
8+
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
9+
//
10+
// 2. Download test data from
11+
//
12+
// git clone https://github.com/csukuangfj/sr-data
13+
//
14+
// 3. Now run it
15+
//
16+
// dotnet run
17+
18+
using SherpaOnnx;
19+
using System.Collections.Generic;
20+
using System;
21+
22+
/// <summary>
/// Console demo for speaker identification: it enrolls two speakers from WAV
/// files, lists the registered speakers, searches for the speaker of several
/// test files, and finally exercises Verify and Remove on the manager.
/// </summary>
/// <remarks>
/// Every failed check prints a message and sets a non-zero process exit code,
/// so a script that runs this demo (for example with <c>set -e</c>) can detect
/// the failure instead of seeing a successful exit.
/// </remarks>
class SpeakerIdentificationDemo
{
    /// <summary>
    /// Computes the speaker embedding of a single wave file.
    /// </summary>
    /// <param name="extractor">Extractor used to create the stream and compute the embedding.</param>
    /// <param name="filename">Path of the wave file to read.</param>
    /// <returns>The embedding returned by <c>extractor.Compute</c> for the whole file.</returns>
    public static float[] ComputeEmbedding(SpeakerEmbeddingExtractor extractor, String filename)
    {
        WaveReader reader = new WaveReader(filename);

        // NOTE(review): the stream is not released explicitly here; if
        // OnlineStream is disposable, wrap it in a using declaration - confirm.
        OnlineStream stream = extractor.CreateStream();
        stream.AcceptWaveform(reader.SampleRate, reader.Samples);

        // Tell the stream that no more samples will arrive before computing.
        stream.InputFinished();

        return extractor.Compute(stream);
    }

    /// <summary>
    /// Computes one embedding per file, preserving the order of <paramref name="filenames"/>.
    /// </summary>
    private static float[][] ComputeEmbeddings(SpeakerEmbeddingExtractor extractor, string[] filenames)
    {
        float[][] embeddings = new float[filenames.Length][];
        for (int i = 0; i < filenames.Length; ++i)
        {
            embeddings[i] = ComputeEmbedding(extractor, filenames[i]);
        }

        return embeddings;
    }

    /// <summary>
    /// Reports a failed check: prints the message and marks the process as failed.
    /// </summary>
    private static void Fail(string message)
    {
        Console.WriteLine(message);

        // Main returns void, so the exit code must be set explicitly;
        // otherwise a failed check would still exit with 0.
        Environment.ExitCode = 1;
    }

    /// <summary>
    /// Entry point. Expects the model file and the sr-data directory in the
    /// current working directory (see the instructions at the top of the file).
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        var config = new SpeakerEmbeddingExtractorConfig();
        config.Model = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";
        config.Debug = 1;
        var extractor = new SpeakerEmbeddingExtractor(config);

        var manager = new SpeakerEmbeddingManager(extractor.Dim);

        // Enrollment data: several utterances per speaker.
        string[] spk1Files =
        {
            "./sr-data/enroll/fangjun-sr-1.wav",
            "./sr-data/enroll/fangjun-sr-2.wav",
            "./sr-data/enroll/fangjun-sr-3.wav",
        };
        float[][] spk1Vec = ComputeEmbeddings(extractor, spk1Files);

        string[] spk2Files =
        {
            "./sr-data/enroll/leijun-sr-1.wav",
            "./sr-data/enroll/leijun-sr-2.wav",
        };
        float[][] spk2Vec = ComputeEmbeddings(extractor, spk2Files);

        if (!manager.Add("fangjun", spk1Vec))
        {
            Fail("Failed to register fangjun");
            return;
        }

        if (!manager.Add("leijun", spk2Vec))
        {
            Fail("Failed to register leijun");
            return;
        }

        if (manager.NumSpeakers != 2)
        {
            Fail("There should be two speakers");
            return;
        }

        if (!manager.Contains("fangjun"))
        {
            Fail("It should contain the speaker fangjun");
            return;
        }

        if (!manager.Contains("leijun"))
        {
            Fail("It should contain the speaker leijun");
            return;
        }

        Console.WriteLine("---All speakers---");

        string[] allSpeakers = manager.GetAllSpeakers();
        foreach (var s in allSpeakers)
        {
            Console.WriteLine(s);
        }
        Console.WriteLine("------------");

        string[] testFiles =
        {
            "./sr-data/test/fangjun-test-sr-1.wav",
            "./sr-data/test/leijun-test-sr-1.wav",
            "./sr-data/test/liudehua-test-sr-1.wav"
        };

        float threshold = 0.6f;
        foreach (var file in testFiles)
        {
            float[] embedding = ComputeEmbedding(extractor, file);

            String name = manager.Search(embedding, threshold);
            if (string.IsNullOrEmpty(name))
            {
                // No registered speaker was returned for this file.
                name = "<Unknown>";
            }
            Console.WriteLine($"{file}: {name}");
        }

        // test verify
        // The embedding of testFiles[0] does not depend on the manager state,
        // so compute it once and reuse it before and after the removal.
        float[] fangjunTestEmbedding = ComputeEmbedding(extractor, testFiles[0]);

        if (!manager.Verify("fangjun", fangjunTestEmbedding, threshold))
        {
            Fail("testFiles[0] should match fangjun!");
            return;
        }

        if (!manager.Remove("fangjun"))
        {
            Fail("Failed to remove fangjun");
            return;
        }

        // After removal, verification against the removed name must fail.
        if (manager.Verify("fangjun", fangjunTestEmbedding, threshold))
        {
            Fail($"{testFiles[0]} should match no one!");
            return;
        }

        if (manager.NumSpeakers != 1)
        {
            Fail("There should only 1 speaker left.");
            return;
        }
    }
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../offline-decode-files/WaveReader.cs
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/usr/bin/env bash
#
# Downloads the speaker embedding model and the test data if they are not
# already present in the current directory, then runs the demo.

set -ex

model=3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

if [ ! -e "./$model" ]; then
  # -f makes curl fail on HTTP errors instead of saving the error page as the
  # model. Downloading to a temporary name ensures an interrupted or failed
  # transfer never leaves a file that the existence check above would accept.
  curl -fSL -o "./$model.tmp" "https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$model"
  mv "./$model.tmp" "./$model"
fi

if [ ! -d ./sr-data ]; then
  git clone https://github.com/csukuangfj/sr-data
fi

dotnet run
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net6.0</TargetFramework>
6+
<RootNamespace>speaker_identification</RootNamespace>
7+
<ImplicitUsings>enable</ImplicitUsings>
8+
<Nullable>enable</Nullable>
9+
</PropertyGroup>
10+
11+
<ItemGroup>
12+
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
13+
</ItemGroup>
14+
15+
</Project>
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net6.0</TargetFramework>
6+
<RootNamespace>speaker_identification</RootNamespace>
7+
<ImplicitUsings>enable</ImplicitUsings>
8+
<Nullable>enable</Nullable>
9+
</PropertyGroup>
10+
11+
<PropertyGroup>
12+
<RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
13+
</PropertyGroup>
14+
15+
<ItemGroup>
16+
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
17+
</ItemGroup>
18+
19+
</Project>

0 commit comments

Comments
 (0)