Skip to content

Fixed issues in ModelsTrainer and SpeakerIdentifier #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
Binary file added Code/.DS_Store
Binary file not shown.
12 changes: 5 additions & 7 deletions Code/ModelsTrainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import pickle
import warnings
import numpy as np
from sklearn.mixture import GMM
from sklearn.mixture import GaussianMixture as GMM
from FeaturesExtractor import FeaturesExtractor
from SilenceEliminator import SilenceEliminator
import scipy.io.wavfile

warnings.filterwarnings("ignore")

Expand Down Expand Up @@ -34,12 +34,10 @@

# extract voice features
features_extractor = FeaturesExtractor()
silence_eliminator = SilenceEliminator()

try :
silence_eliminated_wave_file_path = "temp-" + os.path.basename(filepath).split('.')[0] + ".wav"
audio, duration_string = silence_eliminator.ffmpeg_silence_eliminator(filepath, silence_eliminated_wave_file_path)
vector = features_extractor.accelerated_get_features_vector(filepath, audio, 8000)
sample_rate, signal = scipy.io.wavfile.read(filepath)
vector = features_extractor.accelerated_get_features_vector(filepath, signal, 8000)
except:
continue

Expand All @@ -52,7 +50,7 @@
print("ValueError: Shape mismatch")

# adapt gmm
gmm = GMM(n_components = 16, n_iter = 200, covariance_type='diag', n_init = 3)
gmm = GMM(n_components = 16, covariance_type='diag', n_init = 3)
gmm.fit(features)

# dumping the trained gaussian model
Expand Down
51 changes: 0 additions & 51 deletions Code/SilenceEliminator.py

This file was deleted.

8 changes: 3 additions & 5 deletions Code/SpeakerIdentifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import pickle
import warnings
import numpy as np
from SilenceEliminator import SilenceEliminator
from FeaturesExtractor import FeaturesExtractor
import scipy.io.wavfile

warnings.filterwarnings("ignore")

Expand Down Expand Up @@ -32,11 +32,9 @@
for path in file_paths[:]:
if os.path.basename(path).split('_')[0] in db.keys():
features_extractor = FeaturesExtractor()
silence_eliminator = SilenceEliminator()

silence_eliminated_wave_file_path ="temp-" + os.path.basename(path).split('.')[0] + ".wav"
audio, duration_string = silence_eliminator.ffmpeg_silence_eliminator(path, silence_eliminated_wave_file_path)
vector = features_extractor.accelerated_get_features_vector(path, audio, 8000)
sample_rate, signal = scipy.io.wavfile.read(path)
vector = features_extractor.accelerated_get_features_vector(path, signal, 8000)

if vector.shape != (0,):
print(vector.shape)
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,9 @@ This script requires the following modules/libraries:
## Results and discussion
- The code can be further optimized using multi-threading, acceleration libs and multi-processing.
- The accuracy can be further improved using GMM normalization aka a UBM-GMM system.

## Update
- Eliminated the use of SilenceEliminator
- Achieved an accuracy of 98.21%
- Fixed the import of GMM in FeaturesExtractor
- Updated file_read in ModelsTrainer
31 changes: 15 additions & 16 deletions Run.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,18 @@


if __name__== "__main__":
# download dataset
print("# Download dataset zip file")
zip_url = "http://www.openslr.org/resources/45/ST-AEDS-20180100_1-OS.tgz"
urllib.request.urlretrieve(zip_url, 'SLR45.tgz')

# extract and manage dataset files
print("# Mange and organize files")
os.system('python3 Code/DataManager.py')

# train speakers gmm models
print("# Train gender models")
os.system('python3 Code/ModelsTrainer.py')

# test system and recognise/identify speakers
print(" # Identify genders")
os.system('python3 Code/SpeakerIdentifier.py')
# download dataset
print("# Download dataset zip file")
zip_url = "http://www.openslr.org/resources/45/ST-AEDS-20180100_1-OS.tgz"
urllib.request.urlretrieve(zip_url, 'SLR45.tgz')

# extract and manage dataset files
print("# Mange and organize files")
os.system('python3 Code/DataManager.py')
# train speakers gmm models
print("# Train gender models")
os.system('python3 Code/ModelsTrainer.py')

# test system and recognise/identify speakers
print(" # Identify genders")
os.system('python3 Code/SpeakerIdentifier.py')