Commit e2f8b97

Add files via upload

1 parent 4d1f901 commit e2f8b97

11 files changed: +530 -0 lines changed
Lines changed: 87 additions & 0 deletions
# This class is used in the 'Run TF from MATLAB' example

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np


class SpeechCommandRecognition(tf.Module):

    def make_model(self):
        # Define the model
        inputs = keras.Input(shape=(98, 50, 1))

        x = layers.Conv2D(12, 3, strides=1, padding='same')(inputs)
        x = layers.BatchNormalization(axis=3)(x)
        x = layers.Activation('relu')(x)

        x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)

        x = layers.Conv2D(2 * 12, 3, strides=1, padding='same')(x)
        x = layers.BatchNormalization(axis=3)(x)
        x = layers.Activation('relu')(x)

        x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)

        x = layers.Conv2D(4 * 12, 3, strides=1, padding='same')(x)
        x = layers.BatchNormalization(axis=3)(x)
        x = layers.Activation('relu')(x)

        x = layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)

        x = layers.Conv2D(4 * 12, 3, strides=1, padding='same')(x)
        x = layers.BatchNormalization(axis=3)(x)
        x = layers.Activation('relu')(x)
        x = layers.Conv2D(4 * 12, 3, strides=1, padding='same')(x)
        x = layers.BatchNormalization(axis=3)(x)
        x = layers.Activation('relu')(x)

        x = layers.MaxPool2D(pool_size=(13, 1), strides=(1, 1), padding='valid')(x)

        x = layers.Dropout(rate=0.2)(x)

        x = layers.Flatten()(x)
        outputs = layers.Dense(11)(x)

        model = keras.Model(inputs=inputs, outputs=outputs)
        return model

    def loss(self, y, y_):
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        l = loss_object(y_true=y, y_pred=y_)
        return l

    def initializeAcc(self):
        self.epoch_loss_avg = tf.keras.metrics.Mean()
        self.epoch_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

    def __init__(self):
        super(SpeechCommandRecognition, self).__init__()
        self.model = self.make_model()
        lr = tf.Variable(0.0003, trainable=False, dtype=tf.float32)
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
        self.initializeAcc()

    def forward(self, x, y, training=False):
        # Add a trailing channel dimension: (batch, 98, 50) -> (batch, 98, 50, 1)
        x = np.expand_dims(x, 3)
        if training:
            with tf.GradientTape() as tape:
                z = self.model(x)
                loss_value = self.loss(y, z)
            grads = tape.gradient(loss_value, self.model.trainable_variables)

            self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))

            # Track progress
            self.epoch_loss_avg(loss_value)  # Add current batch loss
            # Compare predicted label to actual label
            self.epoch_accuracy(y, self.model(x))
        else:
            return self.model(x)

    def printAcc(self):
        print("Training loss: {:.3f}, Training accuracy: {:.3%}".format(self.epoch_loss_avg.result(),
                                                                        self.epoch_accuracy.result()))
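
Not part of the commit: a minimal sketch of how this class can be exercised, assuming features arrive as a (batch, 98, 50) array, which forward() expands to (batch, 98, 50, 1). The batch size, the random data, and the standalone-Python call pattern are illustrative; in the actual example the class is driven from MATLAB.

import numpy as np

net = SpeechCommandRecognition()
x = np.random.rand(16, 98, 50).astype(np.float32)  # dummy minibatch of auditory features
y = np.random.randint(0, 11, size=(16,))           # integer labels, 11 classes

net.initializeAcc()               # reset the running loss/accuracy metrics
net.forward(x, y, training=True)  # one optimizer step; metrics updated in place
net.printAcc()                    # prints the running loss and accuracy
logits = net.forward(x, y)        # inference path: returns logits of shape (16, 11)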

HelperFiles/cmdRecognition.onnx (230 KB)
Binary file not shown.
Lines changed: 234 additions & 0 deletions
# This code is used in the 'Run MATLAB from Python' example

# Copyright 2019-2021 The MathWorks, Inc.

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.onnx

import time
import os

cudaAvailable = torch.cuda.is_available()
if cudaAvailable:
    cuda = torch.device('cuda')

# Start a MATLAB engine
import matlab.engine
MLEngine = matlab.engine.start_matlab()

miniBatchSize = 128.0

# Prepare training dataset
class TrainData(Dataset):
    def __init__(self):
        # Create persistent training dataset in MATLAB
        MLEngine.setupDatasets(miniBatchSize)
        # Set the dataset length to the number of minibatches
        # in the training dataset
        self.len = int(MLEngine.getNumIterationsPerEpoch())

    def __getitem__(self, index):
        # Call MATLAB to get a minibatch of features + labels
        minibatch = MLEngine.extractTrainingFeatures()
        x = torch.FloatTensor(minibatch.get('features'))
        y = torch.FloatTensor(minibatch.get('labels'))
        return x, y

    def __len__(self):
        return int(self.len)

print('Setting up datastores...')
trainDataset = TrainData()
print('Datastore setup complete')
print('Minibatch size: ', int(miniBatchSize))
print('Number of training files: ', int(trainDataset.len * miniBatchSize))
print('Number of minibatches per epoch: ', int(trainDataset.len))

trainLoader = DataLoader(dataset=trainDataset, batch_size=1)

print('Computing validation features...')
# Prepare validation dataset
# Call MATLAB to compute validation features
valFeatures = MLEngine.extractValidationFeatures()
XValidation = valFeatures["features"]
YValidation = valFeatures["labels"]

# Create Data Class
class ValData(Dataset):
    # Constructor
    def __init__(self):
        self.x = XValidation
        self.y = YValidation
        self.len = self.y.size[0]

    # Getter
    def __getitem__(self, index):
        x = torch.FloatTensor(self.x[index])
        y = torch.FloatTensor(self.y[index])
        return x, y

    # Get length
    def __len__(self):
        return self.len

valDataset = ValData()
valLoader = DataLoader(dataset=valDataset, batch_size=valDataset.len)
print('Validation feature computation complete')

# Create the neural network
NumF = 12
numHops = 98
timePoolSize = 13
dropoutProb = 0.2
numClasses = 11

class CNN(nn.Module):

    # Constructor
    def __init__(self, out_1=NumF):
        super(CNN, self).__init__()
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=out_1, kernel_size=3, padding=1)
        self.batch1 = nn.BatchNorm2d(out_1)
        self.relu1 = nn.ReLU()

        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.cnn2 = nn.Conv2d(in_channels=out_1, out_channels=2 * out_1, kernel_size=3, padding=1)
        self.batch2 = nn.BatchNorm2d(2 * out_1)
        self.relu2 = nn.ReLU()

        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.cnn3 = nn.Conv2d(in_channels=2 * out_1, out_channels=4 * out_1, kernel_size=3, padding=1)
        self.batch3 = nn.BatchNorm2d(4 * out_1)
        self.relu3 = nn.ReLU()

        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.cnn4 = nn.Conv2d(in_channels=4 * out_1, out_channels=4 * out_1, kernel_size=3, padding=1)
        self.batch4 = nn.BatchNorm2d(4 * out_1)
        self.relu4 = nn.ReLU()
        self.cnn5 = nn.Conv2d(in_channels=4 * out_1, out_channels=4 * out_1, kernel_size=3, padding=1)
        self.batch5 = nn.BatchNorm2d(4 * out_1)
        self.relu5 = nn.ReLU()

        self.maxpool4 = nn.MaxPool2d(kernel_size=(timePoolSize, 1))

        self.dropout = nn.Dropout2d(dropoutProb)

        self.fc = nn.Linear(336, numClasses)

        # self.softmax = nn.Softmax(dim=1)

    # Prediction
    def forward(self, x):
        out = self.cnn1(x)
        out = self.batch1(out)
        out = self.relu1(out)

        out = self.maxpool1(out)

        out = self.cnn2(out)
        out = self.batch2(out)
        out = self.relu2(out)

        out = self.maxpool2(out)

        out = self.cnn3(out)
        out = self.batch3(out)
        out = self.relu3(out)

        out = self.maxpool3(out)

        out = self.cnn4(out)
        out = self.batch4(out)
        out = self.relu4(out)
        out = self.cnn5(out)
        out = self.batch5(out)
        out = self.relu5(out)

        out = self.maxpool4(out)

        out = self.dropout(out)

        out = out.view(out.size(0), -1)
        out = self.fc(out)
        # out = self.softmax(out)

        return out

model = CNN()
if cudaAvailable:
    model.cuda()

# Define training parameters
n_epochs = 25
criterion = nn.CrossEntropyLoss()
learning_rate = 3e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_list = []
accuracy_list = []
numValItems = len(valDataset)

doValidation = True

print('Training...')

for epoch in range(n_epochs):

    # Drop the learning rate for the final epochs
    if epoch == 20:
        for g in optimizer.param_groups:
            g['lr'] = 3e-5

    count = 0
    for batch in trainLoader:
        count += 1
        print('Epoch ', epoch + 1, ' Iteration', count, ' of ', trainDataset.len)
        if cudaAvailable:
            x = batch[0].cuda()
            y = batch[1].cuda()
        else:
            x = batch[0]
            y = batch[1]
        optimizer.zero_grad()
        z = model(torch.squeeze(x.float(), 0))
        loss = criterion(z, torch.squeeze(y).long())
        loss.backward()
        optimizer.step()

    if doValidation:
        correct = 0
        # Perform a prediction on the validation data
        for x_test, y_test in valLoader:
            if cudaAvailable:
                x_test = x_test.cuda()
                y_test = y_test.cuda()
            z = model(x_test.float())
            _, yhat = torch.max(z.data, 1)
            correct += (yhat == y_test.squeeze()).sum().item()
        accuracy = correct / numValItems
        print('Validation accuracy: ', accuracy)
        accuracy_list.append(accuracy)
        loss_list.append(loss.data)

# Export the trained model to ONNX format
if cudaAvailable:
    x = torch.empty(1, 1, 98, 50).cuda()
else:
    x = torch.empty(1, 1, 98, 50)

torch.onnx.export(model,
                  x,
                  "cmdRecognition.onnx",
                  export_params=True,
                  opset_version=9,
                  do_constant_folding=True,
                  input_names=['input'],
                  output_names=['output'])

print('Training complete')
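
A quick sanity check on the exported network, not part of the commit: reload cmdRecognition.onnx and run a dummy input through it. This assumes the onnxruntime package is installed; the input/output names match those passed to torch.onnx.export above.

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("cmdRecognition.onnx", providers=["CPUExecutionProvider"])
dummy = np.zeros((1, 1, 98, 50), dtype=np.float32)  # same shape as the export example input
logits = sess.run(["output"], {"input": dummy})[0]  # names set in torch.onnx.export
print(logits.shape)  # expected: (1, 11)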

HelperFiles/extractTrainingFeatures.m

Lines changed: 54 additions & 0 deletions
function values = extractTrainingFeatures
% extractTrainingFeatures Extract one minibatch of training features
%
% This function is used in the 'Run MATLAB from Python' example. The
% function is called from inside Python.

% Copyright 2019-2020 The MathWorks, Inc.

persistent adsTrain augmenter fileIdx miniBatchSize numFiles

if isempty(adsTrain)

    [adsTrain, ~, miniBatchSize] = setupDatasets;
    numFiles = numel(adsTrain.Files);

    % Data augmenter (pitch shifter)
    augmenter = audioDataAugmenter('TimeStretchProbability', 0,...
        'VolumeControlProbability', 0,...
        'AddNoiseProbability', 0,...
        'TimeShiftProbability', 0,...
        'PitchShiftProbability', 0.75,...
        'SemitoneShiftRange', [-8 8]);

    fileIdx = 1;
end

% Reshuffle the datastore once every file has been consumed
if fileIdx > numFiles
    adsTrain = shuffle(adsTrain);
    fileIdx = 1;
end

% Extract a minibatch of features in parallel using tall (PCT)
adsSub = subset(adsTrain, fileIdx:fileIdx+miniBatchSize-1);
T = tall(adsSub);
augmentedTall = cellfun(@(x)augmentData(x,augmenter), T, "UniformOutput", false);
featureVectorsTall = cellfun(@(x)helperExtractAuditoryFeatures(x, 16e3), augmentedTall, "UniformOutput", false);
[~,XTrainC] = evalc('gather(featureVectorsTall)'); % evalc suppresses tall progress output
YTrainC = adsTrain.Labels(fileIdx:fileIdx+miniBatchSize-1);

fileIdx = fileIdx + miniBatchSize;

XTrain = cat(4, XTrainC{:});
features = permute(XTrain, [4 3 1 2]);

labels = YTrainC;

values.labels = labels;
values.features = features;

% -------------------------------------------------------------------------
function y = augmentData(x, augmenter)
x = single(x);
results = augment(augmenter, x, 16e3);
y = results.Audio{1};
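
Because of the persistent variables, successive calls from Python step through the datastore one minibatch at a time, reshuffling after each full pass. A minimal sketch of that contract from the Python side (hypothetical standalone driver; the training script above wraps these calls in its TrainData class):

import matlab.engine

eng = matlab.engine.start_matlab()
eng.setupDatasets(128.0)            # create the persistent datastore (helper from this repo)
b1 = eng.extractTrainingFeatures()  # minibatch 1: files 1..128
b2 = eng.extractTrainingFeatures()  # minibatch 2: files 129..256 (fileIdx persists)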
