
Commit 8f636c0

Convolution filter backpropagation method finished
1 parent 220b7e3 commit 8f636c0

5 files changed: +50 -24 lines changed

NeuralNetwork.NET/Networks/Layers/Cpu/ConvolutionalLayer.cs

Lines changed: 1 addition & 1 deletion
@@ -110,7 +110,7 @@ public override unsafe void Backpropagate(in Tensor x, in Tensor dy, in Tensor z
     public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJdw, out Tensor dJdb)
     {
         Tensor.New(OutputInfo.Channels, KernelInfo.Size, out Tensor dw);
-        CpuDnn.ConvolutionBackwardFilter(a, InputInfo, delta, OutputInfo, dw);
+        CpuDnn.ConvolutionBackwardFilter(a, InputInfo, delta, OutputInfo, dw, KernelInfo);
         dw.Reshape(1, Weights.Length, out dJdw);
         Tensor.New(1, Biases.Length, out dJdb);
         CpuDnn.ConvolutionBackwardBias(delta, OutputInfo, dJdb);
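
For a sense of the shapes involved: with 3 input channels and 20 5x5 kernels (hypothetical sizes, and assuming KernelInfo.Size is the full per-kernel volume), the method above would allocate and flatten the gradient like this:

    // dw: one row per kernel, each row holding a 5 * 5 * 3 = 75-element kernel gradient
    Tensor.New(20, 75, out Tensor dw);
    CpuDnn.ConvolutionBackwardFilter(a, InputInfo, delta, OutputInfo, dw, KernelInfo);
    dw.Reshape(1, 20 * 75, out Tensor dJdw); // flatten to the 1500-element Weights vector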

NeuralNetwork.NET/cpuDNN/CpuBlas.cs

Lines changed: 4 additions & 1 deletion
@@ -5,6 +5,9 @@
 
 namespace NeuralNetworkNET.cpuDNN
 {
+    /// <summary>
+    /// A class that exposes static BLAS (Basic Linear Algebra Subprograms) methods working on <see cref="Tensor"/> instances
+    /// </summary>
     public static class CpuBlas
     {
         /// <summary>
@@ -66,7 +69,7 @@ void Kernel(int i)
         }
 
         /// <summary>
-        /// Performs the in place multiplication (Hadamard product) product between two <see cref="Tensor"/> instances
+        /// Performs the elementwise multiplication (Hadamard product) between two <see cref="Tensor"/> instances
         /// </summary>
         /// <param name="x1">The first <see cref="Tensor"/></param>
        /// <param name="x2">The second <see cref="Tensor"/></param>

NeuralNetwork.NET/cpuDNN/CpuDnn{Convolution}.cs

Lines changed: 24 additions & 9 deletions
@@ -139,7 +139,7 @@ public static unsafe void ConvolutionBackwardData(
             if (imgSize < kSize) throw new ArgumentException("Each subdivided tensor must at least have the size of the kernels");
             if (dyInfo.Channels != nKernels) throw new ArgumentException("The source depth must be equal to the number of kernels");
 
-            // Traanspose the layer kernels
+            // Rotate the layer kernels
             Rotate180(w, wInfo.Channels, out Tensor w180);
 
             /* ============================
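
Rotating a 2D kernel by 180 degrees is equivalent to reversing its row-major buffer, which is presumably what Rotate180 does to each channel slice; a standalone sketch for a single slice (illustrative only, not the library source):

    static float[] Rotate180(float[] kernel)
    {
        // Reversing a row-major 2D array flips it both vertically and horizontally,
        // i.e. rotates it by 180 degrees
        float[] rotated = new float[kernel.Length];
        for (int i = 0; i < kernel.Length; i++) rotated[i] = kernel[kernel.Length - 1 - i];
        return rotated;
    }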
@@ -212,11 +212,12 @@ void BackwardsKernel(int index)
         /// <param name="dy">The output error <see cref="Tensor"/></param>
         /// <param name="dyInfo">The output error volume info (depth and 2D slices size)</param>
         /// <param name="dw">The resulting weights gradient</param>
+        /// <param name="wInfo">The info on the layer kernels</param>
         /// <exception cref="ArgumentException">The size of one of the input <see cref="Tensor"/> instances isn't valid</exception>
         public static unsafe void ConvolutionBackwardFilter(
             in Tensor x, in TensorInfo xInfo,
             in Tensor dy, in TensorInfo dyInfo,
-            in Tensor dw)
+            in Tensor dw, in TensorInfo wInfo)
         {
             // Checks and local parameters
             int
@@ -244,15 +245,19 @@ public static unsafe void ConvolutionBackwardFilter(
              * Kernels: HK*WK*sourceDepth*kernelsDepth (delta(l + 1) used to calculate the 3D gradient for each kernel)
              * Output: sourceDepth*kernelsDepth slices, where each stack of sourceDepth slices is the gradient for the i-th kernel */
             int
-                hResult = imgHeight - kHeight + 1,                      // Size of each image edge after the convolution
+                hResult = imgHeight - kHeight + 1,                          // Size of each image edge after the convolution
                 wResult = imgWidth - kWidth + 1,
-                convolutionOutputSize = hResult * wResult,              // Size of each processed image
-                gradientSize = convolutionOutputSize * xInfo.Channels,  // Size of each calculated gradient (one for each original kernel, so for each input delta)
-                finalWidth = gradientSize * dyInfo.Channels,            // Final size of each sample row
-                iterationsPerSample = xInfo.Channels * kDepth;          // Each sample has its own list of 3D gradients, one for each kernel
+                convolutionOutputSize = hResult * wResult,                  // Size of each processed image
+                gradientSize = convolutionOutputSize * xInfo.Channels,      // Size of each calculated gradient (one for each original kernel, so for each input delta)
+                finalWidth = gradientSize * dyInfo.Channels,                // Final size of each sample row
+                iterationsPerSample = xInfo.Channels * kDepth;              // Each sample has its own list of 3D gradients, one for each kernel
+
+            // Rotate the inputs and prepare the temporary tensor
+            Rotate180(x, xInfo.Channels, out Tensor xt);
+            Tensor.New(x.Entities, finalWidth, out Tensor dwTemp);
 
             // Process the valid convolution
-            float* px = x, pdy = dy, pdw = dw;
+            float* px = xt, pdy = dy, pdw = dwTemp;
             void GradientKernel(int index)
             {
                 // Calculate the current indexes
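
Per (input channel p, kernel q) pair, each GradientKernel invocation effectively computes the valid cross-correlation of the rotated input slice with the matching delta slice; a standalone sketch of that inner computation for one 2D slice pair (illustrative, the real kernel works on flat buffers with unsafe pointers):

    static float[,] SliceGradient(float[,] x180, float[,] delta)
    {
        // Output size matches the layer kernel: e.g. 58x58 input and 54x54 delta -> 5x5 gradient
        int kh = x180.GetLength(0) - delta.GetLength(0) + 1,
            kw = x180.GetLength(1) - delta.GetLength(1) + 1;
        float[,] dw = new float[kh, kw];
        for (int i = 0; i < kh; i++)
            for (int j = 0; j < kw; j++)
                for (int u = 0; u < delta.GetLength(0); u++)
                    for (int v = 0; v < delta.GetLength(1); v++)
                        dw[i, j] += x180[i + u, j + v] * delta[u, v];
        return dw;
    }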
@@ -291,7 +296,17 @@ void GradientKernel(int index)
                 }
             }
             Parallel.For(0, n * iterationsPerSample, GradientKernel).AssertCompleted();
-            throw new NotImplementedException("The CPU gradient convolution isn't implemented correctly yet");
+            xt.Free();
+
+            /* ==========================
+             * Gradient compression
+             * ==========================
+             * At this point, the temporary tensor has the series of (p,q) gradients for all the layer
+             * kernels, where p is the input depth and q is the kernel index.
+             * The final weights gradient is the sum for all the samples in the current training batch */
+            dw.Reshape(1, dw.Size, out Tensor wPlane); // The gradient is [q,p]-shaped, flatten to the size of each sample before compressing
+            CpuBlas.CompressVertically(dwTemp, wPlane);
+            dwTemp.Free();
         }
 
         /// <summary>
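
Going by the comment above, CpuBlas.CompressVertically reduces the per-sample rows of the temporary tensor into the single final gradient row, i.e. a column-wise sum over the batch; a minimal standalone sketch of that reduction (an assumption about its semantics, not the library source):

    static float[] CompressVertically(float[,] m)
    {
        // Sum the rows of a [samples, finalWidth] matrix into one [finalWidth] vector
        int rows = m.GetLength(0), cols = m.GetLength(1);
        float[] sum = new float[cols];
        for (int i = 0; i < rows; i++)
            for (int j = 0; j < cols; j++)
                sum[j] += m[i, j];
        return sum;
    }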

Samples/DigitsTest/Program.cs

Lines changed: 14 additions & 5 deletions
@@ -6,7 +6,6 @@
 using NeuralNetworkNET.APIs.Results;
 using NeuralNetworkNET.APIs.Structs;
 using NeuralNetworkNET.Networks.Activations;
-using NeuralNetworkNET.Networks.Cost;
 using NeuralNetworkNET.SupervisedLearning.Optimization.Parameters;
 using NeuralNetworkNET.SupervisedLearning.Optimization.Progress;
 
@@ -18,10 +17,20 @@ static async Task Main()
         {
             (var training, var test) = DataParser.LoadDatasets();
             INeuralNetwork network = NetworkManager.NewSequential(TensorInfo.CreateForGrayscaleImage(28, 28),
-                NetworkLayers.FullyConnected(100, ActivationFunctionType.Sigmoid),
-                NetworkLayers.FullyConnected(10, ActivationFunctionType.Sigmoid, CostFunctionType.CrossEntropy));
-            TrainingSessionResult result = await NetworkManager.TrainNetworkAsync(network, (training.X, training.Y), 60, 10,
-                TrainingAlgorithmsInfo.StochasticGradientDescent(), 0.5f,
+                NetworkLayers.Convolutional((5, 5), 20, ActivationFunctionType.Identity),
+                NetworkLayers.Pooling(ActivationFunctionType.LeakyReLU),
+                NetworkLayers.FullyConnected(100, ActivationFunctionType.LeCunTanh),
+                NetworkLayers.Softmax(10));
+            TrainingSessionResult result = await NetworkManager.TrainNetworkAsync(network, (training.X, training.Y), 60, 100,
+                TrainingAlgorithmsInfo.Adadelta(), 0.5f,
+                new Progress<BatchProgress>(p =>
+                {
+                    Console.SetCursorPosition(0, Console.CursorTop);
+                    int n = (int)(p.Percentage * 32 / 100);
+                    char[] c = new char[32];
+                    for (int i = 0; i < 32; i++) c[i] = i <= n ? '=' : ' ';
+                    Console.Write($"[{new String(c)}] ");
+                }),
                 testParameters: new TestParameters(test, new Progress<BackpropagationProgressEventArgs>(p =>
                 {
                     Printf($"Epoch {p.Iteration}, cost: {p.Result.Cost}, accuracy: {p.Result.Accuracy}");

Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs

Lines changed: 7 additions & 8 deletions
@@ -58,7 +58,7 @@ private static unsafe void TestGradient(WeightedLayerBase cpu, WeightedLayerBase
             cpu.ComputeGradient(xt, deltat, out Tensor dJdw_cpu, out Tensor dJdb_cpu);
             gpu.ComputeGradient(xt, deltat, out Tensor dJdw_gpu, out Tensor dJdb_gpu);
             Assert.IsTrue(dJdw_cpu.ContentEquals(dJdw_gpu));
-            Assert.IsTrue(dJdb_cpu.ContentEquals(dJdb_gpu));
+            Assert.IsTrue(dJdb_cpu.ContentEquals(dJdb_gpu, 1e-4f, 1e-5f)); // The cuDNN ConvolutionBackwardBias is not always as precise as the CPU version
             dJdw_cpu.Free();
             dJdw_gpu.Free();
             dJdb_cpu.Free();
@@ -202,14 +202,13 @@ public unsafe void ConvolutionBackward()
         }
 
         [TestMethod]
-        public void ConvolutionGradient()
+        public unsafe void ConvolutionGradient()
         {
-            // TODO: CPU gradient not implemented yet
-            /* float[,]
-                x = WeightsProvider.NewFullyConnectedWeights(127, 58 * 58 * 3),
-                delta = WeightsProvider.NewFullyConnectedWeights(127, 54 * 54 * 20);
+            float[,]
+                x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(127), 58 * 58 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(127, 58 * 58 * 3),
+                delta = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(127), 54 * 54 * 5, WeightsInitializationMode.GlorotNormal).AsMatrix(127, 54 * 54 * 5);
             ConvolutionalLayer
-                cpu = new ConvolutionalLayer(new TensorInfo(58, 58, 3), ConvolutionInfo.Default, (5, 5), 20, ActivationFunctionType.LeCunTanh, BiasInitializationMode.Gaussian),
+                cpu = new ConvolutionalLayer(new TensorInfo(58, 58, 3), ConvolutionInfo.Default, (5, 5), 5, ActivationFunctionType.LeCunTanh, BiasInitializationMode.Gaussian),
                 gpu = new CuDnnConvolutionalLayer(cpu.InputInfo, ConvolutionInfo.Default, cpu.KernelInfo, cpu.OutputInfo, cpu.Weights, cpu.Biases, ActivationFunctionType.LeCunTanh);
             fixed (float* px = x)
             {
@@ -218,7 +217,7 @@ public void ConvolutionGradient()
                 z_gpu.Free();
                 a_gpu.Free();
             }
-            TestGradient(cpu, gpu, x, delta); */
+            TestGradient(cpu, gpu, x, delta);
         }
 
         #endregion
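
The two extra arguments passed to ContentEquals read like an absolute and a relative tolerance; a standalone sketch of that style of comparison (my reading of the semantics, not the library implementation):

    using System;

    static bool ApproxEquals(float[] a, float[] b, float absolute = 1e-4f, float relative = 1e-5f)
    {
        if (a.Length != b.Length) return false;
        for (int i = 0; i < a.Length; i++)
        {
            // Accept a pair of values if they are close in either absolute or relative terms
            float delta = Math.Abs(a[i] - b[i]);
            if (delta > absolute && delta > Math.Abs(a[i]) * relative) return false;
        }
        return true;
    }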
