More code refactoring, ready to remove legacy

Sergio0694 · Sergio0694 · commit 4e0d60cf2a7b · 2017-12-30T20:29:45.000+01:00
diff --git a/NeuralNetwork.NET/Networks/Layers/Cpu/ConvolutionalLayer.cs b/NeuralNetwork.NET/Networks/Layers/Cpu/ConvolutionalLayer.cs
@@ -109,10 +109,9 @@ public override unsafe void Backpropagate(in Tensor dy, in Tensor z, ActivationF
         /// <inheritdoc/>
         public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJdw, out Tensor dJdb)
         {
-            a.Rotate180(InputInfo.Channels, out Tensor a180);
-            ConvolutionExtensions.ConvoluteGradient(a180, InputInfo, delta, OutputInfo, out Tensor dJdwM);
-            dJdwM.Reshape(1, Weights.Length, out dJdw);
-            a180.Free();
+            Tensor.New(OutputInfo.Channels, KernelInfo.Size, out Tensor dw); // The gradient buffer is allocated here and written in place by the call below
+            CpuDnn.ConvolutionBackwardFilter(a, InputInfo, delta, OutputInfo, dw); // NOTE(review): a is no longer rotated by 180 degrees first, and the callee still ends with an unconditional throw - confirm
+            dw.Reshape(1, Weights.Length, out dJdw); // Reshapes dw to 1 x Weights.Length and returns it as dJdw
             Tensor.New(1, Biases.Length, out dJdb);
             CpuDnn.ConvolutionBackwardBias(delta, OutputInfo, dJdb);
         }
diff --git a/NeuralNetwork.NET/cpuDNN/CpuBlas.cs b/NeuralNetwork.NET/cpuDNN/CpuBlas.cs
@@ -65,6 +65,35 @@ void Kernel(int i)
             Parallel.For(0, n, Kernel).AssertCompleted();
         }
 
+        /// <summary>
+        /// Computes the element-wise difference between two <see cref="Tensor"/> instances and writes it to a third one
+        /// </summary>
+        /// <param name="x1">The <see cref="Tensor"/> to subtract from</param>
+        /// <param name="x2">The <see cref="Tensor"/> to subtract</param>
+        /// <param name="y">The destination <see cref="Tensor"/> - it can be the same as one of the inputs</param>
+        /// <exception cref="ArgumentException">The three tensors don't all have the same shape</exception>
+        internal static unsafe void Subtract(in Tensor x1, in Tensor x2, in Tensor y)
+        {
+            // Shape checks
+            if (!x1.MatchShape(x2)) throw new ArgumentException("The two input tensors must be of equal shape");
+            if (!x1.MatchShape(y)) throw new ArgumentException("The output tensor must have the same shape as the input tensors", nameof(y));
+            int entities = x1.Entities, length = x1.Length;
+
+            // Each parallel iteration handles one entity, that is a run of length consecutive values
+            float* pLeft = x1, pRight = x2, pTarget = y;
+            void SubtractEntity(int entity)
+            {
+                int start = entity * length;
+                float* left = pLeft + start, right = pRight + start, target = pTarget + start;
+                for (int k = 0; k < length; k++)
+                    target[k] = left[k] - right[k];
+            }
+            Parallel.For(0, entities, SubtractEntity).AssertCompleted();
+        }
+
+
         /// <summary>
         /// Compresses a <see cref="Tensor"/> into a row by summing the components column by column
         /// </summary>
diff --git a/NeuralNetwork.NET/cpuDNN/CpuDnn{Convolution}.cs b/NeuralNetwork.NET/cpuDNN/CpuDnn{Convolution}.cs
@@ -18,8 +18,7 @@ public static partial class CpuDnn
         /// <param name="wInfo">The kernels volume info (depth and 2D slices size)</param>
         /// <param name="b">The bias <see cref="Tensor"/> to sum to the resulting images</param>
         /// <param name="y">The resulting convolution <see cref="Tensor"/></param>
-        /// <exception cref="ArgumentException">The size of the matrix isn't valid, or the kernels list isn't valid</exception>
-        /// <exception cref="ArgumentOutOfRangeException">The size of the matrix doesn't match the expected values</exception>
+        /// <exception cref="ArgumentException">The size of one of the input <see cref="Tensor"/> instances isn't valid</exception>
         public static unsafe void ConvolutionForward(
             in Tensor x, in TensorInfo xInfo,
             in Tensor w, in TensorInfo wInfo,
@@ -42,9 +41,9 @@ public static unsafe void ConvolutionForward(
                 imgSize = xInfo.SliceSize,
                 imgHeight = xInfo.Height,
                 imgWidth = xInfo.Width;  // Size of an edge of one of the inner images per sample
-            if (imgSize * xInfo.Channels != l) throw new ArgumentException("Invalid depth parameter for the input matrix", nameof(x));
-            if (imgSize < kSize) throw new ArgumentOutOfRangeException("Each subdivided matrix must at least have the size of the kernels");
-            if (xInfo.Channels != wInfo.Channels) throw new InvalidOperationException("The depth of each kernel must be equal to the depth of each input volume");
+            if (imgSize * xInfo.Channels != l) throw new ArgumentException("Invalid depth parameter for the input tensor", nameof(x));
+            if (imgSize < kSize) throw new ArgumentException("Each subdivided tensor must at least have the size of the kernels");
+            if (xInfo.Channels != wInfo.Channels) throw new ArgumentException("The depth of each kernel must be equal to the depth of each input volume");
             if (b.Length != nKernels) throw new ArgumentException("The sum vector must be as long as the depth of the input volume");
 
             /* ============================
@@ -115,8 +114,7 @@ void ForwardKernel(int index)
         /// <param name="wInfo">The kernels volume info (depth and 2D slices size)</param>
         /// <param name="dx">The resulting backpropagated error <see cref="Tensor"/></param>
         /// <param name="dxInfo">The info on the layer inputs</param>
-        /// <exception cref="ArgumentException">The size of the matrix isn't valid, or the kernels list isn't valid</exception>
-        /// <exception cref="ArgumentOutOfRangeException">The size of the matrix doesn't match the expected values</exception>
+        /// <exception cref="ArgumentException">The size of one of the input <see cref="Tensor"/> instances isn't valid</exception>
         public static unsafe void ConvolutionBackwardData(
             in Tensor dy, in TensorInfo dyInfo,
             in Tensor w, in TensorInfo wInfo,
@@ -137,8 +135,8 @@ public static unsafe void ConvolutionBackwardData(
                 imgSize = dyInfo.SliceSize,
                 imgHeight = dyInfo.Height,
                 imgWidth = dyInfo.Width;
-            if (imgSize * dyInfo.Channels != l) throw new ArgumentException("Invalid depth parameter for the input matrix", nameof(dy));
-            if (imgSize < kSize) throw new ArgumentOutOfRangeException("Each subdivided matrix must at least have the size of the kernels");
+            if (imgSize * dyInfo.Channels != l) throw new ArgumentException("Invalid depth parameter for the input tensor", nameof(dy));
+            if (imgSize < kSize) throw new ArgumentException("Each subdivided tensor must at least have the size of the kernels");
             if (dyInfo.Channels != nKernels) throw new ArgumentException("The source depth must be equal to the number of kernels");
 
             // Transpose the layer kernels
@@ -209,17 +207,16 @@ void BackwardsKernel(int index)
         /// <summary>
         /// Performs the backward convolution operation for a network layer and computes the gradient with respect to the layer weights
         /// </summary>
-        /// <param name="x">The source matrix, where each row is a sample in the dataset and each one contains a series of images in row-first order</param>
+        /// <param name="x">The source <see cref="Tensor"/>, where each row is a sample in the dataset and each one contains a series of images in row-first order</param>
         /// <param name="xInfo">The source volume info (depth and 2D slices size)</param>
-        /// <param name="dy">The list of convolution kernels to apply to each image</param>
-        /// <param name="dyInfo">The kernels volume info (depth and 2D slices size)</param>
-        /// <param name="result">The resulting convolution volume</param>
-        /// <exception cref="ArgumentException">The size of the matrix isn't valid, or the kernels list isn't valid</exception>
-        /// <exception cref="ArgumentOutOfRangeException">The size of the matrix doesn't match the expected values</exception>
-        public static unsafe void ConvoluteGradient(
+        /// <param name="dy">The output error <see cref="Tensor"/></param>
+        /// <param name="dyInfo">The output error volume info (depth and 2D slices size)</param>
+        /// <param name="dw">The resulting weights gradient</param>
+        /// <exception cref="ArgumentException">The size of one of the input <see cref="Tensor"/> instances isn't valid</exception>
+        public static unsafe void ConvolutionBackwardFilter(
             in Tensor x, in TensorInfo xInfo,
             in Tensor dy, in TensorInfo dyInfo,
-            out Tensor result)
+            in Tensor dw)
         {
             // Checks and local parameters
             int
@@ -236,8 +233,8 @@ public static unsafe void ConvoluteGradient(
                 imgSize = xInfo.SliceSize,
                 imgHeight = xInfo.Height,
                 imgWidth = xInfo.Width;
-            if (imgSize * xInfo.Channels != l) throw new ArgumentException(nameof(x), "Invalid depth parameter for the input matrix");
-            if (imgSize < kSize) throw new ArgumentOutOfRangeException("Each subdivided matrix must at least have the size of the kernels");
+            if (imgSize * xInfo.Channels != l) throw new ArgumentException(nameof(x), "Invalid depth parameter for the input tensor");
+            if (imgSize < kSize) throw new ArgumentOutOfRangeException("Each subdivided tensor must at least have the size of the kernels");
             if (nKernels != n) throw new ArgumentException(nameof(dy), "There must be a delta volume for each activation sample");
 
             /* ============================
@@ -255,8 +252,7 @@ public static unsafe void ConvoluteGradient(
                 iterationsPerSample = xInfo.Channels * kDepth;             // Each sample has its own list of 3D gradients, one for each kernel
 
             // Process the valid convolution
-            Tensor.New(n, finalWidth, out result);
-            float* psource = x, pkernels = dy, presult = result;
+            float* px = x, pdy = dy, pdw = dw;
             void GradientKernel(int index)
             {
                 // Calculate the current indexes
@@ -287,16 +283,14 @@ void GradientKernel(int index)
                                 kernelRowOffset = kernelBaseOffset + (xEnd - r) * kWidth + highY;
                             for (int c = j; c <= highY; c++)
                             {
-                                temp += psource[sourceRowOffset + c] * pkernels[kernelRowOffset - c];
+                                temp += px[sourceRowOffset + c] * pdy[kernelRowOffset - c];
                             }
                         }
-                        presult[targetRowOffset + j] = temp;
+                        pdw[targetRowOffset + j] = temp;
                     }
                 }
             }
             Parallel.For(0, n * iterationsPerSample, GradientKernel).AssertCompleted();
-
-            // TODO: correct gradient implementation
             throw new NotImplementedException("The CPU gradient convolution isn't implemented correctly yet");
         }
 
@@ -306,6 +300,7 @@ void GradientKernel(int index)
         /// <param name="dy">The output error <see cref="Tensor"/></param>
         /// <param name="dyInfo">The info on the output <see cref="Tensor"/></param>
         /// <param name="db">The resulting gradient</param>
+        /// <exception cref="ArgumentException">The size of one of the input <see cref="Tensor"/> instances isn't valid</exception>
         [PublicAPI]
         public static unsafe void ConvolutionBackwardBias(in Tensor dy, in TensorInfo dyInfo, in Tensor db)
         {
@@ -314,9 +309,9 @@ public static unsafe void ConvolutionBackwardBias(in Tensor dy, in TensorInfo dy
                 depth = dyInfo.Channels,
                 h = dy.Entities,
                 w = dy.Length,
-                imgSize = w % depth == 0 ? w / depth : throw new ArgumentException(nameof(dy), "Invalid depth parameter for the input matrix"),
+                imgSize = w % depth == 0 ? w / depth : throw new ArgumentException("Invalid depth parameter for the input tensor", nameof(dy)),
                 imgAxis = imgSize.IntegerSquare();  // Size of an edge of one of the inner images per sample
-            if (imgAxis * imgAxis != imgSize) throw new ArgumentOutOfRangeException(nameof(dy), "The size of the input matrix isn't valid");
+            if (imgAxis * imgAxis != imgSize) throw new ArgumentException("The size of the input tensor isn't valid", nameof(dy));
             Tensor.New(h, depth, out Tensor temp);
 
             // Kernel to sum each slice
@@ -328,7 +323,7 @@ void Kernel(int index)
                     iSample = index / depth,    // Sample index
                     z = index % depth;          // 2D slice index
 
-                // Reverse the input matrix sequentially
+                // Reverse the input tensor sequentially
                 int baseOffset = iSample * w + z * imgSize;
                 float sum = 0;
                 for (int i = 0; i < imgSize; i++)
@@ -347,19 +342,19 @@ void Kernel(int index)
         /// <summary>
         /// Rotates the input volume by 180 degrees
         /// </summary>
-        /// <param name="x">The input matrix to rotate</param>
+        /// <param name="x">The input <see cref="Tensor"/> to rotate</param>
         /// <param name="depth">The number of images per row</param>
-        /// <param name="y">The rotated input matrix</param>
+        /// <param name="y">The rotated input <see cref="Tensor"/></param>
         private static unsafe void Rotate180(in Tensor x, int depth, out Tensor y)
         {
             // Checks and local parameters
-            if (depth < 1) throw new ArgumentOutOfRangeException(nameof(depth), "The number of images per row can't be lower than 1");
+            if (depth < 1) throw new ArgumentException("The number of images per row can't be lower than 1", nameof(depth));
             int
                 n = x.Entities,
                 l = x.Length,
-                imgSize = l % depth == 0 ? l / depth : throw new ArgumentException(nameof(x), "Invalid depth parameter for the input matrix"),
+                imgSize = l % depth == 0 ? l / depth : throw new ArgumentException("Invalid depth parameter for the input tensor", nameof(x)),
                 imgAxis = imgSize.IntegerSquare();  // Size of an edge of one of the inner images per sample
-            if (imgAxis * imgAxis != imgSize) throw new ArgumentOutOfRangeException(nameof(x), "The size of the input matrix isn't valid");
+            if (imgAxis * imgAxis != imgSize) throw new ArgumentException("The size of the input tensor isn't valid", nameof(x));
             int
                 threshold = imgSize / 2,
                 edge = imgSize - 1;
@@ -375,7 +370,7 @@ void Kernel(int index)
                     iSample = index / depth,    // Sample index
                     z = index % depth;          // 2D slice index
 
-                // Reverse the input matrix sequentially
+                // Reverse the input tensor sequentially
                 int baseOffset = iSample * l + z * imgSize;
                 for (int i = 0; i < threshold; i++)
                 {
diff --git a/NeuralNetwork.NET/cpuDNN/CpuDnn{Main}.cs b/NeuralNetwork.NET/cpuDNN/CpuDnn{Main}.cs
@@ -3,6 +3,7 @@
 using JetBrains.Annotations;
 using NeuralNetworkNET.APIs.Structs;
 using NeuralNetworkNET.Extensions;
+using NeuralNetworkNET.Networks.Activations;
 using NeuralNetworkNET.Networks.Activations.Delegates;
 
 namespace NeuralNetworkNET.cpuDNN
@@ -40,6 +41,47 @@ void Kernel(int i)
             Parallel.For(0, n, Kernel).AssertCompleted();
         }
 
+        /// <summary>
+        /// Performs the softmax activation on the input <see cref="Tensor"/> and normalizes each entity of the output by the sum of its activated values
+        /// </summary>
+        /// <param name="x">The input <see cref="Tensor"/></param>
+        /// <param name="y">The output <see cref="Tensor"/>, with the same shape as <paramref name="x"/></param>
+        /// <exception cref="ArgumentException">The input and output tensors don't have the same shape</exception>
+        public static unsafe void SoftmaxForward(in Tensor x, in Tensor y)
+        {
+            // Setup
+            if (!x.MatchShape(y)) throw new ArgumentException("The input tensor doesn't have the same shape as the output tensor");
+            int n = x.Entities, l = x.Length;
+            float* px = x, py = y;
+
+            // Every entity is normalized only by its own sum, so the activation and the normalization
+            // run in the same parallel iteration: no temporary tensor to allocate (or to leak if a
+            // kernel throws) and no second parallel pass over the data
+            void Kernel(int i)
+            {
+                int offset = i * l;
+
+                // Activation, with the aggregate for the current entity
+                float sum = 0;
+                for (int j = 0; j < l; j++)
+                {
+                    int target = offset + j;
+                    float value = ActivationFunctions.Softmax(px[target]);
+                    py[target] = value;
+                    sum += value;
+                }
+
+                // Normalization of the values of the current entity
+                for (int j = 0; j < l; j++)
+                    py[offset + j] /= sum;
+            }
+            Parallel.For(0, n, Kernel).AssertCompleted();
+        }
+
         /// <summary>
         /// Executes the backward activation function on the target <see cref="Tensor"/>, with the given error delta
         /// </summary>
@@ -83,7 +125,7 @@ void Kernel(int i)
         /// <param name="y">The output <see cref="Tensor"/> for the current layer</param>
         public static unsafe void FullyConnectedForward(in Tensor x, in Tensor w, in Tensor b, in Tensor y)
         {
-            // Initialize the parameters and the result matrix
+            // Initialize the parameters and the result tensor
             if (x.Length != w.Entities) throw new ArgumentOutOfRangeException("Invalid tensors shapes");
             if (!b.MatchShape(1, w.Length)) throw new ArgumentException("Invalid biases shape", nameof(b));
             if (!y.MatchShape(x.Entities, w.Length)) throw new ArgumentException("The output tensor doesn't have the right shape", nameof(y));
@@ -128,7 +170,7 @@ public static unsafe void FullyConnectedBackwardData(in Tensor x, in Tensor w, i
             Tensor.New(w.Length, w.Entities, out Tensor wt);
             CpuBlas.Transpose(w, wt);
 
-            // Initialize the parameters and the result matrix
+            // Initialize the parameters and the result tensor
             int 
                 h = dy.Entities,
                 l = dy.Length,
@@ -149,7 +191,7 @@ void Kernel(int i)
                         res += pdy[i1 + q] * pwt[i2];
                     }
 
-                    // res has now the matrix multiplication result for position [i, j]
+                    // res has now the tensor multiplication result for position [i, j]
                     int zIndex = i * k + j;
                     pdx[zIndex] = f_(px[zIndex]) * res;
                 }
@@ -187,7 +229,7 @@ public static unsafe void FullyConnectedBackwardBias(in Tensor dy, in Tensor db)
                 l = dy.Length;
             float* pdy = dy, pdb = db;
 
-            // Compress the matrix
+            // Compress the tensor
             void Kernel(int j)
             {
                 float sum = 0;
diff --git a/NeuralNetwork.NET/cpuDNN/CpuDnn{Pooling}.cs b/NeuralNetwork.NET/cpuDNN/CpuDnn{Pooling}.cs

Original file line number	Diff line number	Diff line change
`@@ -109,10 +109,9 @@ public override unsafe void Backpropagate(in Tensor dy, in Tensor z, ActivationF`
`109`	`109`	`/// <inheritdoc/>`
`110`	`110`	`public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJdw, out Tensor dJdb)`
`111`	`111`	`{`
`112`		`- a.Rotate180(InputInfo.Channels, out Tensor a180);`
`113`		`- ConvolutionExtensions.ConvoluteGradient(a180, InputInfo, delta, OutputInfo, out Tensor dJdwM);`
`114`		`- dJdwM.Reshape(1, Weights.Length, out dJdw);`
`115`		`- a180.Free();`
	`112`	`+ Tensor.New(OutputInfo.Channels, KernelInfo.Size, out Tensor dw);`
	`113`	`+ CpuDnn.ConvolutionBackwardFilter(a, InputInfo, delta, OutputInfo, dw);`
	`114`	`+ dw.Reshape(1, Weights.Length, out dJdw);`
`116`	`115`	`Tensor.New(1, Biases.Length, out dJdb);`
`117`	`116`	`CpuDnn.ConvolutionBackwardBias(delta, OutputInfo, dJdb);`
`118`	`117`	`}`