Skip to content

Commit 127e241

Browse files
committed
Minor code improvements to the CuDnn layers
1 parent 1711109 commit 127e241

File tree

3 files changed

+64
-84
lines changed

3 files changed

+64
-84
lines changed

NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs

Lines changed: 41 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -74,71 +74,63 @@ public CuDnnConvolutionalLayer(
7474
#region Implementation
7575

7676
/// <inheritdoc/>
77-
public override unsafe void Forward(in Tensor x, out Tensor z, out Tensor a)
77+
public override void Forward(in Tensor x, out Tensor z, out Tensor a)
7878
{
79-
fixed (float* pw = Weights)
79+
using (DeviceMemory<float> z_gpu = DnnInstance.Gpu.AllocateDevice<float>(x.Entities * OutputInfo.Size))
8080
{
81-
Tensor.Reshape(pw, OutputInfo.Channels, KernelInfo.Size, out Tensor wTensor);
82-
using (DeviceMemory<float> z_gpu = DnnInstance.Gpu.AllocateDevice<float>(x.Entities * OutputInfo.Size))
81+
// Tensors info setup
82+
InputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width);
83+
OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OutputInfo.Channels, OutputInfo.Height, OutputInfo.Width);
84+
85+
// Forward convolution
86+
DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, FilterDescription, ConvolutionDescription, OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm);
87+
DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, FilterDescription, ConvolutionDescription, OutputDescription, algorithm, out IntPtr size);
88+
using (DeviceMemory<float>
89+
x_gpu = DnnInstance.Gpu.AllocateDevice(x),
90+
w_gpu = DnnInstance.Gpu.AllocateDevice(Weights))
91+
using (DeviceMemory<byte> workspace_gpu = DnnInstance.Gpu.AllocateDevice<byte>(size))
8392
{
84-
// Tensors info setup
85-
InputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width);
86-
OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OutputInfo.Channels, OutputInfo.Height, OutputInfo.Width);
87-
88-
// Forward convolution
89-
DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, FilterDescription, ConvolutionDescription, OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm);
90-
DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, FilterDescription, ConvolutionDescription, OutputDescription, algorithm, out IntPtr size);
91-
using (DeviceMemory<float>
92-
x_gpu = DnnInstance.Gpu.AllocateDevice(x),
93-
w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor))
94-
using (DeviceMemory<byte> workspace_gpu = DnnInstance.Gpu.AllocateDevice<byte>(size))
95-
{
96-
DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, FilterDescription, w_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, OutputDescription, z_gpu.Ptr);
97-
}
93+
DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, FilterDescription, w_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, OutputDescription, z_gpu.Ptr);
94+
}
9895

99-
// Biases
100-
using (DeviceMemory<float> b_gpu = DnnInstance.Gpu.AllocateDevice(Biases))
101-
{
102-
DnnInstance.AddTensor(1, BiasDescription, b_gpu.Ptr, 1, OutputDescription, z_gpu.Ptr);
103-
}
104-
z_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z);
96+
// Biases
97+
using (DeviceMemory<float> b_gpu = DnnInstance.Gpu.AllocateDevice(Biases))
98+
{
99+
DnnInstance.AddTensor(1, BiasDescription, b_gpu.Ptr, 1, OutputDescription, z_gpu.Ptr);
100+
}
101+
z_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z);
105102

106-
// Activation
107-
if (ActivationFunctionType == ActivationFunctionType.Identity) z.Duplicate(out a);
108-
else
109-
{
110-
DnnInstance.ActivationForward(z.Entities, z.Length, z_gpu.Ptr, z_gpu.Ptr, ActivationFunctions.Activation);
111-
z_gpu.CopyToHost(z.Entities, z.Length, out a);
112-
}
103+
// Activation
104+
if (ActivationFunctionType == ActivationFunctionType.Identity) z.Duplicate(out a);
105+
else
106+
{
107+
DnnInstance.ActivationForward(z.Entities, z.Length, z_gpu.Ptr, z_gpu.Ptr, ActivationFunctions.Activation);
108+
z_gpu.CopyToHost(z.Entities, z.Length, out a);
113109
}
114110
}
115111
}
116112

117113
/// <inheritdoc/>
118-
public override unsafe void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime)
114+
public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime)
119115
{
120-
fixed (float* pw = Weights)
116+
using (DeviceMemory<float> delta_gpu = DnnInstance.Gpu.AllocateDevice<float>(z.Size))
121117
{
122-
Tensor.Reshape(pw, OutputInfo.Channels, KernelInfo.Size, out Tensor wTensor);
118+
// Convolution
123119
DnnInstance.GetConvolutionBackwardDataAlgorithm(FilterDescription, OutputDescription, ConvolutionDescription, InputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdDataAlgo algorithm);
124120
DnnInstance.GetConvolutionBackwardDataWorkspaceSize(FilterDescription, OutputDescription, ConvolutionDescription, InputDescription, algorithm, out IntPtr size);
125-
using (DeviceMemory<float> delta_gpu = DnnInstance.Gpu.AllocateDevice<float>(z.Size))
121+
using (DeviceMemory<float>
122+
delta_1_gpu = DnnInstance.Gpu.AllocateDevice(delta_1),
123+
w_gpu = DnnInstance.Gpu.AllocateDevice(Weights))
124+
using (DeviceMemory<byte> workspace_gpu = DnnInstance.Gpu.AllocateDevice<byte>(size))
126125
{
127-
// Backwards convolution
128-
using (DeviceMemory<float>
129-
delta_1_gpu = DnnInstance.Gpu.AllocateDevice(delta_1),
130-
w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor))
131-
using (DeviceMemory<byte> workspace_gpu = DnnInstance.Gpu.AllocateDevice<byte>(size))
132-
{
133-
DnnInstance.ConvolutionBackwardData(1, FilterDescription, w_gpu.Ptr, OutputDescription, delta_1_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, InputDescription, delta_gpu.Ptr);
134-
}
126+
DnnInstance.ConvolutionBackwardData(1, FilterDescription, w_gpu.Ptr, OutputDescription, delta_1_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, InputDescription, delta_gpu.Ptr);
127+
}
135128

136-
// Activation
137-
using (DeviceMemory<float> z_gpu = DnnInstance.Gpu.AllocateDevice(z))
138-
{
139-
DnnInstance.ActivationBackward(z.Entities, z.Length, z_gpu.Ptr, delta_gpu.Ptr, activationPrime);
140-
z_gpu.CopyTo(z);
141-
}
129+
// Activation
130+
using (DeviceMemory<float> z_gpu = DnnInstance.Gpu.AllocateDevice(z))
131+
{
132+
DnnInstance.ActivationBackward(z.Entities, z.Length, z_gpu.Ptr, delta_gpu.Ptr, activationPrime);
133+
z_gpu.CopyTo(z);
142134
}
143135
}
144136
}

NeuralNetwork.NET.Cuda/Layers/CuDnnFullyConnectedLayer.cs

Lines changed: 17 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -30,39 +30,31 @@ public CuDnnFullyConnectedLayer(in TensorInfo input, int neurons, [NotNull] floa
3030
#region Implementation
3131

3232
/// <inheritdoc/>
33-
public override unsafe void Forward(in Tensor x, out Tensor z, out Tensor a)
33+
public override void Forward(in Tensor x, out Tensor z, out Tensor a)
3434
{
35-
fixed (float* pw = Weights)
35+
using (DeviceMemory<float>
36+
x_gpu = DnnInstance.Gpu.AllocateDevice(x),
37+
w_gpu = DnnInstance.Gpu.AllocateDevice(Weights),
38+
y_gpu = DnnInstance.Gpu.AllocateDevice<float>(x.Entities * OutputInfo.Size),
39+
b_gpu = DnnInstance.Gpu.AllocateDevice(Biases))
3640
{
37-
Tensor.Reshape(pw, InputInfo.Size, OutputInfo.Size, out Tensor wTensor);
38-
using (DeviceMemory<float>
39-
x_gpu = DnnInstance.Gpu.AllocateDevice(x),
40-
w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor),
41-
y_gpu = DnnInstance.Gpu.AllocateDevice<float>(x.Entities * OutputInfo.Size),
42-
b_gpu = DnnInstance.Gpu.AllocateDevice(Biases))
43-
{
44-
DnnInstance.FullyConnectedForward(x.Entities, x.Length, OutputInfo.Size, x_gpu.Ptr, w_gpu.Ptr, b_gpu.Ptr, y_gpu.Ptr);
45-
y_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z);
46-
DnnInstance.ActivationForward(z.Entities, z.Length, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation);
47-
y_gpu.CopyToHost(z.Entities, z.Length, out a);
48-
}
41+
DnnInstance.FullyConnectedForward(x.Entities, x.Length, OutputInfo.Size, x_gpu.Ptr, w_gpu.Ptr, b_gpu.Ptr, y_gpu.Ptr);
42+
y_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z);
43+
DnnInstance.ActivationForward(z.Entities, z.Length, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation);
44+
y_gpu.CopyToHost(z.Entities, z.Length, out a);
4945
}
5046
}
5147

5248
/// <inheritdoc/>
53-
public override unsafe void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime)
49+
public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime)
5450
{
55-
fixed (float* pw = Weights)
51+
using (DeviceMemory<float>
52+
delta_1_gpu = DnnInstance.Gpu.AllocateDevice(delta_1),
53+
w_gpu = DnnInstance.Gpu.AllocateDevice(Weights),
54+
z_gpu = DnnInstance.Gpu.AllocateDevice(z))
5655
{
57-
Tensor.Reshape(pw, InputInfo.Size, OutputInfo.Size, out Tensor wTensor);
58-
using (DeviceMemory<float>
59-
delta_1_gpu = DnnInstance.Gpu.AllocateDevice(delta_1),
60-
w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor),
61-
z_gpu = DnnInstance.Gpu.AllocateDevice(z))
62-
{
63-
DnnInstance.FullyConnectedBackwardData(z.Entities, InputInfo.Size, OutputInfo.Size, z_gpu.Ptr, delta_1_gpu.Ptr, w_gpu.Ptr, activationPrime);
64-
z_gpu.CopyTo(z);
65-
}
56+
DnnInstance.FullyConnectedBackwardData(z.Entities, InputInfo.Size, OutputInfo.Size, z_gpu.Ptr, delta_1_gpu.Ptr, w_gpu.Ptr, activationPrime);
57+
z_gpu.CopyTo(z);
6658
}
6759
}
6860

NeuralNetwork.NET.Cuda/Layers/CuDnnSoftmaxLayer.cs

Lines changed: 6 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -42,17 +42,13 @@ public override unsafe void Forward(in Tensor x, out Tensor z, out Tensor a)
4242
using (DeviceMemory<float> z_gpu = DnnInstance.Gpu.AllocateDevice<float>(x.Entities * OutputInfo.Size))
4343
{
4444
// Linear pass
45-
fixed (float* pw = Weights)
45+
using (DeviceMemory<float>
46+
x_gpu = DnnInstance.Gpu.AllocateDevice(x),
47+
w_gpu = DnnInstance.Gpu.AllocateDevice(Weights),
48+
b_gpu = DnnInstance.Gpu.AllocateDevice(Biases))
4649
{
47-
Tensor.Reshape(pw, InputInfo.Size, OutputInfo.Size, out Tensor wTensor);
48-
using (DeviceMemory<float>
49-
x_gpu = DnnInstance.Gpu.AllocateDevice(x),
50-
w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor),
51-
b_gpu = DnnInstance.Gpu.AllocateDevice(Biases))
52-
{
53-
DnnInstance.FullyConnectedForward(x.Entities, x.Length, OutputInfo.Size, x_gpu.Ptr, w_gpu.Ptr, b_gpu.Ptr, z_gpu.Ptr);
54-
z_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z);
55-
}
50+
DnnInstance.FullyConnectedForward(x.Entities, x.Length, OutputInfo.Size, x_gpu.Ptr, w_gpu.Ptr, b_gpu.Ptr, z_gpu.Ptr);
51+
z_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z);
5652
}
5753

5854
// Activation

0 commit comments

Comments
 (0)