Commit bfa35a7

Merge pull request #49 from Sergio0694/master
Latest changes
2 parents 7851e01 + 2d56d11 commit bfa35a7

24 files changed: +1646 additions, -118 deletions

NeuralNetwork.NET.Cuda/APIS/CuDnnNetworkLayers.cs renamed to NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayers.cs

Lines changed: 13 additions & 0 deletions
@@ -67,5 +67,18 @@ public static INetworkLayer Convolutional(
         [PublicAPI]
         [Pure, NotNull]
         public static INetworkLayer Pooling(in TensorInfo input, in PoolingInfo info, ActivationFunctionType activation) => new CuDnnPoolingLayer(input, info, activation);
+
+        /// <summary>
+        /// Creates a new inception layer with the given input and features
+        /// </summary>
+        /// <param name="input">The input volume to process</param>
+        /// <param name="info">The info on the operations to execute inside the layer</param>
+        /// <param name="biasMode">Indicates the desired initialization mode to use for the layer bias values</param>
+        [PublicAPI]
+        [Pure, NotNull]
+        public static INetworkLayer Inception(
+            in TensorInfo input, in InceptionInfo info,
+            BiasInitializationMode biasMode = BiasInitializationMode.Zero)
+            => new CuDnnInceptionLayer(input, info, biasMode);
     }
 }
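
Note: the new factory follows the same call shape as the existing Pooling and Convolutional entries. A minimal usage sketch is shown below; only the CuDnnNetworkLayers.Inception signature comes from this commit, while the TensorInfo/InceptionInfo values are placeholders for illustration:

// Sketch only: how TensorInfo and InceptionInfo are actually built is not shown in this diff
TensorInfo input = default;      // the input volume processed by the layer
InceptionInfo info = default;    // describes the 1x1, 3x3, 5x5 and pooling pipelines inside the layer
INetworkLayer inception = CuDnnNetworkLayers.Inception(input, info); // biasMode defaults to BiasInitializationMode.Zero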

NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayersDeserializer.cs

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@ private static INetworkLayer Deserialize([NotNull] Stream stream, LayerType type
                 case LayerType.Convolutional: return CuDnnConvolutionalLayer.Deserialize(stream);
                 case LayerType.Pooling: return CuDnnPoolingLayer.Deserialize(stream);
                 case LayerType.Softmax: return CuDnnSoftmaxLayer.Deserialize(stream);
+                case LayerType.Inception: return CuDnnInceptionLayer.Deserialize(stream);
                 default: return null;
             }
         }

NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs

Lines changed: 64 additions & 0 deletions
@@ -27,6 +27,39 @@ public static DeviceMemory<float> AllocateDevice([NotNull] this Gpu gpu, in Tens
                 : throw new InvalidOperationException($"Failed to copy the source data on the target GPU device, [CUDA ERROR] {result}");
         }
 
+        /// <summary>
+        /// Allocates a memory area on device memory, reading the target values at a given offset from the input <see cref="Tensor"/>
+        /// </summary>
+        /// <param name="gpu">The <see cref="Gpu"/> device to use</param>
+        /// <param name="source">The source <see cref="Tensor"/> with the data to copy</param>
+        /// <param name="offset">The column offset for the data to read from each row</param>
+        /// <param name="length">The number of values to read from each row</param>
+        [MustUseReturnValue, NotNull]
+        public static unsafe DeviceMemory<float> AllocateDevice([NotNull] this Gpu gpu, in Tensor source, int offset, int length)
+        {
+            // Checks
+            if (source.Length - offset < length) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid");
+
+            // Memory copy
+            DeviceMemory<float> result_gpu = gpu.AllocateDevice<float>(source.Entities * length);
+            CUDAInterop.CUDA_MEMCPY2D_st* ptSt = stackalloc CUDAInterop.CUDA_MEMCPY2D_st[1];
+            ptSt[0] = new CUDAInterop.CUDA_MEMCPY2D_st
+            {
+                srcMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_HOST,
+                srcHost = source.Ptr + sizeof(float) * offset,
+                srcPitch = new IntPtr(sizeof(float) * source.Length),
+                dstMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE,
+                dstDevice = result_gpu.Handle,
+                dstPitch = new IntPtr(sizeof(float) * length),
+                WidthInBytes = new IntPtr(sizeof(float) * length),
+                Height = new IntPtr(source.Entities)
+            };
+            CUDAInterop.cudaError_enum result = CUDAInterop.cuMemcpy2D(ptSt);
+            return result == CUDAInterop.cudaError_enum.CUDA_SUCCESS
+                ? result_gpu
+                : throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}");
+        }
+
         /// <summary>
         /// Copies the contents of the input <see cref="DeviceMemory{T}"/> instance to the target host memory area
         /// </summary>
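
Note: the pitched host-to-device copy configured above packs a column slice of every row into a contiguous device buffer. Below is a plain managed sketch of the same per-row semantics, using ordinary arrays instead of the library's Tensor and DeviceMemory<float> types (assumption: Tensor.Entities is the row count and Tensor.Length the per-row width, as implied by the checks above):

using System;

static class PitchedCopyDemo
{
    // Host-side equivalent of AllocateDevice(gpu, source, offset, length): from each of the
    // `entities` rows (each `rowLength` floats wide), copy `length` floats starting at column
    // `offset` into a packed buffer whose rows are exactly `length` floats wide.
    public static float[] SliceRows(float[] source, int entities, int rowLength, int offset, int length)
    {
        if (rowLength - offset < length) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid");
        float[] packed = new float[entities * length];
        for (int i = 0; i < entities; i++)
            Array.Copy(source, i * rowLength + offset, packed, i * length, length);
        return packed;
    }
}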
@@ -40,6 +73,37 @@ public static void CopyTo([NotNull] this DeviceMemory<float> source, in Tensor d
                 throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}");
         }
 
+        /// <summary>
+        /// Copies the source data into the target <see cref="Tensor"/>, splitting each individual entry into its own row
+        /// </summary>
+        /// <param name="source">The source memory area with the concatenated data for each entry</param>
+        /// <param name="destination">The destination <see cref="Tensor"/> that will store the data</param>
+        /// <param name="offset">The column offset for the data for each entry</param>
+        /// <param name="length">The number of values to copy for each entry</param>
+        public static unsafe void CopyTo([NotNull] this DeviceMemory<float> source, in Tensor destination, int offset, int length)
+        {
+            // Checks
+            if (source.Length / length != destination.Entities) throw new ArgumentOutOfRangeException(nameof(length), "The input length doesn't match the given arguments");
+            if (destination.Length - offset < length) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid");
+
+            // Memory copy
+            CUDAInterop.CUDA_MEMCPY2D_st* ptSt = stackalloc CUDAInterop.CUDA_MEMCPY2D_st[1];
+            ptSt[0] = new CUDAInterop.CUDA_MEMCPY2D_st
+            {
+                srcMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE,
+                srcDevice = source.Handle,
+                srcPitch = new IntPtr(sizeof(float) * length),
+                dstMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_HOST,
+                dstHost = destination.Ptr + sizeof(float) * offset,
+                dstPitch = new IntPtr(sizeof(float) * destination.Length),
+                WidthInBytes = new IntPtr(sizeof(float) * length),
+                Height = new IntPtr(destination.Entities)
+            };
+            CUDAInterop.cudaError_enum result = CUDAInterop.cuMemcpy2D(ptSt);
+            if (result != CUDAInterop.cudaError_enum.CUDA_SUCCESS)
+                throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}");
+        }
+
         /// <summary>
         /// Copies the contents of the input <see cref="DeviceMemory{T}"/> to a new memory area on the unmanaged heap
         /// </summary>
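
Note: the new CopyTo overload is the mirror operation (device to host): each packed row of `length` values is written back at column `offset` of the corresponding, wider destination row. Again a plain managed sketch of the semantics, not the library's actual code path:

using System;

static class PitchedCopyBackDemo
{
    // Host-side equivalent of source.CopyTo(destination, offset, length): each of the
    // `entities` packed rows (`length` floats wide) is scattered into the destination
    // at column `offset` of the matching row (`rowLength` floats wide).
    public static void ScatterRows(float[] packed, float[] destination, int entities, int rowLength, int offset, int length)
    {
        if (packed.Length / length != entities) throw new ArgumentOutOfRangeException(nameof(length), "The input length doesn't match the given arguments");
        if (rowLength - offset < length) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid");
        for (int i = 0; i < entities; i++)
            Array.Copy(packed, i * length, destination, i * rowLength + offset, length);
    }
}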

NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs

Lines changed: 2 additions & 4 deletions
@@ -47,9 +47,7 @@ internal sealed class CuDnnConvolutionalLayer : ConvolutionalLayer
         [NotNull]
         private readonly Dnn DnnInstance = DnnService.Instance;
 
-        /// <summary>
-        /// Sets the cuDNN fields that will be used during future forward/backwards operations
-        /// </summary>
+        // cuDNN fields setup
         private void SetupCuDnnInfo()
         {
             ConvolutionDescription.Set2D(OperationInfo.VerticalPadding, OperationInfo.HorizontalPadding, OperationInfo.VerticalStride, OperationInfo.HorizontalStride, 1, 1, (Alea.cuDNN.ConvolutionMode)OperationInfo.Mode);
@@ -151,7 +149,7 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ
             {
                 DnnInstance.ConvolutionBackwardFilter(1, InputDescription, a_gpu.Ptr, OutputDescription, delta_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, FilterDescription, w_gpu.Ptr);
             }
-            w_gpu.CopyToHost(Kernels, KernelInfo.Size, out dJdw);
+            w_gpu.CopyToHost(1, Weights.Length, out dJdw);
         }
 
         // Bias

NeuralNetwork.NET.Cuda/Layers/CuDnnFullyConnectedLayer.cs

Lines changed: 1 addition & 1 deletion
@@ -67,7 +67,7 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ
                 w_gpu = DnnInstance.Gpu.AllocateDevice<float>(a.Length * delta.Length))
             {
                 DnnInstance.FullyConnectedBackwardFilter(a.Entities, a.Length, delta.Length, a_gpu.Ptr, delta_gpu.Ptr, w_gpu.Ptr);
-                w_gpu.CopyToHost(a.Length, delta.Length, out dJdw);
+                w_gpu.CopyToHost(1, Weights.Length, out dJdw);
             }
             delta.CompressVertically(out dJdb); // Doing this on CPU is generally faster than launching the kernels
         }
