@@ -47,9 +47,7 @@ internal sealed class CuDnnConvolutionalLayer : ConvolutionalLayer
47
47
[ NotNull ]
48
48
private readonly Dnn DnnInstance = DnnService . Instance ;
49
49
50
- /// <summary>
51
- /// Sets the cuDNN fields that will be used during future forward/backwards operations
52
- /// </summary>
50
+ // cuDNN fields setup
53
51
private void SetupCuDnnInfo ( )
54
52
{
55
53
ConvolutionDescription . Set2D ( OperationInfo . VerticalPadding , OperationInfo . HorizontalPadding , OperationInfo . VerticalStride , OperationInfo . HorizontalStride , 1 , 1 , ( Alea . cuDNN . ConvolutionMode ) OperationInfo . Mode ) ;
@@ -74,71 +72,63 @@ public CuDnnConvolutionalLayer(
74
72
#region Implementation
75
73
76
74
/// <inheritdoc/>
77
- public override unsafe void Forward ( in Tensor x , out Tensor z , out Tensor a )
75
+ public override void Forward ( in Tensor x , out Tensor z , out Tensor a )
78
76
{
79
- fixed ( float * pw = Weights )
77
+ using ( DeviceMemory < float > z_gpu = DnnInstance . Gpu . AllocateDevice < float > ( x . Entities * OutputInfo . Size ) )
80
78
{
81
- Tensor . Reshape ( pw , OutputInfo . Channels , KernelInfo . Size , out Tensor wTensor ) ;
82
- using ( DeviceMemory < float > z_gpu = DnnInstance . Gpu . AllocateDevice < float > ( x . Entities * OutputInfo . Size ) )
79
+ // Tensors info setup
80
+ InputDescription . Set4D ( DataType . FLOAT , TensorFormat . CUDNN_TENSOR_NCHW , x . Entities , InputInfo . Channels , InputInfo . Height , InputInfo . Width ) ;
81
+ OutputDescription . Set4D ( DataType . FLOAT , TensorFormat . CUDNN_TENSOR_NCHW , x . Entities , OutputInfo . Channels , OutputInfo . Height , OutputInfo . Width ) ;
82
+
83
+ // Forward convolution
84
+ DnnInstance . GetConvolutionForwardAlgorithm ( InputDescription , FilterDescription , ConvolutionDescription , OutputDescription , ConvolutionFwdPreference . PREFER_FASTEST , IntPtr . Zero , out ConvolutionFwdAlgo algorithm ) ;
85
+ DnnInstance . GetConvolutionForwardWorkspaceSize ( InputDescription , FilterDescription , ConvolutionDescription , OutputDescription , algorithm , out IntPtr size ) ;
86
+ using ( DeviceMemory < float >
87
+ x_gpu = DnnInstance . Gpu . AllocateDevice ( x ) ,
88
+ w_gpu = DnnInstance . Gpu . AllocateDevice ( Weights ) )
89
+ using ( DeviceMemory < byte > workspace_gpu = DnnInstance . Gpu . AllocateDevice < byte > ( size ) )
83
90
{
84
- // Tensors info setup
85
- InputDescription . Set4D ( DataType . FLOAT , TensorFormat . CUDNN_TENSOR_NCHW , x . Entities , InputInfo . Channels , InputInfo . Height , InputInfo . Width ) ;
86
- OutputDescription . Set4D ( DataType . FLOAT , TensorFormat . CUDNN_TENSOR_NCHW , x . Entities , OutputInfo . Channels , OutputInfo . Height , OutputInfo . Width ) ;
87
-
88
- // Forward convolution
89
- DnnInstance . GetConvolutionForwardAlgorithm ( InputDescription , FilterDescription , ConvolutionDescription , OutputDescription , ConvolutionFwdPreference . PREFER_FASTEST , IntPtr . Zero , out ConvolutionFwdAlgo algorithm ) ;
90
- DnnInstance . GetConvolutionForwardWorkspaceSize ( InputDescription , FilterDescription , ConvolutionDescription , OutputDescription , algorithm , out IntPtr size ) ;
91
- using ( DeviceMemory < float >
92
- x_gpu = DnnInstance . Gpu . AllocateDevice ( x ) ,
93
- w_gpu = DnnInstance . Gpu . AllocateDevice ( wTensor ) )
94
- using ( DeviceMemory < byte > workspace_gpu = DnnInstance . Gpu . AllocateDevice < byte > ( size ) )
95
- {
96
- DnnInstance . ConvolutionForward ( 1 , InputDescription , x_gpu . Ptr , FilterDescription , w_gpu . Ptr , ConvolutionDescription , algorithm , workspace_gpu . Ptr , size , 0 , OutputDescription , z_gpu . Ptr ) ;
97
- }
91
+ DnnInstance . ConvolutionForward ( 1 , InputDescription , x_gpu . Ptr , FilterDescription , w_gpu . Ptr , ConvolutionDescription , algorithm , workspace_gpu . Ptr , size , 0 , OutputDescription , z_gpu . Ptr ) ;
92
+ }
98
93
99
- // Biases
100
- using ( DeviceMemory < float > b_gpu = DnnInstance . Gpu . AllocateDevice ( Biases ) )
101
- {
102
- DnnInstance . AddTensor ( 1 , BiasDescription , b_gpu . Ptr , 1 , OutputDescription , z_gpu . Ptr ) ;
103
- }
104
- z_gpu . CopyToHost ( x . Entities , OutputInfo . Size , out z ) ;
94
+ // Biases
95
+ using ( DeviceMemory < float > b_gpu = DnnInstance . Gpu . AllocateDevice ( Biases ) )
96
+ {
97
+ DnnInstance . AddTensor ( 1 , BiasDescription , b_gpu . Ptr , 1 , OutputDescription , z_gpu . Ptr ) ;
98
+ }
99
+ z_gpu . CopyToHost ( x . Entities , OutputInfo . Size , out z ) ;
105
100
106
- // Activation
107
- if ( ActivationFunctionType == ActivationFunctionType . Identity ) z . Duplicate ( out a ) ;
108
- else
109
- {
110
- DnnInstance . ActivationForward ( z . Entities , z . Length , z_gpu . Ptr , z_gpu . Ptr , ActivationFunctions . Activation ) ;
111
- z_gpu . CopyToHost ( z . Entities , z . Length , out a ) ;
112
- }
101
+ // Activation
102
+ if ( ActivationFunctionType == ActivationFunctionType . Identity ) z . Duplicate ( out a ) ;
103
+ else
104
+ {
105
+ DnnInstance . ActivationForward ( z . Entities , z . Length , z_gpu . Ptr , z_gpu . Ptr , ActivationFunctions . Activation ) ;
106
+ z_gpu . CopyToHost ( z . Entities , z . Length , out a ) ;
113
107
}
114
108
}
115
109
}
116
110
117
111
/// <inheritdoc/>
118
- public override unsafe void Backpropagate ( in Tensor delta_1 , in Tensor z , ActivationFunction activationPrime )
112
+ public override void Backpropagate ( in Tensor delta_1 , in Tensor z , ActivationFunction activationPrime )
119
113
{
120
- fixed ( float * pw = Weights )
114
+ using ( DeviceMemory < float > delta_gpu = DnnInstance . Gpu . AllocateDevice < float > ( z . Size ) )
121
115
{
122
- Tensor . Reshape ( pw , OutputInfo . Channels , KernelInfo . Size , out Tensor wTensor ) ;
116
+ // Convolution
123
117
DnnInstance . GetConvolutionBackwardDataAlgorithm ( FilterDescription , OutputDescription , ConvolutionDescription , InputDescription , ConvolutionBwdDataPreference . PREFER_FASTEST , IntPtr . Zero , out ConvolutionBwdDataAlgo algorithm ) ;
124
118
DnnInstance . GetConvolutionBackwardDataWorkspaceSize ( FilterDescription , OutputDescription , ConvolutionDescription , InputDescription , algorithm , out IntPtr size ) ;
125
- using ( DeviceMemory < float > delta_gpu = DnnInstance . Gpu . AllocateDevice < float > ( z . Size ) )
119
+ using ( DeviceMemory < float >
120
+ delta_1_gpu = DnnInstance . Gpu . AllocateDevice ( delta_1 ) ,
121
+ w_gpu = DnnInstance . Gpu . AllocateDevice ( Weights ) )
122
+ using ( DeviceMemory < byte > workspace_gpu = DnnInstance . Gpu . AllocateDevice < byte > ( size ) )
126
123
{
127
- // Backwards convolution
128
- using ( DeviceMemory < float >
129
- delta_1_gpu = DnnInstance . Gpu . AllocateDevice ( delta_1 ) ,
130
- w_gpu = DnnInstance . Gpu . AllocateDevice ( wTensor ) )
131
- using ( DeviceMemory < byte > workspace_gpu = DnnInstance . Gpu . AllocateDevice < byte > ( size ) )
132
- {
133
- DnnInstance . ConvolutionBackwardData ( 1 , FilterDescription , w_gpu . Ptr , OutputDescription , delta_1_gpu . Ptr , ConvolutionDescription , algorithm , workspace_gpu . Ptr , size , 0 , InputDescription , delta_gpu . Ptr ) ;
134
- }
124
+ DnnInstance . ConvolutionBackwardData ( 1 , FilterDescription , w_gpu . Ptr , OutputDescription , delta_1_gpu . Ptr , ConvolutionDescription , algorithm , workspace_gpu . Ptr , size , 0 , InputDescription , delta_gpu . Ptr ) ;
125
+ }
135
126
136
- // Activation
137
- using ( DeviceMemory < float > z_gpu = DnnInstance . Gpu . AllocateDevice ( z ) )
138
- {
139
- DnnInstance . ActivationBackward ( z . Entities , z . Length , z_gpu . Ptr , delta_gpu . Ptr , activationPrime ) ;
140
- z_gpu . CopyTo ( z ) ;
141
- }
127
+ // Activation
128
+ using ( DeviceMemory < float > z_gpu = DnnInstance . Gpu . AllocateDevice ( z ) )
129
+ {
130
+ DnnInstance . ActivationBackward ( z . Entities , z . Length , z_gpu . Ptr , delta_gpu . Ptr , activationPrime ) ;
131
+ z_gpu . CopyTo ( z ) ;
142
132
}
143
133
}
144
134
}
@@ -159,7 +149,7 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ
159
149
{
160
150
DnnInstance . ConvolutionBackwardFilter ( 1 , InputDescription , a_gpu . Ptr , OutputDescription , delta_gpu . Ptr , ConvolutionDescription , algorithm , workspace_gpu . Ptr , size , 0 , FilterDescription , w_gpu . Ptr ) ;
161
151
}
162
- w_gpu . CopyToHost ( Kernels , KernelInfo . Size , out dJdw ) ;
152
+ w_gpu . CopyToHost ( 1 , Weights . Length , out dJdw ) ;
163
153
}
164
154
165
155
// Bias
0 commit comments