@@ -74,71 +74,63 @@ public CuDnnConvolutionalLayer(
74
74
#region Implementation
75
75
76
76
/// <inheritdoc/>
77
- public override unsafe void Forward ( in Tensor x , out Tensor z , out Tensor a )
77
+ public override void Forward ( in Tensor x , out Tensor z , out Tensor a )
78
78
{
79
- fixed ( float * pw = Weights )
79
+ using ( DeviceMemory < float > z_gpu = DnnInstance . Gpu . AllocateDevice < float > ( x . Entities * OutputInfo . Size ) )
80
80
{
81
- Tensor . Reshape ( pw , OutputInfo . Channels , KernelInfo . Size , out Tensor wTensor ) ;
82
- using ( DeviceMemory < float > z_gpu = DnnInstance . Gpu . AllocateDevice < float > ( x . Entities * OutputInfo . Size ) )
81
+ // Tensors info setup
82
+ InputDescription . Set4D ( DataType . FLOAT , TensorFormat . CUDNN_TENSOR_NCHW , x . Entities , InputInfo . Channels , InputInfo . Height , InputInfo . Width ) ;
83
+ OutputDescription . Set4D ( DataType . FLOAT , TensorFormat . CUDNN_TENSOR_NCHW , x . Entities , OutputInfo . Channels , OutputInfo . Height , OutputInfo . Width ) ;
84
+
85
+ // Forward convolution
86
+ DnnInstance . GetConvolutionForwardAlgorithm ( InputDescription , FilterDescription , ConvolutionDescription , OutputDescription , ConvolutionFwdPreference . PREFER_FASTEST , IntPtr . Zero , out ConvolutionFwdAlgo algorithm ) ;
87
+ DnnInstance . GetConvolutionForwardWorkspaceSize ( InputDescription , FilterDescription , ConvolutionDescription , OutputDescription , algorithm , out IntPtr size ) ;
88
+ using ( DeviceMemory < float >
89
+ x_gpu = DnnInstance . Gpu . AllocateDevice ( x ) ,
90
+ w_gpu = DnnInstance . Gpu . AllocateDevice ( Weights ) )
91
+ using ( DeviceMemory < byte > workspace_gpu = DnnInstance . Gpu . AllocateDevice < byte > ( size ) )
83
92
{
84
- // Tensors info setup
85
- InputDescription . Set4D ( DataType . FLOAT , TensorFormat . CUDNN_TENSOR_NCHW , x . Entities , InputInfo . Channels , InputInfo . Height , InputInfo . Width ) ;
86
- OutputDescription . Set4D ( DataType . FLOAT , TensorFormat . CUDNN_TENSOR_NCHW , x . Entities , OutputInfo . Channels , OutputInfo . Height , OutputInfo . Width ) ;
87
-
88
- // Forward convolution
89
- DnnInstance . GetConvolutionForwardAlgorithm ( InputDescription , FilterDescription , ConvolutionDescription , OutputDescription , ConvolutionFwdPreference . PREFER_FASTEST , IntPtr . Zero , out ConvolutionFwdAlgo algorithm ) ;
90
- DnnInstance . GetConvolutionForwardWorkspaceSize ( InputDescription , FilterDescription , ConvolutionDescription , OutputDescription , algorithm , out IntPtr size ) ;
91
- using ( DeviceMemory < float >
92
- x_gpu = DnnInstance . Gpu . AllocateDevice ( x ) ,
93
- w_gpu = DnnInstance . Gpu . AllocateDevice ( wTensor ) )
94
- using ( DeviceMemory < byte > workspace_gpu = DnnInstance . Gpu . AllocateDevice < byte > ( size ) )
95
- {
96
- DnnInstance . ConvolutionForward ( 1 , InputDescription , x_gpu . Ptr , FilterDescription , w_gpu . Ptr , ConvolutionDescription , algorithm , workspace_gpu . Ptr , size , 0 , OutputDescription , z_gpu . Ptr ) ;
97
- }
93
+ DnnInstance . ConvolutionForward ( 1 , InputDescription , x_gpu . Ptr , FilterDescription , w_gpu . Ptr , ConvolutionDescription , algorithm , workspace_gpu . Ptr , size , 0 , OutputDescription , z_gpu . Ptr ) ;
94
+ }
98
95
99
- // Biases
100
- using ( DeviceMemory < float > b_gpu = DnnInstance . Gpu . AllocateDevice ( Biases ) )
101
- {
102
- DnnInstance . AddTensor ( 1 , BiasDescription , b_gpu . Ptr , 1 , OutputDescription , z_gpu . Ptr ) ;
103
- }
104
- z_gpu . CopyToHost ( x . Entities , OutputInfo . Size , out z ) ;
96
+ // Biases
97
+ using ( DeviceMemory < float > b_gpu = DnnInstance . Gpu . AllocateDevice ( Biases ) )
98
+ {
99
+ DnnInstance . AddTensor ( 1 , BiasDescription , b_gpu . Ptr , 1 , OutputDescription , z_gpu . Ptr ) ;
100
+ }
101
+ z_gpu . CopyToHost ( x . Entities , OutputInfo . Size , out z ) ;
105
102
106
- // Activation
107
- if ( ActivationFunctionType == ActivationFunctionType . Identity ) z . Duplicate ( out a ) ;
108
- else
109
- {
110
- DnnInstance . ActivationForward ( z . Entities , z . Length , z_gpu . Ptr , z_gpu . Ptr , ActivationFunctions . Activation ) ;
111
- z_gpu . CopyToHost ( z . Entities , z . Length , out a ) ;
112
- }
103
+ // Activation
104
+ if ( ActivationFunctionType == ActivationFunctionType . Identity ) z . Duplicate ( out a ) ;
105
+ else
106
+ {
107
+ DnnInstance . ActivationForward ( z . Entities , z . Length , z_gpu . Ptr , z_gpu . Ptr , ActivationFunctions . Activation ) ;
108
+ z_gpu . CopyToHost ( z . Entities , z . Length , out a ) ;
113
109
}
114
110
}
115
111
}
116
112
117
113
/// <inheritdoc/>
118
- public override unsafe void Backpropagate ( in Tensor delta_1 , in Tensor z , ActivationFunction activationPrime )
114
+ public override void Backpropagate ( in Tensor delta_1 , in Tensor z , ActivationFunction activationPrime )
119
115
{
120
- fixed ( float * pw = Weights )
116
+ using ( DeviceMemory < float > delta_gpu = DnnInstance . Gpu . AllocateDevice < float > ( z . Size ) )
121
117
{
122
- Tensor . Reshape ( pw , OutputInfo . Channels , KernelInfo . Size , out Tensor wTensor ) ;
118
+ // Convolution
123
119
DnnInstance . GetConvolutionBackwardDataAlgorithm ( FilterDescription , OutputDescription , ConvolutionDescription , InputDescription , ConvolutionBwdDataPreference . PREFER_FASTEST , IntPtr . Zero , out ConvolutionBwdDataAlgo algorithm ) ;
124
120
DnnInstance . GetConvolutionBackwardDataWorkspaceSize ( FilterDescription , OutputDescription , ConvolutionDescription , InputDescription , algorithm , out IntPtr size ) ;
125
- using ( DeviceMemory < float > delta_gpu = DnnInstance . Gpu . AllocateDevice < float > ( z . Size ) )
121
+ using ( DeviceMemory < float >
122
+ delta_1_gpu = DnnInstance . Gpu . AllocateDevice ( delta_1 ) ,
123
+ w_gpu = DnnInstance . Gpu . AllocateDevice ( Weights ) )
124
+ using ( DeviceMemory < byte > workspace_gpu = DnnInstance . Gpu . AllocateDevice < byte > ( size ) )
126
125
{
127
- // Backwards convolution
128
- using ( DeviceMemory < float >
129
- delta_1_gpu = DnnInstance . Gpu . AllocateDevice ( delta_1 ) ,
130
- w_gpu = DnnInstance . Gpu . AllocateDevice ( wTensor ) )
131
- using ( DeviceMemory < byte > workspace_gpu = DnnInstance . Gpu . AllocateDevice < byte > ( size ) )
132
- {
133
- DnnInstance . ConvolutionBackwardData ( 1 , FilterDescription , w_gpu . Ptr , OutputDescription , delta_1_gpu . Ptr , ConvolutionDescription , algorithm , workspace_gpu . Ptr , size , 0 , InputDescription , delta_gpu . Ptr ) ;
134
- }
126
+ DnnInstance . ConvolutionBackwardData ( 1 , FilterDescription , w_gpu . Ptr , OutputDescription , delta_1_gpu . Ptr , ConvolutionDescription , algorithm , workspace_gpu . Ptr , size , 0 , InputDescription , delta_gpu . Ptr ) ;
127
+ }
135
128
136
- // Activation
137
- using ( DeviceMemory < float > z_gpu = DnnInstance . Gpu . AllocateDevice ( z ) )
138
- {
139
- DnnInstance . ActivationBackward ( z . Entities , z . Length , z_gpu . Ptr , delta_gpu . Ptr , activationPrime ) ;
140
- z_gpu . CopyTo ( z ) ;
141
- }
129
+ // Activation
130
+ using ( DeviceMemory < float > z_gpu = DnnInstance . Gpu . AllocateDevice ( z ) )
131
+ {
132
+ DnnInstance . ActivationBackward ( z . Entities , z . Length , z_gpu . Ptr , delta_gpu . Ptr , activationPrime ) ;
133
+ z_gpu . CopyTo ( z ) ;
142
134
}
143
135
}
144
136
}
0 commit comments