@@ -120,6 +120,28 @@ function cudnnConvolutionForwardAD(w, x, bias, z; y, activation, convDesc, wDesc
120
120
return y
121
121
end
122
122
123
"""
    cudnnGetConvolutionDescriptor(d::cudnnConvolutionDescriptor)

Read back the settings stored in the convolution descriptor `d` by calling
`cudnnGetConvolutionNdDescriptor`, and return them as the tuple
`(T, mode, SZ, P, S, D)`:

- `T`: the result of `juliaDataType` applied to the reported data type,
- `mode`: the reported convolution mode,
- `SZ`: the array length reported by the query,
- `P`, `S`, `D`: tuples holding the first `SZ` padding, stride and dilation entries.
"""
function cudnnGetConvolutionDescriptor(d::cudnnConvolutionDescriptor)
    # The dimensionality of the convolution is not known before the query, so
    # every output buffer is sized for the largest request made here.
    maxDims = CUDNN_DIM_MAX - 2

    # Output slots filled in by the query below. `nbDimsRef` starts at the
    # requested size and is read back afterwards as the reported length.
    nbDimsRef = Ref{Cint}(maxDims)
    padBuf = Vector{Cint}(undef, maxDims)
    strideBuf = Vector{Cint}(undef, maxDims)
    dilationBuf = Vector{Cint}(undef, maxDims)
    modeRef = Ref{cuDNN.cudnnConvolutionMode_t}(CUDNN_CONVOLUTION)
    dataTypeRef = Ref{cuDNN.cudnnDataType_t}(cuDNN.CUDNN_DATA_FLOAT)

    cudnnGetConvolutionNdDescriptor(
        d, maxDims, nbDimsRef, padBuf, strideBuf, dilationBuf, modeRef, dataTypeRef
    )

    # Only the first `nd` entries of each buffer are read; the remainder was
    # allocated with `undef` and is never touched.
    nd = nbDimsRef[]
    elty = juliaDataType(dataTypeRef[])
    pads = Tuple(padBuf[1:nd])
    strds = Tuple(strideBuf[1:nd])
    dils = Tuple(dilationBuf[1:nd])
    return elty, modeRef[], nd, pads, strds, dils
end
144
+
123
145
# Helper for cudnnConvolutionDescriptor
124
146
function cudnnSetConvolutionDescriptor (
125
147
ptr:: cudnnConvolutionDescriptor_t ,
@@ -179,9 +201,15 @@ const cudnnConvolutionFwdAlgoPerfCacheLock = ReentrantLock()
179
201
It can be set to false when beta is zero to save an allocation and must otherwise be set to true.
180
202
"""
181
203
function cudnnConvolutionFwdAlgoPerf (xDesc, x, wDesc, w, convDesc, yDesc, y, biasDesc, activation, allocateTmpBuf= true )
182
- key = (xDesc, wDesc, convDesc, biasDesc, activation)
204
+ xDesc_native = cudnnGetTensorDescriptor (xDesc)
205
+ wDesc_native = cudnnGetFilterDescriptor (wDesc)
206
+ convDesc_native = cudnnGetConvolutionDescriptor (convDesc)
207
+ biasDesc_native = (isnothing (biasDesc) ? nothing
208
+ : cudnnGetTensorDescriptor (biasDesc))
209
+
210
+ key = (xDesc_native, wDesc_native, convDesc_native, biasDesc, activation)
183
211
val = lock (cudnnConvolutionFwdAlgoPerfCacheLock) do
184
- get (cudnnConvolutionFwdAlgoPerfCache, key, nothing )
212
+ get (cudnnConvolutionFwdAlgoPerfCache, key, nothing )
185
213
end
186
214
if val === nothing
187
215
requestedAlgoCount = Int (CUDNN_CONVOLUTION_FWD_ALGO_COUNT)
@@ -210,7 +238,11 @@ const cudnnConvolutionBwdDataAlgoPerfCacheLock = ReentrantLock()
210
238
It can be set to false when beta is zero to save an allocation and must otherwise be set to true.
211
239
"""
212
240
function cudnnConvolutionBwdDataAlgoPerf (wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, allocateTmpBuf= true )
213
- key = (wDesc, dyDesc, convDesc)
241
+ wDesc_native = cudnnGetFilterDescriptor (wDesc)
242
+ dyDesc_native = cudnnGetTensorDescriptor (dyDesc)
243
+ convDesc_native = cudnnGetConvolutionDescriptor (convDesc)
244
+
245
+ key = (wDesc_native, dyDesc_native, convDesc_native)
214
246
val = lock (cudnnConvolutionBwdDataAlgoPerfCacheLock) do
215
247
get (cudnnConvolutionBwdDataAlgoPerfCache, key, nothing )
216
248
end
@@ -241,7 +273,11 @@ const cudnnConvolutionBwdFilterAlgoPerfCacheLock = ReentrantLock()
241
273
It can be set to false when beta is zero to save an allocation and must otherwise be set to true.
242
274
"""
243
275
function cudnnConvolutionBwdFilterAlgoPerf (xDesc, x, dyDesc, dy, convDesc, dwDesc, dw, allocateTmpBuf= true )
244
- key = (xDesc, dyDesc, convDesc)
276
+ xDesc_native = cudnnGetTensorDescriptor (xDesc)
277
+ dyDesc_native = cudnnGetTensorDescriptor (dyDesc)
278
+ convDesc_native = cudnnGetConvolutionDescriptor (convDesc)
279
+
280
+ key = (xDesc_native, dyDesc_native, convDesc_native)
245
281
val = lock (cudnnConvolutionBwdFilterAlgoPerfCacheLock) do
246
282
get (cudnnConvolutionBwdFilterAlgoPerfCache, (xDesc, dyDesc, convDesc), nothing )
247
283
end
0 commit comments