@@ -172,52 +172,70 @@ end
172
172
# Cache of forward-convolution algorithm results, filled by `cudnnConvolutionFwdAlgoPerf`
# and keyed by the tuple `(xDesc, wDesc, convDesc, biasDesc, activation)`.
const cudnnConvolutionFwdAlgoPerfCache = Dict {Tuple,cudnnConvolutionFwdAlgoPerf_t} ()
173
173
# Lock that `cudnnConvolutionFwdAlgoPerf` takes around each read and write of the cache above.
const cudnnConvolutionFwdAlgoPerfCacheLock = ReentrantLock ()
174
174
"""
    cudnnConvolutionFwdAlgoPerf(xDesc, x, wDesc, w, convDesc, yDesc, y, biasDesc, activation)

Return the `cudnnConvolutionFwdAlgoPerf_t` entry to use for a forward convolution
with the given descriptors, memoized in `cudnnConvolutionFwdAlgoPerfCache`.

The cache key is `(xDesc, wDesc, convDesc, biasDesc, activation)`. `biasDesc` and
`activation` only take part in the key; they are not passed to the algorithm search.
On a cache miss, `cudnnFindConvolutionForwardAlgorithmEx` is run on `x`, `w` and `y`
and the entry picked by `cudnnConvolutionAlgoPerfChoose` is stored and returned.

The cache lock is held only for the lookup and for the store, not during the search.
Concurrent callers that miss on the same key may therefore each run the search; the
last store wins and every caller returns the result it computed itself.
"""
function cudnnConvolutionFwdAlgoPerf(xDesc, x, wDesc, w, convDesc, yDesc, y, biasDesc, activation)
    key = (xDesc, wDesc, convDesc, biasDesc, activation)
    cached = lock(cudnnConvolutionFwdAlgoPerfCacheLock) do
        get(cudnnConvolutionFwdAlgoPerfCache, key, nothing)
    end
    cached === nothing || return cached

    requestedAlgoCount = Int(CUDNN_CONVOLUTION_FWD_ALGO_COUNT)
    returnedAlgoCount = Cint[0]
    perfResults = Array{cudnnConvolutionFwdAlgoPerf_t}(undef, requestedAlgoCount)
    workspaceSize() = cudnnFindConvolutionAlgorithmWorkspaceSize(x)
    with_workspace(workspaceSize) do workspace
        cudnnFindConvolutionForwardAlgorithmEx(handle(), xDesc, x, wDesc, w, convDesc, yDesc, y,
                                               requestedAlgoCount, returnedAlgoCount, perfResults,
                                               workspace, sizeof(workspace))
    end

    # `perf` is assigned exactly once before the closure below captures it. Reusing
    # the lookup variable here would make Julia box it and lose the return type.
    perf = cudnnConvolutionAlgoPerfChoose(perfResults, returnedAlgoCount[1])
    lock(cudnnConvolutionFwdAlgoPerfCacheLock) do
        cudnnConvolutionFwdAlgoPerfCache[key] = perf
    end
    return perf
end
188
194
189
195
# Cache of backward-data algorithm results, filled by `cudnnConvolutionBwdDataAlgoPerf`
# and keyed by the tuple `(wDesc, dyDesc, convDesc)`.
const cudnnConvolutionBwdDataAlgoPerfCache = Dict {Tuple,cudnnConvolutionBwdDataAlgoPerf_t} ()
190
196
# Lock that `cudnnConvolutionBwdDataAlgoPerf` takes around each read and write of the cache above.
const cudnnConvolutionBwdDataAlgoPerfCacheLock = ReentrantLock ()
191
197
"""
    cudnnConvolutionBwdDataAlgoPerf(wDesc, w, dyDesc, dy, convDesc, dxDesc, dx)

Return the `cudnnConvolutionBwdDataAlgoPerf_t` entry to use for a backward-data
convolution with the given descriptors, memoized in
`cudnnConvolutionBwdDataAlgoPerfCache`.

The cache key is `(wDesc, dyDesc, convDesc)`. On a cache miss,
`cudnnFindConvolutionBackwardDataAlgorithmEx` is run on `w`, `dy` and `dx` and the
entry picked by `cudnnConvolutionAlgoPerfChoose` is stored and returned.

The cache lock is held only for the lookup and for the store, not during the search.
Concurrent callers that miss on the same key may therefore each run the search; the
last store wins and every caller returns the result it computed itself.
"""
function cudnnConvolutionBwdDataAlgoPerf(wDesc, w, dyDesc, dy, convDesc, dxDesc, dx)
    key = (wDesc, dyDesc, convDesc)
    cached = lock(cudnnConvolutionBwdDataAlgoPerfCacheLock) do
        get(cudnnConvolutionBwdDataAlgoPerfCache, key, nothing)
    end
    cached === nothing || return cached

    requestedAlgoCount = Int(CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT)
    returnedAlgoCount = Cint[0]
    perfResults = Array{cudnnConvolutionBwdDataAlgoPerf_t}(undef, requestedAlgoCount)
    workspaceSize() = cudnnFindConvolutionAlgorithmWorkspaceSize(dx)
    with_workspace(workspaceSize) do workspace
        cudnnFindConvolutionBackwardDataAlgorithmEx(handle(), wDesc, w, dyDesc, dy, convDesc, dxDesc, dx,
                                                    requestedAlgoCount, returnedAlgoCount, perfResults,
                                                    workspace, sizeof(workspace))
    end

    # `perf` is assigned exactly once before the closure below captures it. Reusing
    # the lookup variable here would make Julia box it and lose the return type.
    perf = cudnnConvolutionAlgoPerfChoose(perfResults, returnedAlgoCount[1])
    lock(cudnnConvolutionBwdDataAlgoPerfCacheLock) do
        cudnnConvolutionBwdDataAlgoPerfCache[key] = perf
    end
    return perf
end
205
217
206
218
# Cache of backward-filter algorithm results, filled by `cudnnConvolutionBwdFilterAlgoPerf`
# and keyed by the tuple `(xDesc, dyDesc, convDesc)`.
const cudnnConvolutionBwdFilterAlgoPerfCache = Dict {Tuple,cudnnConvolutionBwdFilterAlgoPerf_t} ()
207
219
# Lock that `cudnnConvolutionBwdFilterAlgoPerf` takes around each read and write of the cache above.
const cudnnConvolutionBwdFilterAlgoPerfCacheLock = ReentrantLock ()
208
220
"""
    cudnnConvolutionBwdFilterAlgoPerf(xDesc, x, dyDesc, dy, convDesc, dwDesc, dw)

Return the `cudnnConvolutionBwdFilterAlgoPerf_t` entry to use for a backward-filter
convolution with the given descriptors, memoized in
`cudnnConvolutionBwdFilterAlgoPerfCache`.

The cache key is `(xDesc, dyDesc, convDesc)`. On a cache miss,
`cudnnFindConvolutionBackwardFilterAlgorithmEx` is run on `x`, `dy` and `dw` and the
entry picked by `cudnnConvolutionAlgoPerfChoose` is stored and returned.

The cache lock is held only for the lookup and for the store, not during the search.
Concurrent callers that miss on the same key may therefore each run the search; the
last store wins and every caller returns the result it computed itself.
"""
function cudnnConvolutionBwdFilterAlgoPerf(xDesc, x, dyDesc, dy, convDesc, dwDesc, dw)
    # Build the key once so the lookup and the store cannot drift apart.
    key = (xDesc, dyDesc, convDesc)
    cached = lock(cudnnConvolutionBwdFilterAlgoPerfCacheLock) do
        get(cudnnConvolutionBwdFilterAlgoPerfCache, key, nothing)
    end
    cached === nothing || return cached

    requestedAlgoCount = Int(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT)
    returnedAlgoCount = Cint[0]
    perfResults = Array{cudnnConvolutionBwdFilterAlgoPerf_t}(undef, requestedAlgoCount)
    workspaceSize() = cudnnFindConvolutionAlgorithmWorkspaceSize(x)
    with_workspace(workspaceSize) do workspace
        cudnnFindConvolutionBackwardFilterAlgorithmEx(handle(), xDesc, x, dyDesc, dy, convDesc, dwDesc, dw,
                                                      requestedAlgoCount, returnedAlgoCount, perfResults,
                                                      workspace, sizeof(workspace))
    end

    # `perf` is assigned exactly once before the closure below captures it. Reusing
    # the lookup variable here would make Julia box it and lose the return type.
    perf = cudnnConvolutionAlgoPerfChoose(perfResults, returnedAlgoCount[1])
    lock(cudnnConvolutionBwdFilterAlgoPerfCacheLock) do
        cudnnConvolutionBwdFilterAlgoPerfCache[key] = perf
    end
    return perf
end
222
240
223
241
0 commit comments