@@ -191,55 +191,19 @@ def test_conv_backward_pass_options(self):
191
191
###########################################################################
192
192
class TestAutotuner (unittest .TestCase ):
193
193
194
- # For such small sizes, autotuner will probably not help.
195
- # NOTE: Use "--tuner_min_launch_total_threads=1" for running small sizes
196
- # tc.small_sizes_autotuner_settings has this option set already
197
- def test_autotuner_no_cache_small_size (self ):
198
- lang = MATMUL_LANG
199
- matmul = tc .define (lang , name = "matmul" )
200
- mat1 , mat2 = torch .randn (3 , 4 ).cuda (), torch .randn (4 , 5 ).cuda ()
201
- options = matmul .autotune (mat1 , mat2 , ** tc .small_sizes_autotuner_settings )
202
-
203
194
def test_autotuner_no_cache_medium_size(self):
    """Autotune the MATMUL_LANG kernel on medium-sized inputs, passing no cache argument.

    Creates a 72x26 and a 26x72 random tensor, moves both to CUDA, and runs
    ``matmul.autotune`` with ``tc.autotuner_settings``. The test contains no
    assertions, so it only fails if one of these calls raises.
    """
    matmul = tc.define(MATMUL_LANG, name="matmul")
    lhs = torch.randn(72, 26).cuda()
    rhs = torch.randn(26, 72).cuda()
    # The value returned by autotune is bound but never inspected.
    tuned = matmul.autotune(lhs, rhs, **tc.autotuner_settings)
208
199
209
- def test_autotuner_no_cache (self ):
210
- lang = MATMUL_LANG
211
- matmul = tc .define (lang , name = "matmul" )
212
- mat1 , mat2 = torch .randn (100 , 400 ).cuda (), torch .randn (400 , 500 ).cuda ()
213
- options = matmul .autotune (mat1 , mat2 , ** tc .autotuner_settings )
214
-
215
- def test_autotuner_no_cache_explicit_set (self ):
216
- lang = MATMUL_LANG
217
- matmul = tc .define (lang , name = "matmul" )
218
- mat1 , mat2 = torch .randn (100 , 400 ).cuda (), torch .randn (400 , 500 ).cuda ()
219
- options = matmul .autotune (mat1 , mat2 , cache = False , ** tc .autotuner_settings )
220
-
221
- def test_autotuner_cache_to_default (self ):
222
- lang = MATMUL_LANG
223
- matmul = tc .define (lang , name = "matmul" )
224
- mat1 , mat2 = torch .randn (100 , 400 ).cuda (), torch .randn (400 , 500 ).cuda ()
225
- matmul .autotune (mat1 , mat2 , cache = True , ** tc .autotuner_settings )
226
-
227
200
def test_autotuner_cachefile_first(self):
    """Autotune a 100x400 by 400x500 matmul, passing an explicit cache file path.

    The cache path is built under ``PATH_PREFIX`` and handed to
    ``matmul.autotune`` via ``cache=``. The test contains no assertions, so
    it only fails if one of the calls raises.
    """
    # Hard-coded path; use argparse if it should come from the command line.
    cache_path = "{}/matmul_100_400_500".format(PATH_PREFIX)
    matmul = tc.define(MATMUL_LANG, name="matmul")
    lhs = torch.randn(100, 400).cuda()
    rhs = torch.randn(400, 500).cuda()
    matmul.autotune(lhs, rhs, cache=cache_path, **tc.autotuner_settings)
233
206
234
- def test_autotuner_cachefile_load (self ):
235
- lang = MATMUL_LANG
236
- cache_file = "{}/matmul_100_400_500" .format (PATH_PREFIX ) # use argparse if input from command line
237
- assert os .path .isfile ("{}.cuda" .format (cache_file )), "looks like the cache_file doesn't exist"
238
-
239
- matmul = tc .define (lang , name = "matmul" )
240
- mat1 , mat2 = torch .randn (100 , 400 ).cuda (), torch .randn (400 , 500 ).cuda ()
241
- out = matmul (mat1 , mat2 , cache = cache_file )
242
-
243
207
def test_autotuner_cachefile_load_automatic (self ):
244
208
lang = MATMUL_LANG
245
209
cache_file = "{}/matmul_100_400_500" .format (PATH_PREFIX ) # use argparse if input from command line
@@ -252,28 +216,13 @@ def test_autotuner_cachefile_load_automatic(self):
252
216
# already compiled earlier
253
217
out2 = matmul (mat1 , mat2 )
254
218
255
- def test_autotuner_no_cache_and_run_kernel (self ):
256
- lang = MATMUL_LANG
257
- matmul = tc .define (lang , name = "matmul" )
258
- mat1 , mat2 = torch .randn (100 , 400 ).cuda (), torch .randn (400 , 500 ).cuda ()
259
- options = matmul .autotune (mat1 , mat2 , ** tc .autotuner_settings )
260
- out = matmul (mat1 , mat2 , options = options )
261
-
262
219
def test_autotuner_no_cache_and_run_kernel_automatic(self):
    """Autotune a matmul with no cache argument, then run it without passing options.

    Tunes on a 100x400 and a 400x500 random CUDA tensor with
    ``tc.autotuner_settings`` and then calls ``matmul`` on the same tensors.
    The test contains no assertions, so it only fails if a call raises.
    """
    matmul = tc.define(MATMUL_LANG, name="matmul")
    lhs = torch.randn(100, 400).cuda()
    rhs = torch.randn(400, 500).cuda()
    matmul.autotune(lhs, rhs, **tc.autotuner_settings)
    # No options are passed here; presumably the kernel reuses the options
    # found by the autotune call above -- TODO confirm against the tc API.
    result = matmul(lhs, rhs)
268
225
269
- def test_autotuner_start_options_and_run_kernel (self ):
270
- lang = MATMUL_LANG
271
- matmul = tc .define (lang , name = "matmul" )
272
- mat1 , mat2 = torch .randn (100 , 400 ).cuda (), torch .randn (400 , 500 ).cuda ()
273
- options = Options ("mlp" )
274
- best_options = matmul .autotune (mat1 , mat2 , cache = True , options = options , ** tc .autotuner_settings )
275
- out = matmul (mat1 , mat2 , options = best_options )
276
-
277
226
def test_autotuner_multiple_tc (self ):
278
227
lang = MATMUL_ABS_LANG
279
228
matmul = tc .define (lang , name = "matmul" )
@@ -286,35 +235,6 @@ def test_autotuner_multiple_tc(self):
286
235
absolute .autotune (A , cache = True , ** tc .autotuner_settings )
287
236
out = absolute (A )
288
237
289
- ###########################################################################
290
- # Pass tuple inputs for autotuning
291
- ###########################################################################
292
- def test_autotuner_tuple_size_no_cache (self ):
293
- lang = MATMUL_LANG
294
- matmul = tc .define (lang , name = "matmul" )
295
- matmul .autotune ((3 , 4 ), (4 , 5 ), ** tc .small_sizes_autotuner_settings )
296
- matmul .autotune ((100 , 400 ), (400 , 500 ), ** tc .autotuner_settings )
297
-
298
- def test_autotuner_tuple_size_cache_to_default (self ):
299
- lang = MATMUL_LANG
300
- matmul = tc .define (lang , name = "matmul" )
301
- matmul .autotune ((3 , 4 ), (4 , 5 ), cache = True , ** tc .small_sizes_autotuner_settings )
302
- matmul .autotune ((100 , 400 ), (400 , 500 ), cache = True , ** tc .autotuner_settings )
303
-
304
- def test_autotuner_tuple_size_cache_to_file_run_kernel (self ):
305
- lang = MATMUL_LANG
306
- matmul = tc .define (lang , name = "matmul" )
307
- cache1 = "{}/matmul_3_4_5" .format (PATH_PREFIX )
308
- cache2 = "{}/matmul_100_400_500" .format (PATH_PREFIX )
309
- matmul .autotune ((3 , 4 ), (4 , 5 ), cache = cache1 , ** tc .small_sizes_autotuner_settings )
310
- matmul .autotune ((100 , 400 ), (400 , 500 ), cache = cache2 , ** tc .autotuner_settings )
311
-
312
- mat1 , mat2 = torch .randn (3 , 4 ).cuda (), torch .randn (4 , 5 ).cuda ()
313
- out = matmul (mat1 , mat2 , cache = cache1 )
314
-
315
- mat1 , mat2 = torch .randn (100 , 400 ).cuda (), torch .randn (400 , 500 ).cuda ()
316
- out = matmul (mat1 , mat2 , cache = cache2 )
317
-
318
238
##########################################################################
319
239
# Training layer autotuning
320
240
##########################################################################
@@ -331,8 +251,9 @@ def test_conv_train_autotune_no_cache_no_options_seed(self):
331
251
convolution = tc .define (lang , training = True , name = "convolution" , backward = "convolution_grad" , constants = {"sh" :sH , "sw" :sW })
332
252
I , W = torch .randn (N , C , H , W ).cuda (), torch .randn (O , C , kH , kW ).cuda ()
333
253
convolution .autotune (I , W , ** tc .autotuner_settings )
334
- # on the second call, autotuning will be seeded from previous best options
335
- convolution .autotune (I , W , ** tc .autotuner_settings , generations = 5 , pop_size = 20 )
254
+ # on the second call, autotuning is seeded from the previous best options;
255
+ # verify that the seeding and the new tuning settings are picked up
256
+ convolution .autotune (I , W , generations = 3 , pop_size = 5 )
336
257
337
258
def test_conv_train_autotune_cache_no_options_seed (self ):
338
259
lang = CONV_TRAIN
0 commit comments