
Commit d4d6f2e

Merge pull request #223 from facebookresearch/autotuner-opts

Bump autotuner default options, use a small generations=2 setting for tests, and remove redundant tests

2 parents: f2d7743 + 9d8a2ea

File tree

2 files changed: +8 / -85 lines

  tensor_comprehensions/tc_unit.py
  test_python/test_tc_torch.py


tensor_comprehensions/tc_unit.py
5 additions, 3 deletions
@@ -30,12 +30,14 @@
 
 
 # these are quick options for finishing autotuning
-autotuner_settings = {"threads": 32}
+autotuner_settings = {
+    "threads": 32, "generations": 2, "pop_size": 5,
+}
 
 # TC prunes autotuning for kernels which require < 256 threads. So to tune small
 # size kernels, we set the min kernel threads to 1
 small_sizes_autotuner_settings = {
-    "threads": 32, "generations": 5, "tuner_min_launch_total_threads": 1,
+    "threads": 32, "generations": 2, "pop_size": 5, "tuner_min_launch_total_threads": 1,
 }
 
 ###############################################################################
@@ -189,7 +191,7 @@ def __init__(self, tc_lang, **kwargs):
         self.set_autotuner_parameters(**kwargs)
 
     def set_autotuner_parameters(
-        self, pop_size=10, crossover_rate=80, mutation_rate=7, generations=2,
+        self, pop_size=20, crossover_rate=80, mutation_rate=7, generations=10,
         number_elites=1, threads=8, gpus="0", proto="/tmp/tuner.txt",
         restore_from_proto=False, restore_number=10, log_generations=False,
         tuner_min_launch_total_threads=64, **kwargs
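
In practice these settings feed straight into autotune calls. A minimal sketch of the flow, assuming the standard TC matmul definition (the exact MATMUL_LANG string lives in the test module; the one below is a plausible stand-in):

    import tensor_comprehensions as tc
    import torch

    # assumed stand-in for the MATMUL_LANG constant used by the tests
    MATMUL_LANG = """
    def matmul(float(M, K) A, float(K, N) B) -> (C) {
        C(m, n) +=! A(m, k) * B(k, n)
    }
    """

    matmul = tc.define(MATMUL_LANG, name="matmul")
    mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()

    # tc.autotuner_settings now expands to threads=32, generations=2, pop_size=5,
    # overriding the bumped per-tuner defaults above (pop_size=20, generations=10)
    options = matmul.autotune(mat1, mat2, **tc.autotuner_settings)
    out = matmul(mat1, mat2, options=options)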

test_python/test_tc_torch.py
3 additions, 82 deletions
@@ -191,55 +191,19 @@ def test_conv_backward_pass_options(self):
 ###########################################################################
 class TestAutotuner(unittest.TestCase):
 
-    # For such small sizes, autotuner will probably not help.
-    # NOTE: Use "--tuner_min_launch_total_threads=1" for running small sizes
-    # tc.small_sizes_autotuner_settings has this option set already
-    def test_autotuner_no_cache_small_size(self):
-        lang = MATMUL_LANG
-        matmul = tc.define(lang, name="matmul")
-        mat1, mat2 = torch.randn(3, 4).cuda(), torch.randn(4, 5).cuda()
-        options = matmul.autotune(mat1, mat2, **tc.small_sizes_autotuner_settings)
-
     def test_autotuner_no_cache_medium_size(self):
         lang = MATMUL_LANG
         matmul = tc.define(lang, name="matmul")
         mat1, mat2 = torch.randn(72, 26).cuda(), torch.randn(26, 72).cuda()
         options = matmul.autotune(mat1, mat2, **tc.autotuner_settings)
 
-    def test_autotuner_no_cache(self):
-        lang = MATMUL_LANG
-        matmul = tc.define(lang, name="matmul")
-        mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
-        options = matmul.autotune(mat1, mat2, **tc.autotuner_settings)
-
-    def test_autotuner_no_cache_explicit_set(self):
-        lang = MATMUL_LANG
-        matmul = tc.define(lang, name="matmul")
-        mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
-        options = matmul.autotune(mat1, mat2, cache=False, **tc.autotuner_settings)
-
-    def test_autotuner_cache_to_default(self):
-        lang = MATMUL_LANG
-        matmul = tc.define(lang, name="matmul")
-        mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
-        matmul.autotune(mat1, mat2, cache=True, **tc.autotuner_settings)
-
     def test_autotuner_cachefile_first(self):
         cache_file = "{}/matmul_100_400_500".format(PATH_PREFIX)  # use argparse if input from command line
         lang = MATMUL_LANG
         matmul = tc.define(lang, name="matmul")
         mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
         matmul.autotune(mat1, mat2, cache=cache_file, **tc.autotuner_settings)
 
-    def test_autotuner_cachefile_load(self):
-        lang = MATMUL_LANG
-        cache_file = "{}/matmul_100_400_500".format(PATH_PREFIX)  # use argparse if input from command line
-        assert os.path.isfile("{}.cuda".format(cache_file)), "looks like the cache_file doesn't exist"
-
-        matmul = tc.define(lang, name="matmul")
-        mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
-        out = matmul(mat1, mat2, cache=cache_file)
-
     def test_autotuner_cachefile_load_automatic(self):
         lang = MATMUL_LANG
         cache_file = "{}/matmul_100_400_500".format(PATH_PREFIX)  # use argparse if input from command line
@@ -252,28 +216,13 @@ def test_autotuner_cachefile_load_automatic(self):
         # already compiled earlier
         out2 = matmul(mat1, mat2)
 
-    def test_autotuner_no_cache_and_run_kernel(self):
-        lang = MATMUL_LANG
-        matmul = tc.define(lang, name="matmul")
-        mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
-        options = matmul.autotune(mat1, mat2, **tc.autotuner_settings)
-        out = matmul(mat1, mat2, options=options)
-
     def test_autotuner_no_cache_and_run_kernel_automatic(self):
         lang = MATMUL_LANG
         matmul = tc.define(lang, name="matmul")
         mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
         matmul.autotune(mat1, mat2, **tc.autotuner_settings)
         out = matmul(mat1, mat2)
 
-    def test_autotuner_start_options_and_run_kernel(self):
-        lang = MATMUL_LANG
-        matmul = tc.define(lang, name="matmul")
-        mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
-        options = Options("mlp")
-        best_options = matmul.autotune(mat1, mat2, cache=True, options=options, **tc.autotuner_settings)
-        out = matmul(mat1, mat2, options=best_options)
-
     def test_autotuner_multiple_tc(self):
         lang = MATMUL_ABS_LANG
         matmul = tc.define(lang, name="matmul")
@@ -286,35 +235,6 @@ def test_autotuner_multiple_tc(self):
         absolute.autotune(A, cache=True, **tc.autotuner_settings)
         out = absolute(A)
 
-    ###########################################################################
-    # Pass tuple inputs for autotuning
-    ###########################################################################
-    def test_autotuner_tuple_size_no_cache(self):
-        lang = MATMUL_LANG
-        matmul = tc.define(lang, name="matmul")
-        matmul.autotune((3, 4), (4, 5), **tc.small_sizes_autotuner_settings)
-        matmul.autotune((100, 400), (400, 500), **tc.autotuner_settings)
-
-    def test_autotuner_tuple_size_cache_to_default(self):
-        lang = MATMUL_LANG
-        matmul = tc.define(lang, name="matmul")
-        matmul.autotune((3, 4), (4, 5), cache=True, **tc.small_sizes_autotuner_settings)
-        matmul.autotune((100, 400), (400, 500), cache=True, **tc.autotuner_settings)
-
-    def test_autotuner_tuple_size_cache_to_file_run_kernel(self):
-        lang = MATMUL_LANG
-        matmul = tc.define(lang, name="matmul")
-        cache1 = "{}/matmul_3_4_5".format(PATH_PREFIX)
-        cache2 = "{}/matmul_100_400_500".format(PATH_PREFIX)
-        matmul.autotune((3, 4), (4, 5), cache=cache1, **tc.small_sizes_autotuner_settings)
-        matmul.autotune((100, 400), (400, 500), cache=cache2, **tc.autotuner_settings)
-
-        mat1, mat2 = torch.randn(3, 4).cuda(), torch.randn(4, 5).cuda()
-        out = matmul(mat1, mat2, cache=cache1)
-
-        mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
-        out = matmul(mat1, mat2, cache=cache2)
-
     ##########################################################################
     # Training layer autotuning
     ##########################################################################
@@ -331,8 +251,9 @@ def test_conv_train_autotune_no_cache_no_options_seed(self):
         convolution = tc.define(lang, training=True, name="convolution", backward="convolution_grad", constants={"sh":sH, "sw":sW})
         I, W = torch.randn(N, C, H, W).cuda(), torch.randn(O, C, kH, kW).cuda()
         convolution.autotune(I, W, **tc.autotuner_settings)
-        # on the second call, autotuning will be seeded from previous best options
-        convolution.autotune(I, W, **tc.autotuner_settings, generations=5, pop_size=20)
+        # on the second call, autotuning will be seeded from previous best options,
+        # verify the seeding and new tuning settings being picked up
+        convolution.autotune(I, W, generations=3, pop_size=5)
 
     def test_conv_train_autotune_cache_no_options_seed(self):
         lang = CONV_TRAIN