
Commit d4d6f2e

Merge pull request #223 from facebookresearch/autotuner-opts

Bump autotuner default options, use a small generations=2 setting for tests, and remove redundant tests

2 parents: f2d7743 + 9d8a2ea

File tree

2 files changed: +8 / -85 lines

  tensor_comprehensions/tc_unit.py
  test_python/test_tc_torch.py


tensor_comprehensions/tc_unit.py
5 additions, 3 deletions
@@ -30,12 +30,14 @@
 
 
 # these are quick options for finishing autotuning
-autotuner_settings = {"threads": 32}
+autotuner_settings = {
+    "threads": 32, "generations": 2, "pop_size": 5,
+}
 
 # TC prunes autotuning for kernels which require < 256 threads. So to tune small
 # size kernels, we set the min kernel threads to 1
 small_sizes_autotuner_settings = {
-    "threads": 32, "generations": 5, "tuner_min_launch_total_threads": 1,
+    "threads": 32, "generations": 2, "pop_size": 5, "tuner_min_launch_total_threads": 1,
 }
 
 ###############################################################################
@@ -189,7 +191,7 @@ def __init__(self, tc_lang, **kwargs):
         self.set_autotuner_parameters(**kwargs)
 
     def set_autotuner_parameters(
-        self, pop_size=10, crossover_rate=80, mutation_rate=7, generations=2,
+        self, pop_size=20, crossover_rate=80, mutation_rate=7, generations=10,
         number_elites=1, threads=8, gpus="0", proto="/tmp/tuner.txt",
         restore_from_proto=False, restore_number=10, log_generations=False,
         tuner_min_launch_total_threads=64, **kwargs
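
In practice these settings feed straight into autotune calls. A minimal sketch of the flow, assuming the standard TC matmul definition (the exact MATMUL_LANG string lives in the test module; the one below is a plausible stand-in):

    import tensor_comprehensions as tc
    import torch

    # assumed stand-in for the MATMUL_LANG constant used by the tests
    MATMUL_LANG = """
    def matmul(float(M, K) A, float(K, N) B) -> (C) {
        C(m, n) +=! A(m, k) * B(k, n)
    }
    """

    matmul = tc.define(MATMUL_LANG, name="matmul")
    mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()

    # tc.autotuner_settings now expands to threads=32, generations=2, pop_size=5,
    # overriding the bumped per-tuner defaults above (pop_size=20, generations=10)
    options = matmul.autotune(mat1, mat2, **tc.autotuner_settings)
    out = matmul(mat1, mat2, options=options)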

test_python/test_tc_torch.py
3 additions, 82 deletions
@@ -191,55 +191,19 @@ def test_conv_backward_pass_options(self):
 ###########################################################################
 class TestAutotuner(unittest.TestCase):
 
-    # For such small sizes, autotuner will probably not help.
-    # NOTE: Use "--tuner_min_launch_total_threads=1" for running small sizes
-    # tc.small_sizes_autotuner_settings has this option set already
-    def test_autotuner_no_cache_small_size(self):
-        lang = MATMUL_LANG
-        matmul = tc.define(lang, name="matmul")
-        mat1, mat2 = torch.randn(3, 4).cuda(), torch.randn(4, 5).cuda()
-        options = matmul.autotune(mat1, mat2, **tc.small_sizes_autotuner_settings)
-
     def test_autotuner_no_cache_medium_size(self):
         lang = MATMUL_LANG
         matmul = tc.define(lang, name="matmul")
         mat1, mat2 = torch.randn(72, 26).cuda(), torch.randn(26, 72).cuda()
         options = matmul.autotune(mat1, mat2, **tc.autotuner_settings)
 
-    def test_autotuner_no_cache(self):
-        lang = MATMUL_LANG
-        matmul = tc.define(lang, name="matmul")
-        mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
-        options = matmul.autotune(mat1, mat2, **tc.autotuner_settings)
-
-    def test_autotuner_no_cache_explicit_set(self):
-        lang = MATMUL_LANG
-        matmul = tc.define(lang, name="matmul")
-        mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
-        options = matmul.autotune(mat1, mat2, cache=False, **tc.autotuner_settings)
-
-    def test_autotuner_cache_to_default(self):
-        lang = MATMUL_LANG
-        matmul = tc.define(lang, name="matmul")
-        mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
-        matmul.autotune(mat1, mat2, cache=True, **tc.autotuner_settings)
-
     def test_autotuner_cachefile_first(self):
         cache_file = "{}/matmul_100_400_500".format(PATH_PREFIX)  # use argparse if input from command line
         lang = MATMUL_LANG
         matmul = tc.define(lang, name="matmul")
         mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
         matmul.autotune(mat1, mat2, cache=cache_file, **tc.autotuner_settings)
 
-    def test_autotuner_cachefile_load(self):
-        lang = MATMUL_LANG
-        cache_file = "{}/matmul_100_400_500".format(PATH_PREFIX)  # use argparse if input from command line
-        assert os.path.isfile("{}.cuda".format(cache_file)), "looks like the cache_file doesn't exist"
-
-        matmul = tc.define(lang, name="matmul")
-        mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
-        out = matmul(mat1, mat2, cache=cache_file)
-
     def test_autotuner_cachefile_load_automatic(self):
         lang = MATMUL_LANG
         cache_file = "{}/matmul_100_400_500".format(PATH_PREFIX)  # use argparse if input from command line
@@ -252,28 +216,13 @@ def test_autotuner_cachefile_load_automatic(self):
         # already compiled earlier
         out2 = matmul(mat1, mat2)
 
-    def test_autotuner_no_cache_and_run_kernel(self):
-        lang = MATMUL_LANG
-        matmul = tc.define(lang, name="matmul")
-        mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
-        options = matmul.autotune(mat1, mat2, **tc.autotuner_settings)
-        out = matmul(mat1, mat2, options=options)
-
     def test_autotuner_no_cache_and_run_kernel_automatic(self):
         lang = MATMUL_LANG
         matmul = tc.define(lang, name="matmul")
         mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
         matmul.autotune(mat1, mat2, **tc.autotuner_settings)
         out = matmul(mat1, mat2)
 
-    def test_autotuner_start_options_and_run_kernel(self):
-        lang = MATMUL_LANG
-        matmul = tc.define(lang, name="matmul")
-        mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
-        options = Options("mlp")
-        best_options = matmul.autotune(mat1, mat2, cache=True, options=options, **tc.autotuner_settings)
-        out = matmul(mat1, mat2, options=best_options)
-
     def test_autotuner_multiple_tc(self):
         lang = MATMUL_ABS_LANG
         matmul = tc.define(lang, name="matmul")
@@ -286,35 +235,6 @@ def test_autotuner_multiple_tc(self):
         absolute.autotune(A, cache=True, **tc.autotuner_settings)
         out = absolute(A)
 
-    ###########################################################################
-    # Pass tuple inputs for autotuning
-    ###########################################################################
-    def test_autotuner_tuple_size_no_cache(self):
-        lang = MATMUL_LANG
-        matmul = tc.define(lang, name="matmul")
-        matmul.autotune((3, 4), (4, 5), **tc.small_sizes_autotuner_settings)
-        matmul.autotune((100, 400), (400, 500), **tc.autotuner_settings)
-
-    def test_autotuner_tuple_size_cache_to_default(self):
-        lang = MATMUL_LANG
-        matmul = tc.define(lang, name="matmul")
-        matmul.autotune((3, 4), (4, 5), cache=True, **tc.small_sizes_autotuner_settings)
-        matmul.autotune((100, 400), (400, 500), cache=True, **tc.autotuner_settings)
-
-    def test_autotuner_tuple_size_cache_to_file_run_kernel(self):
-        lang = MATMUL_LANG
-        matmul = tc.define(lang, name="matmul")
-        cache1 = "{}/matmul_3_4_5".format(PATH_PREFIX)
-        cache2 = "{}/matmul_100_400_500".format(PATH_PREFIX)
-        matmul.autotune((3, 4), (4, 5), cache=cache1, **tc.small_sizes_autotuner_settings)
-        matmul.autotune((100, 400), (400, 500), cache=cache2, **tc.autotuner_settings)
-
-        mat1, mat2 = torch.randn(3, 4).cuda(), torch.randn(4, 5).cuda()
-        out = matmul(mat1, mat2, cache=cache1)
-
-        mat1, mat2 = torch.randn(100, 400).cuda(), torch.randn(400, 500).cuda()
-        out = matmul(mat1, mat2, cache=cache2)
-
     ##########################################################################
     # Training layer autotuning
     ##########################################################################
@@ -331,8 +251,9 @@ def test_conv_train_autotune_no_cache_no_options_seed(self):
         convolution = tc.define(lang, training=True, name="convolution", backward="convolution_grad", constants={"sh":sH, "sw":sW})
         I, W = torch.randn(N, C, H, W).cuda(), torch.randn(O, C, kH, kW).cuda()
         convolution.autotune(I, W, **tc.autotuner_settings)
-        # on the second call, autotuning will be seeded from previous best options
-        convolution.autotune(I, W, **tc.autotuner_settings, generations=5, pop_size=20)
+        # on the second call, autotuning will be seeded from previous best options,
+        # verify the seeding and new tuning settings being picked up
+        convolution.autotune(I, W, generations=3, pop_size=5)
 
     def test_conv_train_autotune_cache_no_options_seed(self):
         lang = CONV_TRAIN