@@ -14,16 +14,16 @@ helion.Config(block_sizes=[16, 32, 16], loop_orders=[[1, 0]], l2_groupings=[8],
14
14
helion.Config(block_sizes=[16, 32, 32], loop_orders=[[0, 1]], l2_groupings=[2], range_unroll_factors=[0, 2], range_warp_specializes=[None, None], range_num_stages=[0, 0], range_multi_buffers=[None, None], range_flattens=[None, True], num_warps=2, num_stages=6, indexing='tensor_descriptor', pid_type='flat')
15
15
16
16
--- assertExpectedJournal(TestAutotuner.test_config_fragment1)
17
- helion.Config(block_sizes=[8, 16, 16], loop_orders=[[0, 1, 2]], flatten_loops=[False], range_unroll_factors=[0], range_warp_specializes=[None], range_num_stages=[0], range_multi_buffers=[None], range_flattens=[None], num_warps=4, num_stages=3, indexing='pointer', pid_type='flat')
18
- helion.Config(block_sizes=[2, 128, 128], loop_orders=[[1, 2, 0]], flatten_loops=[False], range_unroll_factors=[2 ], range_warp_specializes=[None ], range_num_stages=[4 ], range_multi_buffers=[False ], range_flattens=[None ], num_warps=8 , num_stages=4, indexing='tensor_descriptor', pid_type='persistent_blocked')
19
- helion.Config(block_sizes=[2, 16, 4 ], loop_orders=[[0, 2, 1]], flatten_loops=[True], range_unroll_factors=[0 ], range_warp_specializes=[None ], range_num_stages=[0 ], range_multi_buffers=[None], range_flattens=[None], num_warps=1, num_stages=2 , indexing='tensor_descriptor ', pid_type='flat ')
20
- helion.Config(block_sizes=[8, 2, 512 ], loop_orders=[[0, 2, 1]], flatten_loops=[True], range_unroll_factors=[4 ], range_warp_specializes=[False ], range_num_stages=[0 ], range_multi_buffers=[False ], range_flattens=[True ], num_warps=8 , num_stages=3 , indexing='block_ptr ', pid_type='persistent_interleaved')
21
- helion.Config(block_sizes=[1, 16, 32 ], loop_orders=[[0, 2, 1 ]], flatten_loops=[False ], range_unroll_factors=[4 ], range_warp_specializes=[True ], range_num_stages=[4 ], range_multi_buffers=[None], range_flattens=[False ], num_warps=8 , num_stages=4 , indexing='tensor_descriptor', pid_type='persistent_interleaved ')
22
- helion.Config(block_sizes=[1, 32, 512 ], loop_orders=[[0, 2, 1 ]], flatten_loops=[False ], range_unroll_factors=[0], range_warp_specializes=[None ], range_num_stages=[0], range_multi_buffers=[None ], range_flattens=[None ], num_warps=2 , num_stages=5, indexing='pointer ', pid_type='flat ')
23
- helion.Config(block_sizes=[1, 32, 32 ], loop_orders=[[1 , 2, 0 ]], flatten_loops=[True], range_unroll_factors=[4 ], range_warp_specializes=[None], range_num_stages=[2], range_multi_buffers=[None ], range_flattens=[None ], num_warps=16, num_stages=3 , indexing='tensor_descriptor ', pid_type='persistent_blocked')
24
- helion.Config(block_sizes=[1 , 4, 32 ], loop_orders=[[1, 0, 2 ]], flatten_loops=[True ], range_unroll_factors=[0 ], range_warp_specializes=[None ], range_num_stages=[0], range_multi_buffers=[None ], range_flattens=[None ], num_warps=16 , num_stages=6 , indexing='block_ptr ', pid_type='flat ')
25
- helion.Config(block_sizes=[4, 16, 1 ], loop_orders=[[2, 1 , 0]], flatten_loops=[True], range_unroll_factors=[2 ], range_warp_specializes=[None ], range_num_stages=[2 ], range_multi_buffers=[None ], range_flattens=[True], num_warps=4 , num_stages=8 , indexing='block_ptr ', pid_type='persistent_interleaved ')
26
- helion.Config(block_sizes=[8, 128, 4 ], loop_orders=[[1 , 0, 2 ]], flatten_loops=[False], range_unroll_factors=[0], range_warp_specializes=[None], range_num_stages=[0], range_multi_buffers=[None], range_flattens=[None], num_warps=2 , num_stages=4 , indexing='tensor_descriptor', pid_type='flat')
17
+ helion.Config(block_sizes=[8, 16, 16], loop_orders=[[0, 1, 2]], flatten_loops=[False], l2_groupings=[1], range_unroll_factors=[0], range_warp_specializes=[None], range_num_stages=[0], range_multi_buffers=[None], range_flattens=[None], num_warps=4, num_stages=3, indexing='pointer', pid_type='flat')
18
+ helion.Config(block_sizes=[2, 128, 128], loop_orders=[[1, 2, 0]], flatten_loops=[False], l2_groupings=[4], range_unroll_factors=[1 ], range_warp_specializes=[True ], range_num_stages=[3 ], range_multi_buffers=[None ], range_flattens=[False ], num_warps=16 , num_stages=4, indexing='tensor_descriptor', pid_type='persistent_blocked')
19
+ helion.Config(block_sizes=[4, 32, 8 ], loop_orders=[[0, 2, 1]], flatten_loops=[True], l2_groupings=[8], range_unroll_factors=[4 ], range_warp_specializes=[False ], range_num_stages=[2 ], range_multi_buffers=[None], range_flattens=[None], num_warps=1, num_stages=4 , indexing='block_ptr ', pid_type='persistent_blocked ')
20
+ helion.Config(block_sizes=[1, 512, 1 ], loop_orders=[[0, 2, 1]], flatten_loops=[True], l2_groupings=[1], range_unroll_factors=[2 ], range_warp_specializes=[True ], range_num_stages=[3 ], range_multi_buffers=[True ], range_flattens=[None ], num_warps=4 , num_stages=7 , indexing='pointer ', pid_type='persistent_interleaved')
21
+ helion.Config(block_sizes=[1, 8, 512 ], loop_orders=[[1, 0, 2 ]], flatten_loops=[True ], l2_groupings=[8], range_unroll_factors=[0 ], range_warp_specializes=[None ], range_num_stages=[0 ], range_multi_buffers=[None], range_flattens=[None ], num_warps=4 , num_stages=2 , indexing='tensor_descriptor', pid_type='flat ')
22
+ helion.Config(block_sizes=[4, 2, 128 ], loop_orders=[[0, 1, 2 ]], flatten_loops=[True ], l2_groupings=[1], range_unroll_factors=[0], range_warp_specializes=[True ], range_num_stages=[0], range_multi_buffers=[True ], range_flattens=[False ], num_warps=4 , num_stages=5, indexing='block_ptr ', pid_type='persistent_blocked ')
23
+ helion.Config(block_sizes=[2, 16, 2 ], loop_orders=[[0 , 2, 1 ]], flatten_loops=[True], l2_groupings=[64], range_unroll_factors=[0 ], range_warp_specializes=[None], range_num_stages=[2], range_multi_buffers=[False ], range_flattens=[True ], num_warps=16, num_stages=4 , indexing='block_ptr ', pid_type='persistent_blocked')
24
+ helion.Config(block_sizes=[4 , 4, 1 ], loop_orders=[[1, 2, 0 ]], flatten_loops=[False ], l2_groupings=[16], range_unroll_factors=[2 ], range_warp_specializes=[False ], range_num_stages=[0], range_multi_buffers=[True ], range_flattens=[False ], num_warps=8 , num_stages=5 , indexing='tensor_descriptor ', pid_type='persistent_blocked ')
25
+ helion.Config(block_sizes=[4, 4, 16 ], loop_orders=[[1, 2 , 0]], flatten_loops=[True], l2_groupings=[8], range_unroll_factors=[1 ], range_warp_specializes=[False ], range_num_stages=[1 ], range_multi_buffers=[True ], range_flattens=[True], num_warps=8 , num_stages=3 , indexing='tensor_descriptor ', pid_type='persistent_blocked ')
26
+ helion.Config(block_sizes=[4, 8, 8 ], loop_orders=[[2 , 0, 1 ]], flatten_loops=[False], l2_groupings=[4], range_unroll_factors=[0], range_warp_specializes=[None], range_num_stages=[0], range_multi_buffers=[None], range_flattens=[None], num_warps=8 , num_stages=5 , indexing='tensor_descriptor', pid_type='flat')
27
27
28
28
--- assertExpectedJournal(TestAutotuner.test_save_load_config)
29
29
{
0 commit comments