[QST]What is the purpose of tile_atom_to_shape? · Issue #2707 · NVIDIA/cutlass

What is your question?
I noticed that both FP8 Blockwise GEMM and Blockscaled GEMM contain tile_atom_to_shape_SFA / tile_atom_to_shape_SFB in their Config objects.
Blockwise:

cutlass/include/cutlass/detail/blockwise_scale_layout.hpp

Lines 126 to 179 in e6e2cc2

    
           // The following function is provided for user fill dynamic problem size to the layout_SFA. 
        
           template <class ProblemShape> 
        
           CUTE_HOST_DEVICE 
        
           static constexpr auto  
        
           tile_atom_to_shape_SFA(ProblemShape problem_shape) { 
        
             auto problem_shape_MNKL = append<4>(problem_shape, 1); 
        
             auto strides = [&]() CUTLASS_LAMBDA_FUNC_INLINE { 
        
               auto [M, N, K, L] = problem_shape_MNKL; 
        
               if constexpr (majorSFA == UMMA::Major::MN) { 
        
                 return make_stride(make_stride(_0{}, _1{}), make_stride(_0{}, cute::ceil_div(M, SFVecSizeM))); 
        
               } 
        
               else { 
        
                 return make_stride(make_stride(_0{}, cute::ceil_div(K, SFVecSizeK)), make_stride(_0{}, _1{})); 
        
               } 
        
             }(); 
        
             auto [M, N, K, L] = problem_shape_MNKL; 
        
             auto mk_layout = make_layout( 
        
               make_shape(make_shape(Int<SFVecSizeM>{}, cute::ceil_div(M, SFVecSizeM)), 
        
                          make_shape(Int<SFVecSizeK>{}, cute::ceil_div(K, SFVecSizeK))), 
        
               strides 
        
             ); 
        
             return make_layout(append(shape(mk_layout), L), append(stride(mk_layout), size(filter_zeros(mk_layout)))); 
        
           } 
        
           // The following function is provided for user fill dynamic problem size to the layout_SFB. 
        
           template <class ProblemShape> 
        
           CUTE_HOST_DEVICE 
        
           static constexpr auto  
        
           tile_atom_to_shape_SFB(ProblemShape problem_shape) { 
        
             auto problem_shape_MNKL = append<4>(problem_shape, 1); 
        
             auto strides = [&]() CUTLASS_LAMBDA_FUNC_INLINE { 
        
               auto [M, N, K, L] = problem_shape_MNKL; 
        
               if constexpr (majorSFB == UMMA::Major::MN) { 
        
                 return make_stride(make_stride(_0{}, _1{}), make_stride(_0{}, cute::ceil_div(N, SFVecSizeN))); 
        
               } 
        
               else { 
        
                 return make_stride(make_stride(_0{}, cute::ceil_div(K, SFVecSizeK)), make_stride(_0{}, _1{})); 
        
               } 
        
             }(); 
        
             auto [M, N, K, L] = problem_shape_MNKL; 
        
             auto nk_layout = make_layout( 
        
               make_shape(make_shape(Int<SFVecSizeN>{}, cute::ceil_div(N, SFVecSizeN)), 
        
                          make_shape(Int<SFVecSizeK>{}, cute::ceil_div(K, SFVecSizeK))), 
        
               strides 
        
             ); 
        
             return make_layout(append(shape(nk_layout), L), append(stride(nk_layout), size(filter_zeros(nk_layout)))); 
        
           }

Blockscaled:

cutlass/include/cutlass/detail/sm100_blockscaled_layout.hpp

Lines 86 to 104 in e6e2cc2

    
           // The following function is provided for user fill dynamic problem size to the layout_SFA. 
        
           template < class ProblemShape, class LayoutSFA = LayoutSF> 
        
           CUTE_HOST_DEVICE 
        
           static constexpr auto 
        
           tile_atom_to_shape_SFA(ProblemShape problem_shape, LayoutSFA layout_sfa = LayoutSFA{}) { 
        
             auto problem_shape_MNKL = append<4>(problem_shape, 1); 
        
             auto [M, N, K, L] = problem_shape_MNKL; 
        
             return tile_to_shape(SfAtom{}, make_shape(M,K,L), Step<_2,_1,_3>{}); 
        
           } 
        
           // The following function is provided for user fill dynamic problem size to the layout_SFB. 
        
           template <class ProblemShape, class LayoutSFB = LayoutSF> 
        
           CUTE_HOST_DEVICE 
        
           static constexpr auto 
        
           tile_atom_to_shape_SFB(ProblemShape problem_shape, LayoutSFB layout_sfb = LayoutSFB{}) { 
        
             auto problem_shape_MNKL = append<4>(problem_shape, 1); 
        
             auto [M, N, K, L] = problem_shape_MNKL; 
        
             return tile_to_shape(SfAtom{}, make_shape(N,K,L), Step<_2,_1,_3>{}); 
        
           }

What are these two functions used for? It looks like there will be an 'Atom' layout in it. What does this 'Atom' layout mean? Will it affect the Layout of Scale Factor?

I'm writing a mxfp8 grouped gemm on the SM100 architecture. I understand that if my A matrix is a K-Major (M, K) shape matrix, then my Scale Factor A should be a (M, ceil_div(K, 32)), K-Major matrix, and will not be affected by tile_atom_to_shape_SFA. Is my understanding correct?

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[QST]What is the purpose of tile_atom_to_shape? #2707

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

	// The following function is provided for user fill dynamic problem size to the layout_SFA.
	template <class ProblemShape>
	CUTE_HOST_DEVICE
	static constexpr auto
	tile_atom_to_shape_SFA(ProblemShape problem_shape) {
	auto problem_shape_MNKL = append<4>(problem_shape, 1);

	auto strides = [&]() CUTLASS_LAMBDA_FUNC_INLINE {
	auto [M, N, K, L] = problem_shape_MNKL;
	if constexpr (majorSFA == UMMA::Major::MN) {
	return make_stride(make_stride(_0{}, _1{}), make_stride(_0{}, cute::ceil_div(M, SFVecSizeM)));
	}
	else {
	return make_stride(make_stride(_0{}, cute::ceil_div(K, SFVecSizeK)), make_stride(_0{}, _1{}));
	}
	}();

	auto [M, N, K, L] = problem_shape_MNKL;
	auto mk_layout = make_layout(
	make_shape(make_shape(Int<SFVecSizeM>{}, cute::ceil_div(M, SFVecSizeM)),
	make_shape(Int<SFVecSizeK>{}, cute::ceil_div(K, SFVecSizeK))),
	strides
	);

	return make_layout(append(shape(mk_layout), L), append(stride(mk_layout), size(filter_zeros(mk_layout))));
	}

	// The following function is provided for user fill dynamic problem size to the layout_SFB.
	template <class ProblemShape>
	CUTE_HOST_DEVICE
	static constexpr auto
	tile_atom_to_shape_SFB(ProblemShape problem_shape) {
	auto problem_shape_MNKL = append<4>(problem_shape, 1);

	auto strides = [&]() CUTLASS_LAMBDA_FUNC_INLINE {
	auto [M, N, K, L] = problem_shape_MNKL;

	if constexpr (majorSFB == UMMA::Major::MN) {
	return make_stride(make_stride(_0{}, _1{}), make_stride(_0{}, cute::ceil_div(N, SFVecSizeN)));
	}
	else {
	return make_stride(make_stride(_0{}, cute::ceil_div(K, SFVecSizeK)), make_stride(_0{}, _1{}));
	}
	}();

	auto [M, N, K, L] = problem_shape_MNKL;
	auto nk_layout = make_layout(
	make_shape(make_shape(Int<SFVecSizeN>{}, cute::ceil_div(N, SFVecSizeN)),
	make_shape(Int<SFVecSizeK>{}, cute::ceil_div(K, SFVecSizeK))),
	strides
	);

	return make_layout(append(shape(nk_layout), L), append(stride(nk_layout), size(filter_zeros(nk_layout))));
	}

	// The following function is provided for user fill dynamic problem size to the layout_SFA.
	template < class ProblemShape, class LayoutSFA = LayoutSF>
	CUTE_HOST_DEVICE
	static constexpr auto
	tile_atom_to_shape_SFA(ProblemShape problem_shape, LayoutSFA layout_sfa = LayoutSFA{}) {
	auto problem_shape_MNKL = append<4>(problem_shape, 1);
	auto [M, N, K, L] = problem_shape_MNKL;
	return tile_to_shape(SfAtom{}, make_shape(M,K,L), Step<_2,_1,_3>{});
	}

	// The following function is provided for user fill dynamic problem size to the layout_SFB.
	template <class ProblemShape, class LayoutSFB = LayoutSF>
	CUTE_HOST_DEVICE
	static constexpr auto
	tile_atom_to_shape_SFB(ProblemShape problem_shape, LayoutSFB layout_sfb = LayoutSFB{}) {
	auto problem_shape_MNKL = append<4>(problem_shape, 1);
	auto [M, N, K, L] = problem_shape_MNKL;
	return tile_to_shape(SfAtom{}, make_shape(N,K,L), Step<_2,_1,_3>{});
	}

[QST]What is the purpose of tile_atom_to_shape? #2707

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions