-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Open
Labels
Description
What is your question?
I noticed that both FP8 Blockwise GEMM and Blockscaled GEMM contain tile_atom_to_shape_SFA
/ tile_atom_to_shape_SFB
in their Config
objects.
Blockwise:
cutlass/include/cutlass/detail/blockwise_scale_layout.hpp
Lines 126 to 179 in e6e2cc2
// The following function is provided for user fill dynamic problem size to the layout_SFA. | |
template <class ProblemShape> | |
CUTE_HOST_DEVICE | |
static constexpr auto | |
tile_atom_to_shape_SFA(ProblemShape problem_shape) { | |
auto problem_shape_MNKL = append<4>(problem_shape, 1); | |
auto strides = [&]() CUTLASS_LAMBDA_FUNC_INLINE { | |
auto [M, N, K, L] = problem_shape_MNKL; | |
if constexpr (majorSFA == UMMA::Major::MN) { | |
return make_stride(make_stride(_0{}, _1{}), make_stride(_0{}, cute::ceil_div(M, SFVecSizeM))); | |
} | |
else { | |
return make_stride(make_stride(_0{}, cute::ceil_div(K, SFVecSizeK)), make_stride(_0{}, _1{})); | |
} | |
}(); | |
auto [M, N, K, L] = problem_shape_MNKL; | |
auto mk_layout = make_layout( | |
make_shape(make_shape(Int<SFVecSizeM>{}, cute::ceil_div(M, SFVecSizeM)), | |
make_shape(Int<SFVecSizeK>{}, cute::ceil_div(K, SFVecSizeK))), | |
strides | |
); | |
return make_layout(append(shape(mk_layout), L), append(stride(mk_layout), size(filter_zeros(mk_layout)))); | |
} | |
// The following function is provided for user fill dynamic problem size to the layout_SFB. | |
template <class ProblemShape> | |
CUTE_HOST_DEVICE | |
static constexpr auto | |
tile_atom_to_shape_SFB(ProblemShape problem_shape) { | |
auto problem_shape_MNKL = append<4>(problem_shape, 1); | |
auto strides = [&]() CUTLASS_LAMBDA_FUNC_INLINE { | |
auto [M, N, K, L] = problem_shape_MNKL; | |
if constexpr (majorSFB == UMMA::Major::MN) { | |
return make_stride(make_stride(_0{}, _1{}), make_stride(_0{}, cute::ceil_div(N, SFVecSizeN))); | |
} | |
else { | |
return make_stride(make_stride(_0{}, cute::ceil_div(K, SFVecSizeK)), make_stride(_0{}, _1{})); | |
} | |
}(); | |
auto [M, N, K, L] = problem_shape_MNKL; | |
auto nk_layout = make_layout( | |
make_shape(make_shape(Int<SFVecSizeN>{}, cute::ceil_div(N, SFVecSizeN)), | |
make_shape(Int<SFVecSizeK>{}, cute::ceil_div(K, SFVecSizeK))), | |
strides | |
); | |
return make_layout(append(shape(nk_layout), L), append(stride(nk_layout), size(filter_zeros(nk_layout)))); | |
} |
Blockscaled:
cutlass/include/cutlass/detail/sm100_blockscaled_layout.hpp
Lines 86 to 104 in e6e2cc2
// The following function is provided for user fill dynamic problem size to the layout_SFA. | |
template < class ProblemShape, class LayoutSFA = LayoutSF> | |
CUTE_HOST_DEVICE | |
static constexpr auto | |
tile_atom_to_shape_SFA(ProblemShape problem_shape, LayoutSFA layout_sfa = LayoutSFA{}) { | |
auto problem_shape_MNKL = append<4>(problem_shape, 1); | |
auto [M, N, K, L] = problem_shape_MNKL; | |
return tile_to_shape(SfAtom{}, make_shape(M,K,L), Step<_2,_1,_3>{}); | |
} | |
// The following function is provided for user fill dynamic problem size to the layout_SFB. | |
template <class ProblemShape, class LayoutSFB = LayoutSF> | |
CUTE_HOST_DEVICE | |
static constexpr auto | |
tile_atom_to_shape_SFB(ProblemShape problem_shape, LayoutSFB layout_sfb = LayoutSFB{}) { | |
auto problem_shape_MNKL = append<4>(problem_shape, 1); | |
auto [M, N, K, L] = problem_shape_MNKL; | |
return tile_to_shape(SfAtom{}, make_shape(N,K,L), Step<_2,_1,_3>{}); | |
} |
What are these two functions used for? It looks like there will be an 'Atom' layout in it. What does this 'Atom' layout mean? Will it affect the Layout of Scale Factor?
I'm writing a mxfp8 grouped gemm on the SM100 architecture. I understand that if my A matrix is a K-Major (M, K) shape matrix, then my Scale Factor A should be a (M, ceil_div(K, 32)), K-Major matrix, and will not be affected by tile_atom_to_shape_SFA
. Is my understanding correct?