@@ -1891,10 +1891,12 @@ template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
1891
1891
T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
1892
1892
ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T, N> lsc_load_2d (
1893
1893
config_2d_mem_access<T, BlockWidth, BlockHeight, NBlocks> &payload) {
1894
+ using RawT = __ESIMD_DNS::__raw_t <T>;
1894
1895
__ESIMD_DNS::check_lsc_block_2d_restrictions<
1895
- T , BlockWidth, BlockHeight, NBlocks, Transposed, Transformed,
1896
+ RawT , BlockWidth, BlockHeight, NBlocks, Transposed, Transformed,
1896
1897
__ESIMD_DNS::block_2d_op::load>();
1897
1898
using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1899
+ using CacheVectorT = __ESIMD_DNS::vector_type_t <uint8_t , 2 >;
1898
1900
__ESIMD_DNS::check_cache_hints<__ESIMD_DNS::cache_action::load,
1899
1901
PropertyListT>();
1900
1902
constexpr int ElemsPerDword = 4 / sizeof (T);
@@ -1920,27 +1922,28 @@ ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T, N> lsc_load_2d(
1920
1922
__ESIMD_DNS::roundUpNextMultiple<DstElements * sizeof (T), GrfBytes>();
1921
1923
constexpr uint32_t DstLength =
1922
1924
(DstBlockSize / GrfBytes) > 31 ? 31 : (DstBlockSize / GrfBytes);
1923
- constexpr uint32_t DstLengthMask = DstLength << 20 ;
1924
1925
1925
1926
static_assert (N == ActualN || N == DstElements, " Incorrect element count" );
1926
1927
1927
- constexpr uint32_t cache_mask = detail::get_lsc_load_cache_mask<L1H, L2H>()
1928
- << 17 ;
1929
- constexpr uint32_t base_desc = 0x2000003 ;
1930
- constexpr uint32_t transformMask = Transformed ? 1 << 7 : 0 ;
1931
- constexpr uint32_t transposeMask = Transposed ? 1 << 15 : 0 ;
1932
- constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9 ;
1933
- __ESIMD_NS::simd<T, N> oldDst;
1934
- constexpr uint32_t exDesc = 0x0 ;
1935
- constexpr uint32_t desc = base_desc | cache_mask | transformMask |
1936
- transposeMask | dataSizeMask | DstLengthMask;
1937
- constexpr uint8_t execSize = 1 ;
1938
- constexpr uint8_t sfid = 0xF ;
1939
- constexpr uint8_t numSrc0 = 0x1 ;
1940
- constexpr uint8_t numDst = (N * sizeof (T)) / 64 ;
1941
- __ESIMD_NS::simd<T, ActualN> Raw =
1942
- __ESIMD_NS::raw_send<execSize, sfid, numSrc0, numDst>(
1943
- oldDst, payload.get_raw_data (), exDesc, desc);
1928
+ __ESIMD_NS::simd<RawT, N> oldDst;
1929
+ constexpr uint16_t Mask = 1 ;
1930
+ constexpr CacheVectorT Cache = {static_cast <uint8_t >(L1H),
1931
+ static_cast <uint8_t >(L2H)};
1932
+
1933
+ __ESIMD_NS::simd<T, ActualN> Raw;
1934
+
1935
+ if constexpr (Transposed)
1936
+ Raw = __esimd_lsc_load2d_descriptor_transpose<RawT, NBlocks, BlockWidth,
1937
+ BlockHeight, 0 , 0 , N>(
1938
+ Mask, payload.get_raw_data ().data (), oldDst.data (), Cache);
1939
+ else if constexpr (Transformed)
1940
+ Raw = __esimd_lsc_load2d_descriptor_transform<RawT, NBlocks, BlockWidth,
1941
+ BlockHeight, 0 , 0 , N>(
1942
+ Mask, payload.get_raw_data ().data (), oldDst.data (), Cache);
1943
+ else
1944
+ Raw = __esimd_lsc_load2d_descriptor<RawT, NBlocks, BlockWidth, BlockHeight,
1945
+ 0 , 0 , N>(
1946
+ Mask, payload.get_raw_data ().data (), oldDst.data (), Cache);
1944
1947
1945
1948
if constexpr (ActualN == N) {
1946
1949
return Raw;
@@ -1988,27 +1991,24 @@ template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
1988
1991
// lsc_prefetch_2d: issues a 2D block prefetch described by a
// config_2d_mem_access payload object and returns nothing.
// This span is a diff hunk: lines starting with "-" are the previous
// implementation (a manually encoded descriptor passed to raw_send), lines
// starting with "+" are the replacement (a call to the
// __esimd_lsc_prefetch_descriptor intrinsic). Bare numbers are old/new file
// line numbers carried over from the diff view.
// NOTE(review): the template parameters (T, BlockWidth, BlockHeight, NBlocks,
// Transposed, Transformed, L1H, L2H, N) are declared above this hunk and are
// only partially visible here - confirm their defaults against the full file.
ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_prefetch_2d (
1989
1992
config_2d_mem_access<T, BlockWidth, BlockHeight, NBlocks> &payload) {
1990
1993
using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
// Added aliases: a 2-element uint8_t vector type used to pass the cache
// hints, and the raw representation type of T used for the checks and the
// intrinsic call below.
1994
+ using CacheVectorT = __ESIMD_DNS::vector_type_t <uint8_t , 2 >;
1995
+ using RawT = __ESIMD_DNS::__raw_t <T>;
// Cache hints are checked with the load action (a prefetch is treated as a
// load for this check).
1991
1996
__ESIMD_DNS::check_cache_hints<__ESIMD_DNS::cache_action::load,
1992
1997
PropertyListT>();
// The 2D block restriction check is now instantiated with RawT instead of T.
1993
1998
__ESIMD_DNS::check_lsc_block_2d_restrictions<
1994
- T , BlockWidth, BlockHeight, NBlocks, Transposed, Transformed,
1999
+ RawT , BlockWidth, BlockHeight, NBlocks, Transposed, Transformed,
1995
2000
__ESIMD_DNS::block_2d_op::prefetch>();
// Requesting both Transposed and Transformed at once is rejected at compile
// time.
1996
2001
static_assert (!Transposed || !Transformed,
1997
2002
" Transposed and transformed is not supported" );
// Removed implementation: composed a 32-bit descriptor by OR-ing the base
// value 0x2000003 with the cache mask (shifted left by 17), the data size
// (shifted left by 9), the transform flag (bit 7) and the transpose flag
// (bit 15), then issued it through raw_send with sfid 0xF.
1998
- constexpr uint32_t cache_mask = detail::get_lsc_load_cache_mask<L1H, L2H>()
1999
- << 17 ;
2000
- constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9 ;
2001
- constexpr uint32_t base_desc = 0x2000003 ;
2002
- constexpr uint32_t transformMask = Transformed ? 1 << 7 : 0 ;
2003
- constexpr uint32_t transposeMask = Transposed ? 1 << 15 : 0 ;
2004
- constexpr uint32_t exDesc = 0x0 ;
2005
- constexpr uint32_t desc =
2006
- base_desc | cache_mask | transformMask | transposeMask | dataSizeMask;
2007
- constexpr uint8_t execSize = 1 ;
2008
- constexpr uint8_t sfid = 0xF ;
2009
- constexpr uint8_t numDst = (N * sizeof (T)) / 64 ;
2010
- __ESIMD_NS::raw_send<execSize, sfid, numDst>(payload.get_raw_data (), exDesc,
2011
- desc);
// Added implementation: the cache hints L1H and L2H are passed as two
// uint8_t values instead of being encoded into descriptor bits.
// NOTE(review): oldDst is default-constructed and never read back here;
// presumably it only satisfies the intrinsic signature - confirm.
// NOTE(review): Mask = 1 presumably is the execution predicate - confirm.
2003
+
2004
+ __ESIMD_NS::simd<RawT, N> oldDst;
2005
+ constexpr uint16_t Mask = 1 ;
2006
+ constexpr CacheVectorT Cache = {static_cast <uint8_t >(L1H),
2007
+ static_cast <uint8_t >(L2H)};
2008
+
// NOTE(review): the two literal 0 template arguments are presumably
// X/Y offsets - verify against the intrinsic declaration.
2009
+ __esimd_lsc_prefetch_descriptor<RawT, NBlocks, BlockWidth, BlockHeight, 0 , 0 ,
2010
+ N>(Mask, payload.get_raw_data ().data (),
2011
+ oldDst.data (), Cache);
2012
2012
}
2013
2013
2014
2014
/// A variation of \c 2D stateless block store \c with parameters passed as
@@ -2033,27 +2033,21 @@ template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
2033
2033
// lsc_store_2d: stores the vector Data as a 2D block using the parameters
// held in a config_2d_mem_access payload object; returns nothing.
// This span is a diff hunk: lines starting with "-" are the previous
// implementation (a manually encoded descriptor passed to raw_sends), lines
// starting with "+" are the replacement (a call to the
// __esimd_lsc_store_descriptor intrinsic). Bare numbers are old/new file
// line numbers carried over from the diff view.
// NOTE(review): the template parameters (T, BlockWidth, BlockHeight, NBlocks,
// L1H, L2H, N) are declared above this hunk and are only partially visible
// here - confirm their defaults against the full file.
ESIMD_INLINE SYCL_ESIMD_FUNCTION void
2034
2034
lsc_store_2d (config_2d_mem_access<T, BlockWidth, BlockHeight, NBlocks> &payload,
2035
2035
__ESIMD_NS::simd<T, N> Data) {
// Added alias: the raw representation type of T, used for the restriction
// check and the intrinsic call below.
2036
+ using RawT = __ESIMD_DNS::__raw_t <T>;
// Stores are always checked with Transposed = false and Transformed = false;
// the check is now instantiated with RawT instead of T.
2036
2037
__ESIMD_DNS::check_lsc_block_2d_restrictions<
2037
- T , BlockWidth, BlockHeight, NBlocks, false , false ,
2038
+ RawT , BlockWidth, BlockHeight, NBlocks, false , false ,
2038
2039
__ESIMD_DNS::block_2d_op::store>();
2039
2040
using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2041
+ using CacheVectorT = __ESIMD_DNS::vector_type_t <uint8_t , 2 >;
// Cache hints are checked with the store action.
2040
2042
__ESIMD_DNS::check_cache_hints<__ESIMD_DNS::cache_action::store,
2041
2043
PropertyListT>();
2042
2044
// Removed implementation: composed a 32-bit descriptor by OR-ing the base
// value 0x2000007 with the store cache mask (shifted left by 17) and the
// data size (shifted left by 9), then issued it through raw_sends with
// sfid 0xF, passing the payload and Data as the two sources.
2043
- constexpr uint32_t cache_mask = detail::get_lsc_store_cache_mask<L1H, L2H>()
2044
- << 17 ;
2045
- constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9 ;
2046
- constexpr uint32_t base_desc = 0x2000007 ;
2047
-
2048
- constexpr uint32_t exDesc = 0x0 ;
2049
- constexpr uint32_t desc = base_desc | cache_mask | dataSizeMask;
2050
- constexpr uint8_t execSize = 1 ;
2051
- constexpr uint8_t sfid = 0xF ;
2052
- constexpr uint8_t numSrc0 = 0x1 ;
2053
- constexpr uint8_t numSrc1 = (N * sizeof (T)) / 64 ;
// Added implementation: the cache hints L1H and L2H are passed as two
// uint8_t values instead of being encoded into descriptor bits.
// NOTE(review): Mask = 1 presumably is the execution predicate - confirm.
2045
+ constexpr uint16_t Mask = 1 ;
2046
+ constexpr CacheVectorT Cache = {static_cast <uint8_t >(L1H),
2047
+ static_cast <uint8_t >(L2H)};
2054
2048
// The intrinsic receives the mask, the raw payload data, the raw data of
// the vector being stored, and the cache hint pair.
// NOTE(review): the two literal 0 template arguments are presumably
// X/Y offsets - verify against the intrinsic declaration.
2055
- __ESIMD_NS::raw_sends<execSize, sfid, numSrc0, numSrc1 >(
2056
- payload.get_raw_data (), Data, exDesc, desc );
2049
+ __esimd_lsc_store_descriptor<RawT, NBlocks, BlockWidth, BlockHeight, 0 , 0 , N >(
2050
+ Mask, payload.get_raw_data (). data () , Data. data (), Cache );
2057
2051
}
2058
2052
2059
2053
namespace detail {
0 commit comments