Skip to content

Commit 8210083

Browse files
authored
[ESIMD] DPAS cleanup: unify the internal functions on device and host (#6610)
The purpose of this patch is to clean up the DPAS code and prepare it for moving from the experimental::esimd namespace to the esimd namespace. The implementation is not final yet. The final low-level API may a) have a different number/order of template arguments, b) allow all types and features supported by the maximal/latest target device by default, c) have target-device-dependent macros for extended verification of parameters and return types at compile time. The current patch includes: 1) Unification of the __esimd_dpas* functions on device and host. This unification also required adding +1 to the 'argument_type' enum elements to sync their values with those expected by vISA. 2) Unification of the __esimd_dpas* functions with their underlying GenX intrinsics. 3) Removal of ESIMD_XE_HPG macro usage in the DPAS implementation. It was wrongly aliased with the ESIMD_XE_HPC macro. 4) Minor corrections in the static asserts verifying DPAS arguments/returns: - systolic_depth must be 8 (it cannot be 4 as before); - non-PVC targets cannot return half/bfloat types. Signed-off-by: Vyacheslav N Klochkov <vyacheslav.n.klochkov@intel.com>
1 parent d9da9ff commit 8210083

File tree

4 files changed

+135
-181
lines changed

4 files changed

+135
-181
lines changed

llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -502,11 +502,11 @@ class ESIMDIntrinDescTable {
502502
{"raw_send2_noresult",
503503
{"raw.send2.noresult",
504504
{a(0), a(1), ai1(2), a(3), a(4), a(5), a(6), a(7)}}},
505-
{"dpas",
506-
{"dpas2", {a(0), a(1), a(2), a(3), a(4), a(5), a(6), a(7), a(8)}}},
507-
{"dpas2", {"dpas.nosrc0", {a(0), a(1), a(2)}}},
508-
{"dpasw", {"dpasw", {a(0), a(1), a(2), a(3)}}},
509-
{"dpasw2", {"dpasw.nosrc0", {a(0), a(1), a(2)}}},
505+
{"dpas2",
506+
{"dpas2", {a(0), a(1), a(2), t(0), t(1), t(2), t(3), t(11), t(12)}}},
507+
{"dpas_nosrc0", {"dpas.nosrc0", {a(0), a(1), t(0)}}},
508+
{"dpasw", {"dpasw", {a(0), a(1), a(2), t(0)}}},
509+
{"dpasw_nosrc0", {"dpasw.nosrc0", {a(0), a(1), t(0)}}},
510510
{"nbarrier", {"nbarrier", {a(0), a(1), a(2)}}},
511511
{"raw_send_nbarrier_signal",
512512
{"raw.send.noresult", {a(0), ai1(4), a(1), a(2), a(3)}}},

sycl/include/sycl/ext/intel/experimental/esimd/common.hpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,17 +28,17 @@ namespace ext::intel::experimental::esimd {
2828
/// @{
2929

3030
enum class argument_type {
31-
U1 = 0, // unsigned 1 bit
32-
S1 = 1, // signed 1 bit
33-
U2 = 2, // unsigned 2 bits
34-
S2 = 3, // signed 2 bits
35-
U4 = 4, // unsigned 4 bits
36-
S4 = 5, // signed 4 bits
37-
U8 = 6, // unsigned 8 bits
38-
S8 = 7, // signed 8 bits
39-
BF16 = 8, // bfloat 16
40-
FP16 = 9, // half float
41-
TF32 = 11 // tensorfloat 32
31+
U1 = 1, // unsigned 1 bit
32+
S1 = 2, // signed 1 bit
33+
U2 = 3, // unsigned 2 bits
34+
S2 = 4, // signed 2 bits
35+
U4 = 5, // unsigned 4 bits
36+
S4 = 6, // signed 4 bits
37+
U8 = 7, // unsigned 8 bits
38+
S8 = 8, // signed 8 bits
39+
BF16 = 9, // bfloat 16
40+
FP16 = 10, // half float
41+
TF32 = 12 // tensorfloat 32
4242
};
4343

4444
/// The scope that lsc_fence operation should apply to

sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp

Lines changed: 47 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -110,36 +110,7 @@ __ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
110110
}
111111
#endif // __SYCL_DEVICE_ONLY__
112112

113-
#ifdef __SYCL_DEVICE_ONLY__
114-
115-
// TODO: __esimd_dpas* should have single declaration for host and device:
116-
// Ret __esimd_dpas*(...)
117-
template <typename T, typename T0, typename T1, typename T2, int N, int N1,
118-
int N2>
119-
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __ESIMD_DNS::vector_type_t<T, N>
120-
__esimd_dpas(__ESIMD_DNS::vector_type_t<T0, N> src0,
121-
__ESIMD_DNS::vector_type_t<T1, N1> src1,
122-
__ESIMD_DNS::vector_type_t<T2, N2> src2, int src1_precision,
123-
int src2_precision, int depth, int repeat, int sign_res,
124-
int sign_acc);
125-
126-
template <typename T, typename T1, typename T2, int N, int N1, int N2>
127-
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __ESIMD_DNS::vector_type_t<T, N>
128-
__esimd_dpas2(__ESIMD_DNS::vector_type_t<T1, N1> src1,
129-
__ESIMD_DNS::vector_type_t<T2, N2> src2, int dpas_info);
130-
131-
template <typename T, typename T1, typename T2, int N, int N1, int N2>
132-
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __ESIMD_DNS::vector_type_t<T, N>
133-
__esimd_dpasw(__ESIMD_DNS::vector_type_t<T, N> src0,
134-
__ESIMD_DNS::vector_type_t<T1, N1> src1,
135-
__ESIMD_DNS::vector_type_t<T2, N2> src2, int dpas_info);
136-
137-
template <typename T, typename T1, typename T2, int N, int N1, int N2>
138-
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __ESIMD_DNS::vector_type_t<T, N>
139-
__esimd_dpasw2(__ESIMD_DNS::vector_type_t<T1, N1> src1,
140-
__ESIMD_DNS::vector_type_t<T2, N2> src2, int dpas_info);
141-
142-
#else // __SYCL_DEVICE_ONLY__
113+
#ifndef __SYCL_DEVICE_ONLY__
143114

144115
template <typename T0, typename T1, int SZ>
145116
__ESIMD_INTRIN __ESIMD_raw_vec_t(T0, SZ)
@@ -659,56 +630,73 @@ __esimd_dpas_inner(const __ESIMD_DNS::vector_type_t<T0, SZ> *src0,
659630

660631
return retv;
661632
}
633+
#endif // #ifndef __SYCL_DEVICE_ONLY__
662634

663635
template <__ESIMD_ENS::argument_type src1_precision,
664636
__ESIMD_ENS::argument_type src2_precision, int systolic_depth,
665637
int repeat_count, typename T, typename T0, typename T1, typename T2,
666-
int N, int N1, int N2>
667-
inline __ESIMD_DNS::vector_type_t<T, N>
668-
__esimd_dpas(__ESIMD_DNS::vector_type_t<T0, N> src0,
669-
__ESIMD_DNS::vector_type_t<T1, N1> src1,
670-
__ESIMD_DNS::vector_type_t<T2, N2> src2) {
638+
int N, int N1, int N2, int res_sign = std::is_signed_v<T>,
639+
int acc_sign = std::is_signed_v<T0>>
640+
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
641+
__esimd_dpas2(__ESIMD_DNS::vector_type_t<T0, N> src0,
642+
__ESIMD_DNS::vector_type_t<T1, N1> src1,
643+
__ESIMD_DNS::vector_type_t<T2, N2> src2)
644+
#ifdef __SYCL_DEVICE_ONLY__
645+
;
646+
#else // !__SYCL_DEVICE_ONLY__
647+
{
671648
return __esimd_dpas_inner<src1_precision, src2_precision, systolic_depth,
672649
repeat_count, T, T0, T1, T2, N, N1, N2>(
673650
std::addressof(src0), src1, src2);
674651
}
652+
#endif // !__SYCL_DEVICE_ONLY__
675653

676-
template <__ESIMD_ENS::argument_type src1_precision,
677-
__ESIMD_ENS::argument_type src2_precision, int systolic_depth,
678-
int repeat_count, typename T, typename T1, typename T2, int N, int N1,
679-
int N2>
680-
inline __ESIMD_DNS::vector_type_t<T, N>
681-
__esimd_dpas2(__ESIMD_DNS::vector_type_t<T1, N1> src1,
682-
__ESIMD_DNS::vector_type_t<T2, N2> src2) {
654+
template <int Info, typename T, typename T1, typename T2, int N, int N1, int N2>
655+
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
656+
__esimd_dpas_nosrc0(__ESIMD_DNS::vector_type_t<T1, N1> src1,
657+
__ESIMD_DNS::vector_type_t<T2, N2> src2)
658+
#ifdef __SYCL_DEVICE_ONLY__
659+
;
660+
#else // !__SYCL_DEVICE_ONLY__
661+
{
662+
constexpr __ESIMD_ENS::argument_type src1_precision =
663+
static_cast<__ESIMD_ENS::argument_type>(Info & 0xff);
664+
constexpr __ESIMD_ENS::argument_type src2_precision =
665+
static_cast<__ESIMD_ENS::argument_type>((Info >> 8) & 0xff);
666+
constexpr int systolic_depth = (Info >> 16) & 0xff;
667+
constexpr int repeat_count = (Info >> 24) & 0xff;
683668
return __esimd_dpas_inner<src1_precision, src2_precision, systolic_depth,
684-
repeat_count, T, T, T1, T2, N, N1, N2>(nullptr, src1,
685-
src2);
669+
repeat_count, T, T, T1, T2, N, N1, N2>(nullptr,
670+
src1, src2);
686671
}
672+
#endif // !__SYCL_DEVICE_ONLY__
687673

688-
template <__ESIMD_ENS::argument_type src1_precision,
689-
__ESIMD_ENS::argument_type src2_precision, int systolic_depth,
690-
int repeat_count, typename T, typename T1, typename T2, int N, int N1,
691-
int N2>
692-
inline __ESIMD_DNS::vector_type_t<T, N>
674+
template <int Info, typename T, typename T1, typename T2, int N, int N1, int N2>
675+
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
693676
__esimd_dpasw(__ESIMD_DNS::vector_type_t<T, N> src0,
694677
__ESIMD_DNS::vector_type_t<T1, N1> src1,
695-
__ESIMD_DNS::vector_type_t<T2, N2> src2) {
678+
__ESIMD_DNS::vector_type_t<T2, N2> src2)
679+
#ifdef __SYCL_DEVICE_ONLY__
680+
;
681+
#else // !__SYCL_DEVICE_ONLY__
682+
{
696683
__ESIMD_UNSUPPORTED_ON_HOST;
697684
return __ESIMD_DNS::vector_type_t<T, N>();
698685
}
686+
#endif // !__SYCL_DEVICE_ONLY__
699687

700-
template <__ESIMD_ENS::argument_type src1_precision,
701-
__ESIMD_ENS::argument_type src2_precision, int systolic_depth,
702-
int repeat_count, typename T, typename T1, typename T2, int N, int N1,
703-
int N2>
704-
inline __ESIMD_DNS::vector_type_t<T, N>
705-
__esimd_dpasw2(__ESIMD_DNS::vector_type_t<T1, N1> src1,
706-
__ESIMD_DNS::vector_type_t<T2, N2> src2) {
688+
template <int Info, typename T, typename T1, typename T2, int N, int N1, int N2>
689+
__ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N>
690+
__esimd_dpasw_nosrc0(__ESIMD_DNS::vector_type_t<T1, N1> src1,
691+
__ESIMD_DNS::vector_type_t<T2, N2> src2)
692+
#ifdef __SYCL_DEVICE_ONLY__
693+
;
694+
#else // !__SYCL_DEVICE_ONLY__
695+
{
707696
__ESIMD_UNSUPPORTED_ON_HOST;
708697
return __ESIMD_DNS::vector_type_t<T, N>();
709698
}
710-
711-
#endif // #ifdef __SYCL_DEVICE_ONLY__
699+
#endif // !__SYCL_DEVICE_ONLY__
712700

713701
#undef __ESIMD_raw_vec_t
714702
#undef __ESIMD_cpp_vec_t

0 commit comments

Comments
 (0)