@@ -110,36 +110,7 @@ __ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
110
110
}
111
111
#endif // __SYCL_DEVICE_ONLY__
112
112
113
- #ifdef __SYCL_DEVICE_ONLY__
114
-
115
- // TODO: __esimd_dpas* should have single declaration for host and device:
116
- // Ret __esimd_dpas*(...)
117
// Diff note: every line of this declaration is marked '-' (removed).
// It is a body-less declaration of __esimd_dpas, located after the removed
// `#ifdef __SYCL_DEVICE_ONLY__` line, that takes three vector operands
// (src0, src1, src2) followed by six plain `int` function arguments:
// src1_precision, src2_precision, depth, repeat, sign_res and sign_acc.
- template <typename T, typename T0, typename T1, typename T2, int N, int N1,
118
- int N2>
119
- SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __ESIMD_DNS::vector_type_t <T, N>
120
- __esimd_dpas (__ESIMD_DNS::vector_type_t <T0, N> src0,
121
- __ESIMD_DNS::vector_type_t <T1, N1> src1,
122
- __ESIMD_DNS::vector_type_t <T2, N2> src2, int src1_precision,
123
- int src2_precision, int depth, int repeat, int sign_res,
124
- int sign_acc);
125
-
126
// Diff note: removed ('-') body-less declaration of __esimd_dpas2.
// It has no src0 operand: only src1 and src2 vectors plus a single
// `int dpas_info` function argument.
- template <typename T, typename T1, typename T2, int N, int N1, int N2>
127
- SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __ESIMD_DNS::vector_type_t <T, N>
128
- __esimd_dpas2 (__ESIMD_DNS::vector_type_t <T1, N1> src1,
129
- __ESIMD_DNS::vector_type_t <T2, N2> src2, int dpas_info);
130
-
131
// Diff note: removed ('-') body-less declaration of __esimd_dpasw.
// src0 has the same element type and length (T, N) as the returned vector;
// the remaining inputs are src1, src2 and a single `int dpas_info` argument.
- template <typename T, typename T1, typename T2, int N, int N1, int N2>
132
- SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __ESIMD_DNS::vector_type_t <T, N>
133
- __esimd_dpasw (__ESIMD_DNS::vector_type_t <T, N> src0,
134
- __ESIMD_DNS::vector_type_t <T1, N1> src1,
135
- __ESIMD_DNS::vector_type_t <T2, N2> src2, int dpas_info);
136
-
137
// Diff note: removed ('-') body-less declaration of __esimd_dpasw2.
// Like the removed __esimd_dpas2 it has no src0 operand: src1, src2 and a
// single `int dpas_info` function argument.
- template <typename T, typename T1, typename T2, int N, int N1, int N2>
138
- SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __ESIMD_DNS::vector_type_t <T, N>
139
- __esimd_dpasw2 (__ESIMD_DNS::vector_type_t <T1, N1> src1,
140
- __ESIMD_DNS::vector_type_t <T2, N2> src2) int dpas_info);
141
-
142
- #else // __SYCL_DEVICE_ONLY__
113
+ #ifndef __SYCL_DEVICE_ONLY__
143
114
144
115
template <typename T0, typename T1, int SZ>
145
116
__ESIMD_INTRIN __ESIMD_raw_vec_t (T0, SZ)
@@ -659,56 +630,73 @@ __esimd_dpas_inner(const __ESIMD_DNS::vector_type_t<T0, SZ> *src0,
659
630
660
631
return retv;
661
632
}
633
+ #endif // #ifndef __SYCL_DEVICE_ONLY__
662
634
663
635
// Diff note: the '-' lines are the previous `inline` definition named
// __esimd_dpas; the '+' lines replace it with __esimd_dpas2, declared with
// __ESIMD_INTRIN.
//
// New form:
//  * Two extra template parameters, res_sign and acc_sign, default to
//    std::is_signed_v<T> and std::is_signed_v<T0> respectively.
//  * When __SYCL_DEVICE_ONLY__ is defined, only the declaration is emitted
//    (the signature is terminated by ';').
//  * Otherwise the body forwards to __esimd_dpas_inner, passing the address
//    of src0 together with src1 and src2. res_sign/acc_sign are not
//    forwarded in that call.
template <__ESIMD_ENS::argument_type src1_precision,
664
636
__ESIMD_ENS::argument_type src2_precision, int systolic_depth,
665
637
int repeat_count, typename T, typename T0, typename T1, typename T2,
666
- int N, int N1, int N2>
667
- inline __ESIMD_DNS::vector_type_t <T, N>
668
- __esimd_dpas (__ESIMD_DNS::vector_type_t <T0, N> src0,
669
- __ESIMD_DNS::vector_type_t <T1, N1> src1,
670
- __ESIMD_DNS::vector_type_t <T2, N2> src2) {
638
+ int N, int N1, int N2, int res_sign = std::is_signed_v<T>,
639
+ int acc_sign = std::is_signed_v<T0>>
640
+ __ESIMD_INTRIN __ESIMD_DNS::vector_type_t <T, N>
641
+ __esimd_dpas2 (__ESIMD_DNS::vector_type_t <T0, N> src0,
642
+ __ESIMD_DNS::vector_type_t <T1, N1> src1,
643
+ __ESIMD_DNS::vector_type_t <T2, N2> src2)
644
+ #ifdef __SYCL_DEVICE_ONLY__
645
+ ;
646
+ #else // !__SYCL_DEVICE_ONLY__
647
+ {
671
648
return __esimd_dpas_inner<src1_precision, src2_precision, systolic_depth,
672
649
repeat_count, T, T0, T1, T2, N, N1, N2>(
673
650
std::addressof (src0), src1, src2);
674
651
}
652
+ #endif // !__SYCL_DEVICE_ONLY__
675
653
676
// Diff note: the '-' lines are the previous `inline` definition named
// __esimd_dpas2 (four separate template parameters for precisions, depth
// and repeat count); the '+' lines replace it with __esimd_dpas_nosrc0,
// which takes a single packed `int Info` template parameter instead.
//
// New form:
//  * When __SYCL_DEVICE_ONLY__ is defined, only the declaration is emitted.
//  * Otherwise the body unpacks Info one byte at a time:
//      bits  0..7   -> src1_precision (cast to __ESIMD_ENS::argument_type)
//      bits  8..15  -> src2_precision (cast to __ESIMD_ENS::argument_type)
//      bits 16..23  -> systolic_depth
//      bits 24..31  -> repeat_count
//    and forwards to __esimd_dpas_inner with nullptr in the src0 position,
//    using T for both the result and the src0 element-type arguments.
- template <__ESIMD_ENS::argument_type src1_precision,
677
- __ESIMD_ENS::argument_type src2_precision, int systolic_depth,
678
- int repeat_count, typename T, typename T1, typename T2, int N, int N1,
679
- int N2>
680
- inline __ESIMD_DNS::vector_type_t <T, N>
681
- __esimd_dpas2 (__ESIMD_DNS::vector_type_t <T1, N1> src1,
682
- __ESIMD_DNS::vector_type_t <T2, N2> src2) {
654
+ template <int Info, typename T, typename T1, typename T2, int N, int N1, int N2>
655
+ __ESIMD_INTRIN __ESIMD_DNS::vector_type_t <T, N>
656
+ __esimd_dpas_nosrc0 (__ESIMD_DNS::vector_type_t <T1, N1> src1,
657
+ __ESIMD_DNS::vector_type_t <T2, N2> src2)
658
+ #ifdef __SYCL_DEVICE_ONLY__
659
+ ;
660
+ #else // !__SYCL_DEVICE_ONLY__
661
+ {
662
+ constexpr __ESIMD_ENS::argument_type src1_precision =
663
+ static_cast <__ESIMD_ENS::argument_type>(Info & 0xff );
664
+ constexpr __ESIMD_ENS::argument_type src2_precision =
665
+ static_cast <__ESIMD_ENS::argument_type>((Info >> 8 ) & 0xff );
666
+ constexpr int systolic_depth = (Info >> 16 ) & 0xff ;
667
+ constexpr int repeat_count = (Info >> 24 ) & 0xff ;
683
668
return __esimd_dpas_inner<src1_precision, src2_precision, systolic_depth,
684
- repeat_count, T, T, T1, T2, N, N1, N2>(nullptr , src1,
685
- src2);
669
+ repeat_count, T, T, T1, T2, N, N1, N2>(nullptr ,
670
+ src1, src2);
686
671
}
672
+ #endif // !__SYCL_DEVICE_ONLY__
687
673
688
// Diff note: __esimd_dpasw keeps its name and its (src0, src1, src2)
// parameters. The '-' lines drop the four separate template parameters
// (src1_precision, src2_precision, systolic_depth, repeat_count) and the
// `inline` specifier; the '+' lines introduce a single `int Info` template
// parameter and the __ESIMD_INTRIN prefix.
//
// New form:
//  * When __SYCL_DEVICE_ONLY__ is defined, only the declaration is emitted.
//  * Otherwise the body expands __ESIMD_UNSUPPORTED_ON_HOST (defined outside
//    this excerpt) and then returns a value-initialized vector; none of the
//    operands is read.
- template <__ESIMD_ENS::argument_type src1_precision,
689
- __ESIMD_ENS::argument_type src2_precision, int systolic_depth,
690
- int repeat_count, typename T, typename T1, typename T2, int N, int N1,
691
- int N2>
692
- inline __ESIMD_DNS::vector_type_t <T, N>
674
+ template <int Info, typename T, typename T1, typename T2, int N, int N1, int N2>
675
+ __ESIMD_INTRIN __ESIMD_DNS::vector_type_t <T, N>
693
676
__esimd_dpasw (__ESIMD_DNS::vector_type_t <T, N> src0,
694
677
__ESIMD_DNS::vector_type_t <T1, N1> src1,
695
- __ESIMD_DNS::vector_type_t <T2, N2> src2) {
678
+ __ESIMD_DNS::vector_type_t <T2, N2> src2)
679
+ #ifdef __SYCL_DEVICE_ONLY__
680
+ ;
681
+ #else // !__SYCL_DEVICE_ONLY__
682
+ {
696
683
__ESIMD_UNSUPPORTED_ON_HOST;
697
684
return __ESIMD_DNS::vector_type_t <T, N>();
698
685
}
686
+ #endif // !__SYCL_DEVICE_ONLY__
699
687
700
// Diff note: the '-' lines are the previous `inline` definition named
// __esimd_dpasw2; the '+' lines replace it with __esimd_dpasw_nosrc0, which
// takes a single `int Info` template parameter and the __ESIMD_INTRIN
// prefix. The old closing `#endif // #ifdef __SYCL_DEVICE_ONLY__` is also
// removed here, since each function now carries its own #ifdef/#else/#endif.
//
// New form:
//  * When __SYCL_DEVICE_ONLY__ is defined, only the declaration is emitted.
//  * Otherwise the body expands __ESIMD_UNSUPPORTED_ON_HOST (defined outside
//    this excerpt) and then returns a value-initialized vector; src1 and
//    src2 are not read.
- template <__ESIMD_ENS::argument_type src1_precision,
701
- __ESIMD_ENS::argument_type src2_precision, int systolic_depth,
702
- int repeat_count, typename T, typename T1, typename T2, int N, int N1,
703
- int N2>
704
- inline __ESIMD_DNS::vector_type_t <T, N>
705
- __esimd_dpasw2 (__ESIMD_DNS::vector_type_t <T1, N1> src1,
706
- __ESIMD_DNS::vector_type_t <T2, N2> src2) {
688
+ template <int Info, typename T, typename T1, typename T2, int N, int N1, int N2>
689
+ __ESIMD_INTRIN __ESIMD_DNS::vector_type_t <T, N>
690
+ __esimd_dpasw_nosrc0 (__ESIMD_DNS::vector_type_t <T1, N1> src1,
691
+ __ESIMD_DNS::vector_type_t <T2, N2> src2)
692
+ #ifdef __SYCL_DEVICE_ONLY__
693
+ ;
694
+ #else // !__SYCL_DEVICE_ONLY__
695
+ {
707
696
__ESIMD_UNSUPPORTED_ON_HOST;
708
697
return __ESIMD_DNS::vector_type_t <T, N>();
709
698
}
710
-
711
- #endif // #ifdef __SYCL_DEVICE_ONLY__
699
+ #endif // !__SYCL_DEVICE_ONLY__
712
700
713
701
#undef __ESIMD_raw_vec_t
714
702
#undef __ESIMD_cpp_vec_t
0 commit comments