@@ -322,7 +322,7 @@ constexpr void check_lsc_atomic() {
322
322
/// @param pred is the predicates.
323
323
/// @return is a vector of type T and size N * NElts
324
324
///
325
- template <typename T, uint8_t NElts = 1 ,
325
+ template <typename T, int NElts = 1 ,
326
326
lsc_data_size DS = lsc_data_size::default_size, int N>
327
327
__ESIMD_API __ESIMD_NS::simd<T, N * NElts>
328
328
lsc_slm_gather (__ESIMD_NS::simd<uint32_t , N> offsets,
@@ -357,8 +357,7 @@ lsc_slm_gather(__ESIMD_NS::simd<uint32_t, N> offsets,
357
357
/// @param offset is the zero-based offset for SLM buffer in bytes.
358
358
/// @return is a vector of type T and size NElts
359
359
///
360
- template <typename T, uint8_t NElts = 1 ,
361
- lsc_data_size DS = lsc_data_size::default_size>
360
+ template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size>
362
361
__ESIMD_API __ESIMD_NS::simd<T, NElts> lsc_slm_block_load (uint32_t offset) {
363
362
detail::check_lsc_vector_size<NElts>();
364
363
detail::check_lsc_data_size<T, DS>();
@@ -396,10 +395,9 @@ __ESIMD_API __ESIMD_NS::simd<T, NElts> lsc_slm_block_load(uint32_t offset) {
396
395
/// @param pred is the predicates.
397
396
/// @return is a vector of type T and size N * NElts
398
397
///
399
- template <typename T, uint8_t NElts = 1 ,
400
- lsc_data_size DS = lsc_data_size::default_size,
401
- cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
402
- int N>
398
+ template <
399
+ typename T, int NElts = 1 , lsc_data_size DS = lsc_data_size::default_size,
400
+ cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N>
403
401
__ESIMD_API __ESIMD_NS::simd<T, N * NElts>
404
402
lsc_gather (const T *p, __ESIMD_NS::simd<uint32_t , N> offsets,
405
403
__ESIMD_NS::simd_mask<N> pred = 1 ) {
@@ -442,7 +440,7 @@ lsc_gather(const T *p, __ESIMD_NS::simd<uint32_t, N> offsets,
442
440
/// @param pred is the predicates.
443
441
/// @return is a vector of type T and size N * NElts
444
442
///
445
- template <typename T, uint8_t NElts = 1 ,
443
+ template <typename T, int NElts = 1 ,
446
444
lsc_data_size DS = lsc_data_size::default_size,
447
445
cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
448
446
int N, typename AccessorTy>
@@ -490,8 +488,7 @@ lsc_gather(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
490
488
/// the operation.
491
489
/// @return is a vector of type T and size NElts
492
490
///
493
- template <typename T, uint8_t NElts = 1 ,
494
- lsc_data_size DS = lsc_data_size::default_size,
491
+ template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
495
492
cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none>
496
493
__ESIMD_API __ESIMD_NS::simd<T, NElts>
497
494
lsc_block_load (const T *p, __ESIMD_NS::simd_mask<1 > pred = 1 ) {
@@ -533,8 +530,7 @@ lsc_block_load(const T *p, __ESIMD_NS::simd_mask<1> pred = 1) {
533
530
/// the operation.
534
531
/// @return is a vector of type T and size NElts
535
532
///
536
- template <typename T, uint8_t NElts = 1 ,
537
- lsc_data_size DS = lsc_data_size::default_size,
533
+ template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
538
534
cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
539
535
typename AccessorTy>
540
536
__ESIMD_API std::enable_if_t <!std::is_pointer<AccessorTy>::value,
@@ -580,10 +576,9 @@ lsc_block_load(AccessorTy acc, uint32_t offset,
580
576
/// @param offsets is the zero-based offsets in bytes.
581
577
/// @param pred is the predicates.
582
578
///
583
- template <typename T, uint8_t NElts = 1 ,
584
- lsc_data_size DS = lsc_data_size::default_size,
585
- cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
586
- int N>
579
+ template <
580
+ typename T, int NElts = 1 , lsc_data_size DS = lsc_data_size::default_size,
581
+ cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N>
587
582
__ESIMD_API void lsc_prefetch (const T *p, __ESIMD_NS::simd<uint32_t , N> offsets,
588
583
__ESIMD_NS::simd_mask<N> pred = 1 ) {
589
584
detail::check_lsc_vector_size<NElts>();
@@ -617,7 +612,7 @@ __ESIMD_API void lsc_prefetch(const T *p, __ESIMD_NS::simd<uint32_t, N> offsets,
617
612
/// @tparam L3H is the L3 cache hint.
618
613
/// @param p is the base pointer.
619
614
///
620
- template <typename T, uint8_t NElts = 1 ,
615
+ template <typename T, int NElts = 1 ,
621
616
lsc_data_size DS = lsc_data_size::default_size,
622
617
cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none>
623
618
__ESIMD_API void lsc_prefetch (const T *p) {
@@ -658,7 +653,7 @@ __ESIMD_API void lsc_prefetch(const T *p) {
658
653
/// @param offsets is the zero-based offsets in bytes.
659
654
/// @param pred is the predicates.
660
655
///
661
- template <typename T, uint8_t NElts = 1 ,
656
+ template <typename T, int NElts = 1 ,
662
657
lsc_data_size DS = lsc_data_size::default_size,
663
658
cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
664
659
int N, typename AccessorTy>
@@ -701,7 +696,7 @@ lsc_prefetch(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
701
696
/// @param acc is the SYCL accessor.
702
697
/// @param offset is the zero-based offset in bytes.
703
698
///
704
- template <typename T, uint8_t NElts = 1 ,
699
+ template <typename T, int NElts = 1 ,
705
700
lsc_data_size DS = lsc_data_size::default_size,
706
701
cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
707
702
typename AccessorTy>
@@ -746,7 +741,7 @@ lsc_prefetch(AccessorTy acc, uint32_t offset) {
746
741
/// @param vals is the values to store.
747
742
/// @param pred is the predicates.
748
743
///
749
- template <typename T, uint8_t NElts = 1 ,
744
+ template <typename T, int NElts = 1 ,
750
745
lsc_data_size DS = lsc_data_size::default_size, int N>
751
746
__ESIMD_API void lsc_slm_scatter (__ESIMD_NS::simd<uint32_t , N> offsets,
752
747
__ESIMD_NS::simd<T, N * NElts> vals,
@@ -780,8 +775,7 @@ __ESIMD_API void lsc_slm_scatter(__ESIMD_NS::simd<uint32_t, N> offsets,
780
775
/// @param offset is the zero-based offset for SLM buffer in bytes.
781
776
/// @param vals is the values to store.
782
777
///
783
- template <typename T, uint8_t NElts = 1 ,
784
- lsc_data_size DS = lsc_data_size::default_size>
778
+ template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size>
785
779
__ESIMD_API void lsc_slm_block_store (uint32_t offset,
786
780
__ESIMD_NS::simd<T, NElts> vals) {
787
781
detail::check_lsc_vector_size<NElts>();
@@ -819,10 +813,9 @@ __ESIMD_API void lsc_slm_block_store(uint32_t offset,
819
813
/// @param vals is the values to store.
820
814
/// @param pred is the predicates.
821
815
///
822
- template <typename T, uint8_t NElts = 1 ,
823
- lsc_data_size DS = lsc_data_size::default_size,
824
- cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
825
- int N>
816
+ template <
817
+ typename T, int NElts = 1 , lsc_data_size DS = lsc_data_size::default_size,
818
+ cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, int N>
826
819
__ESIMD_API void lsc_scatter (T *p, __ESIMD_NS::simd<uint32_t , N> offsets,
827
820
__ESIMD_NS::simd<T, N * NElts> vals,
828
821
__ESIMD_NS::simd_mask<N> pred = 1 ) {
@@ -864,7 +857,7 @@ __ESIMD_API void lsc_scatter(T *p, __ESIMD_NS::simd<uint32_t, N> offsets,
864
857
/// @param vals is the values to store.
865
858
/// @param pred is the predicates.
866
859
///
867
- template <typename T, uint8_t NElts = 1 ,
860
+ template <typename T, int NElts = 1 ,
868
861
lsc_data_size DS = lsc_data_size::default_size,
869
862
cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
870
863
int N, typename AccessorTy>
@@ -913,8 +906,7 @@ lsc_scatter(AccessorTy acc, __ESIMD_NS::simd<uint32_t, N> offsets,
913
906
/// entirely, non-zero - operation is performed. The default is '1' - perform
914
907
/// the operation.
915
908
///
916
- template <typename T, uint8_t NElts = 1 ,
917
- lsc_data_size DS = lsc_data_size::default_size,
909
+ template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
918
910
cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none>
919
911
__ESIMD_API void lsc_block_store (T *p, __ESIMD_NS::simd<T, NElts> vals,
920
912
__ESIMD_NS::simd_mask<1 > pred = 1 ) {
@@ -955,8 +947,7 @@ __ESIMD_API void lsc_block_store(T *p, __ESIMD_NS::simd<T, NElts> vals,
955
947
/// entirely, non-zero - operation is performed. The default is '1' - perform
956
948
/// the operation.
957
949
///
958
- template <typename T, uint8_t NElts = 1 ,
959
- lsc_data_size DS = lsc_data_size::default_size,
950
+ template <typename T, int NElts, lsc_data_size DS = lsc_data_size::default_size,
960
951
cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,
961
952
typename AccessorTy>
962
953
__ESIMD_API std::enable_if_t <!std::is_pointer<AccessorTy>::value>
0 commit comments