@@ -800,18 +800,18 @@ load_direct_blocked(const ItemT &item, InputIteratorT input_iter,
800
800
// / work-group, guarded by range.
801
801
// /
802
802
// / \tparam T The data type to load.
803
+ // / \tparam DefaultT The type of default value to assign out-of-bound items.
803
804
// / \tparam ElementsPerWorkItem The number of consecutive elements partitioned
804
805
// / onto each work-item.
805
806
// / \tparam InputIteratorT The random-access iterator type for input \iterator.
806
- // / \tparam DefaultT The type of default value to assign out-of-bound items.
807
807
// / \tparam ItemT The sycl::nd_item index space class.
808
808
// / \param item The calling work-item.
809
809
// / \param input_iter The work-group's base input iterator for loading from.
810
810
// / \param data Data to load.
811
811
// / \param valid_items Number of valid items to load.
812
812
// / \param default_value Default value to assign out-of-bound items.
813
- template <typename T, int ElementsPerWorkItem, typename InputIteratorT ,
814
- typename DefaultT , typename ItemT>
813
+ template <typename T, typename DefaultT, int ElementsPerWorkItem ,
814
+ typename InputIteratorT , typename ItemT>
815
815
__dpct_inline__ void
816
816
load_direct_striped (const ItemT &item, InputIteratorT input_iter,
817
817
T (&data)[ElementsPerWorkItem], int valid_items,
@@ -1241,23 +1241,24 @@ class group_load {
1241
1241
T (&data)[ElementsPerWorkItem], int valid_items,
1242
1242
DefaultT default_value) {
1243
1243
if constexpr (LoadAlgorithm == group_load_algorithm::blocked) {
1244
- load_direct_blocked<T, ElementsPerWorkItem, InputIteratorT, DefaultT ,
1244
+ load_direct_blocked<T, DefaultT, ElementsPerWorkItem, InputIteratorT ,
1245
1245
ItemT>(item, input_iter, data, valid_items,
1246
1246
default_value);
1247
1247
} else if constexpr (LoadAlgorithm == group_load_algorithm::striped) {
1248
- load_direct_striped<T, ElementsPerWorkItem, InputIteratorT, DefaultT ,
1248
+ load_direct_striped<T, DefaultT, ElementsPerWorkItem, InputIteratorT ,
1249
1249
ItemT>(item, input_iter, data, valid_items,
1250
1250
default_value);
1251
1251
} else if constexpr (LoadAlgorithm == group_load_algorithm::transpose) {
1252
- load_direct_striped<T, ElementsPerWorkItem, InputIteratorT, ItemT>(
1253
- item, input_iter, data, valid_items, default_value);
1252
+ load_direct_striped<T, DefaultT, ElementsPerWorkItem, InputIteratorT,
1253
+ ItemT>(item, input_iter, data, valid_items,
1254
+ default_value);
1254
1255
dpct::group::exchange<T, ElementsPerWorkItem>(_local_memory)
1255
1256
.striped_to_blocked (item, data, data);
1256
1257
} else if constexpr (LoadAlgorithm ==
1257
1258
group_load_algorithm::sub_group_transpose) {
1258
- load_direct_sub_group_striped<T, ElementsPerWorkItem, InputIteratorT ,
1259
- ItemT>(item, input_iter, data, valid_items,
1260
- default_value);
1259
+ load_direct_sub_group_striped<T, DefaultT, ElementsPerWorkItem ,
1260
+ InputIteratorT, ItemT>(
1261
+ item, input_iter, data, valid_items, default_value);
1261
1262
dpct::group::exchange<T, ElementsPerWorkItem>(_local_memory)
1262
1263
.sub_group_striped_to_blocked (item, data, data);
1263
1264
}
0 commit comments