Skip to content

Commit 2b4e950

Browse files
authored
[SYCL] Work around codegen issue with vectors-of-3 (#19087)
Vectors of length 3 have an adjusted length of 4, where the last (fourth) element is poison. Copying that poison element causes incorrect code to be generated. Avoid this by copying only the first 3 elements.
1 parent ca2285e commit 2b4e950

File tree

3 files changed

+32
-6
lines changed

3 files changed

+32
-6
lines changed

sycl/include/sycl/detail/builtins/helper_macros.hpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,13 +192,26 @@
192192
template <NUM_ARGS##_TYPENAME_TYPE> \
193193
detail::ENABLER<NUM_ARGS##_TEMPLATE_TYPE>(NAME)( \
194194
NUM_ARGS##_TEMPLATE_TYPE_ARG) { \
195+
using ToTy = detail::ENABLER<NUM_ARGS##_TEMPLATE_TYPE>; \
195196
if constexpr (detail::is_marray_v<T0>) { \
196197
return detail::DELEGATOR( \
197198
[](NUM_ARGS##_AUTO_ARG) { return (NS::NAME)(NUM_ARGS##_ARG); }, \
198199
NUM_ARGS##_ARG); \
200+
} else if constexpr (detail::is_vec_v<ToTy>) { \
201+
if constexpr (ToTy::size() == 3) { \
202+
/* For vectors of length 3, make sure to only copy 3 elements, not 4, \
203+
to work around code generation issues, see LLVM #144454. */ \
204+
auto From = __VA_ARGS__(NUM_ARGS##_CONVERTED_ARG); \
205+
ToTy To; \
206+
constexpr auto N = \
207+
ToTy::size() * sizeof(detail::get_elem_type_t<ToTy>); \
208+
sycl::detail::memcpy_no_adl(&To, &From, N); \
209+
return To; \
210+
} else { \
211+
return bit_cast<ToTy>(__VA_ARGS__(NUM_ARGS##_CONVERTED_ARG)); \
212+
} \
199213
} else { \
200-
return bit_cast<detail::ENABLER<NUM_ARGS##_TEMPLATE_TYPE>>( \
201-
__VA_ARGS__(NUM_ARGS##_CONVERTED_ARG)); \
214+
return bit_cast<ToTy>(__VA_ARGS__(NUM_ARGS##_CONVERTED_ARG)); \
202215
} \
203216
}
204217

sycl/include/sycl/detail/builtins/math_functions.inc

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -254,9 +254,19 @@ auto builtin_delegate_ptr_impl(FuncTy F, PtrTy p, Ts... xs) {
254254
detail::NON_SCALAR_ENABLER<SYCL_CONCAT(LESS_ONE(NUM_ARGS), _TEMPLATE_TYPE), \
255255
PtrTy> \
256256
NAME(SYCL_CONCAT(LESS_ONE(NUM_ARGS), _TEMPLATE_TYPE_ARG), PtrTy p) { \
257-
return bit_cast<detail::NON_SCALAR_ENABLER< \
258-
SYCL_CONCAT(LESS_ONE(NUM_ARGS), _TEMPLATE_TYPE), PtrTy>>( \
259-
detail::NAME##_impl(SYCL_CONCAT(LESS_ONE(NUM_ARGS), _ARG), p)); \
257+
auto From = detail::NAME##_impl(SYCL_CONCAT(LESS_ONE(NUM_ARGS), _ARG), p); \
258+
using ToTy = detail::NON_SCALAR_ENABLER< \
259+
SYCL_CONCAT(LESS_ONE(NUM_ARGS), _TEMPLATE_TYPE), PtrTy>; \
260+
if constexpr (ToTy::size() == 3) { \
261+
/* For vectors of length 3, make sure to only copy 3 elements, not 4, to \
262+
work around code generation issues, see LLVM #144454. */ \
263+
ToTy To; \
264+
constexpr auto N = ToTy::size() * sizeof(detail::get_elem_type_t<ToTy>); \
265+
sycl::detail::memcpy_no_adl(&To, &From, N); \
266+
return To; \
267+
} else { \
268+
return bit_cast<ToTy>(From); \
269+
} \
260270
}
261271

262272
#if __SYCL_DEVICE_ONLY__

sycl/include/sycl/vector.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -530,12 +530,15 @@ class __SYCL_EBO vec :
530530
// `vector_t` is the same as `DataT`. Note that the other ctor isn't a template
531531
// so we don't even need a smart `enable_if` condition here, the mere fact of
532532
// this being a template makes the other ctor preferred.
533+
// For vectors of length 3, make sure to only copy 3 elements, not 4, to work
534+
// around code generation issues, see LLVM #144454.
533535
template <
534536
typename vector_t_ = vector_t,
535537
typename = typename std::enable_if_t<std::is_same_v<vector_t_, vector_t>>>
536538
constexpr vec(vector_t_ openclVector) {
537539
sycl::detail::memcpy_no_adl(&this->m_Data, &openclVector,
538-
sizeof(openclVector));
540+
NumElements *
541+
sizeof(element_type_for_vector_t));
539542
}
540543

541544
/* @SYCL2020

0 commit comments

Comments
 (0)