[NFCI] Move sycl::vec::convert definition to vector_convert.hpp (#15160)

aelovikov-intel · web-flow · commit c603a7fde907 · 2024-08-22T10:25:19.000-07:00
1) It keeps all `convert`-related code in one place 
2) It allows not to include that extra functionality if this particular
   method isn't used
3) Potentially, it moves all instances where `half`/`bfloat16` needs to
   be "complete" in `vector.hpp`. Can't really verify that because
   `generic_type_traits.hpp` includes definitions for both at the moment
diff --git a/sycl/include/sycl/detail/vector_convert.hpp b/sycl/include/sycl/detail/vector_convert.hpp
@@ -58,6 +58,7 @@
 #include <sycl/exception.hpp>                  // for errc
 
 #include <sycl/ext/oneapi/bfloat16.hpp> // bfloat16
+#include <sycl/vector.hpp>
 
 #ifndef __SYCL_DEVICE_ONLY__
 #include <cfenv> // for fesetround, fegetround
@@ -153,8 +154,6 @@ __imf_ushort_as_bfloat16(unsigned short x);
 
 namespace sycl {
 
-enum class rounding_mode { automatic = 0, rte = 1, rtz = 2, rtp = 3, rtn = 4 };
-
 inline namespace _V1 {
 #ifndef __SYCL_DEVICE_ONLY__
 // TODO: Refactor includes so we can just "#include".
@@ -870,6 +869,98 @@ auto ConvertImpl(std::byte val) {
 }
 #endif
 
+// We interpret bool as int8_t, std::byte as uint8_t for conversion to other
+// types.
+template <typename T>
+using ConvertBoolAndByteT =
+    typename detail::map_type<T,
+#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
+                              std::byte, /*->*/ std::uint8_t, //
+#endif
+                              bool, /*->*/ std::uint8_t, //
+                              T, /*->*/ T                //
+                              >::type;
 } // namespace detail
+
+template <typename DataT, int NumElements>
+template <typename convertT, rounding_mode roundingMode>
+vec<convertT, NumElements> vec<DataT, NumElements>::convert() const {
+  using T = detail::ConvertBoolAndByteT<DataT>;
+  using R = detail::ConvertBoolAndByteT<convertT>;
+  using bfloat16 = sycl::ext::oneapi::bfloat16;
+  static_assert(std::is_integral_v<R> || detail::is_floating_point<R>::value ||
+                    std::is_same_v<R, bfloat16>,
+                "Unsupported convertT");
+
+  using OpenCLT = detail::ConvertToOpenCLType_t<T>;
+  using OpenCLR = detail::ConvertToOpenCLType_t<R>;
+  vec<convertT, NumElements> Result;
+
+  // convertImpl can't be called with the same From and To types and therefore
+  // we need some special processing in a few cases.
+  if constexpr (std::is_same_v<DataT, convertT>) {
+    return *this;
+  } else if constexpr (std::is_same_v<OpenCLT, OpenCLR> ||
+                       std::is_same_v<T, R>) {
+    for (size_t I = 0; I < NumElements; ++I)
+      Result[I] = static_cast<convertT>(getValue(I));
+    return Result;
+  } else {
+
+#ifdef __SYCL_DEVICE_ONLY__
+    using OpenCLVecT = OpenCLT __attribute__((ext_vector_type(NumElements)));
+    using OpenCLVecR = OpenCLR __attribute__((ext_vector_type(NumElements)));
+
+    auto NativeVector = sycl::bit_cast<vector_t>(*this);
+    using ConvertTVecType = typename vec<convertT, NumElements>::vector_t;
+
+    // Whole vector conversion can only be done, if:
+    constexpr bool canUseNativeVectorConvert =
+#ifdef __NVPTX__
+        //  TODO: Likely unnecessary as
+        //  https://github.com/intel/llvm/issues/11840 has been closed
+        //  already.
+        false &&
+#endif
+        NumElements > 1 &&
+        // - vec storage has an equivalent OpenCL native vector it is
+        //   implicitly convertible to. There are some corner cases where it
+        //   is not the case with char, long and long long types.
+        std::is_convertible_v<vector_t, OpenCLVecT> &&
+        std::is_convertible_v<ConvertTVecType, OpenCLVecR> &&
+        // - it is not a signed to unsigned (or vice versa) conversion
+        //   see comments within 'convertImpl' for more details;
+        !detail::is_sint_to_from_uint<T, R>::value &&
+        // - destination type is not bool. bool is stored as integer under the
+        //   hood and therefore conversion to bool looks like conversion
+        //   between two integer types. Since bit pattern for true and false
+        //   is not defined, there is no guarantee that integer conversion
+        //   yields right results here;
+        !std::is_same_v<convertT, bool>;
+
+    if constexpr (canUseNativeVectorConvert) {
+      auto val = detail::convertImpl<T, R, roundingMode, NumElements,
+                                     OpenCLVecT, OpenCLVecR>(NativeVector);
+      Result.m_Data = sycl::bit_cast<decltype(Result.m_Data)>(val);
+    } else
+#endif // __SYCL_DEVICE_ONLY__
+    {
+      // Otherwise, we fallback to per-element conversion:
+      for (size_t I = 0; I < NumElements; ++I) {
+        auto val = detail::convertImpl<T, R, roundingMode, 1, OpenCLT, OpenCLR>(
+            getValue(I));
+#ifdef __SYCL_DEVICE_ONLY__
+        // On device, we interpret BF16 as uint16.
+        if constexpr (std::is_same_v<convertT, bfloat16>)
+          Result[I] = sycl::bit_cast<convertT>(val);
+        else
+#endif
+          Result[I] = static_cast<convertT>(val);
+      }
+    }
+  }
+  return Result;
+}
+
 } // namespace _V1
 } // namespace sycl
diff --git a/sycl/include/sycl/types.hpp b/sycl/include/sycl/types.hpp
@@ -21,6 +21,9 @@
 #include <sycl/half_type.hpp>                  // for StorageT, half, Vec16...
 #include <sycl/marray.hpp>                     // for __SYCL_BINOP, __SYCL_...
 #include <sycl/multi_ptr.hpp>                  // for multi_ptr
-#include <sycl/vector.hpp>                     // for sycl::vec and swizzles
+
+#include <sycl/vector.hpp>
+
+#include <sycl/detail/vector_convert.hpp>
 
 #include <sycl/ext/oneapi/bfloat16.hpp> // bfloat16
diff --git a/sycl/include/sycl/vector.hpp b/sycl/include/sycl/vector.hpp
@@ -36,7 +36,6 @@
 #include <sycl/detail/type_list.hpp>           // for is_contained
 #include <sycl/detail/type_traits.hpp>         // for is_floating_point
 #include <sycl/detail/vector_arith.hpp>
-#include <sycl/detail/vector_convert.hpp>      // for convertImpl
 #include <sycl/half_type.hpp>                  // for StorageT, half, Vec16...
 
 #include <sycl/ext/oneapi/bfloat16.hpp> // bfloat16
@@ -53,6 +52,10 @@
 #include <utility>     // for index_sequence, make_...
 
 namespace sycl {
+
+// TODO: Fix in the next ABI breaking windows.
+enum class rounding_mode { automatic = 0, rte = 1, rtz = 2, rtp = 3, rtn = 4 };
+
 inline namespace _V1 {
 
 struct elem {
@@ -406,18 +409,6 @@ class __SYCL_EBO vec
   static constexpr size_t byte_size() noexcept { return sizeof(m_Data); }
 
 private:
-  // We interpret bool as int8_t, std::byte as uint8_t for conversion to other
-  // types.
-  template <typename T>
-  using ConvertBoolAndByteT =
-      typename detail::map_type<T,
-#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0)
-                                std::byte, /*->*/ std::uint8_t, //
-#endif
-                                bool, /*->*/ std::uint8_t, //
-                                T, /*->*/ T                //
-                                >::type;
-
   // getValue should be able to operate on different underlying
   // types: enum cl_float#N , builtin vector float#N, builtin type float.
   constexpr auto getValue(int Index) const {
@@ -439,88 +430,10 @@ class __SYCL_EBO vec
   }
 
 public:
+  // Out-of-class definition is in `sycl/detail/vector_convert.hpp`
   template <typename convertT,
             rounding_mode roundingMode = rounding_mode::automatic>
-  vec<convertT, NumElements> convert() const {
-
-    using T = ConvertBoolAndByteT<DataT>;
-    using R = ConvertBoolAndByteT<convertT>;
-    using bfloat16 = sycl::ext::oneapi::bfloat16;
-    static_assert(std::is_integral_v<R> ||
-                      detail::is_floating_point<R>::value ||
-                      std::is_same_v<R, bfloat16>,
-                  "Unsupported convertT");
-
-    using OpenCLT = detail::ConvertToOpenCLType_t<T>;
-    using OpenCLR = detail::ConvertToOpenCLType_t<R>;
-    vec<convertT, NumElements> Result;
-
-    // convertImpl can't be called with the same From and To types and therefore
-    // we need some special processing in a few cases.
-    if constexpr (std::is_same_v<DataT, convertT>) {
-      return *this;
-    } else if constexpr (std::is_same_v<OpenCLT, OpenCLR> ||
-                         std::is_same_v<T, R>) {
-      for (size_t I = 0; I < NumElements; ++I)
-        Result[I] = static_cast<convertT>(getValue(I));
-      return Result;
-    } else {
-
-#ifdef __SYCL_DEVICE_ONLY__
-      using OpenCLVecT = OpenCLT __attribute__((ext_vector_type(NumElements)));
-      using OpenCLVecR = OpenCLR __attribute__((ext_vector_type(NumElements)));
-
-      auto NativeVector = sycl::bit_cast<vector_t>(*this);
-      using ConvertTVecType = typename vec<convertT, NumElements>::vector_t;
-
-      // Whole vector conversion can only be done, if:
-      constexpr bool canUseNativeVectorConvert =
-#ifdef __NVPTX__
-          //  TODO: Likely unnecessary as
-          //  https://github.com/intel/llvm/issues/11840 has been closed
-          //  already.
-          false &&
-#endif
-          NumElements > 1 &&
-          // - vec storage has an equivalent OpenCL native vector it is
-          //   implicitly convertible to. There are some corner cases where it
-          //   is not the case with char, long and long long types.
-          std::is_convertible_v<vector_t, OpenCLVecT> &&
-          std::is_convertible_v<ConvertTVecType, OpenCLVecR> &&
-          // - it is not a signed to unsigned (or vice versa) conversion
-          //   see comments within 'convertImpl' for more details;
-          !detail::is_sint_to_from_uint<T, R>::value &&
-          // - destination type is not bool. bool is stored as integer under the
-          //   hood and therefore conversion to bool looks like conversion
-          //   between two integer types. Since bit pattern for true and false
-          //   is not defined, there is no guarantee that integer conversion
-          //   yields right results here;
-          !std::is_same_v<convertT, bool>;
-
-      if constexpr (canUseNativeVectorConvert) {
-        auto val = detail::convertImpl<T, R, roundingMode, NumElements, OpenCLVecT,
-                                OpenCLVecR>(NativeVector);
-        Result.m_Data = sycl::bit_cast<decltype(Result.m_Data)>(val);
-      } else
-#endif // __SYCL_DEVICE_ONLY__
-      {
-        // Otherwise, we fallback to per-element conversion:
-        for (size_t I = 0; I < NumElements; ++I) {
-          auto val =
-              detail::convertImpl<T, R, roundingMode, 1, OpenCLT, OpenCLR>(
-                  getValue(I));
-#ifdef __SYCL_DEVICE_ONLY__
-          // On device, we interpret BF16 as uint16.
-          if constexpr (std::is_same_v<convertT, bfloat16>)
-            Result[I] = sycl::bit_cast<convertT>(val);
-          else
-#endif
-            Result[I] = static_cast<convertT>(val);
-        }
-      }
-    }
-    return Result;
-  }
+  vec<convertT, NumElements> convert() const;
 
   template <typename asT> asT as() const { return sycl::bit_cast<asT>(*this); }