From 7be1142f71afbc1b6dcf951afa05c78d55af596a Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Mon, 9 Jun 2025 09:59:25 +0100 Subject: [PATCH 1/5] =?UTF-8?q?Reapply=20"[libspirv]=20Define=20schar=20ov?= =?UTF-8?q?erloads=20via=20remangling;=20not=20source=20=E2=80=A6=20(#1882?= =?UTF-8?q?1)"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reapplies commit 23584c1991587815e63d95404337eb2f1faeea29. It also includes changes from #18807 which attempt to address the issues that led to the original revert. We were previously achieving the signed char builtin definitions in libspirv via one of two ways. The first was explicitly definining schar overloads of builtins in the source. The second was by remangling 'char' builtins to one of schar or uchar, depending on the host platform. Since we are defining our builtins in OpenCL C, the plain 'char' type is already a signed type. This presents us with the opportunity to achieve our desired schar builtins solely through remangling. The primary idea is to reduce our libclc/libspirv diff with upstream. We also have the option to introduce signed char builtins upstream. As it stands the schar problem isn't far from the 'half' mangling problem that we also now deal with purely in the remangler. --- clang/lib/Sema/SPIRVBuiltins.td | 49 +++-- ...spirv-builtin-lookup-group-non-uniform.cpp | 66 ++++-- .../spirv-builtin-lookup-group.cl | 4 +- libclc/clc/include/clc/clc_as_type.h | 7 - libclc/clc/include/clc/clc_convert.h | 2 - libclc/clc/include/clc/clctypes.h | 7 - libclc/clc/include/clc/integer/gentype.inc | 198 +---------------- libclc/clc/include/clc/math/gentype.inc | 4 - libclc/clc/lib/generic/integer/clc_clz.cl | 5 - libclc/clc/lib/generic/integer/clc_ctz.cl | 5 - libclc/clc/lib/generic/integer/clc_mad_sat.cl | 1 - libclc/clc/lib/generic/integer/clc_mul_hi.cl | 1 - libclc/clc/lib/generic/misc/clc_shuffle.cl | 1 - libclc/clc/lib/generic/misc/clc_shuffle2.cl | 1 - .../include/libspirv/async/gentype.inc | 40 ---- libclc/libspirv/include/libspirv/lp64_types.h | 12 +- .../libspirv/ptx-nvidiacl/async/gentype.inc | 23 -- libclc/libspirv/lib/generic/gen_convert.py | 42 +++- .../lib/generic/gen_convert_common.py | 10 +- .../lib/generic/geometric/normalize.cl | 7 +- .../libspirv/lib/generic/integer/upsample.cl | 1 - libclc/libspirv/lib/generic/math/fract.cl | 1 + libclc/libspirv/lib/generic/math/fract.inc | 15 +- libclc/libspirv/lib/generic/math/maxmag.cl | 1 + libclc/libspirv/lib/generic/math/maxmag.inc | 25 +-- libclc/libspirv/lib/generic/math/minmag.cl | 1 + libclc/libspirv/lib/generic/math/minmag.inc | 19 +- libclc/libspirv/lib/generic/relational/all.cl | 2 +- libclc/libspirv/lib/generic/relational/any.cl | 2 +- .../generic/relational/genbinrelational.inc | 6 +- .../lib/generic/relational/genunary.inc | 6 +- libclc/libspirv/lib/generic/shared/vload.cl | 2 +- libclc/libspirv/lib/generic/shared/vstore.cl | 2 +- .../lib/native_cpu/integer/popcount.cl | 3 - .../async/async_work_group_strided_copy.cl | 6 - .../lib/ptx-nvidiacl/integer/mul_hi.cl | 3 - .../lib/ptx-nvidiacl/relational/isfinite.cl | 6 +- .../lib/ptx-nvidiacl/relational/isinf.cl | 6 +- .../lib/ptx-nvidiacl/relational/isnan.cl | 6 +- libclc/opencl/include/clc/opencl/as_type.h | 6 - libclc/opencl/include/clc/opencl/clc.h | 2 - libclc/utils/gen_convert.py | 11 +- .../libclc-remangler/LibclcRemangler.cpp | 2 + sycl/include/sycl/detail/vector_convert.hpp | 92 ++++---- sycl/test/check_device_code/char_builtins.cpp | 207 ++++++++++++++++++ 45 files changed, 421 insertions(+), 497 deletions(-) create mode 100644 sycl/test/check_device_code/char_builtins.cpp diff --git a/clang/lib/Sema/SPIRVBuiltins.td b/clang/lib/Sema/SPIRVBuiltins.td index ef3d18a682c58..e226e3b223e17 100644 --- a/clang/lib/Sema/SPIRVBuiltins.td +++ b/clang/lib/Sema/SPIRVBuiltins.td @@ -321,8 +321,7 @@ class ConstOCLSPVBuiltin _Signature> : // OpenCL v1.0/1.2/2.0 s6.1.1: Built-in Scalar Data Types. def Bool : IntType<"bool", QualType<"Context.BoolTy">, 1>; -def TrueChar : IntType<"_char", QualType<"Context.CharTy", 0, 1>, 8>; -def Char : IntType<"char", QualType<"Context.SignedCharTy", 0, 1>, 8>; +def Char : IntType<"char", QualType<"Context.CharTy", 0, 1>, 8>; def SChar : IntType<"schar", QualType<"Context.SignedCharTy", 0, 1>, 8>; def UChar : UIntType<"uchar", QualType<"Context.UnsignedCharTy">, 8>; def Short : IntType<"short", QualType<"Context.ShortTy", 0, 1>, 16>; @@ -356,19 +355,20 @@ def Vec16 : IntList<"Vec16", [16]>; def Vec1234 : IntList<"Vec1234", [1, 2, 3, 4]>; // Type lists. -def TLAll : TypeList<[Char, UChar, Short, UShort, Int, UInt, Long, ULong, Float, Double, Half]>; -def TLAllUnsigned : TypeList<[UChar, UChar, UShort, UShort, UInt, UInt, ULong, ULong, UInt, ULong, UShort]>; -def TLAllWithBool : TypeList<[Bool, Char, UChar, Short, UShort, Int, UInt, Long, +def TLAll : TypeList<[Char, SChar, UChar, Short, UShort, Int, UInt, Long, ULong, Float, Double, Half]>; +def TLAllUnsigned : TypeList<[UChar, UChar, UChar, UShort, UShort, UInt, UInt, ULong, ULong, UInt, ULong, UShort]>; +def TLAllWithBool : TypeList<[Bool, Char, SChar, UChar, Short, UShort, Int, UInt, Long, ULong, Float, Double, Half]>; def TLFloat : TypeList<[Float, Double, Half]>; // FIXME: handle properly char (signed or unsigned depending on host) -def TLSignedInts : TypeList<[Char, Short, Int, Long]>; -def TLUnsignedInts : TypeList<[UChar, UShort, UInt, ULong]>; +def TLSignedInts : TypeList<[Char, SChar, Short, Int, Long]>; +def TLUIToSIInts : TypeList<[UChar, UChar, UShort, UInt, ULong]>; +def TLUnsignedInts : TypeList<[UChar, UShort, UInt, ULong]>; // Signed to Unsigned conversion // FIXME: handle properly char (signed or unsigned depending on host) -def TLSToUSignedInts : TypeList<[Char, Short, Int, Long]>; -def TLSToUUnsignedInts : TypeList<[UChar, UShort, UInt, ULong]>; +def TLSToUSignedInts : TypeList<[Char, SChar, Short, Int, Long]>; +def TLSToUUnsignedInts : TypeList<[UChar, UChar, UShort, UInt, ULong]>; def TLIntLongFloats : TypeList<[Int, UInt, Long, ULong, Float, Double, Half]>; @@ -377,7 +377,7 @@ def TLIntLongFloats : TypeList<[Int, UInt, Long, ULong, Float, Double, Half]>; // uchar abs(uchar). def TLAllUIntsTwice : TypeList<[UChar, UChar, UChar, UShort, UShort, UInt, UInt, ULong, ULong]>; -def TLAllInts : TypeList<[Char, UChar, Short, UShort, Int, UInt, Long, ULong]>; +def TLAllInts : TypeList<[Char, SChar, UChar, Short, UShort, Int, UInt, Long, ULong]>; // GenType definitions for multiple base types (e.g. all floating point types, // or all integer types). @@ -394,6 +394,8 @@ def AIGenType1 : GenericType<"AIGenType1", TLAllInts, Vec1>; def AIGenTypeN : GenericType<"AIGenTypeN", TLAllInts, VecAndScalar>; def AUIGenTypeN : GenericType<"AUIGenTypeN", TLUnsignedInts, VecAndScalar>; def ASIGenTypeN : GenericType<"ASIGenTypeN", TLSignedInts, VecAndScalar>; +// unsigned integers matching 1:1 with signed ints +def AUIToSIGenTypeN : GenericType<"AUIToSIGenTypeN", TLUIToSIInts, VecAndScalar>; def AIGenTypeNNoScalar : GenericType<"AIGenTypeNNoScalar", TLAllInts, VecNoScalar>; // All integer to unsigned def AI2UGenTypeN : GenericType<"AI2UGenTypeN", TLAllUIntsTwice, VecAndScalar>; @@ -403,6 +405,7 @@ def SGenTypeN : GenericType<"SGenTypeN", TLSignedInts, VecAndScalar // Unsigned integer def UGenType1 : GenericType<"UGenType1", TLUnsignedInts, Vec1>; def UGenTypeN : GenericType<"UGenTypeN", TLUnsignedInts, VecAndScalar>; +def UToSGenTypeN : GenericType<"UToSGenTypeN", TLUIToSIInts, VecAndScalar>; def UInt4 : GenericType<"UInt4", TypeList<[UInt]>, Vec4>; // Float def FGenType1 : GenericType<"FGenType1", TLFloat, Vec1>; @@ -412,7 +415,7 @@ def IntLongFloatGenType1 : GenericType<"IntLongFloatGenType1", TLIntLongFloats // GenType definitions for every single base type (e.g. fp32 only). // Names are like: GenTypeFloatVecAndScalar. -foreach Type = [Char, UChar, SChar, Short, UShort, +foreach Type = [Char, SChar, UChar, Short, UShort, Int, UInt, Long, ULong, Float, Double, Half] in { foreach VecSizes = [VecAndScalar, VecNoScalar] in { @@ -550,9 +553,9 @@ foreach name = ["clz", "ctz", "popcount"] in { def : ConstOCLSPVBuiltin<"rotate", [AIGenTypeN, AIGenTypeN, AIGenTypeN]>; -def : ConstOCLSPVBuiltin<"s_abs", [AUIGenTypeN, ASIGenTypeN]>; +def : ConstOCLSPVBuiltin<"s_abs", [AUIToSIGenTypeN, ASIGenTypeN]>; -def : ConstOCLSPVBuiltin<"s_abs_diff", [AUIGenTypeN, ASIGenTypeN, ASIGenTypeN]>; +def : ConstOCLSPVBuiltin<"s_abs_diff", [AUIToSIGenTypeN, ASIGenTypeN, ASIGenTypeN]>; foreach name = ["s_add_sat", "s_hadd", "s_rhadd", @@ -652,9 +655,9 @@ def : ConstOCLSPVBuiltin<"bitselect", [AGenTypeN, AGenTypeN, AGenTypeN, AGenType foreach name = ["select"] in { def : ConstOCLSPVBuiltin; - def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; def : ConstOCLSPVBuiltin; - def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; def : ConstOCLSPVBuiltin; def : ConstOCLSPVBuiltin; def : ConstOCLSPVBuiltin; @@ -872,16 +875,16 @@ foreach name = ["Dot"] in { } foreach name = ["Any", "All"] in { - def : SPVBuiltin; + def : SPVBuiltin; } foreach name = ["IsNan", "IsInf", "IsFinite", "IsNormal", "SignBitSet"] in { def : SPVBuiltin; def : SPVBuiltin; def : SPVBuiltin; - def : SPVBuiltin; - def : SPVBuiltin; - def : SPVBuiltin; + def : SPVBuiltin; + def : SPVBuiltin; + def : SPVBuiltin; } foreach name = ["LessOrGreater", @@ -895,9 +898,9 @@ foreach name = ["LessOrGreater", def : SPVBuiltin; def : SPVBuiltin; def : SPVBuiltin; - def : SPVBuiltin; - def : SPVBuiltin; - def : SPVBuiltin; + def : SPVBuiltin; + def : SPVBuiltin; + def : SPVBuiltin; } foreach name = ["BitCount"] in { @@ -989,7 +992,7 @@ foreach name = ["GroupLogicalAndKHR", "GroupLogicalOrKHR"] in { def SubgroupShuffleINTELVecType : GenericType<"SubgroupShuffleINTELVecType", - TypeList<[Char, UChar, Short, UShort, Int, UInt, Float]>, + TypeList<[Char, SChar, UChar, Short, UShort, Int, UInt, Float]>, VecNoScalar>; foreach name = ["SubgroupShuffleINTEL", "SubgroupShuffleXorINTEL"] in { diff --git a/clang/test/CodeGenSPIRV/spirv-builtin-lookup-group-non-uniform.cpp b/clang/test/CodeGenSPIRV/spirv-builtin-lookup-group-non-uniform.cpp index 08001cdda8bab..f9ae499c5b7e7 100644 --- a/clang/test/CodeGenSPIRV/spirv-builtin-lookup-group-non-uniform.cpp +++ b/clang/test/CodeGenSPIRV/spirv-builtin-lookup-group-non-uniform.cpp @@ -189,6 +189,7 @@ template void test_logical() { void test() { test_with_bool(); test_with_bool(); + test_with_bool(); test_with_bool(); test_with_bool(); test_with_bool(); @@ -199,6 +200,7 @@ void test() { test_with_bool(); test_integer(); + test_integer(); test_integer(); test_integer(); test_integer(); @@ -206,6 +208,7 @@ void test() { test_integer(); test_signed(); + test_signed(); test_signed(); test_signed(); @@ -232,13 +235,20 @@ void test() { // CHECK: call noundef zeroext i1 @_Z33__spirv_GroupNonUniformShuffleXoribj // CHECK: call noundef zeroext i1 @_Z32__spirv_GroupNonUniformShuffleUpibj // CHECK: call noundef zeroext i1 @_Z34__spirv_GroupNonUniformShuffleDownibj -// CHECK: call noundef zeroext i1 @_Z31__spirv_GroupNonUniformAllEqualii -// CHECK: call noundef i32 @_Z32__spirv_GroupNonUniformBroadcastiij -// CHECK: call noundef i32 @_Z37__spirv_GroupNonUniformBroadcastFirstii -// CHECK: call noundef i32 @_Z30__spirv_GroupNonUniformShuffleiij -// CHECK: call noundef i32 @_Z33__spirv_GroupNonUniformShuffleXoriij -// CHECK: call noundef i32 @_Z32__spirv_GroupNonUniformShuffleUpiij -// CHECK: call noundef i32 @_Z34__spirv_GroupNonUniformShuffleDowniij +// CHECK: call noundef zeroext i1 @_Z31__spirv_GroupNonUniformAllEqualic +// CHECK: call noundef signext i8 @_Z32__spirv_GroupNonUniformBroadcasticj +// CHECK: call noundef signext i8 @_Z37__spirv_GroupNonUniformBroadcastFirstic +// CHECK: call noundef signext i8 @_Z30__spirv_GroupNonUniformShuffleicj +// CHECK: call noundef signext i8 @_Z33__spirv_GroupNonUniformShuffleXoricj +// CHECK: call noundef signext i8 @_Z32__spirv_GroupNonUniformShuffleUpicj +// CHECK: call noundef signext i8 @_Z34__spirv_GroupNonUniformShuffleDownicj +// CHECK: call noundef zeroext i1 @_Z31__spirv_GroupNonUniformAllEqualia +// CHECK: call noundef signext i8 @_Z32__spirv_GroupNonUniformBroadcastiaj +// CHECK: call noundef signext i8 @_Z37__spirv_GroupNonUniformBroadcastFirstia +// CHECK: call noundef signext i8 @_Z30__spirv_GroupNonUniformShuffleiaj +// CHECK: call noundef signext i8 @_Z33__spirv_GroupNonUniformShuffleXoriaj +// CHECK: call noundef signext i8 @_Z32__spirv_GroupNonUniformShuffleUpiaj +// CHECK: call noundef signext i8 @_Z34__spirv_GroupNonUniformShuffleDowniaj // CHECK: call noundef zeroext i1 @_Z31__spirv_GroupNonUniformAllEqualih // CHECK: call noundef zeroext i8 @_Z32__spirv_GroupNonUniformBroadcastihj // CHECK: call noundef zeroext i8 @_Z37__spirv_GroupNonUniformBroadcastFirstih @@ -295,16 +305,26 @@ void test() { // CHECK: call noundef double @_Z33__spirv_GroupNonUniformShuffleXoridj // CHECK: call noundef double @_Z32__spirv_GroupNonUniformShuffleUpidj // CHECK: call noundef double @_Z34__spirv_GroupNonUniformShuffleDownidj -// CHECK: call noundef i32 @_Z27__spirv_GroupNonUniformIAddiii -// CHECK: call noundef i32 @_Z27__spirv_GroupNonUniformIAddiiij -// CHECK: call noundef i32 @_Z27__spirv_GroupNonUniformIMuliii -// CHECK: call noundef i32 @_Z27__spirv_GroupNonUniformIMuliiij -// CHECK: call noundef i32 @_Z33__spirv_GroupNonUniformBitwiseAndiii -// CHECK: call noundef i32 @_Z33__spirv_GroupNonUniformBitwiseAndiiij -// CHECK: call noundef i32 @_Z32__spirv_GroupNonUniformBitwiseOriii -// CHECK: call noundef i32 @_Z32__spirv_GroupNonUniformBitwiseOriiij -// CHECK: call noundef i32 @_Z33__spirv_GroupNonUniformBitwiseXoriii -// CHECK: call noundef i32 @_Z33__spirv_GroupNonUniformBitwiseXoriiij +// CHECK: call noundef signext i8 @_Z27__spirv_GroupNonUniformIAddiic +// CHECK: call noundef signext i8 @_Z27__spirv_GroupNonUniformIAddiicj +// CHECK: call noundef signext i8 @_Z27__spirv_GroupNonUniformIMuliic +// CHECK: call noundef signext i8 @_Z27__spirv_GroupNonUniformIMuliicj +// CHECK: call noundef signext i8 @_Z33__spirv_GroupNonUniformBitwiseAndiic +// CHECK: call noundef signext i8 @_Z33__spirv_GroupNonUniformBitwiseAndiicj +// CHECK: call noundef signext i8 @_Z32__spirv_GroupNonUniformBitwiseOriic +// CHECK: call noundef signext i8 @_Z32__spirv_GroupNonUniformBitwiseOriicj +// CHECK: call noundef signext i8 @_Z33__spirv_GroupNonUniformBitwiseXoriic +// CHECK: call noundef signext i8 @_Z33__spirv_GroupNonUniformBitwiseXoriicj +// CHECK: call noundef signext i8 @_Z27__spirv_GroupNonUniformIAddiia +// CHECK: call noundef signext i8 @_Z27__spirv_GroupNonUniformIAddiiaj +// CHECK: call noundef signext i8 @_Z27__spirv_GroupNonUniformIMuliia +// CHECK: call noundef signext i8 @_Z27__spirv_GroupNonUniformIMuliiaj +// CHECK: call noundef signext i8 @_Z33__spirv_GroupNonUniformBitwiseAndiia +// CHECK: call noundef signext i8 @_Z33__spirv_GroupNonUniformBitwiseAndiiaj +// CHECK: call noundef signext i8 @_Z32__spirv_GroupNonUniformBitwiseOriia +// CHECK: call noundef signext i8 @_Z32__spirv_GroupNonUniformBitwiseOriiaj +// CHECK: call noundef signext i8 @_Z33__spirv_GroupNonUniformBitwiseXoriia +// CHECK: call noundef signext i8 @_Z33__spirv_GroupNonUniformBitwiseXoriiaj // CHECK: call noundef zeroext i8 @_Z27__spirv_GroupNonUniformIAddiih // CHECK: call noundef zeroext i8 @_Z27__spirv_GroupNonUniformIAddiihj // CHECK: call noundef zeroext i8 @_Z27__spirv_GroupNonUniformIMuliih @@ -355,10 +375,14 @@ void test() { // CHECK: call noundef i32 @_Z32__spirv_GroupNonUniformBitwiseOriijj // CHECK: call noundef i32 @_Z33__spirv_GroupNonUniformBitwiseXoriij // CHECK: call noundef i32 @_Z33__spirv_GroupNonUniformBitwiseXoriijj -// CHECK: call noundef i32 @_Z27__spirv_GroupNonUniformSMiniii -// CHECK: call noundef i32 @_Z27__spirv_GroupNonUniformSMiniiij -// CHECK: call noundef i32 @_Z27__spirv_GroupNonUniformSMaxiii -// CHECK: call noundef i32 @_Z27__spirv_GroupNonUniformSMaxiiij +// CHECK: call noundef signext i8 @_Z27__spirv_GroupNonUniformSMiniic +// CHECK: call noundef signext i8 @_Z27__spirv_GroupNonUniformSMiniicj +// CHECK: call noundef signext i8 @_Z27__spirv_GroupNonUniformSMaxiic +// CHECK: call noundef signext i8 @_Z27__spirv_GroupNonUniformSMaxiicj +// CHECK: call noundef signext i8 @_Z27__spirv_GroupNonUniformSMiniia +// CHECK: call noundef signext i8 @_Z27__spirv_GroupNonUniformSMiniiaj +// CHECK: call noundef signext i8 @_Z27__spirv_GroupNonUniformSMaxiia +// CHECK: call noundef signext i8 @_Z27__spirv_GroupNonUniformSMaxiiaj // CHECK: call noundef signext i16 @_Z27__spirv_GroupNonUniformSMiniis // CHECK: call noundef signext i16 @_Z27__spirv_GroupNonUniformSMiniisj // CHECK: call noundef signext i16 @_Z27__spirv_GroupNonUniformSMaxiis diff --git a/clang/test/CodeGenSPIRV/spirv-builtin-lookup-group.cl b/clang/test/CodeGenSPIRV/spirv-builtin-lookup-group.cl index 52872bf7ac3b9..689d059efa5de 100644 --- a/clang/test/CodeGenSPIRV/spirv-builtin-lookup-group.cl +++ b/clang/test/CodeGenSPIRV/spirv-builtin-lookup-group.cl @@ -27,7 +27,7 @@ bool group_any(bool predicate) { char group_broad_cast(char a) { // CHECK-LABEL: @group_broad_cast( - // CHECK: call spir_func i32 @_Z22__spirv_GroupBroadcastiij( + // CHECK: call spir_func signext i8 @_Z22__spirv_GroupBroadcasticj( return __spirv_GroupBroadcast(2, a, 0u); } @@ -87,7 +87,7 @@ unsigned long group_umax(unsigned long a) { char group_smin(char a) { // CHECK-LABEL: @group_smin( - // CHECK: call spir_func i32 @_Z17__spirv_GroupSMiniii( + // CHECK: call spir_func signext i8 @_Z17__spirv_GroupSMiniic( return __spirv_GroupSMin(2, 0, a); } diff --git a/libclc/clc/include/clc/clc_as_type.h b/libclc/clc/include/clc/clc_as_type.h index 2040f84285f97..62fee71da8b02 100644 --- a/libclc/clc/include/clc/clc_as_type.h +++ b/libclc/clc/include/clc/clc_as_type.h @@ -10,7 +10,6 @@ #define __CLC_CLC_AS_TYPE_H__ #define __clc_as_char(x) __builtin_astype(x, char) -#define __clc_as_schar(x) __builtin_astype(x, schar) #define __clc_as_uchar(x) __builtin_astype(x, uchar) #define __clc_as_short(x) __builtin_astype(x, short) #define __clc_as_ushort(x) __builtin_astype(x, ushort) @@ -21,7 +20,6 @@ #define __clc_as_float(x) __builtin_astype(x, float) #define __clc_as_char2(x) __builtin_astype(x, char2) -#define __clc_as_schar2(x) __builtin_astype(x, schar2) #define __clc_as_uchar2(x) __builtin_astype(x, uchar2) #define __clc_as_short2(x) __builtin_astype(x, short2) #define __clc_as_ushort2(x) __builtin_astype(x, ushort2) @@ -32,7 +30,6 @@ #define __clc_as_float2(x) __builtin_astype(x, float2) #define __clc_as_char3(x) __builtin_astype(x, char3) -#define __clc_as_schar3(x) __builtin_astype(x, schar3) #define __clc_as_uchar3(x) __builtin_astype(x, uchar3) #define __clc_as_short3(x) __builtin_astype(x, short3) #define __clc_as_ushort3(x) __builtin_astype(x, ushort3) @@ -43,7 +40,6 @@ #define __clc_as_float3(x) __builtin_astype(x, float3) #define __clc_as_char4(x) __builtin_astype(x, char4) -#define __clc_as_schar4(x) __builtin_astype(x, schar4) #define __clc_as_uchar4(x) __builtin_astype(x, uchar4) #define __clc_as_short4(x) __builtin_astype(x, short4) #define __clc_as_ushort4(x) __builtin_astype(x, ushort4) @@ -54,9 +50,7 @@ #define __clc_as_float4(x) __builtin_astype(x, float4) #define __clc_as_char8(x) __builtin_astype(x, char8) -#define __clc_as_schar8(x) __builtin_astype(x, schar8) #define __clc_as_uchar8(x) __builtin_astype(x, uchar8) -#define __clc_as_schar8(x) __builtin_astype(x, schar8) #define __clc_as_short8(x) __builtin_astype(x, short8) #define __clc_as_ushort8(x) __builtin_astype(x, ushort8) #define __clc_as_int8(x) __builtin_astype(x, int8) @@ -66,7 +60,6 @@ #define __clc_as_float8(x) __builtin_astype(x, float8) #define __clc_as_char16(x) __builtin_astype(x, char16) -#define __clc_as_schar16(x) __builtin_astype(x, schar16) #define __clc_as_uchar16(x) __builtin_astype(x, uchar16) #define __clc_as_short16(x) __builtin_astype(x, short16) #define __clc_as_ushort16(x) __builtin_astype(x, ushort16) diff --git a/libclc/clc/include/clc/clc_convert.h b/libclc/clc/include/clc/clc_convert.h index 3ca53e944280a..ab41e5abb9d6c 100644 --- a/libclc/clc/include/clc/clc_convert.h +++ b/libclc/clc/include/clc/clc_convert.h @@ -24,7 +24,6 @@ #define _CLC_VECTOR_CONVERT_FROM1(FROM_TYPE, SUFFIX) \ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, char, SUFFIX) \ - _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, schar, SUFFIX) \ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, uchar, SUFFIX) \ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, int, SUFFIX) \ _CLC_VECTOR_CONVERT_DECL(FROM_TYPE, uint, SUFFIX) \ @@ -58,7 +57,6 @@ #define _CLC_VECTOR_CONVERT_TO1(SUFFIX) \ _CLC_VECTOR_CONVERT_FROM(char, SUFFIX) \ - _CLC_VECTOR_CONVERT_FROM(schar, SUFFIX) \ _CLC_VECTOR_CONVERT_FROM(uchar, SUFFIX) \ _CLC_VECTOR_CONVERT_FROM(int, SUFFIX) \ _CLC_VECTOR_CONVERT_FROM(uint, SUFFIX) \ diff --git a/libclc/clc/include/clc/clctypes.h b/libclc/clc/include/clc/clctypes.h index 98294e27a5993..2187c2535963b 100644 --- a/libclc/clc/include/clc/clctypes.h +++ b/libclc/clc/include/clc/clctypes.h @@ -11,7 +11,6 @@ /* 6.1.1 Built-in Scalar Data Types */ -typedef signed char schar; typedef unsigned char uchar; typedef unsigned short ushort; typedef unsigned int uint; @@ -40,12 +39,6 @@ typedef __attribute__((ext_vector_type(4))) char char4; typedef __attribute__((ext_vector_type(8))) char char8; typedef __attribute__((ext_vector_type(16))) char char16; -typedef __attribute__((ext_vector_type(2))) schar schar2; -typedef __attribute__((ext_vector_type(3))) schar schar3; -typedef __attribute__((ext_vector_type(4))) schar schar4; -typedef __attribute__((ext_vector_type(8))) schar schar8; -typedef __attribute__((ext_vector_type(16))) schar schar16; - typedef __attribute__((ext_vector_type(2))) uchar uchar2; typedef __attribute__((ext_vector_type(3))) uchar uchar3; typedef __attribute__((ext_vector_type(4))) uchar uchar4; diff --git a/libclc/clc/include/clc/integer/gentype.inc b/libclc/clc/include/clc/integer/gentype.inc index f6a5f83967ccc..acca8632de32a 100644 --- a/libclc/clc/include/clc/integer/gentype.inc +++ b/libclc/clc/include/clc/integer/gentype.inc @@ -5,6 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + #include #include #include @@ -20,13 +21,11 @@ // These 2 defines only change when switching between data sizes or base types // to keep this file manageable. - #define __CLC_GENSIZE 8 #define __CLC_SCALAR_GENTYPE char #define __CLC_GEN_S #define __CLC_GENTYPE char -#define __CLC_SPIRV_INTERFACE_GENTYPE schar #define __CLC_U_GENTYPE uchar #define __CLC_S_GENTYPE char #define __CLC_SCALAR @@ -39,12 +38,10 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_VECSIZE_OR_1 __CLC_VECSIZE #define __CLC_GENTYPE char2 -#define __CLC_SPIRV_INTERFACE_GENTYPE schar2 #define __CLC_U_GENTYPE uchar2 #define __CLC_S_GENTYPE char2 #define __CLC_VECSIZE 2 @@ -53,10 +50,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE char3 -#define __CLC_SPIRV_INTERFACE_GENTYPE schar3 #define __CLC_U_GENTYPE uchar3 #define __CLC_S_GENTYPE char3 #define __CLC_VECSIZE 3 @@ -65,10 +60,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE char4 -#define __CLC_SPIRV_INTERFACE_GENTYPE schar4 #define __CLC_U_GENTYPE uchar4 #define __CLC_S_GENTYPE char4 #define __CLC_VECSIZE 4 @@ -77,10 +70,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE char8 -#define __CLC_SPIRV_INTERFACE_GENTYPE schar8 #define __CLC_U_GENTYPE uchar8 #define __CLC_S_GENTYPE char8 #define __CLC_VECSIZE 8 @@ -89,10 +80,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE char16 -#define __CLC_SPIRV_INTERFACE_GENTYPE schar16 #define __CLC_U_GENTYPE uchar16 #define __CLC_S_GENTYPE char16 #define __CLC_VECSIZE 16 @@ -102,94 +91,6 @@ #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE #undef __CLC_VECSIZE_OR_1 -#undef __CLC_SPIRV_INTERFACE_GENTYPE - -#ifndef __CLC_NO_SCHAR -#undef __CLC_GEN_S -#undef __CLC_SCALAR_GENTYPE -#define __CLC_SCALAR_GENTYPE schar -#define __CLC_GEN_S - -#define __CLC_GENTYPE schar -#define __CLC_SPIRV_INTERFACE_GENTYPE schar -#define __CLC_U_GENTYPE uchar -#define __CLC_S_GENTYPE schar -#define __CLC_SCALAR -#define __CLC_VECSIZE -#define __CLC_VECSIZE_OR_1 1 -#include __CLC_BODY -#undef __CLC_VECSIZE_OR_1 -#undef __CLC_VECSIZE -#undef __CLC_SCALAR -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE - -#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE - -#define __CLC_GENTYPE schar2 -#define __CLC_SPIRV_INTERFACE_GENTYPE schar2 -#define __CLC_U_GENTYPE uchar2 -#define __CLC_S_GENTYPE schar2 -#define __CLC_VECSIZE 2 -#include __CLC_BODY -#undef __CLC_VECSIZE -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE - -#define __CLC_GENTYPE schar3 -#define __CLC_SPIRV_INTERFACE_GENTYPE schar3 -#define __CLC_U_GENTYPE uchar3 -#define __CLC_S_GENTYPE schar3 -#define __CLC_VECSIZE 3 -#include __CLC_BODY -#undef __CLC_VECSIZE -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE - -#define __CLC_GENTYPE schar4 -#define __CLC_SPIRV_INTERFACE_GENTYPE schar4 -#define __CLC_U_GENTYPE uchar4 -#define __CLC_S_GENTYPE schar4 -#define __CLC_VECSIZE 4 -#include __CLC_BODY -#undef __CLC_VECSIZE -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE - -#define __CLC_GENTYPE schar8 -#define __CLC_SPIRV_INTERFACE_GENTYPE schar8 -#define __CLC_U_GENTYPE uchar8 -#define __CLC_S_GENTYPE schar8 -#define __CLC_VECSIZE 8 -#include __CLC_BODY -#undef __CLC_VECSIZE -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE - -#define __CLC_GENTYPE schar16 -#define __CLC_SPIRV_INTERFACE_GENTYPE schar16 -#define __CLC_U_GENTYPE uchar16 -#define __CLC_S_GENTYPE schar16 -#define __CLC_VECSIZE 16 -#include __CLC_BODY -#undef __CLC_VECSIZE -#undef __CLC_GENTYPE -#undef __CLC_U_GENTYPE -#undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE -#undef __CLC_VECSIZE_OR_1 - -#endif // __CLC_NO_SCHAR #undef __CLC_SCALAR_GENTYPE #define __CLC_SCALAR_GENTYPE uchar @@ -197,9 +98,8 @@ #define __CLC_GEN_U #define __CLC_GENTYPE uchar -#define __CLC_SPIRV_INTERFACE_GENTYPE uchar #define __CLC_U_GENTYPE uchar -#define __CLC_S_GENTYPE schar +#define __CLC_S_GENTYPE char #define __CLC_SCALAR #define __CLC_VECSIZE #define __CLC_VECSIZE_OR_1 1 @@ -210,69 +110,58 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_VECSIZE_OR_1 __CLC_VECSIZE #define __CLC_GENTYPE uchar2 -#define __CLC_SPIRV_INTERFACE_GENTYPE uchar2 #define __CLC_U_GENTYPE uchar2 -#define __CLC_S_GENTYPE schar2 +#define __CLC_S_GENTYPE char2 #define __CLC_VECSIZE 2 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE uchar3 -#define __CLC_SPIRV_INTERFACE_GENTYPE uchar3 #define __CLC_U_GENTYPE uchar3 -#define __CLC_S_GENTYPE schar3 +#define __CLC_S_GENTYPE char3 #define __CLC_VECSIZE 3 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE uchar4 -#define __CLC_SPIRV_INTERFACE_GENTYPE uchar4 #define __CLC_U_GENTYPE uchar4 -#define __CLC_S_GENTYPE schar4 +#define __CLC_S_GENTYPE char4 #define __CLC_VECSIZE 4 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE uchar8 -#define __CLC_SPIRV_INTERFACE_GENTYPE uchar8 #define __CLC_U_GENTYPE uchar8 -#define __CLC_S_GENTYPE schar8 +#define __CLC_S_GENTYPE char8 #define __CLC_VECSIZE 8 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE uchar16 -#define __CLC_SPIRV_INTERFACE_GENTYPE uchar16 #define __CLC_U_GENTYPE uchar16 -#define __CLC_S_GENTYPE schar16 +#define __CLC_S_GENTYPE char16 #define __CLC_VECSIZE 16 #include __CLC_BODY #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #undef __CLC_VECSIZE_OR_1 #undef __CLC_GENSIZE @@ -283,7 +172,6 @@ #define __CLC_GEN_S #define __CLC_GENTYPE short -#define __CLC_SPIRV_INTERFACE_GENTYPE short #define __CLC_U_GENTYPE ushort #define __CLC_S_GENTYPE short #define __CLC_SCALAR @@ -296,12 +184,10 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_VECSIZE_OR_1 __CLC_VECSIZE #define __CLC_GENTYPE short2 -#define __CLC_SPIRV_INTERFACE_GENTYPE short2 #define __CLC_U_GENTYPE ushort2 #define __CLC_S_GENTYPE short2 #define __CLC_VECSIZE 2 @@ -310,10 +196,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE short3 -#define __CLC_SPIRV_INTERFACE_GENTYPE short3 #define __CLC_U_GENTYPE ushort3 #define __CLC_S_GENTYPE short3 #define __CLC_VECSIZE 3 @@ -322,10 +206,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE short4 -#define __CLC_SPIRV_INTERFACE_GENTYPE short4 #define __CLC_U_GENTYPE ushort4 #define __CLC_S_GENTYPE short4 #define __CLC_VECSIZE 4 @@ -334,10 +216,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE short8 -#define __CLC_SPIRV_INTERFACE_GENTYPE short8 #define __CLC_U_GENTYPE ushort8 #define __CLC_S_GENTYPE short8 #define __CLC_VECSIZE 8 @@ -346,10 +226,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE short16 -#define __CLC_SPIRV_INTERFACE_GENTYPE short16 #define __CLC_U_GENTYPE ushort16 #define __CLC_S_GENTYPE short16 #define __CLC_VECSIZE 16 @@ -358,7 +236,6 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #undef __CLC_VECSIZE_OR_1 #undef __CLC_SCALAR_GENTYPE @@ -367,7 +244,6 @@ #define __CLC_GEN_U #define __CLC_GENTYPE ushort -#define __CLC_SPIRV_INTERFACE_GENTYPE ushort #define __CLC_U_GENTYPE ushort #define __CLC_S_GENTYPE short #define __CLC_SCALAR @@ -380,12 +256,10 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_VECSIZE_OR_1 __CLC_VECSIZE #define __CLC_GENTYPE ushort2 -#define __CLC_SPIRV_INTERFACE_GENTYPE ushort2 #define __CLC_U_GENTYPE ushort2 #define __CLC_S_GENTYPE short2 #define __CLC_VECSIZE 2 @@ -394,10 +268,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE ushort3 -#define __CLC_SPIRV_INTERFACE_GENTYPE ushort3 #define __CLC_U_GENTYPE ushort3 #define __CLC_S_GENTYPE short3 #define __CLC_VECSIZE 3 @@ -406,10 +278,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE ushort4 -#define __CLC_SPIRV_INTERFACE_GENTYPE ushort4 #define __CLC_U_GENTYPE ushort4 #define __CLC_S_GENTYPE short4 #define __CLC_VECSIZE 4 @@ -418,10 +288,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE ushort8 -#define __CLC_SPIRV_INTERFACE_GENTYPE ushort8 #define __CLC_U_GENTYPE ushort8 #define __CLC_S_GENTYPE short8 #define __CLC_VECSIZE 8 @@ -430,10 +298,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE ushort16 -#define __CLC_SPIRV_INTERFACE_GENTYPE ushort16 #define __CLC_U_GENTYPE ushort16 #define __CLC_S_GENTYPE short16 #define __CLC_VECSIZE 16 @@ -442,7 +308,6 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #undef __CLC_VECSIZE_OR_1 #undef __CLC_GENSIZE @@ -453,7 +318,6 @@ #define __CLC_GEN_S #define __CLC_GENTYPE int -#define __CLC_SPIRV_INTERFACE_GENTYPE int #define __CLC_U_GENTYPE uint #define __CLC_S_GENTYPE int #define __CLC_SCALAR @@ -466,12 +330,10 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_VECSIZE_OR_1 __CLC_VECSIZE #define __CLC_GENTYPE int2 -#define __CLC_SPIRV_INTERFACE_GENTYPE int2 #define __CLC_U_GENTYPE uint2 #define __CLC_S_GENTYPE int2 #define __CLC_VECSIZE 2 @@ -480,10 +342,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE int3 -#define __CLC_SPIRV_INTERFACE_GENTYPE int3 #define __CLC_U_GENTYPE uint3 #define __CLC_S_GENTYPE int3 #define __CLC_VECSIZE 3 @@ -492,10 +352,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE int4 -#define __CLC_SPIRV_INTERFACE_GENTYPE int4 #define __CLC_U_GENTYPE uint4 #define __CLC_S_GENTYPE int4 #define __CLC_VECSIZE 4 @@ -504,10 +362,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE int8 -#define __CLC_SPIRV_INTERFACE_GENTYPE int8 #define __CLC_U_GENTYPE uint8 #define __CLC_S_GENTYPE int8 #define __CLC_VECSIZE 8 @@ -516,10 +372,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE int16 -#define __CLC_SPIRV_INTERFACE_GENTYPE int16 #define __CLC_U_GENTYPE uint16 #define __CLC_S_GENTYPE int16 #define __CLC_VECSIZE 16 @@ -528,7 +382,6 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #undef __CLC_VECSIZE_OR_1 #undef __CLC_SCALAR_GENTYPE @@ -537,7 +390,6 @@ #define __CLC_GEN_U #define __CLC_GENTYPE uint -#define __CLC_SPIRV_INTERFACE_GENTYPE uint #define __CLC_U_GENTYPE uint #define __CLC_S_GENTYPE int #define __CLC_SCALAR @@ -550,12 +402,10 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_VECSIZE_OR_1 __CLC_VECSIZE #define __CLC_GENTYPE uint2 -#define __CLC_SPIRV_INTERFACE_GENTYPE uint2 #define __CLC_U_GENTYPE uint2 #define __CLC_S_GENTYPE int2 #define __CLC_VECSIZE 2 @@ -564,10 +414,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE uint3 -#define __CLC_SPIRV_INTERFACE_GENTYPE uint3 #define __CLC_U_GENTYPE uint3 #define __CLC_S_GENTYPE int3 #define __CLC_VECSIZE 3 @@ -576,10 +424,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE uint4 -#define __CLC_SPIRV_INTERFACE_GENTYPE uint4 #define __CLC_U_GENTYPE uint4 #define __CLC_S_GENTYPE int4 #define __CLC_VECSIZE 4 @@ -588,10 +434,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE uint8 -#define __CLC_SPIRV_INTERFACE_GENTYPE uint8 #define __CLC_U_GENTYPE uint8 #define __CLC_S_GENTYPE int8 #define __CLC_VECSIZE 8 @@ -600,10 +444,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE uint16 -#define __CLC_SPIRV_INTERFACE_GENTYPE uint16 #define __CLC_U_GENTYPE uint16 #define __CLC_S_GENTYPE int16 #define __CLC_VECSIZE 16 @@ -612,7 +454,6 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #undef __CLC_VECSIZE_OR_1 #undef __CLC_GENSIZE @@ -623,7 +464,6 @@ #define __CLC_GEN_S #define __CLC_GENTYPE long -#define __CLC_SPIRV_INTERFACE_GENTYPE long #define __CLC_U_GENTYPE ulong #define __CLC_S_GENTYPE long #define __CLC_SCALAR @@ -636,12 +476,10 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_VECSIZE_OR_1 __CLC_VECSIZE #define __CLC_GENTYPE long2 -#define __CLC_SPIRV_INTERFACE_GENTYPE long2 #define __CLC_U_GENTYPE ulong2 #define __CLC_S_GENTYPE long2 #define __CLC_VECSIZE 2 @@ -650,10 +488,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE long3 -#define __CLC_SPIRV_INTERFACE_GENTYPE long3 #define __CLC_U_GENTYPE ulong3 #define __CLC_S_GENTYPE long3 #define __CLC_VECSIZE 3 @@ -662,10 +498,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE long4 -#define __CLC_SPIRV_INTERFACE_GENTYPE long4 #define __CLC_U_GENTYPE ulong4 #define __CLC_S_GENTYPE long4 #define __CLC_VECSIZE 4 @@ -674,10 +508,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE long8 -#define __CLC_SPIRV_INTERFACE_GENTYPE long8 #define __CLC_U_GENTYPE ulong8 #define __CLC_S_GENTYPE long8 #define __CLC_VECSIZE 8 @@ -686,10 +518,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE long16 -#define __CLC_SPIRV_INTERFACE_GENTYPE long16 #define __CLC_U_GENTYPE ulong16 #define __CLC_S_GENTYPE long16 #define __CLC_VECSIZE 16 @@ -698,7 +528,6 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #undef __CLC_VECSIZE_OR_1 #undef __CLC_SCALAR_GENTYPE @@ -707,7 +536,6 @@ #define __CLC_GEN_U #define __CLC_GENTYPE ulong -#define __CLC_SPIRV_INTERFACE_GENTYPE ulong #define __CLC_U_GENTYPE ulong #define __CLC_S_GENTYPE long #define __CLC_SCALAR @@ -721,12 +549,10 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_VECSIZE_OR_1 __CLC_VECSIZE #define __CLC_GENTYPE ulong2 -#define __CLC_SPIRV_INTERFACE_GENTYPE ulong2 #define __CLC_U_GENTYPE ulong2 #define __CLC_S_GENTYPE long2 #define __CLC_VECSIZE 2 @@ -735,10 +561,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE ulong3 -#define __CLC_SPIRV_INTERFACE_GENTYPE ulong3 #define __CLC_U_GENTYPE ulong3 #define __CLC_S_GENTYPE long3 #define __CLC_VECSIZE 3 @@ -747,10 +571,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE ulong4 -#define __CLC_SPIRV_INTERFACE_GENTYPE ulong4 #define __CLC_U_GENTYPE ulong4 #define __CLC_S_GENTYPE long4 #define __CLC_VECSIZE 4 @@ -759,10 +581,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE ulong8 -#define __CLC_SPIRV_INTERFACE_GENTYPE ulong8 #define __CLC_U_GENTYPE ulong8 #define __CLC_S_GENTYPE long8 #define __CLC_VECSIZE 8 @@ -771,10 +591,8 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #define __CLC_GENTYPE ulong16 -#define __CLC_SPIRV_INTERFACE_GENTYPE ulong16 #define __CLC_U_GENTYPE ulong16 #define __CLC_S_GENTYPE long16 #define __CLC_VECSIZE 16 @@ -783,12 +601,10 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE -#undef __CLC_SPIRV_INTERFACE_GENTYPE #undef __CLC_VECSIZE_OR_1 #undef __CLC_GEN_U -#undef __CLC_CHAR #undef __CLC_GENSIZE #undef __CLC_SCALAR_GENTYPE #undef __CLC_BODY diff --git a/libclc/clc/include/clc/math/gentype.inc b/libclc/clc/include/clc/math/gentype.inc index 6cc4354f2358d..a8d8317a9cacb 100644 --- a/libclc/clc/include/clc/math/gentype.inc +++ b/libclc/clc/include/clc/math/gentype.inc @@ -20,7 +20,6 @@ #define __CLC_DOUBLEN __CLC_XCONCAT(double, __CLC_VECSIZE) #define __CLC_CHARN __CLC_XCONCAT(char, __CLC_VECSIZE) -#define __CLC_SCHARN __CLC_XCONCAT(schar, __CLC_VECSIZE) #define __CLC_SHORTN __CLC_XCONCAT(short, __CLC_VECSIZE) #define __CLC_INTN __CLC_XCONCAT(int, __CLC_VECSIZE) #define __CLC_LONGN __CLC_XCONCAT(long, __CLC_VECSIZE) @@ -35,7 +34,6 @@ #define __CLC_AS_DOUBLEN __CLC_XCONCAT(__clc_as_, __CLC_DOUBLEN) #define __CLC_AS_CHARN __CLC_XCONCAT(__clc_as_, __CLC_CHARN) -#define __CLC_AS_SCHARN __CLC_XCONCAT(__clc_as_, __CLC_SCHARN) #define __CLC_AS_SHORTN __CLC_XCONCAT(__clc_as_, __CLC_SHORTN) #define __CLC_AS_INTN __CLC_XCONCAT(__clc_as_, __CLC_INTN) #define __CLC_AS_LONGN __CLC_XCONCAT(__clc_as_, __CLC_LONGN) @@ -50,7 +48,6 @@ #define __CLC_CONVERT_DOUBLEN __CLC_XCONCAT(__clc_convert_double, __CLC_VECSIZE) #define __CLC_CONVERT_CHARN __CLC_XCONCAT(__clc_convert_, __CLC_CHARN) -#define __CLC_CONVERT_SCHARN __CLC_XCONCAT(__clc_convert_, __CLC_SCHARN) #define __CLC_CONVERT_SHORTN __CLC_XCONCAT(__clc_convert_, __CLC_SHORTN) #define __CLC_CONVERT_INTN __CLC_XCONCAT(__clc_convert_, __CLC_INTN) #define __CLC_CONVERT_LONGN __CLC_XCONCAT(__clc_convert_, __CLC_LONGN) @@ -344,7 +341,6 @@ #undef __CLC_LONGN #undef __CLC_INTN #undef __CLC_SHORTN -#undef __CLC_SCHARN #undef __CLC_CHARN #undef __CLC_DOUBLEN diff --git a/libclc/clc/lib/generic/integer/clc_clz.cl b/libclc/clc/lib/generic/integer/clc_clz.cl index 251ee92f2221f..74f662375af6b 100644 --- a/libclc/clc/lib/generic/integer/clc_clz.cl +++ b/libclc/clc/lib/generic/integer/clc_clz.cl @@ -14,10 +14,6 @@ _CLC_OVERLOAD _CLC_DEF char __clc_clz(char x) { return __clc_clz((ushort)(uchar)x) - 8; } -_CLC_OVERLOAD _CLC_DEF schar __clc_clz(schar x) { - return __clc_clz((ushort)(uchar)x) - 8; -} - _CLC_OVERLOAD _CLC_DEF uchar __clc_clz(uchar x) { return __clc_clz((ushort)x) - 8; } @@ -47,7 +43,6 @@ _CLC_OVERLOAD _CLC_DEF ulong __clc_clz(ulong x) { } _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, __clc_clz, char) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, schar, __clc_clz, schar) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, __clc_clz, uchar) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, __clc_clz, short) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, __clc_clz, ushort) diff --git a/libclc/clc/lib/generic/integer/clc_ctz.cl b/libclc/clc/lib/generic/integer/clc_ctz.cl index 71c3d9ea6dd3b..50fda4a214b24 100644 --- a/libclc/clc/lib/generic/integer/clc_ctz.cl +++ b/libclc/clc/lib/generic/integer/clc_ctz.cl @@ -14,10 +14,6 @@ _CLC_OVERLOAD _CLC_DEF char __clc_ctz(char x) { return __clc_ctz(__clc_as_uchar(x)); } -_CLC_OVERLOAD _CLC_DEF schar __clc_ctz(schar x) { - return __clc_ctz(__clc_as_uchar(x)); -} - _CLC_OVERLOAD _CLC_DEF uchar __clc_ctz(uchar x) { return __builtin_ctzg(x, 8); } _CLC_OVERLOAD _CLC_DEF short __clc_ctz(short x) { @@ -43,7 +39,6 @@ _CLC_OVERLOAD _CLC_DEF ulong __clc_ctz(ulong x) { } _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, __clc_ctz, char) -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, schar, __clc_ctz, schar) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, __clc_ctz, uchar) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, __clc_ctz, short) _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, __clc_ctz, ushort) diff --git a/libclc/clc/lib/generic/integer/clc_mad_sat.cl b/libclc/clc/lib/generic/integer/clc_mad_sat.cl index cb9d085d7cebc..7c6aaffe6c5c7 100644 --- a/libclc/clc/lib/generic/integer/clc_mad_sat.cl +++ b/libclc/clc/lib/generic/integer/clc_mad_sat.cl @@ -36,7 +36,6 @@ __CLC_DEFINE_SIMPLE_MAD_SAT(TYPE##16, UP_TYPE##16, LIT_PREFIX) __CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(char, int, CHAR) -__CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(schar, int, SCHAR) __CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(uchar, uint, UCHAR) __CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(short, int, SHRT) __CLC_DEFINE_SIMPLE_MAD_SAT_ALL_TYS(ushort, uint, USHRT) diff --git a/libclc/clc/lib/generic/integer/clc_mul_hi.cl b/libclc/clc/lib/generic/integer/clc_mul_hi.cl index 2cd95b299711f..117d8471fb8a3 100644 --- a/libclc/clc/lib/generic/integer/clc_mul_hi.cl +++ b/libclc/clc/lib/generic/integer/clc_mul_hi.cl @@ -114,7 +114,6 @@ _CLC_OVERLOAD _CLC_DEF ulong __clc_mul_hi(ulong x, ulong y) { #define __CLC_MUL_HI_TYPES() \ __CLC_MUL_HI_DEC_IMPL(short, char, 8) \ - __CLC_MUL_HI_DEC_IMPL(short, schar, 8) \ __CLC_MUL_HI_DEC_IMPL(ushort, uchar, 8) \ __CLC_MUL_HI_DEC_IMPL(int, short, 16) \ __CLC_MUL_HI_DEC_IMPL(uint, ushort, 16) \ diff --git a/libclc/clc/lib/generic/misc/clc_shuffle.cl b/libclc/clc/lib/generic/misc/clc_shuffle.cl index 83ea39f35191d..f02e7aec9a0b8 100644 --- a/libclc/clc/lib/generic/misc/clc_shuffle.cl +++ b/libclc/clc/lib/generic/misc/clc_shuffle.cl @@ -138,7 +138,6 @@ _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE) _CLC_VECTOR_SHUFFLE_INSIZE(char, uchar) -_CLC_VECTOR_SHUFFLE_INSIZE(schar, uchar) _CLC_VECTOR_SHUFFLE_INSIZE(short, ushort) _CLC_VECTOR_SHUFFLE_INSIZE(int, uint) _CLC_VECTOR_SHUFFLE_INSIZE(long, ulong) diff --git a/libclc/clc/lib/generic/misc/clc_shuffle2.cl b/libclc/clc/lib/generic/misc/clc_shuffle2.cl index 74dfe04c77f31..db97f7c5cf960 100644 --- a/libclc/clc/lib/generic/misc/clc_shuffle2.cl +++ b/libclc/clc/lib/generic/misc/clc_shuffle2.cl @@ -139,7 +139,6 @@ _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE) _CLC_VECTOR_SHUFFLE_INSIZE(char, uchar) -_CLC_VECTOR_SHUFFLE_INSIZE(schar, uchar) _CLC_VECTOR_SHUFFLE_INSIZE(short, ushort) _CLC_VECTOR_SHUFFLE_INSIZE(int, uint) _CLC_VECTOR_SHUFFLE_INSIZE(long, ulong) diff --git a/libclc/libspirv/include/libspirv/async/gentype.inc b/libclc/libspirv/include/libspirv/async/gentype.inc index d759642bd4784..1b678f8a4ad00 100644 --- a/libclc/libspirv/include/libspirv/async/gentype.inc +++ b/libclc/libspirv/include/libspirv/async/gentype.inc @@ -46,46 +46,6 @@ #undef __CLC_GENTYPE_MANGLED #undef __CLC_GENTYPE -#ifndef __CLC_NO_SCHAR -#define __CLC_GENTYPE schar -#define __CLC_GENTYPE_MANGLED a -#include __CLC_BODY -#undef __CLC_GENTYPE_MANGLED -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE schar2 -#define __CLC_GENTYPE_MANGLED Dv2_a -#include __CLC_BODY -#undef __CLC_GENTYPE_MANGLED -#undef __CLC_GENTYPE - -#ifdef __CLC_GEN_VEC3 -#define __CLC_GENTYPE schar3 -#define __CLC_GENTYPE_MANGLED Dv3_a -#include __CLC_BODY -#undef __CLC_GENTYPE_MANGLED -#undef __CLC_GENTYPE -#endif - -#define __CLC_GENTYPE schar4 -#define __CLC_GENTYPE_MANGLED Dv4_a -#include __CLC_BODY -#undef __CLC_GENTYPE_MANGLED -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE schar8 -#define __CLC_GENTYPE_MANGLED Dv8_a -#include __CLC_BODY -#undef __CLC_GENTYPE_MANGLED -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE schar16 -#define __CLC_GENTYPE_MANGLED Dv16_a -#include __CLC_BODY -#undef __CLC_GENTYPE_MANGLED -#undef __CLC_GENTYPE -#endif - #define __CLC_GENTYPE uchar #define __CLC_GENTYPE_MANGLED h #include __CLC_BODY diff --git a/libclc/libspirv/include/libspirv/lp64_types.h b/libclc/libspirv/include/libspirv/lp64_types.h index 2cf6a948a5d4b..aa8e9ad85be63 100644 --- a/libclc/libspirv/include/libspirv/lp64_types.h +++ b/libclc/libspirv/include/libspirv/lp64_types.h @@ -22,12 +22,12 @@ typedef char __clc_vec4_char_t __attribute__((ext_vector_type(4))); typedef char __clc_vec8_char_t __attribute__((ext_vector_type(8))); typedef char __clc_vec16_char_t __attribute__((ext_vector_type(16))); -typedef signed char __clc_int8_t; -typedef signed char __clc_vec2_int8_t __attribute__((ext_vector_type(2))); -typedef signed char __clc_vec3_int8_t __attribute__((ext_vector_type(3))); -typedef signed char __clc_vec4_int8_t __attribute__((ext_vector_type(4))); -typedef signed char __clc_vec8_int8_t __attribute__((ext_vector_type(8))); -typedef signed char __clc_vec16_int8_t __attribute__((ext_vector_type(16))); +typedef char __clc_int8_t; +typedef char __clc_vec2_int8_t __attribute__((ext_vector_type(2))); +typedef char __clc_vec3_int8_t __attribute__((ext_vector_type(3))); +typedef char __clc_vec4_int8_t __attribute__((ext_vector_type(4))); +typedef char __clc_vec8_int8_t __attribute__((ext_vector_type(8))); +typedef char __clc_vec16_int8_t __attribute__((ext_vector_type(16))); typedef unsigned char __clc_uint8_t; typedef unsigned char __clc_vec2_uint8_t __attribute__((ext_vector_type(2))); diff --git a/libclc/libspirv/include/libspirv/ptx-nvidiacl/async/gentype.inc b/libclc/libspirv/include/libspirv/ptx-nvidiacl/async/gentype.inc index 2d65ac9216adf..3ca00675abfdd 100644 --- a/libclc/libspirv/include/libspirv/ptx-nvidiacl/async/gentype.inc +++ b/libclc/libspirv/include/libspirv/ptx-nvidiacl/async/gentype.inc @@ -28,29 +28,6 @@ #undef __CLC_GENTYPE #endif -#ifndef __CLC_NO_SCHAR -#define __CLC_GENTYPE schar -#define __CLC_GENTYPE_MANGLED a -#include __CLC_BODY -#undef __CLC_GENTYPE_MANGLED -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE schar2 -#define __CLC_GENTYPE_MANGLED Dv2_a -#include __CLC_BODY -#undef __CLC_GENTYPE_MANGLED -#undef __CLC_GENTYPE - -#ifdef __CLC_GEN_VEC3 -#define __CLC_GENTYPE schar3 -#define __CLC_GENTYPE_MANGLED Dv3_a -#include __CLC_BODY -#undef __CLC_GENTYPE_MANGLED -#undef __CLC_GENTYPE -#endif - -#endif - #define __CLC_GENTYPE uchar #define __CLC_GENTYPE_MANGLED h #include __CLC_BODY diff --git a/libclc/libspirv/lib/generic/gen_convert.py b/libclc/libspirv/lib/generic/gen_convert.py index 52daf3f1dbfee..2153f9eadc5bf 100755 --- a/libclc/libspirv/lib/generic/gen_convert.py +++ b/libclc/libspirv/lib/generic/gen_convert.py @@ -42,9 +42,6 @@ clc_core_fn_name, ) -types.remove("char") -int_types.remove("char") -signed_types.remove("char") rounding_modes = [""] + rounding_modes print( @@ -94,6 +91,15 @@ #pragma OPENCL EXTENSION cles_khr_int64 : enable #endif +// Typedef some signed char types that SPIR-V requires as the destination of +// certain conversion builtins. +typedef signed char schar; +typedef __attribute__((ext_vector_type(2))) signed char schar2; +typedef __attribute__((ext_vector_type(3))) signed char schar3; +typedef __attribute__((ext_vector_type(4))) signed char schar4; +typedef __attribute__((ext_vector_type(8))) signed char schar8; +typedef __attribute__((ext_vector_type(16))) signed char schar16; + """ ) @@ -113,9 +119,6 @@ def spirv_fn_name(src, dst, size="", mode="", sat="", force_sat_decoration=False is_dst_signed = dst in signed_types use_sat_insn = sat != "" and not force_sat_decoration - if dst == "schar": - dst = "char" - if is_src_unsigned and is_dst_signed and use_sat_insn: return "__spirv_SatConvertUToS_R{DST}{N}".format(DST=dst, N=size) elif is_src_signed and is_dst_unsigned and use_sat_insn: @@ -157,11 +160,22 @@ def is_signed_unsigned_conversion(src, dst): def generate_spirv_fn_impl(src, dst, size="", mode="", sat="", force_decoration=False): close_conditional = conditional_guard(src, dst) + # If the destination is an schar type, we will be converting using the + # equivalent char type which in OpenCL C is signed. For vector types, we + # cannot rely on implicit casts back to the schar type so insert an + # explicit cast. + if dst.startswith("schar") and size: + cast = "__builtin_convertvector(" + cast_end = f", {dst}{size})" + else: + cast = "" + cast_end = "" + print( """_CLC_DEF _CLC_OVERLOAD _CLC_CONSTFN {DST}{N} {FN}({SRC}{N} x) {{ - return {CORE_FN}(x); + return {CAST}{CORE_FN}(x){CAST_END}; }} """.format( FN=spirv_fn_name( @@ -172,6 +186,8 @@ def generate_spirv_fn_impl(src, dst, size="", mode="", sat="", force_decoration= mode=mode, force_sat_decoration=force_decoration, ), + CAST=cast, + CAST_END=cast_end, CORE_FN=clc_core_fn_name(dst, size=size, sat=sat, mode=mode), SRC=src, DST=dst, @@ -209,6 +225,10 @@ def generate_spirv_fn(src, dst, size="", mode="", sat=""): # __spirv_ConvertUToF / __spirv_ConvertSToF + mode for src in int_types: + # We're not interested in schar as source types; remangling will do that + # for us. + if src == "schar": + continue for dst in float_types: for size in vector_sizes: for mode in rounding_modes: @@ -223,6 +243,10 @@ def generate_spirv_fn(src, dst, size="", mode="", sat=""): # __spirv_UConvert + sat for src in int_types: + # We're not interested in schar as source types; remangling will do that + # for us. + if src == "schar": + continue for dst in unsigned_types: for size in vector_sizes: for sat in saturation: @@ -230,6 +254,10 @@ def generate_spirv_fn(src, dst, size="", mode="", sat=""): # __spirv_SConvert + sat for src in int_types: + # We're not interested in schar as source types; remangling will do that + # for us. + if src == "schar": + continue for dst in signed_types: for size in vector_sizes: for sat in saturation: diff --git a/libclc/libspirv/lib/generic/gen_convert_common.py b/libclc/libspirv/lib/generic/gen_convert_common.py index 136f505d59b4e..e36c63eb8bd65 100644 --- a/libclc/libspirv/lib/generic/gen_convert_common.py +++ b/libclc/libspirv/lib/generic/gen_convert_common.py @@ -42,8 +42,8 @@ bool_type = { "char": "char", - "schar": "schar", - "uchar": "schar", + "schar": "char", + "uchar": "char", "short": "short", "ushort": "short", "int": "int", @@ -84,7 +84,7 @@ limit_max = { "char": "CHAR_MAX", - "schar": "CHAR_MAX", + "schar": "SCHAR_MAX", "uchar": "UCHAR_MAX", "short": "SHRT_MAX", "ushort": "USHRT_MAX", @@ -97,7 +97,7 @@ limit_min = { "char": "CHAR_MIN", - "schar": "CHAR_MIN", + "schar": "SCHAR_MIN", "uchar": "0", "short": "SHRT_MIN", "ushort": "0", @@ -152,5 +152,5 @@ def clc_core_fn_name(dst, size="", mode="", sat=""): and saturation arguments. """ return "__clc_convert_{DST}{N}{SAT}{MODE}".format( - DST=dst, N=size, SAT=sat, MODE=mode + DST="char" if dst == "schar" else dst, N=size, SAT=sat, MODE=mode ) diff --git a/libclc/libspirv/lib/generic/geometric/normalize.cl b/libclc/libspirv/lib/generic/geometric/normalize.cl index d6f9d7f1c9cdf..bf706bd6d3398 100644 --- a/libclc/libspirv/lib/generic/geometric/normalize.cl +++ b/libclc/libspirv/lib/generic/geometric/normalize.cl @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include +#include #include #define HALF_MAX_SQRT 0x1.0p+8h @@ -15,8 +17,7 @@ MAX_SQRT, MIN_SQRT) \ _CLC_OVERLOAD _CLC_DEF FP_TYPE##VLEN __spirv_ocl_normalize( \ FP_TYPE##VLEN p) { \ - if (__spirv_All(__spirv_SConvert_Rchar##VLEN( \ - p == (FP_TYPE##VLEN)0.0##FLOAT_MARK))) \ + if (__clc_all(p == (FP_TYPE##VLEN)0.0##FLOAT_MARK)) \ return p; \ FP_TYPE l2 = __spirv_Dot(p, p); \ if (l2 < FLT_MIN) { \ @@ -30,7 +31,7 @@ __spirv_ocl_select( \ (FP_TYPE##VLEN)0.0##FLOAT_MARK, \ (FP_TYPE##VLEN)1.0##FLOAT_MARK, \ - __spirv_SConvert_R##INT_TYPE##VLEN(__spirv_IsInf(p))), \ + __clc_convert_##INT_TYPE##VLEN(__spirv_IsInf(p))), \ p); \ l2 = __spirv_Dot(p, p); \ } \ diff --git a/libclc/libspirv/lib/generic/integer/upsample.cl b/libclc/libspirv/lib/generic/integer/upsample.cl index c8987a7e581ea..3114c9ffdf702 100644 --- a/libclc/libspirv/lib/generic/integer/upsample.cl +++ b/libclc/libspirv/lib/generic/integer/upsample.cl @@ -37,7 +37,6 @@ #define __CLC_UPSAMPLE_TYPES() \ __CLC_UPSAMPLE_IMPL(s, short, char, uchar, char) \ - __CLC_UPSAMPLE_IMPL(s, short, schar, uchar, char) \ __CLC_UPSAMPLE_IMPL(u, ushort, uchar, uchar, uchar) \ __CLC_UPSAMPLE_IMPL(s, int, short, ushort, short) \ __CLC_UPSAMPLE_IMPL(u, uint, ushort, ushort, ushort) \ diff --git a/libclc/libspirv/lib/generic/math/fract.cl b/libclc/libspirv/lib/generic/math/fract.cl index ce768090bd73c..cc2da2c00189a 100644 --- a/libclc/libspirv/lib/generic/math/fract.cl +++ b/libclc/libspirv/lib/generic/math/fract.cl @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include #include #include diff --git a/libclc/libspirv/lib/generic/math/fract.inc b/libclc/libspirv/lib/generic/math/fract.inc index 24a559238368d..01e61c6b975f4 100644 --- a/libclc/libspirv/lib/generic/math/fract.inc +++ b/libclc/libspirv/lib/generic/math/fract.inc @@ -9,29 +9,20 @@ #if __CLC_FPSIZE == 64 #define MIN_CONSTANT 0x1.fffffffffffffp-1 #define ZERO (__CLC_GENTYPE)0.0 -#define __CLC_BOOLN __CLC_XCONCAT(long, __CLC_VECSIZE) #elif __CLC_FPSIZE == 32 #define MIN_CONSTANT 0x1.fffffep-1f #define ZERO (__CLC_GENTYPE)0.0f -#define __CLC_BOOLN __CLC_XCONCAT(int, __CLC_VECSIZE) #elif __CLC_FPSIZE == 16 #define MIN_CONSTANT 0x1.ffcp-1h #define ZERO (__CLC_GENTYPE)0.0h -#define __CLC_BOOLN __CLC_XCONCAT(short, __CLC_VECSIZE) -#endif - -#ifdef __CLC_SCALAR -#define __CLC_CONVERT -#else -#define __CLC_CONVERT __CLC_XCONCAT(__spirv_SConvert_R, __CLC_BOOLN) #endif _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_fract(__CLC_GENTYPE x, __private __CLC_GENTYPE *iptr) { *iptr = __spirv_ocl_floor(x); __CLC_GENTYPE r = __spirv_ocl_fmin(x - *iptr, MIN_CONSTANT); - r = __CLC_CONVERT(__spirv_IsInf(x)) ? ZERO : r; - r = __CLC_CONVERT(__spirv_IsNan(x)) ? x : r; + r = __CLC_CONVERT_BIT_INTN(__spirv_IsInf(x)) ? ZERO : r; + r = __CLC_CONVERT_BIT_INTN(__spirv_IsNan(x)) ? x : r; return r; } @@ -52,5 +43,3 @@ FRACT_DEF(generic); #undef MIN_CONSTANT #undef ZERO -#undef __CLC_CONVERT -#undef __CLC_BOOLN diff --git a/libclc/libspirv/lib/generic/math/maxmag.cl b/libclc/libspirv/lib/generic/math/maxmag.cl index b30d094e76fdc..5714c61ca7ab9 100644 --- a/libclc/libspirv/lib/generic/math/maxmag.cl +++ b/libclc/libspirv/lib/generic/math/maxmag.cl @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include #include #include diff --git a/libclc/libspirv/lib/generic/math/maxmag.inc b/libclc/libspirv/lib/generic/math/maxmag.inc index 9fa4702f0b6ad..44cf21cfda992 100644 --- a/libclc/libspirv/lib/generic/math/maxmag.inc +++ b/libclc/libspirv/lib/generic/math/maxmag.inc @@ -6,26 +6,15 @@ // //===----------------------------------------------------------------------===// -#if __CLC_FPSIZE == 64 -#define __CLC_CONVERT_NATN __CLC_XCONCAT(__spirv_SConvert_Rlong, __CLC_VECSIZE) -#elif __CLC_FPSIZE == 32 -#define __CLC_CONVERT_NATN __CLC_XCONCAT(__spirv_SConvert_Rint, __CLC_VECSIZE) -#elif __CLC_FPSIZE == 16 -#define __CLC_CONVERT_NATN __CLC_XCONCAT(__spirv_SConvert_Rshort, __CLC_VECSIZE) -#endif - _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_maxmag(__CLC_GENTYPE x, __CLC_GENTYPE y) { - const __CLC_GENTYPE res = __spirv_ocl_select( - y, x, - __CLC_CONVERT_NATN((__CLC_SCHARN)__spirv_FOrdGreaterThan( - __spirv_ocl_fabs(x), __spirv_ocl_fabs(y)))); + const __CLC_GENTYPE res = + __spirv_ocl_select(y, x, + __CLC_CONVERT_BIT_INTN(__spirv_FOrdGreaterThan( + __spirv_ocl_fabs(x), __spirv_ocl_fabs(y)))); return __spirv_ocl_select( res, __spirv_ocl_fmax(x, y), - __CLC_CONVERT_NATN( - (__CLC_SCHARN)(__spirv_IsNan(x) | __spirv_IsNan(y) | - __spirv_FOrdEqual(__spirv_ocl_fabs(x), - __spirv_ocl_fabs(y))))); + __CLC_CONVERT_BIT_INTN( + (__spirv_IsNan(x) | __spirv_IsNan(y) | + __spirv_FOrdEqual(__spirv_ocl_fabs(x), __spirv_ocl_fabs(y))))); } - -#undef __CLC_CONVERT_NATN diff --git a/libclc/libspirv/lib/generic/math/minmag.cl b/libclc/libspirv/lib/generic/math/minmag.cl index 7a2e00fca9d2f..7fb773c6fdf9a 100644 --- a/libclc/libspirv/lib/generic/math/minmag.cl +++ b/libclc/libspirv/lib/generic/math/minmag.cl @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include #include #include diff --git a/libclc/libspirv/lib/generic/math/minmag.inc b/libclc/libspirv/lib/generic/math/minmag.inc index 5a7d93134f2a3..9be181f718bff 100644 --- a/libclc/libspirv/lib/generic/math/minmag.inc +++ b/libclc/libspirv/lib/generic/math/minmag.inc @@ -6,26 +6,15 @@ // //===----------------------------------------------------------------------===// -#if __CLC_FPSIZE == 64 -#define __CLC_CONVERT_NATN __CLC_XCONCAT(__spirv_SConvert_Rlong, __CLC_VECSIZE) -#elif __CLC_FPSIZE == 32 -#define __CLC_CONVERT_NATN __CLC_XCONCAT(__spirv_SConvert_Rint, __CLC_VECSIZE) -#elif __CLC_FPSIZE == 16 -#define __CLC_CONVERT_NATN __CLC_XCONCAT(__spirv_SConvert_Rshort, __CLC_VECSIZE) -#endif - _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_minmag(__CLC_GENTYPE x, __CLC_GENTYPE y) { const __CLC_GENTYPE res = __spirv_ocl_select(y, x, - __CLC_CONVERT_NATN((__CLC_SCHARN)__spirv_FOrdLessThan( + __CLC_CONVERT_BIT_INTN(__spirv_FOrdLessThan( __spirv_ocl_fabs(x), __spirv_ocl_fabs(y)))); return __spirv_ocl_select( res, __spirv_ocl_fmin(x, y), - __CLC_CONVERT_NATN( - (__CLC_SCHARN)(__spirv_IsNan(x) | __spirv_IsNan(y) | - __spirv_FOrdEqual(__spirv_ocl_fabs(x), - __spirv_ocl_fabs(y))))); + __CLC_CONVERT_BIT_INTN( + (__spirv_IsNan(x) | __spirv_IsNan(y) | + __spirv_FOrdEqual(__spirv_ocl_fabs(x), __spirv_ocl_fabs(y))))); } - -#undef __CLC_CONVERT_NATN diff --git a/libclc/libspirv/lib/generic/relational/all.cl b/libclc/libspirv/lib/generic/relational/all.cl index 9301125e8a587..5f97bbe99b181 100644 --- a/libclc/libspirv/lib/generic/relational/all.cl +++ b/libclc/libspirv/lib/generic/relational/all.cl @@ -29,4 +29,4 @@ ALL_ID(TYPE##8) { return _CLC_ALL8(v); } \ ALL_ID(TYPE##16) { return _CLC_ALL16(v); } -ALL_VECTORIZE(schar) +ALL_VECTORIZE(char) diff --git a/libclc/libspirv/lib/generic/relational/any.cl b/libclc/libspirv/lib/generic/relational/any.cl index 601abf0e6fb51..18801274fd706 100644 --- a/libclc/libspirv/lib/generic/relational/any.cl +++ b/libclc/libspirv/lib/generic/relational/any.cl @@ -29,4 +29,4 @@ ANY_ID(TYPE##8) { return _CLC_ANY8(v); } \ ANY_ID(TYPE##16) { return _CLC_ANY16(v); } -ANY_VECTORIZE(schar) +ANY_VECTORIZE(char) diff --git a/libclc/libspirv/lib/generic/relational/genbinrelational.inc b/libclc/libspirv/lib/generic/relational/genbinrelational.inc index 14a41401839ac..364b781a89c04 100644 --- a/libclc/libspirv/lib/generic/relational/genbinrelational.inc +++ b/libclc/libspirv/lib/generic/relational/genbinrelational.inc @@ -8,7 +8,7 @@ _CLC_DEFINE_RELATIONAL_BINARY_SCALAR(bool, _CLC_SPIRV_BUILTIN, _CLC_BUILTIN_IMPL, float, float) -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(schar, _CLC_SPIRV_BUILTIN, float, float) +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(char, _CLC_SPIRV_BUILTIN, float, float) #ifdef cl_khr_fp64 @@ -16,7 +16,7 @@ _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(schar, _CLC_SPIRV_BUILTIN, float, float) _CLC_DEFINE_RELATIONAL_BINARY_SCALAR(bool, _CLC_SPIRV_BUILTIN, _CLC_BUILTIN_IMPL, double, double) -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(schar, _CLC_SPIRV_BUILTIN, double, double) +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(char, _CLC_SPIRV_BUILTIN, double, double) #endif @@ -26,6 +26,6 @@ _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(schar, _CLC_SPIRV_BUILTIN, double, double) _CLC_DEFINE_RELATIONAL_BINARY_SCALAR(bool, _CLC_SPIRV_BUILTIN, _CLC_BUILTIN_IMPL, half, half) -_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(schar, _CLC_SPIRV_BUILTIN, half, half) +_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(char, _CLC_SPIRV_BUILTIN, half, half) #endif diff --git a/libclc/libspirv/lib/generic/relational/genunary.inc b/libclc/libspirv/lib/generic/relational/genunary.inc index 80137ca3bc629..7f373d0a27618 100644 --- a/libclc/libspirv/lib/generic/relational/genunary.inc +++ b/libclc/libspirv/lib/generic/relational/genunary.inc @@ -8,7 +8,7 @@ _CLC_DEFINE_RELATIONAL_UNARY_SCALAR(bool, _CLC_SPIRV_BUILTIN, _CLC_BUILTIN_IMPL, float) -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(schar, _CLC_SPIRV_BUILTIN, float) +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(char, _CLC_SPIRV_BUILTIN, float) #ifdef cl_khr_fp64 @@ -16,7 +16,7 @@ _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(schar, _CLC_SPIRV_BUILTIN, float) _CLC_DEFINE_RELATIONAL_UNARY_SCALAR(bool, _CLC_SPIRV_BUILTIN, _CLC_BUILTIN_IMPL, double) -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(schar, _CLC_SPIRV_BUILTIN, double) +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(char, _CLC_SPIRV_BUILTIN, double) #endif #ifdef cl_khr_fp16 @@ -25,6 +25,6 @@ _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(schar, _CLC_SPIRV_BUILTIN, double) _CLC_DEFINE_RELATIONAL_UNARY_SCALAR(bool, _CLC_SPIRV_BUILTIN, _CLC_BUILTIN_IMPL, half) -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(schar, _CLC_SPIRV_BUILTIN, half) +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(char, _CLC_SPIRV_BUILTIN, half) #endif diff --git a/libclc/libspirv/lib/generic/shared/vload.cl b/libclc/libspirv/lib/generic/shared/vload.cl index d3c8163e9b2a8..4d7d61813abdc 100644 --- a/libclc/libspirv/lib/generic/shared/vload.cl +++ b/libclc/libspirv/lib/generic/shared/vload.cl @@ -76,7 +76,7 @@ #define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \ VLOAD_ADDR_SPACES_IMPL(__CLC_SCALAR_GENTYPE, __CLC_SCALAR_GENTYPE) -VLOAD_ADDR_SPACES_IMPL(char, schar) +VLOAD_ADDR_SPACES_IMPL(char, char) #define VLOAD_TYPES() \ VLOAD_ADDR_SPACES(uchar) \ diff --git a/libclc/libspirv/lib/generic/shared/vstore.cl b/libclc/libspirv/lib/generic/shared/vstore.cl index cf0e5fd0f706b..74bd35c3032d2 100644 --- a/libclc/libspirv/lib/generic/shared/vstore.cl +++ b/libclc/libspirv/lib/generic/shared/vstore.cl @@ -71,7 +71,7 @@ VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __global) \ VSTORE_VECTORIZE_GENERIC(__CLC_SCALAR___CLC_GENTYPE, __generic) -VSTORE_ADDR_SPACES(schar) +VSTORE_ADDR_SPACES(char) VSTORE_ADDR_SPACES(uchar) VSTORE_ADDR_SPACES(short) VSTORE_ADDR_SPACES(ushort) diff --git a/libclc/libspirv/lib/native_cpu/integer/popcount.cl b/libclc/libspirv/lib/native_cpu/integer/popcount.cl index 9756ee7c18e42..4b29f945c2f7b 100644 --- a/libclc/libspirv/lib/native_cpu/integer/popcount.cl +++ b/libclc/libspirv/lib/native_cpu/integer/popcount.cl @@ -13,7 +13,6 @@ } DEF_POPCOUNT_HELPER(char, unsigned char) -DEF_POPCOUNT_HELPER(schar, unsigned char) DEF_POPCOUNT_HELPER(short, unsigned short) _CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(int, __spirv_ocl_popcount, @@ -32,5 +31,3 @@ _CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(char, __spirv_ocl_popcount, __popcount_helper, char) _CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(uchar, __spirv_ocl_popcount, __builtin_popcountg, uchar) -_CLC_DEFINE_UNARY_BUILTIN_SCALARIZE(schar, __spirv_ocl_popcount, - __popcount_helper, schar) diff --git a/libclc/libspirv/lib/ptx-nvidiacl/async/async_work_group_strided_copy.cl b/libclc/libspirv/lib/ptx-nvidiacl/async/async_work_group_strided_copy.cl index 3c3a7dbc50312..49008ad57dc36 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/async/async_work_group_strided_copy.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/async/async_work_group_strided_copy.cl @@ -53,9 +53,6 @@ __CLC_GROUP_CP_ASYNC_DST_GLOBAL(ushort8); __CLC_GROUP_CP_ASYNC_DST_GLOBAL(char4); __CLC_GROUP_CP_ASYNC_DST_GLOBAL(char8); __CLC_GROUP_CP_ASYNC_DST_GLOBAL(char16); -__CLC_GROUP_CP_ASYNC_DST_GLOBAL(schar4); -__CLC_GROUP_CP_ASYNC_DST_GLOBAL(schar8); -__CLC_GROUP_CP_ASYNC_DST_GLOBAL(schar16); __CLC_GROUP_CP_ASYNC_DST_GLOBAL(uchar4); __CLC_GROUP_CP_ASYNC_DST_GLOBAL(uchar8); __CLC_GROUP_CP_ASYNC_DST_GLOBAL(uchar16); @@ -88,7 +85,6 @@ __CLC_GROUP_CP_ASYNC_4(short2); __CLC_GROUP_CP_ASYNC_4(ushort2); __CLC_GROUP_CP_ASYNC_4(half2); __CLC_GROUP_CP_ASYNC_4(char4); -__CLC_GROUP_CP_ASYNC_4(schar4); __CLC_GROUP_CP_ASYNC_4(uchar4); #undef __CLC_GROUP_CP_ASYNC_4 @@ -122,7 +118,6 @@ __CLC_GROUP_CP_ASYNC_8(int2); __CLC_GROUP_CP_ASYNC_8(uint2); __CLC_GROUP_CP_ASYNC_8(float2); __CLC_GROUP_CP_ASYNC_8(char8); -__CLC_GROUP_CP_ASYNC_8(schar8); __CLC_GROUP_CP_ASYNC_8(uchar8); #undef __CLC_GROUP_CP_ASYNC_8 @@ -157,6 +152,5 @@ __CLC_GROUP_CP_ASYNC_8(uchar8); __CLC_GROUP_CP_ASYNC_16(half8); __CLC_GROUP_CP_ASYNC_16(char16); __CLC_GROUP_CP_ASYNC_16(uchar16); - __CLC_GROUP_CP_ASYNC_16(schar16); #undef __CLC_GROUP_CP_ASYNC_16 diff --git a/libclc/libspirv/lib/ptx-nvidiacl/integer/mul_hi.cl b/libclc/libspirv/lib/ptx-nvidiacl/integer/mul_hi.cl index 56593bbcb0c31..7844544982e4a 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/integer/mul_hi.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/integer/mul_hi.cl @@ -33,15 +33,12 @@ _CLC_OVERLOAD _CLC_DEF ulong __spirv_ocl_u_mul_hi(ulong x, ulong y) { } __CLC_MUL_HI_IMPL(short, __spirv_ocl_s_mul_hi, char, 8) -__CLC_MUL_HI_IMPL(short, __spirv_ocl_s_mul_hi, schar, 8) __CLC_MUL_HI_IMPL(ushort, __spirv_ocl_u_mul_hi, uchar, 8) __CLC_MUL_HI_IMPL(int, __spirv_ocl_s_mul_hi, short, 16) __CLC_MUL_HI_IMPL(uint, __spirv_ocl_u_mul_hi, ushort, 16) _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, __spirv_ocl_s_mul_hi, char, char) -_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, schar, __spirv_ocl_s_mul_hi, - schar, schar) _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, __spirv_ocl_s_mul_hi, short, short) _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, __spirv_ocl_s_mul_hi, int, diff --git a/libclc/libspirv/lib/ptx-nvidiacl/relational/isfinite.cl b/libclc/libspirv/lib/ptx-nvidiacl/relational/isfinite.cl index 35eac192d2e62..05fbd70e6195c 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/relational/isfinite.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/relational/isfinite.cl @@ -19,7 +19,7 @@ _CLC_DEF _CLC_OVERLOAD bool __spirv_IsFinite(double x) { return __nv_isfinited(x); } -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(schar, __spirv_IsFinite, double) +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(char, __spirv_IsFinite, double) #endif @@ -27,7 +27,7 @@ _CLC_DEF _CLC_OVERLOAD bool __spirv_IsFinite(float x) { return __nv_isfinited(x); } -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(schar, __spirv_IsFinite, float) +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(char, __spirv_IsFinite, float) #ifdef cl_khr_fp16 @@ -37,6 +37,6 @@ _CLC_DEF _CLC_OVERLOAD bool __spirv_IsFinite(half x) { return __nv_isfinited(x); } -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(schar, __spirv_IsFinite, half) +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(char, __spirv_IsFinite, half) #endif diff --git a/libclc/libspirv/lib/ptx-nvidiacl/relational/isinf.cl b/libclc/libspirv/lib/ptx-nvidiacl/relational/isinf.cl index bec7872b834b2..a0d6dfd89aebe 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/relational/isinf.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/relational/isinf.cl @@ -13,7 +13,7 @@ _CLC_DEF _CLC_OVERLOAD bool __spirv_IsInf(float x) { return __nv_isinff(x); } -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(schar, __spirv_IsInf, float) +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(char, __spirv_IsInf, float) #ifdef cl_khr_fp64 @@ -21,7 +21,7 @@ _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(schar, __spirv_IsInf, float) _CLC_DEF _CLC_OVERLOAD bool __spirv_IsInf(double x) { return __nv_isinfd(x); } -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(schar, __spirv_IsInf, double) +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(char, __spirv_IsInf, double) #endif #ifdef cl_khr_fp16 @@ -33,5 +33,5 @@ _CLC_DEF _CLC_OVERLOAD bool __spirv_IsInf(half x) { return __spirv_IsInf(f); } -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(schar, __spirv_IsInf, half) +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(char, __spirv_IsInf, half) #endif diff --git a/libclc/libspirv/lib/ptx-nvidiacl/relational/isnan.cl b/libclc/libspirv/lib/ptx-nvidiacl/relational/isnan.cl index cba65a524f36a..1a391fbfdcf2e 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/relational/isnan.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/relational/isnan.cl @@ -13,7 +13,7 @@ _CLC_DEF _CLC_OVERLOAD bool __spirv_IsNan(float x) { return __nv_isnanf(x); } -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(schar, __spirv_IsNan, float) +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(char, __spirv_IsNan, float) #ifdef cl_khr_fp64 @@ -21,7 +21,7 @@ _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(schar, __spirv_IsNan, float) _CLC_DEF _CLC_OVERLOAD bool __spirv_IsNan(double x) { return __nv_isnand(x); } -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(schar, __spirv_IsNan, double) +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(char, __spirv_IsNan, double) #endif #ifdef cl_khr_fp16 @@ -33,5 +33,5 @@ _CLC_DEF _CLC_OVERLOAD bool __spirv_IsNan(half x) { return __spirv_IsNan(f); } -_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(schar, __spirv_IsNan, half) +_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(char, __spirv_IsNan, half) #endif diff --git a/libclc/opencl/include/clc/opencl/as_type.h b/libclc/opencl/include/clc/opencl/as_type.h index d661ff3fd89a9..b122614de1d74 100644 --- a/libclc/opencl/include/clc/opencl/as_type.h +++ b/libclc/opencl/include/clc/opencl/as_type.h @@ -9,7 +9,6 @@ //===----------------------------------------------------------------------===// #define as_char(x) __builtin_astype(x, char) -#define as_schar(x) __builtin_astype(x, schar) #define as_uchar(x) __builtin_astype(x, uchar) #define as_short(x) __builtin_astype(x, short) #define as_ushort(x) __builtin_astype(x, ushort) @@ -20,7 +19,6 @@ #define as_float(x) __builtin_astype(x, float) #define as_char2(x) __builtin_astype(x, char2) -#define as_schar2(x) __builtin_astype(x, schar2) #define as_uchar2(x) __builtin_astype(x, uchar2) #define as_short2(x) __builtin_astype(x, short2) #define as_ushort2(x) __builtin_astype(x, ushort2) @@ -31,7 +29,6 @@ #define as_float2(x) __builtin_astype(x, float2) #define as_char3(x) __builtin_astype(x, char3) -#define as_schar3(x) __builtin_astype(x, schar3) #define as_uchar3(x) __builtin_astype(x, uchar3) #define as_short3(x) __builtin_astype(x, short3) #define as_ushort3(x) __builtin_astype(x, ushort3) @@ -42,7 +39,6 @@ #define as_float3(x) __builtin_astype(x, float3) #define as_char4(x) __builtin_astype(x, char4) -#define as_schar4(x) __builtin_astype(x, schar4) #define as_uchar4(x) __builtin_astype(x, uchar4) #define as_short4(x) __builtin_astype(x, short4) #define as_ushort4(x) __builtin_astype(x, ushort4) @@ -53,7 +49,6 @@ #define as_float4(x) __builtin_astype(x, float4) #define as_char8(x) __builtin_astype(x, char8) -#define as_schar8(x) __builtin_astype(x, schar8) #define as_uchar8(x) __builtin_astype(x, uchar8) #define as_short8(x) __builtin_astype(x, short8) #define as_ushort8(x) __builtin_astype(x, ushort8) @@ -64,7 +59,6 @@ #define as_float8(x) __builtin_astype(x, float8) #define as_char16(x) __builtin_astype(x, char16) -#define as_schar16(x) __builtin_astype(x, schar16) #define as_uchar16(x) __builtin_astype(x, uchar16) #define as_short16(x) __builtin_astype(x, short16) #define as_ushort16(x) __builtin_astype(x, ushort16) diff --git a/libclc/opencl/include/clc/opencl/clc.h b/libclc/opencl/include/clc/opencl/clc.h index 0513349863be1..5859a00c3158b 100644 --- a/libclc/opencl/include/clc/opencl/clc.h +++ b/libclc/opencl/include/clc/opencl/clc.h @@ -23,8 +23,6 @@ #pragma OPENCL EXTENSION cl_khr_fp16 : enable #endif -#define __CLC_NO_SCHAR - /* Function Attributes */ #include diff --git a/libclc/utils/gen_convert.py b/libclc/utils/gen_convert.py index 0cd7fd97625c3..02893dbad6a37 100644 --- a/libclc/utils/gen_convert.py +++ b/libclc/utils/gen_convert.py @@ -68,14 +68,9 @@ saturation = ["", "_sat"] rounding_modes = ["_rtz", "_rte", "_rtp", "_rtn"] -if clc: - types.insert(1, "schar") - int_types.insert(1, "schar") - bool_type = { "char": "char", - "schar": "schar", - "uchar": "schar" if clc else "char", + "uchar": "char", "short": "short", "ushort": "short", "int": "int", @@ -89,7 +84,6 @@ unsigned_type = { "char": "uchar", - "schar": "uchar", "uchar": "uchar", "short": "ushort", "ushort": "ushort", @@ -101,7 +95,6 @@ sizeof_type = { "char": 1, - "schar": 1, "uchar": 1, "short": 2, "ushort": 2, @@ -116,7 +109,6 @@ limit_max = { "char": "CHAR_MAX", - "schar": "CHAR_MAX", "uchar": "UCHAR_MAX", "short": "SHRT_MAX", "ushort": "USHRT_MAX", @@ -129,7 +121,6 @@ limit_min = { "char": "CHAR_MIN", - "schar": "CHAR_MIN", "uchar": "0", "short": "SHRT_MIN", "ushort": "0", diff --git a/libclc/utils/libclc-remangler/LibclcRemangler.cpp b/libclc/utils/libclc-remangler/LibclcRemangler.cpp index 9dd590e5cac07..e0e54ea6cbfda 100644 --- a/libclc/utils/libclc-remangler/LibclcRemangler.cpp +++ b/libclc/utils/libclc-remangler/LibclcRemangler.cpp @@ -752,6 +752,8 @@ class TargetTypeReplacements { else if (CloneTypeReplacements[From] != To) RemangledCloneTypeReplacements[From] = To; } + // Replace char with signed char + RemangledCloneTypeReplacements["char"] = "signed char"; } public: diff --git a/sycl/include/sycl/detail/vector_convert.hpp b/sycl/include/sycl/detail/vector_convert.hpp index d66f869885efc..8f4ff466c04f9 100644 --- a/sycl/include/sycl/detail/vector_convert.hpp +++ b/sycl/include/sycl/detail/vector_convert.hpp @@ -387,94 +387,94 @@ using enable_if_to_int_vector_t = RoundingModeCondition, RoundingMode>::type; // signed to signed, unsigned to unsigned conversions -#define __SYCL_SCALAR_INT_INT_CONVERT(Op, DestType) \ +#define __SYCL_SCALAR_INT_INT_CONVERT(Op, DestType, SPVType) \ template \ enable_if_to_int_scalar_t \ Op##Convert(From value) { \ - return __spirv_##Op##Convert_R##DestType(value); \ + return __spirv_##Op##Convert_R##SPVType(value); \ } -#define __SYCL_VECTOR_INT_INT_CONVERT(Op, N, DestType) \ +#define __SYCL_VECTOR_INT_INT_CONVERT(Op, N, DestType, SPVType) \ template \ enable_if_to_int_vector_t \ Op##Convert(From value) { \ - return __spirv_##Op##Convert_R##DestType##N(value); \ + return __spirv_##Op##Convert_R##SPVType##N(value); \ } -#define __SYCL_INT_INT_CONVERT(Op, DestType) \ - __SYCL_SCALAR_INT_INT_CONVERT(Op, DestType) \ - __SYCL_VECTOR_INT_INT_CONVERT(Op, 2, DestType) \ - __SYCL_VECTOR_INT_INT_CONVERT(Op, 3, DestType) \ - __SYCL_VECTOR_INT_INT_CONVERT(Op, 4, DestType) \ - __SYCL_VECTOR_INT_INT_CONVERT(Op, 8, DestType) \ - __SYCL_VECTOR_INT_INT_CONVERT(Op, 16, DestType) +#define __SYCL_INT_INT_CONVERT(Op, DestType, SPVType) \ + __SYCL_SCALAR_INT_INT_CONVERT(Op, DestType, SPVType) \ + __SYCL_VECTOR_INT_INT_CONVERT(Op, 2, DestType, SPVType) \ + __SYCL_VECTOR_INT_INT_CONVERT(Op, 3, DestType, SPVType) \ + __SYCL_VECTOR_INT_INT_CONVERT(Op, 4, DestType, SPVType) \ + __SYCL_VECTOR_INT_INT_CONVERT(Op, 8, DestType, SPVType) \ + __SYCL_VECTOR_INT_INT_CONVERT(Op, 16, DestType, SPVType) -__SYCL_INT_INT_CONVERT(S, char) -__SYCL_INT_INT_CONVERT(S, short) -__SYCL_INT_INT_CONVERT(S, int) -__SYCL_INT_INT_CONVERT(S, long) +__SYCL_INT_INT_CONVERT(S, char, schar) +__SYCL_INT_INT_CONVERT(S, short, short) +__SYCL_INT_INT_CONVERT(S, int, int) +__SYCL_INT_INT_CONVERT(S, long, long) -__SYCL_INT_INT_CONVERT(U, uchar) -__SYCL_INT_INT_CONVERT(U, ushort) -__SYCL_INT_INT_CONVERT(U, uint) -__SYCL_INT_INT_CONVERT(U, ulong) +__SYCL_INT_INT_CONVERT(U, uchar, uchar) +__SYCL_INT_INT_CONVERT(U, ushort, ushort) +__SYCL_INT_INT_CONVERT(U, uint, uint) +__SYCL_INT_INT_CONVERT(U, ulong, ulong) #undef __SYCL_SCALAR_INT_INT_CONVERT #undef __SYCL_VECTOR_INT_INT_CONVERT #undef __SYCL_INT_INT_CONVERT // float to signed, float to unsigned conversion -#define __SYCL_SCALAR_FLOAT_INT_CONVERT(Op, DestType, RoundingMode, \ +#define __SYCL_SCALAR_FLOAT_INT_CONVERT(Op, DestType, SPVType, RoundingMode, \ RoundingModeCondition) \ template \ enable_if_to_int_scalar_t \ Convert##Op(From Value) { \ - return __spirv_Convert##Op##_R##DestType##_##RoundingMode(Value); \ + return __spirv_Convert##Op##_R##SPVType##_##RoundingMode(Value); \ } -#define __SYCL_VECTOR_FLOAT_INT_CONVERT(Op, N, DestType, RoundingMode, \ - RoundingModeCondition) \ +#define __SYCL_VECTOR_FLOAT_INT_CONVERT(Op, N, DestType, SPVType, \ + RoundingMode, RoundingModeCondition) \ template \ enable_if_to_int_vector_t \ Convert##Op(From Value) { \ - return __spirv_Convert##Op##_R##DestType##N##_##RoundingMode(Value); \ + return __spirv_Convert##Op##_R##SPVType##N##_##RoundingMode(Value); \ } -#define __SYCL_FLOAT_INT_CONVERT(Op, DestType, RoundingMode, \ +#define __SYCL_FLOAT_INT_CONVERT(Op, DestType, SPVType, RoundingMode, \ RoundingModeCondition) \ - __SYCL_SCALAR_FLOAT_INT_CONVERT(Op, DestType, RoundingMode, \ + __SYCL_SCALAR_FLOAT_INT_CONVERT(Op, DestType, SPVType, RoundingMode, \ RoundingModeCondition) \ - __SYCL_VECTOR_FLOAT_INT_CONVERT(Op, 2, DestType, RoundingMode, \ + __SYCL_VECTOR_FLOAT_INT_CONVERT(Op, 2, DestType, SPVType, RoundingMode, \ RoundingModeCondition) \ - __SYCL_VECTOR_FLOAT_INT_CONVERT(Op, 3, DestType, RoundingMode, \ + __SYCL_VECTOR_FLOAT_INT_CONVERT(Op, 3, DestType, SPVType, RoundingMode, \ RoundingModeCondition) \ - __SYCL_VECTOR_FLOAT_INT_CONVERT(Op, 4, DestType, RoundingMode, \ + __SYCL_VECTOR_FLOAT_INT_CONVERT(Op, 4, DestType, SPVType, RoundingMode, \ RoundingModeCondition) \ - __SYCL_VECTOR_FLOAT_INT_CONVERT(Op, 8, DestType, RoundingMode, \ + __SYCL_VECTOR_FLOAT_INT_CONVERT(Op, 8, DestType, SPVType, RoundingMode, \ RoundingModeCondition) \ - __SYCL_VECTOR_FLOAT_INT_CONVERT(Op, 16, DestType, RoundingMode, \ + __SYCL_VECTOR_FLOAT_INT_CONVERT(Op, 16, DestType, SPVType, RoundingMode, \ RoundingModeCondition) -#define __SYCL_FLOAT_INT_CONVERT_FOR_TYPE(Op, DestType) \ - __SYCL_FLOAT_INT_CONVERT(Op, DestType, rte, RteOrAutomatic) \ - __SYCL_FLOAT_INT_CONVERT(Op, DestType, rtz, Rtz) \ - __SYCL_FLOAT_INT_CONVERT(Op, DestType, rtp, Rtp) \ - __SYCL_FLOAT_INT_CONVERT(Op, DestType, rtn, Rtn) - -__SYCL_FLOAT_INT_CONVERT_FOR_TYPE(FToS, char) -__SYCL_FLOAT_INT_CONVERT_FOR_TYPE(FToS, short) -__SYCL_FLOAT_INT_CONVERT_FOR_TYPE(FToS, int) -__SYCL_FLOAT_INT_CONVERT_FOR_TYPE(FToS, long) - -__SYCL_FLOAT_INT_CONVERT_FOR_TYPE(FToU, uchar) -__SYCL_FLOAT_INT_CONVERT_FOR_TYPE(FToU, ushort) -__SYCL_FLOAT_INT_CONVERT_FOR_TYPE(FToU, uint) -__SYCL_FLOAT_INT_CONVERT_FOR_TYPE(FToU, ulong) +#define __SYCL_FLOAT_INT_CONVERT_FOR_TYPE(Op, DestType, SPVType) \ + __SYCL_FLOAT_INT_CONVERT(Op, DestType, SPVType, rte, RteOrAutomatic) \ + __SYCL_FLOAT_INT_CONVERT(Op, DestType, SPVType, rtz, Rtz) \ + __SYCL_FLOAT_INT_CONVERT(Op, DestType, SPVType, rtp, Rtp) \ + __SYCL_FLOAT_INT_CONVERT(Op, DestType, SPVType, rtn, Rtn) + +__SYCL_FLOAT_INT_CONVERT_FOR_TYPE(FToS, char, schar) +__SYCL_FLOAT_INT_CONVERT_FOR_TYPE(FToS, short, short) +__SYCL_FLOAT_INT_CONVERT_FOR_TYPE(FToS, int, int) +__SYCL_FLOAT_INT_CONVERT_FOR_TYPE(FToS, long, long) + +__SYCL_FLOAT_INT_CONVERT_FOR_TYPE(FToU, uchar, uchar) +__SYCL_FLOAT_INT_CONVERT_FOR_TYPE(FToU, ushort, ushort) +__SYCL_FLOAT_INT_CONVERT_FOR_TYPE(FToU, uint, uint) +__SYCL_FLOAT_INT_CONVERT_FOR_TYPE(FToU, ulong, ulong) #undef __SYCL_SCALAR_FLOAT_INT_CONVERT #undef __SYCL_VECTOR_FLOAT_INT_CONVERT diff --git a/sycl/test/check_device_code/char_builtins.cpp b/sycl/test/check_device_code/char_builtins.cpp new file mode 100644 index 0000000000000..a28947315f5d3 --- /dev/null +++ b/sycl/test/check_device_code/char_builtins.cpp @@ -0,0 +1,207 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clangxx -I %sycl_include -fno-discard-value-names -fsigned-char -O3 -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -disable-lifetime-markers -fsycl-device-only %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SCHAR +// RUN: %clangxx -I %sycl_include -fno-discard-value-names -fno-signed-char -O3 -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -disable-lifetime-markers -fsycl-device-only %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UCHAR + +#include + +template +SYCL_EXTERNAL void test_builtins( + T *out, T x, T y, sycl::vec *out2, sycl::vec f2, + sycl::nd_item<2> item, + sycl::multi_ptr ptr_global, + sycl::multi_ptr ptr_local) { + out[0] = sycl::min(x, y); + out[1] = sycl::max(x, y); + out[2] = sycl::clz(x); + out[3] = sycl::ctz(x); + out[4] = sycl::abs(x); + out[5] = sycl::hadd(x, y); + out[6] = sycl::rhadd(x, y); + + out2[0] = f2.template convert(); + + size_t num_elem = 4; + const auto group = item.get_group(); + group.async_work_group_copy(ptr_local, ptr_global, num_elem); +} + +// CHECK-SCHAR-LABEL: define dso_local spir_func void @_Z18test_char_builtinsPcccPN4sycl3_V13vecIcLi2EEENS2_IfLi2EEENS1_7nd_itemILi2EEENS1_9multi_ptrIcLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IcLSA_3ELSB_2EEE( +// CHECK-SCHAR-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef signext [[X:%.*]], i8 noundef signext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 2)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.32") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { +// CHECK-SCHAR-NEXT: [[ENTRY:.*:]] +// CHECK-SCHAR-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[F2]], align 8 +// CHECK-SCHAR-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7:![0-9]+]] +// CHECK-SCHAR-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR_LOCAL]], align 8, !tbaa [[TBAA7]] +// CHECK-SCHAR-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef signext i8 @_Z17__spirv_ocl_s_minaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3:[0-9]+]] +// CHECK-SCHAR-NEXT: store i8 [[CALL_I_I_I]], ptr addrspace(4) [[OUT]], align 1, !tbaa [[TBAA12:![0-9]+]] +// CHECK-SCHAR-NEXT: [[CALL_I_I30_I:%.*]] = tail call spir_func noundef signext i8 @_Z17__spirv_ocl_s_maxaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3]] +// CHECK-SCHAR-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 1 +// CHECK-SCHAR-NEXT: store i8 [[CALL_I_I30_I]], ptr addrspace(4) [[ARRAYIDX2_I]], align 1, !tbaa [[TBAA12]] +// CHECK-SCHAR-NEXT: [[CALL_I_I31_I:%.*]] = tail call spir_func noundef signext i8 @_Z15__spirv_ocl_clza(i8 noundef signext [[X]]) #[[ATTR3]] +// CHECK-SCHAR-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 2 +// CHECK-SCHAR-NEXT: store i8 [[CALL_I_I31_I]], ptr addrspace(4) [[ARRAYIDX4_I]], align 1, !tbaa [[TBAA12]] +// CHECK-SCHAR-NEXT: [[CALL_I_I32_I:%.*]] = tail call spir_func noundef signext i8 @_Z15__spirv_ocl_ctza(i8 noundef signext [[X]]) #[[ATTR3]] +// CHECK-SCHAR-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 3 +// CHECK-SCHAR-NEXT: store i8 [[CALL_I_I32_I]], ptr addrspace(4) [[ARRAYIDX6_I]], align 1, !tbaa [[TBAA12]] +// CHECK-SCHAR-NEXT: [[CALL_I_I33_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_s_absa(i8 noundef signext [[X]]) #[[ATTR3]] +// CHECK-SCHAR-NEXT: [[ARRAYIDX8_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 4 +// CHECK-SCHAR-NEXT: store i8 [[CALL_I_I33_I]], ptr addrspace(4) [[ARRAYIDX8_I]], align 1, !tbaa [[TBAA12]] +// CHECK-SCHAR-NEXT: [[CALL_I_I34_I:%.*]] = tail call spir_func noundef signext i8 @_Z18__spirv_ocl_s_haddaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3]] +// CHECK-SCHAR-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 5 +// CHECK-SCHAR-NEXT: store i8 [[CALL_I_I34_I]], ptr addrspace(4) [[ARRAYIDX10_I]], align 1, !tbaa [[TBAA12]] +// CHECK-SCHAR-NEXT: [[CALL_I_I35_I:%.*]] = tail call spir_func noundef signext i8 @_Z19__spirv_ocl_s_rhaddaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3]] +// CHECK-SCHAR-NEXT: [[ARRAYIDX12_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 6 +// CHECK-SCHAR-NEXT: store i8 [[CALL_I_I35_I]], ptr addrspace(4) [[ARRAYIDX12_I]], align 1, !tbaa [[TBAA12]] +// CHECK-SCHAR-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef <2 x i8> @_Z31__spirv_ConvertFToS_Rschar2_rteDv2_f(<2 x float> noundef [[TMP0]]) #[[ATTR3]] +// CHECK-SCHAR-NEXT: store <2 x i8> [[CALL_I_I_I_I]], ptr addrspace(4) [[OUT2]], align 2 +// CHECK-SCHAR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(1) +// CHECK-SCHAR-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(3) +// CHECK-SCHAR-NEXT: [[CALL3_I_I_I:%.*]] = tail call spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyiPU3AS3aPU3AS1Kamm9ocl_event(i32 noundef 2, ptr addrspace(3) noundef [[TMP4]], ptr addrspace(1) noundef [[TMP3]], i64 noundef 4, i64 noundef 1, target("spirv.Event") zeroinitializer) #[[ATTR4:[0-9]+]], !noalias [[META13:![0-9]+]] +// CHECK-SCHAR-NEXT: ret void +// +// CHECK-UCHAR-LABEL: define dso_local spir_func void @_Z18test_char_builtinsPcccPN4sycl3_V13vecIcLi2EEENS2_IfLi2EEENS1_7nd_itemILi2EEENS1_9multi_ptrIcLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IcLSA_3ELSB_2EEE( +// CHECK-UCHAR-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef zeroext [[X:%.*]], i8 noundef zeroext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 2)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.32") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { +// CHECK-UCHAR-NEXT: [[ENTRY:.*:]] +// CHECK-UCHAR-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[F2]], align 8 +// CHECK-UCHAR-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7:![0-9]+]] +// CHECK-UCHAR-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR_LOCAL]], align 8, !tbaa [[TBAA7]] +// CHECK-UCHAR-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_u_minhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3:[0-9]+]] +// CHECK-UCHAR-NEXT: store i8 [[CALL_I_I_I]], ptr addrspace(4) [[OUT]], align 1, !tbaa [[TBAA12:![0-9]+]] +// CHECK-UCHAR-NEXT: [[CALL_I_I30_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_u_maxhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] +// CHECK-UCHAR-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 1 +// CHECK-UCHAR-NEXT: store i8 [[CALL_I_I30_I]], ptr addrspace(4) [[ARRAYIDX2_I]], align 1, !tbaa [[TBAA12]] +// CHECK-UCHAR-NEXT: [[CALL_I_I31_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z15__spirv_ocl_clzh(i8 noundef zeroext [[X]]) #[[ATTR3]] +// CHECK-UCHAR-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 2 +// CHECK-UCHAR-NEXT: store i8 [[CALL_I_I31_I]], ptr addrspace(4) [[ARRAYIDX4_I]], align 1, !tbaa [[TBAA12]] +// CHECK-UCHAR-NEXT: [[CALL_I_I32_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z15__spirv_ocl_ctzh(i8 noundef zeroext [[X]]) #[[ATTR3]] +// CHECK-UCHAR-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 3 +// CHECK-UCHAR-NEXT: store i8 [[CALL_I_I32_I]], ptr addrspace(4) [[ARRAYIDX6_I]], align 1, !tbaa [[TBAA12]] +// CHECK-UCHAR-NEXT: [[CALL_I_I33_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_u_absh(i8 noundef zeroext [[X]]) #[[ATTR3]] +// CHECK-UCHAR-NEXT: [[ARRAYIDX8_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 4 +// CHECK-UCHAR-NEXT: store i8 [[CALL_I_I33_I]], ptr addrspace(4) [[ARRAYIDX8_I]], align 1, !tbaa [[TBAA12]] +// CHECK-UCHAR-NEXT: [[CALL_I_I34_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z18__spirv_ocl_u_haddhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] +// CHECK-UCHAR-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 5 +// CHECK-UCHAR-NEXT: store i8 [[CALL_I_I34_I]], ptr addrspace(4) [[ARRAYIDX10_I]], align 1, !tbaa [[TBAA12]] +// CHECK-UCHAR-NEXT: [[CALL_I_I35_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z19__spirv_ocl_u_rhaddhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] +// CHECK-UCHAR-NEXT: [[ARRAYIDX12_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 6 +// CHECK-UCHAR-NEXT: store i8 [[CALL_I_I35_I]], ptr addrspace(4) [[ARRAYIDX12_I]], align 1, !tbaa [[TBAA12]] +// CHECK-UCHAR-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef <2 x i8> @_Z31__spirv_ConvertFToU_Ruchar2_rteDv2_f(<2 x float> noundef [[TMP0]]) #[[ATTR3]] +// CHECK-UCHAR-NEXT: store <2 x i8> [[CALL_I_I_I_I]], ptr addrspace(4) [[OUT2]], align 2 +// CHECK-UCHAR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(1) +// CHECK-UCHAR-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(3) +// CHECK-UCHAR-NEXT: [[CALL3_I_I_I:%.*]] = tail call spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyiPU3AS3hPU3AS1Khmm9ocl_event(i32 noundef 2, ptr addrspace(3) noundef [[TMP4]], ptr addrspace(1) noundef [[TMP3]], i64 noundef 4, i64 noundef 1, target("spirv.Event") zeroinitializer) #[[ATTR4:[0-9]+]], !noalias [[META13:![0-9]+]] +// CHECK-UCHAR-NEXT: ret void +// +SYCL_EXTERNAL void test_char_builtins( + char *out, char x, char y, sycl::vec *out2, sycl::vec f2, + sycl::nd_item<2> item, + sycl::multi_ptr ptr_global, + sycl::multi_ptr ptr_local) { + test_builtins(out, x, y, out2, f2, item, ptr_global, ptr_local); +} + +// CHECK-LABEL: define dso_local spir_func void @_Z19test_schar_builtinsPaaaPN4sycl3_V13vecIaLi2EEENS2_IfLi2EEENS1_7nd_itemILi2EEENS1_9multi_ptrIaLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IaLSA_3ELSB_2EEE( +// CHECK-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef signext [[X:%.*]], i8 noundef signext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 2)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.83") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.84") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[F2]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR_LOCAL]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef signext i8 @_Z17__spirv_ocl_s_minaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3:[0-9]+]] +// CHECK-NEXT: store i8 [[CALL_I_I_I]], ptr addrspace(4) [[OUT]], align 1, !tbaa [[TBAA12:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I30_I:%.*]] = tail call spir_func noundef signext i8 @_Z17__spirv_ocl_s_maxaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3]] +// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 1 +// CHECK-NEXT: store i8 [[CALL_I_I30_I]], ptr addrspace(4) [[ARRAYIDX2_I]], align 1, !tbaa [[TBAA12]] +// CHECK-NEXT: [[CALL_I_I31_I:%.*]] = tail call spir_func noundef signext i8 @_Z15__spirv_ocl_clza(i8 noundef signext [[X]]) #[[ATTR3]] +// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 2 +// CHECK-NEXT: store i8 [[CALL_I_I31_I]], ptr addrspace(4) [[ARRAYIDX4_I]], align 1, !tbaa [[TBAA12]] +// CHECK-NEXT: [[CALL_I_I32_I:%.*]] = tail call spir_func noundef signext i8 @_Z15__spirv_ocl_ctza(i8 noundef signext [[X]]) #[[ATTR3]] +// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 3 +// CHECK-NEXT: store i8 [[CALL_I_I32_I]], ptr addrspace(4) [[ARRAYIDX6_I]], align 1, !tbaa [[TBAA12]] +// CHECK-NEXT: [[CALL_I_I33_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_s_absa(i8 noundef signext [[X]]) #[[ATTR3]] +// CHECK-NEXT: [[ARRAYIDX8_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 4 +// CHECK-NEXT: store i8 [[CALL_I_I33_I]], ptr addrspace(4) [[ARRAYIDX8_I]], align 1, !tbaa [[TBAA12]] +// CHECK-NEXT: [[CALL_I_I34_I:%.*]] = tail call spir_func noundef signext i8 @_Z18__spirv_ocl_s_haddaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3]] +// CHECK-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 5 +// CHECK-NEXT: store i8 [[CALL_I_I34_I]], ptr addrspace(4) [[ARRAYIDX10_I]], align 1, !tbaa [[TBAA12]] +// CHECK-NEXT: [[CALL_I_I35_I:%.*]] = tail call spir_func noundef signext i8 @_Z19__spirv_ocl_s_rhaddaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3]] +// CHECK-NEXT: [[ARRAYIDX12_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 6 +// CHECK-NEXT: store i8 [[CALL_I_I35_I]], ptr addrspace(4) [[ARRAYIDX12_I]], align 1, !tbaa [[TBAA12]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef <2 x i8> @_Z31__spirv_ConvertFToS_Rschar2_rteDv2_f(<2 x float> noundef [[TMP0]]) #[[ATTR3]] +// CHECK-NEXT: store <2 x i8> [[CALL_I_I_I_I]], ptr addrspace(4) [[OUT2]], align 2 +// CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(1) +// CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(3) +// CHECK-NEXT: [[CALL3_I_I_I:%.*]] = tail call spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyiPU3AS3aPU3AS1Kamm9ocl_event(i32 noundef 2, ptr addrspace(3) noundef [[TMP4]], ptr addrspace(1) noundef [[TMP3]], i64 noundef 4, i64 noundef 1, target("spirv.Event") zeroinitializer) #[[ATTR4:[0-9]+]], !noalias [[META18:![0-9]+]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL void test_schar_builtins( + signed char *out, signed char x, signed char y, + sycl::vec *out2, sycl::vec f2, + sycl::nd_item<2> item, + sycl::multi_ptr + ptr_global, + sycl::multi_ptr + ptr_local) { + test_builtins(out, x, y, out2, f2, item, ptr_global, ptr_local); +} + +// CHECK-LABEL: define dso_local spir_func void @_Z19test_schar_builtinsPhhhPN4sycl3_V13vecIhLi2EEENS2_IfLi2EEENS1_7nd_itemILi2EEENS1_9multi_ptrIhLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IhLSA_3ELSB_2EEE( +// CHECK-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef zeroext [[X:%.*]], i8 noundef zeroext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 2)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.137") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.138") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[F2]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR_LOCAL]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_u_minhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] +// CHECK-NEXT: store i8 [[CALL_I_I_I]], ptr addrspace(4) [[OUT]], align 1, !tbaa [[TBAA12]] +// CHECK-NEXT: [[CALL_I_I30_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_u_maxhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] +// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 1 +// CHECK-NEXT: store i8 [[CALL_I_I30_I]], ptr addrspace(4) [[ARRAYIDX2_I]], align 1, !tbaa [[TBAA12]] +// CHECK-NEXT: [[CALL_I_I31_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z15__spirv_ocl_clzh(i8 noundef zeroext [[X]]) #[[ATTR3]] +// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 2 +// CHECK-NEXT: store i8 [[CALL_I_I31_I]], ptr addrspace(4) [[ARRAYIDX4_I]], align 1, !tbaa [[TBAA12]] +// CHECK-NEXT: [[CALL_I_I32_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z15__spirv_ocl_ctzh(i8 noundef zeroext [[X]]) #[[ATTR3]] +// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 3 +// CHECK-NEXT: store i8 [[CALL_I_I32_I]], ptr addrspace(4) [[ARRAYIDX6_I]], align 1, !tbaa [[TBAA12]] +// CHECK-NEXT: [[CALL_I_I33_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_u_absh(i8 noundef zeroext [[X]]) #[[ATTR3]] +// CHECK-NEXT: [[ARRAYIDX8_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 4 +// CHECK-NEXT: store i8 [[CALL_I_I33_I]], ptr addrspace(4) [[ARRAYIDX8_I]], align 1, !tbaa [[TBAA12]] +// CHECK-NEXT: [[CALL_I_I34_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z18__spirv_ocl_u_haddhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] +// CHECK-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 5 +// CHECK-NEXT: store i8 [[CALL_I_I34_I]], ptr addrspace(4) [[ARRAYIDX10_I]], align 1, !tbaa [[TBAA12]] +// CHECK-NEXT: [[CALL_I_I35_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z19__spirv_ocl_u_rhaddhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] +// CHECK-NEXT: [[ARRAYIDX12_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 6 +// CHECK-NEXT: store i8 [[CALL_I_I35_I]], ptr addrspace(4) [[ARRAYIDX12_I]], align 1, !tbaa [[TBAA12]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef <2 x i8> @_Z31__spirv_ConvertFToU_Ruchar2_rteDv2_f(<2 x float> noundef [[TMP0]]) #[[ATTR3]] +// CHECK-NEXT: store <2 x i8> [[CALL_I_I_I_I]], ptr addrspace(4) [[OUT2]], align 2 +// CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(1) +// CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(3) +// CHECK-NEXT: [[CALL3_I_I_I:%.*]] = tail call spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyiPU3AS3hPU3AS1Khmm9ocl_event(i32 noundef 2, ptr addrspace(3) noundef [[TMP4]], ptr addrspace(1) noundef [[TMP3]], i64 noundef 4, i64 noundef 1, target("spirv.Event") zeroinitializer) #[[ATTR4]], !noalias [[META23:![0-9]+]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL void test_schar_builtins( + unsigned char *out, unsigned char x, unsigned char y, + sycl::vec *out2, sycl::vec f2, + sycl::nd_item<2> item, + sycl::multi_ptr + ptr_global, + sycl::multi_ptr + ptr_local) { + test_builtins(out, x, y, out2, f2, item, ptr_global, ptr_local); +} +//. +// CHECK: [[META6]] = !{} +// CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +// CHECK: [[META8]] = !{!"p1 omnipotent char", [[META9:![0-9]+]], i64 0} +// CHECK: [[META9]] = !{!"any pointer", [[META10:![0-9]+]], i64 0} +// CHECK: [[META10]] = !{!"omnipotent char", [[META11:![0-9]+]], i64 0} +// CHECK: [[META11]] = !{!"Simple C++ TBAA"} +// CHECK: [[TBAA12]] = !{[[META10]], [[META10]], i64 0} +// CHECK: [[META18]] = !{[[META19:![0-9]+]], [[META21:![0-9]+]]} +// CHECK: [[META19]] = distinct !{[[META19]], [[META20:![0-9]+]], !"_ZNK4sycl3_V15groupILi2EE21async_work_group_copyIaEENSt9enable_ifIXntsr6detail7is_boolIT_EE5valueENS0_12device_eventEE4typeENS0_9multi_ptrIS5_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEENS9_IS5_LSB_1ELSC_2EEEmm: %agg.result"} +// CHECK: [[META20]] = distinct !{[[META20]], !"_ZNK4sycl3_V15groupILi2EE21async_work_group_copyIaEENSt9enable_ifIXntsr6detail7is_boolIT_EE5valueENS0_12device_eventEE4typeENS0_9multi_ptrIS5_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEENS9_IS5_LSB_1ELSC_2EEEmm"} +// CHECK: [[META21]] = distinct !{[[META21]], [[META22:![0-9]+]], !"_ZNK4sycl3_V15groupILi2EE21async_work_group_copyIaEENS0_12device_eventENS0_9multi_ptrIT_LNS0_6access13address_spaceE3ELNS7_9decoratedE2EEENS5_IS6_LS8_1ELS9_2EEEm: %agg.result"} +// CHECK: [[META22]] = distinct !{[[META22]], !"_ZNK4sycl3_V15groupILi2EE21async_work_group_copyIaEENS0_12device_eventENS0_9multi_ptrIT_LNS0_6access13address_spaceE3ELNS7_9decoratedE2EEENS5_IS6_LS8_1ELS9_2EEEm"} +// CHECK: [[META23]] = !{[[META24:![0-9]+]], [[META26:![0-9]+]]} +// CHECK: [[META24]] = distinct !{[[META24]], [[META25:![0-9]+]], !"_ZNK4sycl3_V15groupILi2EE21async_work_group_copyIhEENSt9enable_ifIXntsr6detail7is_boolIT_EE5valueENS0_12device_eventEE4typeENS0_9multi_ptrIS5_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEENS9_IS5_LSB_1ELSC_2EEEmm: %agg.result"} +// CHECK: [[META25]] = distinct !{[[META25]], !"_ZNK4sycl3_V15groupILi2EE21async_work_group_copyIhEENSt9enable_ifIXntsr6detail7is_boolIT_EE5valueENS0_12device_eventEE4typeENS0_9multi_ptrIS5_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEENS9_IS5_LSB_1ELSC_2EEEmm"} +// CHECK: [[META26]] = distinct !{[[META26]], [[META27:![0-9]+]], !"_ZNK4sycl3_V15groupILi2EE21async_work_group_copyIhEENS0_12device_eventENS0_9multi_ptrIT_LNS0_6access13address_spaceE3ELNS7_9decoratedE2EEENS5_IS6_LS8_1ELS9_2EEEm: %agg.result"} +// CHECK: [[META27]] = distinct !{[[META27]], !"_ZNK4sycl3_V15groupILi2EE21async_work_group_copyIhEENS0_12device_eventENS0_9multi_ptrIT_LNS0_6access13address_spaceE3ELNS7_9decoratedE2EEENS5_IS6_LS8_1ELS9_2EEEm"} +//. From 105741903edac3b7bf50bfcd32a43f7154a0e434 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Wed, 11 Jun 2025 11:25:02 +0100 Subject: [PATCH 2/5] update checks --- sycl/test/check_device_code/char_builtins.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sycl/test/check_device_code/char_builtins.cpp b/sycl/test/check_device_code/char_builtins.cpp index a28947315f5d3..af39a2eaf393f 100644 --- a/sycl/test/check_device_code/char_builtins.cpp +++ b/sycl/test/check_device_code/char_builtins.cpp @@ -2,6 +2,10 @@ // RUN: %clangxx -I %sycl_include -fno-discard-value-names -fsigned-char -O3 -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -disable-lifetime-markers -fsycl-device-only %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SCHAR // RUN: %clangxx -I %sycl_include -fno-discard-value-names -fno-signed-char -O3 -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -disable-lifetime-markers -fsycl-device-only %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UCHAR +// Note: we've used update_cc_test_checks but because the mangling of multi_ptr +// differs on different hosts, the check has been updated not to rely on the +// specific numeric suffix, e.g., %"class.sycl::_V1::multi_ptr{{.*}}". + #include template @@ -26,7 +30,7 @@ SYCL_EXTERNAL void test_builtins( } // CHECK-SCHAR-LABEL: define dso_local spir_func void @_Z18test_char_builtinsPcccPN4sycl3_V13vecIcLi2EEENS2_IfLi2EEENS1_7nd_itemILi2EEENS1_9multi_ptrIcLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IcLSA_3ELSB_2EEE( -// CHECK-SCHAR-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef signext [[X:%.*]], i8 noundef signext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 2)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.32") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { +// CHECK-SCHAR-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef signext [[X:%.*]], i8 noundef signext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 2)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { // CHECK-SCHAR-NEXT: [[ENTRY:.*:]] // CHECK-SCHAR-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[F2]], align 8 // CHECK-SCHAR-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7:![0-9]+]] @@ -59,7 +63,7 @@ SYCL_EXTERNAL void test_builtins( // CHECK-SCHAR-NEXT: ret void // // CHECK-UCHAR-LABEL: define dso_local spir_func void @_Z18test_char_builtinsPcccPN4sycl3_V13vecIcLi2EEENS2_IfLi2EEENS1_7nd_itemILi2EEENS1_9multi_ptrIcLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IcLSA_3ELSB_2EEE( -// CHECK-UCHAR-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef zeroext [[X:%.*]], i8 noundef zeroext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 2)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.32") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { +// CHECK-UCHAR-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef zeroext [[X:%.*]], i8 noundef zeroext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 2)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { // CHECK-UCHAR-NEXT: [[ENTRY:.*:]] // CHECK-UCHAR-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[F2]], align 8 // CHECK-UCHAR-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7:![0-9]+]] @@ -100,7 +104,7 @@ SYCL_EXTERNAL void test_char_builtins( } // CHECK-LABEL: define dso_local spir_func void @_Z19test_schar_builtinsPaaaPN4sycl3_V13vecIaLi2EEENS2_IfLi2EEENS1_7nd_itemILi2EEENS1_9multi_ptrIaLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IaLSA_3ELSB_2EEE( -// CHECK-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef signext [[X:%.*]], i8 noundef signext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 2)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.83") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.84") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { +// CHECK-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef signext [[X:%.*]], i8 noundef signext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 2)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[F2]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7:![0-9]+]] @@ -144,7 +148,7 @@ SYCL_EXTERNAL void test_schar_builtins( } // CHECK-LABEL: define dso_local spir_func void @_Z19test_schar_builtinsPhhhPN4sycl3_V13vecIhLi2EEENS2_IfLi2EEENS1_7nd_itemILi2EEENS1_9multi_ptrIhLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IhLSA_3ELSB_2EEE( -// CHECK-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef zeroext [[X:%.*]], i8 noundef zeroext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 2)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.137") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.138") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef zeroext [[X:%.*]], i8 noundef zeroext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 2)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[F2]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7]] From d78adc9b97fbc69014a9053427e7d2e6788589de Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Wed, 11 Jun 2025 14:56:45 +0100 Subject: [PATCH 3/5] update test? --- sycl/test/check_device_code/char_builtins.cpp | 88 +++++++++---------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/sycl/test/check_device_code/char_builtins.cpp b/sycl/test/check_device_code/char_builtins.cpp index af39a2eaf393f..21f031eacfc78 100644 --- a/sycl/test/check_device_code/char_builtins.cpp +++ b/sycl/test/check_device_code/char_builtins.cpp @@ -10,7 +10,7 @@ template SYCL_EXTERNAL void test_builtins( - T *out, T x, T y, sycl::vec *out2, sycl::vec f2, + T *out, T x, T y, sycl::vec *out2, sycl::vec f2, sycl::nd_item<2> item, sycl::multi_ptr ptr_global, sycl::multi_ptr ptr_local) { @@ -29,12 +29,12 @@ SYCL_EXTERNAL void test_builtins( group.async_work_group_copy(ptr_local, ptr_global, num_elem); } -// CHECK-SCHAR-LABEL: define dso_local spir_func void @_Z18test_char_builtinsPcccPN4sycl3_V13vecIcLi2EEENS2_IfLi2EEENS1_7nd_itemILi2EEENS1_9multi_ptrIcLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IcLSA_3ELSB_2EEE( -// CHECK-SCHAR-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef signext [[X:%.*]], i8 noundef signext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 2)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { +// CHECK-SCHAR-LABEL: define dso_local spir_func void @_Z18test_char_builtinsPcccPN4sycl3_V13vecIcLi4EEENS2_IfLi4EEENS1_7nd_itemILi2EEENS1_9multi_ptrIcLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IcLSA_3ELSB_2EEE( +// CHECK-SCHAR-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef signext [[X:%.*]], i8 noundef signext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 16 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { // CHECK-SCHAR-NEXT: [[ENTRY:.*:]] -// CHECK-SCHAR-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[F2]], align 8 -// CHECK-SCHAR-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7:![0-9]+]] -// CHECK-SCHAR-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR_LOCAL]], align 8, !tbaa [[TBAA7]] +// CHECK-SCHAR-NEXT: [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <4 x float>, ptr [[F2]], align 16 +// CHECK-SCHAR-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7:![0-9]+]] +// CHECK-SCHAR-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_LOCAL]], align 8, !tbaa [[TBAA7]] // CHECK-SCHAR-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef signext i8 @_Z17__spirv_ocl_s_minaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3:[0-9]+]] // CHECK-SCHAR-NEXT: store i8 [[CALL_I_I_I]], ptr addrspace(4) [[OUT]], align 1, !tbaa [[TBAA12:![0-9]+]] // CHECK-SCHAR-NEXT: [[CALL_I_I30_I:%.*]] = tail call spir_func noundef signext i8 @_Z17__spirv_ocl_s_maxaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3]] @@ -55,19 +55,19 @@ SYCL_EXTERNAL void test_builtins( // CHECK-SCHAR-NEXT: [[CALL_I_I35_I:%.*]] = tail call spir_func noundef signext i8 @_Z19__spirv_ocl_s_rhaddaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3]] // CHECK-SCHAR-NEXT: [[ARRAYIDX12_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 6 // CHECK-SCHAR-NEXT: store i8 [[CALL_I_I35_I]], ptr addrspace(4) [[ARRAYIDX12_I]], align 1, !tbaa [[TBAA12]] -// CHECK-SCHAR-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef <2 x i8> @_Z31__spirv_ConvertFToS_Rschar2_rteDv2_f(<2 x float> noundef [[TMP0]]) #[[ATTR3]] -// CHECK-SCHAR-NEXT: store <2 x i8> [[CALL_I_I_I_I]], ptr addrspace(4) [[OUT2]], align 2 -// CHECK-SCHAR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(1) -// CHECK-SCHAR-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(3) -// CHECK-SCHAR-NEXT: [[CALL3_I_I_I:%.*]] = tail call spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyiPU3AS3aPU3AS1Kamm9ocl_event(i32 noundef 2, ptr addrspace(3) noundef [[TMP4]], ptr addrspace(1) noundef [[TMP3]], i64 noundef 4, i64 noundef 1, target("spirv.Event") zeroinitializer) #[[ATTR4:[0-9]+]], !noalias [[META13:![0-9]+]] +// CHECK-SCHAR-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef <4 x i8> @_Z31__spirv_ConvertFToS_Rschar4_rteDv4_f(<4 x float> noundef [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD]]) #[[ATTR3]] +// CHECK-SCHAR-NEXT: store <4 x i8> [[CALL_I_I_I_I]], ptr addrspace(4) [[OUT2]], align 4 +// CHECK-SCHAR-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(1) +// CHECK-SCHAR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(3) +// CHECK-SCHAR-NEXT: [[CALL3_I_I_I:%.*]] = tail call spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyiPU3AS3aPU3AS1Kamm9ocl_event(i32 noundef 2, ptr addrspace(3) noundef [[TMP3]], ptr addrspace(1) noundef [[TMP2]], i64 noundef 4, i64 noundef 1, target("spirv.Event") zeroinitializer) #[[ATTR4:[0-9]+]], !noalias [[META13:![0-9]+]] // CHECK-SCHAR-NEXT: ret void // -// CHECK-UCHAR-LABEL: define dso_local spir_func void @_Z18test_char_builtinsPcccPN4sycl3_V13vecIcLi2EEENS2_IfLi2EEENS1_7nd_itemILi2EEENS1_9multi_ptrIcLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IcLSA_3ELSB_2EEE( -// CHECK-UCHAR-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef zeroext [[X:%.*]], i8 noundef zeroext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 2)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { +// CHECK-UCHAR-LABEL: define dso_local spir_func void @_Z18test_char_builtinsPcccPN4sycl3_V13vecIcLi4EEENS2_IfLi4EEENS1_7nd_itemILi2EEENS1_9multi_ptrIcLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IcLSA_3ELSB_2EEE( +// CHECK-UCHAR-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef zeroext [[X:%.*]], i8 noundef zeroext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 16 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { // CHECK-UCHAR-NEXT: [[ENTRY:.*:]] -// CHECK-UCHAR-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[F2]], align 8 -// CHECK-UCHAR-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7:![0-9]+]] -// CHECK-UCHAR-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR_LOCAL]], align 8, !tbaa [[TBAA7]] +// CHECK-UCHAR-NEXT: [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <4 x float>, ptr [[F2]], align 16 +// CHECK-UCHAR-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7:![0-9]+]] +// CHECK-UCHAR-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_LOCAL]], align 8, !tbaa [[TBAA7]] // CHECK-UCHAR-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_u_minhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3:[0-9]+]] // CHECK-UCHAR-NEXT: store i8 [[CALL_I_I_I]], ptr addrspace(4) [[OUT]], align 1, !tbaa [[TBAA12:![0-9]+]] // CHECK-UCHAR-NEXT: [[CALL_I_I30_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_u_maxhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] @@ -88,27 +88,27 @@ SYCL_EXTERNAL void test_builtins( // CHECK-UCHAR-NEXT: [[CALL_I_I35_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z19__spirv_ocl_u_rhaddhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] // CHECK-UCHAR-NEXT: [[ARRAYIDX12_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 6 // CHECK-UCHAR-NEXT: store i8 [[CALL_I_I35_I]], ptr addrspace(4) [[ARRAYIDX12_I]], align 1, !tbaa [[TBAA12]] -// CHECK-UCHAR-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef <2 x i8> @_Z31__spirv_ConvertFToU_Ruchar2_rteDv2_f(<2 x float> noundef [[TMP0]]) #[[ATTR3]] -// CHECK-UCHAR-NEXT: store <2 x i8> [[CALL_I_I_I_I]], ptr addrspace(4) [[OUT2]], align 2 -// CHECK-UCHAR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(1) -// CHECK-UCHAR-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(3) -// CHECK-UCHAR-NEXT: [[CALL3_I_I_I:%.*]] = tail call spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyiPU3AS3hPU3AS1Khmm9ocl_event(i32 noundef 2, ptr addrspace(3) noundef [[TMP4]], ptr addrspace(1) noundef [[TMP3]], i64 noundef 4, i64 noundef 1, target("spirv.Event") zeroinitializer) #[[ATTR4:[0-9]+]], !noalias [[META13:![0-9]+]] +// CHECK-UCHAR-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef <4 x i8> @_Z31__spirv_ConvertFToU_Ruchar4_rteDv4_f(<4 x float> noundef [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD]]) #[[ATTR3]] +// CHECK-UCHAR-NEXT: store <4 x i8> [[CALL_I_I_I_I]], ptr addrspace(4) [[OUT2]], align 4 +// CHECK-UCHAR-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(1) +// CHECK-UCHAR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(3) +// CHECK-UCHAR-NEXT: [[CALL3_I_I_I:%.*]] = tail call spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyiPU3AS3hPU3AS1Khmm9ocl_event(i32 noundef 2, ptr addrspace(3) noundef [[TMP3]], ptr addrspace(1) noundef [[TMP2]], i64 noundef 4, i64 noundef 1, target("spirv.Event") zeroinitializer) #[[ATTR4:[0-9]+]], !noalias [[META13:![0-9]+]] // CHECK-UCHAR-NEXT: ret void // SYCL_EXTERNAL void test_char_builtins( - char *out, char x, char y, sycl::vec *out2, sycl::vec f2, + char *out, char x, char y, sycl::vec *out2, sycl::vec f2, sycl::nd_item<2> item, sycl::multi_ptr ptr_global, sycl::multi_ptr ptr_local) { test_builtins(out, x, y, out2, f2, item, ptr_global, ptr_local); } -// CHECK-LABEL: define dso_local spir_func void @_Z19test_schar_builtinsPaaaPN4sycl3_V13vecIaLi2EEENS2_IfLi2EEENS1_7nd_itemILi2EEENS1_9multi_ptrIaLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IaLSA_3ELSB_2EEE( -// CHECK-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef signext [[X:%.*]], i8 noundef signext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 2)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { +// CHECK-LABEL: define dso_local spir_func void @_Z19test_schar_builtinsPaaaPN4sycl3_V13vecIaLi4EEENS2_IfLi4EEENS1_7nd_itemILi2EEENS1_9multi_ptrIaLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IaLSA_3ELSB_2EEE( +// CHECK-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef signext [[X:%.*]], i8 noundef signext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 16 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[F2]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7:![0-9]+]] -// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR_LOCAL]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <4 x float>, ptr [[F2]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_LOCAL]], align 8, !tbaa [[TBAA7]] // CHECK-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef signext i8 @_Z17__spirv_ocl_s_minaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3:[0-9]+]] // CHECK-NEXT: store i8 [[CALL_I_I_I]], ptr addrspace(4) [[OUT]], align 1, !tbaa [[TBAA12:![0-9]+]] // CHECK-NEXT: [[CALL_I_I30_I:%.*]] = tail call spir_func noundef signext i8 @_Z17__spirv_ocl_s_maxaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3]] @@ -129,16 +129,16 @@ SYCL_EXTERNAL void test_char_builtins( // CHECK-NEXT: [[CALL_I_I35_I:%.*]] = tail call spir_func noundef signext i8 @_Z19__spirv_ocl_s_rhaddaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3]] // CHECK-NEXT: [[ARRAYIDX12_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 6 // CHECK-NEXT: store i8 [[CALL_I_I35_I]], ptr addrspace(4) [[ARRAYIDX12_I]], align 1, !tbaa [[TBAA12]] -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef <2 x i8> @_Z31__spirv_ConvertFToS_Rschar2_rteDv2_f(<2 x float> noundef [[TMP0]]) #[[ATTR3]] -// CHECK-NEXT: store <2 x i8> [[CALL_I_I_I_I]], ptr addrspace(4) [[OUT2]], align 2 -// CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(1) -// CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(3) -// CHECK-NEXT: [[CALL3_I_I_I:%.*]] = tail call spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyiPU3AS3aPU3AS1Kamm9ocl_event(i32 noundef 2, ptr addrspace(3) noundef [[TMP4]], ptr addrspace(1) noundef [[TMP3]], i64 noundef 4, i64 noundef 1, target("spirv.Event") zeroinitializer) #[[ATTR4:[0-9]+]], !noalias [[META18:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef <4 x i8> @_Z31__spirv_ConvertFToS_Rschar4_rteDv4_f(<4 x float> noundef [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD]]) #[[ATTR3]] +// CHECK-NEXT: store <4 x i8> [[CALL_I_I_I_I]], ptr addrspace(4) [[OUT2]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(1) +// CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(3) +// CHECK-NEXT: [[CALL3_I_I_I:%.*]] = tail call spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyiPU3AS3aPU3AS1Kamm9ocl_event(i32 noundef 2, ptr addrspace(3) noundef [[TMP3]], ptr addrspace(1) noundef [[TMP2]], i64 noundef 4, i64 noundef 1, target("spirv.Event") zeroinitializer) #[[ATTR4:[0-9]+]], !noalias [[META18:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL void test_schar_builtins( signed char *out, signed char x, signed char y, - sycl::vec *out2, sycl::vec f2, + sycl::vec *out2, sycl::vec f2, sycl::nd_item<2> item, sycl::multi_ptr ptr_global, @@ -147,12 +147,12 @@ SYCL_EXTERNAL void test_schar_builtins( test_builtins(out, x, y, out2, f2, item, ptr_global, ptr_local); } -// CHECK-LABEL: define dso_local spir_func void @_Z19test_schar_builtinsPhhhPN4sycl3_V13vecIhLi2EEENS2_IfLi2EEENS1_7nd_itemILi2EEENS1_9multi_ptrIhLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IhLSA_3ELSB_2EEE( -// CHECK-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef zeroext [[X:%.*]], i8 noundef zeroext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 2)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { +// CHECK-LABEL: define dso_local spir_func void @_Z19test_schar_builtinsPhhhPN4sycl3_V13vecIhLi4EEENS2_IfLi4EEENS1_7nd_itemILi2EEENS1_9multi_ptrIhLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IhLSA_3ELSB_2EEE( +// CHECK-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef zeroext [[X:%.*]], i8 noundef zeroext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 16 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[F2]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7]] -// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR_LOCAL]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <4 x float>, ptr [[F2]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_LOCAL]], align 8, !tbaa [[TBAA7]] // CHECK-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_u_minhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] // CHECK-NEXT: store i8 [[CALL_I_I_I]], ptr addrspace(4) [[OUT]], align 1, !tbaa [[TBAA12]] // CHECK-NEXT: [[CALL_I_I30_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_u_maxhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] @@ -173,16 +173,16 @@ SYCL_EXTERNAL void test_schar_builtins( // CHECK-NEXT: [[CALL_I_I35_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z19__spirv_ocl_u_rhaddhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] // CHECK-NEXT: [[ARRAYIDX12_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 6 // CHECK-NEXT: store i8 [[CALL_I_I35_I]], ptr addrspace(4) [[ARRAYIDX12_I]], align 1, !tbaa [[TBAA12]] -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef <2 x i8> @_Z31__spirv_ConvertFToU_Ruchar2_rteDv2_f(<2 x float> noundef [[TMP0]]) #[[ATTR3]] -// CHECK-NEXT: store <2 x i8> [[CALL_I_I_I_I]], ptr addrspace(4) [[OUT2]], align 2 -// CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(1) -// CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(3) -// CHECK-NEXT: [[CALL3_I_I_I:%.*]] = tail call spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyiPU3AS3hPU3AS1Khmm9ocl_event(i32 noundef 2, ptr addrspace(3) noundef [[TMP4]], ptr addrspace(1) noundef [[TMP3]], i64 noundef 4, i64 noundef 1, target("spirv.Event") zeroinitializer) #[[ATTR4]], !noalias [[META23:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef <4 x i8> @_Z31__spirv_ConvertFToU_Ruchar4_rteDv4_f(<4 x float> noundef [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD]]) #[[ATTR3]] +// CHECK-NEXT: store <4 x i8> [[CALL_I_I_I_I]], ptr addrspace(4) [[OUT2]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(1) +// CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(3) +// CHECK-NEXT: [[CALL3_I_I_I:%.*]] = tail call spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyiPU3AS3hPU3AS1Khmm9ocl_event(i32 noundef 2, ptr addrspace(3) noundef [[TMP3]], ptr addrspace(1) noundef [[TMP2]], i64 noundef 4, i64 noundef 1, target("spirv.Event") zeroinitializer) #[[ATTR4]], !noalias [[META23:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL void test_schar_builtins( unsigned char *out, unsigned char x, unsigned char y, - sycl::vec *out2, sycl::vec f2, + sycl::vec *out2, sycl::vec f2, sycl::nd_item<2> item, sycl::multi_ptr ptr_global, From 7a3b24578f5b92f3138f3473723dbbe89e2673dc Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 12 Jun 2025 15:44:17 +0100 Subject: [PATCH 4/5] convert test to E2E --- sycl/test-e2e/Basic/char_builtins.cpp | 142 ++++++++++++ sycl/test/check_device_code/char_builtins.cpp | 211 ------------------ 2 files changed, 142 insertions(+), 211 deletions(-) create mode 100644 sycl/test-e2e/Basic/char_builtins.cpp delete mode 100644 sycl/test/check_device_code/char_builtins.cpp diff --git a/sycl/test-e2e/Basic/char_builtins.cpp b/sycl/test-e2e/Basic/char_builtins.cpp new file mode 100644 index 0000000000000..2413cbc8758af --- /dev/null +++ b/sycl/test-e2e/Basic/char_builtins.cpp @@ -0,0 +1,142 @@ +// RUN: %{build} -Wno-absolute-value -Wno-deprecated-declarations -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include + +using namespace sycl; + +constexpr size_t BufferSize = 16; +constexpr size_t NElems = 32; +constexpr size_t WorkGroupSize = 8; + +#if 0 +template +SYCL_EXTERNAL void test_builtins( + T *out, T x, T y, sycl::vec *out2, sycl::vec f2, + sycl::nd_item<2> item, + sycl::multi_ptr ptr_global, + sycl::multi_ptr ptr_local) { + size_t num_elem = 4; + const auto group = item.get_group(); + group.async_work_group_copy(ptr_local, ptr_global, num_elem); +} +#endif + +template +int check(const T *A, const T *B, T *C, const vec FVec) { +#define UNARY_CHECK(IDX, OP) \ + assert(C[IDX] == OP(A[IDX]) && "error: " #OP "failed") + + UNARY_CHECK(0, clz); + UNARY_CHECK(1, ctz); + UNARY_CHECK(2, std::abs); + +#define BINARY_CHECK(IDX, OP) \ + assert(C[IDX] == OP(A[IDX], B[IDX]) && "error: " #OP "failed") + + BINARY_CHECK(3, std::min); + BINARY_CHECK(4, std::max); + + auto hadd = [](auto x, auto y) { + return (static_cast(x) + static_cast(y)) >> 1; + }; + BINARY_CHECK(5, hadd); + auto rhadd = [](auto x, auto y) { + return (static_cast(x) + static_cast(y) + 1) >> 1; + }; + BINARY_CHECK(6, rhadd); + + assert(C[7] == (T)(FVec[0])); + assert(C[8] == (T)(FVec[1])); + + return 0; +#undef UNARY_CHECK +#undef BINARY_CHECK +} + +template int do_test(const T *A, const T *B, T *C) { + queue Q; + // Avoid out-of-range float->(u)char errors by keeping these values within + // range. + vec FVec = {1.0f, 127.0f}; + { + buffer ABuf(A, BufferSize); + buffer BBuf(B, BufferSize); + buffer CBuf(C, BufferSize); + Q.submit([&](handler &CGH) { + auto A = ABuf.template get_access(CGH); + auto B = BBuf.template get_access(CGH); + auto C = CBuf.template get_access(CGH); + CGH.single_task<>([=]() { + C[0] = clz(A[0]); + C[1] = ctz(A[1]); + C[2] = abs(A[2]); + C[3] = min(A[3], B[3]); + C[4] = max(A[4], B[4]); + C[5] = hadd(A[5], B[5]); + C[6] = rhadd(A[6], B[6]); + + vec conv = FVec.template convert(); + C[7] = conv[0]; + C[8] = conv[1]; + }); + }); + } + + // Regression test async work-group copy builtins + { + buffer ABuf(A, BufferSize); + Q.submit([&](handler &CGH) { + auto A = ABuf.template get_access(CGH); + local_accessor Local(range<1>{WorkGroupSize}, CGH); + + nd_range<1> NDR{range<1>(NElems), range<1>(WorkGroupSize)}; + CGH.parallel_for<>(NDR, [=](nd_item<1> NDId) { + auto GrId = NDId.get_group_linear_id(); + size_t NElemsToCopy = WorkGroupSize; + size_t Offset = GrId * WorkGroupSize; + auto E = NDId.async_work_group_copy( + Local.template get_multi_ptr(), + A.template get_multi_ptr() + Offset, + NElemsToCopy); + E.wait(); + }); + }); + } + + if (!std::is_same_v) { + return check(A, B, C, FVec); + } + + // Cast 'char' to signed or unsigned char to check the device's char + // signedness matches the host's. + if constexpr (std::numeric_limits::is_signed) { + return check(reinterpret_cast(A), + reinterpret_cast(B), + reinterpret_cast(C), FVec); + } + return check(reinterpret_cast(A), + reinterpret_cast(B), + reinterpret_cast(C), FVec); +} + +int main() { + std::array A, B, C; + + std::fill(A.begin(), A.end(), 1); + std::fill(B.begin(), B.end(), 128); + std::fill(C.begin(), C.end(), std::numeric_limits::max()); + + int ret = do_test(A.data(), B.data(), C.data()); + + ret |= do_test(reinterpret_cast(A.data()), + reinterpret_cast(B.data()), + reinterpret_cast(C.data())); + + ret |= do_test(reinterpret_cast(A.data()), + reinterpret_cast(B.data()), + reinterpret_cast(C.data())); + return ret; +} diff --git a/sycl/test/check_device_code/char_builtins.cpp b/sycl/test/check_device_code/char_builtins.cpp deleted file mode 100644 index 21f031eacfc78..0000000000000 --- a/sycl/test/check_device_code/char_builtins.cpp +++ /dev/null @@ -1,211 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clangxx -I %sycl_include -fno-discard-value-names -fsigned-char -O3 -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -disable-lifetime-markers -fsycl-device-only %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SCHAR -// RUN: %clangxx -I %sycl_include -fno-discard-value-names -fno-signed-char -O3 -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -disable-lifetime-markers -fsycl-device-only %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UCHAR - -// Note: we've used update_cc_test_checks but because the mangling of multi_ptr -// differs on different hosts, the check has been updated not to rely on the -// specific numeric suffix, e.g., %"class.sycl::_V1::multi_ptr{{.*}}". - -#include - -template -SYCL_EXTERNAL void test_builtins( - T *out, T x, T y, sycl::vec *out2, sycl::vec f2, - sycl::nd_item<2> item, - sycl::multi_ptr ptr_global, - sycl::multi_ptr ptr_local) { - out[0] = sycl::min(x, y); - out[1] = sycl::max(x, y); - out[2] = sycl::clz(x); - out[3] = sycl::ctz(x); - out[4] = sycl::abs(x); - out[5] = sycl::hadd(x, y); - out[6] = sycl::rhadd(x, y); - - out2[0] = f2.template convert(); - - size_t num_elem = 4; - const auto group = item.get_group(); - group.async_work_group_copy(ptr_local, ptr_global, num_elem); -} - -// CHECK-SCHAR-LABEL: define dso_local spir_func void @_Z18test_char_builtinsPcccPN4sycl3_V13vecIcLi4EEENS2_IfLi4EEENS1_7nd_itemILi2EEENS1_9multi_ptrIcLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IcLSA_3ELSB_2EEE( -// CHECK-SCHAR-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef signext [[X:%.*]], i8 noundef signext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 16 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { -// CHECK-SCHAR-NEXT: [[ENTRY:.*:]] -// CHECK-SCHAR-NEXT: [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <4 x float>, ptr [[F2]], align 16 -// CHECK-SCHAR-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7:![0-9]+]] -// CHECK-SCHAR-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_LOCAL]], align 8, !tbaa [[TBAA7]] -// CHECK-SCHAR-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef signext i8 @_Z17__spirv_ocl_s_minaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3:[0-9]+]] -// CHECK-SCHAR-NEXT: store i8 [[CALL_I_I_I]], ptr addrspace(4) [[OUT]], align 1, !tbaa [[TBAA12:![0-9]+]] -// CHECK-SCHAR-NEXT: [[CALL_I_I30_I:%.*]] = tail call spir_func noundef signext i8 @_Z17__spirv_ocl_s_maxaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3]] -// CHECK-SCHAR-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 1 -// CHECK-SCHAR-NEXT: store i8 [[CALL_I_I30_I]], ptr addrspace(4) [[ARRAYIDX2_I]], align 1, !tbaa [[TBAA12]] -// CHECK-SCHAR-NEXT: [[CALL_I_I31_I:%.*]] = tail call spir_func noundef signext i8 @_Z15__spirv_ocl_clza(i8 noundef signext [[X]]) #[[ATTR3]] -// CHECK-SCHAR-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 2 -// CHECK-SCHAR-NEXT: store i8 [[CALL_I_I31_I]], ptr addrspace(4) [[ARRAYIDX4_I]], align 1, !tbaa [[TBAA12]] -// CHECK-SCHAR-NEXT: [[CALL_I_I32_I:%.*]] = tail call spir_func noundef signext i8 @_Z15__spirv_ocl_ctza(i8 noundef signext [[X]]) #[[ATTR3]] -// CHECK-SCHAR-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 3 -// CHECK-SCHAR-NEXT: store i8 [[CALL_I_I32_I]], ptr addrspace(4) [[ARRAYIDX6_I]], align 1, !tbaa [[TBAA12]] -// CHECK-SCHAR-NEXT: [[CALL_I_I33_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_s_absa(i8 noundef signext [[X]]) #[[ATTR3]] -// CHECK-SCHAR-NEXT: [[ARRAYIDX8_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 4 -// CHECK-SCHAR-NEXT: store i8 [[CALL_I_I33_I]], ptr addrspace(4) [[ARRAYIDX8_I]], align 1, !tbaa [[TBAA12]] -// CHECK-SCHAR-NEXT: [[CALL_I_I34_I:%.*]] = tail call spir_func noundef signext i8 @_Z18__spirv_ocl_s_haddaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3]] -// CHECK-SCHAR-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 5 -// CHECK-SCHAR-NEXT: store i8 [[CALL_I_I34_I]], ptr addrspace(4) [[ARRAYIDX10_I]], align 1, !tbaa [[TBAA12]] -// CHECK-SCHAR-NEXT: [[CALL_I_I35_I:%.*]] = tail call spir_func noundef signext i8 @_Z19__spirv_ocl_s_rhaddaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3]] -// CHECK-SCHAR-NEXT: [[ARRAYIDX12_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 6 -// CHECK-SCHAR-NEXT: store i8 [[CALL_I_I35_I]], ptr addrspace(4) [[ARRAYIDX12_I]], align 1, !tbaa [[TBAA12]] -// CHECK-SCHAR-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef <4 x i8> @_Z31__spirv_ConvertFToS_Rschar4_rteDv4_f(<4 x float> noundef [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD]]) #[[ATTR3]] -// CHECK-SCHAR-NEXT: store <4 x i8> [[CALL_I_I_I_I]], ptr addrspace(4) [[OUT2]], align 4 -// CHECK-SCHAR-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(1) -// CHECK-SCHAR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(3) -// CHECK-SCHAR-NEXT: [[CALL3_I_I_I:%.*]] = tail call spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyiPU3AS3aPU3AS1Kamm9ocl_event(i32 noundef 2, ptr addrspace(3) noundef [[TMP3]], ptr addrspace(1) noundef [[TMP2]], i64 noundef 4, i64 noundef 1, target("spirv.Event") zeroinitializer) #[[ATTR4:[0-9]+]], !noalias [[META13:![0-9]+]] -// CHECK-SCHAR-NEXT: ret void -// -// CHECK-UCHAR-LABEL: define dso_local spir_func void @_Z18test_char_builtinsPcccPN4sycl3_V13vecIcLi4EEENS2_IfLi4EEENS1_7nd_itemILi2EEENS1_9multi_ptrIcLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IcLSA_3ELSB_2EEE( -// CHECK-UCHAR-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef zeroext [[X:%.*]], i8 noundef zeroext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 16 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { -// CHECK-UCHAR-NEXT: [[ENTRY:.*:]] -// CHECK-UCHAR-NEXT: [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <4 x float>, ptr [[F2]], align 16 -// CHECK-UCHAR-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7:![0-9]+]] -// CHECK-UCHAR-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_LOCAL]], align 8, !tbaa [[TBAA7]] -// CHECK-UCHAR-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_u_minhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3:[0-9]+]] -// CHECK-UCHAR-NEXT: store i8 [[CALL_I_I_I]], ptr addrspace(4) [[OUT]], align 1, !tbaa [[TBAA12:![0-9]+]] -// CHECK-UCHAR-NEXT: [[CALL_I_I30_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_u_maxhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] -// CHECK-UCHAR-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 1 -// CHECK-UCHAR-NEXT: store i8 [[CALL_I_I30_I]], ptr addrspace(4) [[ARRAYIDX2_I]], align 1, !tbaa [[TBAA12]] -// CHECK-UCHAR-NEXT: [[CALL_I_I31_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z15__spirv_ocl_clzh(i8 noundef zeroext [[X]]) #[[ATTR3]] -// CHECK-UCHAR-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 2 -// CHECK-UCHAR-NEXT: store i8 [[CALL_I_I31_I]], ptr addrspace(4) [[ARRAYIDX4_I]], align 1, !tbaa [[TBAA12]] -// CHECK-UCHAR-NEXT: [[CALL_I_I32_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z15__spirv_ocl_ctzh(i8 noundef zeroext [[X]]) #[[ATTR3]] -// CHECK-UCHAR-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 3 -// CHECK-UCHAR-NEXT: store i8 [[CALL_I_I32_I]], ptr addrspace(4) [[ARRAYIDX6_I]], align 1, !tbaa [[TBAA12]] -// CHECK-UCHAR-NEXT: [[CALL_I_I33_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_u_absh(i8 noundef zeroext [[X]]) #[[ATTR3]] -// CHECK-UCHAR-NEXT: [[ARRAYIDX8_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 4 -// CHECK-UCHAR-NEXT: store i8 [[CALL_I_I33_I]], ptr addrspace(4) [[ARRAYIDX8_I]], align 1, !tbaa [[TBAA12]] -// CHECK-UCHAR-NEXT: [[CALL_I_I34_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z18__spirv_ocl_u_haddhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] -// CHECK-UCHAR-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 5 -// CHECK-UCHAR-NEXT: store i8 [[CALL_I_I34_I]], ptr addrspace(4) [[ARRAYIDX10_I]], align 1, !tbaa [[TBAA12]] -// CHECK-UCHAR-NEXT: [[CALL_I_I35_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z19__spirv_ocl_u_rhaddhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] -// CHECK-UCHAR-NEXT: [[ARRAYIDX12_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 6 -// CHECK-UCHAR-NEXT: store i8 [[CALL_I_I35_I]], ptr addrspace(4) [[ARRAYIDX12_I]], align 1, !tbaa [[TBAA12]] -// CHECK-UCHAR-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef <4 x i8> @_Z31__spirv_ConvertFToU_Ruchar4_rteDv4_f(<4 x float> noundef [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD]]) #[[ATTR3]] -// CHECK-UCHAR-NEXT: store <4 x i8> [[CALL_I_I_I_I]], ptr addrspace(4) [[OUT2]], align 4 -// CHECK-UCHAR-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(1) -// CHECK-UCHAR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(3) -// CHECK-UCHAR-NEXT: [[CALL3_I_I_I:%.*]] = tail call spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyiPU3AS3hPU3AS1Khmm9ocl_event(i32 noundef 2, ptr addrspace(3) noundef [[TMP3]], ptr addrspace(1) noundef [[TMP2]], i64 noundef 4, i64 noundef 1, target("spirv.Event") zeroinitializer) #[[ATTR4:[0-9]+]], !noalias [[META13:![0-9]+]] -// CHECK-UCHAR-NEXT: ret void -// -SYCL_EXTERNAL void test_char_builtins( - char *out, char x, char y, sycl::vec *out2, sycl::vec f2, - sycl::nd_item<2> item, - sycl::multi_ptr ptr_global, - sycl::multi_ptr ptr_local) { - test_builtins(out, x, y, out2, f2, item, ptr_global, ptr_local); -} - -// CHECK-LABEL: define dso_local spir_func void @_Z19test_schar_builtinsPaaaPN4sycl3_V13vecIaLi4EEENS2_IfLi4EEENS1_7nd_itemILi2EEENS1_9multi_ptrIaLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IaLSA_3ELSB_2EEE( -// CHECK-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef signext [[X:%.*]], i8 noundef signext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 16 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <4 x float>, ptr [[F2]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_LOCAL]], align 8, !tbaa [[TBAA7]] -// CHECK-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef signext i8 @_Z17__spirv_ocl_s_minaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3:[0-9]+]] -// CHECK-NEXT: store i8 [[CALL_I_I_I]], ptr addrspace(4) [[OUT]], align 1, !tbaa [[TBAA12:![0-9]+]] -// CHECK-NEXT: [[CALL_I_I30_I:%.*]] = tail call spir_func noundef signext i8 @_Z17__spirv_ocl_s_maxaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3]] -// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 1 -// CHECK-NEXT: store i8 [[CALL_I_I30_I]], ptr addrspace(4) [[ARRAYIDX2_I]], align 1, !tbaa [[TBAA12]] -// CHECK-NEXT: [[CALL_I_I31_I:%.*]] = tail call spir_func noundef signext i8 @_Z15__spirv_ocl_clza(i8 noundef signext [[X]]) #[[ATTR3]] -// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 2 -// CHECK-NEXT: store i8 [[CALL_I_I31_I]], ptr addrspace(4) [[ARRAYIDX4_I]], align 1, !tbaa [[TBAA12]] -// CHECK-NEXT: [[CALL_I_I32_I:%.*]] = tail call spir_func noundef signext i8 @_Z15__spirv_ocl_ctza(i8 noundef signext [[X]]) #[[ATTR3]] -// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 3 -// CHECK-NEXT: store i8 [[CALL_I_I32_I]], ptr addrspace(4) [[ARRAYIDX6_I]], align 1, !tbaa [[TBAA12]] -// CHECK-NEXT: [[CALL_I_I33_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_s_absa(i8 noundef signext [[X]]) #[[ATTR3]] -// CHECK-NEXT: [[ARRAYIDX8_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 4 -// CHECK-NEXT: store i8 [[CALL_I_I33_I]], ptr addrspace(4) [[ARRAYIDX8_I]], align 1, !tbaa [[TBAA12]] -// CHECK-NEXT: [[CALL_I_I34_I:%.*]] = tail call spir_func noundef signext i8 @_Z18__spirv_ocl_s_haddaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3]] -// CHECK-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 5 -// CHECK-NEXT: store i8 [[CALL_I_I34_I]], ptr addrspace(4) [[ARRAYIDX10_I]], align 1, !tbaa [[TBAA12]] -// CHECK-NEXT: [[CALL_I_I35_I:%.*]] = tail call spir_func noundef signext i8 @_Z19__spirv_ocl_s_rhaddaa(i8 noundef signext [[X]], i8 noundef signext [[Y]]) #[[ATTR3]] -// CHECK-NEXT: [[ARRAYIDX12_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 6 -// CHECK-NEXT: store i8 [[CALL_I_I35_I]], ptr addrspace(4) [[ARRAYIDX12_I]], align 1, !tbaa [[TBAA12]] -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef <4 x i8> @_Z31__spirv_ConvertFToS_Rschar4_rteDv4_f(<4 x float> noundef [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD]]) #[[ATTR3]] -// CHECK-NEXT: store <4 x i8> [[CALL_I_I_I_I]], ptr addrspace(4) [[OUT2]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(1) -// CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(3) -// CHECK-NEXT: [[CALL3_I_I_I:%.*]] = tail call spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyiPU3AS3aPU3AS1Kamm9ocl_event(i32 noundef 2, ptr addrspace(3) noundef [[TMP3]], ptr addrspace(1) noundef [[TMP2]], i64 noundef 4, i64 noundef 1, target("spirv.Event") zeroinitializer) #[[ATTR4:[0-9]+]], !noalias [[META18:![0-9]+]] -// CHECK-NEXT: ret void -// -SYCL_EXTERNAL void test_schar_builtins( - signed char *out, signed char x, signed char y, - sycl::vec *out2, sycl::vec f2, - sycl::nd_item<2> item, - sycl::multi_ptr - ptr_global, - sycl::multi_ptr - ptr_local) { - test_builtins(out, x, y, out2, f2, item, ptr_global, ptr_local); -} - -// CHECK-LABEL: define dso_local spir_func void @_Z19test_schar_builtinsPhhhPN4sycl3_V13vecIhLi4EEENS2_IfLi4EEENS1_7nd_itemILi2EEENS1_9multi_ptrIhLNS1_6access13address_spaceE1ELNS9_9decoratedE2EEENS8_IhLSA_3ELSB_2EEE( -// CHECK-SAME: ptr addrspace(4) noundef writeonly captures(none) initializes((0, 7)) [[OUT:%.*]], i8 noundef zeroext [[X:%.*]], i8 noundef zeroext [[Y:%.*]], ptr addrspace(4) noundef writeonly captures(none) initializes((0, 4)) [[OUT2:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 16 captures(none) [[F2:%.*]], ptr noundef readnone byval(%"class.sycl::_V1::nd_item") align 1 captures(none) [[ITEM:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_GLOBAL:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr{{.*}}") align 8 captures(none) [[PTR_LOCAL:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <4 x float>, ptr [[F2]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[PTR_GLOBAL]], align 8, !tbaa [[TBAA7]] -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[PTR_LOCAL]], align 8, !tbaa [[TBAA7]] -// CHECK-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_u_minhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] -// CHECK-NEXT: store i8 [[CALL_I_I_I]], ptr addrspace(4) [[OUT]], align 1, !tbaa [[TBAA12]] -// CHECK-NEXT: [[CALL_I_I30_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_u_maxhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] -// CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 1 -// CHECK-NEXT: store i8 [[CALL_I_I30_I]], ptr addrspace(4) [[ARRAYIDX2_I]], align 1, !tbaa [[TBAA12]] -// CHECK-NEXT: [[CALL_I_I31_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z15__spirv_ocl_clzh(i8 noundef zeroext [[X]]) #[[ATTR3]] -// CHECK-NEXT: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 2 -// CHECK-NEXT: store i8 [[CALL_I_I31_I]], ptr addrspace(4) [[ARRAYIDX4_I]], align 1, !tbaa [[TBAA12]] -// CHECK-NEXT: [[CALL_I_I32_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z15__spirv_ocl_ctzh(i8 noundef zeroext [[X]]) #[[ATTR3]] -// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 3 -// CHECK-NEXT: store i8 [[CALL_I_I32_I]], ptr addrspace(4) [[ARRAYIDX6_I]], align 1, !tbaa [[TBAA12]] -// CHECK-NEXT: [[CALL_I_I33_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z17__spirv_ocl_u_absh(i8 noundef zeroext [[X]]) #[[ATTR3]] -// CHECK-NEXT: [[ARRAYIDX8_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 4 -// CHECK-NEXT: store i8 [[CALL_I_I33_I]], ptr addrspace(4) [[ARRAYIDX8_I]], align 1, !tbaa [[TBAA12]] -// CHECK-NEXT: [[CALL_I_I34_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z18__spirv_ocl_u_haddhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] -// CHECK-NEXT: [[ARRAYIDX10_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 5 -// CHECK-NEXT: store i8 [[CALL_I_I34_I]], ptr addrspace(4) [[ARRAYIDX10_I]], align 1, !tbaa [[TBAA12]] -// CHECK-NEXT: [[CALL_I_I35_I:%.*]] = tail call spir_func noundef zeroext i8 @_Z19__spirv_ocl_u_rhaddhh(i8 noundef zeroext [[X]], i8 noundef zeroext [[Y]]) #[[ATTR3]] -// CHECK-NEXT: [[ARRAYIDX12_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[OUT]], i64 6 -// CHECK-NEXT: store i8 [[CALL_I_I35_I]], ptr addrspace(4) [[ARRAYIDX12_I]], align 1, !tbaa [[TBAA12]] -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef <4 x i8> @_Z31__spirv_ConvertFToU_Ruchar4_rteDv4_f(<4 x float> noundef [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD]]) #[[ATTR3]] -// CHECK-NEXT: store <4 x i8> [[CALL_I_I_I_I]], ptr addrspace(4) [[OUT2]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(1) -// CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(3) -// CHECK-NEXT: [[CALL3_I_I_I:%.*]] = tail call spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyiPU3AS3hPU3AS1Khmm9ocl_event(i32 noundef 2, ptr addrspace(3) noundef [[TMP3]], ptr addrspace(1) noundef [[TMP2]], i64 noundef 4, i64 noundef 1, target("spirv.Event") zeroinitializer) #[[ATTR4]], !noalias [[META23:![0-9]+]] -// CHECK-NEXT: ret void -// -SYCL_EXTERNAL void test_schar_builtins( - unsigned char *out, unsigned char x, unsigned char y, - sycl::vec *out2, sycl::vec f2, - sycl::nd_item<2> item, - sycl::multi_ptr - ptr_global, - sycl::multi_ptr - ptr_local) { - test_builtins(out, x, y, out2, f2, item, ptr_global, ptr_local); -} -//. -// CHECK: [[META6]] = !{} -// CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} -// CHECK: [[META8]] = !{!"p1 omnipotent char", [[META9:![0-9]+]], i64 0} -// CHECK: [[META9]] = !{!"any pointer", [[META10:![0-9]+]], i64 0} -// CHECK: [[META10]] = !{!"omnipotent char", [[META11:![0-9]+]], i64 0} -// CHECK: [[META11]] = !{!"Simple C++ TBAA"} -// CHECK: [[TBAA12]] = !{[[META10]], [[META10]], i64 0} -// CHECK: [[META18]] = !{[[META19:![0-9]+]], [[META21:![0-9]+]]} -// CHECK: [[META19]] = distinct !{[[META19]], [[META20:![0-9]+]], !"_ZNK4sycl3_V15groupILi2EE21async_work_group_copyIaEENSt9enable_ifIXntsr6detail7is_boolIT_EE5valueENS0_12device_eventEE4typeENS0_9multi_ptrIS5_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEENS9_IS5_LSB_1ELSC_2EEEmm: %agg.result"} -// CHECK: [[META20]] = distinct !{[[META20]], !"_ZNK4sycl3_V15groupILi2EE21async_work_group_copyIaEENSt9enable_ifIXntsr6detail7is_boolIT_EE5valueENS0_12device_eventEE4typeENS0_9multi_ptrIS5_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEENS9_IS5_LSB_1ELSC_2EEEmm"} -// CHECK: [[META21]] = distinct !{[[META21]], [[META22:![0-9]+]], !"_ZNK4sycl3_V15groupILi2EE21async_work_group_copyIaEENS0_12device_eventENS0_9multi_ptrIT_LNS0_6access13address_spaceE3ELNS7_9decoratedE2EEENS5_IS6_LS8_1ELS9_2EEEm: %agg.result"} -// CHECK: [[META22]] = distinct !{[[META22]], !"_ZNK4sycl3_V15groupILi2EE21async_work_group_copyIaEENS0_12device_eventENS0_9multi_ptrIT_LNS0_6access13address_spaceE3ELNS7_9decoratedE2EEENS5_IS6_LS8_1ELS9_2EEEm"} -// CHECK: [[META23]] = !{[[META24:![0-9]+]], [[META26:![0-9]+]]} -// CHECK: [[META24]] = distinct !{[[META24]], [[META25:![0-9]+]], !"_ZNK4sycl3_V15groupILi2EE21async_work_group_copyIhEENSt9enable_ifIXntsr6detail7is_boolIT_EE5valueENS0_12device_eventEE4typeENS0_9multi_ptrIS5_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEENS9_IS5_LSB_1ELSC_2EEEmm: %agg.result"} -// CHECK: [[META25]] = distinct !{[[META25]], !"_ZNK4sycl3_V15groupILi2EE21async_work_group_copyIhEENSt9enable_ifIXntsr6detail7is_boolIT_EE5valueENS0_12device_eventEE4typeENS0_9multi_ptrIS5_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEENS9_IS5_LSB_1ELSC_2EEEmm"} -// CHECK: [[META26]] = distinct !{[[META26]], [[META27:![0-9]+]], !"_ZNK4sycl3_V15groupILi2EE21async_work_group_copyIhEENS0_12device_eventENS0_9multi_ptrIT_LNS0_6access13address_spaceE3ELNS7_9decoratedE2EEENS5_IS6_LS8_1ELS9_2EEEm: %agg.result"} -// CHECK: [[META27]] = distinct !{[[META27]], !"_ZNK4sycl3_V15groupILi2EE21async_work_group_copyIhEENS0_12device_eventENS0_9multi_ptrIT_LNS0_6access13address_spaceE3ELNS7_9decoratedE2EEENS5_IS6_LS8_1ELS9_2EEEm"} -//. From 542abb7dc782ec2e125b52b4bec40219dde7a20c Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Mon, 16 Jun 2025 17:18:35 +0100 Subject: [PATCH 5/5] address feedback --- sycl/test-e2e/Basic/char_builtins.cpp | 91 +++++++++++---------------- 1 file changed, 35 insertions(+), 56 deletions(-) diff --git a/sycl/test-e2e/Basic/char_builtins.cpp b/sycl/test-e2e/Basic/char_builtins.cpp index 2413cbc8758af..5ab0154895a8c 100644 --- a/sycl/test-e2e/Basic/char_builtins.cpp +++ b/sycl/test-e2e/Basic/char_builtins.cpp @@ -11,52 +11,31 @@ constexpr size_t BufferSize = 16; constexpr size_t NElems = 32; constexpr size_t WorkGroupSize = 8; -#if 0 -template -SYCL_EXTERNAL void test_builtins( - T *out, T x, T y, sycl::vec *out2, sycl::vec f2, - sycl::nd_item<2> item, - sycl::multi_ptr ptr_global, - sycl::multi_ptr ptr_local) { - size_t num_elem = 4; - const auto group = item.get_group(); - group.async_work_group_copy(ptr_local, ptr_global, num_elem); -} -#endif - template int check(const T *A, const T *B, T *C, const vec FVec) { -#define UNARY_CHECK(IDX, OP) \ - assert(C[IDX] == OP(A[IDX]) && "error: " #OP "failed") - - UNARY_CHECK(0, clz); - UNARY_CHECK(1, ctz); - UNARY_CHECK(2, std::abs); - -#define BINARY_CHECK(IDX, OP) \ - assert(C[IDX] == OP(A[IDX], B[IDX]) && "error: " #OP "failed") + assert(C[0] == clz(A[0]) && "error: clz failed"); + assert(C[1] == ctz(A[1]) && "error: ctz failed"); + assert(C[2] == abs(A[2]) && "error: abs failed"); - BINARY_CHECK(3, std::min); - BINARY_CHECK(4, std::max); + assert(C[3] == std::min(A[3], B[3]) && "error: min failed"); + assert(C[4] == std::max(A[4], B[4]) && "error: max failed"); - auto hadd = [](auto x, auto y) { + auto Hadd = [](auto x, auto y) { return (static_cast(x) + static_cast(y)) >> 1; }; - BINARY_CHECK(5, hadd); - auto rhadd = [](auto x, auto y) { + assert(C[5] == Hadd(A[5], B[5]) && "error: hadd failed"); + auto Rhadd = [](auto x, auto y) { return (static_cast(x) + static_cast(y) + 1) >> 1; }; - BINARY_CHECK(6, rhadd); + assert(C[6] == Rhadd(A[6], B[6]) && "error: rhadd failed"); assert(C[7] == (T)(FVec[0])); assert(C[8] == (T)(FVec[1])); return 0; -#undef UNARY_CHECK -#undef BINARY_CHECK } -template int do_test(const T *A, const T *B, T *C) { +template int doCharTest(const T *A, const T *B, T *C) { queue Q; // Avoid out-of-range float->(u)char errors by keeping these values within // range. @@ -66,30 +45,30 @@ template int do_test(const T *A, const T *B, T *C) { buffer BBuf(B, BufferSize); buffer CBuf(C, BufferSize); Q.submit([&](handler &CGH) { - auto A = ABuf.template get_access(CGH); - auto B = BBuf.template get_access(CGH); - auto C = CBuf.template get_access(CGH); + auto AAcc = ABuf.template get_access(CGH); + auto BAcc = BBuf.template get_access(CGH); + auto CAcc = CBuf.template get_access(CGH); CGH.single_task<>([=]() { - C[0] = clz(A[0]); - C[1] = ctz(A[1]); - C[2] = abs(A[2]); - C[3] = min(A[3], B[3]); - C[4] = max(A[4], B[4]); - C[5] = hadd(A[5], B[5]); - C[6] = rhadd(A[6], B[6]); - - vec conv = FVec.template convert(); - C[7] = conv[0]; - C[8] = conv[1]; + CAcc[0] = clz(AAcc[0]); + CAcc[1] = ctz(AAcc[1]); + CAcc[2] = abs(AAcc[2]); + CAcc[3] = min(AAcc[3], BAcc[3]); + CAcc[4] = max(AAcc[4], BAcc[4]); + CAcc[5] = hadd(AAcc[5], BAcc[5]); + CAcc[6] = rhadd(AAcc[6], BAcc[6]); + + vec Conv = FVec.template convert(); + CAcc[7] = Conv[0]; + CAcc[8] = Conv[1]; }); }); } - // Regression test async work-group copy builtins + // Regression test async work-group copy builtins. { buffer ABuf(A, BufferSize); Q.submit([&](handler &CGH) { - auto A = ABuf.template get_access(CGH); + auto AAcc = ABuf.template get_access(CGH); local_accessor Local(range<1>{WorkGroupSize}, CGH); nd_range<1> NDR{range<1>(NElems), range<1>(WorkGroupSize)}; @@ -99,7 +78,7 @@ template int do_test(const T *A, const T *B, T *C) { size_t Offset = GrId * WorkGroupSize; auto E = NDId.async_work_group_copy( Local.template get_multi_ptr(), - A.template get_multi_ptr() + Offset, + AAcc.template get_multi_ptr() + Offset, NElemsToCopy); E.wait(); }); @@ -129,14 +108,14 @@ int main() { std::fill(B.begin(), B.end(), 128); std::fill(C.begin(), C.end(), std::numeric_limits::max()); - int ret = do_test(A.data(), B.data(), C.data()); + int Ret = doCharTest(A.data(), B.data(), C.data()); - ret |= do_test(reinterpret_cast(A.data()), - reinterpret_cast(B.data()), - reinterpret_cast(C.data())); + Ret |= doCharTest(reinterpret_cast(A.data()), + reinterpret_cast(B.data()), + reinterpret_cast(C.data())); - ret |= do_test(reinterpret_cast(A.data()), - reinterpret_cast(B.data()), - reinterpret_cast(C.data())); - return ret; + Ret |= doCharTest(reinterpret_cast(A.data()), + reinterpret_cast(B.data()), + reinterpret_cast(C.data())); + return Ret; }