Skip to content

Commit c00db97

Browse files
committed
[Clang] Add elementwise saturated add/sub builtins
This patch implements the `__builtin_elementwise_add_sat` and `__builtin_elementwise_sub_sat` builtins. These map to the add/sub saturated math intrinsics described in the LLVM Language Reference: https://llvm.org/docs/LangRef.html#saturation-arithmetic-intrinsics

With this in place we should then be able to replace the x86 SSE adds/subs intrinsics with these generic variants. It looks like other targets should be able to use these as well (ARM, AArch64 and WebAssembly all have similar examples in CGBuiltin).

Differential Revision: https://reviews.llvm.org/D117898
1 parent 95b8a3e commit c00db97

File tree

7 files changed

+277
-0
lines changed

7 files changed

+277
-0
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,10 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
545545
magnitude than x
546546
T __builtin_elementwise_max(T x, T y) return x or y, whichever is larger integer and floating point types
547547
T __builtin_elementwise_min(T x, T y) return x or y, whichever is smaller integer and floating point types
548+
T __builtin_elementwise_add_sat(T x, T y) return the sum of x and y, clamped to the range of integer types
549+
representable values for the signed/unsigned integer type.
550+
T __builtin_elementwise_sub_sat(T x, T y) return the difference of x and y, clamped to the range of integer types
551+
representable values for the signed/unsigned integer type.
548552
========================================= ================================================================ =========================================
549553

550554

clang/include/clang/Basic/Builtins.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,6 +653,8 @@ BUILTIN(__builtin_elementwise_ceil, "v.", "nct")
653653
BUILTIN(__builtin_elementwise_floor, "v.", "nct")
654654
BUILTIN(__builtin_elementwise_roundeven, "v.", "nct")
655655
BUILTIN(__builtin_elementwise_trunc, "v.", "nct")
656+
BUILTIN(__builtin_elementwise_add_sat, "v.", "nct")
657+
BUILTIN(__builtin_elementwise_sub_sat, "v.", "nct")
656658
BUILTIN(__builtin_reduce_max, "v.", "nct")
657659
BUILTIN(__builtin_reduce_min, "v.", "nct")
658660
BUILTIN(__builtin_reduce_xor, "v.", "nct")

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3154,6 +3154,25 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
31543154
return RValue::get(
31553155
emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
31563156

3157+
case Builtin::BI__builtin_elementwise_add_sat:
3158+
case Builtin::BI__builtin_elementwise_sub_sat: {
3159+
Value *Op0 = EmitScalarExpr(E->getArg(0));
3160+
Value *Op1 = EmitScalarExpr(E->getArg(1));
3161+
Value *Result;
3162+
assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
3163+
QualType Ty = E->getArg(0)->getType();
3164+
if (auto *VecTy = Ty->getAs<VectorType>())
3165+
Ty = VecTy->getElementType();
3166+
bool IsSigned = Ty->isSignedIntegerType();
3167+
unsigned Opc;
3168+
if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
3169+
Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
3170+
else
3171+
Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
3172+
Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
3173+
return RValue::get(Result);
3174+
}
3175+
31573176
case Builtin::BI__builtin_elementwise_max: {
31583177
Value *Op0 = EmitScalarExpr(E->getArg(0));
31593178
Value *Op1 = EmitScalarExpr(E->getArg(1));

clang/lib/Sema/SemaChecking.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2238,6 +2238,28 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
22382238
break;
22392239
}
22402240

2241+
// These builtins restrict the element type to integer
2242+
// types only.
2243+
case Builtin::BI__builtin_elementwise_add_sat:
2244+
case Builtin::BI__builtin_elementwise_sub_sat: {
2245+
if (SemaBuiltinElementwiseMath(TheCall))
2246+
return ExprError();
2247+
2248+
const Expr *Arg = TheCall->getArg(0);
2249+
QualType ArgTy = Arg->getType();
2250+
QualType EltTy = ArgTy;
2251+
2252+
if (auto *VecTy = EltTy->getAs<VectorType>())
2253+
EltTy = VecTy->getElementType();
2254+
2255+
if (!EltTy->isIntegerType()) {
2256+
Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
2257+
<< 1 << /* integer ty */ 6 << ArgTy;
2258+
return ExprError();
2259+
}
2260+
break;
2261+
}
2262+
22412263
case Builtin::BI__builtin_elementwise_min:
22422264
case Builtin::BI__builtin_elementwise_max:
22432265
if (SemaBuiltinElementwiseMath(TheCall))

clang/test/CodeGen/builtins-elementwise-math.c

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,104 @@ void test_builtin_elementwise_abs(float f1, float f2, double d1, double d2,
5656
si = __builtin_elementwise_abs(si);
5757
}
5858

59+
void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
60+
float4 vf1, float4 vf2, long long int i1,
61+
long long int i2, si8 vi1, si8 vi2,
62+
unsigned u1, unsigned u2, u4 vu1, u4 vu2,
63+
_BitInt(31) bi1, _BitInt(31) bi2,
64+
unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
65+
// CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
66+
// CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
67+
// CHECK-NEXT: call i64 @llvm.sadd.sat.i64(i64 [[I1]], i64 [[I2]])
68+
i1 = __builtin_elementwise_add_sat(i1, i2);
69+
70+
// CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
71+
// CHECK-NEXT: call i64 @llvm.sadd.sat.i64(i64 [[I1]], i64 10)
72+
i1 = __builtin_elementwise_add_sat(i1, 10);
73+
74+
// CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
75+
// CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
76+
// CHECK-NEXT: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
77+
vi1 = __builtin_elementwise_add_sat(vi1, vi2);
78+
79+
// CHECK: [[U1:%.+]] = load i32, i32* %u1.addr, align 4
80+
// CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
81+
// CHECK-NEXT: call i32 @llvm.uadd.sat.i32(i32 [[U1]], i32 [[U2]])
82+
u1 = __builtin_elementwise_add_sat(u1, u2);
83+
84+
// CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
85+
// CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
86+
// CHECK-NEXT: call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
87+
vu1 = __builtin_elementwise_add_sat(vu1, vu2);
88+
89+
// CHECK: [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
90+
// CHECK-NEXT: [[BI2:%.+]] = load i31, i31* %bi2.addr, align 4
91+
// CHECK-NEXT: call i31 @llvm.sadd.sat.i31(i31 [[BI1]], i31 [[BI2]])
92+
bi1 = __builtin_elementwise_add_sat(bi1, bi2);
93+
94+
// CHECK: [[BU1:%.+]] = load i55, i55* %bu1.addr, align 8
95+
// CHECK-NEXT: [[BU2:%.+]] = load i55, i55* %bu2.addr, align 8
96+
// CHECK-NEXT: call i55 @llvm.uadd.sat.i55(i55 [[BU1]], i55 [[BU2]])
97+
bu1 = __builtin_elementwise_add_sat(bu1, bu2);
98+
99+
// CHECK: [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
100+
// CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
101+
// CHECK-NEXT: call i32 @llvm.sadd.sat.i32(i32 [[IAS1]], i32 [[B]])
102+
int_as_one = __builtin_elementwise_add_sat(int_as_one, b);
103+
104+
// CHECK: call i32 @llvm.sadd.sat.i32(i32 1, i32 97)
105+
i1 = __builtin_elementwise_add_sat(1, 'a');
106+
}
107+
108+
void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
109+
float4 vf1, float4 vf2, long long int i1,
110+
long long int i2, si8 vi1, si8 vi2,
111+
unsigned u1, unsigned u2, u4 vu1, u4 vu2,
112+
_BitInt(31) bi1, _BitInt(31) bi2,
113+
unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
114+
// CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
115+
// CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
116+
// CHECK-NEXT: call i64 @llvm.ssub.sat.i64(i64 [[I1]], i64 [[I2]])
117+
i1 = __builtin_elementwise_sub_sat(i1, i2);
118+
119+
// CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
120+
// CHECK-NEXT: call i64 @llvm.ssub.sat.i64(i64 [[I1]], i64 10)
121+
i1 = __builtin_elementwise_sub_sat(i1, 10);
122+
123+
// CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
124+
// CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
125+
// CHECK-NEXT: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
126+
vi1 = __builtin_elementwise_sub_sat(vi1, vi2);
127+
128+
// CHECK: [[U1:%.+]] = load i32, i32* %u1.addr, align 4
129+
// CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
130+
// CHECK-NEXT: call i32 @llvm.usub.sat.i32(i32 [[U1]], i32 [[U2]])
131+
u1 = __builtin_elementwise_sub_sat(u1, u2);
132+
133+
// CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
134+
// CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
135+
// CHECK-NEXT: call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
136+
vu1 = __builtin_elementwise_sub_sat(vu1, vu2);
137+
138+
// CHECK: [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
139+
// CHECK-NEXT: [[BI2:%.+]] = load i31, i31* %bi2.addr, align 4
140+
// CHECK-NEXT: call i31 @llvm.ssub.sat.i31(i31 [[BI1]], i31 [[BI2]])
141+
bi1 = __builtin_elementwise_sub_sat(bi1, bi2);
142+
143+
// CHECK: [[BU1:%.+]] = load i55, i55* %bu1.addr, align 8
144+
// CHECK-NEXT: [[BU2:%.+]] = load i55, i55* %bu2.addr, align 8
145+
// CHECK-NEXT: call i55 @llvm.usub.sat.i55(i55 [[BU1]], i55 [[BU2]])
146+
bu1 = __builtin_elementwise_sub_sat(bu1, bu2);
147+
148+
// CHECK: [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
149+
// CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
150+
// CHECK-NEXT: call i32 @llvm.ssub.sat.i32(i32 [[IAS1]], i32 [[B]])
151+
int_as_one = __builtin_elementwise_sub_sat(int_as_one, b);
152+
153+
// CHECK: call i32 @llvm.ssub.sat.i32(i32 1, i32 97)
154+
i1 = __builtin_elementwise_sub_sat(1, 'a');
155+
}
156+
59157
void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
60158
float4 vf1, float4 vf2, long long int i1,
61159
long long int i2, si8 vi1, si8 vi2,

clang/test/Sema/builtins-elementwise-math.c

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,122 @@ void test_builtin_elementwise_abs(int i, double d, float4 v, int3 iv, unsigned u
3333
// expected-error@-1 {{1st argument must be a signed integer or floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
3434
}
3535

36+
void test_builtin_elementwise_add_sat(int i, short s, double d, float4 v, int3 iv, unsigned3 uv, int *p) {
37+
i = __builtin_elementwise_add_sat(p, d);
38+
// expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}
39+
40+
struct Foo foo = __builtin_elementwise_add_sat(i, i);
41+
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
42+
43+
i = __builtin_elementwise_add_sat(i);
44+
// expected-error@-1 {{too few arguments to function call, expected 2, have 1}}
45+
46+
i = __builtin_elementwise_add_sat();
47+
// expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
48+
49+
i = __builtin_elementwise_add_sat(i, i, i);
50+
// expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
51+
52+
i = __builtin_elementwise_add_sat(v, iv);
53+
// expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'int3' (vector of 3 'int' values))}}
54+
55+
i = __builtin_elementwise_add_sat(uv, iv);
56+
// expected-error@-1 {{arguments are of different types ('unsigned3' (vector of 3 'unsigned int' values) vs 'int3' (vector of 3 'int' values))}}
57+
58+
v = __builtin_elementwise_add_sat(v, v);
59+
// expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
60+
61+
s = __builtin_elementwise_add_sat(i, s);
62+
63+
enum e { one,
64+
two };
65+
i = __builtin_elementwise_add_sat(one, two);
66+
67+
enum f { three };
68+
enum f x = __builtin_elementwise_add_sat(one, three);
69+
70+
_BitInt(32) ext; // expected-warning {{'_BitInt' in C17 and earlier is a Clang extension}}
71+
ext = __builtin_elementwise_add_sat(ext, ext);
72+
73+
const int ci;
74+
i = __builtin_elementwise_add_sat(ci, i);
75+
i = __builtin_elementwise_add_sat(i, ci);
76+
i = __builtin_elementwise_add_sat(ci, ci);
77+
78+
i = __builtin_elementwise_add_sat(i, int_as_one); // ok (attributes don't match)?
79+
i = __builtin_elementwise_add_sat(i, b); // ok (sugar doesn't match)?
80+
81+
int A[10];
82+
A = __builtin_elementwise_add_sat(A, A);
83+
// expected-error@-1 {{1st argument must be a vector, integer or floating point type (was 'int *')}}
84+
85+
int(ii);
86+
int j;
87+
j = __builtin_elementwise_add_sat(i, j);
88+
89+
_Complex float c1, c2;
90+
c1 = __builtin_elementwise_add_sat(c1, c2);
91+
// expected-error@-1 {{1st argument must be a vector, integer or floating point type (was '_Complex float')}}
92+
}
93+
94+
void test_builtin_elementwise_sub_sat(int i, short s, double d, float4 v, int3 iv, unsigned3 uv, int *p) {
95+
i = __builtin_elementwise_sub_sat(p, d);
96+
// expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}
97+
98+
struct Foo foo = __builtin_elementwise_sub_sat(i, i);
99+
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
100+
101+
i = __builtin_elementwise_sub_sat(i);
102+
// expected-error@-1 {{too few arguments to function call, expected 2, have 1}}
103+
104+
i = __builtin_elementwise_sub_sat();
105+
// expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
106+
107+
i = __builtin_elementwise_sub_sat(i, i, i);
108+
// expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
109+
110+
i = __builtin_elementwise_sub_sat(v, iv);
111+
// expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'int3' (vector of 3 'int' values))}}
112+
113+
i = __builtin_elementwise_sub_sat(uv, iv);
114+
// expected-error@-1 {{arguments are of different types ('unsigned3' (vector of 3 'unsigned int' values) vs 'int3' (vector of 3 'int' values))}}
115+
116+
v = __builtin_elementwise_sub_sat(v, v);
117+
// expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
118+
119+
s = __builtin_elementwise_sub_sat(i, s);
120+
121+
enum e { one,
122+
two };
123+
i = __builtin_elementwise_sub_sat(one, two);
124+
125+
enum f { three };
126+
enum f x = __builtin_elementwise_sub_sat(one, three);
127+
128+
_BitInt(32) ext; // expected-warning {{'_BitInt' in C17 and earlier is a Clang extension}}
129+
ext = __builtin_elementwise_sub_sat(ext, ext);
130+
131+
const int ci;
132+
i = __builtin_elementwise_sub_sat(ci, i);
133+
i = __builtin_elementwise_sub_sat(i, ci);
134+
i = __builtin_elementwise_sub_sat(ci, ci);
135+
136+
i = __builtin_elementwise_sub_sat(i, int_as_one); // ok (attributes don't match)?
137+
i = __builtin_elementwise_sub_sat(i, b); // ok (sugar doesn't match)?
138+
139+
int A[10];
140+
A = __builtin_elementwise_sub_sat(A, A);
141+
// expected-error@-1 {{1st argument must be a vector, integer or floating point type (was 'int *')}}
142+
143+
int(ii);
144+
int j;
145+
j = __builtin_elementwise_sub_sat(i, j);
146+
147+
_Complex float c1, c2;
148+
c1 = __builtin_elementwise_sub_sat(c1, c2);
149+
// expected-error@-1 {{1st argument must be a vector, integer or floating point type (was '_Complex float')}}
150+
}
151+
36152
void test_builtin_elementwise_max(int i, short s, double d, float4 v, int3 iv, unsigned3 uv, int *p) {
37153
i = __builtin_elementwise_max(p, d);
38154
// expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}

clang/test/SemaCXX/builtins-elementwise-math.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,22 @@ void test_builtin_elementwise_abs() {
2121
static_assert(!is_const<decltype(__builtin_elementwise_abs(b))>::value);
2222
}
2323

24+
void test_builtin_elementwise_add_sat() {
25+
const int a = 2;
26+
int b = 1;
27+
static_assert(!is_const<decltype(__builtin_elementwise_add_sat(a, b))>::value);
28+
static_assert(!is_const<decltype(__builtin_elementwise_add_sat(b, a))>::value);
29+
static_assert(!is_const<decltype(__builtin_elementwise_add_sat(a, a))>::value);
30+
}
31+
32+
void test_builtin_elementwise_sub_sat() {
33+
const int a = 2;
34+
int b = 1;
35+
static_assert(!is_const<decltype(__builtin_elementwise_sub_sat(a, b))>::value);
36+
static_assert(!is_const<decltype(__builtin_elementwise_sub_sat(b, a))>::value);
37+
static_assert(!is_const<decltype(__builtin_elementwise_sub_sat(a, a))>::value);
38+
}
39+
2440
void test_builtin_elementwise_max() {
2541
const int a = 2;
2642
int b = 1;

0 commit comments

Comments
 (0)