[Clang] Add elementwise saturated add/sub builtins

RKSimon · RKSimon · commit c00db9715975 · 2022-02-08T11:22:01.000Z
This patch implements the `__builtin_elementwise_add_sat` and `__builtin_elementwise_sub_sat` builtins. These map to the saturating add/sub intrinsics described in the LLVM LangRef (see https://llvm.org/docs/LangRef.html#saturation-arithmetic-intrinsics ). With this in place, we should be able to replace the x86 SSE adds/subs intrinsics with these generic variants; other targets should be able to use them as well (arm/aarch64/webassembly all have similar examples in CGBuiltin.cpp). Differential Revision: https://reviews.llvm.org/D117898
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
@@ -545,6 +545,10 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
                                            magnitude than x
  T __builtin_elementwise_max(T x, T y)     return x or y, whichever is larger                               integer and floating point types
  T __builtin_elementwise_min(T x, T y)     return x or y, whichever is smaller                              integer and floating point types
+ T __builtin_elementwise_add_sat(T x, T y) return the sum of x and y, clamped to the range of               integer types
+                                           representable values for the signed/unsigned integer type.
+ T __builtin_elementwise_sub_sat(T x, T y) return the difference of x and y, clamped to the range of        integer types
+                                           representable values for the signed/unsigned integer type.
 ========================================= ================================================================ =========================================
 
 
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
@@ -653,6 +653,8 @@ BUILTIN(__builtin_elementwise_ceil, "v.", "nct")
 BUILTIN(__builtin_elementwise_floor, "v.", "nct")
 BUILTIN(__builtin_elementwise_roundeven, "v.", "nct")
 BUILTIN(__builtin_elementwise_trunc, "v.", "nct")
+BUILTIN(__builtin_elementwise_add_sat, "v.", "nct")
+BUILTIN(__builtin_elementwise_sub_sat, "v.", "nct")
 BUILTIN(__builtin_reduce_max, "v.", "nct")
 BUILTIN(__builtin_reduce_min, "v.", "nct")
 BUILTIN(__builtin_reduce_xor, "v.", "nct")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3154,6 +3154,25 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     return RValue::get(
         emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
 
+  case Builtin::BI__builtin_elementwise_add_sat:
+  case Builtin::BI__builtin_elementwise_sub_sat: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    Value *Op1 = EmitScalarExpr(E->getArg(1));
+    Value *Result;
+    assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
+    QualType Ty = E->getArg(0)->getType(); // signedness is read from the AST type
+    if (auto *VecTy = Ty->getAs<VectorType>())
+      Ty = VecTy->getElementType(); // vectors: inspect the element type
+    bool IsSigned = Ty->isSignedIntegerType();
+    unsigned Opc; // llvm::Intrinsic ID, chosen by builtin and signedness
+    if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
+      Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
+    else
+      Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
+    Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
+    return RValue::get(Result);
+  }
+
   case Builtin::BI__builtin_elementwise_max: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     Value *Op1 = EmitScalarExpr(E->getArg(1));
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
@@ -2238,6 +2238,28 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     break;
   }
 
+  // Saturating add/sub: run the shared elementwise checks first, then
+  // require the scalar type (or the vector element type) to be an integer.
+  case Builtin::BI__builtin_elementwise_add_sat:
+  case Builtin::BI__builtin_elementwise_sub_sat: {
+    if (SemaBuiltinElementwiseMath(TheCall))
+      return ExprError();
+
+    const Expr *Arg = TheCall->getArg(0); // only the 1st argument is inspected
+    QualType ArgTy = Arg->getType();
+    QualType EltTy = ArgTy;
+
+    if (auto *VecTy = EltTy->getAs<VectorType>())
+      EltTy = VecTy->getElementType(); // vectors: test the element type
+
+    if (!EltTy->isIntegerType()) {
+      Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+          << 1 << /* vector of integers */ 6 << ArgTy;
+      return ExprError();
+    }
+    break;
+  }
+
   case Builtin::BI__builtin_elementwise_min:
   case Builtin::BI__builtin_elementwise_max:
     if (SemaBuiltinElementwiseMath(TheCall))
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -56,6 +56,104 @@ void test_builtin_elementwise_abs(float f1, float f2, double d1, double d2,
   si = __builtin_elementwise_abs(si);
 }
 
+void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
+                                      float4 vf1, float4 vf2, long long int i1,
+                                      long long int i2, si8 vi1, si8 vi2,
+                                      unsigned u1, unsigned u2, u4 vu1, u4 vu2,
+                                      _BitInt(31) bi1, _BitInt(31) bi2,
+                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
+  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
+  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
+  // CHECK-NEXT: call i64 @llvm.sadd.sat.i64(i64 [[I1]], i64 [[I2]])
+  i1 = __builtin_elementwise_add_sat(i1, i2); // signed scalar
+
+  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
+  // CHECK-NEXT: call i64 @llvm.sadd.sat.i64(i64 [[I1]], i64 10)
+  i1 = __builtin_elementwise_add_sat(i1, 10); // signed scalar with a literal
+
+  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
+  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
+  // CHECK-NEXT: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
+  vi1 = __builtin_elementwise_add_sat(vi1, vi2); // signed vector
+
+  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
+  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
+  // CHECK-NEXT: call i32 @llvm.uadd.sat.i32(i32 [[U1]], i32 [[U2]])
+  u1 = __builtin_elementwise_add_sat(u1, u2); // unsigned scalar
+
+  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
+  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
+  // CHECK-NEXT: call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
+  vu1 = __builtin_elementwise_add_sat(vu1, vu2); // unsigned vector
+
+  // CHECK:      [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
+  // CHECK-NEXT: [[BI2:%.+]] = load i31, i31* %bi2.addr, align 4
+  // CHECK-NEXT: call i31 @llvm.sadd.sat.i31(i31 [[BI1]], i31 [[BI2]])
+  bi1 = __builtin_elementwise_add_sat(bi1, bi2); // signed _BitInt
+
+  // CHECK:      [[BU1:%.+]] = load i55, i55* %bu1.addr, align 8
+  // CHECK-NEXT: [[BU2:%.+]] = load i55, i55* %bu2.addr, align 8
+  // CHECK-NEXT: call i55 @llvm.uadd.sat.i55(i55 [[BU1]], i55 [[BU2]])
+  bu1 = __builtin_elementwise_add_sat(bu1, bu2); // unsigned _BitInt
+
+  // CHECK:      [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
+  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
+  // CHECK-NEXT: call i32 @llvm.sadd.sat.i32(i32 [[IAS1]], i32 [[B]])
+  int_as_one = __builtin_elementwise_add_sat(int_as_one, b); // addrspace(1) global
+
+  // CHECK: call i32 @llvm.sadd.sat.i32(i32 1, i32 97)
+  i1 = __builtin_elementwise_add_sat(1, 'a'); // two constant operands
+}
+
+void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
+                                      float4 vf1, float4 vf2, long long int i1,
+                                      long long int i2, si8 vi1, si8 vi2,
+                                      unsigned u1, unsigned u2, u4 vu1, u4 vu2,
+                                      _BitInt(31) bi1, _BitInt(31) bi2,
+                                      unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) {
+  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
+  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
+  // CHECK-NEXT: call i64 @llvm.ssub.sat.i64(i64 [[I1]], i64 [[I2]])
+  i1 = __builtin_elementwise_sub_sat(i1, i2); // signed scalar
+
+  // CHECK:      [[I1:%.+]] = load i64, i64* %i1.addr, align 8
+  // CHECK-NEXT: call i64 @llvm.ssub.sat.i64(i64 [[I1]], i64 10)
+  i1 = __builtin_elementwise_sub_sat(i1, 10); // signed scalar with a literal
+
+  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
+  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
+  // CHECK-NEXT: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
+  vi1 = __builtin_elementwise_sub_sat(vi1, vi2); // signed vector
+
+  // CHECK:      [[U1:%.+]] = load i32, i32* %u1.addr, align 4
+  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
+  // CHECK-NEXT: call i32 @llvm.usub.sat.i32(i32 [[U1]], i32 [[U2]])
+  u1 = __builtin_elementwise_sub_sat(u1, u2); // unsigned scalar
+
+  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
+  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
+  // CHECK-NEXT: call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
+  vu1 = __builtin_elementwise_sub_sat(vu1, vu2); // unsigned vector
+
+  // CHECK:      [[BI1:%.+]] = load i31, i31* %bi1.addr, align 4
+  // CHECK-NEXT: [[BI2:%.+]] = load i31, i31* %bi2.addr, align 4
+  // CHECK-NEXT: call i31 @llvm.ssub.sat.i31(i31 [[BI1]], i31 [[BI2]])
+  bi1 = __builtin_elementwise_sub_sat(bi1, bi2); // signed _BitInt
+
+  // CHECK:      [[BU1:%.+]] = load i55, i55* %bu1.addr, align 8
+  // CHECK-NEXT: [[BU2:%.+]] = load i55, i55* %bu2.addr, align 8
+  // CHECK-NEXT: call i55 @llvm.usub.sat.i55(i55 [[BU1]], i55 [[BU2]])
+  bu1 = __builtin_elementwise_sub_sat(bu1, bu2); // unsigned _BitInt
+
+  // CHECK:      [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
+  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
+  // CHECK-NEXT: call i32 @llvm.ssub.sat.i32(i32 [[IAS1]], i32 [[B]])
+  int_as_one = __builtin_elementwise_sub_sat(int_as_one, b); // addrspace(1) global
+
+  // CHECK: call i32 @llvm.ssub.sat.i32(i32 1, i32 97)
+  i1 = __builtin_elementwise_sub_sat(1, 'a'); // two constant operands
+}
+
 void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
                                   float4 vf1, float4 vf2, long long int i1,
                                   long long int i2, si8 vi1, si8 vi2,
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
@@ -33,6 +33,122 @@ void test_builtin_elementwise_abs(int i, double d, float4 v, int3 iv, unsigned u
   // expected-error@-1 {{1st argument must be a signed integer or floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
 }
 
+void test_builtin_elementwise_add_sat(int i, short s, double d, float4 v, int3 iv, unsigned3 uv, int *p) {
+  i = __builtin_elementwise_add_sat(p, d);
+  // expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}
+
+  struct Foo foo = __builtin_elementwise_add_sat(i, i);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_elementwise_add_sat(i);
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 1}}
+
+  i = __builtin_elementwise_add_sat();
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+
+  i = __builtin_elementwise_add_sat(i, i, i);
+  // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+
+  i = __builtin_elementwise_add_sat(v, iv);
+  // expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'int3' (vector of 3 'int' values))}}
+
+  i = __builtin_elementwise_add_sat(uv, iv);
+  // expected-error@-1 {{arguments are of different types ('unsigned3' (vector of 3 'unsigned int' values) vs 'int3' (vector of 3 'int' values))}}
+
+  v = __builtin_elementwise_add_sat(v, v);
+  // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
+
+  s = __builtin_elementwise_add_sat(i, s); // int with short: accepted
+
+  enum e { one,
+           two };
+  i = __builtin_elementwise_add_sat(one, two); // same enum: accepted
+
+  enum f { three };
+  enum f x = __builtin_elementwise_add_sat(one, three); // mixed enums: accepted
+
+  _BitInt(32) ext; // expected-warning {{'_BitInt' in C17 and earlier is a Clang extension}}
+  ext = __builtin_elementwise_add_sat(ext, ext); // _BitInt: accepted
+
+  const int ci;
+  i = __builtin_elementwise_add_sat(ci, i); // const-qualified operands: accepted
+  i = __builtin_elementwise_add_sat(i, ci);
+  i = __builtin_elementwise_add_sat(ci, ci);
+
+  i = __builtin_elementwise_add_sat(i, int_as_one); // ok (attributes don't match)?
+  i = __builtin_elementwise_add_sat(i, b);          // ok (sugar doesn't match)?
+
+  int A[10];
+  A = __builtin_elementwise_add_sat(A, A);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was 'int *')}}
+
+  int(ii);
+  int j;
+  j = __builtin_elementwise_add_sat(i, j);
+
+  _Complex float c1, c2;
+  c1 = __builtin_elementwise_add_sat(c1, c2);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was '_Complex float')}}
+}
+
+void test_builtin_elementwise_sub_sat(int i, short s, double d, float4 v, int3 iv, unsigned3 uv, int *p) {
+  i = __builtin_elementwise_sub_sat(p, d);
+  // expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}
+
+  struct Foo foo = __builtin_elementwise_sub_sat(i, i);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_elementwise_sub_sat(i);
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 1}}
+
+  i = __builtin_elementwise_sub_sat();
+  // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+
+  i = __builtin_elementwise_sub_sat(i, i, i);
+  // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+
+  i = __builtin_elementwise_sub_sat(v, iv);
+  // expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'int3' (vector of 3 'int' values))}}
+
+  i = __builtin_elementwise_sub_sat(uv, iv);
+  // expected-error@-1 {{arguments are of different types ('unsigned3' (vector of 3 'unsigned int' values) vs 'int3' (vector of 3 'int' values))}}
+
+  v = __builtin_elementwise_sub_sat(v, v);
+  // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
+
+  s = __builtin_elementwise_sub_sat(i, s); // int with short: accepted
+
+  enum e { one,
+           two };
+  i = __builtin_elementwise_sub_sat(one, two); // same enum: accepted
+
+  enum f { three };
+  enum f x = __builtin_elementwise_sub_sat(one, three); // mixed enums: accepted
+
+  _BitInt(32) ext; // expected-warning {{'_BitInt' in C17 and earlier is a Clang extension}}
+  ext = __builtin_elementwise_sub_sat(ext, ext); // _BitInt: accepted
+
+  const int ci;
+  i = __builtin_elementwise_sub_sat(ci, i); // const-qualified operands: accepted
+  i = __builtin_elementwise_sub_sat(i, ci);
+  i = __builtin_elementwise_sub_sat(ci, ci);
+
+  i = __builtin_elementwise_sub_sat(i, int_as_one); // ok (attributes don't match)?
+  i = __builtin_elementwise_sub_sat(i, b);          // ok (sugar doesn't match)?
+
+  int A[10];
+  A = __builtin_elementwise_sub_sat(A, A);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was 'int *')}}
+
+  int(ii);
+  int j;
+  j = __builtin_elementwise_sub_sat(i, j);
+
+  _Complex float c1, c2;
+  c1 = __builtin_elementwise_sub_sat(c1, c2);
+  // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was '_Complex float')}}
+}
+
 void test_builtin_elementwise_max(int i, short s, double d, float4 v, int3 iv, unsigned3 uv, int *p) {
   i = __builtin_elementwise_max(p, d);
   // expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}
diff --git a/clang/test/SemaCXX/builtins-elementwise-math.cpp b/clang/test/SemaCXX/builtins-elementwise-math.cpp
@@ -21,6 +21,22 @@ void test_builtin_elementwise_abs() {
   static_assert(!is_const<decltype(__builtin_elementwise_abs(b))>::value);
 }
 
+void test_builtin_elementwise_add_sat() {
+  const int a = 2; // const operand: the result type must still be non-const
+  int b = 1;
+  static_assert(!is_const<decltype(__builtin_elementwise_add_sat(a, b))>::value);
+  static_assert(!is_const<decltype(__builtin_elementwise_add_sat(b, a))>::value);
+  static_assert(!is_const<decltype(__builtin_elementwise_add_sat(a, a))>::value);
+}
+
+void test_builtin_elementwise_sub_sat() {
+  const int a = 2; // const operand: the result type must still be non-const
+  int b = 1;
+  static_assert(!is_const<decltype(__builtin_elementwise_sub_sat(a, b))>::value);
+  static_assert(!is_const<decltype(__builtin_elementwise_sub_sat(b, a))>::value);
+  static_assert(!is_const<decltype(__builtin_elementwise_sub_sat(a, a))>::value);
+}
+
 void test_builtin_elementwise_max() {
   const int a = 2;
   int b = 1;