[Hexagon] Add saturating add instructions (#148132)

aankit-ca · aankit-quic · jverma-quic · web-flow · commit f9d3278901cf · 2025-07-11T15:00:05.000-07:00
Generate saturating add instructions for sadd.sat (scalar i32/i64 and HVX
vector types) and for uadd.sat (HVX vector types).

Co-authored-by: aankit-quic <aankit@quicinc.com>
Co-authored-by: Jyotsna Verma <jverma@quicinc.com>
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1762,6 +1762,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SRL, VT, Custom);
   }
 
+  setOperationAction(ISD::SADDSAT, MVT::i32, Legal);
+  setOperationAction(ISD::SADDSAT, MVT::i64, Legal);
+
   // Extending loads from (native) vectors of i8 into (native) vectors of i16
   // are legal.
   setLoadExtAction(ISD::EXTLOAD,  MVT::v2i16, MVT::v2i8, Legal);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -206,6 +206,8 @@ HexagonTargetLowering::initializeHVXLowering() {
     setOperationAction(ISD::CTLZ,           T, Legal);
     setOperationAction(ISD::SELECT,         T, Legal);
     setOperationAction(ISD::SPLAT_VECTOR,   T, Legal);
+    setOperationAction(ISD::UADDSAT, T, Legal);
+    setOperationAction(ISD::SADDSAT, T, Legal);
     if (T != ByteV) {
       setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
@@ -297,6 +299,8 @@ HexagonTargetLowering::initializeHVXLowering() {
     setOperationAction(ISD::CTPOP,    T, Custom);
 
     setOperationAction(ISD::ADD,      T, Legal);
+    setOperationAction(ISD::UADDSAT, T, Legal);
+    setOperationAction(ISD::SADDSAT, T, Legal);
     setOperationAction(ISD::SUB,      T, Legal);
     setOperationAction(ISD::MUL,      T, Custom);
     setOperationAction(ISD::MULHS,    T, Custom);
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -1524,6 +1524,14 @@ def: Pat<(or  I32:$Rs, anyimm:$s10),   (A2_orir   I32:$Rs,  imm:$s10)>;
 def: Pat<(and I32:$Rs, anyimm:$s10),   (A2_andir  I32:$Rs,  imm:$s10)>;
 def: Pat<(sub anyimm:$s10, I32:$Rs),   (A2_subri  imm:$s10, I32:$Rs)>;
 
+class OpR_RR_pat_sat<InstHexagon MI, SDNode Op, ValueType ResType,
+                     PatFrag RxPred>
+  : Pat<(ResType (Op RxPred:$Rs, RxPred:$Rt)),
+        (MI RxPred:$Rs, RxPred:$Rt)>;
+
+def: OpR_RR_pat_sat<A2_addsat,  saddsat, i32, I32>;
+def: OpR_RR_pat_sat<A2_addpsat, saddsat, i64, I64>;
+
 def: OpR_RR_pat<A2_add,       Add,        i32,   I32>;
 def: OpR_RR_pat<A2_sub,       Sub,        i32,   I32>;
 def: OpR_RR_pat<A2_and,       And,        i32,   I32>;
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -426,6 +426,21 @@ let Predicates = [UseHVX] in {
            (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
 }
 
+let Predicates = [UseHVX] in {
+  def: OpR_RR_pat_sat<V6_vaddubsat,    uaddsat, VecI8,   HVI8>;
+  def: OpR_RR_pat_sat<V6_vadduhsat,    uaddsat, VecI16,  HVI16>;
+  def: OpR_RR_pat_sat<V6_vadduwsat,    uaddsat, VecI32,  HVI32>;
+  def: OpR_RR_pat_sat<V6_vaddbsat,     saddsat, VecI8,   HVI8>;
+  def: OpR_RR_pat_sat<V6_vaddhsat,     saddsat, VecI16,  HVI16>;
+  def: OpR_RR_pat_sat<V6_vaddwsat,     saddsat, VecI32,  HVI32>;
+  def: OpR_RR_pat_sat<V6_vaddubsat_dv, uaddsat, VecPI8,  HWI8>;
+  def: OpR_RR_pat_sat<V6_vadduhsat_dv, uaddsat, VecPI16, HWI16>;
+  def: OpR_RR_pat_sat<V6_vadduwsat_dv, uaddsat, VecPI32, HWI32>;
+  def: OpR_RR_pat_sat<V6_vaddbsat_dv,  saddsat, VecPI8,  HWI8>;
+  def: OpR_RR_pat_sat<V6_vaddhsat_dv,  saddsat, VecPI16, HWI16>;
+  def: OpR_RR_pat_sat<V6_vaddwsat_dv,  saddsat, VecPI32, HWI32>;
+}
+
 // For now, we always deal with vector floating point in SF mode.
 class OpR_RR_pat_conv<InstHexagon MI, PatFrag Op, ValueType ResType,
                       PatFrag RsPred, PatFrag RtPred = RsPred>
diff --git a/llvm/test/CodeGen/Hexagon/addsat.ll b/llvm/test/CodeGen/Hexagon/addsat.ll
@@ -0,0 +1,157 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Test for saturating add instructions.
+
+; CHECK-LABEL: test1
+; CHECK: v{{.*}}.ub = vadd(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub):sat
+define <128 x i8> @test1(<128 x i8>* %a0, <128 x i8>* %a1) #0 {
+entry:
+  %wide.load = load <128 x i8>, <128 x i8>* %a0, align 1
+  %wide.load62 = load <128 x i8>, <128 x i8>* %a1, align 1
+  %add = call <128 x i8> @llvm.uadd.sat.v128i8(<128 x i8> %wide.load, <128 x i8> %wide.load62)
+  ret <128 x i8> %add
+}
+
+; CHECK-LABEL: test2
+; CHECK: v{{.*}}.b = vadd(v{{[0-9]+}}.b,v{{[0-9]+}}.b):sat
+define <128 x i8> @test2(<128 x i8>* %a0, <128 x i8>* %a1) #0 {
+entry:
+  %wide.load = load <128 x i8>, <128 x i8>* %a0, align 1
+  %wide.load62 = load <128 x i8>, <128 x i8>* %a1, align 1
+  %add = call <128 x i8> @llvm.sadd.sat.v128i8(<128 x i8> %wide.load, <128 x i8> %wide.load62)
+  ret <128 x i8> %add
+}
+
+; CHECK-LABEL: test3
+; CHECK: v{{.*}}.uh = vadd(v{{[0-9]+}}.uh,v{{[0-9]+}}.uh):sat
+define <64 x i16> @test3(<64 x i16>* %a0, <64 x i16>* %a1) #0 {
+entry:
+  %wide.load = load <64 x i16>, <64 x i16>* %a0, align 1
+  %wide.load62 = load <64 x i16>, <64 x i16>* %a1, align 1
+  %add = call <64 x i16> @llvm.uadd.sat.v64i16(<64 x i16> %wide.load, <64 x i16> %wide.load62)
+  ret <64 x i16> %add
+}
+
+; CHECK-LABEL: test4
+; CHECK: v{{.*}}.h = vadd(v{{[0-9]+}}.h,v{{[0-9]+}}.h):sat
+define <64 x i16> @test4(<64 x i16>* %a0, <64 x i16>* %a1) #0 {
+entry:
+  %wide.load = load <64 x i16>, <64 x i16>* %a0, align 1
+  %wide.load62 = load <64 x i16>, <64 x i16>* %a1, align 1
+  %add = call <64 x i16> @llvm.sadd.sat.v64i16(<64 x i16> %wide.load, <64 x i16> %wide.load62)
+  ret <64 x i16> %add
+}
+
+; CHECK-LABEL: test5
+; CHECK: v{{.*}}.uw = vadd(v{{[0-9]+}}.uw,v{{[0-9]+}}.uw):sat
+define <32 x i32> @test5(<32 x i32>* %a0, <32 x i32>* %a1) #0 {
+entry:
+  %wide.load = load <32 x i32>, <32 x i32>* %a0, align 1
+  %wide.load62 = load <32 x i32>, <32 x i32>* %a1, align 1
+  %add = call <32 x i32> @llvm.uadd.sat.v32i32(<32 x i32> %wide.load, <32 x i32> %wide.load62)
+  ret <32 x i32> %add
+}
+
+; CHECK-LABEL: test6
+; CHECK: v{{.*}}.w = vadd(v{{[0-9]+}}.w,v{{[0-9]+}}.w):sat
+define <32 x i32> @test6(<32 x i32>* %a0, <32 x i32>* %a1) #0 {
+entry:
+  %wide.load = load <32 x i32>, <32 x i32>* %a0, align 1
+  %wide.load62 = load <32 x i32>, <32 x i32>* %a1, align 1
+  %add = call <32 x i32> @llvm.sadd.sat.v32i32(<32 x i32> %wide.load, <32 x i32> %wide.load62)
+  ret <32 x i32> %add
+}
+
+; CHECK-LABEL: test7
+; CHECK: v{{[0-9]+}}:{{[0-9]+}}.ub = vadd(v{{[0-9]+}}:{{[0-9]+}}.ub,v{{[0-9]+}}:{{[0-9]+}}.ub):sat
+define <256 x i8> @test7(<256 x i8>* %a0, <256 x i8>* %a1) #0 {
+entry:
+  %wide.load = load <256 x i8>, <256 x i8>* %a0, align 1
+  %wide.load62 = load <256 x i8>, <256 x i8>* %a1, align 1
+  %add = call <256 x i8> @llvm.uadd.sat.v256i8(<256 x i8> %wide.load, <256 x i8> %wide.load62)
+  ret <256 x i8> %add
+}
+
+; CHECK-LABEL: test8
+; CHECK: v{{[0-9]+}}:{{[0-9]+}}.b = vadd(v{{[0-9]+}}:{{[0-9]+}}.b,v{{[0-9]+}}:{{[0-9]+}}.b):sat
+define <256 x i8> @test8(<256 x i8>* %a0, <256 x i8>* %a1) #0 {
+entry:
+  %wide.load = load <256 x i8>, <256 x i8>* %a0, align 1
+  %wide.load62 = load <256 x i8>, <256 x i8>* %a1, align 1
+  %add = call <256 x i8> @llvm.sadd.sat.v256i8(<256 x i8> %wide.load, <256 x i8> %wide.load62)
+  ret <256 x i8> %add
+}
+
+; CHECK-LABEL: test9
+; CHECK: v{{[0-9]+}}:{{[0-9]+}}.uh = vadd(v{{[0-9]+}}:{{[0-9]+}}.uh,v{{[0-9]+}}:{{[0-9]+}}.uh):sat
+define <128 x i16> @test9(<128 x i16>* %a0, <128 x i16>* %a1) #0 {
+entry:
+  %wide.load = load <128 x i16>, <128 x i16>* %a0, align 1
+  %wide.load62 = load <128 x i16>, <128 x i16>* %a1, align 1
+  %add = call <128 x i16> @llvm.uadd.sat.v128i16(<128 x i16> %wide.load, <128 x i16> %wide.load62)
+  ret <128 x i16> %add
+}
+
+; CHECK-LABEL: test10
+; CHECK: v{{[0-9]+}}:{{[0-9]+}}.h = vadd(v{{[0-9]+}}:{{[0-9]+}}.h,v{{[0-9]+}}:{{[0-9]+}}.h):sat
+define <128 x i16> @test10(<128 x i16>* %a0, <128 x i16>* %a1) #0 {
+entry:
+  %wide.load = load <128 x i16>, <128 x i16>* %a0, align 1
+  %wide.load62 = load <128 x i16>, <128 x i16>* %a1, align 1
+  %add = call <128 x i16> @llvm.sadd.sat.v128i16(<128 x i16> %wide.load, <128 x i16> %wide.load62)
+  ret <128 x i16> %add
+}
+
+; CHECK-LABEL: test11
+; CHECK: v{{[0-9]+}}:{{[0-9]+}}.uw = vadd(v{{[0-9]+}}:{{[0-9]+}}.uw,v{{[0-9]+}}:{{[0-9]+}}.uw):sat
+define <64 x i32> @test11(<64 x i32>* %a0, <64 x i32>* %a1) #0 {
+entry:
+  %wide.load = load <64 x i32>, <64 x i32>* %a0, align 1
+  %wide.load62 = load <64 x i32>, <64 x i32>* %a1, align 1
+  %add = call <64 x i32> @llvm.uadd.sat.v64i32(<64 x i32> %wide.load, <64 x i32> %wide.load62)
+  ret <64 x i32> %add
+}
+
+; CHECK-LABEL: test12
+; CHECK: v{{[0-9]+}}:{{[0-9]+}}.w = vadd(v{{[0-9]+}}:{{[0-9]+}}.w,v{{[0-9]+}}:{{[0-9]+}}.w):sat
+define <64 x i32> @test12(<64 x i32>* %a0, <64 x i32>* %a1) #0 {
+entry:
+  %wide.load = load <64 x i32>, <64 x i32>* %a0, align 1
+  %wide.load62 = load <64 x i32>, <64 x i32>* %a1, align 1
+  %add = call <64 x i32> @llvm.sadd.sat.v64i32(<64 x i32> %wide.load, <64 x i32> %wide.load62)
+  ret <64 x i32> %add
+}
+
+; CHECK-LABEL: test13
+; CHECK: r{{[0-9]+}} = add(r{{[0-9]+}},r{{[0-9]+}}):sat
+define i32 @test13(i32 %a0, i32 %a1) #0 {
+entry:
+  %add = call i32 @llvm.sadd.sat.i32(i32 %a0, i32 %a1)
+  ret i32 %add
+}
+
+; CHECK-LABEL: test14
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} = add(r{{[0-9]+}}:{{[0-9]+}},r{{[0-9]+}}:{{[0-9]+}}):sat
+define i64 @test14(i64 %a0, i64 %a1) #0 {
+entry:
+  %add = call i64 @llvm.sadd.sat.i64(i64 %a0, i64 %a1)
+  ret i64 %add
+}
+
+declare <128 x i8> @llvm.uadd.sat.v128i8(<128 x i8>, <128 x i8>) #1
+declare <128 x i8> @llvm.sadd.sat.v128i8(<128 x i8>, <128 x i8>) #1
+declare <64 x i16> @llvm.uadd.sat.v64i16(<64 x i16>, <64 x i16>) #1
+declare <64 x i16> @llvm.sadd.sat.v64i16(<64 x i16>, <64 x i16>) #1
+declare <32 x i32> @llvm.uadd.sat.v32i32(<32 x i32>, <32 x i32>) #1
+declare <32 x i32> @llvm.sadd.sat.v32i32(<32 x i32>, <32 x i32>) #1
+declare <256 x i8> @llvm.uadd.sat.v256i8(<256 x i8>, <256 x i8>) #1
+declare <256 x i8> @llvm.sadd.sat.v256i8(<256 x i8>, <256 x i8>) #1
+declare <128 x i16> @llvm.uadd.sat.v128i16(<128 x i16>, <128 x i16>) #1
+declare <128 x i16> @llvm.sadd.sat.v128i16(<128 x i16>, <128 x i16>) #1
+declare <64 x i32> @llvm.uadd.sat.v64i32(<64 x i32>, <64 x i32>) #1
+declare <64 x i32> @llvm.sadd.sat.v64i32(<64 x i32>, <64 x i32>) #1
+declare i32 @llvm.sadd.sat.i32(i32, i32)
+declare i64 @llvm.sadd.sat.i64(i64, i64)
+
+attributes #0 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length128b" }
+attributes #1 = { nounwind readnone speculatable willreturn }

Original file line number	Diff line number	Diff line change
`@@ -1762,6 +1762,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,`
`1762`	`1762`	`setOperationAction(ISD::SRL, VT, Custom);`
`1763`	`1763`	`}`
`1764`	`1764`
	`1765`	`+ setOperationAction(ISD::SADDSAT, MVT::i32, Legal);`
	`1766`	`+ setOperationAction(ISD::SADDSAT, MVT::i64, Legal);`
	`1767`	`+`
`1765`	`1768`	`// Extending loads from (native) vectors of i8 into (native) vectors of i16`
`1766`	`1769`	`// are legal.`
`1767`	`1770`	`setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal);`