Skip to content

Commit f9d3278

Browse files
aankit-ca, aankit-quic, jverma-quic
authored
[Hexagon] Add saturating add instructions (#148132)
Generate the saturating add instructions for sadd.sat (scalar and HVX vector types) and for uadd.sat (HVX vector types). Co-authored-by: aankit-quic <aankit@quicinc.com>; Co-authored-by: Jyotsna Verma <jverma@quicinc.com>
1 parent aa7bf60 commit f9d3278

File tree

5 files changed

+187
-0
lines changed

5 files changed

+187
-0
lines changed

llvm/lib/Target/Hexagon/HexagonISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1762,6 +1762,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
17621762
setOperationAction(ISD::SRL, VT, Custom);
17631763
}
17641764

1765+
setOperationAction(ISD::SADDSAT, MVT::i32, Legal);
1766+
setOperationAction(ISD::SADDSAT, MVT::i64, Legal);
1767+
17651768
// Extending loads from (native) vectors of i8 into (native) vectors of i16
17661769
// are legal.
17671770
setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal);

llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,8 @@ HexagonTargetLowering::initializeHVXLowering() {
206206
setOperationAction(ISD::CTLZ, T, Legal);
207207
setOperationAction(ISD::SELECT, T, Legal);
208208
setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
209+
setOperationAction(ISD::UADDSAT, T, Legal);
210+
setOperationAction(ISD::SADDSAT, T, Legal);
209211
if (T != ByteV) {
210212
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
211213
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
@@ -297,6 +299,8 @@ HexagonTargetLowering::initializeHVXLowering() {
297299
setOperationAction(ISD::CTPOP, T, Custom);
298300

299301
setOperationAction(ISD::ADD, T, Legal);
302+
setOperationAction(ISD::UADDSAT, T, Legal);
303+
setOperationAction(ISD::SADDSAT, T, Legal);
300304
setOperationAction(ISD::SUB, T, Legal);
301305
setOperationAction(ISD::MUL, T, Custom);
302306
setOperationAction(ISD::MULHS, T, Custom);

llvm/lib/Target/Hexagon/HexagonPatterns.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1524,6 +1524,14 @@ def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>;
15241524
def: Pat<(and I32:$Rs, anyimm:$s10), (A2_andir I32:$Rs, imm:$s10)>;
15251525
def: Pat<(sub anyimm:$s10, I32:$Rs), (A2_subri imm:$s10, I32:$Rs)>;
15261526

1527+
// Selection pattern for a two-operand saturating operation: matches
// (Op Rs, Rt) yielding ResType, with both operands constrained by the same
// RegPred, and selects Inst applied to those two operands.
class OpR_RR_pat_sat<InstHexagon Inst, SDNode Op, ValueType ResType,
                     PatFrag RegPred>
  : Pat<(ResType (Op RegPred:$Rs, RegPred:$Rt)),
        (Inst RegPred:$Rs, RegPred:$Rt)>;

// Scalar signed saturating add for i32 and i64.
def: OpR_RR_pat_sat<A2_addsat,  saddsat, i32, I32>;
def: OpR_RR_pat_sat<A2_addpsat, saddsat, i64, I64>;
1534+
15271535
def: OpR_RR_pat<A2_add, Add, i32, I32>;
15281536
def: OpR_RR_pat<A2_sub, Sub, i32, I32>;
15291537
def: OpR_RR_pat<A2_and, And, i32, I32>;

llvm/lib/Target/Hexagon/HexagonPatternsHVX.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,21 @@ let Predicates = [UseHVX] in {
426426
(V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
427427
}
428428

429+
// Saturating add (uaddsat / saddsat) selection patterns, available only when
// HVX is in use.
let Predicates = [UseHVX] in {
  // Operands matched by the HVI* predicates: unsigned, then signed.
  def: OpR_RR_pat_sat<V6_vaddubsat,    uaddsat, VecI8,   HVI8>;
  def: OpR_RR_pat_sat<V6_vadduhsat,    uaddsat, VecI16,  HVI16>;
  def: OpR_RR_pat_sat<V6_vadduwsat,    uaddsat, VecI32,  HVI32>;
  def: OpR_RR_pat_sat<V6_vaddbsat,     saddsat, VecI8,   HVI8>;
  def: OpR_RR_pat_sat<V6_vaddhsat,     saddsat, VecI16,  HVI16>;
  def: OpR_RR_pat_sat<V6_vaddwsat,     saddsat, VecI32,  HVI32>;

  // "_dv" instructions, operands matched by the HWI* predicates: unsigned,
  // then signed.
  def: OpR_RR_pat_sat<V6_vaddubsat_dv, uaddsat, VecPI8,  HWI8>;
  def: OpR_RR_pat_sat<V6_vadduhsat_dv, uaddsat, VecPI16, HWI16>;
  def: OpR_RR_pat_sat<V6_vadduwsat_dv, uaddsat, VecPI32, HWI32>;
  def: OpR_RR_pat_sat<V6_vaddbsat_dv,  saddsat, VecPI8,  HWI8>;
  def: OpR_RR_pat_sat<V6_vaddhsat_dv,  saddsat, VecPI16, HWI16>;
  def: OpR_RR_pat_sat<V6_vaddwsat_dv,  saddsat, VecPI32, HWI32>;
}
443+
429444
// For now, we always deal with vector floating point in SF mode.
430445
class OpR_RR_pat_conv<InstHexagon MI, PatFrag Op, ValueType ResType,
431446
PatFrag RsPred, PatFrag RtPred = RsPred>

llvm/test/CodeGen/Hexagon/addsat.ll

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
; RUN: llc -march=hexagon < %s | FileCheck %s
2+
3+
; Test for saturating add instructions.
4+
5+
; uadd.sat on <128 x i8> must select the unsigned-byte saturating vector add.
; The label is anchored with ':' so it cannot also match test10..test14.
; CHECK-LABEL: test1:
; CHECK: v{{[0-9]+}}.ub = vadd(v{{[0-9]+}}.ub,v{{[0-9]+}}.ub):sat
define <128 x i8> @test1(ptr %a0, ptr %a1) #0 {
entry:
  %wide.load = load <128 x i8>, ptr %a0, align 1
  %wide.load62 = load <128 x i8>, ptr %a1, align 1
  %add = call <128 x i8> @llvm.uadd.sat.v128i8(<128 x i8> %wide.load, <128 x i8> %wide.load62)
  ret <128 x i8> %add
}
14+
15+
; sadd.sat on <128 x i8> must select the signed-byte saturating vector add.
; CHECK-LABEL: test2:
; CHECK: v{{[0-9]+}}.b = vadd(v{{[0-9]+}}.b,v{{[0-9]+}}.b):sat
define <128 x i8> @test2(ptr %a0, ptr %a1) #0 {
entry:
  %wide.load = load <128 x i8>, ptr %a0, align 1
  %wide.load62 = load <128 x i8>, ptr %a1, align 1
  %add = call <128 x i8> @llvm.sadd.sat.v128i8(<128 x i8> %wide.load, <128 x i8> %wide.load62)
  ret <128 x i8> %add
}
24+
25+
; uadd.sat on <64 x i16> must select the unsigned-halfword saturating vector
; add.
; CHECK-LABEL: test3:
; CHECK: v{{[0-9]+}}.uh = vadd(v{{[0-9]+}}.uh,v{{[0-9]+}}.uh):sat
define <64 x i16> @test3(ptr %a0, ptr %a1) #0 {
entry:
  %wide.load = load <64 x i16>, ptr %a0, align 1
  %wide.load62 = load <64 x i16>, ptr %a1, align 1
  %add = call <64 x i16> @llvm.uadd.sat.v64i16(<64 x i16> %wide.load, <64 x i16> %wide.load62)
  ret <64 x i16> %add
}
34+
35+
; sadd.sat on <64 x i16> must select the signed-halfword saturating vector add.
; CHECK-LABEL: test4:
; CHECK: v{{[0-9]+}}.h = vadd(v{{[0-9]+}}.h,v{{[0-9]+}}.h):sat
define <64 x i16> @test4(ptr %a0, ptr %a1) #0 {
entry:
  %wide.load = load <64 x i16>, ptr %a0, align 1
  %wide.load62 = load <64 x i16>, ptr %a1, align 1
  %add = call <64 x i16> @llvm.sadd.sat.v64i16(<64 x i16> %wide.load, <64 x i16> %wide.load62)
  ret <64 x i16> %add
}
44+
45+
; uadd.sat on <32 x i32> must select the unsigned-word saturating vector add.
; CHECK-LABEL: test5:
; CHECK: v{{[0-9]+}}.uw = vadd(v{{[0-9]+}}.uw,v{{[0-9]+}}.uw):sat
define <32 x i32> @test5(ptr %a0, ptr %a1) #0 {
entry:
  %wide.load = load <32 x i32>, ptr %a0, align 1
  %wide.load62 = load <32 x i32>, ptr %a1, align 1
  %add = call <32 x i32> @llvm.uadd.sat.v32i32(<32 x i32> %wide.load, <32 x i32> %wide.load62)
  ret <32 x i32> %add
}
54+
55+
; sadd.sat on <32 x i32> must select the signed-word saturating vector add.
; CHECK-LABEL: test6:
; CHECK: v{{[0-9]+}}.w = vadd(v{{[0-9]+}}.w,v{{[0-9]+}}.w):sat
define <32 x i32> @test6(ptr %a0, ptr %a1) #0 {
entry:
  %wide.load = load <32 x i32>, ptr %a0, align 1
  %wide.load62 = load <32 x i32>, ptr %a1, align 1
  %add = call <32 x i32> @llvm.sadd.sat.v32i32(<32 x i32> %wide.load, <32 x i32> %wide.load62)
  ret <32 x i32> %add
}
64+
65+
; uadd.sat on <256 x i8> must select the unsigned-byte saturating add on
; vector register pairs.
; CHECK-LABEL: test7:
; CHECK: v{{[0-9]+}}:{{[0-9]+}}.ub = vadd(v{{[0-9]+}}:{{[0-9]+}}.ub,v{{[0-9]+}}:{{[0-9]+}}.ub):sat
define <256 x i8> @test7(ptr %a0, ptr %a1) #0 {
entry:
  %wide.load = load <256 x i8>, ptr %a0, align 1
  %wide.load62 = load <256 x i8>, ptr %a1, align 1
  %add = call <256 x i8> @llvm.uadd.sat.v256i8(<256 x i8> %wide.load, <256 x i8> %wide.load62)
  ret <256 x i8> %add
}
74+
75+
; sadd.sat on <256 x i8> must select the signed-byte saturating add on vector
; register pairs.
; CHECK-LABEL: test8:
; CHECK: v{{[0-9]+}}:{{[0-9]+}}.b = vadd(v{{[0-9]+}}:{{[0-9]+}}.b,v{{[0-9]+}}:{{[0-9]+}}.b):sat
define <256 x i8> @test8(ptr %a0, ptr %a1) #0 {
entry:
  %wide.load = load <256 x i8>, ptr %a0, align 1
  %wide.load62 = load <256 x i8>, ptr %a1, align 1
  %add = call <256 x i8> @llvm.sadd.sat.v256i8(<256 x i8> %wide.load, <256 x i8> %wide.load62)
  ret <256 x i8> %add
}
84+
85+
; uadd.sat on <128 x i16> must select the unsigned-halfword saturating add on
; vector register pairs.
; CHECK-LABEL: test9:
; CHECK: v{{[0-9]+}}:{{[0-9]+}}.uh = vadd(v{{[0-9]+}}:{{[0-9]+}}.uh,v{{[0-9]+}}:{{[0-9]+}}.uh):sat
define <128 x i16> @test9(ptr %a0, ptr %a1) #0 {
entry:
  %wide.load = load <128 x i16>, ptr %a0, align 1
  %wide.load62 = load <128 x i16>, ptr %a1, align 1
  %add = call <128 x i16> @llvm.uadd.sat.v128i16(<128 x i16> %wide.load, <128 x i16> %wide.load62)
  ret <128 x i16> %add
}
94+
95+
; sadd.sat on <128 x i16> must select the signed-halfword saturating add on
; vector register pairs.
; CHECK-LABEL: test10:
; CHECK: v{{[0-9]+}}:{{[0-9]+}}.h = vadd(v{{[0-9]+}}:{{[0-9]+}}.h,v{{[0-9]+}}:{{[0-9]+}}.h):sat
define <128 x i16> @test10(ptr %a0, ptr %a1) #0 {
entry:
  %wide.load = load <128 x i16>, ptr %a0, align 1
  %wide.load62 = load <128 x i16>, ptr %a1, align 1
  %add = call <128 x i16> @llvm.sadd.sat.v128i16(<128 x i16> %wide.load, <128 x i16> %wide.load62)
  ret <128 x i16> %add
}
104+
105+
; uadd.sat on <64 x i32> must select the unsigned-word saturating add on
; vector register pairs.
; CHECK-LABEL: test11:
; CHECK: v{{[0-9]+}}:{{[0-9]+}}.uw = vadd(v{{[0-9]+}}:{{[0-9]+}}.uw,v{{[0-9]+}}:{{[0-9]+}}.uw):sat
define <64 x i32> @test11(ptr %a0, ptr %a1) #0 {
entry:
  %wide.load = load <64 x i32>, ptr %a0, align 1
  %wide.load62 = load <64 x i32>, ptr %a1, align 1
  %add = call <64 x i32> @llvm.uadd.sat.v64i32(<64 x i32> %wide.load, <64 x i32> %wide.load62)
  ret <64 x i32> %add
}
114+
115+
; sadd.sat on <64 x i32> must select the signed-word saturating add on vector
; register pairs.
; CHECK-LABEL: test12:
; CHECK: v{{[0-9]+}}:{{[0-9]+}}.w = vadd(v{{[0-9]+}}:{{[0-9]+}}.w,v{{[0-9]+}}:{{[0-9]+}}.w):sat
define <64 x i32> @test12(ptr %a0, ptr %a1) #0 {
entry:
  %wide.load = load <64 x i32>, ptr %a0, align 1
  %wide.load62 = load <64 x i32>, ptr %a1, align 1
  %add = call <64 x i32> @llvm.sadd.sat.v64i32(<64 x i32> %wide.load, <64 x i32> %wide.load62)
  ret <64 x i32> %add
}
124+
125+
; Scalar sadd.sat on i32 must select the saturating register add.
; CHECK-LABEL: test13:
; CHECK: r{{[0-9]+}} = add(r{{[0-9]+}},r{{[0-9]+}}):sat
define i32 @test13(i32 %a0, i32 %a1) #0 {
entry:
  %add = call i32 @llvm.sadd.sat.i32(i32 %a0, i32 %a1)
  ret i32 %add
}
132+
133+
; Scalar sadd.sat on i64 must select the saturating add on register pairs.
; CHECK-LABEL: test14:
; CHECK: r{{[0-9]+}}:{{[0-9]+}} = add(r{{[0-9]+}}:{{[0-9]+}},r{{[0-9]+}}:{{[0-9]+}}):sat
define i64 @test14(i64 %a0, i64 %a1) #0 {
entry:
  %add = call i64 @llvm.sadd.sat.i64(i64 %a0, i64 %a1)
  ret i64 %add
}
140+
141+
declare <128 x i8> @llvm.uadd.sat.v128i8(<128 x i8>, <128 x i8>) #1
142+
declare <128 x i8> @llvm.sadd.sat.v128i8(<128 x i8>, <128 x i8>) #1
143+
declare <64 x i16> @llvm.uadd.sat.v64i16(<64 x i16>, <64 x i16>) #1
144+
declare <64 x i16> @llvm.sadd.sat.v64i16(<64 x i16>, <64 x i16>) #1
145+
declare <32 x i32> @llvm.uadd.sat.v32i32(<32 x i32>, <32 x i32>) #1
146+
declare <32 x i32> @llvm.sadd.sat.v32i32(<32 x i32>, <32 x i32>) #1
147+
declare <256 x i8> @llvm.uadd.sat.v256i8(<256 x i8>, <256 x i8>) #1
148+
declare <256 x i8> @llvm.sadd.sat.v256i8(<256 x i8>, <256 x i8>) #1
149+
declare <128 x i16> @llvm.uadd.sat.v128i16(<128 x i16>, <128 x i16>) #1
150+
declare <128 x i16> @llvm.sadd.sat.v128i16(<128 x i16>, <128 x i16>) #1
151+
declare <64 x i32> @llvm.uadd.sat.v64i32(<64 x i32>, <64 x i32>) #1
152+
declare <64 x i32> @llvm.sadd.sat.v64i32(<64 x i32>, <64 x i32>) #1
153+
declare i32 @llvm.sadd.sat.i32(i32, i32)
154+
declare i64 @llvm.sadd.sat.i64(i64, i64)
155+
156+
attributes #0 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length128b" }
157+
attributes #1 = { nounwind readnone speculatable willreturn }

0 commit comments

Comments
 (0)