Skip to content

Commit 4155cc0

Browse files
committed
[SystemZ] Recognize carry/borrow computation
Generate code using the VECTOR ADD COMPUTE CARRY and VECTOR SUBTRACT COMPUTE BORROW INDICATION instructions to implement open-coded IR with those semantics. Handles integer vector types as well as i128. Fixes: llvm#129608
1 parent 4a4987b commit 4155cc0

File tree

5 files changed

+402
-0
lines changed

5 files changed

+402
-0
lines changed

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7343,6 +7343,37 @@ SDValue SystemZTargetLowering::combineZERO_EXTEND(
73437343
}
73447344
}
73457345
}
7346+
// Recognize patterns for VECTOR SUBTRACT COMPUTE BORROW INDICATION
7347+
// and VECTOR ADD COMPUTE CARRY for i128:
7348+
// (zext (setcc_uge X Y)) --> (VSCBI X Y)
7349+
// (zext (setcc_ule Y X)) --> (VSCBI X Y)
7350+
// (zext (setcc_ult (add X Y) X/Y)) --> (VACC X Y)
7351+
// (zext (setcc_ugt X/Y (add X Y))) --> (VACC X Y)
7352+
// For vector types, these patterns are recognized in the .td file.
7353+
if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT == MVT::i128 &&
7354+
N0.getOperand(0).getValueType() == VT) {
7355+
SDValue Op0 = N0.getOperand(0);
7356+
SDValue Op1 = N0.getOperand(1);
7357+
const ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7358+
switch (CC) {
7359+
case ISD::SETULE:
7360+
std::swap(Op0, Op1);
7361+
[[fallthrough]];
7362+
case ISD::SETUGE:
7363+
return DAG.getNode(SystemZISD::VSCBI, SDLoc(N0), VT, Op0, Op1);
7364+
case ISD::SETUGT:
7365+
std::swap(Op0, Op1);
7366+
[[fallthrough]];
7367+
case ISD::SETULT:
7368+
if (Op0->hasOneUse() && Op0->getOpcode() == ISD::ADD &&
7369+
(Op0->getOperand(0) == Op1 || Op0->getOperand(1) == Op1))
7370+
return DAG.getNode(SystemZISD::VACC, SDLoc(N0), VT, Op0->getOperand(0),
7371+
Op0->getOperand(1));
7372+
break;
7373+
default:
7374+
break;
7375+
}
7376+
}
73467377

73477378
return SDValue();
73487379
}

llvm/lib/Target/SystemZ/SystemZInstrVector.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,6 +1287,23 @@ let Predicates = [FeatureVectorEnhancements3] in {
12871287
(VMNLQ VR128:$x, VR128:$y)>;
12881288
}
12891289

1290+
// Instantiate comparison patterns to recognize VACC/VSCBI for TYPE.
1291+
multiclass IntegerComputeCarryOrBorrow<ValueType type,
1292+
Instruction vacc, Instruction vscbi> {
1293+
let Predicates = [FeatureVector] in {
1294+
def : Pat<(z_vzext1 (type (z_vicmphl VR128:$x, (add VR128:$x, VR128:$y)))),
1295+
(vacc VR128:$x, VR128:$y)>;
1296+
def : Pat<(z_vzext1 (type (z_vicmphl VR128:$y, (add VR128:$x, VR128:$y)))),
1297+
(vacc VR128:$x, VR128:$y)>;
1298+
def : Pat<(z_vzext1 (z_vnot (type (z_vicmphl VR128:$y, VR128:$x)))),
1299+
(vscbi VR128:$x, VR128:$y)>;
1300+
}
1301+
}
1302+
defm : IntegerComputeCarryOrBorrow<v16i8, VACCB, VSCBIB>;
1303+
defm : IntegerComputeCarryOrBorrow<v8i16, VACCH, VSCBIH>;
1304+
defm : IntegerComputeCarryOrBorrow<v4i32, VACCF, VSCBIF>;
1305+
defm : IntegerComputeCarryOrBorrow<v2i64, VACCG, VSCBIG>;
1306+
12901307
// Instantiate full-vector shifts.
12911308
multiclass FullVectorShiftOps<SDPatternOperator shift,
12921309
Instruction sbit, Instruction sbyte> {

llvm/lib/Target/SystemZ/SystemZOperators.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,6 +1058,13 @@ def z_vneg : PatFrag<(ops node:$x), (sub immAllZerosV, node:$x)>;
10581058
// Bitwise negation on vectors.
10591059
def z_vnot : PatFrag<(ops node:$x), (xor node:$x, immAllOnesV)>;
10601060

1061+
// In-register element-wise zero extension from i1 on vectors.
1062+
def vsplat_imm_eq_1 : PatFrag<(ops), (build_vector), [{
1063+
APInt Imm;
1064+
return ISD::isConstantSplatVector(N, Imm) && Imm == 1;
1065+
}]>;
1066+
def z_vzext1 : PatFrag<(ops node:$x), (and node:$x, vsplat_imm_eq_1)>;
1067+
10611068
// Signed "integer greater than zero" on vectors.
10621069
def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, immAllZerosV)>;
10631070

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; Test usage of VACC/VSCBI.
3+
;
4+
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
5+
6+
define i128 @i128_subc_1(i128 %a, i128 %b) unnamed_addr {
7+
; CHECK-LABEL: i128_subc_1:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: vl %v0, 0(%r4), 3
10+
; CHECK-NEXT: vl %v1, 0(%r3), 3
11+
; CHECK-NEXT: vscbiq %v0, %v1, %v0
12+
; CHECK-NEXT: vst %v0, 0(%r2), 3
13+
; CHECK-NEXT: br %r14
14+
%cmp = icmp uge i128 %a, %b
15+
%ext = zext i1 %cmp to i128
16+
ret i128 %ext
17+
}
18+
19+
define i128 @i128_subc_2(i128 %a, i128 %b) unnamed_addr {
20+
; CHECK-LABEL: i128_subc_2:
21+
; CHECK: # %bb.0:
22+
; CHECK-NEXT: vl %v0, 0(%r3), 3
23+
; CHECK-NEXT: vl %v1, 0(%r4), 3
24+
; CHECK-NEXT: vscbiq %v0, %v1, %v0
25+
; CHECK-NEXT: vst %v0, 0(%r2), 3
26+
; CHECK-NEXT: br %r14
27+
%cmp = icmp ule i128 %a, %b
28+
%ext = zext i1 %cmp to i128
29+
ret i128 %ext
30+
}
31+
32+
define i128 @i128_addc_1(i128 %a, i128 %b) {
33+
; CHECK-LABEL: i128_addc_1:
34+
; CHECK: # %bb.0:
35+
; CHECK-NEXT: vl %v0, 0(%r4), 3
36+
; CHECK-NEXT: vl %v1, 0(%r3), 3
37+
; CHECK-NEXT: vaccq %v0, %v1, %v0
38+
; CHECK-NEXT: vst %v0, 0(%r2), 3
39+
; CHECK-NEXT: br %r14
40+
%sum = add i128 %a, %b
41+
%cmp = icmp ult i128 %sum, %a
42+
%ext = zext i1 %cmp to i128
43+
ret i128 %ext
44+
}
45+
46+
define i128 @i128_addc_2(i128 %a, i128 %b) {
47+
; CHECK-LABEL: i128_addc_2:
48+
; CHECK: # %bb.0:
49+
; CHECK-NEXT: vl %v0, 0(%r4), 3
50+
; CHECK-NEXT: vl %v1, 0(%r3), 3
51+
; CHECK-NEXT: vaccq %v0, %v1, %v0
52+
; CHECK-NEXT: vst %v0, 0(%r2), 3
53+
; CHECK-NEXT: br %r14
54+
%sum = add i128 %a, %b
55+
%cmp = icmp ult i128 %sum, %b
56+
%ext = zext i1 %cmp to i128
57+
ret i128 %ext
58+
}
59+
60+
define i128 @i128_addc_3(i128 %a, i128 %b) {
61+
; CHECK-LABEL: i128_addc_3:
62+
; CHECK: # %bb.0:
63+
; CHECK-NEXT: vl %v0, 0(%r4), 3
64+
; CHECK-NEXT: vl %v1, 0(%r3), 3
65+
; CHECK-NEXT: vaccq %v0, %v1, %v0
66+
; CHECK-NEXT: vst %v0, 0(%r2), 3
67+
; CHECK-NEXT: br %r14
68+
%sum = add i128 %a, %b
69+
%cmp = icmp ugt i128 %a, %sum
70+
%ext = zext i1 %cmp to i128
71+
ret i128 %ext
72+
}
73+
74+
define i128 @i128_addc_4(i128 %a, i128 %b) {
75+
; CHECK-LABEL: i128_addc_4:
76+
; CHECK: # %bb.0:
77+
; CHECK-NEXT: vl %v0, 0(%r4), 3
78+
; CHECK-NEXT: vl %v1, 0(%r3), 3
79+
; CHECK-NEXT: vaccq %v0, %v1, %v0
80+
; CHECK-NEXT: vst %v0, 0(%r2), 3
81+
; CHECK-NEXT: br %r14
82+
%sum = add i128 %a, %b
83+
%cmp = icmp ugt i128 %b, %sum
84+
%ext = zext i1 %cmp to i128
85+
ret i128 %ext
86+
}
87+

0 commit comments

Comments
 (0)