
Commit 6705d94

Revert "[SDAG] Allow scalable vectors in ComputeKnownBits"
This reverts commit bc0fea0. A "timeout for a Halide Hexagon test" was reported against that change; revert until the investigation is complete.
1 parent 102f05b commit 6705d94
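The change below reinstates an early bail-out in SelectionDAG::computeKnownBits for scalable vector types. As a minimal, hedged sketch of the observable difference (illustration only, not code from the patch): before the revert a splatted constant in a scalable vector could be reported exactly via the now-removed ISD::SPLAT_VECTOR handling, while the restored bail-out reports nothing known for any scalable-vector value. The sketch uses only the public KnownBits helpers.

// Minimal sketch, assuming only the public KnownBits API; illustration, not
// code from this patch.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // What computeKnownBits could report for a splat of the i16 constant 2
  // before the revert (via the removed ISD::SPLAT_VECTOR handling).
  KnownBits SplatOfTwo = KnownBits::makeConstant(APInt(16, 2));

  // What the restored early bail-out reports for any scalable-vector value:
  // a KnownBits of the scalar width with no bits known.
  KnownBits BailOut(16);

  outs() << "splat known to be 2: "
         << (SplatOfTwo.isConstant() && SplatOfTwo.getConstant() == 2 ? "yes"
                                                                      : "no")
         << "\n";
  outs() << "bail-out fully unknown: " << (BailOut.isUnknown() ? "yes" : "no")
         << "\n";
  return 0;
}

Callers such as DAGCombiner therefore see a fully-unknown KnownBits for scalable vectors again, which is why the AArch64 CHECK lines in the tests below change.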

4 files changed (+66, -64 lines)


llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Lines changed: 16 additions & 43 deletions

@@ -2910,10 +2910,14 @@ const APInt *SelectionDAG::getValidMaximumShiftAmountConstant(
 KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {
   EVT VT = Op.getValueType();
 
-  // Since the number of lanes in a scalable vector is unknown at compile time,
-  // we track one bit which is implicitly broadcast to all lanes. This means
-  // that all lanes in a scalable vector are considered demanded.
-  APInt DemandedElts = VT.isFixedLengthVector()
+  // TOOD: Until we have a plan for how to represent demanded elements for
+  // scalable vectors, we can just bail out for now.
+  if (Op.getValueType().isScalableVector()) {
+    unsigned BitWidth = Op.getScalarValueSizeInBits();
+    return KnownBits(BitWidth);
+  }
+
+  APInt DemandedElts = VT.isVector()
                            ? APInt::getAllOnes(VT.getVectorNumElements())
                            : APInt(1, 1);
   return computeKnownBits(Op, DemandedElts, Depth);
@@ -2928,6 +2932,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
 
   KnownBits Known(BitWidth);   // Don't know anything.
 
+  // TOOD: Until we have a plan for how to represent demanded elements for
+  // scalable vectors, we can just bail out for now.
+  if (Op.getValueType().isScalableVector())
+    return Known;
+
   if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
     // We know all of the bits for a constant!
     return KnownBits::makeConstant(C->getAPIntValue());
@@ -2942,7 +2951,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
 
   KnownBits Known2;
   unsigned NumElts = DemandedElts.getBitWidth();
-  assert((!Op.getValueType().isFixedLengthVector() ||
+  assert((!Op.getValueType().isVector() ||
           NumElts == Op.getValueType().getVectorNumElements()) &&
          "Unexpected vector size");
 
@@ -2954,18 +2963,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
   case ISD::MERGE_VALUES:
     return computeKnownBits(Op.getOperand(Op.getResNo()), DemandedElts,
                             Depth + 1);
-  case ISD::SPLAT_VECTOR: {
-    SDValue SrcOp = Op.getOperand(0);
-    Known = computeKnownBits(SrcOp, Depth + 1);
-    if (SrcOp.getValueSizeInBits() != BitWidth) {
-      assert(SrcOp.getValueSizeInBits() > BitWidth &&
-             "Expected SPLAT_VECTOR implicit truncation");
-      Known = Known.trunc(BitWidth);
-    }
-    break;
-  }
   case ISD::BUILD_VECTOR:
-    assert(!Op.getValueType().isScalableVector());
     // Collect the known bits that are shared by every demanded vector element.
     Known.Zero.setAllBits(); Known.One.setAllBits();
     for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
@@ -2991,7 +2989,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     }
     break;
   case ISD::VECTOR_SHUFFLE: {
-    assert(!Op.getValueType().isScalableVector());
     // Collect the known bits that are shared by every vector element referenced
     // by the shuffle.
     APInt DemandedLHS, DemandedRHS;
@@ -3019,8 +3016,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     break;
   }
   case ISD::CONCAT_VECTORS: {
-    if (Op.getValueType().isScalableVector())
-      break;
     // Split DemandedElts and test each of the demanded subvectors.
     Known.Zero.setAllBits(); Known.One.setAllBits();
     EVT SubVectorVT = Op.getOperand(0).getValueType();
@@ -3041,8 +3036,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     break;
   }
   case ISD::INSERT_SUBVECTOR: {
-    if (Op.getValueType().isScalableVector())
-      break;
     // Demand any elements from the subvector and the remainder from the src its
     // inserted into.
     SDValue Src = Op.getOperand(0);
@@ -3070,7 +3063,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     // Offset the demanded elts by the subvector index.
     SDValue Src = Op.getOperand(0);
     // Bail until we can represent demanded elements for scalable vectors.
-    if (Op.getValueType().isScalableVector() || Src.getValueType().isScalableVector())
+    if (Src.getValueType().isScalableVector())
       break;
     uint64_t Idx = Op.getConstantOperandVal(1);
     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
@@ -3079,8 +3072,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     break;
   }
   case ISD::SCALAR_TO_VECTOR: {
-    if (Op.getValueType().isScalableVector())
-      break;
     // We know about scalar_to_vector as much as we know about it source,
     // which becomes the first element of otherwise unknown vector.
     if (DemandedElts != 1)
@@ -3094,9 +3085,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     break;
   }
   case ISD::BITCAST: {
-    if (Op.getValueType().isScalableVector())
-      break;
-
     SDValue N0 = Op.getOperand(0);
     EVT SubVT = N0.getValueType();
     unsigned SubBitWidth = SubVT.getScalarSizeInBits();
@@ -3418,8 +3406,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     if (ISD::isNON_EXTLoad(LD) && Cst) {
       // Determine any common known bits from the loaded constant pool value.
       Type *CstTy = Cst->getType();
-      if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits() &&
-          !Op.getValueType().isScalableVector()) {
+      if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits()) {
        // If its a vector splat, then we can (quickly) reuse the scalar path.
        // NOTE: We assume all elements match and none are UNDEF.
        if (CstTy->isVectorTy()) {
@@ -3493,8 +3480,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     break;
   }
   case ISD::ZERO_EXTEND_VECTOR_INREG: {
-    if (Op.getValueType().isScalableVector())
-      break;
     EVT InVT = Op.getOperand(0).getValueType();
     APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
     Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
@@ -3507,8 +3492,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     break;
   }
   case ISD::SIGN_EXTEND_VECTOR_INREG: {
-    if (Op.getValueType().isScalableVector())
-      break;
     EVT InVT = Op.getOperand(0).getValueType();
     APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
     Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
@@ -3525,8 +3508,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     break;
   }
   case ISD::ANY_EXTEND_VECTOR_INREG: {
-    if (Op.getValueType().isScalableVector())
-      break;
     EVT InVT = Op.getOperand(0).getValueType();
     APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
     Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
@@ -3692,9 +3673,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     break;
   }
   case ISD::INSERT_VECTOR_ELT: {
-    if (Op.getValueType().isScalableVector())
-      break;
-
     // If we know the element index, split the demand between the
     // source vector and the inserted element, otherwise assume we need
     // the original demanded vector elements and the value.
@@ -3861,11 +3839,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
   case ISD::INTRINSIC_WO_CHAIN:
   case ISD::INTRINSIC_W_CHAIN:
   case ISD::INTRINSIC_VOID:
-    // TODO: Probably okay to remove after audit; here to reduce change size
-    // in initial enablement patch for scalable vectors
-    if (Op.getValueType().isScalableVector())
-      break;
-
     // Allow the target to implement this method for its nodes.
     TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth);
     break;

llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
Lines changed: 1 addition & 2 deletions

@@ -55,8 +55,7 @@ define <vscale x 2 x i64> @index_ii_range() {
 define <vscale x 8 x i16> @index_ii_range_combine(i16 %a) {
 ; CHECK-LABEL: index_ii_range_combine:
 ; CHECK: // %bb.0:
-; CHECK-NEXT:    index z0.h, #0, #8
-; CHECK-NEXT:    orr z0.h, z0.h, #0x2
+; CHECK-NEXT:    index z0.h, #2, #8
 ; CHECK-NEXT:    ret
   %val = insertelement <vscale x 8 x i16> poison, i16 2, i32 0
   %val1 = shufflevector <vscale x 8 x i16> %val, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
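The restored single-instruction `index z0.h, #2, #8` comes from folding the splat of 2 straight into the INDEX start operand. With known bits available for the scalable splat, the generic combiner presumably rewrote the add of the splat as an OR (which the AArch64 INDEX combine then no longer matched), producing the two-instruction form removed above. Below is a hedged sketch of the bit reasoning that makes the two forms equivalent; the helper name is illustrative, not from LLVM.

// Hedged illustration (not from the patch) of why "index #0,#8 ; orr #0x2"
// and "index #2,#8" compute the same values: every element of the step-8
// sequence starting at 0 has its low three bits known zero, so OR-ing in 2
// behaves exactly like adding 2, which in turn can fold into the INDEX start.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;

// Returns true when OR-ing `C` into a value with the given known bits can
// never carry, i.e. it is equivalent to an ADD of `C`.
static bool orActsLikeAdd(const KnownBits &Known, const APInt &C) {
  return (Known.Zero & C) == C; // C only touches bits known to be zero.
}

int main() {
  KnownBits Step(16);
  Step.Zero.setLowBits(3); // elements 0, 8, 16, ... : low 3 bits are zero
  return orActsLikeAdd(Step, APInt(16, 2)) ? 0 : 1; // exits 0: the fold is sound
}

The same or/add flip appears to explain the dupq_i64_range change in the next file.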

llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
Lines changed: 1 addition & 1 deletion

@@ -574,7 +574,7 @@ define <vscale x 2 x i64> @dupq_i64_range(<vscale x 2 x i64> %a) {
 ; CHECK: // %bb.0:
 ; CHECK-NEXT:    index z1.d, #0, #1
 ; CHECK-NEXT:    and z1.d, z1.d, #0x1
-; CHECK-NEXT:    orr z1.d, z1.d, #0x8
+; CHECK-NEXT:    add z1.d, z1.d, #8 // =0x8
 ; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 4)

llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
Lines changed: 48 additions & 18 deletions

@@ -9,10 +9,15 @@ define <vscale x 2 x i8> @umulo_nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    and z1.d, z1.d, #0xff
 ; CHECK-NEXT:    and z0.d, z0.d, #0xff
-; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    lsr z1.d, z0.d, #8
+; CHECK-NEXT:    movprfx z2, z0
+; CHECK-NEXT:    mul z2.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    lsr z1.d, z2.d, #8
+; CHECK-NEXT:    cmpne p1.d, p0/z, z0.d, #0
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
+; CHECK-NEXT:    mov z2.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z2.d
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 2 x i8>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y)
   %b = extractvalue { <vscale x 2 x i8>, <vscale x 2 x i1> } %a, 0
@@ -29,10 +34,15 @@ define <vscale x 4 x i8> @umulo_nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    and z1.s, z1.s, #0xff
 ; CHECK-NEXT:    and z0.s, z0.s, #0xff
-; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    lsr z1.s, z0.s, #8
+; CHECK-NEXT:    movprfx z2, z0
+; CHECK-NEXT:    mul z2.s, p0/m, z2.s, z1.s
+; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    lsr z1.s, z2.s, #8
+; CHECK-NEXT:    cmpne p1.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
+; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
+; CHECK-NEXT:    mov z2.s, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z2.d
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 4 x i8>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %y)
   %b = extractvalue { <vscale x 4 x i8>, <vscale x 4 x i1> } %a, 0
@@ -49,10 +59,15 @@ define <vscale x 8 x i8> @umulo_nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    and z1.h, z1.h, #0xff
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
-; CHECK-NEXT:    mul z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    lsr z1.h, z0.h, #8
+; CHECK-NEXT:    movprfx z2, z0
+; CHECK-NEXT:    mul z2.h, p0/m, z2.h, z1.h
+; CHECK-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    lsr z1.h, z2.h, #8
+; CHECK-NEXT:    cmpne p1.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z1.h, #0
-; CHECK-NEXT:    mov z0.h, p0/m, #0 // =0x0
+; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
+; CHECK-NEXT:    mov z2.h, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z2.d
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 8 x i8>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y)
   %b = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i1> } %a, 0
@@ -149,10 +164,15 @@ define <vscale x 2 x i16> @umulo_nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    and z1.d, z1.d, #0xffff
 ; CHECK-NEXT:    and z0.d, z0.d, #0xffff
-; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    lsr z1.d, z0.d, #16
+; CHECK-NEXT:    movprfx z2, z0
+; CHECK-NEXT:    mul z2.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    lsr z1.d, z2.d, #16
+; CHECK-NEXT:    cmpne p1.d, p0/z, z0.d, #0
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
+; CHECK-NEXT:    mov z2.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z2.d
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 2 x i16>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %y)
   %b = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i1> } %a, 0
@@ -169,10 +189,15 @@ define <vscale x 4 x i16> @umulo_nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i1
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    and z1.s, z1.s, #0xffff
 ; CHECK-NEXT:    and z0.s, z0.s, #0xffff
-; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    lsr z1.s, z0.s, #16
+; CHECK-NEXT:    movprfx z2, z0
+; CHECK-NEXT:    mul z2.s, p0/m, z2.s, z1.s
+; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    lsr z1.s, z2.s, #16
+; CHECK-NEXT:    cmpne p1.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
+; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
+; CHECK-NEXT:    mov z2.s, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z2.d
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 4 x i16>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %y)
   %b = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i1> } %a, 0
@@ -269,10 +294,15 @@ define <vscale x 2 x i32> @umulo_nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i3
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
 ; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
-; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    lsr z1.d, z0.d, #32
+; CHECK-NEXT:    movprfx z2, z0
+; CHECK-NEXT:    mul z2.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    lsr z1.d, z2.d, #32
+; CHECK-NEXT:    cmpne p1.d, p0/z, z0.d, #0
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
+; CHECK-NEXT:    mov z2.d, p0/m, #0 // =0x0
+; CHECK-NEXT:    mov z0.d, z2.d
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 2 x i32>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y)
   %b = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i1> } %a, 0
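The longer sequences added back here (the extra movprfx/umulh/cmpne/sel) come from the expansion of llvm.umul.with.overflow on the promoted element type: without known bits for scalable vectors it can no longer prove that the high half of the widened product is zero, so the UMULH-based part of the overflow check stays. Below is a hedged sketch of the reasoning that previously removed it, using the nxv2i8 case (both operands masked with 0xff inside 64-bit lanes); the values and widths are illustrative only.

// Hedged sketch (not from the patch): after "and z, z, #0xff" both multiplicands
// fit in 8 bits, so their 64-bit product fits in 16 bits and UMULH is provably
// zero. With known bits unavailable for scalable vectors, the expansion has to
// keep the explicit UMULH-based overflow check seen in the CHECK lines above.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;

int main() {
  KnownBits X(64), Y(64);
  X.Zero.setHighBits(56); // value was masked with 0xff, so bits 8..63 are zero
  Y.Zero.setHighBits(56);

  KnownBits Prod = KnownBits::mul(X, Y);
  // The product of two values below 2^8 is below 2^16, so at least 48 high
  // bits of the 64-bit product should be reported as known zero.
  return Prod.countMinLeadingZeros() >= 48 ? 0 : 1;
}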
