Skip to content

Commit 45909ec

Browse files
authored
[PowerPC] Use the MTVSRBMI instruction instead of a constant pool on Power10+ (#144084)
The MTVSRBMI instruction sets each byte of a VSR to 0x00 or 0xFF according to a bit mask. Using this instruction instead of a constant-pool load reduces the assembly code size and the number of instructions on Power10.
1 parent e5cd9bd commit 45909ec

File tree

2 files changed

+54
-19
lines changed

2 files changed

+54
-19
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9584,6 +9584,37 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
95849584
return false;
95859585
}
95869586

9587+
/// Determine whether the constant build vector \p BVN can be materialized with
/// a single MTVSRBMI instruction, which sets every byte of the target vector
/// register to either 0x00 or 0xFF according to a per-byte bit mask.
///
/// \param BitMask [out] Cleared on entry. On success, bit J is set when byte J
///                (counting from the least-significant byte of the packed
///                constant) is 0xFF. Its contents are unspecified when the
///                function returns false.
/// \param BVN     The build vector to inspect.
/// \returns true if the vector is 128 bits wide, every operand is a
///          ConstantSDNode, and every byte of the packed value is 0x00 or
///          0xFF; false otherwise.
static bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) {
  // Query the operand count inside the assert so that release (NDEBUG) builds
  // do not end up with an unused local variable.
  assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");

  BitMask.clearAllBits();

  const EVT VT = BVN.getValueType(0);
  const unsigned VTSize = VT.getSizeInBits();

  // The byte scan below inspects exactly 16 bytes. Reject any other vector
  // width up front instead of reading past the end of the packed constant.
  constexpr unsigned NumBytes = 16;
  constexpr unsigned BitsPerByte = 8;
  if (VTSize != NumBytes * BitsPerByte)
    return false;

  const unsigned EltWidth = VT.getScalarSizeInBits();

  // Pack all element constants into one wide integer, with operand 0 placed
  // in the least-significant bits.
  // NOTE(review): confirm that this packing order matches the MTVSRBMI byte
  // numbering on both big-endian and little-endian subtargets.
  APInt ConstValue(VTSize, 0);
  unsigned BitPos = 0;
  for (const SDValue &OpVal : BVN.op_values()) {
    // Any operand that is not a plain integer constant disqualifies the
    // vector.
    const auto *CN = dyn_cast<ConstantSDNode>(OpVal);
    if (!CN)
      return false;

    ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
    BitPos += EltWidth;
  }

  // Every byte must be all-zeros or all-ones; record the all-ones bytes.
  for (unsigned J = 0; J < NumBytes; ++J) {
    const APInt Byte = ConstValue.extractBits(BitsPerByte, J * BitsPerByte);
    if (Byte == 0xFF)
      BitMask.setBit(J);
    else if (Byte != 0x00)
      return false;
  }
  return true;
}
9617+
95879618
// If this is a case we can't handle, return null and let the default
95889619
// expansion code take care of it. If we CAN select this case, and if it
95899620
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -9595,6 +9626,25 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
95959626
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
95969627
assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
95979628

9629+
if (Subtarget.hasP10Vector()) {
9630+
APInt BitMask(32, 0);
9631+
// If the value of the vector is all zeros or all ones,
9632+
// we do not convert it to MTVSRBMI.
9633+
// The xxleqv instruction sets a vector with all ones.
9634+
// The xxlxor instruction sets a vector with all zeros.
9635+
if (isValidMtVsrBmi(BitMask, *BVN) && BitMask != 0 && BitMask != 0xffff) {
9636+
SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
9637+
MachineSDNode *MSDNode =
9638+
DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
9639+
SDValue SDV = SDValue(MSDNode, 0);
9640+
EVT DVT = BVN->getValueType(0);
9641+
EVT SVT = SDV.getValueType();
9642+
if (SVT != DVT) {
9643+
SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
9644+
}
9645+
return SDV;
9646+
}
9647+
}
95989648
// Check if this is a splat of a constant value.
95999649
APInt APSplatBits, APSplatUndef;
96009650
unsigned SplatBitSize;

llvm/test/CodeGen/PowerPC/mtvsrbmi.ll

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,28 +10,13 @@
1010
; RUN: | FileCheck %s --check-prefix=CHECK
1111

1212
define dso_local noundef range(i8 -1, 1) <16 x i8> @_Z5v00FFv() {
13-
; CHECK: L..CPI0_0:
14-
; CHECK-NEXT: .byte 255 # 0xff
15-
; CHECK-NEXT: .byte 0 # 0x0
16-
; CHECK-NEXT: .byte 0 # 0x0
17-
; CHECK-NEXT: .byte 0 # 0x0
18-
; CHECK-NEXT: .byte 0 # 0x0
19-
; CHECK-NEXT: .byte 0 # 0x0
20-
; CHECK-NEXT: .byte 0 # 0x0
21-
; CHECK-NEXT: .byte 0 # 0x0
22-
; CHECK-NEXT: .byte 0 # 0x0
23-
; CHECK-NEXT: .byte 0 # 0x0
24-
; CHECK-NEXT: .byte 0 # 0x0
25-
; CHECK-NEXT: .byte 0 # 0x0
26-
; CHECK-NEXT: .byte 0 # 0x0
27-
; CHECK-NEXT: .byte 0 # 0x0
28-
; CHECK-NEXT: .byte 0 # 0x0
29-
; CHECK-NEXT: .byte 0 # 0x0
13+
; CHECK-NOT: L..CPI0_0:
14+
; CHECK-NOT: .byte 255 # 0xff
15+
; CHECK-NOT: .byte 0 # 0x0
3016

3117
; CHECK-LABEL: _Z5v00FFv:
3218
; CHECK: # %bb.0: # %entry
33-
; CHECK-NEXT: lwz r3, L..C0(r2) # %const.0
34-
; CHECK-NEXT: lxv vs34, 0(r3)
19+
; CHECK-NEXT: mtvsrbmi v2, 1
3520
; CHECK-NEXT: blr
3621
entry:
3722
ret <16 x i8> <i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>

0 commit comments

Comments
 (0)