@@ -1361,8 +1361,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
1361
1361
if (Subtarget.isISAFuture()) {
1362
1362
addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1363
1363
addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
1364
+ addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
1364
1365
setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
1365
1366
setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
1367
+ setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
1368
+ setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
1366
1369
} else {
1367
1370
addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1368
1371
}
@@ -11890,15 +11893,19 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
11890
11893
SDValue LoadChain = LN->getChain();
11891
11894
SDValue BasePtr = LN->getBasePtr();
11892
11895
EVT VT = Op.getValueType();
11896
+ bool IsV1024i1 = VT == MVT::v1024i1;
11897
+ bool IsV2048i1 = VT == MVT::v2048i1;
11893
11898
11894
- // Type v1024i1 is used for Dense Math dmr registers.
11895
- assert(VT == MVT::v1024i1 && "Unsupported type.");
11899
+ // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
11900
+ // Dense Math dmr pair registers, respectively.
11901
+ assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
11896
11902
assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
11897
11903
"Dense Math support required.");
11898
11904
assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
11899
11905
11900
- SmallVector<SDValue, 4> Loads;
11901
- SmallVector<SDValue, 4> LoadChains;
11906
+ SmallVector<SDValue, 8> Loads;
11907
+ SmallVector<SDValue, 8> LoadChains;
11908
+
11902
11909
SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
11903
11910
SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
11904
11911
MachineMemOperand *MMO = LN->getMemOperand();
@@ -11934,11 +11941,36 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
11934
11941
SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
11935
11942
SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
11936
11943
const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
11944
+
11937
11945
SDValue Value =
11938
11946
SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
11939
11947
11940
- SDValue RetOps[] = {Value, TF};
11941
- return DAG.getMergeValues(RetOps, dl);
11948
+ if (IsV1024i1) {
11949
+ return DAG.getMergeValues({Value, TF}, dl);
11950
+ }
11951
+
11952
+ // Handle loads for v2048i1, which represents a dmr pair.
11953
+ SDValue DmrPValue;
11954
+ SDValue Dmr1Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1,
11955
+ Loads[4], Loads[5]),
11956
+ 0);
11957
+ SDValue Dmr1Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
11958
+ Loads[6], Loads[7]),
11959
+ 0);
11960
+ const SDValue Dmr1Ops[] = {RC, Dmr1Lo, LoSub, Dmr1Hi, HiSub};
11961
+ SDValue Dmr1Value = SDValue(
11962
+ DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Dmr1Ops), 0);
11963
+
11964
+ SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
11965
+ SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
11966
+
11967
+ SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
11968
+ const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
11969
+
11970
+ DmrPValue = SDValue(
11971
+ DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
11972
+
11973
+ return DAG.getMergeValues({DmrPValue, TF}, dl);
11942
11974
}
11943
11975
11944
11976
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
@@ -11949,7 +11981,7 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11949
11981
SDValue BasePtr = LN->getBasePtr();
11950
11982
EVT VT = Op.getValueType();
11951
11983
11952
- if (VT == MVT::v1024i1)
11984
+ if (VT == MVT::v1024i1 || VT == MVT::v2048i1 )
11953
11985
return LowerDMFVectorLoad(Op, DAG);
11954
11986
11955
11987
if (VT != MVT::v256i1 && VT != MVT::v512i1)
@@ -11996,34 +12028,88 @@ SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
11996
12028
StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11997
12029
SDValue StoreChain = SN->getChain();
11998
12030
SDValue BasePtr = SN->getBasePtr();
11999
- SmallVector<SDValue, 4 > Values;
12000
- SmallVector<SDValue, 4 > Stores;
12031
+ SmallVector<SDValue, 8 > Values;
12032
+ SmallVector<SDValue, 8 > Stores;
12001
12033
EVT VT = SN->getValue().getValueType();
12034
+ bool IsV1024i1 = VT == MVT::v1024i1;
12035
+ bool IsV2048i1 = VT == MVT::v2048i1;
12002
12036
12003
- // Type v1024i1 is used for Dense Math dmr registers.
12004
- assert(VT == MVT::v1024i1 && "Unsupported type.");
12037
+ // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12038
+ // Dense Math dmr pair registers, respectively.
12039
+ assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12005
12040
assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12006
12041
"Dense Math support required.");
12007
12042
assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12008
12043
12009
- SDValue Lo(
12010
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12011
- Op.getOperand(1),
12012
- DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12013
- 0);
12014
- SDValue Hi(
12015
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12016
- Op.getOperand(1),
12017
- DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12018
- 0);
12019
12044
EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12020
- MachineSDNode *ExtNode =
12021
- DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
12022
- Values.push_back(SDValue(ExtNode, 0));
12023
- Values.push_back(SDValue(ExtNode, 1));
12024
- ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
12025
- Values.push_back(SDValue(ExtNode, 0));
12026
- Values.push_back(SDValue(ExtNode, 1));
12045
+ if (IsV1024i1) {
12046
+ SDValue Lo(DAG.getMachineNode(
12047
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12048
+ Op.getOperand(1),
12049
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12050
+ 0);
12051
+ SDValue Hi(DAG.getMachineNode(
12052
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12053
+ Op.getOperand(1),
12054
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12055
+ 0);
12056
+ MachineSDNode *ExtNode =
12057
+ DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
12058
+ Values.push_back(SDValue(ExtNode, 0));
12059
+ Values.push_back(SDValue(ExtNode, 1));
12060
+ ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
12061
+ Values.push_back(SDValue(ExtNode, 0));
12062
+ Values.push_back(SDValue(ExtNode, 1));
12063
+ } else {
12064
+ // This corresponds to v2048i1, which represents a dmr pair.
12065
+ SDValue Dmr0(
12066
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12067
+ Op.getOperand(1),
12068
+ DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
12069
+ 0);
12070
+
12071
+ SDValue Dmr1(
12072
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12073
+ Op.getOperand(1),
12074
+ DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
12075
+ 0);
12076
+
12077
+ SDValue Dmr0Lo(DAG.getMachineNode(
12078
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12079
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12080
+ 0);
12081
+
12082
+ SDValue Dmr0Hi(DAG.getMachineNode(
12083
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12084
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12085
+ 0);
12086
+
12087
+ SDValue Dmr1Lo(DAG.getMachineNode(
12088
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12089
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12090
+ 0);
12091
+
12092
+ SDValue Dmr1Hi(DAG.getMachineNode(
12093
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12094
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12095
+ 0);
12096
+
12097
+ MachineSDNode *ExtNode =
12098
+ DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
12099
+ Values.push_back(SDValue(ExtNode, 0));
12100
+ Values.push_back(SDValue(ExtNode, 1));
12101
+ ExtNode =
12102
+ DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
12103
+ Values.push_back(SDValue(ExtNode, 0));
12104
+ Values.push_back(SDValue(ExtNode, 1));
12105
+ ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
12106
+ Values.push_back(SDValue(ExtNode, 0));
12107
+ Values.push_back(SDValue(ExtNode, 1));
12108
+ ExtNode =
12109
+ DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
12110
+ Values.push_back(SDValue(ExtNode, 0));
12111
+ Values.push_back(SDValue(ExtNode, 1));
12112
+ }
12027
12113
12028
12114
if (Subtarget.isLittleEndian())
12029
12115
std::reverse(Values.begin(), Values.end());
@@ -12062,7 +12148,7 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
12062
12148
SDValue Value2 = SN->getValue();
12063
12149
EVT StoreVT = Value.getValueType();
12064
12150
12065
- if (StoreVT == MVT::v1024i1)
12151
+ if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1 )
12066
12152
return LowerDMFVectorStore(Op, DAG);
12067
12153
12068
12154
if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
0 commit comments