@@ -4619,26 +4619,109 @@ PerformFADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
4619
4619
return SDValue ();
4620
4620
}
4621
4621
4622
+ // If {Lo, Hi} = <packed f32x2 val>, returns that value
4623
+ static SDValue peekThroughF32x2Copy (const SDValue &Lo, const SDValue &Hi) {
4624
+ if (Lo.getValueType () != MVT::f32 || Lo.getOpcode () != ISD::CopyFromReg ||
4625
+ Lo.getNode () != Hi.getNode () || Lo == Hi)
4626
+ return SDValue ();
4627
+
4628
+ SDNode *CopyF = Lo.getNode ();
4629
+ SDNode *CopyT = CopyF->getOperand (0 ).getNode ();
4630
+ if (CopyT->getOpcode () != ISD::CopyToReg)
4631
+ return SDValue ();
4632
+
4633
+ // check the two registers are the same
4634
+ if (cast<RegisterSDNode>(CopyF->getOperand (1 ))->getReg () !=
4635
+ cast<RegisterSDNode>(CopyT->getOperand (1 ))->getReg ())
4636
+ return SDValue ();
4637
+
4638
+ SDValue OrigV = CopyT->getOperand (2 );
4639
+ if (OrigV.getValueType () != MVT::i64 )
4640
+ return SDValue ();
4641
+ return OrigV;
4642
+ }
4643
+
4644
+ static SDValue
4645
+ PerformPackedF32StoreCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
4646
+ CodeGenOptLevel OptLevel) {
4647
+ if (OptLevel == CodeGenOptLevel::None)
4648
+ return SDValue ();
4649
+
4650
+ // rewrite stores of packed f32 values
4651
+ auto *MemN = cast<MemSDNode>(N);
4652
+ if (MemN->getMemoryVT () == MVT::f32 ) {
4653
+ std::optional<NVPTXISD::NodeType> NewOpcode;
4654
+ switch (MemN->getOpcode ()) {
4655
+ case NVPTXISD::StoreRetvalV2:
4656
+ NewOpcode = NVPTXISD::StoreRetval;
4657
+ break ;
4658
+ case NVPTXISD::StoreRetvalV4:
4659
+ NewOpcode = NVPTXISD::StoreRetvalV2;
4660
+ break ;
4661
+ case NVPTXISD::StoreParamV2:
4662
+ NewOpcode = NVPTXISD::StoreParam;
4663
+ break ;
4664
+ case NVPTXISD::StoreParamV4:
4665
+ NewOpcode = NVPTXISD::StoreParamV2;
4666
+ break ;
4667
+ }
4668
+
4669
+ if (NewOpcode) {
4670
+ SmallVector<SDValue> NewOps = {N->getOperand (0 ), N->getOperand (1 )};
4671
+ unsigned NumPacked = 0 ;
4672
+
4673
+ // gather all packed operands
4674
+ for (unsigned I = 2 , E = MemN->getNumOperands (); I < E; I += 2 ) {
4675
+ if (SDValue Packed = peekThroughF32x2Copy (MemN->getOperand (I),
4676
+ MemN->getOperand (I + 1 ))) {
4677
+ NewOps.push_back (Packed);
4678
+ ++NumPacked;
4679
+ } else {
4680
+ NumPacked = 0 ;
4681
+ break ;
4682
+ }
4683
+ }
4684
+
4685
+ if (NumPacked) {
4686
+ return DCI.DAG .getMemIntrinsicNode (
4687
+ *NewOpcode, SDLoc (N), N->getVTList (), NewOps, MVT::i64 ,
4688
+ MemN->getPointerInfo (), MemN->getAlign (),
4689
+ MachineMemOperand::MOStore);
4690
+ }
4691
+ }
4692
+ }
4693
+ return SDValue ();
4694
+ }
4695
+
4622
4696
static SDValue PerformStoreCombineHelper (SDNode *N, std::size_t Front,
4623
- std::size_t Back) {
4697
+ std::size_t Back,
4698
+ TargetLowering::DAGCombinerInfo &DCI,
4699
+ CodeGenOptLevel OptLevel) {
4624
4700
if (all_of (N->ops ().drop_front (Front).drop_back (Back),
4625
4701
[](const SDUse &U) { return U.get ()->isUndef (); }))
4626
4702
// Operand 0 is the previous value in the chain. Cannot return EntryToken
4627
4703
// as the previous value will become unused and eliminated later.
4628
4704
return N->getOperand (0 );
4629
4705
4706
+ if (SDValue V = PerformPackedF32StoreCombine (N, DCI, OptLevel))
4707
+ return V;
4708
+
4630
4709
return SDValue ();
4631
4710
}
4632
4711
4633
- static SDValue PerformStoreParamCombine (SDNode *N) {
4712
+ static SDValue PerformStoreParamCombine (SDNode *N,
4713
+ TargetLowering::DAGCombinerInfo &DCI,
4714
+ CodeGenOptLevel OptLevel) {
4634
4715
// Operands from the 3rd to the 2nd last one are the values to be stored.
4635
4716
// {Chain, ArgID, Offset, Val, Glue}
4636
- return PerformStoreCombineHelper (N, 3 , 1 );
4717
+ return PerformStoreCombineHelper (N, 3 , 1 , DCI, OptLevel );
4637
4718
}
4638
4719
4639
- static SDValue PerformStoreRetvalCombine (SDNode *N) {
4720
+ static SDValue PerformStoreRetvalCombine (SDNode *N,
4721
+ TargetLowering::DAGCombinerInfo &DCI,
4722
+ CodeGenOptLevel OptLevel) {
4640
4723
// Operands from the 2nd to the last one are the values to be stored
4641
- return PerformStoreCombineHelper (N, 2 , 0 );
4724
+ return PerformStoreCombineHelper (N, 2 , 0 , DCI, OptLevel );
4642
4725
}
4643
4726
4644
4727
// / PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
@@ -5329,11 +5412,11 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
5329
5412
case NVPTXISD::StoreRetval:
5330
5413
case NVPTXISD::StoreRetvalV2:
5331
5414
case NVPTXISD::StoreRetvalV4:
5332
- return PerformStoreRetvalCombine (N);
5415
+ return PerformStoreRetvalCombine (N, DCI, OptLevel );
5333
5416
case NVPTXISD::StoreParam:
5334
5417
case NVPTXISD::StoreParamV2:
5335
5418
case NVPTXISD::StoreParamV4:
5336
- return PerformStoreParamCombine (N);
5419
+ return PerformStoreParamCombine (N, DCI, OptLevel );
5337
5420
case ISD::EXTRACT_VECTOR_ELT:
5338
5421
return PerformEXTRACTCombine (N, DCI);
5339
5422
case ISD::VSELECT:
0 commit comments