@@ -163,8 +163,6 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
163
163
164
164
CurDAG->setRoot (Dummy.getValue ());
165
165
166
- MadeChange |= doPeepholeMergeVVMFold ();
167
-
168
166
// After we're done with everything else, convert IMPLICIT_DEF
169
167
// passthru operands to NoRegister. This is required to workaround
170
168
// an optimization deficiency in MachineCSE. This really should
@@ -4092,218 +4090,6 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
4092
4090
return true ;
4093
4091
}
4094
4092
4095
- static bool IsVMerge (SDNode *N) {
4096
- return RISCV::getRVVMCOpcode (N->getMachineOpcode ()) == RISCV::VMERGE_VVM;
4097
- }
4098
-
4099
- // Try to fold away VMERGE_VVM instructions into their true operands:
4100
- //
4101
- // %true = PseudoVADD_VV ...
4102
- // %x = PseudoVMERGE_VVM %false, %false, %true, %mask
4103
- // ->
4104
- // %x = PseudoVADD_VV_MASK %false, ..., %mask
4105
- //
4106
- // We can only fold if vmerge's passthru operand, vmerge's false operand and
4107
- // %true's passthru operand (if it has one) are the same. This is because we
4108
- // have to consolidate them into one passthru operand in the result.
4109
- //
4110
- // If %true is masked, then we can use its mask instead of vmerge's if vmerge's
4111
- // mask is all ones.
4112
- //
4113
- // The resulting VL is the minimum of the two VLs.
4114
- //
4115
- // The resulting policy is the effective policy the vmerge would have had,
4116
- // i.e. whether or not its passthru operand was implicit-def.
4117
- bool RISCVDAGToDAGISel::performCombineVMergeAndVOps (SDNode *N) {
4118
- SDValue Passthru, False, True, VL, Mask;
4119
- assert (IsVMerge (N));
4120
- Passthru = N->getOperand (0 );
4121
- False = N->getOperand (1 );
4122
- True = N->getOperand (2 );
4123
- Mask = N->getOperand (3 );
4124
- VL = N->getOperand (4 );
4125
-
4126
- // If the EEW of True is different from vmerge's SEW, then we can't fold.
4127
- if (True.getSimpleValueType () != N->getSimpleValueType (0 ))
4128
- return false ;
4129
-
4130
- // We require that either passthru and false are the same, or that passthru
4131
- // is undefined.
4132
- if (Passthru != False && !isImplicitDef (Passthru))
4133
- return false ;
4134
-
4135
- assert (True.getResNo () == 0 &&
4136
- " Expect True is the first output of an instruction." );
4137
-
4138
- // N must be the only user of True.
4139
- if (!True.hasOneUse ())
4140
- return false ;
4141
-
4142
- if (!True.isMachineOpcode ())
4143
- return false ;
4144
-
4145
- unsigned TrueOpc = True.getMachineOpcode ();
4146
- const MCInstrDesc &TrueMCID = TII->get (TrueOpc);
4147
- uint64_t TrueTSFlags = TrueMCID.TSFlags ;
4148
- bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse (TrueMCID);
4149
-
4150
- const RISCV::RISCVMaskedPseudoInfo *Info =
4151
- RISCV::lookupMaskedIntrinsicByUnmasked (TrueOpc);
4152
- if (!Info)
4153
- return false ;
4154
-
4155
- // If True has a passthru operand then it needs to be the same as vmerge's
4156
- // False, since False will be used for the result's passthru operand.
4157
- if (HasTiedDest && !isImplicitDef (True->getOperand (0 ))) {
4158
- SDValue PassthruOpTrue = True->getOperand (0 );
4159
- if (False != PassthruOpTrue)
4160
- return false ;
4161
- }
4162
-
4163
- // Skip if True has unmodeled side effects.
4164
- if (TII->get (TrueOpc).hasUnmodeledSideEffects ())
4165
- return false ;
4166
-
4167
- unsigned TrueChainOpIdx = True.getNumOperands () - 1 ;
4168
- bool HasChainOp =
4169
- True.getOperand (TrueChainOpIdx).getValueType () == MVT::Other;
4170
-
4171
- if (HasChainOp) {
4172
- // Avoid creating cycles in the DAG. We must ensure that none of the other
4173
- // operands depend on True through its Chain.
4174
- SmallVector<const SDNode *, 4 > LoopWorklist;
4175
- SmallPtrSet<const SDNode *, 16 > Visited;
4176
- LoopWorklist.push_back (False.getNode ());
4177
- LoopWorklist.push_back (Mask.getNode ());
4178
- LoopWorklist.push_back (VL.getNode ());
4179
- if (SDNode::hasPredecessorHelper (True.getNode (), Visited, LoopWorklist))
4180
- return false ;
4181
- }
4182
-
4183
- // The vector policy operand may be present for masked intrinsics
4184
- bool HasVecPolicyOp = RISCVII::hasVecPolicyOp (TrueTSFlags);
4185
- unsigned TrueVLIndex =
4186
- True.getNumOperands () - HasVecPolicyOp - HasChainOp - 2 ;
4187
- SDValue TrueVL = True.getOperand (TrueVLIndex);
4188
- SDValue SEW = True.getOperand (TrueVLIndex + 1 );
4189
-
4190
- auto GetMinVL = [](SDValue LHS, SDValue RHS) {
4191
- if (LHS == RHS)
4192
- return LHS;
4193
- if (isAllOnesConstant (LHS))
4194
- return RHS;
4195
- if (isAllOnesConstant (RHS))
4196
- return LHS;
4197
- auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
4198
- auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
4199
- if (!CLHS || !CRHS)
4200
- return SDValue ();
4201
- return CLHS->getZExtValue () <= CRHS->getZExtValue () ? LHS : RHS;
4202
- };
4203
-
4204
- // Because N and True must have the same passthru operand (or True's operand
4205
- // is implicit_def), the "effective" body is the minimum of their VLs.
4206
- SDValue OrigVL = VL;
4207
- VL = GetMinVL (TrueVL, VL);
4208
- if (!VL)
4209
- return false ;
4210
-
4211
- // Some operations produce different elementwise results depending on the
4212
- // active elements, like viota.m or vredsum. This transformation is illegal
4213
- // for these if we change the active elements (i.e. mask or VL).
4214
- const MCInstrDesc &TrueBaseMCID = TII->get (RISCV::getRVVMCOpcode (TrueOpc));
4215
- if (RISCVII::elementsDependOnVL (TrueBaseMCID.TSFlags ) && (TrueVL != VL))
4216
- return false ;
4217
- if (RISCVII::elementsDependOnMask (TrueBaseMCID.TSFlags ) &&
4218
- (Mask && !usesAllOnesMask (Mask)))
4219
- return false ;
4220
-
4221
- // Make sure it doesn't raise any observable fp exceptions, since changing the
4222
- // active elements will affect how fflags is set.
4223
- if (mayRaiseFPException (True.getNode ()) && !True->getFlags ().hasNoFPExcept ())
4224
- return false ;
4225
-
4226
- SDLoc DL (N);
4227
-
4228
- unsigned MaskedOpc = Info->MaskedPseudo ;
4229
- #ifndef NDEBUG
4230
- const MCInstrDesc &MaskedMCID = TII->get (MaskedOpc);
4231
- assert (RISCVII::hasVecPolicyOp (MaskedMCID.TSFlags ) &&
4232
- " Expected instructions with mask have policy operand." );
4233
- assert (MaskedMCID.getOperandConstraint (MaskedMCID.getNumDefs (),
4234
- MCOI::TIED_TO) == 0 &&
4235
- " Expected instructions with mask have a tied dest." );
4236
- #endif
4237
-
4238
- // Use a tumu policy, relaxing it to tail agnostic provided that the passthru
4239
- // operand is undefined.
4240
- //
4241
- // However, if the VL became smaller than what the vmerge had originally, then
4242
- // elements past VL that were previously in the vmerge's body will have moved
4243
- // to the tail. In that case we always need to use tail undisturbed to
4244
- // preserve them.
4245
- bool MergeVLShrunk = VL != OrigVL;
4246
- uint64_t Policy = (isImplicitDef (Passthru) && !MergeVLShrunk)
4247
- ? RISCVVType::TAIL_AGNOSTIC
4248
- : /* TUMU*/ 0 ;
4249
- SDValue PolicyOp =
4250
- CurDAG->getTargetConstant (Policy, DL, Subtarget->getXLenVT ());
4251
-
4252
-
4253
- SmallVector<SDValue, 8 > Ops;
4254
- Ops.push_back (False);
4255
-
4256
- const bool HasRoundingMode = RISCVII::hasRoundModeOp (TrueTSFlags);
4257
- const unsigned NormalOpsEnd = TrueVLIndex - HasRoundingMode;
4258
- Ops.append (True->op_begin () + HasTiedDest, True->op_begin () + NormalOpsEnd);
4259
-
4260
- Ops.push_back (Mask);
4261
-
4262
- // For unmasked "VOp" with rounding mode operand, that is interfaces like
4263
- // (..., rm, vl) or (..., rm, vl, policy).
4264
- // Its masked version is (..., vm, rm, vl, policy).
4265
- // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
4266
- if (HasRoundingMode)
4267
- Ops.push_back (True->getOperand (TrueVLIndex - 1 ));
4268
-
4269
- Ops.append ({VL, SEW, PolicyOp});
4270
-
4271
- // Result node should have chain operand of True.
4272
- if (HasChainOp)
4273
- Ops.push_back (True.getOperand (TrueChainOpIdx));
4274
-
4275
- MachineSDNode *Result =
4276
- CurDAG->getMachineNode (MaskedOpc, DL, True->getVTList (), Ops);
4277
- Result->setFlags (True->getFlags ());
4278
-
4279
- if (!cast<MachineSDNode>(True)->memoperands_empty ())
4280
- CurDAG->setNodeMemRefs (Result, cast<MachineSDNode>(True)->memoperands ());
4281
-
4282
- // Replace vmerge.vvm node by Result.
4283
- ReplaceUses (SDValue (N, 0 ), SDValue (Result, 0 ));
4284
-
4285
- // Replace the remaining values of True, e.g. chain and VL.
4286
- for (unsigned Idx = 1 ; Idx < True->getNumValues (); ++Idx)
4287
- ReplaceUses (True.getValue (Idx), SDValue (Result, Idx));
4288
-
4289
- return true ;
4290
- }
4291
-
4292
- bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold () {
4293
- bool MadeChange = false ;
4294
- SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end ();
4295
-
4296
- while (Position != CurDAG->allnodes_begin ()) {
4297
- SDNode *N = &*--Position;
4298
- if (N->use_empty () || !N->isMachineOpcode ())
4299
- continue ;
4300
-
4301
- if (IsVMerge (N))
4302
- MadeChange |= performCombineVMergeAndVOps (N);
4303
- }
4304
- return MadeChange;
4305
- }
4306
-
4307
4093
/// If our passthru is an implicit_def, use noreg instead. This side
4308
4094
/// steps issues with MachineCSE not being able to CSE expressions with
4309
4095
/// IMPLICIT_DEF operands while preserving the semantic intent. See
0 commit comments