@@ -5170,26 +5170,45 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
     return false;
   }
   case Instruction::Mul: {
+    auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
+      auto *Ty = cast<VectorType>(V->getType());
+      // For SVE the lane-indexing is within 128-bits, so we can't fold splats.
+      if (Ty->isScalableTy())
+        return false;
+
+      // Indexed variants of Mul exist for i16 and i32 element types only.
+      return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
+    };
+
     int NumZExts = 0, NumSExts = 0;
     for (auto &Op : I->operands()) {
       // Make sure we are not already sinking this operand
       if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
         continue;
 
-      if (match(&Op, m_SExt(m_Value()))) {
-        NumSExts++;
-        continue;
-      } else if (match(&Op, m_ZExt(m_Value()))) {
-        NumZExts++;
+      if (match(&Op, m_ZExtOrSExt(m_Value()))) {
+        auto *Ext = cast<Instruction>(Op);
+        auto *ExtOp = Ext->getOperand(0);
+        if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
+          Ops.push_back(&Ext->getOperandUse(0));
+        Ops.push_back(&Op);
+
+        if (isa<SExtInst>(Ext))
+          NumSExts++;
+        else
+          NumZExts++;
+
         continue;
       }
 
       ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
+      if (!Shuffle)
+        continue;
 
       // If the Shuffle is a splat and the operand is a zext/sext, sinking the
       // operand and the s/zext can help create indexed s/umull. This is
       // especially useful to prevent i64 mul being scalarized.
-      if (Shuffle && isSplatShuffle(Shuffle) &&
+      if (isSplatShuffle(Shuffle) &&
           match(Shuffle->getOperand(0), m_ZExtOrSExt(m_Value()))) {
         Ops.push_back(&Shuffle->getOperandUse(0));
         Ops.push_back(&Op);
@@ -5200,9 +5219,6 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
         continue;
       }
 
-      if (!Shuffle)
-        continue;
-
       Value *ShuffleOperand = Shuffle->getOperand(0);
       InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
       if (!Insert)
@@ -5234,12 +5250,26 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
         NumZExts++;
       }
 
+      Ops.push_back(&Insert->getOperandUse(1));
       Ops.push_back(&Shuffle->getOperandUse(0));
       Ops.push_back(&Op);
     }
 
-    // Is it profitable to sink if we found two of the same type of extends.
-    return !Ops.empty() && (NumSExts == 2 || NumZExts == 2);
+    // It is profitable to sink if we found two of the same type of extends.
+    if (!Ops.empty() && (NumSExts == 2 || NumZExts == 2))
+      return true;
+
+    // Otherwise, see if we should sink splats for indexed variants.
+    if (!ShouldSinkSplatForIndexedVariant(I))
+      return false;
+
+    Ops.clear();
+    if (isSplatShuffle(I->getOperand(0)))
+      Ops.push_back(&I->getOperandUse(0));
+    if (isSplatShuffle(I->getOperand(1)))
+      Ops.push_back(&I->getOperandUse(1));
+
+    return !Ops.empty();
   }
   case Instruction::FMul: {
     // For SVE the lane-indexing is within 128-bits, so we can't fold splats.
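Aside (not part of this commit): a minimal sketch of the kind of source loop the splat sinking above is aimed at, assuming the loop gets vectorized. The broadcast of the scalar multiplier is loop-invariant, so it normally sits in the preheader while the multiply is in the loop body; sinking the splat shuffle next to the mul lets instruction selection use the by-element NEON form (e.g. "mul v0.4s, v1.4s, v2.s[0]") instead of keeping a separately materialized splat register alive across the loop. Function and parameter names below are hypothetical.

// Illustration only; not taken from the patch or its tests.
void scale(int *dst, const int *src, int n, int s) {
  for (int i = 0; i < n; ++i)
    dst[i] = src[i] * s; // i32 elements, so an indexed MUL variant exists
}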