@@ -3389,6 +3389,57 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
3389
3389
setOriginForNaryOp (I);
3390
3390
}
3391
3391
3392
+ // / Some instructions have additional zero-elements in the return type
3393
+ // / e.g., <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, ...)
3394
+ // /
3395
+ // / This function will return a vector type with the same number of elements
3396
+ // / as the input, but same per-element width as the return value e.g.,
3397
+ // / <8 x i8>.
3398
+ FixedVectorType *maybeShrinkVectorShadowType (Value *Src, IntrinsicInst &I) {
3399
+ assert (isa<FixedVectorType>(getShadowTy (&I)));
3400
+ FixedVectorType *ShadowType = cast<FixedVectorType>(getShadowTy (&I));
3401
+
3402
+ // TODO: generalize beyond 2x?
3403
+ if (ShadowType->getElementCount () ==
3404
+ cast<VectorType>(Src->getType ())->getElementCount () * 2 )
3405
+ ShadowType = FixedVectorType::getHalfElementsVectorType (ShadowType);
3406
+
3407
+ assert (ShadowType->getElementCount () ==
3408
+ cast<VectorType>(Src->getType ())->getElementCount ());
3409
+
3410
+ return ShadowType;
3411
+ }
3412
+
3413
+ // / Doubles the length of a vector shadow (filled with zeros) if necessary to
3414
+ // / match the length of the shadow for the instruction.
3415
+ // / This is more type-safe than CreateShadowCast().
3416
+ Value *maybeExtendVectorShadowWithZeros (Value *Shadow, IntrinsicInst &I) {
3417
+ IRBuilder<> IRB (&I);
3418
+ assert (isa<FixedVectorType>(Shadow->getType ()));
3419
+ assert (isa<FixedVectorType>(I.getType ()));
3420
+
3421
+ Value *FullShadow = getCleanShadow (&I);
3422
+ assert (cast<FixedVectorType>(Shadow->getType ())->getNumElements () <=
3423
+ cast<FixedVectorType>(FullShadow->getType ())->getNumElements ());
3424
+ assert (cast<FixedVectorType>(Shadow->getType ())->getScalarType () ==
3425
+ cast<FixedVectorType>(FullShadow->getType ())->getScalarType ());
3426
+
3427
+ if (Shadow->getType () == FullShadow->getType ()) {
3428
+ FullShadow = Shadow;
3429
+ } else {
3430
+ // TODO: generalize beyond 2x?
3431
+ SmallVector<int , 32 > ShadowMask (
3432
+ cast<FixedVectorType>(FullShadow->getType ())->getNumElements ());
3433
+ std::iota (ShadowMask.begin (), ShadowMask.end (), 0 );
3434
+
3435
+ // Append zeros
3436
+ FullShadow =
3437
+ IRB.CreateShuffleVector (Shadow, getCleanShadow (Shadow), ShadowMask);
3438
+ }
3439
+
3440
+ return FullShadow;
3441
+ }
3442
+
3392
3443
// / Handle x86 SSE vector conversion.
3393
3444
// /
3394
3445
// / e.g., single-precision to half-precision conversion:
@@ -3419,13 +3470,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
3419
3470
3420
3471
// The return type might have more elements than the input.
3421
3472
// Temporarily shrink the return type's number of elements.
3422
- VectorType *ShadowType = cast<VectorType>(getShadowTy (&I));
3423
- if (ShadowType->getElementCount () ==
3424
- cast<VectorType>(Src->getType ())->getElementCount () * 2 )
3425
- ShadowType = VectorType::getHalfElementsVectorType (ShadowType);
3426
-
3427
- assert (ShadowType->getElementCount () ==
3428
- cast<VectorType>(Src->getType ())->getElementCount ());
3473
+ VectorType *ShadowType = maybeShrinkVectorShadowType (Src, I);
3429
3474
3430
3475
IRBuilder<> IRB (&I);
3431
3476
Value *S0 = getShadow (&I, 0 );
@@ -3440,19 +3485,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
3440
3485
3441
3486
// The return type might have more elements than the input.
3442
3487
// Extend the return type back to its original width if necessary.
3443
- Value *FullShadow = getCleanShadow (&I);
3444
-
3445
- if (Shadow->getType () == FullShadow->getType ()) {
3446
- FullShadow = Shadow;
3447
- } else {
3448
- SmallVector<int , 8 > ShadowMask (
3449
- cast<FixedVectorType>(FullShadow->getType ())->getNumElements ());
3450
- std::iota (ShadowMask.begin (), ShadowMask.end (), 0 );
3451
-
3452
- // Append zeros
3453
- FullShadow =
3454
- IRB.CreateShuffleVector (Shadow, getCleanShadow (Shadow), ShadowMask);
3455
- }
3488
+ Value *FullShadow = maybeExtendVectorShadowWithZeros (Shadow, I);
3456
3489
3457
3490
setShadow (&I, FullShadow);
3458
3491
setOriginForNaryOp (I);
0 commit comments