@@ -4592,6 +4592,90 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                                       ConstantInt::get(IRB.getInt32Ty(), 0));
   }

+  // Handle llvm.x86.avx512.mask.pmov{,s,us}.*.512
+  //
+  // e.g., call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512
+  //           (<8 x i64>, <16 x i8>, i8)
+  //            A          WriteThru  Mask
+  //
+  //       call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512
+  //           (<16 x i32>, <16 x i8>, i16)
+  //
+  // Dst[i]        = Mask[i] ? truncate_or_saturate(A[i]) : WriteThru[i]
+  // Dst_shadow[i] = Mask[i] ? truncate(A_shadow[i])      : WriteThru_shadow[i]
+  //
+  // If Dst has more elements than A, the excess elements are zeroed (and the
+  // corresponding shadow is initialized).
+  //
+  // Note: for PMOV (truncation), handleIntrinsicByApplyingToShadow is precise
+  // and is much faster than this handler.
+  void handleAVX512VectorDownConvert(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+
+    assert(I.arg_size() == 3);
+    Value *A = I.getOperand(0);
+    Value *WriteThrough = I.getOperand(1);
+    Value *Mask = I.getOperand(2);
+
+    assert(isa<FixedVectorType>(A->getType()));
+    assert(A->getType()->isIntOrIntVectorTy());
+
+    assert(isa<FixedVectorType>(WriteThrough->getType()));
+    assert(WriteThrough->getType()->isIntOrIntVectorTy());
+
+    unsigned ANumElements =
+        cast<FixedVectorType>(A->getType())->getNumElements();
+    unsigned OutputNumElements =
+        cast<FixedVectorType>(WriteThrough->getType())->getNumElements();
+    assert(ANumElements == OutputNumElements ||
+           ANumElements * 2 == OutputNumElements);
+
+    assert(Mask->getType()->isIntegerTy());
+    assert(Mask->getType()->getScalarSizeInBits() == ANumElements);
+    insertCheckShadowOf(Mask, &I);
+
+    assert(I.getType() == WriteThrough->getType());
+
+    // Widen the mask, if necessary, to have one bit per element of the output
+    // vector.
+    // We want the extra bits to have '1's, so that the CreateSelect will
+    // select the values from AShadow instead of WriteThroughShadow ("maskless"
+    // versions of the intrinsics are sometimes implemented using an all-1's
+    // mask and an undefined value for WriteThroughShadow). We accomplish this
+    // by using bitwise NOT before and after the ZExt.
+    if (ANumElements != OutputNumElements) {
+      Mask = IRB.CreateNot(Mask);
+      Mask = IRB.CreateZExt(Mask, Type::getIntNTy(*MS.C, OutputNumElements),
+                            "_ms_widen_mask");
+      Mask = IRB.CreateNot(Mask);
+    }
+    Mask = IRB.CreateBitCast(
+        Mask, FixedVectorType::get(IRB.getInt1Ty(), OutputNumElements));
+
+    Value *AShadow = getShadow(A);
+
+    // The return type might have more elements than the input.
+    // Temporarily shrink the return type's number of elements.
+    VectorType *ShadowType = maybeShrinkVectorShadowType(A, I);
+
+    // PMOV truncates; PMOVS/PMOVUS uses signed/unsigned saturation.
+    // This handler treats them all as truncation, which leads to some rare
+    // false positives in the cases where the truncated bytes could
+    // unambiguously saturate the value, e.g., if A = ??????10 ????????
+    // (big-endian), the unsigned saturated byte conversion is 11111111, i.e.,
+    // fully defined, but the truncated byte is ????????.
+    //
+    // TODO: use GetMinMaxUnsigned() to handle saturation precisely.
+    AShadow = IRB.CreateTrunc(AShadow, ShadowType, "_ms_trunc_shadow");
+    AShadow = maybeExtendVectorShadowWithZeros(AShadow, I);
+
+    Value *WriteThroughShadow = getShadow(WriteThrough);
+
+    Value *Shadow = IRB.CreateSelect(Mask, AShadow, WriteThroughShadow);
+    setShadow(&I, Shadow);
+    setOriginForNaryOp(I);
+  }
+
   // For sh.* compiler intrinsics:
   //   llvm.x86.avx512fp16.mask.{add/sub/mul/div/max/min}.sh.round
   //     (<8 x half>, <8 x half>, <8 x half>, i8, i32)
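
Note on the NOT/ZExt/NOT mask-widening step above: the same trick can be
checked in isolation with plain integers. Below is a minimal standalone
sketch (plain C++, no LLVM dependencies; the helper name is hypothetical),
showing that inverting before and after the zero-extension fills the new
high bits with 1s while leaving the original mask bits intact:

#include <cassert>
#include <cstdint>

// Widen an 8-bit per-element mask to 16 bits such that the 8 new high bits
// are all 1s (i.e., "select from AShadow"), mirroring the
// CreateNot/CreateZExt/CreateNot sequence in handleAVX512VectorDownConvert.
uint16_t widenMaskAllOnesAbove(uint8_t Mask) {
  uint8_t Inverted = static_cast<uint8_t>(~Mask); // NOT
  uint16_t Widened = Inverted;                    // ZExt: new high bits are 0
  return static_cast<uint16_t>(~Widened);         // NOT: new high bits are 1
}

int main() {
  assert(widenMaskAllOnesAbove(0x00) == 0xFF00); // low 8 bits preserved,
  assert(widenMaskAllOnesAbove(0xA5) == 0xFFA5); // high 8 bits forced to 1
  return 0;
}
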
@@ -5412,6 +5496,66 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       break;
     }

+    // AVX512 PMOV: Packed MOV, with truncation
+    // Precisely handled by applying the same intrinsic to the shadow
+    case Intrinsic::x86_avx512_mask_pmov_dw_512:
+    case Intrinsic::x86_avx512_mask_pmov_db_512:
+    case Intrinsic::x86_avx512_mask_pmov_qb_512:
+    case Intrinsic::x86_avx512_mask_pmov_qw_512: {
+      // Intrinsic::x86_avx512_mask_pmov_{qd,wb}_512 were removed in
+      // f608dc1f5775ee880e8ea30e2d06ab5a4a935c22
+      handleIntrinsicByApplyingToShadow(I, I.getIntrinsicID(),
+                                        /*trailingVerbatimArgs=*/1);
+      break;
+    }
+
+    // AVX512 PMOV{S,US}: Packed MOV, with signed/unsigned saturation
+    // Approximately handled using the corresponding truncation intrinsic
+    // TODO: improve handleAVX512VectorDownConvert to precisely model saturation
+    case Intrinsic::x86_avx512_mask_pmovs_dw_512:
+    case Intrinsic::x86_avx512_mask_pmovus_dw_512: {
+      handleIntrinsicByApplyingToShadow(I,
+                                        Intrinsic::x86_avx512_mask_pmov_dw_512,
+                                        /*trailingVerbatimArgs=*/1);
+      break;
+    }
+
+    case Intrinsic::x86_avx512_mask_pmovs_db_512:
+    case Intrinsic::x86_avx512_mask_pmovus_db_512: {
+      handleIntrinsicByApplyingToShadow(I,
+                                        Intrinsic::x86_avx512_mask_pmov_db_512,
+                                        /*trailingVerbatimArgs=*/1);
+      break;
+    }
+
+    case Intrinsic::x86_avx512_mask_pmovs_qb_512:
+    case Intrinsic::x86_avx512_mask_pmovus_qb_512: {
+      handleIntrinsicByApplyingToShadow(I,
+                                        Intrinsic::x86_avx512_mask_pmov_qb_512,
+                                        /*trailingVerbatimArgs=*/1);
+      break;
+    }
+
+    case Intrinsic::x86_avx512_mask_pmovs_qw_512:
+    case Intrinsic::x86_avx512_mask_pmovus_qw_512: {
+      handleIntrinsicByApplyingToShadow(I,
+                                        Intrinsic::x86_avx512_mask_pmov_qw_512,
+                                        /*trailingVerbatimArgs=*/1);
+      break;
+    }
+
+    case Intrinsic::x86_avx512_mask_pmovs_qd_512:
+    case Intrinsic::x86_avx512_mask_pmovus_qd_512:
+    case Intrinsic::x86_avx512_mask_pmovs_wb_512:
+    case Intrinsic::x86_avx512_mask_pmovus_wb_512: {
+      // Since Intrinsic::x86_avx512_mask_pmov_{qd,wb}_512 do not exist, we
+      // cannot use handleIntrinsicByApplyingToShadow. Instead, we call the
+      // slow-path handler.
+      handleAVX512VectorDownConvert(I);
+      break;
+    }
+
+    // AVX512 FP16 Arithmetic
     case Intrinsic::x86_avx512fp16_mask_add_sh_round:
     case Intrinsic::x86_avx512fp16_mask_sub_sh_round:
     case Intrinsic::x86_avx512fp16_mask_mul_sh_round:
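
A worked example of the saturation imprecision described in the comments
(the "A = ??????10 ????????" case): every 16-bit value consistent with the
known bits has bit 9 set, so it is at least 512, and unsigned saturation to
a byte always produces 0xFF, a fully defined result, while truncating the
shadow instead reports the low byte as uninitialized. A minimal sketch of
that reasoning in plain C++ (satU8 is a hypothetical scalar model of the
PMOVUS element operation, not an LLVM API):

#include <cassert>
#include <cstdint>

// Scalar model of an unsigned saturating 16-bit to 8-bit conversion.
uint8_t satU8(uint16_t V) { return V > 0xFF ? 0xFF : static_cast<uint8_t>(V); }

int main() {
  // A = ??????10 ????????: bit 9 is known-1, bit 8 is known-0, all other
  // bits are uninitialized. Check the extremes of the possible range:
  uint16_t Lo = 0x0200; // all unknown bits 0 -> 512
  uint16_t Hi = 0xFEFF; // all unknown bits 1 -> 65279
  // Every value in the range saturates to 0xFF, so the output byte is
  // defined even though the input's low byte is not; a truncation-based
  // shadow flags it as uninitialized (the rare false positive noted above).
  assert(satU8(Lo) == 0xFF);
  assert(satU8(Hi) == 0xFF);
  return 0;
}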