SamTebbs33
diff --git a/‎compiler-rt/lib/lsan/lsan_interceptors.cpp
Lines changed: 3 additions & 0 deletions b/‎compiler-rt/lib/lsan/lsan_interceptors.cpp
Lines changed: 3 additions & 0 deletions
diff --git a/‎llvm/include/llvm/IR/PatternMatch.h
Lines changed: 8 additions & 0 deletions b/‎llvm/include/llvm/IR/PatternMatch.h
Lines changed: 8 additions & 0 deletions
diff --git a/‎llvm/lib/Analysis/ValueTracking.cpp
Lines changed: 25 additions & 2 deletions b/‎llvm/lib/Analysis/ValueTracking.cpp
Lines changed: 25 additions & 2 deletions
diff --git a/‎llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
Lines changed: 48 additions & 3 deletions b/‎llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
Lines changed: 48 additions & 3 deletions
diff --git a/‎llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
Lines changed: 3 additions & 3 deletions b/‎llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
Lines changed: 3 additions & 3 deletions
diff --git a/‎llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
Lines changed: 4 additions & 4 deletions b/‎llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
Lines changed: 4 additions & 4 deletions
diff --git a/‎llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
Lines changed: 2 additions & 2 deletions b/‎llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
Lines changed: 2 additions & 2 deletions
diff --git a/‎llvm/test/Transforms/InstCombine/bitcast-known-bits.ll
Lines changed: 7 additions & 14 deletions b/‎llvm/test/Transforms/InstCombine/bitcast-known-bits.ll
Lines changed: 7 additions & 14 deletions
@@ -146,6 +146,9 @@ INTERCEPTOR(void*, valloc, uptr size) {
   GET_STACK_TRACE_MALLOC;
   return lsan_valloc(size, stack);
 }
+#else
+#  define LSAN_MAYBE_INTERCEPT_FREE_SIZED
+#  define LSAN_MAYBE_INTERCEPT_FREE_ALIGNED_SIZED
 #endif  // !SANITIZER_APPLE
 
 #if SANITIZER_INTERCEPT_MEMALIGN
 
@@ -1323,6 +1323,14 @@ m_NSWAdd(const LHS &L, const RHS &R) {
                                                                             R);
 }
 template <typename LHS, typename RHS>
+inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
+                                 OverflowingBinaryOperator::NoSignedWrap, true>
+m_c_NSWAdd(const LHS &L, const RHS &R) {
+  return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
+                                   OverflowingBinaryOperator::NoSignedWrap,
+                                   true>(L, R);
+}
+template <typename LHS, typename RHS>
 inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
                                  OverflowingBinaryOperator::NoSignedWrap>
 m_NSWSub(const LHS &L, const RHS &R) {
 
@@ -1346,6 +1346,8 @@ static void computeKnownBitsFromOperator(const Operator *I,
         isa<ScalableVectorType>(I->getType()))
       break;
 
+    unsigned NumElts = DemandedElts.getBitWidth();
+    bool IsLE = Q.DL.isLittleEndian();
     // Look through a cast from narrow vector elements to wider type.
     // Examples: v4i32 -> v2i64, v3i8 -> v24
     unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
@@ -1364,7 +1366,6 @@ static void computeKnownBitsFromOperator(const Operator *I,
       //
       // The known bits of each sub-element are then inserted into place
       // (dependent on endian) to form the full result of known bits.
-      unsigned NumElts = DemandedElts.getBitWidth();
       unsigned SubScale = BitWidth / SubBitWidth;
       APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
       for (unsigned i = 0; i != NumElts; ++i) {
@@ -1376,10 +1377,32 @@ static void computeKnownBitsFromOperator(const Operator *I,
       for (unsigned i = 0; i != SubScale; ++i) {
         computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc, Q,
                          Depth + 1);
-        unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
+        unsigned ShiftElt = IsLE ? i : SubScale - 1 - i;
         Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
       }
     }
+    // Look through a cast from wider vector elements to narrow type.
+    // Examples: v2i64 -> v4i32
+    if (SubBitWidth % BitWidth == 0) {
+      unsigned SubScale = SubBitWidth / BitWidth;
+      KnownBits KnownSrc(SubBitWidth);
+      APInt SubDemandedElts =
+          APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale);
+      computeKnownBits(I->getOperand(0), SubDemandedElts, KnownSrc, Q,
+                       Depth + 1);
+
+      Known.Zero.setAllBits();
+      Known.One.setAllBits();
+      for (unsigned i = 0; i != SubScale; ++i) {
+        if (DemandedElts[i]) {
+          unsigned Shifts = IsLE ? i : NumElts - 1 - i;
+          unsigned Offset = (Shifts % SubScale) * BitWidth;
+          Known = Known.intersectWith(KnownSrc.extractBits(BitWidth, Offset));
+          if (Known.isUnknown())
+            break;
+        }
+      }
+    }
     break;
   }
   case Instruction::SExt: {
 
@@ -1896,7 +1896,7 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
                                           {Sub, Builder.getFalse()});
     Value *Ret = Builder.CreateSub(
         ConstantInt::get(A->getType(), A->getType()->getScalarSizeInBits()),
-        Ctlz, "", /*HasNUW*/ true, /*HasNSW*/ true);
+        Ctlz, "", /*HasNUW=*/true, /*HasNSW=*/true);
     return replaceInstUsesWith(I, Builder.CreateZExtOrTrunc(Ret, I.getType()));
   }
 
@@ -2363,8 +2363,8 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
     OverflowingBinaryOperator *LHSSub = cast<OverflowingBinaryOperator>(Op0);
     bool HasNUW = I.hasNoUnsignedWrap() && LHSSub->hasNoUnsignedWrap();
     bool HasNSW = HasNUW && I.hasNoSignedWrap() && LHSSub->hasNoSignedWrap();
-    Value *Add = Builder.CreateAdd(Y, Op1, "", /* HasNUW */ HasNUW,
-                                   /* HasNSW */ HasNSW);
+    Value *Add = Builder.CreateAdd(Y, Op1, "", /*HasNUW=*/HasNUW,
+                                   /*HasNSW=*/HasNSW);
     BinaryOperator *Sub = BinaryOperator::CreateSub(X, Add);
     Sub->setHasNoUnsignedWrap(HasNUW);
     Sub->setHasNoSignedWrap(HasNSW);
@@ -2835,6 +2835,51 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
   if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
     return Res;
 
+  // (sub (sext (add nsw (X, Y)), sext (X))) --> (sext (Y))
+  if (match(Op1, m_SExtLike(m_Value(X))) &&
+      match(Op0, m_SExtLike(m_c_NSWAdd(m_Specific(X), m_Value(Y))))) {
+    Value *SExtY = Builder.CreateSExt(Y, I.getType());
+    return replaceInstUsesWith(I, SExtY);
+  }
+
+  // (sub[ nsw] (sext (add nsw (X, Y)), sext (add nsw (X, Z)))) -->
+  // --> (sub[ nsw] (sext (Y), sext (Z)))
+  {
+    Value *Z, *Add0, *Add1;
+    if (match(Op0, m_SExtLike(m_Value(Add0))) &&
+        match(Op1, m_SExtLike(m_Value(Add1))) &&
+        ((match(Add0, m_NSWAdd(m_Value(X), m_Value(Y))) &&
+          match(Add1, m_c_NSWAdd(m_Specific(X), m_Value(Z)))) ||
+         (match(Add0, m_NSWAdd(m_Value(Y), m_Value(X))) &&
+          match(Add1, m_c_NSWAdd(m_Specific(X), m_Value(Z)))))) {
+      unsigned NumOfNewInstrs = 0;
+      // Non-constant Y, Z require new SExt.
+      NumOfNewInstrs += !isa<Constant>(Y) ? 1 : 0;
+      NumOfNewInstrs += !isa<Constant>(Z) ? 1 : 0;
+      // Check if we can trade some of the old instructions for the new ones.
+      unsigned NumOfDeadInstrs = 0;
+      if (Op0->hasOneUse()) {
+        // If Op0 (sext) has multiple uses, then we keep it
+        // and the add that it uses, otherwise, we can remove
+        // the sext and probably the add (depending on the number of its uses).
+        ++NumOfDeadInstrs;
+        NumOfDeadInstrs += Add0->hasOneUse() ? 1 : 0;
+      }
+      if (Op1->hasOneUse()) {
+        ++NumOfDeadInstrs;
+        NumOfDeadInstrs += Add1->hasOneUse() ? 1 : 0;
+      }
+      if (NumOfDeadInstrs >= NumOfNewInstrs) {
+        Value *SExtY = Builder.CreateSExt(Y, I.getType());
+        Value *SExtZ = Builder.CreateSExt(Z, I.getType());
+        Value *Sub = Builder.CreateSub(SExtY, SExtZ, "",
+                                       /*HasNUW=*/false,
+                                       /*HasNSW=*/I.hasNoSignedWrap());
+        return replaceInstUsesWith(I, Sub);
+      }
+    }
+  }
+
   return TryToNarrowDeduceFlags();
 }
 
 
@@ -290,7 +290,7 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
       auto *Op1C = cast<Constant>(Op1);
       return replaceInstUsesWith(
           I, Builder.CreateMul(NegOp0, ConstantExpr::getNeg(Op1C), "",
-                               /* HasNUW */ false,
+                               /*HasNUW=*/false,
                                HasNSW && Op1C->isNotMinSignedValue()));
     }
 
@@ -1255,8 +1255,8 @@ static Value *foldIDivShl(BinaryOperator &I, InstCombiner::BuilderTy &Builder) {
       // or divisor has nsw and operator is sdiv.
       Value *Dividend = Builder.CreateShl(
           One, Y, "shl.dividend",
-          /*HasNUW*/ true,
-          /*HasNSW*/
+          /*HasNUW=*/true,
+          /*HasNSW=*/
           IsSigned ? (Shl0->hasNoUnsignedWrap() || Shl1->hasNoUnsignedWrap())
                    : Shl0->hasNoSignedWrap());
       return Builder.CreateLShr(Dividend, Z, "", I.isExact());
 
@@ -233,7 +233,7 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
     // However, only do this either if the old `sub` doesn't stick around, or
     // it was subtracting from a constant. Otherwise, this isn't profitable.
     return Builder.CreateSub(I->getOperand(1), I->getOperand(0),
-                             I->getName() + ".neg", /* HasNUW */ false,
+                             I->getName() + ".neg", /*HasNUW=*/false,
                              IsNSW && I->hasNoSignedWrap());
   }
 
@@ -404,15 +404,15 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
     IsNSW &= I->hasNoSignedWrap();
     if (Value *NegOp0 = negate(I->getOperand(0), IsNSW, Depth + 1))
       return Builder.CreateShl(NegOp0, I->getOperand(1), I->getName() + ".neg",
-                               /* HasNUW */ false, IsNSW);
+                               /*HasNUW=*/false, IsNSW);
     // Otherwise, `shl %x, C` can be interpreted as `mul %x, 1<<C`.
     Constant *Op1C;
     if (!match(I->getOperand(1), m_ImmConstant(Op1C)) || !IsTrulyNegation)
       return nullptr;
     return Builder.CreateMul(
         I->getOperand(0),
         Builder.CreateShl(Constant::getAllOnesValue(Op1C->getType()), Op1C),
-        I->getName() + ".neg", /* HasNUW */ false, IsNSW);
+        I->getName() + ".neg", /*HasNUW=*/false, IsNSW);
   }
   case Instruction::Or: {
     if (!cast<PossiblyDisjointInst>(I)->isDisjoint())
@@ -483,7 +483,7 @@ std::array<Value *, 2> Negator::getSortedOperandsOfBinOp(Instruction *I) {
       // Can't negate either of them.
       return nullptr;
     return Builder.CreateMul(NegatedOp, OtherOp, I->getName() + ".neg",
-                             /* HasNUW */ false, IsNSW && I->hasNoSignedWrap());
+                             /*HasNUW=*/false, IsNSW && I->hasNoSignedWrap());
   }
   default:
     return nullptr; // Don't know, likely not negatible for free.
 
@@ -3732,7 +3732,6 @@ define <4 x i64> @test_avx2_psrl_0() {
   ret <4 x i64> %16
 }
 
-; FIXME: Failure to peek through bitcasts to ensure psllq shift amount is within bounds.
 define <2 x i64> @PR125228(<2 x i64> %v, <2 x i64> %s) {
 ; CHECK-LABEL: @PR125228(
 ; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i64> [[S:%.*]], splat (i64 63)
@@ -3741,7 +3740,8 @@ define <2 x i64> @PR125228(<2 x i64> %v, <2 x i64> %s) {
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast <2 x i64> [[MASK]] to <16 x i8>
 ; CHECK-NEXT:    [[PSRLDQ:%.*]] = shufflevector <16 x i8> [[CAST]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[CAST3:%.*]] = bitcast <16 x i8> [[PSRLDQ]] to <2 x i64>
-; CHECK-NEXT:    [[SLL1:%.*]] = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V]], <2 x i64> [[CAST3]])
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[CAST3]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[SLL1:%.*]] = shl <2 x i64> [[V]], [[TMP2]]
 ; CHECK-NEXT:    [[SHUFP_UNCASTED:%.*]] = shufflevector <2 x i64> [[SLL0]], <2 x i64> [[SLL1]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    ret <2 x i64> [[SHUFP_UNCASTED]]
 ;
 
@@ -12,8 +12,7 @@ define <16 x i8> @knownbits_bitcast_masked_shift(<16 x i8> %arg1, <16 x i8> %arg
 ; CHECK-NEXT:    [[BITCAST4:%.*]] = bitcast <16 x i8> [[OR]] to <8 x i16>
 ; CHECK-NEXT:    [[SHL5:%.*]] = shl nuw <8 x i16> [[BITCAST4]], splat (i16 2)
 ; CHECK-NEXT:    [[BITCAST6:%.*]] = bitcast <8 x i16> [[SHL5]] to <16 x i8>
-; CHECK-NEXT:    [[AND7:%.*]] = and <16 x i8> [[BITCAST6]], splat (i8 -52)
-; CHECK-NEXT:    ret <16 x i8> [[AND7]]
+; CHECK-NEXT:    ret <16 x i8> [[BITCAST6]]
 ;
   %and = and <16 x i8> %arg1, splat (i8 3)
   %and3 = and <16 x i8> %arg2, splat (i8 48)
@@ -33,8 +32,7 @@ define <16 x i8> @knownbits_shuffle_masked_nibble_shift(<16 x i8> %arg)  {
 ; CHECK-NEXT:    [[BITCAST1:%.*]] = bitcast <16 x i8> [[SHUFFLEVECTOR]] to <8 x i16>
 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw <8 x i16> [[BITCAST1]], splat (i16 4)
 ; CHECK-NEXT:    [[BITCAST2:%.*]] = bitcast <8 x i16> [[SHL]] to <16 x i8>
-; CHECK-NEXT:    [[AND3:%.*]] = and <16 x i8> [[BITCAST2]], splat (i8 -16)
-; CHECK-NEXT:    ret <16 x i8> [[AND3]]
+; CHECK-NEXT:    ret <16 x i8> [[BITCAST2]]
 ;
   %and = and <16 x i8> %arg, splat (i8 15)
   %shufflevector = shufflevector <16 x i8> %and, <16 x i8> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
@@ -53,8 +51,7 @@ define <16 x i8> @knownbits_reverse_shuffle_masked_shift(<16 x i8> %arg)  {
 ; CHECK-NEXT:    [[BITCAST1:%.*]] = bitcast <16 x i8> [[SHUFFLEVECTOR]] to <8 x i16>
 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw <8 x i16> [[BITCAST1]], splat (i16 4)
 ; CHECK-NEXT:    [[BITCAST2:%.*]] = bitcast <8 x i16> [[SHL]] to <16 x i8>
-; CHECK-NEXT:    [[AND3:%.*]] = and <16 x i8> [[BITCAST2]], splat (i8 -16)
-; CHECK-NEXT:    ret <16 x i8> [[AND3]]
+; CHECK-NEXT:    ret <16 x i8> [[BITCAST2]]
 ;
   %and = and <16 x i8> %arg, splat (i8 15)
   %shufflevector = shufflevector <16 x i8> %and, <16 x i8> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
@@ -70,8 +67,7 @@ define <16 x i8> @knownbits_extract_bit(<8 x i16> %arg)  {
 ; CHECK-SAME: <8 x i16> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <8 x i16> [[ARG]], splat (i16 15)
 ; CHECK-NEXT:    [[BITCAST1:%.*]] = bitcast <8 x i16> [[LSHR]] to <16 x i8>
-; CHECK-NEXT:    [[AND:%.*]] = and <16 x i8> [[BITCAST1]], splat (i8 1)
-; CHECK-NEXT:    ret <16 x i8> [[AND]]
+; CHECK-NEXT:    ret <16 x i8> [[BITCAST1]]
 ;
   %lshr = lshr <8 x i16> %arg, splat (i16 15)
   %bitcast1 = bitcast <8 x i16> %lshr to <16 x i8>
@@ -88,7 +84,8 @@ define { i32, i1 } @knownbits_popcount_add_with_overflow(<2 x i64> %arg1, <2 x i
 ; CHECK-NEXT:    [[CALL9:%.*]] = tail call range(i64 0, 65) <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[ARG2]])
 ; CHECK-NEXT:    [[BITCAST10:%.*]] = bitcast <2 x i64> [[CALL9]] to <4 x i32>
 ; CHECK-NEXT:    [[EXTRACTELEMENT11:%.*]] = extractelement <4 x i32> [[BITCAST10]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[EXTRACTELEMENT]], i32 [[EXTRACTELEMENT11]])
+; CHECK-NEXT:    [[CALL12:%.*]] = add nuw nsw i32 [[EXTRACTELEMENT]], [[EXTRACTELEMENT11]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 poison, i1 false }, i32 [[CALL12]], 0
 ; CHECK-NEXT:    ret { i32, i1 } [[TMP1]]
 ;
   %call = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %arg1)
@@ -110,11 +107,7 @@ define <16 x i8> @knownbits_shuffle_add_shift_v32i8(<16 x i8> %arg1, <8 x i16> %
 ; CHECK-NEXT:    [[BITCAST11:%.*]] = bitcast <8 x i16> [[SHL10]] to <16 x i8>
 ; CHECK-NEXT:    [[ADD12:%.*]] = add <16 x i8> [[BITCAST11]], [[BITCAST7]]
 ; CHECK-NEXT:    [[ADD14:%.*]] = add <16 x i8> [[ADD12]], [[ARG1]]
-; CHECK-NEXT:    [[BITCAST14:%.*]] = bitcast <16 x i8> [[ADD12]] to <8 x i16>
-; CHECK-NEXT:    [[SHL15:%.*]] = shl <8 x i16> [[BITCAST14]], splat (i16 8)
-; CHECK-NEXT:    [[BITCAST16:%.*]] = bitcast <8 x i16> [[SHL15]] to <16 x i8>
-; CHECK-NEXT:    [[ADD13:%.*]] = add <16 x i8> [[ADD14]], [[BITCAST16]]
-; CHECK-NEXT:    ret <16 x i8> [[ADD13]]
+; CHECK-NEXT:    ret <16 x i8> [[ADD14]]
 ;
   %shl6 = shl <8 x i16> %arg2, splat (i16 8)
   %bitcast7 = bitcast <8 x i16> %shl6 to <16 x i8>
Original file line number	Diff line number	Diff line change
`@@ -146,6 +146,9 @@ INTERCEPTOR(void*, valloc, uptr size) {`
`146`	`146`	`GET_STACK_TRACE_MALLOC;`
`147`	`147`	`return lsan_valloc(size, stack);`
`148`	`148`	`}`
	`149`	`+#else`
	`150`	`+# define LSAN_MAYBE_INTERCEPT_FREE_SIZED`
	`151`	`+# define LSAN_MAYBE_INTERCEPT_FREE_ALIGNED_SIZED`
`149`	`152`	`#endif // !SANITIZER_APPLE`
`150`	`153`
`151`	`154`	`#if SANITIZER_INTERCEPT_MEMALIGN`