diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -163,8 +163,17 @@
   return nullptr;
 }
 
-static void computeKnownBits(const Value *V, KnownBits &Known,
-                             unsigned Depth, const Query &Q);
+static void computeKnownBits(const Value *V, const APInt &DemandedElts,
+                             KnownBits &Known, unsigned Depth, const Query &Q);
+
+static void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
+                             const Query &Q) {
+  Type *Ty = V->getType();
+  APInt DemandedElts = Ty->isVectorTy()
+                           ? APInt::getAllOnesValue(Ty->getVectorNumElements())
+                           : APInt(1, 1);
+  computeKnownBits(V, DemandedElts, Known, Depth, Q);
+}
 
 void llvm::computeKnownBits(const Value *V, KnownBits &Known,
                             const DataLayout &DL, unsigned Depth,
@@ -295,8 +304,17 @@
       V, Mask, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
 }
 
+static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
+                                   unsigned Depth, const Query &Q);
+
 static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
-                                   const Query &Q);
+                                   const Query &Q) {
+  Type *Ty = V->getType();
+  APInt DemandedElts = Ty->isVectorTy()
+                           ? APInt::getAllOnesValue(Ty->getVectorNumElements())
+                           : APInt(1, 1);
+  return ComputeNumSignBits(V, DemandedElts, Depth, Q);
+}
 
 unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
                                   unsigned Depth, AssumptionCache *AC,
@@ -1039,8 +1057,10 @@
     Known.setAllZero();
 }
 
-static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
-                                         unsigned Depth, const Query &Q) {
+static void computeKnownBitsFromOperator(const Operator *I,
+                                         const APInt &DemandedElts,
+                                         KnownBits &Known, unsigned Depth,
+                                         const Query &Q) {
   unsigned BitWidth = Known.getBitWidth();
 
   KnownBits Known2(Known);
@@ -1654,6 +1674,77 @@
       }
     }
     break;
+  case Instruction::ShuffleVector: {
+    // TODO: This is copied almost directly from the SelectionDAG version of
+    // ComputeKnownBits. It would be better if we could share common
+    // code. If not, make sure that changes are translated to the DAG.
+    auto *Shuf = cast<ShuffleVectorInst>(I);
+    const Value *LHS = Shuf->getOperand(0);
+    const Value *RHS = Shuf->getOperand(1);
+    int NumElts = LHS->getType()->getVectorNumElements();
+    int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements();
+    APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
+    for (int i = 0; i != NumMaskElts; ++i) {
+      if (!DemandedElts[i])
+        continue;
+      int M = Shuf->getMaskValue(i);
+      assert(M < NumElts * 2 && "Invalid shuffle mask constant");
+      // For undef elements, we don't know anything about the common state of
+      // the shuffle result.
+      if (M == -1) {
+        Known.resetAll();
+        return;
+      }
+      if (M < NumElts)
+        DemandedLHS.setBit(M % NumElts);
+      else
+        DemandedRHS.setBit(M % NumElts);
+    }
+    Known.One.setAllBits();
+    Known.Zero.setAllBits();
+    if (!!DemandedLHS)
+      computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q);
+    // If we don't know any bits, early out.
+    if (Known.isUnknown())
+      break;
+    if (!!DemandedRHS) {
+      computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q);
+      Known.One &= Known2.One;
+      Known.Zero &= Known2.Zero;
+    }
+    break;
+  }
+  case Instruction::InsertElement: {
+    // If we know the element index, split the demand between the
+    // source vector and the inserted element, otherwise assume we need
+    // the original demanded vector elements and the value.
+    auto *IEI = cast<InsertElementInst>(I);
+    Value *Vec = IEI->getOperand(0);
+    Value *Elt = IEI->getOperand(1);
+    Value *Idx = IEI->getOperand(2);
+    bool DemandedVal = true;
+    APInt DemandedVecElts = DemandedElts;
+    unsigned NumElts = DemandedElts.getBitWidth();
+    if (auto *CIdx = dyn_cast<ConstantInt>(Idx))
+      if (CIdx->getValue().ult(NumElts)) {
+        unsigned EltIdx = CIdx->getZExtValue();
+        DemandedVal = !!DemandedElts[EltIdx];
+        DemandedVecElts.clearBit(EltIdx);
+      }
+    Known.One.setAllBits();
+    Known.Zero.setAllBits();
+    if (DemandedVal)
+      computeKnownBits(Elt, Known, Depth + 1, Q);
+    // If we don't know any bits, early out.
+    if (Known.isUnknown())
+      break;
+    if (!!DemandedVecElts) {
+      computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q);
+      Known.One &= Known2.One;
+      Known.Zero &= Known2.Zero;
+    }
+    break;
+  }
   case Instruction::ExtractElement:
     // Look through extract element. At the moment we keep this simple and skip
     // tracking the specific element. But at least we might find information
@@ -1688,6 +1779,7 @@
       }
     }
   }
+  break;
 }
 }
 
@@ -1714,23 +1806,33 @@
 /// where V is a vector, known zero, and known one values are the
 /// same width as the vector element, and the bit is set only if it is true
 /// for all of the elements in the vector.
-void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
-                      const Query &Q) {
+void computeKnownBits(const Value *V, const APInt &DemandedElts,
+                      KnownBits &Known, unsigned Depth, const Query &Q) {
   assert(V && "No Value?");
   assert(Depth <= MaxDepth && "Limit Search Depth");
   unsigned BitWidth = Known.getBitWidth();
 
-  assert((V->getType()->isIntOrIntVectorTy(BitWidth) ||
-          V->getType()->isPtrOrPtrVectorTy()) &&
+  Type *Ty = V->getType();
+  assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) &&
          "Not integer or pointer type!");
+  assert(((Ty->isVectorTy() &&
+           Ty->getVectorNumElements() == DemandedElts.getBitWidth()) ||
+          (!Ty->isVectorTy() && DemandedElts == APInt(1, 1))) &&
+         "Unexpected vector size");
 
-  Type *ScalarTy = V->getType()->getScalarType();
+  Type *ScalarTy = Ty->getScalarType();
   unsigned ExpectedWidth = ScalarTy->isPointerTy() ?
                            Q.DL.getPointerTypeSizeInBits(ScalarTy) :
                            Q.DL.getTypeSizeInBits(ScalarTy);
   assert(ExpectedWidth == BitWidth && "V and Known should have same BitWidth");
   (void)BitWidth;
   (void)ExpectedWidth;
 
+  if (!DemandedElts) {
+    // No demanded elts, better to assume we don't know anything.
+    Known.resetAll();
+    return;
+  }
+
   const APInt *C;
   if (match(V, m_APInt(C))) {
     // We know all of the bits for a scalar constant or a splat vector constant!
@@ -1746,10 +1848,15 @@
   // Handle a constant vector by taking the intersection of the known bits of
   // each element.
   if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
+    assert((!Ty->isVectorTy() ||
+            CDS->getNumElements() == DemandedElts.getBitWidth()) &&
+           "Unexpected vector size");
     // We know that CDS must be a vector of integers. Take the intersection of
     // each element.
     Known.Zero.setAllBits(); Known.One.setAllBits();
     for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+      if (Ty->isVectorTy() && !DemandedElts[i])
+        continue;
       APInt Elt = CDS->getElementAsAPInt(i);
       Known.Zero &= ~Elt;
       Known.One &= Elt;
@@ -1758,10 +1865,14 @@
   }
 
   if (const auto *CV = dyn_cast<ConstantVector>(V)) {
+    assert(CV->getNumOperands() == DemandedElts.getBitWidth() &&
+           "Unexpected vector size");
     // We know that CV must be a vector of integers. Take the intersection of
     // each element.
     Known.Zero.setAllBits(); Known.One.setAllBits();
     for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
+      if (!DemandedElts[i])
+        continue;
       Constant *Element = CV->getAggregateElement(i);
       auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
       if (!ElementCI) {
@@ -1800,10 +1911,10 @@
   }
 
   if (const Operator *I = dyn_cast<Operator>(V))
-    computeKnownBitsFromOperator(I, Known, Depth, Q);
+    computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q);
 
   // Aligned pointers have trailing zeros - refine Known.Zero set
-  if (V->getType()->isPointerTy()) {
+  if (Ty->isPointerTy()) {
     const MaybeAlign Align = V->getPointerAlignment(Q.DL);
     if (Align)
       Known.Zero.setLowBits(countTrailingZeros(Align->value()));
@@ -2429,6 +2540,7 @@
 /// or if any element was not analyzed; otherwise, return the count for the
 /// element with the minimum number of sign bits.
 static unsigned computeNumSignBitsVectorConstant(const Value *V,
+                                                 const APInt &DemandedElts,
                                                  unsigned TyBits) {
   const auto *CV = dyn_cast<Constant>(V);
   if (!CV || !CV->getType()->isVectorTy())
@@ -2437,6 +2549,8 @@
   unsigned MinSignBits = TyBits;
   unsigned NumElts = CV->getType()->getVectorNumElements();
   for (unsigned i = 0; i != NumElts; ++i) {
+    if (!DemandedElts[i])
+      continue;
     // If we find a non-ConstantInt, bail out.
     auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i));
     if (!Elt)
@@ -2448,12 +2562,22 @@
   return MinSignBits;
 }
 
+static unsigned ComputeNumSignBitsImpl(const Value *V,
+                                       const APInt &DemandedElts,
+                                       unsigned Depth, const Query &Q);
+
 static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
-                                       const Query &Q);
+                                       const Query &Q) {
+  Type *Ty = V->getType();
+  APInt DemandedElts = Ty->isVectorTy()
+                           ? APInt::getAllOnesValue(Ty->getVectorNumElements())
+                           : APInt(1, 1);
+  return ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q);
+}
 
-static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
-                                   const Query &Q) {
-  unsigned Result = ComputeNumSignBitsImpl(V, Depth, Q);
+static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
+                                   unsigned Depth, const Query &Q) {
+  unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q);
   assert(Result > 0 && "At least one sign bit needs to be present!");
   return Result;
 }
@@ -2464,15 +2588,22 @@
 /// after an "ashr X, 2", we know that the top 3 bits are all equal to each
 /// other, so we return 3. For vectors, return the number of sign bits for the
 /// vector element with the minimum number of known sign bits.
-static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
-                                       const Query &Q) {
+static unsigned ComputeNumSignBitsImpl(const Value *V,
+                                       const APInt &DemandedElts,
+                                       unsigned Depth, const Query &Q) {
   assert(Depth <= MaxDepth && "Limit Search Depth");
 
   // We return the minimum number of sign bits that are guaranteed to be present
   // in V, so for undef we have to conservatively return 1. We don't have the
   // same behavior for poison though -- that's a FIXME today.
 
-  Type *ScalarTy = V->getType()->getScalarType();
+  Type *Ty = V->getType();
+  assert(((Ty->isVectorTy() &&
+           Ty->getVectorNumElements() == DemandedElts.getBitWidth()) ||
+          (!Ty->isVectorTy() && DemandedElts == APInt(1, 1))) &&
+         "Unexpected vector size");
+
+  Type *ScalarTy = Ty->getScalarType();
   unsigned TyBits = ScalarTy->isPointerTy() ?
     Q.DL.getPointerTypeSizeInBits(ScalarTy) :
     Q.DL.getTypeSizeInBits(ScalarTy);
@@ -2705,7 +2836,9 @@
     // Collect the minimum number of sign bits that are shared by every vector
    // element referenced by the shuffle.
     auto *Shuf = cast<ShuffleVectorInst>(U);
-    int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements();
+    const Value *LHS = Shuf->getOperand(0);
+    const Value *RHS = Shuf->getOperand(1);
+    int NumElts = LHS->getType()->getVectorNumElements();
     int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements();
     APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
     for (int i = 0; i != NumMaskElts; ++i) {
@@ -2722,16 +2855,16 @@
     }
     Tmp = std::numeric_limits<unsigned>::max();
     if (!!DemandedLHS)
-      Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q);
+      Tmp = ComputeNumSignBits(LHS, DemandedLHS, Depth + 1, Q);
     if (!!DemandedRHS) {
-      Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q);
+      Tmp2 = ComputeNumSignBits(RHS, DemandedRHS, Depth + 1, Q);
       Tmp = std::min(Tmp, Tmp2);
     }
     // If we don't know anything, early out and try computeKnownBits
     // fall-back.
     if (Tmp == 1)
       break;
-    assert(Tmp <= V->getType()->getScalarSizeInBits() &&
+    assert(Tmp <= Ty->getScalarSizeInBits() &&
            "Failed to determine minimum sign bits");
     return Tmp;
   }
@@ -2743,11 +2876,12 @@
 
   // If we can examine all elements of a vector constant successfully, we're
   // done (we can't do any better than that). If not, keep trying.
-  if (unsigned VecSignBits = computeNumSignBitsVectorConstant(V, TyBits))
+  if (unsigned VecSignBits =
+          computeNumSignBitsVectorConstant(V, DemandedElts, TyBits))
     return VecSignBits;
 
   KnownBits Known(TyBits);
-  computeKnownBits(V, Known, Depth, Q);
+  computeKnownBits(V, DemandedElts, Known, Depth, Q);
 
   // If we know that the sign bit is either zero or one, determine the number of
   // identical bits in the top of the input value.
diff --git a/llvm/test/Transforms/InstCombine/nsw.ll b/llvm/test/Transforms/InstCombine/nsw.ll
--- a/llvm/test/Transforms/InstCombine/nsw.ll
+++ b/llvm/test/Transforms/InstCombine/nsw.ll
@@ -99,13 +99,11 @@
   ret i8 %add
 }
 
-; TODO: computeKnownBits() should look through a shufflevector.
-
 define <3 x i32> @shl_nuw_nsw_shuffle_splat_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_nuw_nsw_shuffle_splat_vec(
 ; CHECK-NEXT:    [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> undef, <3 x i32>
-; CHECK-NEXT:    [[T3:%.*]] = shl nsw <3 x i32> [[SHUF]],
+; CHECK-NEXT:    [[T3:%.*]] = shl nuw nsw <3 x i32> [[SHUF]],
 ; CHECK-NEXT:    ret <3 x i32> [[T3]]
 ;
   %t2 = zext <2 x i8> %x to <2 x i32>
diff --git a/llvm/test/Transforms/InstCombine/shift-add.ll b/llvm/test/Transforms/InstCombine/shift-add.ll
--- a/llvm/test/Transforms/InstCombine/shift-add.ll
+++ b/llvm/test/Transforms/InstCombine/shift-add.ll
@@ -78,8 +78,7 @@
 ; CHECK-NEXT:    [[A:%.*]] = zext i16 [[I:%.*]] to i32
 ; CHECK-NEXT:    [[B:%.*]] = insertelement <4 x i32> undef, i32 [[A]], i32 0
 ; CHECK-NEXT:    [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[D:%.*]] = add <4 x i32> [[C]],
-; CHECK-NEXT:    [[E:%.*]] = shl <4 x i32> , [[D]]
+; CHECK-NEXT:    [[E:%.*]] = shl <4 x i32> , [[C]]
 ; CHECK-NEXT:    ret <4 x i32> [[E]]
 ;
   %A = zext i16 %I to i32
@@ -95,8 +94,7 @@
 ; CHECK-NEXT:    [[A:%.*]] = zext i16 [[I:%.*]] to i32
 ; CHECK-NEXT:    [[B:%.*]] = insertelement <4 x i32> undef, i32 [[A]], i32 0
 ; CHECK-NEXT:    [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[D:%.*]] = add <4 x i32> [[C]],
-; CHECK-NEXT:    [[E:%.*]] = ashr <4 x i32> , [[D]]
+; CHECK-NEXT:    [[E:%.*]] = ashr <4 x i32> , [[C]]
 ; CHECK-NEXT:    ret <4 x i32> [[E]]
 ;
   %A = zext i16 %I to i32
@@ -112,8 +110,7 @@
 ; CHECK-NEXT:    [[A:%.*]] = zext i16 [[I:%.*]] to i32
 ; CHECK-NEXT:    [[B:%.*]] = insertelement <4 x i32> undef, i32 [[A]], i32 0
 ; CHECK-NEXT:    [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[D:%.*]] = add <4 x i32> [[C]],
-; CHECK-NEXT:    [[E:%.*]] = lshr <4 x i32> , [[D]]
+; CHECK-NEXT:    [[E:%.*]] = lshr <4 x i32> , [[C]]
 ; CHECK-NEXT:    ret <4 x i32> [[E]]
 ;
   %A = zext i16 %I to i32
diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -87,7 +87,7 @@
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT:    [[INDUCTION:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
 ; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[INDEX]], 3
@@ -275,7 +275,7 @@
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE22:%.*]] ]
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT:    [[INDUCTION:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <4 x i64> [[INDUCTION]],
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
@@ -484,7 +484,7 @@
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[INDEX]], i32 0
 ; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
-; ENABLED_MASKED_STRIDED-NEXT:    [[INDUCTION:%.*]] = add <8 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; ENABLED_MASKED_STRIDED-NEXT:    [[INDUCTION:%.*]] = or <8 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = shl nuw nsw i32 [[INDEX]], 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP0]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = icmp ule <8 x i32> [[INDUCTION]], [[BROADCAST_SPLAT2]]
@@ -766,7 +766,7 @@
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32
 ; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[INDEX]], i32 0
 ; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
-; ENABLED_MASKED_STRIDED-NEXT:    [[INDUCTION:%.*]] = add <8 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; ENABLED_MASKED_STRIDED-NEXT:    [[INDUCTION:%.*]] = or <8 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = shl nuw nsw i32 [[INDEX]], 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP0]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = icmp ule <8 x i32> {{.*}}, {{.*}}
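
Note on the demanded-elements convention used throughout this patch: scalar values pass APInt(1, 1), and for vectors bit i of DemandedElts corresponds to lane i of the result. The code below is a minimal standalone sketch of the mask-splitting step performed by the new ShuffleVector case, assuming only the llvm/ADT headers; the helper name splitShuffleDemandedElts is hypothetical and not part of this patch.

#include <cassert>
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"

using namespace llvm;

// Split the demanded lanes of a shuffle result between its two source
// vectors. Mask holds the shuffle mask (-1 for undef lanes) and NumElts is
// the element count of each source vector. Returns false when a demanded
// lane is undef, in which case a caller computing known bits must give up
// (as the Known.resetAll() in the switch case above does).
static bool splitShuffleDemandedElts(ArrayRef<int> Mask, int NumElts,
                                     const APInt &DemandedElts,
                                     APInt &DemandedLHS, APInt &DemandedRHS) {
  DemandedLHS = DemandedRHS = APInt::getNullValue(NumElts);
  for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
    // Result lanes nobody asked about don't constrain either source.
    if (!DemandedElts[i])
      continue;
    int M = Mask[i];
    assert(M < NumElts * 2 && "Invalid shuffle mask constant");
    if (M < 0)
      return false; // Undef lane: nothing is known about the result.
    if (M < NumElts)
      DemandedLHS.setBit(M); // Lane comes from the first source vector.
    else
      DemandedRHS.setBit(M - NumElts); // Lane comes from the second source.
  }
  return true;
}

Writing M % NumElts for both operands, as the patch does, is equivalent to the explicit M - NumElts here, since a valid mask index is always below 2 * NumElts.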