Index: include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- include/llvm/Analysis/ScalarEvolution.h
+++ include/llvm/Analysis/ScalarEvolution.h
@@ -575,6 +575,15 @@
   Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
   createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI);
 
+  /// Check if \p PhiScev is an integer loop header phi for which PSCEV was
+  /// able to build the AddRecurrence \p AR under some predicates. If so, the
+  /// function returns in \p CastInsts the cast instructions that participate
+  /// in the induction update chain associated with this phi, and that can be
+  /// ignored under these predicates.
+  bool getCastsForInductionPHI(const SCEVUnknown *PhiScev,
+                               const SCEVAddRecExpr *AR,
+                               SmallVectorImpl<Instruction *> &CastInsts);
+
   /// Returns an expression for a GEP
   ///
   /// \p GEP The GEP. The indices contained in the GEP itself are ignored,
Index: include/llvm/Transforms/Utils/LoopUtils.h
===================================================================
--- include/llvm/Transforms/Utils/LoopUtils.h
+++ include/llvm/Transforms/Utils/LoopUtils.h
@@ -306,10 +306,13 @@
   /// induction, the induction descriptor \p D will contain the data describing
   /// this induction. If by some other means the caller has a better SCEV
   /// expression for \p Phi than the one returned by the ScalarEvolution
-  /// analysis, it can be passed through \p Expr.
-  static bool isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE,
-                             InductionDescriptor &D,
-                             const SCEV *Expr = nullptr);
+  /// analysis, it can be passed through \p Expr. If the def-use chain
+  /// associated with the phi includes casts (that we know we can ignore
+  /// under proper runtime checks), they are passed through \p CastsToIgnore.
+  static bool
+  isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE,
+                 InductionDescriptor &D, const SCEV *Expr = nullptr,
+                 SmallVectorImpl<Instruction *> *CastsToIgnore = nullptr);
 
   /// Returns true if \p Phi is a floating point induction in the loop \p L.
   /// If \p Phi is an induction, the induction descriptor \p D will contain
@@ -350,10 +353,18 @@
            Instruction::BinaryOpsEnd;
   }
 
+  /// Returns a reference to the type-cast instructions in the induction
+  /// update chain that are redundant when guarded with a runtime
+  /// SCEV overflow check.
+  const SmallVectorImpl<Instruction *> &getCastInsts() const {
+    return RedundantCasts;
+  }
+
 private:
   /// Private constructor - used by \c isInductionPHI.
   InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step,
-                      BinaryOperator *InductionBinOp = nullptr);
+                      BinaryOperator *InductionBinOp = nullptr,
+                      SmallVectorImpl<Instruction *> *Casts = nullptr);
 
   /// Start value.
   TrackingVH<Value> StartValue;
@@ -363,6 +374,9 @@
   const SCEV *Step = nullptr;
   // Instruction that advances induction variable.
   BinaryOperator *InductionBinOp = nullptr;
+  // Instructions used for type-casts of the induction variable,
+  // that are redundant when guarded with a runtime SCEV overflow check.
+  SmallVector<Instruction *, 2> RedundantCasts;
 };
 
 BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -4342,6 +4342,184 @@
   return L;
 }
 
+/// Helper function for getCastsForInductionPHIImpl.
+/// Given a Value \p CastedPhi and a PHINode Value \p PN,
+/// look for the following sequence:
+///   %Ext = shl %PN, m
+///   %CastedPhi = ashr %Ext, m
+/// If found, return true, and insert the intermediate casts in \p CastInsts.
+/// Otherwise, return false.
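+/// For example, with m = 24 this matches the sign-extension-via-shifts
+/// idiom for an i8 value kept in an i32 phi, as in the doit1 test case
+/// added by this patch:
+///   %sext = shl i32 %p.09, 24
+///   %conv = ashr exact i32 %sext, 24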
+bool isAshrShlInductionCastSequence(Value *CastedPhi, Value *PN,
+                                    const DataLayout &DL,
+                                    SmallVectorImpl<Instruction *> &CastInsts) {
+
+  // 1) Look for the ashr instruction.
+  auto *AshrInst = dyn_cast<Instruction>(CastedPhi);
+  if (!AshrInst || AshrInst->getOpcode() != Instruction::AShr)
+    return false;
+
+  Value *Ext = AshrInst->getOperand(0);
+  uint64_t ExtSrcBitWidth = DL.getTypeSizeInBits(Ext->getType());
+  ConstantInt *AshrAmt = dyn_cast<ConstantInt>(AshrInst->getOperand(1));
+  if (!AshrAmt || !AshrAmt->getValue().ult(ExtSrcBitWidth))
+    return false;
+
+  // 2) Look for the shl instruction.
+  auto *ShlInst = dyn_cast<Instruction>(Ext);
+  if (!ShlInst || ShlInst->getOpcode() != Instruction::Shl ||
+      !ShlInst->hasOneUse() || ShlInst->getOperand(0) != PN)
+    return false;
+
+  auto *ShlAmt = dyn_cast<ConstantInt>(ShlInst->getOperand(1));
+  if (!ShlAmt || ShlAmt->getZExtValue() != AshrAmt->getZExtValue())
+    return false;
+
+  // First insert the last instruction of the ExtTrunc cast sequence. This
+  // last instruction is the only one that may be used outside of the
+  // ExtTrunc cast sequence, so it is placed in position 0 of CastInsts,
+  // for special handling later on.
+  assert(CastInsts.empty() && "CastInsts is expected to be empty.");
+  CastInsts.push_back(AshrInst);
+  CastInsts.push_back(ShlInst);
+  return true;
+}
+
+/// Helper function for getCastsForInductionPHIImpl.
+/// Given a Value \p CastedPhi and a PHINode Value \p PN,
+/// look for the following sequence:
+///   %CastedPhi = and i64 %PN, 2^n-1
+/// If found, return true, and insert the intermediate cast in \p CastInsts.
+/// Otherwise, return false.
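+/// For example, with n = 8 this matches the zero-extension-via-masking
+/// idiom for an i8 value kept in an i32 phi, as in the doit3 test case
+/// added by this patch:
+///   %conv = and i32 %p.09, 255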
+bool isAndInductionCastSequence(Value *CastedPhi, Value *PN,
+                                const DataLayout &DL,
+                                SmallVectorImpl<Instruction *> &CastInsts) {
+
+  auto *AndInst = dyn_cast<Instruction>(CastedPhi);
+  if (!AndInst || AndInst->getOpcode() != Instruction::And ||
+      AndInst->getOperand(0) != PN)
+    return false;
+
+  ConstantInt *Mask = dyn_cast<ConstantInt>(AndInst->getOperand(1));
+  if (!Mask || !Mask->getValue().isMask())
+    return false;
+
+  assert(CastInsts.empty() && "CastInsts is expected to be empty.");
+  CastInsts.push_back(AndInst);
+  return true;
+}
+
+/// Look for the following IR sequence:
+///   %for.body:
+///     %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
+///     %casted_phi = "ExtTrunc i64 %x"
+///     %add = add i64 %casted_phi, %step
+///
+/// where %x is given in \p PN, and where "ExtTrunc i64 %x" can take one of
+/// the following forms:
+///
+/// a) %casted_phi = and i64 %x, 2^n-1
+///    e.g.:
+///    %for.body:
+///      %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
+///      %casted_phi = and i64 %x, 255
+///      %add = add i64 %casted_phi, %step
+///
+/// b) %tmp = shl i64 %x, m
+///    %casted_phi = ashr exact i64 %tmp, m
+///    e.g.:
+///    %for.body:
+///      %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
+///      %sext = shl i64 %x, 32
+///      %casted_phi = ashr exact i64 %sext, 32
+///      %add = add i64 %casted_phi, %step
+///
+/// If found, return true, and insert the intermediate casts in \p CastInsts.
+/// Otherwise, return false.
+bool getCastsForInductionPHIImpl(PHINode *PN, const Loop *L,
+                                 SmallVectorImpl<Instruction *> &CastInsts) {
+
+  // 1) Look for the add instruction that increments the induction via the
+  //    loop backedge.
+  BasicBlock *Latch = L->getLoopLatch();
+  if (!Latch)
+    return false;
+  Value *BEValueV = PN->getIncomingValueForBlock(Latch);
+  auto *IndUpdate = dyn_cast<Instruction>(BEValueV);
+  if (!IndUpdate || IndUpdate->getOpcode() != Instruction::Add)
+    return false;
+  Value *Op0 = IndUpdate->getOperand(0);
+  Value *Op1 = IndUpdate->getOperand(1);
+  Value *CastedPhi = nullptr;
+  // If the add uses the phi directly, there is no cast sequence to find.
+  if (Op0 == PN || Op1 == PN)
+    return false;
+  if (L->isLoopInvariant(Op0))
+    CastedPhi = Op1;
+  else if (L->isLoopInvariant(Op1))
+    CastedPhi = Op0;
+  else
+    return false;
+
+  // 2) Look for the ExtTrunc sequence.
+  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+  return (isAshrShlInductionCastSequence(CastedPhi, PN, DL, CastInsts) ||
+          isAndInductionCastSequence(CastedPhi, PN, DL, CastInsts));
+}
+
+/// If PredicatedSCEVRewrites contains an entry that maps \p PhiScev to \p AR
+/// under a runtime predicate, then we know the following:
+/// (1) The value of the phi at iteration i is:
+///       (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
+/// (2) Under the runtime predicate, the above expression is equal to:
+///       Start + i*Step
+/// where Step is loop invariant.
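+///
+/// For example, for an i64 phi whose update is truncated to i32 and
+/// sign-extended back (the shl/ashr idiom above with m = 32), (1) is
+///   (sext i32 (trunc i64 (Start + i*Step) to i32) to i64)
+/// and the runtime predicate asserts that the truncation never changes the
+/// value, which is what justifies (2).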
+///
+/// We want to find the cast instructions that are involved in the
+/// update-chain of this induction. A caller that adds the required runtime
+/// predicate is free to drop these cast instructions, and compute
+/// the phi using (2) above instead of (1).
+///
+/// We look for the following sequence:
+///   x1 = phi (x0, x_next)
+///   x2 = ExtTrunc (x1)
+///   x_next = add (x2, Step)
+///
+/// where ExtTrunc is the IR sequence that resulted in the SCEV "sext(trunc("
+/// or "zext(trunc(" expression. This can be one of several patterns; we look
+/// for one of the following two:
+/// ExtTrunc1:
+///   Cast: %x2 = and %x1, 2^n-1
+/// ExtTrunc2:
+///   Cast: %t = shl %x1, m
+///   Cast: %x2 = ashr %t, m
+///
+/// If we are able to find this sequence, we return the "Cast:" instructions
+/// from the pattern we found.
+/// (TODO: Check for more IR induction patterns that can result in the SCEV
+/// expression in (1) above.)
+bool ScalarEvolution::getCastsForInductionPHI(
+    const SCEVUnknown *PhiScev, const SCEVAddRecExpr *AR,
+    SmallVectorImpl<Instruction *> &CastInsts) {
+
+  auto *PN = cast<PHINode>(PhiScev->getValue());
+  const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
+  if (!L)
+    return false;
+
+  auto I = PredicatedSCEVRewrites.find({PhiScev, L});
+  if (I == PredicatedSCEVRewrites.end())
+    return false;
+  std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> Rewrite =
+      I->second;
+  assert(isa<SCEVAddRecExpr>(Rewrite.first) && "Expected an AddRec");
+  if (Rewrite.first != AR)
+    return false;
+
+  // PhiScev was found in PredicatedSCEVRewrites, and it is mapped to \p AR
+  // under some runtime tests. Find the cast instructions that participate in
+  // the def-use chain of PhiScev in the loop.
+  return getCastsForInductionPHIImpl(PN, L, CastInsts);
+}
+
 // Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the
 // computation that updates the phi follows the following pattern:
 //   (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum
Index: lib/Transforms/Utils/LoopUtils.cpp
===================================================================
--- lib/Transforms/Utils/LoopUtils.cpp
+++ lib/Transforms/Utils/LoopUtils.cpp
@@ -678,7 +678,8 @@
 }
 
 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
-                                         const SCEV *Step, BinaryOperator *BOp)
+                                         const SCEV *Step, BinaryOperator *BOp,
+                                         SmallVectorImpl<Instruction *> *Casts)
     : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
   assert(IK != IK_NoInduction && "Not an induction");
@@ -705,6 +706,12 @@
           (InductionBinOp->getOpcode() == Instruction::FAdd ||
            InductionBinOp->getOpcode() == Instruction::FSub))) &&
          "Binary opcode should be specified for FP induction");
+
+  if (Casts) {
+    for (auto &Inst : *Casts) {
+      RedundantCasts.push_back(Inst);
+    }
+  }
 }
 
 int InductionDescriptor::getConsecutiveDirection() const {
@@ -870,13 +877,27 @@
     return false;
   }
 
-  return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
+  // Record any cast instructions that participate in the induction update.
+  const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
+  // If we started from an UnknownSCEV, and managed to build an AddRecurrence
+  // only after enabling Assume with PSCEV, this means we may have encountered
+  // cast instructions that required adding a runtime check in order to
+  // guarantee the correctness of the AddRecurrence representation of the
+  // induction.
+  if (PhiScev != AR && SymbolicPhi) {
+    SmallVector<Instruction *, 2> Casts;
+    // Return from inside the 'if' so that Casts is still in scope at the
+    // point of use; escaping its address past this block would leave a
+    // dangling pointer.
+    if (PSE.getSE()->getCastsForInductionPHI(SymbolicPhi, AR, Casts))
+      return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
+  }
+
+  return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
 }
 
-bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
-                                         ScalarEvolution *SE,
-                                         InductionDescriptor &D,
-                                         const SCEV *Expr) {
+bool InductionDescriptor::isInductionPHI(
+    PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
+    InductionDescriptor &D, const SCEV *Expr,
+    SmallVectorImpl<Instruction *> *CastsToIgnore) {
   Type *PhiTy = Phi->getType();
   // We only handle integer and pointer inductions variables.
   if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
@@ -908,7 +929,8 @@
     return false;
 
   if (PhiTy->isIntegerTy()) {
-    D = InductionDescriptor(StartValue, IK_IntInduction, Step);
+    D = InductionDescriptor(StartValue, IK_IntInduction, Step, nullptr,
+                            CastsToIgnore);
     return true;
   }
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -604,6 +604,20 @@
   /// Returns true if we should generate a scalar version of \p IV.
   bool needsScalarInduction(Instruction *IV) const;
 
+  /// If there is a cast involved in the induction variable \p ID, which
+  /// should be ignored in the vectorized loop body, this function records the
+  /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the
+  /// cast. We have already proved that the casted Phi is equal to the uncasted
+  /// Phi in the vectorized loop (under a runtime guard), and therefore
+  /// there is no need to vectorize the cast - the same value can be used in
+  /// the vector loop for both the Phi and the cast.
+  /// If \p VectorLoopValue is a scalarized value, \p Lane is also specified;
+  /// otherwise, \p VectorLoopValue is a widened/vectorized value.
+  void recordVectorLoopValueForInductionCast(const InductionDescriptor &ID,
+                                             unsigned Part,
+                                             Value *VectorLoopValue,
+                                             unsigned Lane = UINT_MAX);
+
   /// Generate a shuffle sequence that will reverse the vector Vec.
   virtual Value *reverseVector(Value *Vec);
@@ -1572,6 +1586,11 @@
   /// Returns True if V is an induction variable in this loop.
   bool isInductionVariable(const Value *V);
 
+  /// Returns True if V is a casted induction variable in this loop, one that
+  /// has been proven to be redundant (possibly under runtime guards) and can
+  /// be ignored when creating the vectorized loop body.
+  bool isCastedInductionVariable(Value *V);
+
   /// Returns True if PN is a reduction variable in this loop.
   bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
@@ -1780,6 +1799,12 @@
   /// variables can be pointers.
   InductionList Inductions;
 
+  /// Holds all the casts that participate in the update chain of the
+  /// induction variables, and that have been proven to be redundant (possibly
+  /// under a runtime guard). These casts can be ignored when creating the
+  /// vectorized loop body.
+  SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
+
   /// Holds the phi nodes that are first-order recurrences.
   RecurrenceSet FirstOrderRecurrences;
@@ -2549,6 +2574,7 @@
   Instruction *LastInduction = VecInd;
   for (unsigned Part = 0; Part < UF; ++Part) {
     VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction);
+    recordVectorLoopValueForInductionCast(II, Part, LastInduction);
     if (isa<TruncInst>(EntryVal))
       addMetadata(LastInduction, EntryVal);
     LastInduction = cast<Instruction>(addFastMathFlag(
@@ -2582,6 +2608,22 @@
   return llvm::any_of(IV->users(), isScalarInst);
 }
 
+void InnerLoopVectorizer::recordVectorLoopValueForInductionCast(
+    const InductionDescriptor &ID, unsigned Part, Value *VectorLoopVal,
+    unsigned Lane) {
+  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
+  if (!Casts.empty()) {
+    // Only the first Cast instruction in the Casts vector is of interest.
+    // The rest of the Casts (if they exist) have no uses outside the
+    // induction update chain itself.
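+    // For example, for the shl/ashr pair matched by
+    // isAshrShlInductionCastSequence, Casts holds [ashr, shl]; only the
+    // ashr may have users outside the cast sequence, so only it needs the
+    // mapping recorded below.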
+    Instruction *CastInst = *Casts.begin();
+    if (Lane < UINT_MAX)
+      VectorLoopValueMap.setScalarValue(CastInst, {Part, Lane}, VectorLoopVal);
+    else
+      VectorLoopValueMap.setVectorValue(CastInst, Part, VectorLoopVal);
+  }
+}
+
 void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
   assert((IV->getType()->isIntegerTy() || IV != OldInduction) &&
          "Primary induction variable must have an integer type");
@@ -2662,6 +2704,7 @@
     Value *EntryPart = getStepVector(Broadcasted, VF * Part, Step,
                                      ID.getInductionOpcode());
     VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart);
+    recordVectorLoopValueForInductionCast(ID, Part, EntryPart);
     if (Trunc)
       addMetadata(EntryPart, Trunc);
   }
@@ -2769,6 +2812,7 @@
       auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step));
       auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul));
       VectorLoopValueMap.setScalarValue(EntryVal, {Part, Lane}, Add);
+      recordVectorLoopValueForInductionCast(ID, Part, Add, Lane);
     }
   }
 }
@@ -5221,6 +5265,16 @@
     PHINode *Phi, const InductionDescriptor &ID,
     SmallPtrSetImpl<Value *> &AllowedExit) {
   Inductions[Phi] = ID;
+
+  // In case this induction also comes with casts that we know we can ignore
+  // in the vectorized loop body, record them here. Only one of these casts
+  // (the first in Casts, the last in the actual IR sequence) is needed here,
+  // because any other casts, if they exist, are not used outside the cast
+  // sequence and will therefore not be queried.
+  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
+  if (!Casts.empty())
+    InductionCastsToIgnore.insert(*Casts.begin());
+
   Type *PhiTy = Phi->getType();
   const DataLayout &DL = Phi->getModule()->getDataLayout();
@@ -5859,6 +5913,14 @@
   return Inductions.count(PN);
 }
 
+bool LoopVectorizationLegality::isCastedInductionVariable(Value *V) {
+  auto *Inst = dyn_cast<Instruction>(V);
+  if (!Inst)
+    return false;
+
+  return InductionCastsToIgnore.count(Inst);
+}
+
 bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
   return FirstOrderRecurrences.count(Phi);
 }
@@ -6932,6 +6994,9 @@
       if (ValuesToIgnore.count(&I))
         continue;
 
+      if (VF > 1 && VecValuesToIgnore.count(&I))
+        continue;
+
       VectorizationCostTy C = getInstructionCost(&I, VF);
 
       // Check if we should override the cost.
@@ -6968,24 +7033,27 @@
 static const SCEV *getAddressAccessSCEV(
               Value *Ptr,
               LoopVectorizationLegality *Legal,
-              ScalarEvolution *SE,
+              PredicatedScalarEvolution &PSE,
              const Loop *TheLoop) {
+
   auto *Gep = dyn_cast<GetElementPtrInst>(Ptr);
   if (!Gep)
     return nullptr;
 
   // We are looking for a gep with all loop invariant indices except for one
   // which should be an induction variable.
+  auto *SE = PSE.getSE();
   unsigned NumOperands = Gep->getNumOperands();
   for (unsigned i = 1; i < NumOperands; ++i) {
     Value *Opd = Gep->getOperand(i);
     if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
-        !Legal->isInductionVariable(Opd))
+        !Legal->isInductionVariable(Opd) &&
+        !Legal->isCastedInductionVariable(Opd))
       return nullptr;
   }
 
   // Now we know we have a GEP ptr, %inv, %ind, %inv. return the Ptr SCEV.
-  return SE->getSCEV(Ptr);
+  return PSE.getSCEV(Ptr);
 }
 
 static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
@@ -7005,7 +7073,7 @@
 
   // Figure out whether the access is strided and get the stride value
   // if it's known in compile time
-  const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, SE, TheLoop);
+  const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, PSE, TheLoop);
 
   // Get the cost of the scalar memory instruction and address computation.
   unsigned Cost = VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
@@ -7559,6 +7627,13 @@
     SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
     VecValuesToIgnore.insert(Casts.begin(), Casts.end());
   }
+  // Ignore type-casting instructions we identified during induction
+  // detection.
+  for (auto &Induction : *Legal->getInductionVars()) {
+    InductionDescriptor &IndDes = Induction.second;
+    const SmallVectorImpl<Instruction *> &Casts = IndDes.getCastInsts();
+    VecValuesToIgnore.insert(Casts.begin(), Casts.end());
+  }
 }
 
 LoopVectorizationCostModel::VectorizationFactor
@@ -7661,6 +7736,18 @@
           return U == Ind || DeadInstructions.count(cast<Instruction>(U));
         }))
       DeadInstructions.insert(IndUpdate);
+
+    // We also record as "dead" the type-casting instructions we had
+    // identified during induction analysis. We don't need any handling for
+    // them in the vectorized loop because we have proven that, under a
+    // proper runtime test guarding the vectorized loop, the value of the phi
+    // and the casted value of the phi are the same. The last instruction in
+    // this casting chain will get its scalar/vector/widened def from the
+    // scalar/vector/widened def of the respective phi node. Any other casts
+    // in the induction def-use chain have no other uses outside the phi
+    // update chain, and will be ignored.
+    InductionDescriptor &IndDes = Induction.second;
+    const SmallVectorImpl<Instruction *> &Casts = IndDes.getCastInsts();
+    DeadInstructions.insert(Casts.begin(), Casts.end());
   }
 }
 
Index: test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll
@@ -0,0 +1,211 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 < %s | FileCheck %s -check-prefix=VF8
+; RUN: opt -S -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 < %s | FileCheck %s -check-prefix=VF1
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Given a loop with an induction variable which is being
+; truncated/extended using casts that have been proven to
+; be redundant under a runtime test, we want to make sure
+; that these casts do not get vectorized/scalarized/widened.
+; This is the case for inductions whose SCEV expression is
+; of the form "ExtTrunc(%phi) + %step", where "ExtTrunc"
+; can be a result of the IR sequences we check below.
+;
+; See also pr30654.
+;
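+; Note: the vector loop produced for each of these functions is expected to
+; be guarded by a runtime SCEV overflow (wrap) check added by PSCEV; the
+; CHECK lines below only verify that the redundant casts are gone from the
+; vector body.
+;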
+
+; Case1: Check the following induction pattern:
+;
+;  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+;  %sext = shl i32 %p.09, 24
+;  %conv = ashr exact i32 %sext, 24
+;  %add = add nsw i32 %conv, %step
+;
+; This is the case in the following code:
+;
+; void doit1(int n, int step) {
+;   int i;
+;   char p = 0;
+;   for (i = 0; i < n; i++) {
+;     a[i] = p;
+;     p = p + step;
+;   }
+; }
+;
+; The "ExtTrunc" IR sequence here is:
+;  "%sext = shl i32 %p.09, 24"
+;  "%conv = ashr exact i32 %sext, 24"
+; We check that it does not appear in the vector loop body, whether
+; we vectorize or scalarize the induction.
+; In the case of widened induction, this means that the induction phi
+; is directly used, without shl/ashr on the way.
+
+; VF8-LABEL: @doit1
+; VF8: vector.body:
+; VF8: %vec.ind = phi <8 x i32>
+; VF8: store <8 x i32> %vec.ind
+; VF8: middle.block:
+
+; VF1-LABEL: @doit1
+; VF1: vector.body:
+; VF1-NOT: %[[TEST:[a-zA-Z0-9.]+]] = shl i32
+; VF1: middle.block:
+
+@a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16
+
+define void @doit1(i32 %n, i32 %step) {
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+  %sext = shl i32 %p.09, 24
+  %conv = ashr exact i32 %sext, 24
+  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx, align 4
+  %add = add nsw i32 %conv, %step
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+
+; Case2: Another variant of the above pattern is where the induction variable
+; is used only for address computation (i.e. it is a GEP index) and therefore
+; the induction is not vectorized but rather only the step is widened.
+;
+; This is the case in the following code, where the induction variable 'w_ix'
+; is only used to access the array 'in':
+;
+; void doit2(int *in, int *out, size_t size, size_t step)
+; {
+;   int w_ix = 0;
+;   for (size_t offset = 0; offset < size; ++offset)
+;   {
+;     int w = in[w_ix];
+;     out[offset] = w;
+;     w_ix += step;
+;   }
+; }
+;
+; The "ExtTrunc" IR sequence here is similar to the previous case:
+;  "%sext = shl i64 %w_ix.011, 32
+;   %idxprom = ashr exact i64 %sext, 32"
+; We check that it does not appear in the vector loop body, whether
+; we widen or scalarize the induction.
+; In the case of widened induction, this means that the induction phi
+; is directly used, without shl/ashr on the way.
+
+; VF8-LABEL: @doit2
+; VF8: vector.body:
+; VF8: %vec.ind = phi <8 x i64>
+; VF8: %{{.*}} = extractelement <8 x i64> %vec.ind
+; VF8: middle.block:
+
+; VF1-LABEL: @doit2
+; VF1: vector.body:
+; VF1-NOT: %[[TEST:[a-zA-Z0-9.]+]] = shl i64
+; VF1: middle.block:
+;
+
+define void @doit2(i32* nocapture readonly %in, i32* nocapture %out, i64 %size, i64 %step) {
+entry:
+  %cmp9 = icmp eq i64 %size, 0
+  br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph
+
+for.body.lr.ph:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %w_ix.011 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+  %offset.010 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  %sext = shl i64 %w_ix.011, 32
+  %idxprom = ashr exact i64 %sext, 32
+  %arrayidx = getelementptr inbounds i32, i32* %in, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %out, i64 %offset.010
+  store i32 %0, i32* %arrayidx1, align 4
+  %add = add i64 %idxprom, %step
+  %inc = add nuw i64 %offset.010, 1
+  %exitcond = icmp eq i64 %inc, %size
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+; Case3: Lastly, check the following induction pattern:
+;
+;  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+;  %conv = and i32 %p.09, 255
+;  %add = add nsw i32 %conv, %step
+;
+; This is the case in the following code:
+;
+; int a[N];
+; void doit3(int n, int step) {
+;   int i;
+;   unsigned char p = 0;
+;   for (i = 0; i < n; i++) {
+;     a[i] = p;
+;     p = p + step;
+;   }
+; }
+;
+; The "ExtTrunc" IR sequence here is:
+;  "%conv = and i32 %p.09, 255".
+; We check that it does not appear in the vector loop body, whether
+; we vectorize or scalarize the induction.
+
+; VF8-LABEL: @doit3
+; VF8: vector.body:
+; VF8: %vec.ind = phi <8 x i32>
+; VF8: store <8 x i32> %vec.ind
+; VF8: middle.block:
+
+; VF1-LABEL: @doit3
+; VF1: vector.body:
+; VF1-NOT: %{{.*}} = and i32
+; VF1: middle.block:
+
+define void @doit3(i32 %n, i32 %step) {
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+  %conv = and i32 %p.09, 255
+  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
+  store i32 %conv, i32* %arrayidx, align 4
+  %add = add nsw i32 %conv, %step
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  ret void
+}
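
A minimal usage sketch (not part of the patch): how a client transform might
consume the new CastsToIgnore output, given a loop-header phi Phi in loop L
and a PredicatedScalarEvolution PSE for that loop. Only the APIs added above
are used; the WidenMap name is an illustrative stand-in for the client's own
value map.

  InductionDescriptor ID;
  if (InductionDescriptor::isInductionPHI(Phi, L, PSE, ID)) {
    // These casts were proven redundant under PSE's predicates. A client
    // that emits the corresponding runtime check can reuse the value it
    // computed for the phi as the value of each cast, instead of widening
    // the cast itself.
    for (Instruction *CastInst : ID.getCastInsts())
      WidenMap[CastInst] = WidenMap[Phi]; // illustrative value map
  }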