Index: llvm/include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- llvm/include/llvm/Analysis/ScalarEvolution.h
+++ llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -798,6 +798,12 @@
   /// evaluation type which can not result in overflow.
   const SCEV *getTripCountFromExitCount(const SCEV *ExitCount);
 
+  /// Returns the upper bound of the loop trip count infered from memory access.
+  /// Can not access bytes starting outside the statically allocated size
+  /// without being immediate UB.
+  /// Returns SCEVCouldNotCompute if the trip count could not be inferred.
+  const SCEV *getConstantMaxTripCountFromMemAccess(const Loop *L);
+
   /// Convert from an "exit count" (i.e. "backedge taken count") to a "trip
   /// count".  A "trip count" is the number of times the header of the loop
   /// will execute if an exit is taken after the specified number of backedges
Index: llvm/lib/Analysis/ScalarEvolution.cpp
===================================================================
--- llvm/lib/Analysis/ScalarEvolution.cpp
+++ llvm/lib/Analysis/ScalarEvolution.cpp
@@ -249,6 +249,11 @@
     cl::desc("Infer nuw/nsw flags using context where suitable"),
     cl::init(true));
 
+static cl::opt<bool> UseMemoryAccessUBForBEInference(
+    "scalar-evolution-infer-max-trip-count-from-memory-access", cl::Hidden,
+    cl::desc("Infer loop max trip count from memory access"),
+    cl::init(false));
+
 //===----------------------------------------------------------------------===//
 //                           SCEV class definitions
 //===----------------------------------------------------------------------===//
@@ -8086,6 +8091,161 @@
   return ((unsigned)ExitConst->getZExtValue()) + 1;
 }
 
+/// Collect Load/Store instructions that must be executed on each iteration
+/// inside loop \p L .
+static void
+collectExeLoadStoreInsideLoop(const Loop *L, DominatorTree &DT,
+                              SmallVector<Instruction *, 4> &MemInsts) {
+  // It is difficult to tell if the load/store instruction is executed on every
+  // iteration inside an irregular loop.
+  if (!L->isLoopSimplifyForm() || !L->isInnermost())
+    return;
+
+  // FIXME: To make the scene more typical, we only analysis loops that have
+  // one exiting block and that block must be the latch. To make it easier to
+  // capture loops that have memory access and memory access will be executed
+  // on each iteration.
+  const BasicBlock *LoopLatch = L->getLoopLatch();
+  assert(LoopLatch && "See defination of simplify form loop.");
+  if (L->getExitingBlock() != LoopLatch)
+    return;
+
+  for (auto *BB : L->getBlocks()) {
+    // Go here, we can know that Loop is a single exiting and simplified form
+    // loop. Make sure that infer from Memory Operation in those BBs must be
+    // executed in loop. First step, we can make sure that max execution time
+    // of MemAccessBB in loop represents latch max excution time.
+    // Such BB as below should be skipped:
+    //            Entry
+    //              │
+    //        ┌─────▼─────┐
+    //        │Loop Header◄─────┐
+    //        └──┬──────┬─┘     │
+    //           │      │       │
+    //  ┌────────▼──┐ ┌─▼─────┐ │
+    //  │MemAccessBB│ │OtherBB│ │
+    //  └────────┬──┘ └─┬─────┘ │
+    //           │      │       │
+    //         ┌─▼──────▼─┐     │
+    //         │Loop Latch├─────┘
+    //         └────┬─────┘
+    //              ▼
+    //             Exit
+    if (!DT.dominates(BB, LoopLatch))
+      continue;
+
+    for (Instruction &Inst : *BB) {
+      if (isa<LoadInst>(&Inst) || isa<StoreInst>(&Inst))
+        MemInsts.push_back(&Inst);
+    }
+  }
+}
+
+/// Returns true if memory access instruction \p I may be overlapping under
+/// recurrence expression \p Rec which comes from the pointer operand of \p I .
+static bool memReadWriteMaybeOverlapping(Instruction *I,
+                                         const SCEVAddRecExpr *Rec,
+                                         ScalarEvolution *SE) {
+  assert(isa<LoadInst>(I) || isa<StoreInst>(I));
+  const SCEV *AccessSize = SE->getElementSize(I);
+  const SCEV *Step = Rec->getStepRecurrence(*SE);
+  // The unknown situation is regarded as a possible overlap.
+  if (!AccessSize || !Step || !isa<SCEVConstant>(AccessSize) ||
+      !isa<SCEVConstant>(Step))
+    return true;
+
+  ConstantInt *StepC = cast<SCEVConstant>(Step)->getValue();
+  if (StepC->isZero() || StepC->isNegative() ||
+      StepC->getValue().getActiveBits() > 32)
+    return true;
+
+  ConstantInt *AcSizeC = cast<SCEVConstant>(AccessSize)->getValue();
+  // When the accessing size is greater than the step size, then overlap occurs.
+  if (AcSizeC->getValue().getZExtValue() > StepC->getValue().getZExtValue())
+    return true;
+
+  // Notice that pointer may be wraps arround. Treat that as overlap.
+  if (!Rec->hasNoSelfWrap())
+    return true;
+
+  return false;
+}
+
+/// Returns a SCEV representing the memory size of pointer \p V .
+/// TODO: Memory size of more types can be identified here.
+static const SCEV *getCertainSizeOfMem(const SCEV *V, Type *RTy,
+                                       const DataLayout &DL,
+                                       ScalarEvolution *SE) {
+  const SCEVUnknown *PtrBase = dyn_cast<SCEVUnknown>(V);
+  if (!PtrBase)
+    return nullptr;
+
+  // TODO: Memory which has certain size.
+  AllocaInst *AllocateInst = dyn_cast<AllocaInst>(PtrBase->getValue());
+  if (!AllocateInst)
+    return nullptr;
+
+  // Make sure only handle normal array.
+  auto *Ty = dyn_cast<ArrayType>(AllocateInst->getAllocatedType());
+  auto *ArrSize = dyn_cast<ConstantInt>(AllocateInst->getArraySize());
+  if (!Ty || !ArrSize || !ArrSize->isOne())
+    return nullptr;
+
+  return SE->getConstant(RTy, DL.getTypeAllocSize(Ty));
+}
+
+const SCEV *
+ScalarEvolution::getConstantMaxTripCountFromMemAccess(const Loop *L) {
+  SmallVector<Instruction *, 4> MemInsts;
+  collectExeLoadStoreInsideLoop(L, DT, MemInsts);
+
+  // Collect AddRecExpr that meets the requirements and can be analyzed.
+  SmallPtrSet<const SCEVAddRecExpr *, 2> Exprs;
+  for (Instruction *I : MemInsts) {
+    Value *Ptr = getLoadStorePointerOperand(I);
+    assert(Ptr && "getLoadStorePointerOperand changed.");
+    auto *AddRec = dyn_cast<SCEVAddRecExpr>(getSCEV(Ptr));
+    if (!AddRec || !AddRec->isAffine())
+      continue;
+
+    if (!memReadWriteMaybeOverlapping(I, AddRec, this))
+      Exprs.insert(AddRec);
+  }
+
+  const DataLayout &DL = getDataLayout();
+  SmallVector<const SCEV *> InferCountColl;
+  for (auto *Rec : Exprs) {
+    const SCEV *PtrBase = getPointerBase(Rec);
+    const SCEV *Step = Rec->getStepRecurrence(*this);
+    const SCEV *MemSize =
+        getCertainSizeOfMem(PtrBase, Step->getType(), DL, this);
+    if (!MemSize)
+      continue;
+
+    // Now we can infer a max execution time by MemLength/StepLength.
+    auto *MaxExeCount = dyn_cast<SCEVConstant>(getUDivCeilSCEV(MemSize, Step));
+    if (!MaxExeCount || MaxExeCount->getAPInt().getActiveBits() > 32)
+      continue;
+
+    // If the loop reaches the maximum number of executions, we can not
+    // access bytes starting outside the statically allocated size without
+    // being immediate UB. But it is allowed to enter loop header one more
+    // time.
+    auto *InferCount = dyn_cast<SCEVConstant>(
+        getAddExpr(MaxExeCount, getOne(MaxExeCount->getType())));
+    // Discard the maximum number of execution times under 32bits.
+    if (!InferCount || InferCount->getAPInt().getActiveBits() > 32)
+      continue;
+
+    InferCountColl.push_back(InferCount);
+  }
+
+  if (InferCountColl.empty())
+    return getCouldNotCompute();
+
+  return getUMinFromMismatchedTypes(InferCountColl);
+}
+
 unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) {
   auto *ExitCount = dyn_cast<SCEVConstant>(getBackedgeTakenCount(L, Exact));
   return getConstantTripCount(ExitCount);
@@ -8105,7 +8265,17 @@
 unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) {
   const auto *MaxExitCount =
       dyn_cast<SCEVConstant>(getConstantMaxBackedgeTakenCount(L));
-  return getConstantTripCount(MaxExitCount);
+  unsigned MaxExitCountN = getConstantTripCount(MaxExitCount);
+  if (UseMemoryAccessUBForBEInference) {
+    auto *MaxInferCount = getConstantMaxTripCountFromMemAccess(L);
+    if (auto *InferCount = dyn_cast<SCEVConstant>(MaxInferCount)) {
+      unsigned InferValue = (unsigned)(InferCount->getValue()->getZExtValue());
+      MaxExitCountN = (MaxExitCountN == 0)
+                          ? InferValue
+                          : std::min(MaxExitCountN, InferValue);
+    }
+  }
+  return MaxExitCountN;
 }
 
 unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) {
@@ -13443,6 +13613,17 @@
     OS << ": ";
     OS << "Trip multiple is " << SE->getSmallConstantTripMultiple(L) << "\n";
   }
+
+  if (UseMemoryAccessUBForBEInference) {
+    unsigned SmallMaxTrip = SE->getSmallConstantMaxTripCount(L);
+    OS << "Loop ";
+    L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
+    OS << ": ";
+    if (SmallMaxTrip)
+      OS << "Small constant max trip is " << SmallMaxTrip << "\n";
+    else
+      OS << "Small constant max trip couldn't be computed. " << "\n";
+  }
 }
 
 namespace llvm {
Index: llvm/test/Analysis/ScalarEvolution/infer-trip-count-idx-wrap.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/ScalarEvolution/infer-trip-count-idx-wrap.ll
@@ -0,0 +1,110 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" -scalar-evolution-classify-expressions=0 -scalar-evolution-infer-max-trip-count-from-memory-access 2>&1 | FileCheck %s
+
+define void @ComputeMaxTripCountFromArrayIdxWrap(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromArrayIdxWrap'
+; CHECK-NEXT:  Determining loop execution counts for: @ComputeMaxTripCountFromArrayIdxWrap
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is 2147483646
+; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: Predicated backedge-taken count is (-1 + %len)
+; CHECK-NEXT:   Predicates:
+; CHECK:       Loop %for.body: Trip multiple is 1
+; CHECK-NEXT:  Loop %for.body: Small constant max trip is 257
+;
+entry:
+  %a = alloca [256 x i32], align 4
+  %cmp4 = icmp sgt i32 %len, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %iv = phi i8 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %idxprom = zext i8 %iv to i64
+  %arrayidx = getelementptr inbounds [256 x i32], [256 x i32]* %a, i64 0, i64 %idxprom
+  store i32 0, i32* %arrayidx, align 4
+  %inc = add nuw nsw i8 %iv, 1
+  %inc_zext = zext i8 %inc to i32
+  %cmp = icmp slt i32 %inc_zext, %len
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+define void @ComputeMaxTripCountFromArrayIdxWrap2(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromArrayIdxWrap2'
+; CHECK-NEXT:  Determining loop execution counts for: @ComputeMaxTripCountFromArrayIdxWrap2
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is 2147483646
+; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: Predicated backedge-taken count is (-1 + %len)
+; CHECK-NEXT:   Predicates:
+; CHECK:       Loop %for.body: Trip multiple is 1
+; CHECK-NEXT:  Loop %for.body: Small constant max trip is 128
+;
+entry:
+  %a = alloca [127 x i32], align 4
+  %cmp4 = icmp sgt i32 %len, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %iv = phi i8 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %idxprom = zext i8 %iv to i64
+  %arrayidx = getelementptr inbounds [127 x i32], [127 x i32]* %a, i64 0, i64 %idxprom
+  store i32 0, i32* %arrayidx, align 4
+  %inc = add nuw nsw i8 %iv, 1
+  %inc_zext = zext i8 %inc to i32
+  %cmp = icmp slt i32 %inc_zext, %len
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+define void @ComputeMaxTripCountFromArrayIdxWrap3(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromArrayIdxWrap3'
+; CHECK-NEXT:  Determining loop execution counts for: @ComputeMaxTripCountFromArrayIdxWrap3
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is 2147483646
+; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: Predicated backedge-taken count is (-1 + %len)
+; CHECK-NEXT:   Predicates:
+; CHECK:       Loop %for.body: Trip multiple is 1
+; CHECK-NEXT:  Loop %for.body: Small constant max trip is 21
+;
+entry:
+  %a = alloca [20 x i32], align 4
+  %cmp4 = icmp sgt i32 %len, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %iv = phi i8 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %idxprom = zext i8 %iv to i64
+  %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %a, i64 0, i64 %idxprom
+  store i32 0, i32* %arrayidx, align 4
+  %inc = add nuw nsw i8 %iv, 1
+  %inc_zext = zext i8 %inc to i32
+  %cmp = icmp slt i32 %inc_zext, %len
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
Index: llvm/test/Analysis/ScalarEvolution/infer-trip-count.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/ScalarEvolution/infer-trip-count.ll
@@ -0,0 +1,191 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" -scalar-evolution-classify-expressions=0 -scalar-evolution-infer-max-trip-count-from-memory-access 2>&1 | FileCheck %s
+
+define void @ComputeMaxTripCountFromArrayNormal(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromArrayNormal'
+; CHECK-NEXT:  Determining loop execution counts for: @ComputeMaxTripCountFromArrayNormal
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is 2147483646
+; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: Predicated backedge-taken count is (-1 + %len)
+; CHECK-NEXT:   Predicates:
+; CHECK:       Loop %for.body: Trip multiple is 1
+; CHECK-NEXT:  Loop %for.body: Small constant max trip is 8
+;
+entry:
+  %a = alloca [7 x i32], align 4
+  %cmp4 = icmp sgt i32 %len, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %iv = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %idxprom = zext i32 %iv to i64
+  %arrayidx = getelementptr inbounds [7 x i32], [7 x i32]* %a, i64 0, i64 %idxprom
+  store i32 0, i32* %arrayidx, align 4
+  %inc = add nuw nsw i32 %iv, 1
+  %cmp = icmp slt i32 %inc, %len
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+
+define void @ComputeMaxTripCountFromZeroArray(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromZeroArray'
+; CHECK-NEXT:  Determining loop execution counts for: @ComputeMaxTripCountFromZeroArray
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is 2147483646
+; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: Predicated backedge-taken count is (-1 + %len)
+; CHECK-NEXT:   Predicates:
+; CHECK:       Loop %for.body: Trip multiple is 1
+; CHECK-NEXT:  Loop %for.body: Small constant max trip is 1
+;
+entry:
+  %a = alloca [0 x i32], align 4
+  %cmp4 = icmp sgt i32 %len, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %iv = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %idxprom = zext i32 %iv to i64
+  %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* %a, i64 0, i64 %idxprom
+  store i32 0, i32* %arrayidx, align 4
+  %inc = add nuw nsw i32 %iv, 1
+  %cmp = icmp slt i32 %inc, %len
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+define void @ComputeMaxTripCountFromExtremArray(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromExtremArray'
+; CHECK-NEXT:  Determining loop execution counts for: @ComputeMaxTripCountFromExtremArray
+; CHECK-NEXT:  Loop %for.body: backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is 2147483646
+; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (-1 + %len)
+; CHECK-NEXT:  Loop %for.body: Predicated backedge-taken count is (-1 + %len)
+; CHECK-NEXT:   Predicates:
+; CHECK:       Loop %for.body: Trip multiple is 1
+; CHECK-NEXT:  Loop %for.body: Small constant max trip is 2147483647
+;
+entry:
+  %a = alloca [4294967295 x i1], align 4
+  %cmp4 = icmp sgt i32 %len, 0
+  br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %iv = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %idxprom = zext i32 %iv to i64
+  %arrayidx = getelementptr inbounds [4294967295 x i1], [4294967295 x i1]* %a, i64 0, i64 %idxprom
+  store i1 0, i1* %arrayidx, align 4
+  %inc = add nuw nsw i32 %iv, 1
+  %cmp = icmp slt i32 %inc, %len
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+
+define void @ComputeMaxTripCountFromArrayInBranch(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromArrayInBranch'
+; CHECK-NEXT:  Determining loop execution counts for: @ComputeMaxTripCountFromArrayInBranch
+; CHECK-NEXT:  Loop %for.cond: backedge-taken count is (0 smax %len)
+; CHECK-NEXT:  Loop %for.cond: constant max backedge-taken count is 2147483647
+; CHECK-NEXT:  Loop %for.cond: symbolic max backedge-taken count is (0 smax %len)
+; CHECK-NEXT:  Loop %for.cond: Predicated backedge-taken count is (0 smax %len)
+; CHECK-NEXT:   Predicates:
+; CHECK:       Loop %for.cond: Trip multiple is 1
+; CHECK-NEXT:  Loop %for.cond: Small constant max trip is 2147483648
+;
+entry:
+  %a = alloca [8 x i32], align 4
+  br label %for.cond
+
+for.cond:
+  %iv = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
+  %cmp = icmp slt i32 %iv, %len
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  br label %for.end
+
+for.body:
+  %cmp1 = icmp slt i32 %iv, 8
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:
+  %idxprom = sext i32 %iv to i64
+  %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %a, i64 0, i64 %idxprom
+  store i32 0, i32* %arrayidx, align 4
+  br label %if.end
+
+if.end:
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %iv, 1
+  br label %for.cond
+
+for.end:
+  ret void
+}
+
+define void @ComputeMaxTripCountFromMultiDemArray(i32 signext %len) {
+; CHECK-LABEL: 'ComputeMaxTripCountFromMultiDemArray'
+; CHECK-NEXT:  Determining loop execution counts for: @ComputeMaxTripCountFromMultiDemArray
+; CHECK-NEXT:  Loop %for.cond: backedge-taken count is (0 smax %len)
+; CHECK-NEXT:  Loop %for.cond: constant max backedge-taken count is 2147483647
+; CHECK-NEXT:  Loop %for.cond: symbolic max backedge-taken count is (0 smax %len)
+; CHECK-NEXT:  Loop %for.cond: Predicated backedge-taken count is (0 smax %len)
+; CHECK-NEXT:   Predicates:
+; CHECK:       Loop %for.cond: Trip multiple is 1
+; CHECK-NEXT:  Loop %for.cond: Small constant max trip is 2147483648
+;
+entry:
+  %a = alloca [3 x [5 x i32]], align 4
+  br label %for.cond
+
+for.cond:
+  %iv = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
+  %cmp = icmp slt i32 %iv, %len
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  br label %for.end
+
+for.body:
+  %arrayidx = getelementptr inbounds [3 x [5 x i32]], [3 x [5 x i32]]* %a, i64 0, i64 3
+  %idxprom = sext i32 %iv to i64
+  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %arrayidx, i64 0, i64 %idxprom
+  store i32 0, i32* %arrayidx1, align 4
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %iv, 1
+  br label %for.cond
+
+for.end:
+  ret void
+}