Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -5489,6 +5489,22 @@
    !0 = !{!"llvm.loop.vectorize.predicate.enable", i1 0}
    !1 = !{!"llvm.loop.vectorize.predicate.enable", i1 1}
 
+'``llvm.loop.vectorize.ivdep.enable``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This metadata instructs the vectorizer to ignore dependencies between
+memory accesses that it could not prove to be either safe or unsafe for
+vectorization. This differs from ``llvm.loop.parallel_accesses``, which
+states that no dependencies exist between memory accesses belonging to
+the same access group. The first operand is the string
+``llvm.loop.vectorize.ivdep.enable`` and the second operand is a bit. A
+value of 1 enables ignoring such undetermined dependencies for the
+loop.
+
+.. code-block:: llvm
+
+   !0 = !{!"llvm.loop.vectorize.ivdep.enable", i1 1}
+
 '``llvm.loop.vectorize.width``' Metadata
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
Index: llvm/include/llvm/Analysis/LoopAccessAnalysis.h
===================================================================
--- llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -201,7 +201,7 @@
   ///
   /// Only checks sets with elements in \p CheckDeps.
   bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
-                   const ValueToValueMap &Strides);
+                   const ValueToValueMap &Strides, bool UnknownDepHint);
 
   /// No memory dependence was encountered that would inhibit
   /// vectorization.
@@ -516,7 +516,8 @@
 class LoopAccessInfo {
 public:
   LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI,
-                 AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI);
+                 AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
+                 bool UnknownDepHint = false);
 
   /// Return true we can analyze the memory accesses in the loop and there are
   /// no memory dependence cycles.
@@ -608,7 +609,8 @@
 private:
   /// Analyze the loop.
   void analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
-                   const TargetLibraryInfo *TLI, DominatorTree *DT);
+                   const TargetLibraryInfo *TLI, DominatorTree *DT,
+                   bool UnknownDepHint);
 
   /// Check if the structure of the loop allows it to be analyzed by this
   /// pass.
@@ -735,7 +737,7 @@
   /// Query the result of the loop access information for the loop \p L.
   ///
   /// If there is no cached result available run the analysis.
-  const LoopAccessInfo &getInfo(Loop *L);
+  const LoopAccessInfo &getInfo(Loop *L, bool UnknownDepHint = false);
 
   void releaseMemory() override {
     // Invalidate the cache when the pass is freed.
Index: llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
===================================================================
--- llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -44,7 +44,7 @@
 /// careful NOT to add them if the user hasn't specifically asked so.
 class LoopVectorizeHints {
   enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED,
-                  HK_PREDICATE };
+                  HK_PREDICATE, HK_IVDEP };
 
   /// Hint - associates name and validation with the hint value.
   struct Hint {
@@ -73,6 +73,9 @@
   /// Vector Predicate
   Hint Predicate;
 
+  /// Ignore memory dependences whose safety is unknown (ivdep)
+  Hint Ivdep;
+
   /// Return the loop metadata prefix.
   static StringRef Prefix() { return "llvm.loop."; }
 
@@ -102,6 +105,7 @@
   unsigned getInterleave() const { return Interleave.Value; }
   unsigned getIsVectorized() const { return IsVectorized.Value; }
   unsigned getPredicate() const { return Predicate.Value; }
+  unsigned getIvdep() const { return Ivdep.Value; }
   enum ForceKind getForce() const {
     if ((ForceKind)Force.Value == FK_Undefined &&
         hasDisableAllTransformsHint(TheLoop))
@@ -199,7 +203,7 @@
   LoopVectorizationLegality(
       Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
       TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AliasAnalysis *AA,
-      Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA,
+      Function *F, std::function<const LoopAccessInfo &(Loop &, bool)> *GetLAA,
       LoopInfo *LI, OptimizationRemarkEmitter *ORE,
       LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
       AssumptionCache *AC)
@@ -405,7 +409,7 @@
   DominatorTree *DT;
 
   // LoopAccess analysis.
-  std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
+  std::function<const LoopAccessInfo &(Loop &, bool)> *GetLAA;
 
   // And the loop-accesses info corresponding to this loop.  This pointer is
   // null until canVectorizeMemory sets it up.
Index: llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
===================================================================
--- llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -138,7 +138,7 @@
   DemandedBits *DB;
   AliasAnalysis *AA;
   AssumptionCache *AC;
-  std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
+  std::function<const LoopAccessInfo &(Loop &, bool)> *GetLAA;
   OptimizationRemarkEmitter *ORE;
   ProfileSummaryInfo *PSI;
 
@@ -149,7 +149,7 @@
                TargetTransformInfo &TTI_, DominatorTree &DT_,
                BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
                DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
-               std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
+               std::function<const LoopAccessInfo &(Loop &, bool)> &GetLAA_,
                OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_);
 
   bool processLoop(Loop *L);
Index: llvm/lib/Analysis/LoopAccessAnalysis.cpp
===================================================================
--- llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1633,10 +1633,12 @@
 
 bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
                                    MemAccessInfoList &CheckDeps,
-                                   const ValueToValueMap &Strides) {
+                                   const ValueToValueMap &Strides,
+                                   bool UnknownDepHint) {
 
   MaxSafeDepDistBytes = -1;
   SmallPtrSet<MemAccessInfo, 8> Visited;
+  Status = VectorizationSafetyStatus::Safe;
   for (MemAccessInfo CurAccess : CheckDeps) {
     if (Visited.count(CurAccess))
       continue;
@@ -1678,7 +1680,13 @@
 
             Dependence::DepType Type =
                 isDependent(*A.first, A.second, *B.first, B.second, Strides);
-            mergeInStatus(Dependence::isSafeForVectorization(Type));
+            // Merge this dependence's safety into the overall status. When
+            // UnknownDepHint is set, dependences that are only possibly safe
+            // with runtime checks are treated as safe and are not merged in.
+            if (!UnknownDepHint ||
+                !(Dependence::isSafeForVectorization(Type) ==
+                VectorizationSafetyStatus::PossiblySafeWithRtChecks))
+              mergeInStatus(Dependence::isSafeForVectorization(Type));
 
             // Gather dependences unless we accumulated MaxDependences
             // dependences.  In that case return as soon as we find the first
@@ -1788,7 +1796,8 @@
 
 void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
                                  const TargetLibraryInfo *TLI,
-                                 DominatorTree *DT) {
+                                 DominatorTree *DT,
+                                 bool UnknownDepHint) {
   typedef SmallPtrSet<Value*, 16> ValueSet;
 
   // Holds the Load and Store instructions.
@@ -2022,7 +2031,8 @@
   if (Accesses.isDependencyCheckNeeded()) {
     LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
     CanVecMem = DepChecker->areDepsSafe(
-        DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
+        DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides,
+        UnknownDepHint);
     MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
 
     if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
@@ -2343,7 +2353,8 @@
 
 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                                const TargetLibraryInfo *TLI, AliasAnalysis *AA,
-                               DominatorTree *DT, LoopInfo *LI)
+                               DominatorTree *DT, LoopInfo *LI,
+                               bool UnknownDepHint)
     : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
       PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
       DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
@@ -2351,7 +2362,7 @@
       HasConvergentOp(false),
       HasDependenceInvolvingLoopInvariantAddress(false) {
   if (canAnalyzeLoop())
-    analyzeLoop(AA, LI, TLI, DT);
+    analyzeLoop(AA, LI, TLI, DT, UnknownDepHint);
 }
 
 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
@@ -2397,11 +2408,17 @@
   PSE->print(OS, Depth);
 }
 
-const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
+// Return the cached LoopAccessInfo for \p L, running the analysis on a miss.
+// FIXME: The cache is keyed on the loop only, so the UnknownDepHint value of
+// the first query is baked into the entry and later queries that pass a
+// different value still get that result. Key the cache on the hint as well.
+const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L,
+                                                        bool UnknownDepHint) {
   auto &LAI = LoopAccessInfoMap[L];
 
   if (!LAI)
-    LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
+    LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI,
+                                           UnknownDepHint);
 
   return *LAI.get();
 }
Index: llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -61,6 +61,8 @@
   case HK_ISVECTORIZED:
   case HK_PREDICATE:
     return (Val == 0 || Val == 1);
+  case HK_IVDEP:
+    return (Val == 1);
   }
   return false;
 }
@@ -72,7 +74,8 @@
       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
       Force("vectorize.enable", FK_Undefined, HK_FORCE),
       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
-      Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L),
+      Predicate("vectorize.predicate.enable", 0, HK_PREDICATE),
+      Ivdep("vectorize.ivdep.enable", 0, HK_IVDEP), TheLoop(L),
       ORE(ORE) {
   // Populate values with existing loop metadata.
   getHintsFromMetadata();
@@ -224,7 +227,8 @@
     return;
   unsigned Val = C->getZExtValue();
 
-  Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
+  Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate,
+                   &Ivdep};
   for (auto H : Hints) {
     if (Name == H->Name) {
       if (H->validate(Val))
@@ -825,7 +829,7 @@
 }
 
 bool LoopVectorizationLegality::canVectorizeMemory() {
-  LAI = &(*GetLAA)(*TheLoop);
+  LAI = &(*GetLAA)(*TheLoop, Hints->getIvdep());
   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
   if (LAR) {
     ORE->emit([&]() {
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1610,8 +1610,10 @@
     auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
     auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
 
-    std::function<const LoopAccessInfo &(Loop &)> GetLAA =
-        [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
+    std::function<const LoopAccessInfo &(Loop &, bool)> GetLAA =
+        [&](Loop &L, bool UnknownDepHint) -> const LoopAccessInfo & {
+      return LAA->getInfo(&L, UnknownDepHint);
+    };
 
     return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC,
                         GetLAA, *ORE, PSI);
@@ -7800,7 +7802,7 @@
     Function &F, ScalarEvolution &SE_, LoopInfo &LI_, TargetTransformInfo &TTI_,
     DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
     DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
-    std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
+    std::function<const LoopAccessInfo &(Loop &, bool)> &GetLAA_,
     OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_) {
   SE = &SE_;
   LI = &LI_;
@@ -7879,8 +7881,11 @@
                           : nullptr;
 
     auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
-    std::function<const LoopAccessInfo &(Loop &)> GetLAA =
-        [&](Loop &L) -> const LoopAccessInfo & {
+    // FIXME: UnknownDepHint is accepted but never used: the result is fetched
+    // from LoopAccessAnalysis without it, so the ivdep hint has no effect on
+    // this path.
+    std::function<const LoopAccessInfo &(Loop &, bool)> GetLAA =
+        [&](Loop &L, bool UnknownDepHint) -> const LoopAccessInfo & {
       LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
       return LAM.getResult<LoopAccessAnalysis>(L, AR);
     };
Index: llvm/test/Transforms/LoopVectorize/X86/ivdep-aliasing.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/X86/ivdep-aliasing.ll
@@ -0,0 +1,166 @@
+; RUN: opt < %s -O3 -S | FileCheck %s
+; ModuleID = 'ivdeptest.c'
+source_filename = "ivdeptest.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@.str = private unnamed_addr constant [3 x i8] c"%d\00", align 1
+@.str.1 = private unnamed_addr constant [6 x i8] c"%d %d\00", align 1
+@.str.2 = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32* @addLoops(i32* noalias %a, i32* noalias %b, i32 %LEN_1D) #0 {
+entry:
+  %a.addr = alloca i32*, align 8
+  %b.addr = alloca i32*, align 8
+  %LEN_1D.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32* %a, i32** %a.addr, align 8
+  store i32* %b, i32** %b.addr, align 8
+  store i32 %LEN_1D, i32* %LEN_1D.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+; CHECK: vector.ph:
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %1 = load i32, i32* %LEN_1D.addr, align 4
+  %cmp = icmp slt i32 %0, %1
+  br i1 %cmp, label %for.body, label %for.end
+; CHECK: br label %vector.body
+
+; CHECK: vector.body:
+for.body:                                         ; preds = %for.cond
+  %2 = load i32*, i32** %a.addr, align 8
+  %3 = load i32*, i32** %b.addr, align 8
+  %4 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %4 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %3, i64 %idxprom
+  %5 = load i32, i32* %arrayidx, align 4
+  %idxprom1 = sext i32 %5 to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %2, i64 %idxprom1
+  %6 = load i32, i32* %arrayidx2, align 4
+  %inc = add nsw i32 %6, 1
+  store i32 %inc, i32* %arrayidx2, align 4
+  br label %for.inc
+; CHECK: br i1 %{{.*}}, label %middle.block, label %vector.body, !llvm.loop !2
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32, i32* %i, align 4
+  %inc3 = add nsw i32 %7, 1
+  store i32 %inc3, i32* %i, align 4
+  br label %for.cond, !llvm.loop !2
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32*, i32** %a.addr, align 8
+  ret i32* %8
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  %LEN_1D = alloca i32, align 4
+  %i = alloca i32, align 4
+  %a = alloca i32*, align 8
+  %b = alloca i32*, align 8
+  %c = alloca i32*, align 8
+  store i32 0, i32* %retval, align 4
+  %call = call i32 (i8*, ...) @__isoc99_scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32* %LEN_1D)
+  %0 = load i32, i32* %LEN_1D, align 4
+  %conv = sext i32 %0 to i64
+  %mul = mul i64 %conv, 4
+  %call1 = call noalias i8* @malloc(i64 %mul) #3
+  %1 = bitcast i8* %call1 to i32*
+  store i32* %1, i32** %a, align 8
+  %2 = load i32, i32* %LEN_1D, align 4
+  %conv2 = sext i32 %2 to i64
+  %mul3 = mul i64 %conv2, 4
+  %call4 = call noalias i8* @malloc(i64 %mul3) #3
+  %3 = bitcast i8* %call4 to i32*
+  store i32* %3, i32** %b, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %4 = load i32, i32* %i, align 4
+  %5 = load i32, i32* %LEN_1D, align 4
+  %cmp = icmp slt i32 %4, %5
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %6 = load i32*, i32** %a, align 8
+  %7 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %7 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %6, i64 %idxprom
+  %8 = load i32*, i32** %b, align 8
+  %9 = load i32, i32* %i, align 4
+  %idxprom6 = sext i32 %9 to i64
+  %arrayidx7 = getelementptr inbounds i32, i32* %8, i64 %idxprom6
+  %call8 = call i32 (i8*, ...) @__isoc99_scanf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.1, i64 0, i64 0), i32* %arrayidx, i32* %arrayidx7)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %10 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %10, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %11 = load i32*, i32** %a, align 8
+  %12 = load i32*, i32** %b, align 8
+  %13 = load i32, i32* %LEN_1D, align 4
+  %call9 = call i32* @addLoops(i32* %11, i32* %12, i32 %13)
+  store i32* %call9, i32** %c, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond10
+
+for.cond10:                                       ; preds = %for.inc17, %for.end
+  %14 = load i32, i32* %i, align 4
+  %15 = load i32, i32* %LEN_1D, align 4
+  %cmp11 = icmp slt i32 %14, %15
+  br i1 %cmp11, label %for.body13, label %for.end19
+
+for.body13:                                       ; preds = %for.cond10
+  %16 = load i32*, i32** %c, align 8
+  %17 = load i32, i32* %i, align 4
+  %idxprom14 = sext i32 %17 to i64
+  %arrayidx15 = getelementptr inbounds i32, i32* %16, i64 %idxprom14
+  %18 = load i32, i32* %arrayidx15, align 4
+  %call16 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i32 %18)
+  br label %for.inc17
+
+for.inc17:                                        ; preds = %for.body13
+  %19 = load i32, i32* %i, align 4
+  %inc18 = add nsw i32 %19, 1
+  store i32 %inc18, i32* %i, align 4
+  br label %for.cond10
+
+for.end19:                                        ; preds = %for.cond10
+  ret i32 0
+}
+
+declare dso_local i32 @__isoc99_scanf(i8*, ...) #1
+
+; Function Attrs: nounwind
+declare dso_local noalias i8* @malloc(i64) #2
+
+declare dso_local i32 @printf(i8*, ...) #1
+
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 10.0.0 (https://github.com/llvm/llvm-project 8a5bfbe6db2824642bf9a1d27a24c5b6132b244f)"}
+; CHECK: !2 = distinct !{!2, !3}
+; CHECK-NEXT: !3 = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-NEXT: !4 = distinct !{!4, !5, !3}
+; CHECK-NEXT: !5 = !{!"llvm.loop.unroll.runtime.disable"}
+!2 = distinct !{!2, !3, !4}
+!3 = !{!"llvm.loop.vectorize.ivdep.enable", i1 true}
+!4 = !{!"llvm.loop.vectorize.enable", i1 true}
Index: llvm/test/Transforms/LoopVectorize/X86/ivdep-novec.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/X86/ivdep-novec.ll
@@ -0,0 +1,156 @@
+; RUN: opt < %s -O3 -S | FileCheck %s
+; ModuleID = 'dep.c'
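+; Should not vectorize: A[i] = A[i] + A[i-1] is a provable loop-carried dependence, which the ivdep hint must not override.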
+source_filename = "dep.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@.str = private unnamed_addr constant [3 x i8] c"%d\00", align 1
+@.str.1 = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32* @dep(i32* %A, i32 %n) #0 {
+entry:
+  %A.addr = alloca i32*, align 8
+  %n.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32* %A, i32** %A.addr, align 8
+  store i32 %n, i32* %n.addr, align 4
+  store i32 1, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %1 = load i32, i32* %n.addr, align 4
+  %cmp = icmp slt i32 %0, %1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %2 = load i32*, i32** %A.addr, align 8
+  %3 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %3 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
+  %4 = load i32, i32* %arrayidx, align 4
+  %5 = load i32*, i32** %A.addr, align 8
+  %6 = load i32, i32* %i, align 4
+  %sub = sub nsw i32 %6, 1
+  %idxprom1 = sext i32 %sub to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %5, i64 %idxprom1
+  %7 = load i32, i32* %arrayidx2, align 4
+  %add = add nsw i32 %4, %7
+  %8 = load i32*, i32** %A.addr, align 8
+  %9 = load i32, i32* %i, align 4
+  %idxprom3 = sext i32 %9 to i64
+  %arrayidx4 = getelementptr inbounds i32, i32* %8, i64 %idxprom3
+  store i32 %add, i32* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %10 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %10, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond, !llvm.loop !2
+
+for.end:                                          ; preds = %for.cond
+  %11 = load i32*, i32** %A.addr, align 8
+  ret i32* %11
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @main(i32 %argc, i8** %argv) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %argc.addr = alloca i32, align 4
+  %argv.addr = alloca i8**, align 8
+  %n = alloca i32, align 4
+  %i = alloca i32, align 4
+  %A = alloca i32*, align 8
+  store i32 0, i32* %retval, align 4
+  store i32 %argc, i32* %argc.addr, align 4
+  store i8** %argv, i8*** %argv.addr, align 8
+  %call = call i32 (i8*, ...) @__isoc99_scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32* %n)
+  %0 = load i32, i32* %n, align 4
+  %conv = sext i32 %0 to i64
+  %mul = mul i64 %conv, 4
+  %call1 = call noalias i8* @malloc(i64 %mul) #3
+  %1 = bitcast i8* %call1 to i32*
+  store i32* %1, i32** %A, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %2 = load i32, i32* %i, align 4
+  %3 = load i32, i32* %n, align 4
+  %cmp = icmp slt i32 %2, %3
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %4 = load i32*, i32** %A, align 8
+  %5 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %5 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %4, i64 %idxprom
+  %call3 = call i32 (i8*, ...) @__isoc99_scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32* %arrayidx)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %6 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %6, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %7 = load i32*, i32** %A, align 8
+  %8 = load i32, i32* %n, align 4
+  %call4 = call i32* @dep(i32* %7, i32 %8)
+  store i32* %call4, i32** %A, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond5
+
+for.cond5:                                        ; preds = %for.inc12, %for.end
+  %9 = load i32, i32* %i, align 4
+  %10 = load i32, i32* %n, align 4
+  %cmp6 = icmp slt i32 %9, %10
+  br i1 %cmp6, label %for.body8, label %for.end14
+
+for.body8:                                        ; preds = %for.cond5
+  %11 = load i32*, i32** %A, align 8
+  %12 = load i32, i32* %i, align 4
+  %idxprom9 = sext i32 %12 to i64
+  %arrayidx10 = getelementptr inbounds i32, i32* %11, i64 %idxprom9
+  %13 = load i32, i32* %arrayidx10, align 4
+  %call11 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), i32 %13)
+  br label %for.inc12
+
+for.inc12:                                        ; preds = %for.body8
+  %14 = load i32, i32* %i, align 4
+  %inc13 = add nsw i32 %14, 1
+  store i32 %inc13, i32* %i, align 4
+  br label %for.cond5
+
+for.end14:                                        ; preds = %for.cond5
+  ret i32 0
+}
+
+declare dso_local i32 @__isoc99_scanf(i8*, ...) #1
+
+; Function Attrs: nounwind
+declare dso_local noalias i8* @malloc(i64) #2
+
+declare dso_local i32 @printf(i8*, ...) #1
+
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 10.0.0 (https://github.com/llvm/llvm-project 8a5bfbe6db2824642bf9a1d27a24c5b6132b244f)"}
+; CHECK: !2 = distinct !{!2, !3, !4}
+; CHECK-NEXT: !3 = !{!"llvm.loop.vectorize.ivdep.enable", i1 true}
+; CHECK-NEXT: !4 = !{!"llvm.loop.vectorize.enable", i1 true}
+!2 = distinct !{!2, !3, !4}
+!3 = !{!"llvm.loop.vectorize.ivdep.enable", i1 true}
+!4 = !{!"llvm.loop.vectorize.enable", i1 true}
Index: llvm/test/Transforms/LoopVectorize/X86/ivdep-unkbounds.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/X86/ivdep-unkbounds.ll
@@ -0,0 +1,195 @@
+; RUN: opt < %s -O3 -S | FileCheck %s
+; ModuleID = 'unkbounds.c'
+source_filename = "unkbounds.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+
+@.str = private unnamed_addr constant [2 x i8] c"r\00", align 1
+@.str.1 = private unnamed_addr constant [3 x i8] c"%d\00", align 1
+@.str.2 = private unnamed_addr constant [14 x i8] c"Scanned n:%d\0A\00", align 1
+@.str.3 = private unnamed_addr constant [19 x i8] c"Invalid array size\00", align 1
+@.str.4 = private unnamed_addr constant [12 x i8] c"Scanned:%d\0A\00", align 1
+@.str.5 = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+; Doubles A[i*i] in place for i in [0, 64) and returns A; the loop latch carries !2 (ivdep.enable + vectorize.enable). Function Attrs: noinline nounwind uwtable
+define dso_local i32* @doublefirst20(i32* %A) #0 {
+; CHECK: entry:
+; CHECK: br label %vector.body
+
+entry:
+  %A.addr = alloca i32*, align 8
+  %i = alloca i32, align 4
+  store i32* %A, i32** %A.addr, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %cmp = icmp slt i32 %0, 64
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK: vector.body:
+for.body:                                         ; preds = %for.cond
+  %1 = load i32*, i32** %A.addr, align 8
+  %2 = load i32, i32* %i, align 4
+  %3 = load i32, i32* %i, align 4
+  %mul = mul nsw i32 %2, %3
+  %idxprom = sext i32 %mul to i64
+  %arrayidx = getelementptr inbounds i32, i32* %1, i64 %idxprom
+  %4 = load i32, i32* %arrayidx, align 4
+  %mul1 = mul nsw i32 %4, 2
+  store i32 %mul1, i32* %arrayidx, align 4
+  br label %for.inc
+; CHECK: br i1 %{{.*}}, label %for.end, label %vector.body, !llvm.loop !2
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond, !llvm.loop !2
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32*, i32** %A.addr, align 8
+  ret i32* %6
+}
+
+; Test driver: opens the file named by argv[1], reads n and then n ints from it (bails out if n < 4096), calls @doublefirst20 on the array and prints every element. Function Attrs: noinline nounwind uwtable
+define dso_local i32 @main(i32 %argc, i8** %argv) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %argc.addr = alloca i32, align 4
+  %argv.addr = alloca i8**, align 8
+  %n = alloca i32, align 4
+  %i = alloca i32, align 4
+  %fname = alloca i8*, align 8
+  %fptr = alloca %struct._IO_FILE*, align 8
+  %A = alloca i32*, align 8
+  store i32 0, i32* %retval, align 4
+  store i32 %argc, i32* %argc.addr, align 4
+  store i8** %argv, i8*** %argv.addr, align 8
+  %0 = load i8**, i8*** %argv.addr, align 8
+  %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1
+  %1 = load i8*, i8** %arrayidx, align 8
+  store i8* %1, i8** %fname, align 8
+  %2 = load i8*, i8** %fname, align 8
+  %call = call %struct._IO_FILE* @fopen(i8* %2, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i64 0, i64 0))
+  store %struct._IO_FILE* %call, %struct._IO_FILE** %fptr, align 8
+  %3 = load %struct._IO_FILE*, %struct._IO_FILE** %fptr, align 8
+  %cmp = icmp ne %struct._IO_FILE* %3, null
+  br i1 %cmp, label %if.then, label %if.end25
+
+if.then:                                          ; preds = %entry
+  %4 = load %struct._IO_FILE*, %struct._IO_FILE** %fptr, align 8
+  %call1 = call i32 (%struct._IO_FILE*, i8*, ...) @__isoc99_fscanf(%struct._IO_FILE* %4, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.1, i64 0, i64 0), i32* %n)
+  %5 = load i32, i32* %n, align 4
+  %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.2, i64 0, i64 0), i32 %5)
+  %6 = load i32, i32* %n, align 4
+  %cmp3 = icmp slt i32 %6, 4096
+  br i1 %cmp3, label %if.then4, label %if.end
+
+if.then4:                                         ; preds = %if.then
+  %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([19 x i8], [19 x i8]* @.str.3, i64 0, i64 0))
+  store i32 0, i32* %retval, align 4
+  br label %if.end25
+
+if.end:                                           ; preds = %if.then
+  %7 = load i32, i32* %n, align 4
+  %conv = sext i32 %7 to i64
+  %mul = mul i64 %conv, 4
+  %call6 = call noalias i8* @malloc(i64 %mul) #3
+  %8 = bitcast i8* %call6 to i32*
+  store i32* %8, i32** %A, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %if.end
+  %9 = load i32, i32* %i, align 4
+  %10 = load i32, i32* %n, align 4
+  %cmp7 = icmp slt i32 %9, %10
+  br i1 %cmp7, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %11 = load %struct._IO_FILE*, %struct._IO_FILE** %fptr, align 8
+  %12 = load i32*, i32** %A, align 8
+  %13 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %13 to i64
+  %arrayidx9 = getelementptr inbounds i32, i32* %12, i64 %idxprom
+  %call10 = call i32 (%struct._IO_FILE*, i8*, ...) @__isoc99_fscanf(%struct._IO_FILE* %11, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.1, i64 0, i64 0), i32* %arrayidx9)
+  %14 = load i32*, i32** %A, align 8
+  %15 = load i32, i32* %i, align 4
+  %idxprom11 = sext i32 %15 to i64
+  %arrayidx12 = getelementptr inbounds i32, i32* %14, i64 %idxprom11
+  %16 = load i32, i32* %arrayidx12, align 4
+  %call13 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.4, i64 0, i64 0), i32 %16)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %17 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %17, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %18 = load i32*, i32** %A, align 8
+  %call14 = call i32* @doublefirst20(i32* %18)
+  store i32* %call14, i32** %A, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond15
+
+for.cond15:                                       ; preds = %for.inc22, %for.end
+  %19 = load i32, i32* %i, align 4
+  %20 = load i32, i32* %n, align 4
+  %cmp16 = icmp slt i32 %19, %20
+  br i1 %cmp16, label %for.body18, label %for.end24
+
+for.body18:                                       ; preds = %for.cond15
+  %21 = load i32*, i32** %A, align 8
+  %22 = load i32, i32* %i, align 4
+  %idxprom19 = sext i32 %22 to i64
+  %arrayidx20 = getelementptr inbounds i32, i32* %21, i64 %idxprom19
+  %23 = load i32, i32* %arrayidx20, align 4
+  %call21 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.5, i64 0, i64 0), i32 %23)
+  br label %for.inc22
+
+for.inc22:                                        ; preds = %for.body18
+  %24 = load i32, i32* %i, align 4
+  %inc23 = add nsw i32 %24, 1
+  store i32 %inc23, i32* %i, align 4
+  br label %for.cond15
+
+for.end24:                                        ; preds = %for.cond15
+  store i32 0, i32* %retval, align 4
+  br label %if.end25
+
+if.end25:                                         ; preds = %if.then4, %for.end24, %entry
+  %25 = load i32, i32* %retval, align 4
+  ret i32 %25
+}
+
+declare dso_local %struct._IO_FILE* @fopen(i8*, i8*) #1
+
+declare dso_local i32 @__isoc99_fscanf(%struct._IO_FILE*, i8*, ...) #1
+
+declare dso_local i32 @printf(i8*, ...) #1
+
+; Function Attrs: nounwind
+declare dso_local noalias i8* @malloc(i64) #2
+
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 10.0.0 (https://github.com/llvm/llvm-project 8a5bfbe6db2824642bf9a1d27a24c5b6132b244f)"}
+; CHECK: !2 = distinct !{!2, !3}
+; CHECK-NEXT: !3 = !{!"llvm.loop.isvectorized", i32 1}
+!2 = distinct !{!2, !3, !4}
+!3 = !{!"llvm.loop.vectorize.ivdep.enable", i1 true}
+!4 = !{!"llvm.loop.vectorize.enable", i1 true}
Index: llvm/test/Transforms/LoopVectorize/X86/ivdep-unkdep.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/X86/ivdep-unkdep.ll
@@ -0,0 +1,123 @@
+; RUN: opt < %s -O3 -S | FileCheck %s
+; ModuleID = 'test.cpp'
+source_filename = "test.cpp"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@.str = private unnamed_addr constant [3 x i8] c"%d\00", align 1
+
+; Reads n, vla[0..n), k, m and c via scanf, then runs vla[i] = vla[i + k] * c for i in [0, m); that second loop's latch carries !2 (ivdep.enable + vectorize.enable). Function Attrs: noinline norecurse uwtable
+define dso_local i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  %n = alloca i32, align 4
+  %r = alloca i32, align 4
+  %m = alloca i32, align 4
+  %k = alloca i32, align 4
+  %c = alloca i32, align 4
+  %saved_stack = alloca i8*, align 8
+  %__vla_expr0 = alloca i64, align 8
+  %i5 = alloca i32, align 4
+  store i32 0, i32* %retval, align 4
+  store i32 1, i32* %r, align 4
+  %call = call i32 (i8*, ...) @scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32* %n)
+  %0 = load i32, i32* %n, align 4
+  %1 = zext i32 %0 to i64
+  %2 = call i8* @llvm.stacksave()
+  store i8* %2, i8** %saved_stack, align 8
+  %vla = alloca i32, i64 %1, align 16
+  store i64 %1, i64* %__vla_expr0, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %3 = load i32, i32* %i, align 4
+  %4 = load i32, i32* %n, align 4
+  %cmp = icmp slt i32 %3, %4
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %5 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %5 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %vla, i64 %idxprom
+  %call1 = call i32 (i8*, ...) @scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32* %arrayidx)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %6 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %6, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %call2 = call i32 (i8*, ...) @scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32* %k)
+  %call3 = call i32 (i8*, ...) @scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32* %m)
+  %call4 = call i32 (i8*, ...) @scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32* %c)
+  store i32 0, i32* %i5, align 4
+  br label %for.cond6
+
+for.cond6:                                        ; preds = %for.inc13, %for.end
+  %7 = load i32, i32* %i5, align 4
+  %8 = load i32, i32* %m, align 4
+  %cmp7 = icmp slt i32 %7, %8
+  br i1 %cmp7, label %for.body8, label %for.end15
+;CHECK: vector.ph
+;CHECK: br i1 %{{.*}}, label %middle.block.unr-lcssa, label %vector.ph.new
+;CHECK: vector.ph.new:
+;CHECK: br label %vector.body
+
+for.body8:                                        ; preds = %for.cond6
+  %9 = load i32, i32* %i5, align 4
+  %10 = load i32, i32* %k, align 4
+  %add = add nsw i32 %9, %10
+  %idxprom9 = sext i32 %add to i64
+  %arrayidx10 = getelementptr inbounds i32, i32* %vla, i64 %idxprom9
+  %11 = load i32, i32* %arrayidx10, align 4
+  %12 = load i32, i32* %c, align 4
+  %mul = mul nsw i32 %11, %12
+  %13 = load i32, i32* %i5, align 4
+  %idxprom11 = sext i32 %13 to i64
+  %arrayidx12 = getelementptr inbounds i32, i32* %vla, i64 %idxprom11
+  store i32 %mul, i32* %arrayidx12, align 4
+  br label %for.inc13
+;CHECK:  br i1 %niter.ncmp.1, label %middle.block.unr-lcssa, label %vector.body, !llvm.loop !2
+
+for.inc13:                                        ; preds = %for.body8
+  %14 = load i32, i32* %i5, align 4
+  %inc14 = add nsw i32 %14, 1
+  store i32 %inc14, i32* %i5, align 4
+  br label %for.cond6, !llvm.loop !2
+
+for.end15:                                        ; preds = %for.cond6
+  store i32 0, i32* %retval, align 4
+  %15 = load i8*, i8** %saved_stack, align 8
+  call void @llvm.stackrestore(i8* %15)
+  %16 = load i32, i32* %retval, align 4
+  ret i32 %16
+}
+
+declare dso_local i32 @scanf(i8*, ...) #1
+
+; Function Attrs: nounwind
+declare i8* @llvm.stacksave() #2
+
+; Function Attrs: nounwind
+declare void @llvm.stackrestore(i8*) #2
+
+attributes #0 = { noinline norecurse uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 10.0.0 (https://github.com/llvm/llvm-project 8a5bfbe6db2824642bf9a1d27a24c5b6132b244f)"}
+; CHECK: !2 = distinct !{!2, !3}
+; CHECK-NEXT: !3 = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-NEXT: !4 = distinct !{!4, !5, !3}
+; CHECK-NEXT: !5 = !{!"llvm.loop.unroll.runtime.disable"}
+!2 = distinct !{!2, !3, !4}
+!3 = !{!"llvm.loop.vectorize.ivdep.enable", i1 true}
+!4 = !{!"llvm.loop.vectorize.enable", i1 true}