Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -5489,6 +5489,22 @@
    !0 = !{!"llvm.loop.vectorize.predicate.enable", i1 0}
    !1 = !{!"llvm.loop.vectorize.predicate.enable", i1 1}
 
+'``llvm.loop.vectorize.ivdep.enable``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This metadata instructs the vectorizer to ignore dependencies between
+memory accesses that it could not prove to be either safe or unsafe for
+vectorization. This differs from ``llvm.loop.parallel_accesses``, which
+states that no dependencies are present between memory accesses
+belonging to the same access group. The first operand is the string
+``llvm.loop.vectorize.ivdep.enable`` and the second operand is a bit. A
+value of 1 enables this behavior for the loop; dependencies proven unsafe
+still prevent vectorization.
+
+.. code-block:: llvm
+
+   !0 = !{!"llvm.loop.vectorize.ivdep.enable", i1 1}
+
 '``llvm.loop.vectorize.width``' Metadata
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
Index: llvm/include/llvm/Analysis/LoopAccessAnalysis.h
===================================================================
--- llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -201,7 +201,7 @@
   ///
   /// Only checks sets with elements in \p CheckDeps.
   bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
-                   const ValueToValueMap &Strides);
+                   const ValueToValueMap &Strides, bool UnknownDepHint);
 
   /// No memory dependence was encountered that would inhibit
   /// vectorization.
@@ -516,7 +516,8 @@
 class LoopAccessInfo {
 public:
   LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI,
-                 AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI);
+                 AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
+                 bool UnknownDepHint = false);
 
   /// Return true we can analyze the memory accesses in the loop and there are
   /// no memory dependence cycles.
@@ -608,7 +609,8 @@
 private:
   /// Analyze the loop.
   void analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
-                   const TargetLibraryInfo *TLI, DominatorTree *DT);
+                   const TargetLibraryInfo *TLI, DominatorTree *DT,
+                   bool UnknownDepHint);
 
   /// Check if the structure of the loop allows it to be analyzed by this
   /// pass.
@@ -735,7 +737,7 @@
   /// Query the result of the loop access information for the loop \p L.
   ///
   /// If there is no cached result available run the analysis.
-  const LoopAccessInfo &getInfo(Loop *L);
+  const LoopAccessInfo &getInfo(Loop *L, bool UnknownDepHint = false);
 
   void releaseMemory() override {
     // Invalidate the cache when the pass is freed.
Index: llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
===================================================================
--- llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -44,7 +44,7 @@
 /// careful NOT to add them if the user hasn't specifically asked so.
 class LoopVectorizeHints {
   enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED,
-                  HK_PREDICATE };
+                  HK_PREDICATE, HK_IVDEP };
 
   /// Hint - associates name and validation with the hint value.
   struct Hint {
@@ -73,6 +73,9 @@
   /// Vector Predicate
   Hint Predicate;
 
+  /// Ignore memory dependencies of unknown safety (ivdep)
+  Hint Ivdep;
+
   /// Return the loop metadata prefix.
   static StringRef Prefix() { return "llvm.loop."; }
 
@@ -102,6 +105,7 @@
   unsigned getInterleave() const { return Interleave.Value; }
   unsigned getIsVectorized() const { return IsVectorized.Value; }
   unsigned getPredicate() const { return Predicate.Value; }
+  unsigned getIvdep() const { return Ivdep.Value; }
   enum ForceKind getForce() const {
     if ((ForceKind)Force.Value == FK_Undefined &&
         hasDisableAllTransformsHint(TheLoop))
@@ -199,7 +203,7 @@
   LoopVectorizationLegality(
       Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
       TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AliasAnalysis *AA,
-      Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA,
+      Function *F, std::function<const LoopAccessInfo &(Loop &, bool)> *GetLAA,
       LoopInfo *LI, OptimizationRemarkEmitter *ORE,
       LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
       AssumptionCache *AC)
@@ -405,7 +409,7 @@
   DominatorTree *DT;
 
   // LoopAccess analysis.
-  std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
+  std::function<const LoopAccessInfo &(Loop &, bool)> *GetLAA;
 
   // And the loop-accesses info corresponding to this loop.  This pointer is
   // null until canVectorizeMemory sets it up.
Index: llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
===================================================================
--- llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -138,7 +138,7 @@
   DemandedBits *DB;
   AliasAnalysis *AA;
   AssumptionCache *AC;
-  std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
+  std::function<const LoopAccessInfo &(Loop &, bool)> *GetLAA;
   OptimizationRemarkEmitter *ORE;
   ProfileSummaryInfo *PSI;
 
@@ -149,7 +149,7 @@
                TargetTransformInfo &TTI_, DominatorTree &DT_,
                BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
                DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
-               std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
+               std::function<const LoopAccessInfo &(Loop &, bool)> &GetLAA_,
                OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_);
 
   bool processLoop(Loop *L);
Index: llvm/lib/Analysis/LoopAccessAnalysis.cpp
===================================================================
--- llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1633,10 +1633,12 @@
 
 bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
                                    MemAccessInfoList &CheckDeps,
-                                   const ValueToValueMap &Strides) {
+                                   const ValueToValueMap &Strides,
+                                   bool UnknownDepHint) {
 
   MaxSafeDepDistBytes = -1;
   SmallPtrSet<MemAccessInfo, 8> Visited;
+  Status = VectorizationSafetyStatus::Safe;
   for (MemAccessInfo CurAccess : CheckDeps) {
     if (Visited.count(CurAccess))
       continue;
@@ -1678,7 +1680,13 @@
 
             Dependence::DepType Type =
                 isDependent(*A.first, A.second, *B.first, B.second, Strides);
-            mergeInStatus(Dependence::isSafeForVectorization(Type));
+            // With the ivdep hint, dependences of unknown safety are treated
+            // as safe, so they must not degrade the accumulated status.
+            auto DepStatus = Dependence::isSafeForVectorization(Type);
+            const auto Unknown =
+                VectorizationSafetyStatus::PossiblySafeWithRtChecks;
+            if (!UnknownDepHint || DepStatus != Unknown)
+              mergeInStatus(DepStatus);
 
             // Gather dependences unless we accumulated MaxDependences
             // dependences.  In that case return as soon as we find the first
@@ -1788,7 +1796,8 @@
 
 void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
                                  const TargetLibraryInfo *TLI,
-                                 DominatorTree *DT) {
+                                 DominatorTree *DT,
+                                 bool UnknownDepHint) {
   typedef SmallPtrSet<Value*, 16> ValueSet;
 
   // Holds the Load and Store instructions.
@@ -2022,7 +2031,8 @@
   if (Accesses.isDependencyCheckNeeded()) {
     LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
     CanVecMem = DepChecker->areDepsSafe(
-        DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
+        DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides,
+        UnknownDepHint);
     MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
 
     if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
@@ -2343,7 +2353,8 @@
 
 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                                const TargetLibraryInfo *TLI, AliasAnalysis *AA,
-                               DominatorTree *DT, LoopInfo *LI)
+                               DominatorTree *DT, LoopInfo *LI,
+                               bool UnknownDepHint)
     : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
       PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
       DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
@@ -2351,7 +2362,7 @@
       HasConvergentOp(false),
       HasDependenceInvolvingLoopInvariantAddress(false) {
   if (canAnalyzeLoop())
-    analyzeLoop(AA, LI, TLI, DT);
+    analyzeLoop(AA, LI, TLI, DT, UnknownDepHint);
 }
 
 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
@@ -2397,11 +2408,16 @@
   PSE->print(OS, Depth);
 }
 
-const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
+const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L,
+                                                        bool UnknownDepHint) {
   auto &LAI = LoopAccessInfoMap[L];
 
+  // FIXME: The cache is keyed on the loop only. A result computed for one
+  // value of UnknownDepHint is returned for every later query, even when the
+  // hint differs.
   if (!LAI)
-    LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
+    LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI,
+                                           UnknownDepHint);
 
   return *LAI.get();
 }
Index: llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -61,6 +61,8 @@
   case HK_ISVECTORIZED:
   case HK_PREDICATE:
     return (Val == 0 || Val == 1);
+  case HK_IVDEP: // Boolean hint: 0 (disabled) and 1 (enabled) are both valid.
+    return (Val == 0 || Val == 1);
   }
   return false;
 }
@@ -72,7 +74,8 @@
       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
       Force("vectorize.enable", FK_Undefined, HK_FORCE),
       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
-      Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L),
+      Predicate("vectorize.predicate.enable", 0, HK_PREDICATE),
+      Ivdep("vectorize.ivdep.enable", 0, HK_IVDEP), TheLoop(L),
       ORE(ORE) {
   // Populate values with existing loop metadata.
   getHintsFromMetadata();
@@ -224,7 +227,8 @@
     return;
   unsigned Val = C->getZExtValue();
 
-  Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
+  Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate,
+                   &Ivdep};
   for (auto H : Hints) {
     if (Name == H->Name) {
       if (H->validate(Val))
@@ -825,7 +829,7 @@
 }
 
 bool LoopVectorizationLegality::canVectorizeMemory() {
-  LAI = &(*GetLAA)(*TheLoop);
+  LAI = &(*GetLAA)(*TheLoop, Hints->getIvdep());
   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
   if (LAR) {
     ORE->emit([&]() {
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1610,8 +1610,10 @@
     auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
     auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
 
-    std::function<const LoopAccessInfo &(Loop &)> GetLAA =
-        [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
+    std::function<const LoopAccessInfo &(Loop &, bool)> GetLAA =
+        [&](Loop &L, bool UnknownDepHint) -> const LoopAccessInfo & {
+      return LAA->getInfo(&L, UnknownDepHint);
+    };
 
     return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC,
                         GetLAA, *ORE, PSI);
@@ -7800,7 +7802,7 @@
     Function &F, ScalarEvolution &SE_, LoopInfo &LI_, TargetTransformInfo &TTI_,
     DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
     DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
-    std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
+    std::function<const LoopAccessInfo &(Loop &, bool)> &GetLAA_,
     OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_) {
   SE = &SE_;
   LI = &LI_;
@@ -7879,8 +7881,10 @@
                           : nullptr;
 
     auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
-    std::function<const LoopAccessInfo &(Loop &)> GetLAA =
-        [&](Loop &L) -> const LoopAccessInfo & {
+    // FIXME: UnknownDepHint is accepted but not forwarded to the
+    // LoopAccessAnalysis query below, so the hint is ignored on this path.
+    std::function<const LoopAccessInfo &(Loop &, bool)> GetLAA =
+        [&](Loop &L, bool UnknownDepHint) -> const LoopAccessInfo & {
       LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
       return LAM.getResult<LoopAccessAnalysis>(L, AR);
     };
Index: llvm/test/Transforms/LoopVectorize/X86/ivdep-alias.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/X86/ivdep-alias.ll
@@ -0,0 +1,80 @@
+; RUN: opt < %s -O3 -S | FileCheck %s
+; IR generated for a function containing the loop:
+;   #pragma clang loop ivdep(enable)
+;   for (int i=0; i<LEN_1D; i++)
+;     a[b[i]]++
+; where LEN_1D is an integer constant.
+; The dependency above is unknown: the vectorizer cannot determine whether
+; vectorizing the accesses to a[b[i]] is safe or unsafe.
+; Check that the loop is vectorized when ivdep is present.
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32* @addLoops(i32* noalias %a, i32* noalias %b, i32 %LEN_1D) #0 {
+entry:
+  %a.addr = alloca i32*, align 8
+  %b.addr = alloca i32*, align 8
+  %LEN_1D.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32* %a, i32** %a.addr, align 8
+  store i32* %b, i32** %b.addr, align 8
+  store i32 %LEN_1D, i32* %LEN_1D.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+; CHECK: vector.ph:
+; CHECK-NEXT: %n.vec = and i64 %wide.trip.count, 4294967292
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %1 = load i32, i32* %LEN_1D.addr, align 4
+  %cmp = icmp slt i32 %0, %1
+  br i1 %cmp, label %for.body, label %for.end
+; CHECK: br label %vector.body
+
+; CHECK: vector.body:
+for.body:                                         ; preds = %for.cond
+  %2 = load i32*, i32** %a.addr, align 8
+  %3 = load i32*, i32** %b.addr, align 8
+; CHECK: %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+; CHECK: %3 = extractelement <4 x i64> %2, i32 0
+  %4 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %4 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %3, i64 %idxprom
+  %5 = load i32, i32* %arrayidx, align 4
+; CHECK: %16 = insertelement <4 x i32> %15, i32 %12, i32 1
+; CHECK: %21 = extractelement <4 x i32> %19, i32 1
+  %idxprom1 = sext i32 %5 to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %2, i64 %idxprom1
+  %6 = load i32, i32* %arrayidx2, align 4
+  %inc = add nsw i32 %6, 1
+  store i32 %inc, i32* %arrayidx2, align 4
+  br label %for.inc
+; CHECK: %24 = icmp eq i64 %index.next, %n.vec
+; CHECK: br i1 %24, label %middle.block, label %vector.body, !llvm.loop !2
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32, i32* %i, align 4
+  %inc3 = add nsw i32 %7, 1
+  store i32 %inc3, i32* %i, align 4
+  br label %for.cond, !llvm.loop !2
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32*, i32** %a.addr, align 8
+  ret i32* %8
+}
+
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 10.0.0 (https://github.com/llvm/llvm-project 8a5bfbe6db2824642bf9a1d27a24c5b6132b244f)"}
+; CHECK: !2 = distinct !{!2, !3}
+; CHECK-NEXT: !3 = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-NEXT: !4 = distinct !{!4, !5, !3}
+; CHECK-NEXT: !5 = !{!"llvm.loop.unroll.runtime.disable"}
+!2 = distinct !{!2, !3, !4}
+!3 = !{!"llvm.loop.vectorize.ivdep.enable", i1 true}
+!4 = !{!"llvm.loop.vectorize.enable", i1 true}
Index: llvm/test/Transforms/LoopVectorize/X86/ivdep-novec.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/X86/ivdep-novec.ll
@@ -0,0 +1,74 @@
+; RUN: opt < %s -O3 -S | FileCheck %s
+; IR generated for a function containing the loop:
+;   #pragma clang loop ivdep(enable)
+;     for (i = 1; i < n; i++)
+;       A[i] = A[i] + A[i-1];
+; where n is an integer.
+; The vectorizer can determine that the above dependency is unsafe for
+; vectorization.
+; Should not vectorize even if ivdep is present.
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32* @calcDepArray(i32* %A, i32 %n) #0 {
+entry:
+  %A.addr = alloca i32*, align 8
+  %n.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32* %A, i32** %A.addr, align 8
+  store i32 %n, i32* %n.addr, align 4
+  store i32 1, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %1 = load i32, i32* %n.addr, align 4
+  %cmp = icmp slt i32 %0, %1
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK: for.body:
+for.body:                                         ; preds = %for.cond
+  %2 = load i32*, i32** %A.addr, align 8
+  %3 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %3 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
+  %4 = load i32, i32* %arrayidx, align 4
+  %5 = load i32*, i32** %A.addr, align 8
+  %6 = load i32, i32* %i, align 4
+  %sub = sub nsw i32 %6, 1
+  %idxprom1 = sext i32 %sub to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %5, i64 %idxprom1
+  %7 = load i32, i32* %arrayidx2, align 4
+  %add = add nsw i32 %4, %7
+  %8 = load i32*, i32** %A.addr, align 8
+  %9 = load i32, i32* %i, align 4
+  %idxprom3 = sext i32 %9 to i64
+  %arrayidx4 = getelementptr inbounds i32, i32* %8, i64 %idxprom3
+  store i32 %add, i32* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %10 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %10, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond, !llvm.loop !2
+
+for.end:                                          ; preds = %for.cond
+  %11 = load i32*, i32** %A.addr, align 8
+  ret i32* %11
+}
+
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 10.0.0 (https://github.com/llvm/llvm-project 8a5bfbe6db2824642bf9a1d27a24c5b6132b244f)"}
+; CHECK: !2 = distinct !{!2, !3, !4}
+; CHECK-NEXT: !3 = !{!"llvm.loop.vectorize.ivdep.enable", i1 true}
+; CHECK-NEXT: !4 = !{!"llvm.loop.vectorize.enable", i1 true}
+!2 = distinct !{!2, !3, !4}
+!3 = !{!"llvm.loop.vectorize.ivdep.enable", i1 true}
+!4 = !{!"llvm.loop.vectorize.enable", i1 true}
Index: llvm/test/Transforms/LoopVectorize/X86/ivdep-unkbounds.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/X86/ivdep-unkbounds.ll
@@ -0,0 +1,70 @@
+; RUN: opt < %s -O3 -S | FileCheck %s
+; IR generated for a function containing the loop:
+;   #pragma clang loop ivdep(enable)
+;     for (i = 0; i < 64; i++)
+;       A[i*i] *= 2;
+; In the above example, the vectorizer cannot determine whether the
+; array accesses are within array bounds and are safe for vectorization.
+; The vectorizer regards this as an unknown dependency.
+; Check that the loop is vectorized when ivdep is present.
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32* @doubleArrayElements(i32* %A) #0 {
+entry:
+  %A.addr = alloca i32*, align 8
+  %i = alloca i32, align 4
+  store i32* %A, i32** %A.addr, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+; CHECK: br label %vector.body
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %cmp = icmp slt i32 %0, 64
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK: vector.body:
+for.body:                                         ; preds = %for.cond
+; CHECK: %vec.ind = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %entry ], [ %vec.ind.next, %vector.body ] 
+; CHECK: %0 = mul <4 x i64> %vec.ind, %vec.ind
+; CHECK: %2 = extractelement <4 x i64> %1, i32 0
+  %1 = load i32*, i32** %A.addr, align 8
+  %2 = load i32, i32* %i, align 4
+  %3 = load i32, i32* %i, align 4
+  %mul = mul nsw i32 %2, %3
+  %idxprom = sext i32 %mul to i64
+  %arrayidx = getelementptr inbounds i32, i32* %1, i64 %idxprom
+; CHECK: %15 = insertelement <4 x i32> %14, i32 %11, i32 1
+; CHECK: %21 = extractelement <4 x i32> %18, i32 2
+  %4 = load i32, i32* %arrayidx, align 4
+  %mul1 = mul nsw i32 %4, 2
+  store i32 %mul1, i32* %arrayidx, align 4
+; CHECK: %vec.ind.next = add <4 x i64> %vec.ind, <i64 4, i64 4, i64 4, i64 4>
+  br label %for.inc
+; CHECK: br i1 %23, label %for.end, label %vector.body, !llvm.loop !2
+
+for.inc:                                          ; preds = %for.body
+  %5 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond, !llvm.loop !2
+
+for.end:                                          ; preds = %for.cond
+  %6 = load i32*, i32** %A.addr, align 8
+  ret i32* %6
+}
+
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 10.0.0 (https://github.com/llvm/llvm-project 8a5bfbe6db2824642bf9a1d27a24c5b6132b244f)"}
+; CHECK: !2 = distinct !{!2, !3}
+; CHECK-NEXT: !3 = !{!"llvm.loop.isvectorized", i32 1}
+!2 = distinct !{!2, !3, !4}
+!3 = !{!"llvm.loop.vectorize.ivdep.enable", i1 true}
+!4 = !{!"llvm.loop.vectorize.enable", i1 true}
Index: llvm/test/Transforms/LoopVectorize/X86/ivdep-unkdep.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/X86/ivdep-unkdep.ll
@@ -0,0 +1,88 @@
+; RUN: opt < %s -O3 -S | FileCheck %s
+; IR generated for a function containing the loop:
+;    #pragma clang loop ivdep(enable)
+;    for (i = 0; i < m; i++){
+;      a[i] = a[i + k] * c;
+; where m, k, c are integer constants.
+; The dependency above is unknown: the vectorizer cannot determine whether
+; the accesses are independent or whether a[i + k] is within array bounds.
+; This depends on the value of k, so the dependence is determined to be
+; neither safe nor unsafe.
+; Check if the loop has been vectorized when ivdep is present.
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @calcArray(i32* %a, i32 %m, i32 %k, i32 %c) #0 {
+entry:
+  %a.addr = alloca i32*, align 8
+  %m.addr = alloca i32, align 4
+  %k.addr = alloca i32, align 4
+  %c.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32* %a, i32** %a.addr, align 8
+  store i32 %m, i32* %m.addr, align 4
+  store i32 %k, i32* %k.addr, align 4
+  store i32 %c, i32* %c.addr, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %1 = load i32, i32* %m.addr, align 4
+  %cmp = icmp slt i32 %0, %1
+  br i1 %cmp, label %for.body, label %for.end
+; CHECK: vector.ph
+; CHECK: %n.vec = and i64 %wide.trip.count, 4294967288
+; CHECK: %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %c, i32 0
+; CHECK: %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK: br i1 %4, label %middle.block.unr-lcssa, label %vector.ph.new
+
+; CHECK: vector.ph.new:
+; CHECK: br label %vector.body
+
+for.body:                                         ; preds = %for.cond
+  %2 = load i32*, i32** %a.addr, align 8
+  %3 = load i32, i32* %i, align 4
+  %4 = load i32, i32* %k.addr, align 4
+  %add = add nsw i32 %3, %4
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
+  %5 = load i32, i32* %arrayidx, align 4
+  %6 = load i32, i32* %c.addr, align 4
+; CHECK: %wide.load = load <4 x i32>, <4 x i32>* %7, align 4
+; CHECK: %10 = mul nsw <4 x i32> %wide.load, %broadcast.splat11
+; CHECK: store <4 x i32> %10, <4 x i32>* %13, align 4
+  %mul = mul nsw i32 %5, %6
+  %7 = load i32*, i32** %a.addr, align 8
+  %8 = load i32, i32* %i, align 4
+  %idxprom1 = sext i32 %8 to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %7, i64 %idxprom1
+  store i32 %mul, i32* %arrayidx2, align 4
+  br label %for.inc
+; CHECK:  br i1 %niter.ncmp.1, label %middle.block.unr-lcssa, label %vector.body, !llvm.loop !2
+
+for.inc:                                          ; preds = %for.body
+  %9 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %9, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond, !llvm.loop !2
+
+for.end:                                          ; preds = %for.cond
+  ret i32 0
+}
+
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 10.0.0 (https://github.com/llvm/llvm-project 8a5bfbe6db2824642bf9a1d27a24c5b6132b244f)"}
+; CHECK: !2 = distinct !{!2, !3}
+; CHECK-NEXT: !3 = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-NEXT: !4 = distinct !{!4, !5, !3}
+; CHECK-NEXT: !5 = !{!"llvm.loop.unroll.runtime.disable"}
+!2 = distinct !{!2, !3, !4}
+!3 = !{!"llvm.loop.vectorize.ivdep.enable", i1 true}
+!4 = !{!"llvm.loop.vectorize.enable", i1 true}