Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -175,6 +175,7 @@
 void initializeLoopRerollPass(PassRegistry&);
 void initializeLoopUnrollPass(PassRegistry&);
 void initializeLoopUnswitchPass(PassRegistry&);
+void initializeLoopVersioningLICMPass(PassRegistry&);
 void initializeLoopIdiomRecognizePass(PassRegistry&);
 void initializeLowerAtomicPass(PassRegistry&);
 void initializeLowerBitSetsPass(PassRegistry&);
Index: include/llvm/LinkAllPasses.h
===================================================================
--- include/llvm/LinkAllPasses.h
+++ include/llvm/LinkAllPasses.h
@@ -104,6 +104,7 @@
       (void) llvm::createLoopRerollPass();
       (void) llvm::createLoopUnrollPass();
       (void) llvm::createLoopUnswitchPass();
+      (void) llvm::createLoopVersioningLICMPass();
       (void) llvm::createLoopIdiomPass();
       (void) llvm::createLoopRotatePass();
       (void) llvm::createLowerExpectIntrinsicPass();
Index: include/llvm/Transforms/Scalar.h
===================================================================
--- include/llvm/Transforms/Scalar.h
+++ include/llvm/Transforms/Scalar.h
@@ -140,6 +140,12 @@
 
 //===----------------------------------------------------------------------===//
 //
+// LoopVersioningLICM - This pass is a loop multi versioning pass for LICM.
+//
+Pass *createLoopVersioningLICMPass();
+
+//===----------------------------------------------------------------------===//
+//
 // LoopInterchange - This pass interchanges loops to provide a more
 // cache-friendly memory access patterns.
 //
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -55,6 +55,10 @@
   cl::init(true), cl::Hidden,
   cl::desc("Enable the new, experimental SROA pass"));
 
+static cl::opt<bool> UseLoopVersioningLICM("loop-versioning-licm",
+  cl::init(false), cl::Hidden,
+  cl::desc("Enable the new, experimental Loop Versioning LICM pass"));
+
 static cl::opt<bool>
 RunLoopRerolling("reroll-loops", cl::Hidden,
                  cl::desc("Run the loop rerolling pass"));
@@ -244,6 +248,11 @@
   MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
   MPM.add(createInstructionCombiningPass());
   MPM.add(createIndVarSimplifyPass());        // Canonicalize indvars
+  // Do loop versioning if right option provided, default disabled.
+  if (UseLoopVersioningLICM) {
+    MPM.add(createLoopVersioningLICMPass());        // Do Loop Versioning
+    MPM.add(createLICMPass());                  // Hoist loop invariants
+  }
   MPM.add(createLoopIdiomPass());             // Recognize idioms like memset.
   MPM.add(createLoopDeletionPass());          // Delete dead loops
   if (EnableLoopInterchange)
Index: lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- lib/Transforms/Scalar/CMakeLists.txt
+++ lib/Transforms/Scalar/CMakeLists.txt
@@ -25,6 +25,7 @@
   LoopStrengthReduce.cpp
   LoopUnrollPass.cpp
   LoopUnswitch.cpp
+  LoopVersioningLICM.cpp
   LowerAtomic.cpp
   LowerExpectIntrinsic.cpp
   MemCpyOptimizer.cpp
Index: lib/Transforms/Scalar/LoopVersioningLICM.cpp
===================================================================
--- lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -0,0 +1,912 @@
+//=-- LoopVersioningLICM.cpp - Creates aggressive alias version of a loop --=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// Cases where memory aliasing wasn't sure compiler became conservative and
+// didn't proceeds with invariant code motion. This results in possible
+// missed optimization opportunities.
+//
+// Our main motivation is to exploit such cases and allow LICM optimization.
+//
+// Most of the time when alias analysis is unsure about memory access and it
+// assumes may-alias. This un surety from alias analysis restrict LICM to
+// proceed further. Cases where alias analysis is unsure we like to use loop
+// versioning as an alternative.
+//
+// Loop Versioning will creates version of the loop with aggressive alias and
+// the other with conservative (default) alias. Aggressive alias version of
+// loop will have all the memory access marked as no-alias. These two version
+// of loop will be preceded by a memory runtime check.
+// This runtime check consists of bound checks for all unique memory accessed
+// in loop, and it ensures aliasing of memory. Based on this check result at
+// runtime any of the loops gets executed, if memory is non aliased then
+// aggressive aliasing loop gets executed, else when memory is aliased then non
+// aggressive aliased version gets executed.
+//
+// Following are the top level steps:
+//
+// a) Perform loop versioning feasibility check.
+// b) If loop is a candidate for versioning then create a memory bound check,
+//    by considering all the memory access in loop body.
+// c) Clone original loop and set all memory access as no-alias in new loop.
+// d) Set original loop & versioned loop as a branch target of runtime check
+//    result.
+// e) Call LICM on aggressive alias versioned of loop.
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/VectorUtils.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/IR/PredIteratorCache.h"
+using namespace llvm;
+#include <iostream>
+
+#define DEBUG_TYPE "loop-versioning-licm"
+#define ORIGINAL_LOOP_METADATA "LoopVersioningLICMOriginalLoop"
+#define VERSION_LOOP_METADATA "LoopVersioningLICMVersionLoop"
+
+typedef SmallPtrSet<Value *, 16> ValueSet;
+
+/// Loop Versioning invariant threshold
+static cl::opt<unsigned>
+    LVInvarThreshold("loopver-invar-threshold",
+                     cl::desc("Loop Versioning invariant threshold"),
+                     cl::init(25), cl::Hidden);
+
+/// Loop Versioning max loop depth threshold
+static cl::opt<unsigned>
+    LVLoopDepthThreshold("loopver-max-depth-threshold",
+                         cl::desc("Loop Versioning loop depth threshold"),
+                         cl::init(2), cl::Hidden);
+
+/// Loop Versioning memcheck pointers threshold
+static cl::opt<unsigned> LVMemchkPtrThreshold(
+    "loopver-memcheck-ptr-threshold",
+    cl::desc("Loop Versioning memcheck pointers threshold"), cl::init(5),
+    cl::Hidden);
+
+/// \brief Find the operand of the GEP that should be checked for consecutive
+/// stores. This ignores trailing indices that have no effect on the final
+/// pointer.
+static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {
+  unsigned LastOperand = Gep->getNumOperands() - 1;
+  const DataLayout &DL = Gep->getModule()->getDataLayout();
+
+  unsigned GEPAllocSize = DL.getTypeAllocSize(
+      cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
+
+  // Walk backwards and try to peel off zeros.
+  while (LastOperand > 1 &&
+         match(Gep->getOperand(LastOperand), llvm::PatternMatch::m_Zero())) {
+    // Find the type we're currently indexing into.
+    gep_type_iterator GEPTI = gep_type_begin(Gep);
+    std::advance(GEPTI, LastOperand - 1);
+
+    // If it's a type with the same allocation size as the result of the GEP we
+    // can peel off the zero index.
+    if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize)
+      break;
+    --LastOperand;
+  }
+
+  return LastOperand;
+}
+
+/// \brief Recursively clone the specified loop and all of its children,
+/// mapping the blocks with the specified map.
+static Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI,
+                       LPPassManager *LPM) {
+  Loop *New = new Loop();
+  LPM->insertLoop(New, PL);
+
+  // Add all of the blocks in L to the new loop.
+  for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
+       ++I)
+    if (LI->getLoopFor(*I) == L)
+      New->addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
+
+  // Add all of the subloops to the new loop.
+  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+    cloneLoop(*I, New, VM, LI, LPM);
+
+  return New;
+}
+
+/// \brief Remove GEPs whose indices but the last one are loop invariant and
+/// return the induction operand of the gep pointer.
+static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
+  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+  if (!GEP)
+    return Ptr;
+
+  unsigned InductionOperand = getGEPInductionOperand(GEP);
+
+  // Check that all of the gep indices are uniform except for our induction
+  // operand.
+  for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
+    if (i != InductionOperand &&
+        !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
+      return Ptr;
+  return GEP->getOperand(InductionOperand);
+}
+
+/// \brief Look for a cast use of the passed value.
+static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
+  Value *UniqueCast = nullptr;
+  for (User *U : Ptr->users()) {
+    CastInst *CI = dyn_cast<CastInst>(U);
+    if (CI && CI->getType() == Ty) {
+      if (!UniqueCast)
+        UniqueCast = CI;
+      else
+        return nullptr;
+    }
+  }
+  return UniqueCast;
+}
+
+/// \brief Get the stride of a pointer access in a loop.
+/// Looks for symbolic strides "a[i*stride]". Returns the symbolic stride as a
+/// pointer to the Value, or null otherwise.
+static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
+  const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+  if (!PtrTy || PtrTy->isAggregateType())
+    return nullptr;
+
+  // Try to remove a gep instruction to make the pointer (actually index at this
+  // point) easier analyzable. If OrigPtr is equal to Ptr we are analzying the
+  // pointer, otherwise, we are analyzing the index.
+  Value *OrigPtr = Ptr;
+
+  // The size of the pointer access.
+  int64_t PtrAccessSize = 1;
+
+  Ptr = stripGetElementPtr(Ptr, SE, Lp);
+  const SCEV *V = SE->getSCEV(Ptr);
+
+  if (Ptr != OrigPtr)
+    // Strip off casts.
+    while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V))
+      V = C->getOperand();
+
+  const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
+  if (!S)
+    return nullptr;
+
+  V = S->getStepRecurrence(*SE);
+  if (!V)
+    return nullptr;
+
+  // Strip off the size of access multiplication if we are still analyzing the
+  // pointer.
+  if (OrigPtr == Ptr) {
+    const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout();
+    DL.getTypeAllocSize(PtrTy->getElementType());
+    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
+      if (M->getOperand(0)->getSCEVType() != scConstant)
+        return nullptr;
+
+      const APInt &APStepVal =
+          cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue();
+
+      // Huge step value - give up.
+      if (APStepVal.getBitWidth() > 64)
+        return nullptr;
+
+      int64_t StepVal = APStepVal.getSExtValue();
+      if (PtrAccessSize != StepVal)
+        return nullptr;
+      V = M->getOperand(1);
+    }
+  }
+
+  // Strip off casts.
+  Type *StripedOffRecurrenceCast = nullptr;
+  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
+    StripedOffRecurrenceCast = C->getType();
+    V = C->getOperand();
+  }
+
+  // Look for the loop invariant symbolic value.
+  const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
+  if (!U)
+    return nullptr;
+
+  Value *Stride = U->getValue();
+  if (!Lp->isLoopInvariant(Stride))
+    return nullptr;
+
+  // If we have stripped off the recurrence cast we have to make sure that we
+  // return the value that is used in this loop so that we can replace it later.
+  if (StripedOffRecurrenceCast)
+    Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
+
+  return Stride;
+}
+
+// Sets input string as meta data to loop latch terminator instruction.
+static void setLoopMetaData(Loop *CurLoop, const char *MDString) {
+  assert(CurLoop->getLoopPreheader() != nullptr &&
+         "Loop should have a preheader");
+  Instruction *I = CurLoop->getLoopLatch()->getTerminator();
+  LLVMContext &C = I->getContext();
+  SmallVector<Metadata *, 1> Elts;
+  Elts.push_back(
+      ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
+  MDNode *Node = MDNode::get(C, Elts);
+  I->setMetadata(MDString, Node);
+}
+
+namespace {
+struct LoopVersioningLICM : public LoopPass {
+  static char ID;
+
+  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+    AU.addRequiredID(LoopSimplifyID);
+    AU.addRequiredID(LCSSAID);
+    AU.addRequired<AliasAnalysis>();
+    AU.addRequired<ScalarEvolution>();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.addRequired<LoopAccessAnalysis>();
+  }
+
+  using llvm::Pass::doFinalization;
+
+  bool doFinalization() override { return false; }
+
+  LoopVersioningLICM()
+      : LoopPass(ID), AA(nullptr), SE(nullptr), LI(nullptr), DT(nullptr),
+        TLI(nullptr), LAA(nullptr), LAI(nullptr), Changed(false),
+        Preheader(nullptr), CurLoop(nullptr), CurAST(nullptr),
+        PointerSet(nullptr), RuntimeMemoryCheckThreshold(LVMemchkPtrThreshold),
+        LoopDepthThreshold(LVLoopDepthThreshold),
+        InvariantThreshold(LVInvarThreshold), LoadnStoreCounter(0),
+        InvariantCounter(0), isReadOnlyLoop(true) {
+    initializeLoopVersioningLICMPass(*PassRegistry::getPassRegistry());
+  }
+
+  AliasAnalysis *AA;         // Current AliasAnalysis information
+  ScalarEvolution *SE;       // Current ScalarEvolution
+  LoopInfo *LI;              // Current LoopInfo
+  DominatorTree *DT;         // Dominator Tree for the current Loop.
+  TargetLibraryInfo *TLI;    // TargetLibraryInfo for constant folding.
+  LoopAccessAnalysis *LAA;   // Current LoopAccessAnalysis
+  const LoopAccessInfo *LAI; // Current Loop's LoopAccessInfo
+
+  bool Changed;            // Set to true when we change anything.
+  BasicBlock *Preheader;   // The preheader block of the current loop.
+  Loop *CurLoop;           // The current loop we are working on.
+  AliasSetTracker *CurAST; // AliasSet information for the current loop.
+  ValueSet *PointerSet;
+  ValueToValueMap Strides;
+
+  unsigned RuntimeMemoryCheckThreshold; // Pointer limit for runtime check.
+  unsigned LoopDepthThreshold;          // Maximum loop nest threshold
+  unsigned InvariantThreshold;          // Minimum invariant threshold
+  unsigned LoadnStoreCounter;           // Counter to track num of load & store
+  unsigned InvariantCounter;            // Counter to track num of invariant
+  bool isReadOnlyLoop;                  // to check loop is read only.
+
+  bool isLegalForVersioning();
+  bool loopStructureLegality();
+  bool loopInstructionsLegality();
+  bool loopMemoryLegality();
+  Loop *versionizeLoop(LPPassManager &);
+  void splitExitEdges(Loop *, const SmallVectorImpl<BasicBlock *> &);
+  void collectStridedAccess(Value *LoadOrStoreInst);
+  bool isLoopAlreadyVisited();
+  void setNoAliasToLoop(Loop *);
+  bool instructionSafeForVersioning(Instruction *);
+  const char *getPassName() const override { return "Loop Versioning"; }
+};
+}
+
+/// \brief Collects stride access from a given value.
+void LoopVersioningLICM::collectStridedAccess(Value *MemAccess) {
+  Value *Ptr = nullptr;
+  if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
+    Ptr = LI->getPointerOperand();
+  else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
+    Ptr = SI->getPointerOperand();
+  else
+    return;
+
+  Value *Stride = getStrideFromPointer(Ptr, SE, CurLoop);
+  if (!Stride)
+    return;
+
+  DEBUG(dbgs() << "Found a strided access that we can version");
+  DEBUG(dbgs() << "  Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
+  Strides[Ptr] = Stride;
+}
+
+/// \brief Split all of the edges from inside the loop to their exit
+/// blocks.  Update the appropriate Phi nodes as we do so.
+void LoopVersioningLICM::splitExitEdges(
+    Loop *L, const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
+
+  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+    BasicBlock *ExitBlock = ExitBlocks[i];
+    SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock),
+                                       pred_end(ExitBlock));
+
+    // Although SplitBlockPredecessors doesn't preserve loop-simplify in
+    // general, if we call it on all predecessors of all exits then it does.
+    if (!ExitBlock->isLandingPad()) {
+      SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", AA, DT, LI,
+                             this->mustPreserveAnalysisID(LCSSAID));
+    } else {
+      SmallVector<BasicBlock *, 2> NewBBs;
+      SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa",
+                                  NewBBs, AA, DT, LI);
+    }
+  }
+}
+/// \brief Check loop structure and confirms its good for Loop Versioning.
+bool LoopVersioningLICM::loopStructureLegality() {
+  // Loop must have a preheader, if not return false.
+  if (!CurLoop->getLoopPreheader()) {
+    DEBUG(dbgs() << "    loop preheader is missing\n");
+    return false;
+  }
+  // Loop should be innermost loops, if not return false.
+  if (CurLoop->getSubLoops().size()) {
+    DEBUG(dbgs() << "    loop is not innermost\n");
+    return false;
+  }
+  // Loop should have a single backedge, if not return false.
+  if (CurLoop->getNumBackEdges() != 1) {
+    DEBUG(dbgs() << "    loop has multiple backedges\n");
+    return false;
+  }
+  // Loop must have a single exiting block, if not return false.
+  if (!CurLoop->getExitingBlock()) {
+    DEBUG(dbgs() << "    loop has multiple exiting block\n");
+    return false;
+  }
+  // We only handle bottom-tested loops, i.e. loop in which the condition is
+  // checked at the end of each iteration. With that we can assume that all
+  // instructions in the loop are executed the same number of times.
+  if (CurLoop->getExitingBlock() != CurLoop->getLoopLatch()) {
+    DEBUG(dbgs()
+          << "    loop control flow is not understood by LoopVersioningLICM\n");
+    return false;
+  }
+  // Loop depth more then LoopDepthThreshold are not allowed
+  if (CurLoop->getLoopDepth() > LoopDepthThreshold) {
+    DEBUG(dbgs() << "    loop depth is more then threshold\n");
+    return false;
+  }
+  // PAS: Loop should have a dedicated exit block, if not return false.
+  if (!CurLoop->hasDedicatedExits()) {
+    DEBUG(dbgs() << "    loop does not has dedicated exit blocks\n");
+    return false;
+  }
+  // Loop should have a trip count, if not return false.
+  // ScalarEvolution needs to be able to find the exit count.
+  const SCEV *ExitCount = SE->getBackedgeTakenCount(CurLoop);
+  if (ExitCount == SE->getCouldNotCompute()) {
+    DEBUG(dbgs() << "    loop does not has trip count\n");
+    return false;
+  }
+  return true;
+}
+
+/// \brief Check loop memory accesses and confirms its good for
+/// Loop versioning.
+bool LoopVersioningLICM::loopMemoryLegality() {
+  // LoopAccessInfo should not be null.
+  assert(LAI != nullptr && "LoopAccessInfo should not be null");
+  // Check if LoopAccessAnalysis finds loop memory access safe
+  // for loop versioning. If unsafe return false.
+  if (!LAI->canVectorizeMemory()) {
+    DEBUG(dbgs() << "    LAA found unsafe memory access.\n");
+    return false;
+  }
+  bool HasMayAlias = 0;
+  bool TypeSafety = 0;
+  bool IsMod = 0;
+  PointerSet->clear();
+  // Memory check:
+  // Iterate over alias set tracker and individual alias sets.
+  // 1) Make sure PointerSet should have atleast 2 pointers.
+  // 2) PointerSet should'nt have pointers more then RuntimeMemoryCheckThreshold
+  // 3) Make sure AliasSets should not have any must alias set.
+  for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); I != E;
+       ++I) {
+    AliasSet &AS = *I;
+    // Skip Forward Alias Sets, dont consider ignored alias sets object.
+    if (AS.isForwardingAliasSet())
+      continue;
+    // Return false in case of MustAlias
+    if (AS.isMustAlias())
+      return false;
+    Value *SomePtr = AS.begin()->getValue();
+    bool typeCheck = 1;
+    // Check for Ismod & MayAlias
+    HasMayAlias |= AS.isMayAlias();
+    IsMod |= AS.isMod();
+    for (auto A : AS) {
+      Value *Ptr = A.getValue();
+      // alias tracker should have pointers of same data type.
+      typeCheck = (typeCheck && (SomePtr->getType() == Ptr->getType()));
+      // Insert memory to PointerSet
+      PointerSet->insert(Ptr);
+      // Check any memory modified in loop body.
+    }
+    // Atleast one alias tracker should have pointers of same data type.
+    TypeSafety |= typeCheck;
+  }
+  // Ensure types should be of same type.
+  if (!TypeSafety) {
+    DEBUG(dbgs() << "    Alias tracker type safety failed!\n");
+    return false;
+  }
+  // Ensure loop body should not be read only.
+  if (!IsMod) {
+    DEBUG(dbgs() << "    No memory modified in loop body\n");
+    return false;
+  }
+  // Make sure alias set has may alias case.
+  // If there no alias memory ambiguity, return false.
+  if (!HasMayAlias) {
+    DEBUG(dbgs() << "    No ambiguity in memory access.\n");
+    return false;
+  }
+  // Atleast 2 pointers needed for runtime check.
+  if (PointerSet->size() <= 1) {
+    DEBUG(dbgs() << "    Didnt found sufficient pointers in loop body\n");
+    return false;
+  }
+  return true;
+}
+
+/// \brief Check loop instructions safe for Loop versioning.
+/// It returns true if its safe else returns false.
+/// Consider following:
+/// 1) Check all load store in loop body are non atomic & non volatile.
+/// 2) Make sure loop body should not have any call instruction.
+/// 3) Check store instruction execution guarantee.
+/// 4) Loop body should not have any may throw instruction.
+bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) {
+  // Instruction should not be NULL.
+  assert(I != nullptr && "Null instruction found!");
+  // Check parallel loop
+  const bool IsAnnotatedParallel = CurLoop->isAnnotatedParallel();
+  // We dont support call instructions. however, we ignore few intrinsic.
+  // If any call instruction found then simply return false.
+  CallInst *CI = dyn_cast<CallInst>(I);
+  if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI) &&
+      !(CI->getCalledFunction() && TLI &&
+        TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
+    DEBUG(dbgs() << "    Found a non-intrinsic, non-libfunc callsite.\n");
+    return false;
+  }
+  if (I->mayThrow()) {
+    DEBUG(dbgs() << "    may throw instruction found in loop body\n");
+    return false;
+  }
+  // If current instruction is load instructions
+  // make sure its a simple load(non atomic & non volatile)
+  if (I->mayReadFromMemory()) {
+    LoadInst *Ld = dyn_cast<LoadInst>(I);
+    if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
+      DEBUG(dbgs() << "    Found a non-simple load.\n");
+      return false;
+    }
+    LoadnStoreCounter++;
+    collectStridedAccess(Ld);
+    Value *Ptr = Ld->getPointerOperand();
+    if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
+      InvariantCounter++;
+  }
+  // If current instruction is store instruction
+  // make sure its a simple store(non atomic & non volatile)
+  else if (I->mayWriteToMemory()) {
+    StoreInst *St = dyn_cast<StoreInst>(I);
+    if (!St || (!St->isSimple() && !IsAnnotatedParallel)) {
+      DEBUG(dbgs() << "    Found a non-simple store.\n");
+      return false;
+    }
+    LoadnStoreCounter++;
+    collectStridedAccess(St);
+    // Check for store to a loop invariant, again interested in
+    // atleast one loop invariant store.
+    Value *Ptr = St->getPointerOperand();
+    if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
+      InvariantCounter++;
+
+    isReadOnlyLoop = false;
+  }
+  return true;
+}
+
+/// \brief Check loop instructions and confirms its good for Loop versioning.
+bool LoopVersioningLICM::loopInstructionsLegality() {
+  // Resetting counters.
+  LoadnStoreCounter = 0;
+  InvariantCounter = 0;
+  isReadOnlyLoop = true;
+  // Instruction check: Iterate over loop blocks and instructions of each block
+  // and check instruction safety.
+  for (Loop::block_iterator bb = CurLoop->block_begin(),
+                            be = CurLoop->block_end();
+       bb != be; ++bb) {
+    for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
+         ++it) {
+      // If instruction in unsafe just return false.
+      if (!instructionSafeForVersioning(it))
+        return false;
+    }
+  }
+  // Get LoopAccessInfo from current loop.
+  LAI = &LAA->getInfo(CurLoop, Strides);
+  // Loop should have atleast invariant store instruction.
+  if (!InvariantCounter) {
+    DEBUG(dbgs() << "    Invariant not found !!\n");
+    return false;
+  }
+  // Read only loop not allowed.
+  if (isReadOnlyLoop) {
+    DEBUG(dbgs() << "    Found a read-only loop!\n");
+    return false;
+  }
+  // Check memory threshold, should be in limit.
+  if (PointerSet->size() > RuntimeMemoryCheckThreshold) {
+    DEBUG(dbgs() << "    Loop body has pointers more then defined threshold\n");
+    return false;
+  }
+  // Check invariant threshold, should be in limit.
+  if ((((float)(InvariantCounter) / (float)(LoadnStoreCounter)) * 100) <
+      InvariantThreshold) {
+    DEBUG(dbgs()
+          << "    Invariant load & store are less then defined threshold\n");
+    DEBUG(dbgs() << "    Invariant loads & stores: "
+                 << (int)(((float)(InvariantCounter) /
+                           (float)(LoadnStoreCounter)) *
+                          100) << "%\n");
+    DEBUG(dbgs() << "    Invariant loads & store threshold: "
+                 << InvariantThreshold << "%\n");
+    return false;
+  }
+  return true;
+}
+
+/// \brief Checks loop is already visited or not.
+/// It checks for loop meta data, in case of revisiting loop
+/// returns true else false.
+bool LoopVersioningLICM::isLoopAlreadyVisited() {
+  // Check for loop latch & terminator instruction.
+  if (!CurLoop->getLoopLatch() || !CurLoop->getLoopLatch()->getTerminator())
+    return false;
+  // Check for loop versioning meta data.
+  Instruction *I = CurLoop->getLoopLatch()->getTerminator();
+  if (I && (I->getMetadata(ORIGINAL_LOOP_METADATA) ||
+            I->getMetadata(VERSION_LOOP_METADATA))) {
+    return true;
+  }
+  return false;
+}
+
+/// \brief Checks legality for loop versioning by considering following:
+/// a) loop structure legality   b) loop instruction legality
+/// c) loop memory access legality.
+/// Return true if legal else returns false.
+bool LoopVersioningLICM::isLegalForVersioning() {
+  DEBUG(dbgs() << "Loop: " << *CurLoop);
+  // Make sure not re-visiting same loop again.
+  if (isLoopAlreadyVisited()) {
+    DEBUG(dbgs() << "    Revisiting loop in loop versioning not allowed.\n\n");
+    return false;
+  }
+  // Check loop structure leagality.
+  if (!loopStructureLegality()) {
+    DEBUG(dbgs() << "    Loop structure not suitable for loop versioning\n\n");
+    return false;
+  }
+  // Check loop memory access leagality.
+  if (!loopInstructionsLegality()) {
+    DEBUG(
+        dbgs() << "    Loop instructions not suitable for loop versioning\n\n");
+    return false;
+  }
+  // Check loop memory access leagality.
+  if (!loopMemoryLegality()) {
+    DEBUG(dbgs()
+          << "    Loop memory access not suitable for loop versioning\n\n");
+    return false;
+  }
+  // Loop versioning is feasible, return true.
+  DEBUG(dbgs() << "    Loop Versioning found to be beneficial\n\n");
+  return true;
+}
+
+/// \brief Create memcheck & new version of loop
+/// 1) Clone original loop
+/// 2) Create a runtime memory check.
+/// 3) Add both loops under runtime check results target.
+/// 4) Set all blocks properly.
+/// It transform loop as shown below
+///                         +----------------+
+///                         |Runtime Memcheck|
+///                         +----------------+
+///                                 |
+///              +----------+----------------+----------+
+///              |                                      |
+///    +---------+----------+               +-----------+----------+
+///    |Orig Loop Preheader |               |Cloned Loop Preheader |
+///    +--------------------+               +----------------------+
+///              |                                      |
+///    +--------------------+               +----------------------+
+///    |Orig Loop Body      |               |Cloned Loop Body      |
+///    +--------------------+               +----------------------+
+///              |                                      |
+///    +--------------------+               +----------------------+
+///    |Orig Loop|Exit Block|               |Cloned Loop Exit Block|
+///    +--------------------+               +-----------+----------+
+///              |                                      |
+///              +----------+--------------+-----------+
+///                                 |
+///                           +-----+----+
+///                           |Join Block|
+///                           +----------+
+Loop *LoopVersioningLICM::versionizeLoop(LPPassManager &LPM) {
+  std::vector<BasicBlock *> LoopBlocks;
+  std::vector<BasicBlock *> NewBlocks;
+  SmallVector<BasicBlock *, 8> ExitBlocks;
+  LoopBlocks.clear();
+  NewBlocks.clear();
+  ExitBlocks.clear();
+
+  BasicBlock *LoopHeader = CurLoop->getHeader();
+  BasicBlock *LoopPreheader = CurLoop->getLoopPreheader();
+  Function *F = LoopHeader->getParent();
+  // First step, split the preheader and exit blocks, and add these blocks to
+  // the LoopBlocks list.
+  BasicBlock *NewPreheader = SplitEdge(LoopPreheader, LoopHeader, DT, LI);
+  LoopBlocks.push_back(NewPreheader);
+  // We want the loop to come after the preheader, but before the exit blocks.
+  LoopBlocks.insert(LoopBlocks.end(), CurLoop->block_begin(),
+                    CurLoop->block_end());
+
+  CurLoop->getUniqueExitBlocks(ExitBlocks);
+  // Split all of the edges from inside the loop to their exit blocks.  Update
+  // the appropriate Phi nodes as we do so.
+  splitExitEdges(CurLoop, ExitBlocks);
+
+  // The exit blocks may have been changed due to edge splitting, recompute.
+  ExitBlocks.clear();
+  CurLoop->getUniqueExitBlocks(ExitBlocks);
+
+  // Add exit blocks to the loop blocks.
+  LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end());
+
+  // Next step, clone all of the basic blocks that make up the loop (including
+  // the loop preheader and exit blocks), keeping track of the mapping between
+  // the instructions and blocks.
+  NewBlocks.reserve(LoopBlocks.size());
+  ValueToValueMapTy VMap;
+  for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
+    BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".loopVersion", F);
+    NewBlocks.push_back(NewBB);
+    VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
+    LPM.cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, CurLoop);
+  }
+  // Splice the newly inserted blocks into the function right before the
+  // original preheader.
+  F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(),
+                                NewBlocks[0], F->end());
+
+  // Now we create the new Loop object for the versioned loop.
+  Loop *NewLoop = cloneLoop(CurLoop, CurLoop->getParentLoop(), VMap, LI, &LPM);
+
+  Loop *ParentLoop = CurLoop->getParentLoop();
+  if (ParentLoop) {
+    // Make sure to add the cloned preheader and exit blocks to the parent loop
+    // as well.
+    ParentLoop->addBasicBlockToLoop(NewBlocks[0], *LI);
+  }
+  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+    BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
+    // The new exit block should be in the same loop as the old one.
+    if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
+      ExitBBLoop->addBasicBlockToLoop(NewExit, *LI);
+
+    assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
+           "Exit block should have been split to have one successor!");
+    BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
+
+    // If the successor of the exit block had PHI nodes, add an entry for
+    // NewExit.
+    for (BasicBlock::iterator I = ExitSucc->begin();
+         PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+      Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
+      ValueToValueMapTy::iterator It = VMap.find(V);
+      if (It != VMap.end())
+        V = It->second;
+      PN->addIncoming(V, NewExit);
+    }
+
+    if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
+      PHINode *PN = PHINode::Create(LPad->getType(), 0, "",
+                                    ExitSucc->getFirstInsertionPt());
+      for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc);
+           I != E; ++I) {
+        BasicBlock *BB = *I;
+        LandingPadInst *LPI = BB->getLandingPadInst();
+        LPI->replaceAllUsesWith(PN);
+        PN->addIncoming(LPI, BB);
+      }
+    }
+  }
+  // Rewrite the code to refer to itself.
+  for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
+    for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+                              E = NewBlocks[i]->end();
+         I != E; ++I)
+      RemapInstruction(I, VMap,
+                       RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
+
+  // Rewrite the original preheader to select between versions of the loop.
+  BranchInst *OldBR = cast<BranchInst>(LoopPreheader->getTerminator());
+  assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
+         "Preheader splitting did not work correctly!");
+  // create runtime check.
+  Instruction *MemRuntimeCheck;
+  Instruction *FirstCheckInst;
+  std::tie(FirstCheckInst, MemRuntimeCheck) =
+      LAI->addRuntimeCheck(LoopPreheader->getTerminator());
+
+  if (MemRuntimeCheck) {
+    BasicBlock *CheckBlock =
+        LoopPreheader->splitBasicBlock(MemRuntimeCheck, "LVer.memcheck");
+    if (ParentLoop)
+      ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
+    BranchInst::Create(LoopBlocks[0], NewBlocks[0], MemRuntimeCheck,
+                       CheckBlock);
+    LPM.deleteSimpleAnalysisValue(OldBR, CurLoop);
+    OldBR->eraseFromParent();
+  }
+
+  return NewLoop;
+}
+
+/// \brief Update loop with aggressive alias.
+/// It marks load & store memory instructions with no-alias.
+void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) {
+  // Get latch terminator instruction.
+  Instruction *I = VerLoop->getLoopLatch()->getTerminator();
+  // Create alias scope domain.
+  MDBuilder MDB(I->getContext());
+  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("LVDomain");
+  std::string Name = "LVAliasScope";
+  DenseMap<const Instruction *, MDNode *> NewScopes;
+  SmallVector<Metadata *, 4> Scopes, NoAliases;
+  // Iterate over each instruction of loop.
+  // set no-alias for all load & store instructions.
+  for (Loop::block_iterator bb = VerLoop->block_begin(),
+                            be = VerLoop->block_end();
+       bb != be; ++bb) {
+    for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
+         ++it) {
+      // Only interested in load & stores
+      if (!it->mayReadFromMemory() && !it->mayWriteToMemory())
+        continue;
+      // create scope metadata.
+      Instruction *I = dyn_cast<Instruction>(it);
+      MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
+      Scopes.push_back(NewScope);
+      NoAliases.push_back(NewScope);
+      // Set no-alias for current instruction.
+      I->setMetadata(
+          LLVMContext::MD_noalias,
+          MDNode::concatenate(I->getMetadata(LLVMContext::MD_noalias),
+                              MDNode::get(I->getContext(), NoAliases)));
+      // set alias-scope for current instruction.
+      I->setMetadata(
+          LLVMContext::MD_alias_scope,
+          MDNode::concatenate(I->getMetadata(LLVMContext::MD_alias_scope),
+                              MDNode::get(I->getContext(), Scopes)));
+    }
+  }
+}
+
+bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) {
+  if (skipOptnoneFunction(L))
+    return false;
+  Changed = false;
+  // Get our Loop and Alias Analysis information...
+  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  AA = &getAnalysis<AliasAnalysis>();
+  SE = &getAnalysis<ScalarEvolution>();
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+  LAA = &getAnalysis<LoopAccessAnalysis>();
+  LAI = nullptr;
+  // Set Current Loop
+  CurLoop = L;
+  // Get the preheader block to move instructions into...
+  Preheader = L->getLoopPreheader();
+  // Initial allocation
+  CurAST = new AliasSetTracker(*AA);
+  PointerSet = new ValueSet();
+
+  // Loop over the body of this loop, construct AST.
+  for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
+       ++I) {
+    BasicBlock *BB = *I;
+    if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
+      CurAST->add(*BB);          // Incorporate the specified basic block
+  }
+  // Check feasiblity of loop versioning.
+  // If versioning found to be feasible and beneficial then procees
+  // else simply return, by cleaning up memory.
+  if (isLegalForVersioning()) {
+    // Do loop versioning.
+    // Create memcheck for unique memory accessed inside loop.
+    // Clone original loop, and set blocks properly.
+    Loop *VerLoop = versionizeLoop(LPM);
+    // Set Loop Versioning metaData for original loop.
+    setLoopMetaData(CurLoop, ORIGINAL_LOOP_METADATA);
+    // Set Loop Versioning metaData for version loop.
+    setLoopMetaData(VerLoop, VERSION_LOOP_METADATA);
+    // Update version loop with aggressive aliasing.
+    setNoAliasToLoop(VerLoop);
+    // Delete newly created loop from loop pass manager.
+    LPM.deleteLoopFromQueue(VerLoop);
+    Changed = true;
+  }
+  // Delete allocated memory.
+  delete CurAST;
+  delete PointerSet;
+  return Changed;
+}
+
+char LoopVersioningLICM::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopVersioningLICM, "do-loop-versioning-licm",
+                      "Loop Versioning For LICM", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
+INITIALIZE_PASS_END(LoopVersioningLICM, "do-loop-versioning-licm",
+                    "Loop Versioning For LICM", false, false)
+
+Pass *llvm::createLoopVersioningLICMPass() { return new LoopVersioningLICM(); }
Index: lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- lib/Transforms/Scalar/Scalar.cpp
+++ lib/Transforms/Scalar/Scalar.cpp
@@ -54,6 +54,7 @@
   initializeLoopRerollPass(Registry);
   initializeLoopUnrollPass(Registry);
   initializeLoopUnswitchPass(Registry);
+  initializeLoopVersioningLICMPass(Registry);
   initializeLoopIdiomRecognizePass(Registry);
   initializeLowerAtomicPass(Registry);
   initializeLowerExpectIntrinsicPass(Registry);
Index: test/Transforms/LoopVersioning/loopversioning1.ll
===================================================================
--- test/Transforms/LoopVersioning/loopversioning1.ll
+++ test/Transforms/LoopVersioning/loopversioning1.ll
@@ -0,0 +1,55 @@
+; RUN: opt < %s  -O1  -S -loop-versioning-licm -debug-only=loop-versioning-licm 2>&1 | FileCheck %s
+;
+; Test to confirm loop is a candidate for Loop versioning.
+;
+; CHECK: Loop: Loop at depth 2 containing: %for.body3<header><latch><exiting>
+; CHECK-NEXT:   Loop Versioning found to be beneficial
+
+define i32 @foo(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 {
+entry:
+  %cmp14 = icmp eq i32 %itr, 0
+  br i1 %cmp14, label %for.end13, label %for.cond1.preheader.preheader
+
+for.cond1.preheader.preheader:                    ; preds = %entry
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.inc11
+  %j.016 = phi i32 [ %j.1.lcssa, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ]
+  %i.015 = phi i32 [ %inc12, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ]
+  %cmp212 = icmp ult i32 %j.016, %itr
+  br i1 %cmp212, label %for.body3.lr.ph, label %for.inc11
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
+  %add = add i32 %i.015, %itr
+  %idxprom6 = zext i32 %i.015 to i64
+  %arrayidx7 = getelementptr inbounds i32, i32* %var3, i64 %idxprom6
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3.lr.ph, %for.body3
+  %j.113 = phi i32 [ %j.016, %for.body3.lr.ph ], [ %inc, %for.body3 ]
+  %idxprom = zext i32 %j.113 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom
+  store i32 %add, i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx7, align 4
+  %add8 = add nsw i32 %0, %add
+  store i32 %add8, i32* %arrayidx7, align 4
+  %inc = add nuw i32 %j.113, 1
+  %cmp2 = icmp ult i32 %inc, %itr
+  br i1 %cmp2, label %for.body3, label %for.inc11.loopexit
+
+for.inc11.loopexit:                               ; preds = %for.body3
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.inc11.loopexit, %for.cond1.preheader
+  %j.1.lcssa = phi i32 [ %j.016, %for.cond1.preheader ], [ %itr, %for.inc11.loopexit ]
+  %inc12 = add nuw i32 %i.015, 1
+  %cmp = icmp ult i32 %inc12, %itr
+  br i1 %cmp, label %for.cond1.preheader, label %for.end13.loopexit
+
+for.end13.loopexit:                               ; preds = %for.inc11
+  br label %for.end13
+
+for.end13:                                        ; preds = %for.end13.loopexit, %entry
+  ret i32 0
+}
+
Index: test/Transforms/LoopVersioning/loopversioning2.ll
===================================================================
--- test/Transforms/LoopVersioning/loopversioning2.ll
+++ test/Transforms/LoopVersioning/loopversioning2.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s  -O1  -S -loop-versioning-licm -debug-only=loop-versioning-licm 2>&1 | FileCheck %s
+;
+; Test to confirm loop is a good candidate for loop versioning
+;
+; CHECK: Loop: Loop at depth 2 containing: %for.body3<header><latch><exiting>
+; CHECK-NEXT:     Loop Versioning found to be beneficial
+define i32 @foo(i32* nocapture %var1, i32* nocapture %var2, i32* nocapture %var3, i32 %itr) #0 {
+entry:
+  %cmp17 = icmp eq i32 %itr, 0
+  br i1 %cmp17, label %for.end16, label %for.cond1.preheader.preheader
+
+for.cond1.preheader.preheader:                    ; preds = %entry
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.inc14
+  %j.019 = phi i32 [ %j.1.lcssa, %for.inc14 ], [ 0, %for.cond1.preheader.preheader ]
+  %i.018 = phi i32 [ %inc15, %for.inc14 ], [ 0, %for.cond1.preheader.preheader ]
+  %cmp215 = icmp ult i32 %j.019, %itr
+  br i1 %cmp215, label %for.body3.lr.ph, label %for.inc14
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
+  %add = add i32 %i.018, %itr
+  %idxprom9 = zext i32 %i.018 to i64
+  %arrayidx10 = getelementptr inbounds i32, i32* %var3, i64 %idxprom9
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3.lr.ph, %for.body3
+  %j.116 = phi i32 [ %j.019, %for.body3.lr.ph ], [ %inc, %for.body3 ]
+  %idxprom = zext i32 %j.116 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom
+  store i32 %add, i32* %arrayidx, align 4
+  %arrayidx6 = getelementptr inbounds i32, i32* %var2, i64 %idxprom
+  store i32 %add, i32* %arrayidx6, align 4
+  %0 = load i32, i32* %arrayidx, align 4
+  %1 = load i32, i32* %arrayidx10, align 4
+  %add11 = add nsw i32 %1, %0
+  store i32 %add11, i32* %arrayidx10, align 4
+  %inc = add nuw i32 %j.116, 1
+  %cmp2 = icmp ult i32 %inc, %itr
+  br i1 %cmp2, label %for.body3, label %for.inc14.loopexit
+
+for.inc14.loopexit:                               ; preds = %for.body3
+  br label %for.inc14
+
+for.inc14:                                        ; preds = %for.inc14.loopexit, %for.cond1.preheader
+  %j.1.lcssa = phi i32 [ %j.019, %for.cond1.preheader ], [ %itr, %for.inc14.loopexit ]
+  %inc15 = add nuw i32 %i.018, 1
+  %cmp = icmp ult i32 %inc15, %itr
+  br i1 %cmp, label %for.cond1.preheader, label %for.end16.loopexit
+
+for.end16.loopexit:                               ; preds = %for.inc14
+  br label %for.end16
+
+for.end16:                                        ; preds = %for.end16.loopexit, %entry
+  ret i32 0
+}
Index: test/Transforms/LoopVersioning/loopversioning3.ll
===================================================================
--- test/Transforms/LoopVersioning/loopversioning3.ll
+++ test/Transforms/LoopVersioning/loopversioning3.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s  -O1  -S -loop-versioning-licm -debug-only=loop-versioning-licm  2>&1 | FileCheck %s
+;
+; Test to confirm loop doesn't has invariant store.
+; loop is not a candidate for loop versioning.
+;
+; CHECK: Loop: Loop at depth 2 containing: %for.body3<header><latch><exiting>
+; CHECK-NEXT:    Invariant not found !!
+; CHECK-NEXT:    Loop instructions not suitable for loop versioning
+
+define i32 @foo(i32* nocapture %var1, i32 %itr) #0 {
+entry:
+  %cmp18 = icmp eq i32 %itr, 0
+  br i1 %cmp18, label %for.end8, label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %entry, %for.inc6
+  %j.020 = phi i32 [ %j.1.lcssa, %for.inc6 ], [ 0, %entry ]
+  %i.019 = phi i32 [ %inc7, %for.inc6 ], [ 0, %entry ]
+  %cmp216 = icmp ult i32 %j.020, %itr
+  br i1 %cmp216, label %for.body3.lr.ph, label %for.inc6
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
+  %0 = zext i32 %j.020 to i64
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
+  %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+  %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %1, %itr
+  store i32 %add, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %itr
+  br i1 %exitcond, label %for.inc6, label %for.body3
+
+for.inc6:                                         ; preds = %for.body3, %for.cond1.preheader
+  %j.1.lcssa = phi i32 [ %j.020, %for.cond1.preheader ], [ %itr, %for.body3 ]
+  %inc7 = add nuw i32 %i.019, 1
+  %exitcond21 = icmp eq i32 %inc7, %itr
+  br i1 %exitcond21, label %for.end8, label %for.cond1.preheader
+
+for.end8:                                         ; preds = %for.inc6, %entry
+  ret i32 0
+}
+