Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -175,6 +175,7 @@ void initializeLoopRerollPass(PassRegistry&); void initializeLoopUnrollPass(PassRegistry&); void initializeLoopUnswitchPass(PassRegistry&); +void initializeLoopVersioningPass(PassRegistry&); void initializeLoopIdiomRecognizePass(PassRegistry&); void initializeLowerAtomicPass(PassRegistry&); void initializeLowerBitSetsPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -104,6 +104,7 @@ (void) llvm::createLoopRerollPass(); (void) llvm::createLoopUnrollPass(); (void) llvm::createLoopUnswitchPass(); + (void) llvm::createLoopVersioningPass(); (void) llvm::createLoopIdiomPass(); (void) llvm::createLoopRotatePass(); (void) llvm::createLowerExpectIntrinsicPass(); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -140,6 +140,12 @@ //===----------------------------------------------------------------------===// // +// LoopVersioning - This pass is a loop multi versioning pass. +// +Pass *createLoopVersioningPass(); + +//===----------------------------------------------------------------------===// +// // LoopInterchange - This pass interchanges loops to provide a more // cache-friendly memory access patterns. // Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -55,6 +55,10 @@ cl::init(true), cl::Hidden, cl::desc("Enable the new, experimental SROA pass")); +static cl::opt UseLoopVersioning("loop-versioning", + cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental Loop Versioning pass")); + static cl::opt RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); @@ -244,6 +248,11 @@ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); MPM.add(createInstructionCombiningPass()); MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + // Do loop versioning if right option provided, default disabled. + if (UseLoopVersioning) { + MPM.add(createLoopVersioningPass()); // Do Loop Versioning + MPM.add(createLICMPass()); // Hoist loop invariants + } MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. MPM.add(createLoopDeletionPass()); // Delete dead loops if (EnableLoopInterchange) Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -25,6 +25,7 @@ LoopStrengthReduce.cpp LoopUnrollPass.cpp LoopUnswitch.cpp + LoopVersioning.cpp LowerAtomic.cpp LowerExpectIntrinsic.cpp MemCpyOptimizer.cpp Index: lib/Transforms/Scalar/LoopVersioning.cpp =================================================================== --- lib/Transforms/Scalar/LoopVersioning.cpp +++ lib/Transforms/Scalar/LoopVersioning.cpp @@ -0,0 +1,971 @@ +//===-- LoopVersioning.cpp - Creates aggressive alias version of a loop --===// +// +// The LLVM Compiler Infrastructure +// +// Cases where memory aliasing wasn't sure compiler became conservative and +// didn't proceeds with invariant code motion. This results in possible +// missed optimization opportunities. +// +// Our main motivation is to exploit such cases and allow LICM optimization. +// +// Most of the time when alias analysis is unsure about memory access and it +// assumes may-alias. This un surety from alias analysis restrict LICM to +// proceed further. Cases where alias analysis is unsure we like to use loop +// versioning as an alternative. +// +// Loop Versioning will creates version of the loop with aggressive alias and +// the other with conservative (default) alias. Aggressive alias version of +// loop will have all the memory access marked as no-alias. These two version +// of loop will be preceded by a memory runtime check. +// This runtime check consists of bound checks for all unique memory accessed +// in loop, and it ensures aliasing of memory. Based on this check result at +// runtime any of the loops gets executed, if memory is non aliased then +// aggressive aliasing loop gets executed, else when memory is aliased then non +// aggressive aliased version gets executed. +// +// Following are the top level steps: +// +// a) Perform loop versioning feasibility check. +// b) If loop is a candidate for versioning then create a memory bound check, +// by considering all the memory access in loop body. +// c) Clone original loop and set all memory access as no-alias in new loop. +// d) Set original loop & versioned loop as a branch target of runtime check +// result. +// e) Call LICM on aggressive alias versioned of loop. +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Dominators.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/VectorUtils.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/IR/PredIteratorCache.h" +using namespace llvm; +#include + +#define DEBUG_TYPE "loop-versioning" +#define ORIGINAL_LOOP_METADATA "LoopVersioningOriginalLoop" +#define VERSION_LOOP_METADATA "LoopVersioningVersionLoop" + +typedef SmallPtrSet ValueSet; + +/// Loop Versioning invariant threshold +static cl::opt + LVInvarThreshold("loopver-invar-threshold", + cl::desc("Loop Versioning invariant threshold"), + cl::init(25), cl::Hidden); + +/// Loop Versioning max loop depth threshold +static cl::opt + LVLoopDepthThreshold("loopver-max-depth-threshold", + cl::desc("Loop Versioning loop depth threshold"), + cl::init(2), cl::Hidden); + +/// Loop Versioning memcheck pointers threshold +static cl::opt LVMemchkPtrThreshold( + "loopver-memcheck-ptr-threshold", + cl::desc("Loop Versioning memcheck pointers threshold"), cl::init(5), + cl::Hidden); + +/// \brief Find the operand of the GEP that should be checked for consecutive +/// stores. This ignores trailing indices that have no effect on the final +/// pointer. +static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) { + unsigned LastOperand = Gep->getNumOperands() - 1; + const DataLayout &DL = Gep->getModule()->getDataLayout(); + + unsigned GEPAllocSize = DL.getTypeAllocSize( + cast(Gep->getType()->getScalarType())->getElementType()); + + // Walk backwards and try to peel off zeros. + while (LastOperand > 1 && + match(Gep->getOperand(LastOperand), llvm::PatternMatch::m_Zero())) { + // Find the type we're currently indexing into. + gep_type_iterator GEPTI = gep_type_begin(Gep); + std::advance(GEPTI, LastOperand - 1); + + // If it's a type with the same allocation size as the result of the GEP we + // can peel off the zero index. + if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize) + break; + --LastOperand; + } + + return LastOperand; +} + +/// \brief Recursively clone the specified loop and all of its children, +/// mapping the blocks with the specified map. +static Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, + LPPassManager *LPM) { + Loop *New = new Loop(); + LPM->insertLoop(New, PL); + + // Add all of the blocks in L to the new loop. + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; + ++I) + if (LI->getLoopFor(*I) == L) + New->addBasicBlockToLoop(cast(VM[*I]), *LI); + + // Add all of the subloops to the new loop. + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + cloneLoop(*I, New, VM, LI, LPM); + + return New; +} + +/// \brief Remove GEPs whose indices but the last one are loop invariant and +/// return the induction operand of the gep pointer. +static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { + GetElementPtrInst *GEP = dyn_cast(Ptr); + if (!GEP) + return Ptr; + + unsigned InductionOperand = getGEPInductionOperand(GEP); + + // Check that all of the gep indices are uniform except for our induction + // operand. + for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) + if (i != InductionOperand && + !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp)) + return Ptr; + return GEP->getOperand(InductionOperand); +} + +/// \brief Look for a cast use of the passed value. +static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) { + Value *UniqueCast = nullptr; + for (User *U : Ptr->users()) { + CastInst *CI = dyn_cast(U); + if (CI && CI->getType() == Ty) { + if (!UniqueCast) + UniqueCast = CI; + else + return nullptr; + } + } + return UniqueCast; +} + +/// \brief Get the stride of a pointer access in a loop. +/// Looks for symbolic strides "a[i*stride]". Returns the symbolic stride as a +/// pointer to the Value, or null otherwise. +static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { + const PointerType *PtrTy = dyn_cast(Ptr->getType()); + if (!PtrTy || PtrTy->isAggregateType()) + return nullptr; + + // Try to remove a gep instruction to make the pointer (actually index at this + // point) easier analyzable. If OrigPtr is equal to Ptr we are analzying the + // pointer, otherwise, we are analyzing the index. + Value *OrigPtr = Ptr; + + // The size of the pointer access. + int64_t PtrAccessSize = 1; + + Ptr = stripGetElementPtr(Ptr, SE, Lp); + const SCEV *V = SE->getSCEV(Ptr); + + if (Ptr != OrigPtr) + // Strip off casts. + while (const SCEVCastExpr *C = dyn_cast(V)) + V = C->getOperand(); + + const SCEVAddRecExpr *S = dyn_cast(V); + if (!S) + return nullptr; + + V = S->getStepRecurrence(*SE); + if (!V) + return nullptr; + + // Strip off the size of access multiplication if we are still analyzing the + // pointer. + if (OrigPtr == Ptr) { + const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout(); + DL.getTypeAllocSize(PtrTy->getElementType()); + if (const SCEVMulExpr *M = dyn_cast(V)) { + if (M->getOperand(0)->getSCEVType() != scConstant) + return nullptr; + + const APInt &APStepVal = + cast(M->getOperand(0))->getValue()->getValue(); + + // Huge step value - give up. + if (APStepVal.getBitWidth() > 64) + return nullptr; + + int64_t StepVal = APStepVal.getSExtValue(); + if (PtrAccessSize != StepVal) + return nullptr; + V = M->getOperand(1); + } + } + + // Strip off casts. + Type *StripedOffRecurrenceCast = nullptr; + if (const SCEVCastExpr *C = dyn_cast(V)) { + StripedOffRecurrenceCast = C->getType(); + V = C->getOperand(); + } + + // Look for the loop invariant symbolic value. + const SCEVUnknown *U = dyn_cast(V); + if (!U) + return nullptr; + + Value *Stride = U->getValue(); + if (!Lp->isLoopInvariant(Stride)) + return nullptr; + + // If we have stripped off the recurrence cast we have to make sure that we + // return the value that is used in this loop so that we can replace it later. + if (StripedOffRecurrenceCast) + Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast); + + return Stride; +} + +// Sets input string as meta data to loop latch terminator instruction. +static void setLoopMetaData(Loop *CurLoop, const char *MDString) { + assert(CurLoop->getLoopPreheader() != nullptr && + "Loop should have a preheader"); + Instruction *I = CurLoop->getLoopLatch()->getTerminator(); + LLVMContext &C = I->getContext(); + SmallVector Elts; + Elts.push_back( + ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); + MDNode *Node = MDNode::get(C, Elts); + I->setMetadata(MDString, Node); +} + +namespace { +struct LoopVersioning : public LoopPass { + static char ID; + + bool runOnLoop(Loop *L, LPPassManager &LPM) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addRequired(); + AU.addRequiredID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } + + using llvm::Pass::doFinalization; + + bool doFinalization() override { return false; } + + LoopVersioning() + : LoopPass(ID), AA(nullptr), SE(nullptr), LI(nullptr), DT(nullptr), + TLI(nullptr), LAA(nullptr), LAI(nullptr), Changed(false), + Preheader(nullptr), CurLoop(nullptr), CurAST(nullptr), + PointerSet(nullptr), RuntimeMemoryCheckThreshold(LVMemchkPtrThreshold), + LoopDepthThreshold(LVLoopDepthThreshold), + InvariantThreshold(LVInvarThreshold), LoadnStoreCounter(0), + InvariantCounter(0), isReadOnlyLoop(true) { + initializeLoopVersioningPass(*PassRegistry::getPassRegistry()); + } + + AliasAnalysis *AA; // Current AliasAnalysis information + ScalarEvolution *SE; // Current ScalarEvolution + LoopInfo *LI; // Current LoopInfo + DominatorTree *DT; // Dominator Tree for the current Loop. + TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding. + LoopAccessAnalysis *LAA; // Current LoopAccessAnalysis + const LoopAccessInfo *LAI; // Current Loop's LoopAccessInfo + + bool Changed; // Set to true when we change anything. + BasicBlock *Preheader; // The preheader block of the current loop. + Loop *CurLoop; // The current loop we are working on. + AliasSetTracker *CurAST; // AliasSet information for the current loop. + ValueSet *PointerSet; + ValueToValueMap Strides; + + unsigned RuntimeMemoryCheckThreshold; // Pointer limit for runtime check. + unsigned LoopDepthThreshold; // Maximum loop nest threshold + unsigned InvariantThreshold; // Minimum invariant threshold + unsigned LoadnStoreCounter; // Counter to track num of load & store + unsigned InvariantCounter; // Counter to track num of invariant + bool isReadOnlyLoop; // to check loop is read only. + + bool isLegalForVersioning(); + bool loopStructureLegality(); + bool loopInstructionsLegality(); + bool loopMemoryLegality(); + Loop *versionizeLoop(LPPassManager &); + void splitExitEdges(Loop *, const SmallVectorImpl &); + void collectStridedAccess(Value *LoadOrStoreInst); + bool performInvariantCodeMotion(Loop *); + bool isLoopAlreadyVisited(); + void setNoAliasToLoop(Loop *); + bool instructionSafeForVersioning(Instruction *); + const char *getPassName() const override { return "Loop Versioning"; } +}; +} + +/// \brief Collects stride access from a given value. +void LoopVersioning::collectStridedAccess(Value *MemAccess) { + Value *Ptr = nullptr; + if (LoadInst *LI = dyn_cast(MemAccess)) + Ptr = LI->getPointerOperand(); + else if (StoreInst *SI = dyn_cast(MemAccess)) + Ptr = SI->getPointerOperand(); + else + return; + + Value *Stride = getStrideFromPointer(Ptr, SE, CurLoop); + if (!Stride) + return; + + DEBUG(dbgs() << "Found a strided access that we can version"); + DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n"); + Strides[Ptr] = Stride; +} + +/// \brief Split all of the edges from inside the loop to their exit +/// blocks. Update the appropriate Phi nodes as we do so. +void LoopVersioning::splitExitEdges( + Loop *L, const SmallVectorImpl &ExitBlocks) { + + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = ExitBlocks[i]; + SmallVector Preds(pred_begin(ExitBlock), + pred_end(ExitBlock)); + + // Although SplitBlockPredecessors doesn't preserve loop-simplify in + // general, if we call it on all predecessors of all exits then it does. + if (!ExitBlock->isLandingPad()) { + SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", AA, DT, LI, + this->mustPreserveAnalysisID(LCSSAID)); + } else { + SmallVector NewBBs; + SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa", + NewBBs, AA, DT, LI); + } + } +} +/// \brief Check loop structure and confirms its good for Loop Versioning. +bool LoopVersioning::loopStructureLegality() { + // Loop must have a preheader, if not return false. + if (!CurLoop->getLoopPreheader()) { + DEBUG(dbgs() << " loop preheader is missing\n"); + return false; + } + // Loop should be innermost loops, if not return false. + if (CurLoop->getSubLoops().size()) { + DEBUG(dbgs() << " loop is not innermost\n"); + return false; + } + // Loop should have a single backedge, if not return false. + if (CurLoop->getNumBackEdges() != 1) { + DEBUG(dbgs() << " loop has multiple backedges\n"); + return false; + } + // Loop must have a single exiting block, if not return false. + if (!CurLoop->getExitingBlock()) { + DEBUG(dbgs() << " loop has multiple exiting block\n"); + return false; + } + // We only handle bottom-tested loops, i.e. loop in which the condition is + // checked at the end of each iteration. With that we can assume that all + // instructions in the loop are executed the same number of times. + if (CurLoop->getExitingBlock() != CurLoop->getLoopLatch()) { + DEBUG(dbgs() + << " loop control flow is not understood by LoopVersioning\n"); + return false; + } + // Loop depth more then LoopDepthThreshold are not allowed + if (CurLoop->getLoopDepth() > LoopDepthThreshold) { + DEBUG(dbgs() << " loop depth is more then threshold\n"); + return false; + } + // PAS: Loop should have a dedicated exit block, if not return false. + if (!CurLoop->hasDedicatedExits()) { + DEBUG(dbgs() << " loop does not has dedicated exit blocks\n"); + return false; + } + // Loop should have a trip count, if not return false. + // ScalarEvolution needs to be able to find the exit count. + const SCEV *ExitCount = SE->getBackedgeTakenCount(CurLoop); + if (ExitCount == SE->getCouldNotCompute()) { + DEBUG(dbgs() << " loop does not has trip count\n"); + return false; + } + return true; +} + +/// \brief Check loop memory accesses and confirms its good for +/// Loop versioning. +bool LoopVersioning::loopMemoryLegality() { + // LoopAccessInfo should not be null. + assert(LAI != nullptr && "LoopAccessInfo should not be null"); + // Check if LoopAccessAnalysis finds loop memory access safe + // for loop versioning. If unsafe return false. + if (!LAI->canVectorizeMemory()) { + DEBUG(dbgs() << " LAA found unsafe memory access.\n"); + return false; + } + bool HasMayAlias = 0; + bool TypeSafety = 0; + bool IsMod = 0; + PointerSet->clear(); + // Memory check: + // Iterate over alias set tracker and individual alias sets. + // 1) Make sure PointerSet should have atleast 2 pointers. + // 2) PointerSet should'nt have pointers more then RuntimeMemoryCheckThreshold + // 3) Make sure AliasSets should not have any must alias set. + for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); I != E; + ++I) { + AliasSet &AS = *I; + // Skip Forward Alias Sets, dont consider ignored alias sets object. + if (AS.isForwardingAliasSet()) + continue; + // Return false in case of MustAlias + if (AS.isMustAlias()) + return false; + Value *SomePtr = AS.begin()->getValue(); + bool typeCheck = 1; + // Check for Ismod & MayAlias + HasMayAlias |= AS.isMayAlias(); + IsMod |= AS.isMod(); + for (auto A : AS) { + Value *Ptr = A.getValue(); + // alias tracker should have pointers of same data type. + typeCheck = (typeCheck && (SomePtr->getType() == Ptr->getType())); + // Insert memory to PointerSet + PointerSet->insert(Ptr); + // Check any memory modified in loop body. + } + // Atleast one alias tracker should have pointers of same data type. + TypeSafety |= typeCheck; + } + // Ensure types should be of same type. + if (!TypeSafety) { + DEBUG(dbgs() << " Alias tracker type safety failed!\n"); + return false; + } + // Ensure loop body should not be read only. + if (!IsMod) { + DEBUG(dbgs() << " No memory modified in loop body\n"); + return false; + } + // Make sure alias set has may alias case. + // If there no alias memory ambiguity, return false. + if (!HasMayAlias) { + DEBUG(dbgs() << " No ambiguity in memory access.\n"); + return false; + } + // Atleast 2 pointers needed for runtime check. + if (PointerSet->size() <= 1) { + DEBUG(dbgs() << " Didnt found sufficient pointers in loop body\n"); + return false; + } + return true; +} + +/// \brief Check loop instructions safe for Loop versioning. +/// It returns true if its safe else returns false. +/// Consider following: +/// 1) Check all load store in loop body are non atomic & non volatile. +/// 2) Make sure loop body should not have any call instruction. +/// 3) Check store instruction execution guarantee. +/// 4) Loop body should not have any may throw instruction. +bool LoopVersioning::instructionSafeForVersioning(Instruction *I) { + // Instruction should not be NULL. + assert(I != nullptr && "Null instruction found!"); + // Check parallel loop + const bool IsAnnotatedParallel = CurLoop->isAnnotatedParallel(); + // We dont support call instructions. however, we ignore few intrinsic. + // If any call instruction found then simply return false. + CallInst *CI = dyn_cast(I); + if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa(CI) && + !(CI->getCalledFunction() && TLI && + TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) { + DEBUG(dbgs() << " Found a non-intrinsic, non-libfunc callsite.\n"); + return false; + } + if (I->mayThrow()) { + DEBUG(dbgs() << " may throw instruction found in loop body\n"); + return false; + } + // If current instruction is load instructions + // make sure its a simple load(non atomic & non volatile) + if (I->mayReadFromMemory()) { + LoadInst *Ld = dyn_cast(I); + if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) { + DEBUG(dbgs() << " Found a non-simple load.\n"); + return false; + } + LoadnStoreCounter++; + collectStridedAccess(Ld); + Value *Ptr = Ld->getPointerOperand(); + if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop)) + InvariantCounter++; + } + // If current instruction is store instruction + // make sure its a simple store(non atomic & non volatile) + else if (I->mayWriteToMemory()) { + StoreInst *St = dyn_cast(I); + if (!St || (!St->isSimple() && !IsAnnotatedParallel)) { + DEBUG(dbgs() << " Found a non-simple store.\n"); + return false; + } + LoadnStoreCounter++; + collectStridedAccess(St); + // Check for store to a loop invariant, again interested in + // atleast one loop invariant store. + Value *Ptr = St->getPointerOperand(); + if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop)) + InvariantCounter++; + + isReadOnlyLoop = false; + } + return true; +} + +/// \brief Check loop instructions and confirms its good for Loop versioning. +bool LoopVersioning::loopInstructionsLegality() { + // Resetting counters. + LoadnStoreCounter = 0; + InvariantCounter = 0; + isReadOnlyLoop = true; + // Instruction check: Iterate over loop blocks and instructions of each block + // and check instruction safety. + for (Loop::block_iterator bb = CurLoop->block_begin(), + be = CurLoop->block_end(); + bb != be; ++bb) { + for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e; + ++it) { + // If instruction in unsafe just return false. + if (!instructionSafeForVersioning(it)) + return false; + } + } + // Get LoopAccessInfo from current loop. + LAI = &LAA->getInfo(CurLoop, Strides); + // Loop should have atleast invariant store instruction. + if (!InvariantCounter) { + DEBUG(dbgs() << " Invariant not found !!\n"); + return false; + } + // Read only loop not allowed. + if (isReadOnlyLoop) { + DEBUG(dbgs() << " Found a read-only loop!\n"); + return false; + } + // Check memory threshold, should be in limit. + if (PointerSet->size() > RuntimeMemoryCheckThreshold) { + DEBUG(dbgs() << " Loop body has pointers more then defined threshold\n"); + return false; + } + // Check invariant threshold, should be in limit. + if ((((float)(InvariantCounter) / (float)(LoadnStoreCounter)) * 100) < + InvariantThreshold) { + DEBUG(dbgs() + << " Invariant load & store are less then defined threshold\n"); + DEBUG(dbgs() << " Invariant loads & stores: " + << (int)(((float)(InvariantCounter) / + (float)(LoadnStoreCounter)) * + 100) + << "%\n"); + DEBUG(dbgs() << " Invariant loads & store threshold: " + << InvariantThreshold << "%\n"); + return false; + } + return true; +} + +/// \brief Checks loop is already visited or not. +/// It checks for loop meta data, in case of revisiting loop +/// returns true else false. +bool LoopVersioning::isLoopAlreadyVisited() { + // Check for loop latch & terminator instruction. + if (!CurLoop->getLoopLatch() || !CurLoop->getLoopLatch()->getTerminator()) + return false; + // Check for loop versioning meta data. + Instruction *I = CurLoop->getLoopLatch()->getTerminator(); + if (I && (I->getMetadata(ORIGINAL_LOOP_METADATA) || + I->getMetadata(VERSION_LOOP_METADATA))) { + return true; + } + return false; +} + +/// \brief Checks legality for loop versioning by considering following: +/// a) loop structure legality b) loop instruction legality +/// c) loop memory access legality. +/// Return true if legal else returns false. +bool LoopVersioning::isLegalForVersioning() { + DEBUG(dbgs() << "Loop: " << *CurLoop); + // Make sure not re-visiting same loop again. + if (isLoopAlreadyVisited()) { + DEBUG(dbgs() << " Revisiting loop in loop versioning not allowed.\n\n"); + return false; + } + // Check loop structure leagality. + if (!loopStructureLegality()) { + DEBUG(dbgs() << " Loop structure not suitable for loop versioning\n\n"); + return false; + } + // Check loop memory access leagality. + if (!loopInstructionsLegality()) { + DEBUG( + dbgs() << " Loop instructions not suitable for loop versioning\n\n"); + return false; + } + // Check loop memory access leagality. + if (!loopMemoryLegality()) { + DEBUG(dbgs() + << " Loop memory access not suitable for loop versioning\n\n"); + return false; + } + // Loop versioning is feasible, return true. + DEBUG(dbgs() << " Loop Versioning found to be beneficial\n\n"); + return true; +} + +/// \brief This function performs Loop invariant code motion on newly +/// version on loop. +/// It calls following for new loop: +/// a)sinkRegion b)hoistRegion c)promoteLoopAccessesToScalars +bool LoopVersioning::performInvariantCodeMotion(Loop *VerLoop) { + bool ChangedByLICM = false; + Function *F = Preheader->getParent(); + // Recalculate Dominator tree + DT->recalculate(*F); + // Recalculate LoopInfo from updated dominator tree. + LoopInfo nLI; + nLI.Analyze(*DT); + // Loop over the body of this loop, construct AST. + AliasSetTracker *NewAST = new AliasSetTracker(*AA); + for (Loop::block_iterator I = VerLoop->block_begin(), + E = VerLoop->block_end(); + I != E; ++I) { + BasicBlock *BB = *I; + if (nLI.getLoopFor(BB) == VerLoop) // Ignore blocks in subloops. + NewAST->add(*BB); // Incorporate the specified basic block + } + // Compute loop safety information. + LICMSafetyInfo SafetyInfo; + computeLICMSafetyInfo(&SafetyInfo, VerLoop); + + // If loop has dedicated exits perform sinkRegion. + if (VerLoop->hasDedicatedExits()) + ChangedByLICM |= sinkRegion(DT->getNode(VerLoop->getHeader()), AA, &nLI, DT, + TLI, VerLoop, NewAST, &SafetyInfo); + // If loop has a pre header perform hoistRegion. + if (VerLoop->getLoopPreheader()) + ChangedByLICM |= hoistRegion(DT->getNode(VerLoop->getHeader()), AA, &nLI, + DT, TLI, VerLoop, NewAST, &SafetyInfo); + + if (VerLoop->getLoopPreheader() || VerLoop->hasDedicatedExits()) { + SmallVector ExitBlocks; + SmallVector InsertPts; + PredIteratorCache PIC; + // Loop over all of the alias sets in the tracker object. + for (AliasSetTracker::iterator I = NewAST->begin(), E = NewAST->end(); + I != E; ++I) { + // perform promoteLoopAccessesToScalars + ChangedByLICM = + promoteLoopAccessesToScalars(*I, ExitBlocks, InsertPts, PIC, &nLI, DT, + VerLoop, NewAST, &SafetyInfo); + // Once we have promoted values across the loop body we have to + // recursively reform LCSSA. + if (ChangedByLICM) + formLCSSARecursively(*VerLoop, *DT, &nLI, + getAnalysisIfAvailable()); + } + } + return ChangedByLICM; +} + +/// \brief Create memcheck & new version of loop +/// 1) Clone original loop +/// 2) Create a runtime memory check. +/// 3) Add both loops under runtime check results target. +/// 4) Set all blocks properly. +/// It transform loop as shown below +/// +----------------+ +/// |Runtime Memcheck| +/// +----------------+ +/// | +/// +----------+----------------+----------+ +/// | | +/// +---------+----------+ +-----------+----------+ +/// |Orig Loop Preheader | |Cloned Loop Preheader | +/// +--------------------+ +----------------------+ +/// | | +/// +--------------------+ +----------------------+ +/// |Orig Loop Body | |Cloned Loop Body | +/// +--------------------+ +----------------------+ +/// | | +/// +--------------------+ +----------------------+ +/// |Orig Loop|Exit Block| |Cloned Loop Exit Block| +/// +--------------------+ +-----------+----------+ +/// | | +/// +----------+--------------+-----------+ +/// | +/// +-----+----+ +/// |Join Block| +/// +----------+ +Loop *LoopVersioning::versionizeLoop(LPPassManager &LPM) { + std::vector LoopBlocks; + std::vector NewBlocks; + SmallVector ExitBlocks; + LoopBlocks.clear(); + NewBlocks.clear(); + ExitBlocks.clear(); + + BasicBlock *LoopHeader = CurLoop->getHeader(); + BasicBlock *LoopPreheader = CurLoop->getLoopPreheader(); + Function *F = LoopHeader->getParent(); + // First step, split the preheader and exit blocks, and add these blocks to + // the LoopBlocks list. + BasicBlock *NewPreheader = SplitEdge(LoopPreheader, LoopHeader, DT, LI); + LoopBlocks.push_back(NewPreheader); + // We want the loop to come after the preheader, but before the exit blocks. + LoopBlocks.insert(LoopBlocks.end(), CurLoop->block_begin(), + CurLoop->block_end()); + + CurLoop->getUniqueExitBlocks(ExitBlocks); + // Split all of the edges from inside the loop to their exit blocks. Update + // the appropriate Phi nodes as we do so. + splitExitEdges(CurLoop, ExitBlocks); + + // The exit blocks may have been changed due to edge splitting, recompute. + ExitBlocks.clear(); + CurLoop->getUniqueExitBlocks(ExitBlocks); + + // Add exit blocks to the loop blocks. + LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end()); + + // Next step, clone all of the basic blocks that make up the loop (including + // the loop preheader and exit blocks), keeping track of the mapping between + // the instructions and blocks. + NewBlocks.reserve(LoopBlocks.size()); + ValueToValueMapTy VMap; + for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) { + BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".loopVersion", F); + NewBlocks.push_back(NewBB); + VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping. + LPM.cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, CurLoop); + } + // Splice the newly inserted blocks into the function right before the + // original preheader. + F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(), + NewBlocks[0], F->end()); + + // Now we create the new Loop object for the versioned loop. + Loop *NewLoop = cloneLoop(CurLoop, CurLoop->getParentLoop(), VMap, LI, &LPM); + + Loop *ParentLoop = CurLoop->getParentLoop(); + if (ParentLoop) { + // Make sure to add the cloned preheader and exit blocks to the parent loop + // as well. + ParentLoop->addBasicBlockToLoop(NewBlocks[0], *LI); + } + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *NewExit = cast(VMap[ExitBlocks[i]]); + // The new exit block should be in the same loop as the old one. + if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i])) + ExitBBLoop->addBasicBlockToLoop(NewExit, *LI); + + assert(NewExit->getTerminator()->getNumSuccessors() == 1 && + "Exit block should have been split to have one successor!"); + BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0); + + // If the successor of the exit block had PHI nodes, add an entry for + // NewExit. + for (BasicBlock::iterator I = ExitSucc->begin(); + PHINode *PN = dyn_cast(I); ++I) { + Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]); + ValueToValueMapTy::iterator It = VMap.find(V); + if (It != VMap.end()) + V = It->second; + PN->addIncoming(V, NewExit); + } + + if (LandingPadInst *LPad = NewExit->getLandingPadInst()) { + PHINode *PN = PHINode::Create(LPad->getType(), 0, "", + ExitSucc->getFirstInsertionPt()); + for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc); + I != E; ++I) { + BasicBlock *BB = *I; + LandingPadInst *LPI = BB->getLandingPadInst(); + LPI->replaceAllUsesWith(PN); + PN->addIncoming(LPI, BB); + } + } + } + // Rewrite the code to refer to itself. + for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) + for (BasicBlock::iterator I = NewBlocks[i]->begin(), + E = NewBlocks[i]->end(); + I != E; ++I) + RemapInstruction(I, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); + + // Rewrite the original preheader to select between versions of the loop. + BranchInst *OldBR = cast(LoopPreheader->getTerminator()); + assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] && + "Preheader splitting did not work correctly!"); + // create runtime check. + Instruction *MemRuntimeCheck; + Instruction *FirstCheckInst; + std::tie(FirstCheckInst, MemRuntimeCheck) = + LAI->addRuntimeCheck(LoopPreheader->getTerminator()); + + if (MemRuntimeCheck) { + BasicBlock *CheckBlock = + LoopPreheader->splitBasicBlock(MemRuntimeCheck, "LVer.memcheck"); + if (ParentLoop) + ParentLoop->addBasicBlockToLoop(CheckBlock, *LI); + BranchInst::Create(LoopBlocks[0], NewBlocks[0], MemRuntimeCheck, + CheckBlock); + LPM.deleteSimpleAnalysisValue(OldBR, CurLoop); + OldBR->eraseFromParent(); + } + + return NewLoop; +} + +/// \brief Update loop with aggressive alias. +/// It marks load & store memory instructions with no-alias. +void LoopVersioning::setNoAliasToLoop(Loop *VerLoop) { + // Get latch terminator instruction. + Instruction *I = VerLoop->getLoopLatch()->getTerminator(); + // Create alias scope domain. + MDBuilder MDB(I->getContext()); + MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("LVDomain"); + std::string Name = "LVAliasScope"; + DenseMap NewScopes; + SmallVector Scopes, NoAliases; + // Iterate over each instruction of loop. + // set no-alias for all load & store instructions. + for (Loop::block_iterator bb = VerLoop->block_begin(), + be = VerLoop->block_end(); + bb != be; ++bb) { + for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e; + ++it) { + // Only interested in load & stores + if (!it->mayReadFromMemory() && !it->mayWriteToMemory()) + continue; + // create scope metadata. + Instruction *I = dyn_cast(it); + MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); + Scopes.push_back(NewScope); + NoAliases.push_back(NewScope); + // Set no-alias for current instruction. + I->setMetadata( + LLVMContext::MD_noalias, + MDNode::concatenate(I->getMetadata(LLVMContext::MD_noalias), + MDNode::get(I->getContext(), NoAliases))); + // set alias-scope for current instruction. + I->setMetadata( + LLVMContext::MD_alias_scope, + MDNode::concatenate(I->getMetadata(LLVMContext::MD_alias_scope), + MDNode::get(I->getContext(), Scopes))); + } + } +} + +bool LoopVersioning::runOnLoop(Loop *L, LPPassManager &LPM) { + if (skipOptnoneFunction(L)) + return false; + Changed = false; + // Get our Loop and Alias Analysis information... + LI = &getAnalysis().getLoopInfo(); + AA = &getAnalysis(); + SE = &getAnalysis(); + DT = &getAnalysis().getDomTree(); + TLI = &getAnalysis().getTLI(); + LAA = &getAnalysis(); + LAI = nullptr; + // Set Current Loop + CurLoop = L; + // Get the preheader block to move instructions into... + Preheader = L->getLoopPreheader(); + // Initial allocation + CurAST = new AliasSetTracker(*AA); + PointerSet = new ValueSet(); + + // Loop over the body of this loop, construct AST. + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; + ++I) { + BasicBlock *BB = *I; + if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops. + CurAST->add(*BB); // Incorporate the specified basic block + } + // Check feasiblity of loop versioning. + // If versioning found to be feasible and beneficial then procees + // else simply return, by cleaning up memory. + if (isLegalForVersioning()) { + // Do loop versioning. + // Create memcheck for unique memory accessed inside loop. + // Clone original loop, and set blocks properly. + Loop *VerLoop = versionizeLoop(LPM); + // Set LoopVersioning metaData for original loop. + setLoopMetaData(CurLoop, ORIGINAL_LOOP_METADATA); + // Set LoopVersioning metaData for version loop. + setLoopMetaData(VerLoop, VERSION_LOOP_METADATA); + // Update version loop with aggressive aliasing. + setNoAliasToLoop(VerLoop); + // Perform Invariant code motion on new version of loop. + performInvariantCodeMotion(VerLoop); + // Delete newly created loop from loop pass manager. + LPM.deleteLoopFromQueue(VerLoop); + Changed = true; + } + // Delete allocated memory. + delete CurAST; + delete PointerSet; + return Changed; +} + +char LoopVersioning::ID = 0; +INITIALIZE_PASS_BEGIN(LoopVersioning, "do-loop-versioning", "Loop Versioning", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis) +INITIALIZE_PASS_END(LoopVersioning, "do-loop-versioning", "Loop Versioning", + false, false) + +Pass *llvm::createLoopVersioningPass() { return new LoopVersioning(); } Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -54,6 +54,7 @@ initializeLoopRerollPass(Registry); initializeLoopUnrollPass(Registry); initializeLoopUnswitchPass(Registry); + initializeLoopVersioningPass(Registry); initializeLoopIdiomRecognizePass(Registry); initializeLowerAtomicPass(Registry); initializeLowerExpectIntrinsicPass(Registry); Index: test/Transforms/LoopVersioning/loopversioning1.ll =================================================================== --- test/Transforms/LoopVersioning/loopversioning1.ll +++ test/Transforms/LoopVersioning/loopversioning1.ll @@ -0,0 +1,55 @@ +; RUN: opt < %s -O1 -S -loop-versioning -debug-only=loop-versioning 2>&1 | FileCheck %s +; +; Test to confirm loop is a candidate for Loop versioning. +; +; CHECK: Loop: Loop at depth 2 containing: %for.body3
+; CHECK-NEXT: Loop Versioning found to be beneficial + +define i32 @foo(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 { +entry: + %cmp14 = icmp eq i32 %itr, 0 + br i1 %cmp14, label %for.end13, label %for.cond1.preheader.preheader + +for.cond1.preheader.preheader: ; preds = %entry + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc11 + %j.016 = phi i32 [ %j.1.lcssa, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ] + %i.015 = phi i32 [ %inc12, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ] + %cmp212 = icmp ult i32 %j.016, %itr + br i1 %cmp212, label %for.body3.lr.ph, label %for.inc11 + +for.body3.lr.ph: ; preds = %for.cond1.preheader + %add = add i32 %i.015, %itr + %idxprom6 = zext i32 %i.015 to i64 + %arrayidx7 = getelementptr inbounds i32, i32* %var3, i64 %idxprom6 + br label %for.body3 + +for.body3: ; preds = %for.body3.lr.ph, %for.body3 + %j.113 = phi i32 [ %j.016, %for.body3.lr.ph ], [ %inc, %for.body3 ] + %idxprom = zext i32 %j.113 to i64 + %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom + store i32 %add, i32* %arrayidx, align 4 + %0 = load i32, i32* %arrayidx7, align 4 + %add8 = add nsw i32 %0, %add + store i32 %add8, i32* %arrayidx7, align 4 + %inc = add nuw i32 %j.113, 1 + %cmp2 = icmp ult i32 %inc, %itr + br i1 %cmp2, label %for.body3, label %for.inc11.loopexit + +for.inc11.loopexit: ; preds = %for.body3 + br label %for.inc11 + +for.inc11: ; preds = %for.inc11.loopexit, %for.cond1.preheader + %j.1.lcssa = phi i32 [ %j.016, %for.cond1.preheader ], [ %itr, %for.inc11.loopexit ] + %inc12 = add nuw i32 %i.015, 1 + %cmp = icmp ult i32 %inc12, %itr + br i1 %cmp, label %for.cond1.preheader, label %for.end13.loopexit + +for.end13.loopexit: ; preds = %for.inc11 + br label %for.end13 + +for.end13: ; preds = %for.end13.loopexit, %entry + ret i32 0 +} + Index: test/Transforms/LoopVersioning/loopversioning2.ll =================================================================== --- test/Transforms/LoopVersioning/loopversioning2.ll +++ test/Transforms/LoopVersioning/loopversioning2.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -O1 -S -loop-versioning -debug-only=loop-versioning 2>&1 | FileCheck %s +; +; Test to confirm loop is a good candidate for loop versioning +; +; CHECK: Loop: Loop at depth 2 containing: %for.body3
+; CHECK-NEXT: Loop Versioning found to be beneficial +define i32 @foo(i32* nocapture %var1, i32* nocapture %var2, i32* nocapture %var3, i32 %itr) #0 { +entry: + %cmp17 = icmp eq i32 %itr, 0 + br i1 %cmp17, label %for.end16, label %for.cond1.preheader.preheader + +for.cond1.preheader.preheader: ; preds = %entry + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc14 + %j.019 = phi i32 [ %j.1.lcssa, %for.inc14 ], [ 0, %for.cond1.preheader.preheader ] + %i.018 = phi i32 [ %inc15, %for.inc14 ], [ 0, %for.cond1.preheader.preheader ] + %cmp215 = icmp ult i32 %j.019, %itr + br i1 %cmp215, label %for.body3.lr.ph, label %for.inc14 + +for.body3.lr.ph: ; preds = %for.cond1.preheader + %add = add i32 %i.018, %itr + %idxprom9 = zext i32 %i.018 to i64 + %arrayidx10 = getelementptr inbounds i32, i32* %var3, i64 %idxprom9 + br label %for.body3 + +for.body3: ; preds = %for.body3.lr.ph, %for.body3 + %j.116 = phi i32 [ %j.019, %for.body3.lr.ph ], [ %inc, %for.body3 ] + %idxprom = zext i32 %j.116 to i64 + %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom + store i32 %add, i32* %arrayidx, align 4 + %arrayidx6 = getelementptr inbounds i32, i32* %var2, i64 %idxprom + store i32 %add, i32* %arrayidx6, align 4 + %0 = load i32, i32* %arrayidx, align 4 + %1 = load i32, i32* %arrayidx10, align 4 + %add11 = add nsw i32 %1, %0 + store i32 %add11, i32* %arrayidx10, align 4 + %inc = add nuw i32 %j.116, 1 + %cmp2 = icmp ult i32 %inc, %itr + br i1 %cmp2, label %for.body3, label %for.inc14.loopexit + +for.inc14.loopexit: ; preds = %for.body3 + br label %for.inc14 + +for.inc14: ; preds = %for.inc14.loopexit, %for.cond1.preheader + %j.1.lcssa = phi i32 [ %j.019, %for.cond1.preheader ], [ %itr, %for.inc14.loopexit ] + %inc15 = add nuw i32 %i.018, 1 + %cmp = icmp ult i32 %inc15, %itr + br i1 %cmp, label %for.cond1.preheader, label %for.end16.loopexit + +for.end16.loopexit: ; preds = %for.inc14 + br label %for.end16 + +for.end16: ; preds = %for.end16.loopexit, %entry + ret i32 0 +} Index: test/Transforms/LoopVersioning/loopversioning3.ll =================================================================== --- test/Transforms/LoopVersioning/loopversioning3.ll +++ test/Transforms/LoopVersioning/loopversioning3.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -O1 -S -loop-versioning -debug-only=loop-versioning 2>&1 | FileCheck %s +; +; Test to confirm loop doesn't has invariant store. +; loop is not a candidate for loop versioning. +; +; CHECK: Loop: Loop at depth 2 containing: %for.body3
+; CHECK-NEXT: Invariant not found !! +; CHECK-NEXT: Loop instructions not suitable for loop versioning + +define i32 @foo(i32* nocapture %var1, i32 %itr) #0 { +entry: + %cmp18 = icmp eq i32 %itr, 0 + br i1 %cmp18, label %for.end8, label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc6 + %j.020 = phi i32 [ %j.1.lcssa, %for.inc6 ], [ 0, %entry ] + %i.019 = phi i32 [ %inc7, %for.inc6 ], [ 0, %entry ] + %cmp216 = icmp ult i32 %j.020, %itr + br i1 %cmp216, label %for.body3.lr.ph, label %for.inc6 + +for.body3.lr.ph: ; preds = %for.cond1.preheader + %0 = zext i32 %j.020 to i64 + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.body3.lr.ph + %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ] + %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv + %1 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %1, %itr + store i32 %add, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %itr + br i1 %exitcond, label %for.inc6, label %for.body3 + +for.inc6: ; preds = %for.body3, %for.cond1.preheader + %j.1.lcssa = phi i32 [ %j.020, %for.cond1.preheader ], [ %itr, %for.body3 ] + %inc7 = add nuw i32 %i.019, 1 + %exitcond21 = icmp eq i32 %inc7, %itr + br i1 %exitcond21, label %for.end8, label %for.cond1.preheader + +for.end8: ; preds = %for.inc6, %entry + ret i32 0 +} +