Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -171,6 +171,7 @@ void initializeLoopRerollPass(PassRegistry&); void initializeLoopUnrollPass(PassRegistry&); void initializeLoopUnswitchPass(PassRegistry&); +void initializeLoopVersioningLICMPass(PassRegistry&); void initializeLoopIdiomRecognizePass(PassRegistry&); void initializeLowerAtomicPass(PassRegistry&); void initializeLowerBitSetsPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -109,6 +109,7 @@ (void) llvm::createLoopRerollPass(); (void) llvm::createLoopUnrollPass(); (void) llvm::createLoopUnswitchPass(); + (void) llvm::createLoopVersioningLICMPass(); (void) llvm::createLoopIdiomPass(); (void) llvm::createLoopRotatePass(); (void) llvm::createLowerExpectIntrinsicPass(); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -205,6 +205,12 @@ //===----------------------------------------------------------------------===// // +// LoopVersioningLICM - This pass is a loop versioning pass for LICM. +// +Pass *createLoopVersioningLICMPass(); + +//===----------------------------------------------------------------------===// +// // PromoteMemoryToRegister - This pass is used to promote memory references to // be register references. A simple example of the transformation performed by // this pass is: Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -99,6 +99,10 @@ cl::desc( "Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline.")); +static cl::opt UseLoopVersioningLICM( + "loop-versioning-licm", cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental Loop Versioning LICM pass")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -263,6 +267,10 @@ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); MPM.add(createInstructionCombiningPass()); MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + if (UseLoopVersioningLICM) { + MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM + MPM.add(createLICMPass()); // Hoist loop invariants + } MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. MPM.add(createLoopDeletionPass()); // Delete dead loops if (EnableLoopInterchange) { Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -26,6 +26,7 @@ LoopStrengthReduce.cpp LoopUnrollPass.cpp LoopUnswitch.cpp + LoopVersioningLICM.cpp LowerAtomic.cpp LowerExpectIntrinsic.cpp MemCpyOptimizer.cpp Index: lib/Transforms/Scalar/LoopVersioningLICM.cpp =================================================================== --- lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -0,0 +1,607 @@ +//===----------- LoopVersioningLICM.cpp - LICM Loop versioning ------------===// +// +// The LLVM Compiler Infrastructure +// +// Cases where memory aliasing wasn't sure compiler became conservative and +// didn't proceeds with invariant code motion. This results in possible +// missed optimization opportunities. +// +// Our main motivation is to exploit such cases and allow LICM optimization. +// +// Most of the time when alias analysis is unsure about memory access and it +// assumes may-alias. This un surety from alias analysis restrict LICM to +// proceed further. Cases where alias analysis is unsure we like to use loop +// versioning as an alternative. +// +// Loop Versioning will creates version of the loop with aggressive alias and +// the other with conservative (default) alias. Aggressive alias version of +// loop will have all the memory access marked as no-alias. These two version +// of loop will be preceded by a memory runtime check. +// This runtime check consists of bound checks for all unique memory accessed +// in loop, and it ensures aliasing of memory. Based on this check result at +// runtime any of the loops gets executed, if memory is non aliased then +// aggressive aliasing loop gets executed, else when memory is aliased then non +// aggressive aliased version gets executed. +// +// Following are the top level steps: +// +// a) Perform LoopVersioningLICM feasibility check. +// b) If loop is a candidate for versioning then create a memory bound check, +// by considering all the memory access in loop body. +// c) Clone original loop and set all memory access as no-alias in new loop. +// d) Set original loop & versioned loop as a branch target of runtime check +// result. +// +// It transform loop as shown below: +// +// +----------------+ +// |Runtime Memcheck| +// +----------------+ +// | +// +----------+----------------+----------+ +// | | +// +---------+----------+ +-----------+----------+ +// |Orig Loop Preheader | |Cloned Loop Preheader | +// +--------------------+ +----------------------+ +// | | +// +--------------------+ +----------------------+ +// |Orig Loop Body | |Cloned Loop Body | +// +--------------------+ +----------------------+ +// | | +// +--------------------+ +----------------------+ +// |Orig Loop|Exit Block| |Cloned Loop Exit Block| +// +--------------------+ +-----------+----------+ +// | | +// +----------+--------------+-----------+ +// | +// +-----+----+ +// |Join Block| +// +----------+ +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/PredIteratorCache.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/LoopVersioning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" + +#define DEBUG_TYPE "loop-versioning-licm" +#define ORIGINAL_LOOP_METADATA "LoopVersioningLICMOriginalLoop" +#define VERSION_LOOP_METADATA "LoopVersioningLICMVersionLoop" + +using namespace llvm; + +/// Loop Versioning invariant threshold +static cl::opt + LVInvarThreshold("-licm-versioning-invariant-threshold", + cl::desc("Loop Versioning invariant threshold"), + cl::init(25), cl::Hidden); + +/// Loop Versioning max loop depth threshold +static cl::opt + LVLoopDepthThreshold("loopver-max-depth-threshold", + cl::desc("Loop Versioning loop depth threshold"), + cl::init(2), cl::Hidden); + +/// Loop Versioning memcheck pointers threshold +static cl::opt LVMemchkPtrThreshold( + "loopver-memcheck-ptr-threshold", + cl::desc("Loop Versioning memcheck pointers threshold"), cl::init(5), + cl::Hidden); + +// Sets input string as meta data to loop latch terminator instruction. +static void setLoopMetaData(Loop *CurLoop, const char *MDString) { + assert(CurLoop->getLoopPreheader() != nullptr && + "Loop should have a preheader"); + Instruction *I = CurLoop->getLoopLatch()->getTerminator(); + LLVMContext &C = I->getContext(); + SmallVector Elts; + Elts.push_back( + ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); + MDNode *Node = MDNode::get(C, Elts); + I->setMetadata(MDString, Node); +} + +namespace { +struct LoopVersioningLICM : public LoopPass { + static char ID; + + bool runOnLoop(Loop *L, LPPassManager &LPM) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addRequired(); + AU.addRequiredID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } + + using llvm::Pass::doFinalization; + + bool doFinalization() override { return false; } + + LoopVersioningLICM() + : LoopPass(ID), AA(nullptr), SE(nullptr), LI(nullptr), DT(nullptr), + TLI(nullptr), LAA(nullptr), LAI(nullptr), Changed(false), + Preheader(nullptr), CurLoop(nullptr), CurAST(nullptr), + RuntimeMemoryCheckThreshold(LVMemchkPtrThreshold), + LoopDepthThreshold(LVLoopDepthThreshold), + InvariantThreshold(LVInvarThreshold), LoadnStoreCounter(0), + InvariantCounter(0), isReadOnlyLoop(true) { + initializeLoopVersioningLICMPass(*PassRegistry::getPassRegistry()); + } + + AliasAnalysis *AA; // Current AliasAnalysis information + ScalarEvolution *SE; // Current ScalarEvolution + LoopInfo *LI; // Current LoopInfo + DominatorTree *DT; // Dominator Tree for the current Loop. + TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding. + LoopAccessAnalysis *LAA; // Current LoopAccessAnalysis + const LoopAccessInfo *LAI; // Current Loop's LoopAccessInfo + + bool Changed; // Set to true when we change anything. + BasicBlock *Preheader; // The preheader block of the current loop. + Loop *CurLoop; // The current loop we are working on. + AliasSetTracker *CurAST; // AliasSet information for the current loop. + ValueToValueMap Strides; + + unsigned RuntimeMemoryCheckThreshold; // Pointer limit for runtime check. + unsigned LoopDepthThreshold; // Maximum loop nest threshold + unsigned InvariantThreshold; // Minimum invariant threshold + unsigned LoadnStoreCounter; // Counter to track num of load & store + unsigned InvariantCounter; // Counter to track num of invariant + bool isReadOnlyLoop; // to check loop is read only. + + bool isLegalForVersioning(); + bool loopStructureLegality(); + bool loopInstructionsLegality(); + bool loopMemoryLegality(); + void collectStridedAccess(Value *LoadOrStoreInst); + bool isLoopAlreadyVisited(); + void setNoAliasToLoop(Loop *); + bool instructionSafeForVersioning(Instruction *); + const char *getPassName() const override { return "Loop Versioning"; } +}; +} + +/// \brief Collects stride access from a given value. +void LoopVersioningLICM::collectStridedAccess(Value *MemAccess) { + Value *Ptr = nullptr; + if (LoadInst *LI = dyn_cast(MemAccess)) + Ptr = LI->getPointerOperand(); + else if (StoreInst *SI = dyn_cast(MemAccess)) + Ptr = SI->getPointerOperand(); + else + return; + + Value *Stride = getStrideFromPointer(Ptr, SE, CurLoop); + if (!Stride) + return; + + DEBUG(dbgs() << "Found a strided access that we can version"); + DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n"); + Strides[Ptr] = Stride; +} + +/// \brief Check loop structure and confirms its good for Loop Versioning. +bool LoopVersioningLICM::loopStructureLegality() { + // Loop must have a preheader, if not return false. + if (!CurLoop->getLoopPreheader()) { + DEBUG(dbgs() << " loop preheader is missing\n"); + return false; + } + // Loop should be innermost loops, if not return false. + if (CurLoop->getSubLoops().size()) { + DEBUG(dbgs() << " loop is not innermost\n"); + return false; + } + // Loop should have a single backedge, if not return false. + if (CurLoop->getNumBackEdges() != 1) { + DEBUG(dbgs() << " loop has multiple backedges\n"); + return false; + } + // Loop must have a single exiting block, if not return false. + if (!CurLoop->getExitingBlock()) { + DEBUG(dbgs() << " loop has multiple exiting block\n"); + return false; + } + // We only handle bottom-tested loops, i.e. loop in which the condition is + // checked at the end of each iteration. With that we can assume that all + // instructions in the loop are executed the same number of times. + if (CurLoop->getExitingBlock() != CurLoop->getLoopLatch()) { + DEBUG(dbgs() + << " loop control flow is not understood by LoopVersioningLICM\n"); + return false; + } + // Loop depth more then LoopDepthThreshold are not allowed + if (CurLoop->getLoopDepth() > LoopDepthThreshold) { + DEBUG(dbgs() << " loop depth is more then threshold\n"); + return false; + } + // Loop should have a dedicated exit block, if not return false. + if (!CurLoop->hasDedicatedExits()) { + DEBUG(dbgs() << " loop does not has dedicated exit blocks\n"); + return false; + } + // Loop should have a trip count, if not return false. + // ScalarEvolution needs to be able to find the exit count. + const SCEV *ExitCount = SE->getBackedgeTakenCount(CurLoop); + if (ExitCount == SE->getCouldNotCompute()) { + DEBUG(dbgs() << " loop does not has trip count\n"); + return false; + } + return true; +} + +/// \brief Check loop memory accesses and confirms its good for +/// Loop versioning. +bool LoopVersioningLICM::loopMemoryLegality() { + // LoopAccessInfo should not be null. + assert(LAI != nullptr && "LoopAccessInfo should not be null"); + // Check if LoopAccessAnalysis finds loop memory access safe + // for LoopVersioningLICM. If unsafe return false. + if (!LAI->canVectorizeMemory()) { + DEBUG(dbgs() << " LAA found unsafe memory access.\n"); + return false; + } + bool HasMayAlias = 0; + bool TypeSafety = 0; + bool IsMod = 0; + SmallPtrSet PointerSet; + // Memory check: + // Iterate over alias set tracker and individual alias sets. + // 1) Make sure PointerSet should have atleast 2 pointers. + // 2) PointerSet should'nt have pointers more then RuntimeMemoryCheckThreshold + // 3) Make sure AliasSets should not have any must alias set. + for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); I != E; + ++I) { + AliasSet &AS = *I; + // Skip Forward Alias Sets, dont consider ignored alias sets object. + if (AS.isForwardingAliasSet()) + continue; + // Return false in case of MustAlias + if (AS.isMustAlias()) + return false; + Value *SomePtr = AS.begin()->getValue(); + bool typeCheck = 1; + // Check for Ismod & MayAlias + HasMayAlias |= AS.isMayAlias(); + IsMod |= AS.isMod(); + for (auto A : AS) { + Value *Ptr = A.getValue(); + // alias tracker should have pointers of same data type. + typeCheck = (typeCheck && (SomePtr->getType() == Ptr->getType())); + // Insert memory to PointerSet + PointerSet.insert(Ptr); + // Check any memory modified in loop body. + } + // Atleast one alias tracker should have pointers of same data type. + TypeSafety |= typeCheck; + } + // Ensure types should be of same type. + if (!TypeSafety) { + DEBUG(dbgs() << " Alias tracker type safety failed!\n"); + return false; + } + // Ensure loop body should not be read only. + if (!IsMod) { + DEBUG(dbgs() << " No memory modified in loop body\n"); + return false; + } + // Make sure alias set has may alias case. + // If there no alias memory ambiguity, return false. + if (!HasMayAlias) { + DEBUG(dbgs() << " No ambiguity in memory access.\n"); + return false; + } + // Atleast 2 pointers needed for runtime check. + if (PointerSet.size() <= 1) { + DEBUG(dbgs() << " Didnt found sufficient pointers in loop body\n"); + return false; + } + // Check memory threshold, should be in limit. + if (PointerSet.size() > RuntimeMemoryCheckThreshold) { + DEBUG(dbgs() << " Loop body has pointers more then defined threshold\n"); + return false; + } + return true; +} + +/// \brief Check loop instructions safe for Loop versioning. +/// It returns true if its safe else returns false. +/// Consider following: +/// 1) Check all load store in loop body are non atomic & non volatile. +/// 2) We dont allow non-intrinsic, non-libfunc callsite. +/// 3) Loop body should not have any may throw instruction. +bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) { + // Instruction should not be NULL. + assert(I != nullptr && "Null instruction found!"); + // Check parallel loop + const bool IsAnnotatedParallel = CurLoop->isAnnotatedParallel(); + // We dont support call instructions. however, we ignore few intrinsic + // and libfunc callsite. We dont allow non-intrinsic, non-libfunc callsite + CallInst *CI = dyn_cast(I); + if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa(CI) && + !(CI->getCalledFunction() && TLI && + TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) { + DEBUG(dbgs() << " Found a non-intrinsic, non-libfunc callsite.\n"); + return false; + } + if (I->mayThrow()) { + DEBUG(dbgs() << " may throw instruction found in loop body\n"); + return false; + } + // If current instruction is load instructions + // make sure its a simple load(non atomic & non volatile) + if (I->mayReadFromMemory()) { + LoadInst *Ld = dyn_cast(I); + if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) { + DEBUG(dbgs() << " Found a non-simple load.\n"); + return false; + } + LoadnStoreCounter++; + collectStridedAccess(Ld); + Value *Ptr = Ld->getPointerOperand(); + // Check loop invariant. + if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop)) + InvariantCounter++; + } + // If current instruction is store instruction + // make sure its a simple store(non atomic & non volatile) + else if (I->mayWriteToMemory()) { + StoreInst *St = dyn_cast(I); + if (!St || (!St->isSimple() && !IsAnnotatedParallel)) { + DEBUG(dbgs() << " Found a non-simple store.\n"); + return false; + } + LoadnStoreCounter++; + collectStridedAccess(St); + Value *Ptr = St->getPointerOperand(); + // Check loop invariant. + if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop)) + InvariantCounter++; + + isReadOnlyLoop = false; + } + return true; +} + +/// \brief Check loop instructions and confirms its good for Loop versioning. +bool LoopVersioningLICM::loopInstructionsLegality() { + // Resetting counters. + LoadnStoreCounter = 0; + InvariantCounter = 0; + isReadOnlyLoop = true; + // Instruction check: Iterate over loop blocks and instructions of each block + // and check instruction safety. + for (Loop::block_iterator bb = CurLoop->block_begin(), + be = CurLoop->block_end(); + bb != be; ++bb) { + for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e; + ++it) { + // If instruction in unsafe just return false. + if (!instructionSafeForVersioning(it)) + return false; + } + } + // Get LoopAccessInfo from current loop. + LAI = &LAA->getInfo(CurLoop, Strides); + // Check LoopAccessInfo for need of runtime check. + if (!LAI->getRuntimePointerChecking()->Need) { + DEBUG(dbgs() << " LAI: Runtime check not needed !!\n"); + return false; + } + // Loop should have atleast invariant store instruction. + if (!InvariantCounter) { + DEBUG(dbgs() << " Invariant not found !!\n"); + return false; + } + // Read only loop not allowed. + if (isReadOnlyLoop) { + DEBUG(dbgs() << " Found a read-only loop!\n"); + return false; + } + // Check invariant threshold, should be in limit. + if ((((float)(InvariantCounter) / (float)(LoadnStoreCounter)) * 100) < + InvariantThreshold) { + DEBUG(dbgs() + << " Invariant load & store are less then defined threshold\n"); + DEBUG(dbgs() << " Invariant loads & stores: " + << (int)(((float)(InvariantCounter) / + (float)(LoadnStoreCounter)) * + 100) + << "%\n"); + DEBUG(dbgs() << " Invariant loads & store threshold: " + << InvariantThreshold << "%\n"); + return false; + } + return true; +} + +/// \brief Checks loop is already visited or not. +/// It checks for loop meta data, in case of revisiting loop +/// returns true else false. +bool LoopVersioningLICM::isLoopAlreadyVisited() { + // Check for loop latch & terminator instruction. + if (!CurLoop->getLoopLatch() || !CurLoop->getLoopLatch()->getTerminator()) + return false; + // Check for LoopVersioningLICM meta data. + Instruction *I = CurLoop->getLoopLatch()->getTerminator(); + if (I && (I->getMetadata(ORIGINAL_LOOP_METADATA) || + I->getMetadata(VERSION_LOOP_METADATA))) { + return true; + } + return false; +} + +/// \brief Checks legality for LoopVersioningLICM by considering following: +/// a) loop structure legality b) loop instruction legality +/// c) loop memory access legality. +/// Return true if legal else returns false. +bool LoopVersioningLICM::isLegalForVersioning() { + DEBUG(dbgs() << "Loop: " << *CurLoop); + // Make sure not re-visiting same loop again. + if (isLoopAlreadyVisited()) { + DEBUG( + dbgs() << " Revisiting loop in LoopVersioningLICM not allowed.\n\n"); + return false; + } + // Check loop structure leagality. + if (!loopStructureLegality()) { + DEBUG( + dbgs() << " Loop structure not suitable for LoopVersioningLICM\n\n"); + return false; + } + // Check loop memory access leagality. + if (!loopInstructionsLegality()) { + DEBUG(dbgs() + << " Loop instructions not suitable for LoopVersioningLICM\n\n"); + return false; + } + // Check loop memory access leagality. + if (!loopMemoryLegality()) { + DEBUG(dbgs() + << " Loop memory access not suitable for LoopVersioningLICM\n\n"); + return false; + } + // Loop versioning is feasible, return true. + DEBUG(dbgs() << " Loop Versioning found to be beneficial\n\n"); + return true; +} + +/// \brief Update loop with aggressive aliasing. +/// It marks load & store memory instructions with no-alias. +void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) { + // Get latch terminator instruction. + Instruction *I = VerLoop->getLoopLatch()->getTerminator(); + // Create alias scope domain. + MDBuilder MDB(I->getContext()); + MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("LVDomain"); + std::string Name = "LVAliasScope"; + SmallVector Scopes, NoAliases; + MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); + // Iterate over each instruction of loop. + // set no-alias for all load & store instructions. + for (Loop::block_iterator bb = VerLoop->block_begin(), + be = VerLoop->block_end(); + bb != be; ++bb) { + for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e; + ++it) { + // Only interested in instruction that may modify or read memory. + if (!it->mayReadFromMemory() && !it->mayWriteToMemory()) + continue; + + Instruction *I = dyn_cast(it); + Scopes.push_back(NewScope); + NoAliases.push_back(NewScope); + // Set no-alias for current instruction. + I->setMetadata( + LLVMContext::MD_noalias, + MDNode::concatenate(I->getMetadata(LLVMContext::MD_noalias), + MDNode::get(I->getContext(), NoAliases))); + // set alias-scope for current instruction. + I->setMetadata( + LLVMContext::MD_alias_scope, + MDNode::concatenate(I->getMetadata(LLVMContext::MD_alias_scope), + MDNode::get(I->getContext(), Scopes))); + } + } +} + +bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) { + if (skipOptnoneFunction(L)) + return false; + Changed = false; + // Get Analysis information. + LI = &getAnalysis().getLoopInfo(); + AA = &getAnalysis(); + SE = &getAnalysis().getSE(); + DT = &getAnalysis().getDomTree(); + TLI = &getAnalysis().getTLI(); + LAA = &getAnalysis(); + LAI = nullptr; + // Set Current Loop + CurLoop = L; + // Get the preheader block. + Preheader = L->getLoopPreheader(); + // Initial allocation + CurAST = new AliasSetTracker(*AA); + + // Loop over the body of this loop, construct AST. + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; + ++I) { + BasicBlock *BB = *I; + if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops. + CurAST->add(*BB); // Incorporate the specified basic block + } + // Check feasiblity of LoopVersioningLICM. + // If versioning found to be feasible and beneficial then procees + // else simply return, by cleaning up memory. + if (isLegalForVersioning()) { + // Do loop versioning. + // Create memcheck for memory accessed inside loop. + // Clone original loop, and set blocks properly. + LoopVersioning LVer(*LAI, CurLoop, LI, DT); + LVer.versionLoop(); + // Set Loop Versioning metaData for original loop. + setLoopMetaData(LVer.getNonVersionedLoop(), ORIGINAL_LOOP_METADATA); + // Set Loop Versioning metaData for version loop. + setLoopMetaData(LVer.getVersionedLoop(), VERSION_LOOP_METADATA); + // Update version loop with aggressive aliasing. + setNoAliasToLoop(LVer.getVersionedLoop()); + // Update Dominator tree with new changes. + DT->recalculate(*CurLoop->getHeader()->getParent()); + Changed = true; + } + // Delete allocated memory. + delete CurAST; + return Changed; +} + +char LoopVersioningLICM::ID = 0; +INITIALIZE_PASS_BEGIN(LoopVersioningLICM, "do-loop-versioning-licm", + "Loop Versioning For LICM", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis) +INITIALIZE_PASS_END(LoopVersioningLICM, "do-loop-versioning-licm", + "Loop Versioning For LICM", false, false) + +Pass *llvm::createLoopVersioningLICMPass() { return new LoopVersioningLICM(); } Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -57,6 +57,7 @@ initializeLoopRerollPass(Registry); initializeLoopUnrollPass(Registry); initializeLoopUnswitchPass(Registry); + initializeLoopVersioningLICMPass(Registry); initializeLoopIdiomRecognizePass(Registry); initializeLowerAtomicPass(Registry); initializeLowerExpectIntrinsicPass(Registry); Index: test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll =================================================================== --- test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll +++ test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll @@ -0,0 +1,55 @@ +; RUN: opt < %s -O1 -S -loop-versioning-licm -debug-only=loop-versioning-licm 2>&1 | FileCheck %s +; +; Test to confirm loop is a candidate for LoopVersioningLICM. +; +; CHECK: Loop: Loop at depth 2 containing: %for.body3
+; CHECK-NEXT: Loop Versioning found to be beneficial + +define i32 @foo(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 { +entry: + %cmp14 = icmp eq i32 %itr, 0 + br i1 %cmp14, label %for.end13, label %for.cond1.preheader.preheader + +for.cond1.preheader.preheader: ; preds = %entry + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc11 + %j.016 = phi i32 [ %j.1.lcssa, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ] + %i.015 = phi i32 [ %inc12, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ] + %cmp212 = icmp ult i32 %j.016, %itr + br i1 %cmp212, label %for.body3.lr.ph, label %for.inc11 + +for.body3.lr.ph: ; preds = %for.cond1.preheader + %add = add i32 %i.015, %itr + %idxprom6 = zext i32 %i.015 to i64 + %arrayidx7 = getelementptr inbounds i32, i32* %var3, i64 %idxprom6 + br label %for.body3 + +for.body3: ; preds = %for.body3.lr.ph, %for.body3 + %j.113 = phi i32 [ %j.016, %for.body3.lr.ph ], [ %inc, %for.body3 ] + %idxprom = zext i32 %j.113 to i64 + %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom + store i32 %add, i32* %arrayidx, align 4 + %0 = load i32, i32* %arrayidx7, align 4 + %add8 = add nsw i32 %0, %add + store i32 %add8, i32* %arrayidx7, align 4 + %inc = add nuw i32 %j.113, 1 + %cmp2 = icmp ult i32 %inc, %itr + br i1 %cmp2, label %for.body3, label %for.inc11.loopexit + +for.inc11.loopexit: ; preds = %for.body3 + br label %for.inc11 + +for.inc11: ; preds = %for.inc11.loopexit, %for.cond1.preheader + %j.1.lcssa = phi i32 [ %j.016, %for.cond1.preheader ], [ %itr, %for.inc11.loopexit ] + %inc12 = add nuw i32 %i.015, 1 + %cmp = icmp ult i32 %inc12, %itr + br i1 %cmp, label %for.cond1.preheader, label %for.end13.loopexit + +for.end13.loopexit: ; preds = %for.inc11 + br label %for.end13 + +for.end13: ; preds = %for.end13.loopexit, %entry + ret i32 0 +} + Index: test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll =================================================================== --- test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll +++ test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -O1 -S -loop-versioning-licm -debug-only=loop-versioning-licm 2>&1 | FileCheck %s +; +; Test to confirm loop is a good candidate for LoopVersioningLICM +; +; CHECK: Loop: Loop at depth 2 containing: %for.body3
+; CHECK-NEXT: Loop Versioning found to be beneficial +define i32 @foo(i32* nocapture %var1, i32* nocapture %var2, i32* nocapture %var3, i32 %itr) #0 { +entry: + %cmp17 = icmp eq i32 %itr, 0 + br i1 %cmp17, label %for.end16, label %for.cond1.preheader.preheader + +for.cond1.preheader.preheader: ; preds = %entry + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc14 + %j.019 = phi i32 [ %j.1.lcssa, %for.inc14 ], [ 0, %for.cond1.preheader.preheader ] + %i.018 = phi i32 [ %inc15, %for.inc14 ], [ 0, %for.cond1.preheader.preheader ] + %cmp215 = icmp ult i32 %j.019, %itr + br i1 %cmp215, label %for.body3.lr.ph, label %for.inc14 + +for.body3.lr.ph: ; preds = %for.cond1.preheader + %add = add i32 %i.018, %itr + %idxprom9 = zext i32 %i.018 to i64 + %arrayidx10 = getelementptr inbounds i32, i32* %var3, i64 %idxprom9 + br label %for.body3 + +for.body3: ; preds = %for.body3.lr.ph, %for.body3 + %j.116 = phi i32 [ %j.019, %for.body3.lr.ph ], [ %inc, %for.body3 ] + %idxprom = zext i32 %j.116 to i64 + %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom + store i32 %add, i32* %arrayidx, align 4 + %arrayidx6 = getelementptr inbounds i32, i32* %var2, i64 %idxprom + store i32 %add, i32* %arrayidx6, align 4 + %0 = load i32, i32* %arrayidx, align 4 + %1 = load i32, i32* %arrayidx10, align 4 + %add11 = add nsw i32 %1, %0 + store i32 %add11, i32* %arrayidx10, align 4 + %inc = add nuw i32 %j.116, 1 + %cmp2 = icmp ult i32 %inc, %itr + br i1 %cmp2, label %for.body3, label %for.inc14.loopexit + +for.inc14.loopexit: ; preds = %for.body3 + br label %for.inc14 + +for.inc14: ; preds = %for.inc14.loopexit, %for.cond1.preheader + %j.1.lcssa = phi i32 [ %j.019, %for.cond1.preheader ], [ %itr, %for.inc14.loopexit ] + %inc15 = add nuw i32 %i.018, 1 + %cmp = icmp ult i32 %inc15, %itr + br i1 %cmp, label %for.cond1.preheader, label %for.end16.loopexit + +for.end16.loopexit: ; preds = %for.inc14 + br label %for.end16 + +for.end16: ; preds = %for.end16.loopexit, %entry + ret i32 0 +} Index: test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll =================================================================== --- test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll +++ test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll @@ -0,0 +1,44 @@ +; RUN: opt < %s -O1 -S -loop-versioning-licm -debug-only=loop-versioning-licm 2>&1 | FileCheck %s +; +; Test to confirm loop is not a candidate for LoopVersioningLICM. +; +; CHECK: Loop: Loop at depth 2 containing: %for.body3
+; CHECK-NEXT: LAI: Runtime check not needed !! +; CHECK-NEXT: Loop instructions not suitable for LoopVersioningLICM + +define i32 @foo(i32* nocapture %var1, i32 %itr) #0 { +entry: + %cmp18 = icmp eq i32 %itr, 0 + br i1 %cmp18, label %for.end8, label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc6 + %j.020 = phi i32 [ %j.1.lcssa, %for.inc6 ], [ 0, %entry ] + %i.019 = phi i32 [ %inc7, %for.inc6 ], [ 0, %entry ] + %cmp216 = icmp ult i32 %j.020, %itr + br i1 %cmp216, label %for.body3.lr.ph, label %for.inc6 + +for.body3.lr.ph: ; preds = %for.cond1.preheader + %0 = zext i32 %j.020 to i64 + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.body3.lr.ph + %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ] + %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv + %1 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %1, %itr + store i32 %add, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %itr + br i1 %exitcond, label %for.inc6, label %for.body3 + +for.inc6: ; preds = %for.body3, %for.cond1.preheader + %j.1.lcssa = phi i32 [ %j.020, %for.cond1.preheader ], [ %itr, %for.body3 ] + %inc7 = add nuw i32 %i.019, 1 + %exitcond21 = icmp eq i32 %inc7, %itr + br i1 %exitcond21, label %for.end8, label %for.cond1.preheader + +for.end8: ; preds = %for.inc6, %entry + ret i32 0 +} +