Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -171,6 +171,7 @@ void initializeLoopRerollPass(PassRegistry&); void initializeLoopUnrollPass(PassRegistry&); void initializeLoopUnswitchPass(PassRegistry&); +void initializeLoopVersioningLICMPass(PassRegistry&); void initializeLoopIdiomRecognizePass(PassRegistry&); void initializeLowerAtomicPass(PassRegistry&); void initializeLowerBitSetsPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -107,6 +107,7 @@ (void) llvm::createLoopRerollPass(); (void) llvm::createLoopUnrollPass(); (void) llvm::createLoopUnswitchPass(); + (void) llvm::createLoopVersioningLICMPass(); (void) llvm::createLoopIdiomPass(); (void) llvm::createLoopRotatePass(); (void) llvm::createLowerExpectIntrinsicPass(); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -205,6 +205,12 @@ //===----------------------------------------------------------------------===// // +// LoopVersioningLICM - This pass is a loop versioning pass for LICM. +// +Pass *createLoopVersioningLICMPass(); + +//===----------------------------------------------------------------------===// +// // PromoteMemoryToRegister - This pass is used to promote memory references to // be register references. A simple example of the transformation performed by // this pass is: Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -103,6 +103,10 @@ "enable-loop-load-elim", cl::init(false), cl::Hidden, cl::desc("Enable the new, experimental LoopLoadElimination Pass")); +static cl::opt UseLoopVersioningLICM( + "loop-versioning-licm", cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental Loop Versioning LICM pass")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -267,6 +271,10 @@ MPM.add(createCFGSimplificationPass()); MPM.add(createInstructionCombiningPass()); MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + if (UseLoopVersioningLICM) { + MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM + MPM.add(createLICMPass()); // Hoist loop invariants + } MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. MPM.add(createLoopDeletionPass()); // Delete dead loops if (EnableLoopInterchange) { Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -27,6 +27,7 @@ LoopStrengthReduce.cpp LoopUnrollPass.cpp LoopUnswitch.cpp + LoopVersioningLICM.cpp LowerAtomic.cpp LowerExpectIntrinsic.cpp MemCpyOptimizer.cpp Index: lib/Transforms/Scalar/LoopVersioningLICM.cpp =================================================================== --- lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -0,0 +1,603 @@ +//===----------- LoopVersioningLICM.cpp - LICM Loop versioning ------------===// +// +// The LLVM Compiler Infrastructure +// +// Cases where memory aliasing wasn't sure compiler became conservative and +// didn't proceeds with invariant code motion. This results in possible +// missed optimization opportunities. +// +// Our main motivation is to exploit such cases and allow LICM optimization. +// +// Most of the time when alias analysis is unsure about memory access and it +// assumes may-alias. This un surety from alias analysis restrict LICM to +// proceed further. Cases where alias analysis is unsure we like to use loop +// versioning as an alternative. +// +// Loop Versioning will creates version of the loop with aggressive alias and +// the other with conservative (default) alias. Aggressive alias version of +// loop will have all the memory access marked as no-alias. These two version +// of loop will be preceded by a memory runtime check. +// This runtime check consists of bound checks for all unique memory accessed +// in loop, and it ensures aliasing of memory. Based on this check result at +// runtime any of the loop gets executed, if memory is non aliased then +// aggressive aliasing loop gets executed, else when memory is aliased then non +// aggressive aliased version gets executed. +// +// Following are the top level steps: +// +// a) Perform LoopVersioningLICM feasibility check. +// b) If loop is a candidate for versioning then create a memory bound check, +// by considering all the memory access in loop body. +// c) Clone original loop and set all memory access as no-alias in new loop. +// d) Set original loop & versioned loop as a branch target of runtime check +// result. +// +// It transform loop as shown below: +// +// +----------------+ +// |Runtime Memcheck| +// +----------------+ +// | +// +----------+----------------+----------+ +// | | +// +---------+----------+ +-----------+----------+ +// |Orig Loop Preheader | |Cloned Loop Preheader | +// +--------------------+ +----------------------+ +// | | +// +--------------------+ +----------------------+ +// |Orig Loop Body | |Cloned Loop Body | +// +--------------------+ +----------------------+ +// | | +// +--------------------+ +----------------------+ +// |Orig Loop|Exit Block| |Cloned Loop Exit Block| +// +--------------------+ +-----------+----------+ +// | | +// +----------+--------------+-----------+ +// | +// +-----+----+ +// |Join Block| +// +----------+ +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/PredIteratorCache.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/LoopVersioning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" + +#define DEBUG_TYPE "loop-versioning-licm" +#define LOOP_VERSIONING_LICM_METADATA "llvm.loop.licm_versioning" + +using namespace llvm; + +/// Threshold minimum allowed percentage for possible +/// invariant instruction in a loop. +static cl::opt + LVInvarThreshold("-licm-versioning-invariant-threshold", + cl::desc("LoopVersioningLICM threshold minimum " + "allowed percentage for possible " + "invariant instruction in a loop"), + cl::init(25), cl::Hidden); + +/// Threshold for maximum allowed loop nest/depth +static cl::opt LVLoopDepthThreshold( + "loopver-max-depth-threshold", + cl::desc( + "LoopVersioningLICM threshold for maximum allowed loop nest/depth"), + cl::init(2), cl::Hidden); + +/// Threshold for maximum possible pointers in allowed memcheck +static cl::opt + LVMemchkPtrThreshold("loopver-memcheck-ptr-threshold", + cl::desc("LoopVersioningLICM threshold for maximum " + "possible pointers allowed in memcheck"), + cl::init(5), cl::Hidden); + +// Sets input string as meta data to loop latch terminator instruction. +static void setLoopMetaData(Loop *CurLoop, const char *MDString) { + assert(CurLoop->getLoopPreheader() != nullptr && + "Loop should have a preheader"); + Instruction *I = CurLoop->getLoopLatch()->getTerminator(); + LLVMContext &C = I->getContext(); + SmallVector Elts; + Elts.push_back( + ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); + MDNode *Node = MDNode::get(C, Elts); + I->setMetadata(MDString, Node); +} + +namespace { +struct LoopVersioningLICM : public LoopPass { + static char ID; + + bool runOnLoop(Loop *L, LPPassManager &LPM) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequiredID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } + + using llvm::Pass::doFinalization; + + bool doFinalization() override { return false; } + + LoopVersioningLICM() + : LoopPass(ID), AA(nullptr), SE(nullptr), LI(nullptr), DT(nullptr), + TLI(nullptr), LAA(nullptr), LAI(nullptr), Changed(false), + Preheader(nullptr), CurLoop(nullptr), CurAST(nullptr), + RuntimeMemoryCheckThreshold(LVMemchkPtrThreshold), + LoopDepthThreshold(LVLoopDepthThreshold), + InvariantThreshold(LVInvarThreshold), LoadAndStoreCounter(0), + InvariantCounter(0), IsReadOnlyLoop(true) { + initializeLoopVersioningLICMPass(*PassRegistry::getPassRegistry()); + } + + AliasAnalysis *AA; // Current AliasAnalysis information + ScalarEvolution *SE; // Current ScalarEvolution + LoopInfo *LI; // Current LoopInfo + DominatorTree *DT; // Dominator Tree for the current Loop. + TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding. + LoopAccessAnalysis *LAA; // Current LoopAccessAnalysis + const LoopAccessInfo *LAI; // Current Loop's LoopAccessInfo + + bool Changed; // Set to true when we change anything. + BasicBlock *Preheader; // The preheader block of the current loop. + Loop *CurLoop; // The current loop we are working on. + AliasSetTracker *CurAST; // AliasSet information for the current loop. + ValueToValueMap Strides; + + unsigned RuntimeMemoryCheckThreshold; // Pointer limit for runtime check. + unsigned LoopDepthThreshold; // Maximum loop nest threshold + float InvariantThreshold; // Minimum invariant threshold + unsigned LoadAndStoreCounter; // Counter to track num of load & store + unsigned InvariantCounter; // Counter to track num of invariant + bool IsReadOnlyLoop; // Read only loop marker. + + bool isLegalForVersioning(); + bool legalLoopStructure(); + bool legalLoopInstructions(); + bool legalLoopMemoryAccesses(); + void collectStridedAccess(Value *LoadOrStoreInst); + bool isLoopAlreadyVisited(); + void setNoAliasToLoop(Loop *); + bool instructionSafeForVersioning(Instruction *); + const char *getPassName() const override { return "Loop Versioning"; } +}; +} + +/// \brief Collects stride access from a given value. +void LoopVersioningLICM::collectStridedAccess(Value *MemAccess) { + Value *Ptr = nullptr; + if (LoadInst *LI = dyn_cast(MemAccess)) + Ptr = LI->getPointerOperand(); + else if (StoreInst *SI = dyn_cast(MemAccess)) + Ptr = SI->getPointerOperand(); + else + return; + + Value *Stride = getStrideFromPointer(Ptr, SE, CurLoop); + if (!Stride) + return; + + DEBUG(dbgs() << "Found a strided access that we can version"); + DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n"); + Strides[Ptr] = Stride; +} + +/// \brief Check loop structure and confirms it's good for LoopVersioningLICM. +bool LoopVersioningLICM::legalLoopStructure() { + // Loop must have a preheader, if not return false. + if (!CurLoop->getLoopPreheader()) { + DEBUG(dbgs() << " loop preheader is missing\n"); + return false; + } + // Loop should be innermost loop, if not return false. + if (CurLoop->getSubLoops().size()) { + DEBUG(dbgs() << " loop is not innermost\n"); + return false; + } + // Loop should have a single backedge, if not return false. + if (CurLoop->getNumBackEdges() != 1) { + DEBUG(dbgs() << " loop has multiple backedges\n"); + return false; + } + // Loop must have a single exiting block, if not return false. + if (!CurLoop->getExitingBlock()) { + DEBUG(dbgs() << " loop has multiple exiting block\n"); + return false; + } + // We only handle bottom-tested loop, i.e. loop in which the condition is + // checked at the end of each iteration. With that we can assume that all + // instructions in the loop are executed the same number of times. + if (CurLoop->getExitingBlock() != CurLoop->getLoopLatch()) { + DEBUG(dbgs() + << " loop control flow is not understood by LoopVersioningLICM\n"); + return false; + } + // Loop depth more then LoopDepthThreshold are not allowed + if (CurLoop->getLoopDepth() > LoopDepthThreshold) { + DEBUG(dbgs() << " loop depth is more then threshold\n"); + return false; + } + // Loop should have a dedicated exit block, if not return false. + if (!CurLoop->hasDedicatedExits()) { + DEBUG(dbgs() << " loop does not has dedicated exit blocks\n"); + return false; + } + // Loop should have a trip count, if not return false. + const SCEV *ExitCount = SE->getBackedgeTakenCount(CurLoop); + if (ExitCount == SE->getCouldNotCompute()) { + DEBUG(dbgs() << " loop does not has trip count\n"); + return false; + } + return true; +} + +/// \brief Check memory accesses in loop and confirms it's good for +/// LoopVersioningLICM. +bool LoopVersioningLICM::legalLoopMemoryAccesses() { + assert(LAI != nullptr && "LoopAccessInfo shouldn't be null"); + // Check if LoopAccessAnalysis finds loop memory access safe + // for LoopVersioningLICM. If unsafe return false. + if (!LAI->canVectorizeMemory()) { + DEBUG(dbgs() << " LAA found unsafe memory access.\n"); + return false; + } + bool HasMayAlias = false; + bool TypeSafety = false; + bool HasMod = false; + SmallPtrSet PointerSet; + // Memory check: + // Transform phase will generate a versioned loop and also a runtime check to + // ensure the pointers are independent and they don’t alias. + // In version variant of loop, alias meta data asserts that all access are + // mutually independent. + // + // Pointers aliasing in alias domain are avoided because with multiple + // aliasing domains we may not be able to hoist potential loop invariant + // access out of the loop. + // + // Iterate over alias tracker sets, and confirm following: + // 1) Make sure PointerSet should have at least minimum pointers for + // runtime bound check. + // 2) PointerSet shouldn't have pointers more then RuntimeMemoryCheckThreshold + // 3) Make sure AliasSets doesn't have any must alias set. + for (const auto &I : *CurAST) { + const AliasSet &AS = I; + // Skip Forward Alias Sets, as this should be ignored as part of + // the AliasSetTracker object. + if (AS.isForwardingAliasSet()) + continue; + // With MustAlias its not worth adding runtime bound check. + if (AS.isMustAlias()) + return false; + Value *SomePtr = AS.begin()->getValue(); + bool typeCheck = true; + // Check for Mod & MayAlias + HasMayAlias |= AS.isMayAlias(); + HasMod |= AS.isMod(); + for (const auto &A : AS) { + Value *Ptr = A.getValue(); + // Alias tracker should have pointers of same data type. + typeCheck = (typeCheck && (SomePtr->getType() == Ptr->getType())); + // Insert memory to PointerSet + PointerSet.insert(Ptr); + // Check any memory modified in loop body. + } + // At least one alias tracker should have pointers of same data type. + TypeSafety |= typeCheck; + } + // Ensure types should be of same type. + if (!TypeSafety) { + DEBUG(dbgs() << " Alias tracker type safety failed!\n"); + return false; + } + // Ensure loop body shouldn't be read only. + if (!HasMod) { + DEBUG(dbgs() << " No memory modified in loop body\n"); + return false; + } + // Make sure alias set has may alias case. + // If there no alias memory ambiguity, return false. + if (!HasMayAlias) { + DEBUG(dbgs() << " No ambiguity in memory access.\n"); + return false; + } + // At least 2 pointers needed for runtime check. + if (PointerSet.size() <= 1) { + DEBUG(dbgs() << " Didnt found sufficient pointers in loop body\n"); + return false; + } + // Check memory threshold, should be in limit. + if (PointerSet.size() > RuntimeMemoryCheckThreshold) { + DEBUG(dbgs() << " Loop body has pointers more then defined threshold\n"); + return false; + } + return true; +} + +/// \brief Check loop instructions safe for Loop versioning. +/// It returns true if it's safe else returns false. +/// Consider following: +/// 1) Check all load store in loop body are non atomic & non volatile. +/// 2) Check function call safety, by ensuring its not accessing memory. +/// 3) Loop body shouldn't have any may throw instruction. +bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) { + assert(I != nullptr && "Null instruction found!"); + // Check function call safety + if(dyn_cast(I) && !AA->doesNotAccessMemory(CallSite(I))) { + DEBUG(dbgs() << " Unsafe call site found.\n"); + return false; + } + // Avoid loops with possiblity of throw + if (I->mayThrow()) { + DEBUG(dbgs() << " may throw instruction found in loop body\n"); + return false; + } + // Check parallel loop + const bool IsAnnotatedParallel = CurLoop->isAnnotatedParallel(); + // If current instruction is load instructions + // make sure it's a simple load(non atomic & non volatile) + if (I->mayReadFromMemory()) { + LoadInst *Ld = dyn_cast(I); + if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) { + DEBUG(dbgs() << " Found a non-simple load.\n"); + return false; + } + LoadAndStoreCounter++; + collectStridedAccess(Ld); + Value *Ptr = Ld->getPointerOperand(); + // Check loop invariant. + if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop)) + InvariantCounter++; + } + // If current instruction is store instruction + // make sure it's a simple store(non atomic & non volatile) + else if (I->mayWriteToMemory()) { + StoreInst *St = dyn_cast(I); + if (!St || (!St->isSimple() && !IsAnnotatedParallel)) { + DEBUG(dbgs() << " Found a non-simple store.\n"); + return false; + } + LoadAndStoreCounter++; + collectStridedAccess(St); + Value *Ptr = St->getPointerOperand(); + // Check loop invariant. + if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop)) + InvariantCounter++; + + IsReadOnlyLoop = false; + } + return true; +} + +/// \brief Check loop instructions and confirms it's good for +/// LoopVersioningLICM. +bool LoopVersioningLICM::legalLoopInstructions() { + // Resetting counters. + LoadAndStoreCounter = 0; + InvariantCounter = 0; + IsReadOnlyLoop = true; + // Iterate over loop blocks and instructions of each block and check + // instruction safety. + for (auto *Block : CurLoop->getBlocks()) + for (auto &Inst : *Block) { + // If instruction in unsafe just return false. + if (!instructionSafeForVersioning(&Inst)) + return false; + } + // Get LoopAccessInfo from current loop. + LAI = &LAA->getInfo(CurLoop, Strides); + // Check LoopAccessInfo for need of runtime check. + if (!LAI->getRuntimePointerChecking()->Need) { + DEBUG(dbgs() << " LAI: Runtime check not needed !!\n"); + return false; + } + // Loop should have at least one invariant load or store instruction. + if (!InvariantCounter) { + DEBUG(dbgs() << " Invariant not found !!\n"); + return false; + } + // Read only loop not allowed. + if (IsReadOnlyLoop) { + DEBUG(dbgs() << " Found a read-only loop!\n"); + return false; + } + // Profitablity check: + // Check invariant threshold, should be in limit. + float InvariantPercentage = + ((float)(InvariantCounter) / (float)(LoadAndStoreCounter)) * 100; + if (InvariantPercentage < InvariantThreshold) { + DEBUG(dbgs() + << " Invariant load & store are less then defined threshold\n"); + DEBUG(dbgs() << " Invariant loads & stores: " << InvariantPercentage + << "%\n"); + DEBUG(dbgs() << " Invariant loads & store threshold: " + << InvariantThreshold << "%\n"); + return false; + } + return true; +} + +/// \brief It checks loop is already visited or not. +/// check loop meta data, If loop revisited return true +/// else false. +bool LoopVersioningLICM::isLoopAlreadyVisited() { + // Check for loop latch & terminator instruction. + if (!CurLoop->getLoopLatch() || !CurLoop->getLoopLatch()->getTerminator()) + return false; + // Check for LoopVersioningLICM meta data. + Instruction *I = CurLoop->getLoopLatch()->getTerminator(); + if (I && I->getMetadata(LOOP_VERSIONING_LICM_METADATA)) { + return true; + } + return false; +} + +/// \brief Checks legality for LoopVersioningLICM by considering following: +/// a) loop structure legality b) loop instruction legality +/// c) loop memory access legality. +/// Return true if legal else returns false. +bool LoopVersioningLICM::isLegalForVersioning() { + DEBUG(dbgs() << "Loop: " << *CurLoop); + // Make sure not re-visiting same loop again. + if (isLoopAlreadyVisited()) { + DEBUG( + dbgs() << " Revisiting loop in LoopVersioningLICM not allowed.\n\n"); + return false; + } + // Check loop structure leagality. + if (!legalLoopStructure()) { + DEBUG( + dbgs() << " Loop structure not suitable for LoopVersioningLICM\n\n"); + return false; + } + // Check loop instruction leagality. + if (!legalLoopInstructions()) { + DEBUG(dbgs() + << " Loop instructions not suitable for LoopVersioningLICM\n\n"); + return false; + } + // Check loop memory access leagality. + if (!legalLoopMemoryAccesses()) { + DEBUG(dbgs() + << " Loop memory access not suitable for LoopVersioningLICM\n\n"); + return false; + } + // Loop versioning is feasible, return true. + DEBUG(dbgs() << " Loop Versioning found to be beneficial\n\n"); + return true; +} + +/// \brief Update loop with aggressive aliasing. +/// It marks load & store memory instructions with no-alias. +void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) { + // Get latch terminator instruction. + Instruction *I = VerLoop->getLoopLatch()->getTerminator(); + // Create alias scope domain. + MDBuilder MDB(I->getContext()); + MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("LVDomain"); + StringRef Name = "LVAliasScope"; + SmallVector Scopes, NoAliases; + MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); + // Iterate over each instruction of loop. + // set no-alias for all load & store instructions. + for (auto *Block : CurLoop->getBlocks()) { + for (auto &Inst : *Block) { + // Only interested in instruction that may modify or read memory. + if (!Inst.mayReadFromMemory() && !Inst.mayWriteToMemory()) + continue; + Scopes.push_back(NewScope); + NoAliases.push_back(NewScope); + // Set no-alias for current instruction. + Inst.setMetadata( + LLVMContext::MD_noalias, + MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_noalias), + MDNode::get(Inst.getContext(), NoAliases))); + // set alias-scope for current instruction. + Inst.setMetadata( + LLVMContext::MD_alias_scope, + MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_alias_scope), + MDNode::get(Inst.getContext(), Scopes))); + } + } +} + +bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) { + if (skipOptnoneFunction(L)) + return false; + Changed = false; + // Get Analysis information. + LI = &getAnalysis().getLoopInfo(); + AA = &getAnalysis().getAAResults(); + SE = &getAnalysis().getSE(); + DT = &getAnalysis().getDomTree(); + TLI = &getAnalysis().getTLI(); + LAA = &getAnalysis(); + LAI = nullptr; + // Set Current Loop + CurLoop = L; + // Get the preheader block. + Preheader = L->getLoopPreheader(); + // Initial allocation + CurAST = new AliasSetTracker(*AA); + + // Loop over the body of this loop, construct AST. + for (auto *Block : L->getBlocks()) { + if (LI->getLoopFor(Block) == L) // Ignore blocks in subloop. + CurAST->add(*Block); // Incorporate the specified basic block + } + // Check feasiblity of LoopVersioningLICM. + // If versioning found to be feasible and beneficial then proceed + // else simply return, by cleaning up memory. + if (isLegalForVersioning()) { + // Do loop versioning. + // Create memcheck for memory accessed inside loop. + // Clone original loop, and set blocks properly. + LoopVersioning LVer(*LAI, CurLoop, LI, DT); + LVer.versionLoop(); + // Set Loop Versioning metaData for original loop. + setLoopMetaData(LVer.getNonVersionedLoop(), LOOP_VERSIONING_LICM_METADATA); + // Set Loop Versioning metaData for version loop. + setLoopMetaData(LVer.getVersionedLoop(), LOOP_VERSIONING_LICM_METADATA); + // Update version loop with aggressive aliasing. + setNoAliasToLoop(LVer.getVersionedLoop()); + // Update Dominator tree with new changes. + DT->recalculate(*CurLoop->getHeader()->getParent()); + Changed = true; + } + // Delete allocated memory. + delete CurAST; + return Changed; +} + +char LoopVersioningLICM::ID = 0; +INITIALIZE_PASS_BEGIN(LoopVersioningLICM, "do-loop-versioning-licm", + "Loop Versioning For LICM", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis) +INITIALIZE_PASS_END(LoopVersioningLICM, "do-loop-versioning-licm", + "Loop Versioning For LICM", false, false) + +Pass *llvm::createLoopVersioningLICMPass() { return new LoopVersioningLICM(); } Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -56,6 +56,7 @@ initializeLoopRerollPass(Registry); initializeLoopUnrollPass(Registry); initializeLoopUnswitchPass(Registry); + initializeLoopVersioningLICMPass(Registry); initializeLoopIdiomRecognizePass(Registry); initializeLowerAtomicPass(Registry); initializeLowerExpectIntrinsicPass(Registry); Index: test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll =================================================================== --- test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll +++ test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll @@ -0,0 +1,66 @@ +; RUN: opt < %s -O1 -S -loop-versioning-licm -licm -debug-only=loop-versioning-licm 2>&1 | FileCheck %s +; +; Test to confirm loop is a candidate for LoopVersioningLICM. +; It also confirms invariant moved out of loop. +; +; CHECK: Loop: Loop at depth 2 containing: %for.body3
+; CHECK-NEXT: Loop Versioning found to be beneficial +; +; CHECK: for.body3: +; CHECK-NEXT: %add86 = phi i32 [ %arrayidx7.promoted, %for.body3.ph ], [ %add8, %for.body3 ] +; CHECK-NEXT: %j.113 = phi i32 [ %j.016, %for.body3.ph ], [ %inc, %for.body3 ] +; CHECK-NEXT: %idxprom = zext i32 %j.113 to i64 +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom +; CHECK-NEXT: store i32 %add, i32* %arrayidx, align 4, !alias.scope !5, !noalias !5 +; CHECK-NEXT: %add8 = add nsw i32 %add86, %add +; CHECK-NEXT: %inc = add nuw i32 %j.113, 1 +; CHECK-NEXT: %cmp2 = icmp ult i32 %inc, %itr +; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit5, !llvm.loop.licm_versioning !0 +define i32 @foo(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 { +entry: + %cmp14 = icmp eq i32 %itr, 0 + br i1 %cmp14, label %for.end13, label %for.cond1.preheader.preheader + +for.cond1.preheader.preheader: ; preds = %entry + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc11 + %j.016 = phi i32 [ %j.1.lcssa, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ] + %i.015 = phi i32 [ %inc12, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ] + %cmp212 = icmp ult i32 %j.016, %itr + br i1 %cmp212, label %for.body3.lr.ph, label %for.inc11 + +for.body3.lr.ph: ; preds = %for.cond1.preheader + %add = add i32 %i.015, %itr + %idxprom6 = zext i32 %i.015 to i64 + %arrayidx7 = getelementptr inbounds i32, i32* %var3, i64 %idxprom6 + br label %for.body3 + +for.body3: ; preds = %for.body3.lr.ph, %for.body3 + %j.113 = phi i32 [ %j.016, %for.body3.lr.ph ], [ %inc, %for.body3 ] + %idxprom = zext i32 %j.113 to i64 + %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom + store i32 %add, i32* %arrayidx, align 4 + %0 = load i32, i32* %arrayidx7, align 4 + %add8 = add nsw i32 %0, %add + store i32 %add8, i32* %arrayidx7, align 4 + %inc = add nuw i32 %j.113, 1 + %cmp2 = icmp ult i32 %inc, %itr + br i1 %cmp2, label %for.body3, label %for.inc11.loopexit + +for.inc11.loopexit: ; preds = %for.body3 + br label %for.inc11 + +for.inc11: ; preds = %for.inc11.loopexit, %for.cond1.preheader + %j.1.lcssa = phi i32 [ %j.016, %for.cond1.preheader ], [ %itr, %for.inc11.loopexit ] + %inc12 = add nuw i32 %i.015, 1 + %cmp = icmp ult i32 %inc12, %itr + br i1 %cmp, label %for.cond1.preheader, label %for.end13.loopexit + +for.end13.loopexit: ; preds = %for.inc11 + br label %for.end13 + +for.end13: ; preds = %for.end13.loopexit, %entry + ret i32 0 +} + Index: test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll =================================================================== --- test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll +++ test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll @@ -0,0 +1,63 @@ +; RUN: opt < %s -O2 -S -loop-versioning-licm -licm -debug-only=loop-versioning-licm 2>&1 | FileCheck %s +; +; Test to confirm loop is a good candidate for LoopVersioningLICM +; It also confirms invariant moved out of loop. +; +; CHECK: Loop: Loop at depth 2 containing: %for.body3
+; CHECK-NEXT: Loop Versioning found to be beneficial +; +; CHECK: for.inc14.loopexit19: +; CHECK-NEXT: %add11.lcssa = phi i32 [ %add11, %for.body3 ] +; CHECK-NEXT: store i32 %add11.lcssa, i32* %arrayidx10, align 4, !alias.scope !6, !noalias !6 +; CHECK-NEXT: br label %for.inc14 +; +define i32 @foo(i32* nocapture %var1, i32* nocapture %var2, i32* nocapture %var3, i32 %itr) #0 { +entry: + %cmp17 = icmp eq i32 %itr, 0 + br i1 %cmp17, label %for.end16, label %for.cond1.preheader.preheader + +for.cond1.preheader.preheader: ; preds = %entry + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc14 + %j.019 = phi i32 [ %j.1.lcssa, %for.inc14 ], [ 0, %for.cond1.preheader.preheader ] + %i.018 = phi i32 [ %inc15, %for.inc14 ], [ 0, %for.cond1.preheader.preheader ] + %cmp215 = icmp ult i32 %j.019, %itr + br i1 %cmp215, label %for.body3.lr.ph, label %for.inc14 + +for.body3.lr.ph: ; preds = %for.cond1.preheader + %add = add i32 %i.018, %itr + %idxprom9 = zext i32 %i.018 to i64 + %arrayidx10 = getelementptr inbounds i32, i32* %var3, i64 %idxprom9 + br label %for.body3 + +for.body3: ; preds = %for.body3.lr.ph, %for.body3 + %j.116 = phi i32 [ %j.019, %for.body3.lr.ph ], [ %inc, %for.body3 ] + %idxprom = zext i32 %j.116 to i64 + %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom + store i32 %add, i32* %arrayidx, align 4 + %arrayidx6 = getelementptr inbounds i32, i32* %var2, i64 %idxprom + store i32 %add, i32* %arrayidx6, align 4 + %0 = load i32, i32* %arrayidx, align 4 + %1 = load i32, i32* %arrayidx10, align 4 + %add11 = add nsw i32 %1, %0 + store i32 %add11, i32* %arrayidx10, align 4 + %inc = add nuw i32 %j.116, 1 + %cmp2 = icmp ult i32 %inc, %itr + br i1 %cmp2, label %for.body3, label %for.inc14.loopexit + +for.inc14.loopexit: ; preds = %for.body3 + br label %for.inc14 + +for.inc14: ; preds = %for.inc14.loopexit, %for.cond1.preheader + %j.1.lcssa = phi i32 [ %j.019, %for.cond1.preheader ], [ %itr, %for.inc14.loopexit ] + %inc15 = add nuw i32 %i.018, 1 + %cmp = icmp ult i32 %inc15, %itr + br i1 %cmp, label %for.cond1.preheader, label %for.end16.loopexit + +for.end16.loopexit: ; preds = %for.inc14 + br label %for.end16 + +for.end16: ; preds = %for.end16.loopexit, %entry + ret i32 0 +} Index: test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll =================================================================== --- test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll +++ test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll @@ -0,0 +1,44 @@ +; RUN: opt < %s -O1 -S -loop-versioning-licm -debug-only=loop-versioning-licm 2>&1 | FileCheck %s +; +; Test to confirm loop is not a candidate for LoopVersioningLICM. +; +; CHECK: Loop: Loop at depth 2 containing: %for.body3
+; CHECK-NEXT: LAI: Runtime check not needed !! +; CHECK-NEXT: Loop instructions not suitable for LoopVersioningLICM + +define i32 @foo(i32* nocapture %var1, i32 %itr) #0 { +entry: + %cmp18 = icmp eq i32 %itr, 0 + br i1 %cmp18, label %for.end8, label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc6 + %j.020 = phi i32 [ %j.1.lcssa, %for.inc6 ], [ 0, %entry ] + %i.019 = phi i32 [ %inc7, %for.inc6 ], [ 0, %entry ] + %cmp216 = icmp ult i32 %j.020, %itr + br i1 %cmp216, label %for.body3.lr.ph, label %for.inc6 + +for.body3.lr.ph: ; preds = %for.cond1.preheader + %0 = zext i32 %j.020 to i64 + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.body3.lr.ph + %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ] + %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv + %1 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %1, %itr + store i32 %add, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %itr + br i1 %exitcond, label %for.inc6, label %for.body3 + +for.inc6: ; preds = %for.body3, %for.cond1.preheader + %j.1.lcssa = phi i32 [ %j.020, %for.cond1.preheader ], [ %itr, %for.body3 ] + %inc7 = add nuw i32 %i.019, 1 + %exitcond21 = icmp eq i32 %inc7, %itr + br i1 %exitcond21, label %for.end8, label %for.cond1.preheader + +for.end8: ; preds = %for.inc6, %entry + ret i32 0 +} +