Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -4528,6 +4528,20 @@ !0 = !{!"llvm.loop.unroll.full"} +'``llvm.loop.licm_versioning``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This metadata indicates that the loop is already versioned by loop versioning +for loop invariant code motion (LICM). This metadata is applicable on both the +original loop and all other loop versions. The loop-versioning-licm pass will +not create additional loop versions of loops with this metadata. The metadata +has a single operand which is the string ``llvm.loop.licm_versioning``. +For example: + +.. code-block:: llvm + + !0 = !{!"llvm.loop.licm_versioning"} + '``llvm.mem``' ^^^^^^^^^^^^^^^ Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -174,6 +174,7 @@ void initializeLoopRerollPass(PassRegistry&); void initializeLoopUnrollPass(PassRegistry&); void initializeLoopUnswitchPass(PassRegistry&); +void initializeLoopVersioningLICMPass(PassRegistry&); void initializeLoopIdiomRecognizePass(PassRegistry&); void initializeLowerAtomicPass(PassRegistry&); void initializeLowerBitSetsPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -110,6 +110,7 @@ (void) llvm::createLoopRerollPass(); (void) llvm::createLoopUnrollPass(); (void) llvm::createLoopUnswitchPass(); + (void) llvm::createLoopVersioningLICMPass(); (void) llvm::createLoopIdiomPass(); (void) llvm::createLoopRotatePass(); (void) llvm::createLowerExpectIntrinsicPass(); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -205,6 +205,12 @@ //===----------------------------------------------------------------------===// // +// LoopVersioningLICM - This pass is a loop versioning pass for LICM. +// +Pass *createLoopVersioningLICMPass(); + +//===----------------------------------------------------------------------===// +// // PromoteMemoryToRegister - This pass is used to promote memory references to // be register references. A simple example of the transformation performed by // this pass is: Index: include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopUtils.h +++ include/llvm/Transforms/Utils/LoopUtils.h @@ -374,6 +374,14 @@ /// \brief Returns the instructions that use values defined in the loop. SmallVector findDefsUsedOutsideOfLoop(Loop *L); + +/// \brief Check string metadata into loop, if it exist return true, +/// else return false. +bool checkStringMetadataIntoLoop(Loop * TheLoop, StringRef Name); + +/// \brief Set input string into loop metadata by keeping other values intact. +void writeStringMetadataIntoLoop(Loop *TheLoop, const char *MDString, + unsigned V = 0); } #endif Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -104,6 +104,10 @@ "enable-loop-load-elim", cl::init(false), cl::Hidden, cl::desc("Enable the new, experimental LoopLoadElimination Pass")); +static cl::opt UseLoopVersioningLICM( + "loop-versioning-licm", cl::init(false), cl::Hidden, + cl::desc("Enable the experimental Loop Versioning LICM pass")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -339,6 +343,10 @@ // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + if (UseLoopVersioningLICM) { + MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM + MPM.add(createLICMPass()); // Hoist loop invariants + } if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) { // Remove avail extern fns and globals definitions if we aren't // compiling an object file for later LTO. For LTO we want to preserve Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -27,6 +27,7 @@ LoopStrengthReduce.cpp LoopUnrollPass.cpp LoopUnswitch.cpp + LoopVersioningLICM.cpp LowerAtomic.cpp LowerExpectIntrinsic.cpp MemCpyOptimizer.cpp Index: lib/Transforms/Scalar/LoopVersioningLICM.cpp =================================================================== --- lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -0,0 +1,646 @@ +//===----------- LoopVersioningLICM.cpp - LICM Loop Versioning ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Most of the time when alias analysis is uncertain about memory access and it +// assumes may-alias. This uncertainty from alias analysis restricts LICM from +// proceeding further. In cases where alias analysis is uncertain we will use +// loop versioning as an alternative. +// +// Loop Versioning will create a version of the loop with aggressive aliasing +// assumptions and the other with conservative (default) aliasing assumptions. +// The version of the loop making aggressive aliasing assumptions will have all +// the memory accesses marked as no-alias. These two versions of loop will be +// preceded by a memory runtime check. This runtime check consists of bound +// checks for all unique memory accessed in loop, and it ensures the lack of +// memory aliasing. Based on this check result at runtime any of the loop gets +// executed, If the runtime check detects any memory aliasing, then the original +// loop is executed. Otherwise, the version with aggressive aliasing assumptions +// is used. +// +// Following are the top level steps: +// +// a) Perform LoopVersioningLICM feasibility check. +// b) If loop is a candidate for versioning then create a memory bound check, +// by considering all the memory access in loop body. +// c) Clone original loop and set all memory access as no-alias in new loop. +// d) Set original loop & versioned loop as a branch target of runtime check +// result. +// +// It transform loop as shown below: +// +// +----------------+ +// |Runtime Memcheck| +// +----------------+ +// | +// +----------+----------------+----------+ +// | | +// +---------+----------+ +-----------+----------+ +// |Orig Loop Preheader | |Cloned Loop Preheader | +// +--------------------+ +----------------------+ +// | | +// +--------------------+ +----------------------+ +// |Orig Loop Body | |Cloned Loop Body | +// +--------------------+ +----------------------+ +// | | +// +--------------------+ +----------------------+ +// |Orig Loop|Exit Block| |Cloned Loop Exit Block| +// +--------------------+ +-----------+----------+ +// | | +// +----------+--------------+-----------+ +// | +// +-----+----+ +// |Join Block| +// +----------+ +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/PredIteratorCache.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopVersioning.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/ValueMapper.h" + +#define DEBUG_TYPE "loop-versioning-licm" +#define LOOP_VERSIONING_LICM_METADATA "llvm.loop.licm_versioning" + +using namespace llvm; + +/// Threshold minimum allowed percentage for possible +/// invariant instruction in a loop. +static cl::opt + LVInvarThreshold("-licm-versioning-invariant-threshold", + cl::desc("LoopVersioningLICM threshold minimum " + "allowed percentage for possible " + "invariant instruction in a loop"), + cl::init(25), cl::Hidden); + +/// Threshold for maximum allowed loop nest/depth +static cl::opt LVLoopDepthThreshold( + "loopver-max-depth-threshold", + cl::desc( + "LoopVersioningLICM threshold for maximum allowed loop nest/depth"), + cl::init(2), cl::Hidden); + +/// Threshold for maximum possible pointers in allowed memcheck +static cl::opt + LVMemchkPtrThreshold("loopver-memcheck-ptr-threshold", + cl::desc("LoopVersioningLICM threshold for maximum " + "possible pointers allowed in memcheck"), + cl::init(5), cl::Hidden); + +/// \brief Create MDNode for input string. +static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) { + LLVMContext &Context = TheLoop->getHeader()->getContext(); + Metadata *MDs[] = { + MDString::get(Context, Name), + ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))}; + return MDNode::get(Context, MDs); +} + +/// \brief Check string metadata in loop, if it exist return true, +/// else return false. +bool llvm::checkStringMetadataIntoLoop(Loop *TheLoop, StringRef Name) { + MDNode *LoopID = TheLoop->getLoopID(); + // Return false if LoopID is false. + if (!LoopID) + return false; + // Iterate over LoopID operands and look for MDString Metadata + for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { + MDNode *MD = dyn_cast(LoopID->getOperand(i)); + if (!MD) + continue; + MDString *S = dyn_cast(MD->getOperand(0)); + if (!S) + continue; + // Return true if MDString holds expected MetaData. + if (Name.equals(S->getString())) + return true; + } + return false; +} + +/// \brief Set input string into loop metadata by keeping other values intact. +void llvm::writeStringMetadataIntoLoop(Loop *TheLoop, const char *MDString, + unsigned V) { + SmallVector MDs(1); + // If the loop already has metadata, retain it. + MDNode *LoopID = TheLoop->getLoopID(); + if (LoopID) { + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + MDNode *Node = cast(LoopID->getOperand(i)); + MDs.push_back(Node); + } + } + // Add new metadata. + MDs.push_back(createStringMetadata(TheLoop, MDString, V)); + // Replace current metadata node with new one. + LLVMContext &Context = TheLoop->getHeader()->getContext(); + MDNode *NewLoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself. + NewLoopID->replaceOperandWith(0, NewLoopID); + TheLoop->setLoopID(NewLoopID); +} + +namespace { +struct LoopVersioningLICM : public LoopPass { + static char ID; + + bool runOnLoop(Loop *L, LPPassManager &LPM) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addRequired(); + AU.addRequiredID(LCSSAID); + AU.addRequired(); + AU.addRequired(); + AU.addRequiredID(LoopSimplifyID); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + } + + using llvm::Pass::doFinalization; + + bool doFinalization() override { return false; } + + LoopVersioningLICM() + : LoopPass(ID), AA(nullptr), SE(nullptr), LI(nullptr), DT(nullptr), + TLI(nullptr), LAA(nullptr), LAI(nullptr), Changed(false), + Preheader(nullptr), CurLoop(nullptr), CurAST(nullptr), + RuntimeMemoryCheckThreshold(LVMemchkPtrThreshold), + LoopDepthThreshold(LVLoopDepthThreshold), + InvariantThreshold(LVInvarThreshold), LoadAndStoreCounter(0), + InvariantCounter(0), IsReadOnlyLoop(true) { + initializeLoopVersioningLICMPass(*PassRegistry::getPassRegistry()); + } + + AliasAnalysis *AA; // Current AliasAnalysis information + ScalarEvolution *SE; // Current ScalarEvolution + LoopInfo *LI; // Current LoopInfo + DominatorTree *DT; // Dominator Tree for the current Loop. + TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding. + LoopAccessAnalysis *LAA; // Current LoopAccessAnalysis + const LoopAccessInfo *LAI; // Current Loop's LoopAccessInfo + + bool Changed; // Set to true when we change anything. + BasicBlock *Preheader; // The preheader block of the current loop. + Loop *CurLoop; // The current loop we are working on. + AliasSetTracker *CurAST; // AliasSet information for the current loop. + ValueToValueMap Strides; + + unsigned RuntimeMemoryCheckThreshold; // Pointer limit for runtime check. + unsigned LoopDepthThreshold; // Maximum loop nest threshold + float InvariantThreshold; // Minimum invariant threshold + unsigned LoadAndStoreCounter; // Counter to track num of load & store + unsigned InvariantCounter; // Counter to track num of invariant + bool IsReadOnlyLoop; // Read only loop marker. + + bool isLegalForVersioning(); + bool legalLoopStructure(); + bool legalLoopInstructions(); + bool legalLoopMemoryAccesses(); + void collectStridedAccess(Value *LoadOrStoreInst); + bool isLoopAlreadyVisited(); + void setNoAliasToLoop(Loop *); + bool instructionSafeForVersioning(Instruction *); + const char *getPassName() const override { return "Loop Versioning"; } +}; +} + +/// \brief Collects stride access from a given value. +void LoopVersioningLICM::collectStridedAccess(Value *MemAccess) { + Value *Ptr = nullptr; + if (LoadInst *LI = dyn_cast(MemAccess)) + Ptr = LI->getPointerOperand(); + else if (StoreInst *SI = dyn_cast(MemAccess)) + Ptr = SI->getPointerOperand(); + else + return; + + Value *Stride = getStrideFromPointer(Ptr, SE, CurLoop); + if (!Stride) + return; + + DEBUG(dbgs() << "Found a strided access that we can version"); + DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n"); + Strides[Ptr] = Stride; +} + +/// \brief Check loop structure and confirms it's good for LoopVersioningLICM. +bool LoopVersioningLICM::legalLoopStructure() { + // Loop must have a preheader, if not return false. + if (!CurLoop->getLoopPreheader()) { + DEBUG(dbgs() << " loop preheader is missing\n"); + return false; + } + // Loop should be innermost loop, if not return false. + if (CurLoop->getSubLoops().size()) { + DEBUG(dbgs() << " loop is not innermost\n"); + return false; + } + // Loop should have a single backedge, if not return false. + if (CurLoop->getNumBackEdges() != 1) { + DEBUG(dbgs() << " loop has multiple backedges\n"); + return false; + } + // Loop must have a single exiting block, if not return false. + if (!CurLoop->getExitingBlock()) { + DEBUG(dbgs() << " loop has multiple exiting block\n"); + return false; + } + // We only handle bottom-tested loop, i.e. loop in which the condition is + // checked at the end of each iteration. With that we can assume that all + // instructions in the loop are executed the same number of times. + if (CurLoop->getExitingBlock() != CurLoop->getLoopLatch()) { + DEBUG(dbgs() << " loop is not bottom tested\n"); + return false; + } + // Loop depth more then LoopDepthThreshold are not allowed + if (CurLoop->getLoopDepth() > LoopDepthThreshold) { + DEBUG(dbgs() << " loop depth is more then threshold\n"); + return false; + } + // Loop should have a dedicated exit block, if not return false. + if (!CurLoop->hasDedicatedExits()) { + DEBUG(dbgs() << " loop does not has dedicated exit blocks\n"); + return false; + } + // Loop should have a trip count, if not return false. + const SCEV *ExitCount = SE->getBackedgeTakenCount(CurLoop); + if (ExitCount == SE->getCouldNotCompute()) { + DEBUG(dbgs() << " loop does not has trip count\n"); + return false; + } + return true; +} + +/// \brief Check memory accesses in loop and confirms it's good for +/// LoopVersioningLICM. +bool LoopVersioningLICM::legalLoopMemoryAccesses() { + assert(LAI != nullptr && "LoopAccessInfo shouldn't be null"); + // Check if LoopAccessAnalysis finds loop memory access safe + // for LoopVersioningLICM. If unsafe return false. + if (!LAI->canVectorizeMemory()) { + DEBUG(dbgs() << " LAA found unsafe memory access.\n"); + return false; + } + bool HasMayAlias = false; + bool TypeSafety = false; + bool HasMod = false; + SmallPtrSet PointerSet; + // Memory check: + // Transform phase will generate a versioned loop and also a runtime check to + // ensure the pointers are independent and they don’t alias. + // In version variant of loop, alias meta data asserts that all access are + // mutually independent. + // + // Pointers aliasing in alias domain are avoided because with multiple + // aliasing domains we may not be able to hoist potential loop invariant + // access out of the loop. + // + // Iterate over alias tracker sets, and confirm following: + // 1) Make sure PointerSet should have at least minimum pointers for + // runtime bound check. + // 2) PointerSet shouldn't have pointers more then RuntimeMemoryCheckThreshold + // 3) Make sure AliasSets doesn't have any must alias set. + for (const auto &I : *CurAST) { + const AliasSet &AS = I; + // Skip Forward Alias Sets, as this should be ignored as part of + // the AliasSetTracker object. + if (AS.isForwardingAliasSet()) + continue; + // With MustAlias its not worth adding runtime bound check. + if (AS.isMustAlias()) + return false; + Value *SomePtr = AS.begin()->getValue(); + bool TypeCheck = true; + // Check for Mod & MayAlias + HasMayAlias |= AS.isMayAlias(); + HasMod |= AS.isMod(); + for (const auto &A : AS) { + Value *Ptr = A.getValue(); + // Alias tracker should have pointers of same data type. + TypeCheck = (TypeCheck && (SomePtr->getType() == Ptr->getType())); + // Insert memory to PointerSet + PointerSet.insert(Ptr); + // Check any memory modified in loop body. + } + // At least one alias tracker should have pointers of same data type. + TypeSafety |= TypeCheck; + } + // Ensure types should be of same type. + if (!TypeSafety) { + DEBUG(dbgs() << " Alias tracker type safety failed!\n"); + return false; + } + // Ensure loop body shouldn't be read only. + if (!HasMod) { + DEBUG(dbgs() << " No memory modified in loop body\n"); + return false; + } + // Make sure alias set has may alias case. + // If there no alias memory ambiguity, return false. + if (!HasMayAlias) { + DEBUG(dbgs() << " No ambiguity in memory access.\n"); + return false; + } + // At least 2 pointers needed for runtime check. + if (PointerSet.size() <= 1) { + DEBUG(dbgs() << " Didn't found sufficient pointers in loop body\n"); + return false; + } + // Check memory threshold, should be in limit. + if (PointerSet.size() > RuntimeMemoryCheckThreshold) { + DEBUG(dbgs() << " Loop body has pointers more then defined threshold\n"); + return false; + } + return true; +} + +/// \brief Check loop instructions safe for Loop versioning. +/// It returns true if it's safe else returns false. +/// Consider following: +/// 1) Check all load store in loop body are non atomic & non volatile. +/// 2) Check function call safety, by ensuring its not accessing memory. +/// 3) Loop body shouldn't have any may throw instruction. +bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) { + assert(I != nullptr && "Null instruction found!"); + // Check function call safety + if (dyn_cast(I) && !AA->doesNotAccessMemory(CallSite(I))) { + DEBUG(dbgs() << " Unsafe call site found.\n"); + return false; + } + // Avoid loops with possiblity of throw + if (I->mayThrow()) { + DEBUG(dbgs() << " May throw instruction found in loop body\n"); + return false; + } + // Parallel loops must not have aliasing loop-invariant memory accesses. + // Hence we don't need to version anything in this case. + if (CurLoop->isAnnotatedParallel()) { + DEBUG(dbgs() << " Parallel loop is not worth versioning\n"); + return false; + } + // If current instruction is load instructions + // make sure it's a simple load (non atomic & non volatile) + if (I->mayReadFromMemory()) { + LoadInst *Ld = dyn_cast(I); + if (!Ld || !Ld->isSimple()) { + DEBUG(dbgs() << " Found a non-simple load.\n"); + return false; + } + LoadAndStoreCounter++; + collectStridedAccess(Ld); + Value *Ptr = Ld->getPointerOperand(); + // Check loop invariant. + if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop)) + InvariantCounter++; + } + // If current instruction is store instruction + // make sure it's a simple store (non atomic & non volatile) + else if (I->mayWriteToMemory()) { + StoreInst *St = dyn_cast(I); + if (!St || !St->isSimple()) { + DEBUG(dbgs() << " Found a non-simple store.\n"); + return false; + } + LoadAndStoreCounter++; + collectStridedAccess(St); + Value *Ptr = St->getPointerOperand(); + // Check loop invariant. + if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop)) + InvariantCounter++; + + IsReadOnlyLoop = false; + } + return true; +} + +/// \brief Check loop instructions and confirms it's good for +/// LoopVersioningLICM. +bool LoopVersioningLICM::legalLoopInstructions() { + // Resetting counters. + LoadAndStoreCounter = 0; + InvariantCounter = 0; + IsReadOnlyLoop = true; + // Iterate over loop blocks and instructions of each block and check + // instruction safety. + for (auto *Block : CurLoop->getBlocks()) + for (auto &Inst : *Block) { + // If instruction in unsafe just return false. + if (!instructionSafeForVersioning(&Inst)) + return false; + } + // Get LoopAccessInfo from current loop. + LAI = &LAA->getInfo(CurLoop, Strides); + // Check LoopAccessInfo for need of runtime check. + if (!LAI->getRuntimePointerChecking()->Need) { + DEBUG(dbgs() << " LAI: Runtime check not needed !!\n"); + return false; + } + // Loop should have at least one invariant load or store instruction. + if (!InvariantCounter) { + DEBUG(dbgs() << " Invariant not found !!\n"); + return false; + } + // Read only loop not allowed. + if (IsReadOnlyLoop) { + DEBUG(dbgs() << " Found a read-only loop!\n"); + return false; + } + // Profitablity check: + // Check invariant threshold, should be in limit. + if (InvariantCounter * 100 < InvariantThreshold * LoadAndStoreCounter) { + DEBUG(dbgs() + << " Invariant load & store are less then defined threshold\n"); + DEBUG(dbgs() << " Invariant loads & stores: " + << ((InvariantCounter * 100) / LoadAndStoreCounter) << "%\n"); + DEBUG(dbgs() << " Invariant loads & store threshold: " + << InvariantThreshold << "%\n"); + return false; + } + return true; +} + +/// \brief It checks loop is already visited or not. +/// check loop meta data, If loop revisited return true +/// else false. +bool LoopVersioningLICM::isLoopAlreadyVisited() { + // Check LoopVersioningLICM metadata into loop + if (checkStringMetadataIntoLoop(CurLoop, LOOP_VERSIONING_LICM_METADATA)) { + return true; + } + return false; +} + +/// \brief Checks legality for LoopVersioningLICM by considering following: +/// a) loop structure legality b) loop instruction legality +/// c) loop memory access legality. +/// Return true if legal else returns false. +bool LoopVersioningLICM::isLegalForVersioning() { + DEBUG(dbgs() << "Loop: " << *CurLoop); + // Make sure not re-visiting same loop again. + if (isLoopAlreadyVisited()) { + DEBUG( + dbgs() << " Revisiting loop in LoopVersioningLICM not allowed.\n\n"); + return false; + } + // Check loop structure leagality. + if (!legalLoopStructure()) { + DEBUG( + dbgs() << " Loop structure not suitable for LoopVersioningLICM\n\n"); + return false; + } + // Check loop instruction leagality. + if (!legalLoopInstructions()) { + DEBUG(dbgs() + << " Loop instructions not suitable for LoopVersioningLICM\n\n"); + return false; + } + // Check loop memory access leagality. + if (!legalLoopMemoryAccesses()) { + DEBUG(dbgs() + << " Loop memory access not suitable for LoopVersioningLICM\n\n"); + return false; + } + // Loop versioning is feasible, return true. + DEBUG(dbgs() << " Loop Versioning found to be beneficial\n\n"); + return true; +} + +/// \brief Update loop with aggressive aliasing assumptions. +/// It marks load & store memory instructions with no-alias. +void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) { + // Get latch terminator instruction. + Instruction *I = VerLoop->getLoopLatch()->getTerminator(); + // Create alias scope domain. + MDBuilder MDB(I->getContext()); + MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("LVDomain"); + StringRef Name = "LVAliasScope"; + SmallVector Scopes, NoAliases; + MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); + // Iterate over each instruction of loop. + // set no-alias for all load & store instructions. + for (auto *Block : CurLoop->getBlocks()) { + for (auto &Inst : *Block) { + // Only interested in instruction that may modify or read memory. + if (!Inst.mayReadFromMemory() && !Inst.mayWriteToMemory()) + continue; + Scopes.push_back(NewScope); + NoAliases.push_back(NewScope); + // Set no-alias for current instruction. + Inst.setMetadata( + LLVMContext::MD_noalias, + MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_noalias), + MDNode::get(Inst.getContext(), NoAliases))); + // set alias-scope for current instruction. + Inst.setMetadata( + LLVMContext::MD_alias_scope, + MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_alias_scope), + MDNode::get(Inst.getContext(), Scopes))); + } + } +} + +bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) { + if (skipOptnoneFunction(L)) + return false; + Changed = false; + // Get Analysis information. + LI = &getAnalysis().getLoopInfo(); + AA = &getAnalysis().getAAResults(); + SE = &getAnalysis().getSE(); + DT = &getAnalysis().getDomTree(); + TLI = &getAnalysis().getTLI(); + LAA = &getAnalysis(); + LAI = nullptr; + // Set Current Loop + CurLoop = L; + // Get the preheader block. + Preheader = L->getLoopPreheader(); + // Initial allocation + CurAST = new AliasSetTracker(*AA); + + // Loop over the body of this loop, construct AST. + for (auto *Block : L->getBlocks()) { + if (LI->getLoopFor(Block) == L) // Ignore blocks in subloop. + CurAST->add(*Block); // Incorporate the specified basic block + } + // Check feasiblity of LoopVersioningLICM. + // If versioning found to be feasible and beneficial then proceed + // else simply return, by cleaning up memory. + if (isLegalForVersioning()) { + // Do loop versioning. + // Create memcheck for memory accessed inside loop. + // Clone original loop, and set blocks properly. + LoopVersioning LVer(*LAI, CurLoop, LI, DT, SE, true); + LVer.versionLoop(); + // Set Loop Versioning metaData for original loop. + writeStringMetadataIntoLoop(LVer.getNonVersionedLoop(), + LOOP_VERSIONING_LICM_METADATA); + // Set Loop Versioning metaData for version loop. + writeStringMetadataIntoLoop(LVer.getVersionedLoop(), + LOOP_VERSIONING_LICM_METADATA); + // Set "llvm.mem.parallel_loop_access" metaData to versioned loop. + writeStringMetadataIntoLoop(LVer.getVersionedLoop(), + "llvm.mem.parallel_loop_access"); + // Update version loop with aggressive aliasing assumption. + setNoAliasToLoop(LVer.getVersionedLoop()); + // Update Dominator tree with new changes. + DT->recalculate(*CurLoop->getHeader()->getParent()); + Changed = true; + } + // Delete allocated memory. + delete CurAST; + return Changed; +} + +char LoopVersioningLICM::ID = 0; +INITIALIZE_PASS_BEGIN(LoopVersioningLICM, "do-loop-versioning-licm", + "Loop Versioning For LICM", false, false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(LoopVersioningLICM, "do-loop-versioning-licm", + "Loop Versioning For LICM", false, false) + +Pass *llvm::createLoopVersioningLICMPass() { return new LoopVersioningLICM(); } Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -56,6 +56,7 @@ initializeLoopRerollPass(Registry); initializeLoopUnrollPass(Registry); initializeLoopUnswitchPass(Registry); + initializeLoopVersioningLICMPass(Registry); initializeLoopIdiomRecognizePass(Registry); initializeLowerAtomicPass(Registry); initializeLowerExpectIntrinsicPass(Registry); Index: test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll =================================================================== --- test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll +++ test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll @@ -0,0 +1,66 @@ +; RUN: opt < %s -O1 -S -loop-versioning-licm -licm -debug-only=loop-versioning-licm 2>&1 | FileCheck %s +; +; Test to confirm loop is a candidate for LoopVersioningLICM. +; It also confirms invariant moved out of loop. +; +; CHECK: Loop: Loop at depth 2 containing: %for.body3
+; CHECK-NEXT: Loop Versioning found to be beneficial +; +; CHECK: for.body3: +; CHECK-NEXT: %add86 = phi i32 [ %arrayidx7.promoted, %for.body3.ph ], [ %add8, %for.body3 ] +; CHECK-NEXT: %j.113 = phi i32 [ %j.016, %for.body3.ph ], [ %inc, %for.body3 ] +; CHECK-NEXT: %idxprom = zext i32 %j.113 to i64 +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom +; CHECK-NEXT: store i32 %add, i32* %arrayidx, align 4, !alias.scope !6, !noalias !6 +; CHECK-NEXT: %add8 = add nsw i32 %add86, %add +; CHECK-NEXT: %inc = add nuw i32 %j.113, 1 +; CHECK-NEXT: %cmp2 = icmp ult i32 %inc, %itr +; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit5, !llvm.loop !7 +define i32 @foo(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 { +entry: + %cmp14 = icmp eq i32 %itr, 0 + br i1 %cmp14, label %for.end13, label %for.cond1.preheader.preheader + +for.cond1.preheader.preheader: ; preds = %entry + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc11 + %j.016 = phi i32 [ %j.1.lcssa, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ] + %i.015 = phi i32 [ %inc12, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ] + %cmp212 = icmp ult i32 %j.016, %itr + br i1 %cmp212, label %for.body3.lr.ph, label %for.inc11 + +for.body3.lr.ph: ; preds = %for.cond1.preheader + %add = add i32 %i.015, %itr + %idxprom6 = zext i32 %i.015 to i64 + %arrayidx7 = getelementptr inbounds i32, i32* %var3, i64 %idxprom6 + br label %for.body3 + +for.body3: ; preds = %for.body3.lr.ph, %for.body3 + %j.113 = phi i32 [ %j.016, %for.body3.lr.ph ], [ %inc, %for.body3 ] + %idxprom = zext i32 %j.113 to i64 + %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom + store i32 %add, i32* %arrayidx, align 4 + %0 = load i32, i32* %arrayidx7, align 4 + %add8 = add nsw i32 %0, %add + store i32 %add8, i32* %arrayidx7, align 4 + %inc = add nuw i32 %j.113, 1 + %cmp2 = icmp ult i32 %inc, %itr + br i1 %cmp2, label %for.body3, label %for.inc11.loopexit + +for.inc11.loopexit: ; preds = %for.body3 + br label %for.inc11 + +for.inc11: ; preds = %for.inc11.loopexit, %for.cond1.preheader + %j.1.lcssa = phi i32 [ %j.016, %for.cond1.preheader ], [ %itr, %for.inc11.loopexit ] + %inc12 = add nuw i32 %i.015, 1 + %cmp = icmp ult i32 %inc12, %itr + br i1 %cmp, label %for.cond1.preheader, label %for.end13.loopexit + +for.end13.loopexit: ; preds = %for.inc11 + br label %for.end13 + +for.end13: ; preds = %for.end13.loopexit, %entry + ret i32 0 +} + Index: test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll =================================================================== --- test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll +++ test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll @@ -0,0 +1,51 @@ +; RUN: opt < %s -O1 -S -loop-versioning-licm -licm -debug-only=loop-versioning-licm -disable-loop-unrolling 2>&1 | FileCheck %s +; +; Test to confirm loop is a good candidate for LoopVersioningLICM +; It also confirms invariant moved out of loop. +; +; CHECK: Loop: Loop at depth 2 containing: %for.body3.us
+; CHECK-NEXT: Loop Versioning found to be beneficial +; +; CHECK: for.cond1.for.inc17_crit_edge.us.loopexit5: ; preds = %for.body3.us +; CHECK-NEXT: %add14.us.lcssa = phi float [ %add14.us, %for.body3.us ] +; CHECK-NEXT: store float %add14.us.lcssa, float* %arrayidx.us, align 4, !alias.scope !7, !noalias !8 +; CHECK-NEXT: br label %for.cond1.for.inc17_crit_edge.us +; +define i32 @foo(float* nocapture %var2, float** nocapture readonly %var3, i32 %itr) #0 { +entry: + %cmp38 = icmp sgt i32 %itr, 1 + br i1 %cmp38, label %for.body3.lr.ph.us, label %for.end19 + +for.body3.us: ; preds = %for.body3.us, %for.body3.lr.ph.us + %0 = phi float [ %.pre, %for.body3.lr.ph.us ], [ %add14.us, %for.body3.us ] + %indvars.iv = phi i64 [ 1, %for.body3.lr.ph.us ], [ %indvars.iv.next, %for.body3.us ] + %1 = trunc i64 %indvars.iv to i32 + %conv.us = sitofp i32 %1 to float + %add.us = fadd float %conv.us, %0 + %arrayidx7.us = getelementptr inbounds float, float* %3, i64 %indvars.iv + store float %add.us, float* %arrayidx7.us, align 4 + %2 = load float, float* %arrayidx.us, align 4 + %add14.us = fadd float %2, %add.us + store float %add14.us, float* %arrayidx.us, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %itr + br i1 %exitcond, label %for.cond1.for.inc17_crit_edge.us, label %for.body3.us + +for.body3.lr.ph.us: ; preds = %entry, %for.cond1.for.inc17_crit_edge.us + %indvars.iv40 = phi i64 [ %indvars.iv.next41, %for.cond1.for.inc17_crit_edge.us ], [ 1, %entry ] + %arrayidx.us = getelementptr inbounds float, float* %var2, i64 %indvars.iv40 + %arrayidx6.us = getelementptr inbounds float*, float** %var3, i64 %indvars.iv40 + %3 = load float*, float** %arrayidx6.us, align 8 + %.pre = load float, float* %arrayidx.us, align 4 + br label %for.body3.us + +for.cond1.for.inc17_crit_edge.us: ; preds = %for.body3.us + %indvars.iv.next41 = add nuw nsw i64 %indvars.iv40, 1 + %lftr.wideiv42 = trunc i64 %indvars.iv.next41 to i32 + %exitcond43 = icmp eq i32 %lftr.wideiv42, %itr + br i1 %exitcond43, label %for.end19, label %for.body3.lr.ph.us + +for.end19: ; preds = %for.cond1.for.inc17_crit_edge.us, %entry + ret i32 0 +} Index: test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll =================================================================== --- test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll +++ test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll @@ -0,0 +1,44 @@ +; RUN: opt < %s -O1 -S -loop-versioning-licm -debug-only=loop-versioning-licm 2>&1 | FileCheck %s +; +; Test to confirm loop is not a candidate for LoopVersioningLICM. +; +; CHECK: Loop: Loop at depth 2 containing: %for.body3
+; CHECK-NEXT: LAI: Runtime check not needed !! +; CHECK-NEXT: Loop instructions not suitable for LoopVersioningLICM + +define i32 @foo(i32* nocapture %var1, i32 %itr) #0 { +entry: + %cmp18 = icmp eq i32 %itr, 0 + br i1 %cmp18, label %for.end8, label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc6 + %j.020 = phi i32 [ %j.1.lcssa, %for.inc6 ], [ 0, %entry ] + %i.019 = phi i32 [ %inc7, %for.inc6 ], [ 0, %entry ] + %cmp216 = icmp ult i32 %j.020, %itr + br i1 %cmp216, label %for.body3.lr.ph, label %for.inc6 + +for.body3.lr.ph: ; preds = %for.cond1.preheader + %0 = zext i32 %j.020 to i64 + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.body3.lr.ph + %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ] + %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv + %1 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %1, %itr + store i32 %add, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %itr + br i1 %exitcond, label %for.inc6, label %for.body3 + +for.inc6: ; preds = %for.body3, %for.cond1.preheader + %j.1.lcssa = phi i32 [ %j.020, %for.cond1.preheader ], [ %itr, %for.body3 ] + %inc7 = add nuw i32 %i.019, 1 + %exitcond21 = icmp eq i32 %inc7, %itr + br i1 %exitcond21, label %for.end8, label %for.cond1.preheader + +for.end8: ; preds = %for.inc6, %entry + ret i32 0 +} +