Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -155,6 +155,7 @@ void initializeJumpThreadingPass(PassRegistry&); void initializeLCSSAPass(PassRegistry&); void initializeLICMPass(PassRegistry&); +void initializeLoopVersioningPass(PassRegistry&); void initializeLazyValueInfoPass(PassRegistry&); void initializeLibCallAliasAnalysisPass(PassRegistry&); void initializeLintPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -95,6 +95,7 @@ (void) llvm::createJumpInstrTablesPass(); (void) llvm::createLCSSAPass(); (void) llvm::createLICMPass(); + (void) llvm::createLoopVersioningPass(); (void) llvm::createLazyValueInfoPass(); (void) llvm::createLoopExtractorPass(); (void) llvm::createLoopSimplifyPass(); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -140,6 +140,13 @@ //===----------------------------------------------------------------------===// // +// LoopVersioning - This pass is a loop multi versioning pass. +// +Pass *createLoopVersioningPass(); +extern char &LoopVersioningID; + +//===----------------------------------------------------------------------===// +// // LoopStrengthReduce - This pass is strength reduces GEP instructions that use // a loop's canonical induction variable as one of their indices. // Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -55,6 +55,10 @@ cl::init(true), cl::Hidden, cl::desc("Enable the new, experimental SROA pass")); +static cl::opt UseLoopVersioning("loop-versioning", + cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental Loop Versioning pass")); + static cl::opt RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); @@ -194,9 +198,15 @@ MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createGlobalOptimizerPass()); // Optimize out global vars - MPM.add(createDeadArgEliminationPass()); // Dead argument elimination - + // Do loop versioning if right option provided, default disabled. + if (UseLoopVersioning) + { + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLICMPass()); // Hoist loop invariants + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopVersioningPass()); // Do Loop Versioning + } MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -14,6 +14,7 @@ IndVarSimplify.cpp JumpThreading.cpp LICM.cpp + LoopVersioning.cpp LoadCombine.cpp LoopDeletion.cpp LoopIdiomRecognize.cpp Index: lib/Transforms/Scalar/LoopVersioning.cpp =================================================================== --- lib/Transforms/Scalar/LoopVersioning.cpp +++ lib/Transforms/Scalar/LoopVersioning.cpp @@ -0,0 +1,855 @@ +//===-- LoopVersioning.cpp - Creates new version of a loop ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Dominators.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/MDBuilder.h" + + +using namespace llvm; + +#define DEBUG_TYPE "LoopVersioning" +typedef SmallPtrSet ValueSet; + +// Maximum memory/pointer limit for memory check. +static const unsigned RuntimeMemoryCheckThreshold = 5; +// Maximum loop nest threshold +static const unsigned LoopDepthThreshold = 2; + +/// \brief Find the operand of the GEP that should be checked for consecutive +/// stores. This ignores trailing indices that have no effect on the final +/// pointer. +static unsigned getGEPInductionOperand(const DataLayout *DL, + const GetElementPtrInst *Gep) { + unsigned LastOperand = Gep->getNumOperands() - 1; + unsigned GEPAllocSize = DL->getTypeAllocSize( + cast(Gep->getType()->getScalarType())->getElementType()); + + // Walk backwards and try to peel off zeros. + while (LastOperand > 1 && + match(Gep->getOperand(LastOperand), llvm::PatternMatch::m_Zero())) { + // Find the type we're currently indexing into. + gep_type_iterator GEPTI = gep_type_begin(Gep); + std::advance(GEPTI, LastOperand - 1); + + // If it's a type with the same allocation size as the result of the GEP we + // can peel off the zero index. + if (DL->getTypeAllocSize(*GEPTI) != GEPAllocSize) + break; + --LastOperand; + } + + return LastOperand; +} + +///\brief Remove GEPs whose indices but the last one are loop invariant and +/// return the induction operand of the gep pointer. +static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, + const DataLayout *DL, Loop *Lp) { + GetElementPtrInst *GEP = dyn_cast(Ptr); + if (!GEP) + return Ptr; + + unsigned InductionOperand = getGEPInductionOperand(DL, GEP); + + // Check that all of the gep indices are uniform except for our induction + // operand. + for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) + if (i != InductionOperand && + !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp)) + return Ptr; + return GEP->getOperand(InductionOperand); +} + +static Instruction * getFirstInst(Instruction *FirstInst, Value *V, + Instruction *Loc) { + if (FirstInst) + return FirstInst; + if (Instruction *I = dyn_cast(V)) + return I->getParent() == Loc->getParent() ? I : nullptr; + return nullptr; +} + +/// cloneLoop - Recursively clone the specified loop and all of its children, +/// mapping the blocks with the specified map. +static Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, + LoopInfo *LI, LPPassManager *LPM) { + Loop *New = new Loop(); + LPM->insertLoop(New, PL); + + // Add all of the blocks in L to the new loop. + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) + if (LI->getLoopFor(*I) == L) + New->addBasicBlockToLoop(cast(VM[*I]), *LI); + + // Add all of the subloops to the new loop. + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + cloneLoop(*I, New, VM, LI, LPM); + + return New; +} + +struct RuntimeMemoryCheck { + private: + Loop *CurLoop; + ScalarEvolution *SE; + const DataLayout *DL; + ValueSet *PointerSet; + SmallVector , 2> Starts; + SmallVector , 2> Ends; + public: + std::pair createRuntimeCheck(Instruction *Loc); + RuntimeMemoryCheck(Loop *CurLoop, ScalarEvolution *SE, const DataLayout *, + ValueSet *IPointerSet); + }; + +RuntimeMemoryCheck::RuntimeMemoryCheck(Loop *ICurLoop, ScalarEvolution *ISE, + const DataLayout *IDL, + ValueSet *IPointerSet) + :CurLoop(ICurLoop), SE(ISE), + DL(IDL), PointerSet(IPointerSet) { + Starts.clear(); + Ends.clear(); +} + +std::pair +RuntimeMemoryCheck::createRuntimeCheck(Instruction *Loc) +{ + LLVMContext &Ctx = Loc->getContext(); + SCEVExpander Exp(*SE, "LVer.induction"); + Instruction *FirstInst = nullptr; + for (ValueSet::const_iterator I = PointerSet->begin(), E = PointerSet->end(); + I != E; ++I) { + Value* Ptr = *I; + const SCEV *Sc = SE->getSCEV(Ptr); + const SCEVAddRecExpr *AR = dyn_cast(Sc); + + assert(AR != 0 && "Null SCEVAddRecExpr found"); + + if(CurLoop->isLoopInvariant(Ptr)) + { + Starts.push_back(Ptr); + Ends.push_back(Ptr); + } + else + { + const SCEV *Ex = SE->getBackedgeTakenCount(CurLoop); + const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); + unsigned AS = Ptr->getType()->getPointerAddressSpace(); + Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); + Value *Start = Exp.expandCodeFor(AR->getStart(), PtrArithTy, Loc); + Value *End = Exp.expandCodeFor(ScEnd, PtrArithTy, Loc); + Starts.push_back(Start); + Ends.push_back(End); + } + } + + IRBuilder<> ChkBuilder(Loc); + + Value *MemoryRuntimeCheck = nullptr; + for (unsigned i = 0; i < PointerSet->size(); ++i) { + for (unsigned j = i+1; j < PointerSet->size(); ++j) { + + unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace(); + unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace(); + + assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) && + (AS1 == Ends[i]->getType()->getPointerAddressSpace()) && + "Trying to bounds check pointers with different address spaces"); + + Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); + Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); + + Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc"); + Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc"); + Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc"); + Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc"); + + Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "LVer.bound0"); + FirstInst = getFirstInst(FirstInst, Cmp0, Loc); + + Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "LVer.bound1"); + FirstInst = getFirstInst(FirstInst, Cmp1, Loc); + + Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "LVer.found.conflict"); + FirstInst = getFirstInst(FirstInst, IsConflict, Loc); + + if (MemoryRuntimeCheck) { + IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, + "LVer.conflict.rdx"); + FirstInst = getFirstInst(FirstInst, IsConflict, Loc); + } + MemoryRuntimeCheck = IsConflict; + } + } + + + Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck, + ConstantInt::getTrue(Ctx)); + ChkBuilder.Insert(Check, "LVer.memcheck.conflict"); + FirstInst = getFirstInst(FirstInst, Check, Loc); + return std::make_pair(FirstInst, Check); +} + + +namespace { + struct LoopVersioning : public LoopPass { + static char ID; + + LoopVersioning() : LoopPass(ID) { + initializeLoopVersioningPass(*PassRegistry::getPassRegistry()); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addRequired(); + AU.addRequiredID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } + + using llvm::Pass::doFinalization; + + bool doFinalization() override { + return false; + } + + AliasAnalysis *AA; // Current AliasAnalysis information + ScalarEvolution *SE; + LoopInfo *LI; // Current LoopInfo + DominatorTree *DT; // Dominator Tree for the current Loop. + const DataLayout *DL; // DataLayout for constant folding. + TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding. + + bool Changed; // Set to true when we change anything. + BasicBlock *Preheader; // The preheader block of the current loop... + Loop *CurLoop; // The current loop we are working on... + AliasSetTracker *CurAST; // AliasSet information for the current loop... + ValueSet *PointerSet; + MapVector * UniquePointer; + + bool hasComputableBounds(ScalarEvolution *, Value *); + bool isUniquePointer(Value *); + bool isGuaranteedToExecute(Instruction &Inst); + bool isVersioningBeneficial(); + Loop * versionizeLoop(LPPassManager &); + void splitExitEdges(Loop *, const SmallVectorImpl &); + + const char *getPassName() const override { + return "Loop Versioning"; + } + }; +} + +bool LoopVersioning::hasComputableBounds(ScalarEvolution *SE, Value *Ptr) { + const SCEV *PtrScev = SE->getSCEV(Ptr); + const SCEVAddRecExpr *AR = dyn_cast(PtrScev); + if (AR) + return AR->isAffine(); + return false; +} + +/// splitExitEdges - Split all of the edges from inside the loop to their exit +/// blocks. Update the appropriate Phi nodes as we do so. +void LoopVersioning::splitExitEdges(Loop *L, + const SmallVectorImpl &ExitBlocks){ + + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = ExitBlocks[i]; + SmallVector Preds(pred_begin(ExitBlock), + pred_end(ExitBlock)); + + // Although SplitBlockPredecessors doesn't preserve loop-simplify in + // general, if we call it on all predecessors of all exits then it does. + if (!ExitBlock->isLandingPad()) { + SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", AA, DT, + LI, this->mustPreserveAnalysisID(LCSSAID)); + } else { + SmallVector NewBBs; + SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa", + NewBBs, AA, DT, LI); + } + } +} + +bool LoopVersioning::isUniquePointer(Value * Ptr) +{ + // Get GetElementPtrInst, If not available return false. + GetElementPtrInst *I = dyn_cast(Ptr); + if (!I) + return false; + + // Check for input Value pointer: + // Query unique pointer map to see pointer already visited. + // if its a visited pointer, simply return false. + MapVector::const_iterator It = UniquePointer->find(Ptr); + if(It != UniquePointer->end()) + { + ++(*UniquePointer)[Ptr]; + return false; + } + // Its a new memory, update map with it. + ++(*UniquePointer)[Ptr]; + + // Check for GetElementPtrInst operand: + // Get pointer operand from instruction + // Query unique pointer map to see pointer operand already visited. + // if visited, simply return false. + Value *PtrOprnd = I->getPointerOperand(); + It = UniquePointer->find(PtrOprnd); + if(It != UniquePointer->end()) + { + ++(*UniquePointer)[PtrOprnd]; + return false; + } + // Its a new memory, update map with it. + ++(*UniquePointer)[PtrOprnd]; + + // Check for GEP induction operand: + // Get pointer operand from GEP for induction operand + // Query unique pointer map to see pointer operand already visited. + // if visited, simply return false. + Value *StripPtr = stripGetElementPtr(Ptr, SE, DL, CurLoop); + It = UniquePointer->find(StripPtr); + if(It != UniquePointer->end()) + { + ++(*UniquePointer)[StripPtr]; + return false; + } + // Its a new memory, update map with it. + ++(*UniquePointer)[StripPtr]; + return true; +} + +bool LoopVersioning::isVersioningBeneficial() +{ + // Loop must have a preheader, if not return false. + if (!CurLoop->getLoopPreheader()) { + DEBUG(dbgs() << "loop preheader is missing"); + return false; + } + // Loop should be innermost loops, if not return false. + if(CurLoop->getSubLoops().size()){ + DEBUG(dbgs() << "loop is not innermost"); + return false; + } + // Loop should have a single backedge, if not return false. + if (CurLoop->getNumBackEdges() != 1) { + DEBUG(dbgs() << "loop has multiple backedges"); + return false; + } + // Loop must have a single exiting block, if not return false. + if (!CurLoop->getExitingBlock()) { + DEBUG(dbgs() << "loop has multiple exiting block"); + return false; + } + // We only handle bottom-tested loops, i.e. loop in which the condition is + // checked at the end of each iteration. With that we can assume that all + // instructions in the loop are executed the same number of times. + if (CurLoop->getExitingBlock() != CurLoop->getLoopLatch()) { + DEBUG(dbgs() << "loop control flow is not understood by LoopVersioning"); + return false; + } + // Loop more then LoopDepthThreshold are not allowed + if(CurLoop->getLoopDepth() > LoopDepthThreshold) { + DEBUG(dbgs() << "loop depth is more then threshold"); + return false; + } + // PAS: Loop should have a dedicated exit block, if not return false. + if(!CurLoop->hasDedicatedExits()) { + DEBUG(dbgs() << "loop does not has dedicated exit blocks"); + return false; + } + // Loop should have a trip count, if not return false. + // ScalarEvolution needs to be able to find the exit count. + const SCEV *ExitCount = SE->getBackedgeTakenCount(CurLoop); + if (ExitCount == SE->getCouldNotCompute()) { + DEBUG(dbgs() << "loop does not has trip count"); + return false; + } + + // Counters with initial values. + unsigned NumLoads = 0; + unsigned NumStores = 0; + bool StoreToInvar = false; + bool GuaranteedToExecution = false; + // Check parallel loop + const bool IsAnnotatedParallel = CurLoop->isAnnotatedParallel(); + // Instruction check: + // Iterate over loop blocks and instructions of each block. + // 1) Check all load store in loop body are non atomic & non volatile. + // 2) Make sure loop body should not have any call instruction. + // 3) Check store instruction execution guarantee. + // 4) Loop body should have a invariant store. + // 5) Loop body should not have any may throw instruction. + for(Loop::block_iterator bb = CurLoop->block_begin(), + be = CurLoop->block_end(); bb != be; ++bb) { + for(BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); + it != e; ++it) { + // We dont support call instructions. however, we ignore dbg intrinsic. + // If any call instruction found then simply return false. + CallInst *CI = dyn_cast(it); + if (CI && !isa(CI)) { + DEBUG(dbgs() << "loop body has a call instruction"); + return false; + } + if(it->mayThrow()){ + DEBUG(dbgs() << "may throw instruction found in loop body"); + return false; + } + // If current instruction is load instructions + // make sure its a simple load(non atomic & non volatile) + if (it->mayReadFromMemory()) { + LoadInst *Ld = dyn_cast(it); + if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) { + DEBUG(dbgs() << "Found a non-simple load.\n"); + return false; + } + NumLoads++; + } + // If current instruction is store instruction + // make sure its a simple store(non atomic & non volatile) + else if (it->mayWriteToMemory()) { + StoreInst *St = dyn_cast(it); + if (!St || (!St->isSimple() && !IsAnnotatedParallel)) { + DEBUG(dbgs() << "Found a non-simple store.\n"); + return false; + } + // Check store guaranteed execution, here we are interested + // in atleast one guaranteed execution store. + GuaranteedToExecution |= isGuaranteedToExecute(*St); + // Check for store to a loop invariant, again interested in + // atleast one loop invariant store. + Value* Ptr = St->getPointerOperand(); + StoreToInvar |= SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop); + NumStores++; + } + } // Next instr. + } // Next block. + + // Loop should have store instruction, if not return false. + if(!NumStores){ + DEBUG(dbgs() << "Found a read-only loop!"); + return false; + } + // PAS: Make sure atleast one store guarnteed to execute. + if(!GuaranteedToExecution){ + DEBUG(dbgs() << "Non guaranteed store execution found"); + return false; + } + // Loop should have atleast invariant store instruction. + if(!StoreToInvar){ + DEBUG(dbgs() << "Invariant store not found !!"); + return false; + } + + // Counters with initial values. + bool IsMod = 0; + bool HasMayAlias = 0; + bool TypeSafety = 0; + PointerSet->clear(); + // Memory check: + // Iterate over alias set tracker and individual alias sets. + // 1) Make sure all pointers has computable bounds, if not return false. + // 2) Make sure atleast one alias tracker should have pointers of same data type. + // 3) Make sure atlease one alias tracker pointer should modified. + // 4) Collect all unique memory/pointers in PointerSet for memCheck. + // 5) Make sure PointerSet should have atleast 2 pointers. + // 6) PointerSet should'nt have pointers more then RuntimeMemoryCheckThreshold + for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); + I != E; ++I) { + AliasSet &AS = *I; + // Skip Forward Alias Sets, dont consider ignored alias sets object. + if(AS.isForwardingAliasSet()) + continue; + // Return false in case of MustAlias + if(AS.isMustAlias()) + return false; + Value *SomePtr = AS.begin()->getValue(); + bool typeCheck = 1; + + HasMayAlias |= AS.isMayAlias(); + IsMod |= AS.isMod(); + + for (auto A : AS) { + Value *Ptr = A.getValue(); + // alias tracker should have pointers of same data type. + typeCheck = (typeCheck && (SomePtr->getType() == Ptr->getType())); + // Check for repeatation of pointer. + // If the same pointer already visited then continue. + if(!isUniquePointer(Ptr)) + continue; + // Check pointer has computable bounds + // In case of non computable bounds, return false. + if(!hasComputableBounds(SE, Ptr)) { + DEBUG(dbgs() << "Non computable bound found !"); + return false; + } + // Insert memory to PointerSet + PointerSet->insert(Ptr); + // Check any memory modified in loop body. + } + // Atleast one alias tracker should have pointers of same data type. + TypeSafety |= typeCheck; + } + // PAS: Ensure types should be of same type. + if(!TypeSafety) { + DEBUG(dbgs() << "Alias tracker type safety failed!"); + return false; + } + // PAS: Ensure loop body should not be ready only. + if(!IsMod) { + DEBUG(dbgs() << "No memory modified in loop body"); + return false; + } + // PAS: Make sure alias set has may alias case. + // If there no alias memory ambiguity, return false. + if(!HasMayAlias) { + DEBUG(dbgs() << "No ambiguity in memory access."); + return false; + } + // Atleast 2 pointers needed for runtime check. + if(PointerSet->size() <= 1) { + DEBUG(dbgs() << "Didnt found sufficient pointers in loop body"); + return false; + } + // Check memory threshold, should be in limit. + if(PointerSet->size() > RuntimeMemoryCheckThreshold) { + DEBUG(dbgs() << "Loop body has pointers more then defined threshold"); + return false; + } + // Collected pointers should be in the same address space. + // If different address space pointers found then return false. + for (ValueSet::const_iterator I = PointerSet->begin(), + E = PointerSet->end(); I != E;) { + Value *PtrI = *I; + ValueSet::const_iterator J = ++I; + for (; J != E; ++J) { + Value *PtrJ = *J; + unsigned ASi = PtrI->getType()->getPointerAddressSpace(); + unsigned ASj = PtrJ->getType()->getPointerAddressSpace(); + if (ASi != ASj) { + DEBUG(dbgs() << "Different address space pointer memcheck not possible\n"); + return false; + } + } + } + // Loop versioning is feasible, return true. + DEBUG(dbgs() << "Loop Versioning found to be beneficial"); + return true; +} + +bool LoopVersioning::isGuaranteedToExecute(Instruction &Inst) { + + // Otherwise we have to check to make sure that the instruction dominates all + // of the exit blocks. If it doesn't, then there is a path out of the loop + // which does not execute this instruction, so we can't hoist it. + + // If the instruction is in the header block for the loop (which is very + // common), it is always guaranteed to dominate the exit blocks. Since this + // is a common case, and can save some work, check it now. + if (Inst.getParent() == CurLoop->getHeader()) + return true; + + // Get the exit blocks for the current loop. + SmallVector ExitBlocks; + CurLoop->getExitBlocks(ExitBlocks); + + // Verify that the block dominates each of the exit blocks of the loop. + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + if (!DT->dominates(Inst.getParent(), ExitBlocks[i])) + return false; + + // As a degenerate case, if the loop is statically infinite then we haven't + // proven anything since there are no exit blocks. + if (ExitBlocks.empty()) + return false; + + return true; +} + +// Here we does actual loop versioning. +// 1) Clone original loop +// 2) Create a runtime memory check. +// 3) Add both loops under runtime check results target. +// 4) Set all blocks properly. +// It transform loop as shown below +// +----------------+ +// |Runtime Memcheck| +// +----------------+ +// | +// +----------+----------------+----------+ +// | | +// +---------+----------+ +-----------+----------+ +// |Orig Loop Preheader | |Cloned Loop Preheader | +// +--------------------+ +----------------------+ +// | | +// +--------------------+ +----------------------+ +// |Orig Loop Body | |Cloned Loop Body | +// +--------------------+ +----------------------+ +// | | +// +--------------------+ +----------------------+ +// |Orig Loop|Exit Block| |Cloned Loop Exit Block| +// +--------------------+ +-----------+----------+ +// | | +// +----------+--------------+-----------+ +// | +// +-----+----+ +// |Join Block| +// +----------+ + +Loop * LoopVersioning::versionizeLoop(LPPassManager &LPM) +{ + std::vector LoopBlocks; + std::vector NewBlocks; + SmallVector ExitBlocks; + LoopBlocks.clear(); + NewBlocks.clear(); + ExitBlocks.clear(); + + BasicBlock *LoopHeader = CurLoop->getHeader(); + BasicBlock *LoopPreheader = CurLoop->getLoopPreheader(); + Function *F = LoopHeader->getParent(); + // First step, split the preheader and exit blocks, and add these blocks to + // the LoopBlocks list. + BasicBlock *NewPreheader = SplitEdge(LoopPreheader, LoopHeader, DT, LI); + LoopBlocks.push_back(NewPreheader); + // We want the loop to come after the preheader, but before the exit blocks. + LoopBlocks.insert(LoopBlocks.end(), CurLoop->block_begin(), + CurLoop->block_end()); + + CurLoop->getUniqueExitBlocks(ExitBlocks); + // Split all of the edges from inside the loop to their exit blocks. Update + // the appropriate Phi nodes as we do so. + splitExitEdges(CurLoop, ExitBlocks); + + // The exit blocks may have been changed due to edge splitting, recompute. + ExitBlocks.clear(); + CurLoop->getUniqueExitBlocks(ExitBlocks); + + // Add exit blocks to the loop blocks. + LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end()); + + // Next step, clone all of the basic blocks that make up the loop (including + // the loop preheader and exit blocks), keeping track of the mapping between + // the instructions and blocks. + NewBlocks.reserve(LoopBlocks.size()); + ValueToValueMapTy VMap; + for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) { + BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".loopVersion", F); + NewBlocks.push_back(NewBB); + VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping. + LPM.cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, CurLoop); + } + // Splice the newly inserted blocks into the function right before the + // original preheader. + F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(), + NewBlocks[0], F->end()); + + // Now we create the new Loop object for the versioned loop. + Loop *NewLoop = cloneLoop(CurLoop, CurLoop->getParentLoop(), VMap, LI, &LPM); + + Loop *ParentLoop = CurLoop->getParentLoop(); + if (ParentLoop) { + // Make sure to add the cloned preheader and exit blocks to the parent loop + // as well. + ParentLoop->addBasicBlockToLoop(NewBlocks[0], *LI); + } + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *NewExit = cast(VMap[ExitBlocks[i]]); + // The new exit block should be in the same loop as the old one. + if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i])) + ExitBBLoop->addBasicBlockToLoop(NewExit, *LI); + + assert(NewExit->getTerminator()->getNumSuccessors() == 1 && + "Exit block should have been split to have one successor!"); + BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0); + + // If the successor of the exit block had PHI nodes, add an entry for + // NewExit. + for (BasicBlock::iterator I = ExitSucc->begin(); + PHINode *PN = dyn_cast(I); ++I) { + Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]); + ValueToValueMapTy::iterator It = VMap.find(V); + if (It != VMap.end()) V = It->second; + PN->addIncoming(V, NewExit); + } + + if (LandingPadInst *LPad = NewExit->getLandingPadInst()) { + PHINode *PN = PHINode::Create(LPad->getType(), 0, "", + ExitSucc->getFirstInsertionPt()); + for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc); + I != E; ++I) { + BasicBlock *BB = *I; + LandingPadInst *LPI = BB->getLandingPadInst(); + LPI->replaceAllUsesWith(PN); + PN->addIncoming(LPI, BB); + } + } + } + // Rewrite the code to refer to itself. + for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) + for (BasicBlock::iterator I = NewBlocks[i]->begin(), + E = NewBlocks[i]->end(); I != E; ++I) + RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); + + // Rewrite the original preheader to select between versions of the loop. + BranchInst *OldBR = cast(LoopPreheader->getTerminator()); + assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] && + "Preheader splitting did not work correctly!"); + // create runtime check. + RuntimeMemoryCheck RTCheck (CurLoop, SE, DL, PointerSet); + Instruction *MemRuntimeCheck; + Instruction *FirstCheckInst; + std::tie(FirstCheckInst, MemRuntimeCheck) = + RTCheck.createRuntimeCheck(LoopPreheader->getTerminator()); + + if (MemRuntimeCheck) { + BasicBlock * CheckBlock = + LoopPreheader->splitBasicBlock(MemRuntimeCheck, "LVer.memcheck"); + if (ParentLoop) + ParentLoop->addBasicBlockToLoop(CheckBlock, *LI); + BranchInst::Create(LoopBlocks[0], NewBlocks[0], MemRuntimeCheck, CheckBlock); + LPM.deleteSimpleAnalysisValue(OldBR, CurLoop); + OldBR->eraseFromParent(); + } + + return NewLoop; +} + +bool LoopVersioning::runOnLoop(Loop *L, LPPassManager &LPM) { + if (skipOptnoneFunction(L)) + return false; + Changed = false; + // Get our Loop and Alias Analysis information... + LI = &getAnalysis().getLoopInfo(); + AA = &getAnalysis(); + SE = &getAnalysis(); + DT = &getAnalysis().getDomTree(); + TLI = &getAnalysis().getTLI(); + + DataLayoutPass *DLP = getAnalysisIfAvailable(); + if(!DLP) { + DEBUG(dbgs() << "DataLayout missing, loop versioning not possible !\n"); + return false; + } + DL = &DLP->getDataLayout(); + // Set Current Loop + CurLoop = L; + // Get the preheader block to move instructions into... + Preheader = L->getLoopPreheader(); + // Initial allocation + CurAST = new AliasSetTracker(*AA); + PointerSet = new ValueSet(); + UniquePointer = new MapVector; + + // Loop over the body of this loop, construct AST. + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) { + BasicBlock *BB = *I; + if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops. + CurAST->add(*BB); // Incorporate the specified basic block + } + // Check feasiblity of loop versioning. + // If versioning found to be feasible and beneficial then procees + // else simply return, by cleaning up memory. + if(isVersioningBeneficial()) + { + // Do loop versioning. + // Create memcheck for unique memory accessed inside loop. + // Clone original loop, and set blocks properly. + Loop * VerLoop = versionizeLoop(LPM); + // Set metadata to both loop versions. + Function *F = L->getHeader()->getParent(); + // Get latch terminator instruction. + Instruction *I = VerLoop->getLoopLatch()->getTerminator(); + // Create alias scope domain. + MDBuilder MDB(I->getContext()); + MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("LVDomain"); + DenseMap NewScopes; + SmallVector Scopes, NoAliases; + std::string Name = "LVAliasScope"; + // Iterate over each instruction of loop. + // set no-alias for all load & store instructions. + for(Loop::block_iterator bb = VerLoop->block_begin(), + be = VerLoop->block_end(); bb != be; ++bb) { + for(BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); + it != e; ++it) { + // Only interested in load & stores + if(!it->mayReadFromMemory() && !it->mayWriteToMemory()) + continue; + // Create alias scope. + Instruction *I = dyn_cast(it); + MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); + NewScopes.insert(std::make_pair(I, NewScope)); + NoAliases.push_back(NewScope); + Scopes.push_back(NewScope); + // Set no-alias for current instruction. + I->setMetadata(LLVMContext::MD_noalias, + MDNode::concatenate(I->getMetadata(LLVMContext::MD_noalias), + MDNode::get(I->getContext(), NoAliases))); + // set alias-scope for current instruction. + I->setMetadata(LLVMContext::MD_alias_scope, + MDNode::concatenate(I->getMetadata(LLVMContext::MD_alias_scope), + MDNode::get(I->getContext(), Scopes))); + } + } + // Recalculate dominator tree. + DT->recalculate(*F); + // Delete newly created loop from loop pass manager. + LPM.deleteLoopFromQueue(VerLoop); + Changed = true; + } + // Delete allocated memory. + delete CurAST; + delete PointerSet; + delete UniquePointer; + return Changed; +} + + +char LoopVersioning::ID = 0; +INITIALIZE_PASS_BEGIN(LoopVersioning, "do-loop-versioning", "Loop Versioning", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(LoopVersioning, "do-loop-versioning", "Loop Versioning", false, false) + +Pass *llvm::createLoopVersioningPass() { return new LoopVersioning(); } +char &llvm::LoopVersioningID = LoopVersioning::ID; + Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -45,6 +45,7 @@ initializeIndVarSimplifyPass(Registry); initializeJumpThreadingPass(Registry); initializeLICMPass(Registry); + initializeLoopVersioningPass(Registry); initializeLoopDeletionPass(Registry); initializeLoopAccessAnalysisPass(Registry); initializeLoopInstSimplifyPass(Registry);