Index: lib/Transforms/Utils/LoopSimplify.cpp =================================================================== --- lib/Transforms/Utils/LoopSimplify.cpp +++ lib/Transforms/Utils/LoopSimplify.cpp @@ -232,6 +232,123 @@ return nullptr; } +static uint64_t getIntMDOperand(MDNode* MD, int operand) { + Constant *C = cast(MD->getOperand(operand))->getValue(); + return cast(C)->getSExtValue(); +} + +static bool isLoopLatch(Loop &L, BasicBlock *BB) { + assert(L.contains(BB) && "must be in loop"); + // A basic block is a latch exactly when one of it's successors is the loop + // header. + for (auto SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; SI++) + if(*SI == L.getHeader()) + return true; + return false; +} + +/// Rareness - the ratio between the expected number of times the loop header +/// executes and the expected number of times this latch block executes. +/// This is based on *profiling* information and may be wrong, but this routine +/// general attempts to compute a lower bound on this ratio - i.e. the latch is +/// at least this rare (according to the profile data.) A larger number +/// implies the latch executes less frequently. +static uint64_t computeLatchRareness(DominatorTree &DT, Loop &L, + BasicBlock *Latch) { + // The basic structure of the estimation algorithm is a walk of the + // dominating conditions which control this latch. We only consider + // dominating conditions in LoopExit and Latch blocks so that we don't need + // to worry about the other path contributing to the execution count of the + // Latch we're interested in. i.e. We know that to reach the Latch in + // question, the given edge must be traversed. We do loose some + // information to gain this simplicity. + + BasicBlock *Header = L.getHeader(); + if (Latch == Header) + return 1; + + BasicBlock *Current = DT.getNode(Latch)->getIDom()->getBlock(); + BasicBlock *Last = Latch; + assert(DT.dominates(Header, Current) && "malformed loop"); + + uint64_t Rareness = 1; + while (true) { + //How likely is the path to Latch *not* to be taken? + uint64_t Likelyhood = 1; + if (L.isLoopExiting(Current) || isLoopLatch(L, Current)) + if (BranchInst *BR = dyn_cast(Current->getTerminator())) + if (BR->isConditional() && + BR->getSuccessor(0) != BR->getSuccessor(1)) + if (MDNode *MD = BR->getMetadata(LLVMContext::MD_prof)) { + uint64_t Dest1 = getIntMDOperand(MD, 1); + uint64_t Dest2 = getIntMDOperand(MD, 2); + if (BR->getSuccessor(0) == Last) { + Likelyhood = Dest2/Dest1; + } else if (BR->getSuccessor(1) == Last) { + Likelyhood = Dest1/Dest2; + } else { + // We get no information from this branch + } + } + // If the path we're interested was the most frequent one, just count it as + // not contributing to the rareness at all (rather than zeroing the count + // entirely). + Likelyhood = std::max((uint64_t)1, Likelyhood); + // TODO: Should cap this to avoid overflow + Rareness *= Likelyhood; + + if (Current == Header) + break; + Last = Current; + Current = DT.getNode(Current)->getIDom()->getBlock(); + } + + return Rareness; +} + +// Go looking for a 'rare' latch block. We could do this via block frequency, +// but that pass is relatively expensive and not preserved. Instead, directly +// examine dominating prof metadata nodes to determine an upper bound on the +// fraction of times a backedge is taken that the loop executes. If that's +// less than 1/10 (a randomly choosen threshold), split it out as an +// independent outer loop. +static BasicBlock *findRareLatchBlock(DominatorTree &DT, Loop &L) { + SmallVector Latches; + L.getLoopLatches(Latches); + + uint64_t MostRare = 1; + BasicBlock *CandidateBB = nullptr; + for (BasicBlock *Latch : Latches) { + uint64_t Likelyhood = computeLatchRareness(DT, L, Latch); + // TODO: If this is a conditional latch, we're actually interested in the + // rareness of the *backedge*. + if (Likelyhood > MostRare) + CandidateBB = Latch; + MostRare = std::max(MostRare, Likelyhood); + + errs() << Latch->getName() << ": " << Likelyhood << "\n"; + } + + // If each latch was executed equal often, each latch would have a rareness + // which is equal and #Latch*(1/R) == 1 + const uint64_t ExpectedAvgRareness = Latches.size(); + + // If we found a disporpotionately rare backedge, we return it's Latch as + // being the one we one to split into an outer loop. The reason we're + // checking relative frequency is to avoid loops with equally disributed + // latch probabilies where each latch is rare, but no one latch is unusually + // rare. The threshold of '8' is chosen such that a single __builtin_expect + // is enough to indicate a latch is rare in a two latch loop with no other + // information. + if (MostRare >= ExpectedAvgRareness * 8) { + assert(CandidateBB); + return CandidateBB; + } + + return nullptr; +} + /// \brief If this loop has multiple backedges, try to pull one of them out into /// a nested loop. /// @@ -259,26 +376,48 @@ if (!Preheader) return nullptr; + BasicBlock *Header = L->getHeader(); + // The header is not a landing pad; preheader insertion should ensure this. - assert(!L->getHeader()->isLandingPad() && + assert(!Header->isLandingPad() && "Can't insert backedge to landing pad"); - PHINode *PN = findPHIToPartitionLoops(L, AA, DT, AT); - if (!PN) return nullptr; // No known way to partition. - - // Pull out all predecessors that have varying values in the loop. This - // handles the case when a PHI node has multiple instances of itself as - // arguments. SmallVector OuterLoopPreds; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - if (PN->getIncomingValue(i) != PN || - !L->contains(PN->getIncomingBlock(i))) { - // We can't split indirectbr edges. - if (isa(PN->getIncomingBlock(i)->getTerminator())) - return nullptr; - OuterLoopPreds.push_back(PN->getIncomingBlock(i)); + if (PHINode *PN = findPHIToPartitionLoops(L, AA, DT, AT)) { + // Pull out all predecessors that have varying values in the loop. This + // handles the case when a PHI node has multiple instances of itself as + // arguments. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (PN->getIncomingValue(i) != PN || + !L->contains(PN->getIncomingBlock(i))) { + // We can't split indirectbr edges. + if (isa(PN->getIncomingBlock(i)->getTerminator())) + return nullptr; + OuterLoopPreds.push_back(PN->getIncomingBlock(i)); + } } - } + } else if (BasicBlock *RareLatch = findRareLatchBlock(*DT, *L)) { + // If we can find an unusually rarely taken latch, pull that out to it's + // own loop so that we can optimize the inner loop without worrying about + // rarely executed code that dominates the rare latch. + // Gather the list of predeccessors we want to be outside the new inner + // loop, this *must* include all the blocks outside the current loop, plus + // any predeccessors in L we want to become exits from the inner loop. + OuterLoopPreds.push_back(RareLatch); + for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); + PI!=E; ++PI) { + BasicBlock *P = *PI; + if (!L->contains(P)) { + // We can't split indirectbr edges, but a preheader should never end + // with one. + if (isa(P->getTerminator())) + return nullptr; + OuterLoopPreds.push_back(P); + } + } + } else + return nullptr; // No known way to partition. + DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); // If ScalarEvolution is around and knows anything about values in @@ -287,7 +426,6 @@ if (SE) SE->forgetLoop(L); - BasicBlock *Header = L->getHeader(); BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", PP);