Index: llvm/lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -38,6 +38,8 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/PredIteratorCache.h" +#include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" @@ -45,6 +47,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -1880,6 +1883,68 @@ } } +struct RewriteInfo { + DenseMap Defines; + SmallPtrSet Uses; + Instruction *I; + RewriteInfo(Instruction *OrigI) : I(OrigI) {} +}; + +static void +ComputeLiveInBlocks(const SmallPtrSetImpl &UsingBlocks, + const SmallPtrSetImpl &DefBlocks, + SmallPtrSetImpl &LiveInBlocks) { + // To determine liveness, we must iterate through the predecessors of blocks + // where the def is live. Blocks are added to the worklist if we need to + // check their predecessors. Start with all the using blocks. + SmallVector LiveInBlockWorklist(UsingBlocks.begin(), + UsingBlocks.end()); + + // Now that we have a set of blocks where the phi is live-in, recursively add + // their predecessors until we find the full region the value is live. + while (!LiveInBlockWorklist.empty()) { + BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); + + // The block really is live in here, insert it into the set. If already in + // the set, then it has already been processed. + if (!LiveInBlocks.insert(BB).second) + continue; + + // Since the value is live into BB, it is either defined in a predecessor or + // live into it to. Add the preds to the worklist unless they are a + // defining block. + for (BasicBlock *P : predecessors(BB)) { + // The value is not live into a predecessor if it defines the value. + if (DefBlocks.count(P)) + continue; + + // Otherwise it is, add to the worklist. + LiveInBlockWorklist.push_back(P); + } + } +} + +// Compute value at the given block BB. We either should already know it, or we +// should be able to recursively reach it going up dominator tree. +static Value *computeValueAt(BasicBlock *BB, + DenseMap &KnownValues, + DominatorTree *DT) { + if (!KnownValues.count(BB)) { + BasicBlock *IDom = DT->getNode(BB)->getIDom()->getBlock(); + KnownValues[BB] = computeValueAt(IDom, KnownValues, DT); + } + return KnownValues[BB]; +} + +static BasicBlock *getUserBB(Use *U) { + Instruction *User = cast(U->getUser()); + + if (PHINode *UserPN = dyn_cast(User)) + return UserPN->getIncomingBlock(*U); + else + return User->getParent(); +} + /// ThreadEdge - We have decided that it is safe and profitable to factor the /// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB /// across BB. Transform the IR to reflect this change. @@ -1988,9 +2053,13 @@ // now have to update all uses of the value to use either the original value, // the cloned value, or some PHI derived value. This can require arbitrary // PHI insertion, of which we are prepared to do, clean these up now. - SSAUpdater SSAUpdate; SmallVector UsesToRename; + + SmallVector Rewrites; for (Instruction &I : *BB) { + RewriteInfo RewritesForI(&I); + UsesToRename.clear(); + // Scan all uses of this instruction to see if it is used outside of its // block, and if so, record them in UsesToRename. for (Use &U : I.uses()) { @@ -2002,24 +2071,18 @@ continue; UsesToRename.push_back(&U); + RewritesForI.Uses.insert(&U); } // If there are no uses outside the block, we're done with this instruction. if (UsesToRename.empty()) continue; - DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n"); - - // We found a use of I outside of BB. Rename all uses of I that are outside - // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks - // with the two values we know. - SSAUpdate.Initialize(I.getType(), I.getName()); - SSAUpdate.AddAvailableValue(BB, &I); - SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]); - - while (!UsesToRename.empty()) - SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); - DEBUG(dbgs() << "\n"); + // We found a use of I outside of BB - ee need to rename all uses of I that + // are outside its block to be uses of the appropriate PHI node etc. + RewritesForI.Defines[BB] = &I; + RewritesForI.Defines[NewBB] = ValueMapping[&I]; + Rewrites.push_back(RewritesForI); } // Ok, NewBB is good to go. Update the terminator of PredBB to jump to @@ -2036,6 +2099,94 @@ {DominatorTree::Insert, PredBB, NewBB}, {DominatorTree::Delete, PredBB, BB}}); + // Now we need to update SSA. + // We will use the classic algorithm for placing PHI-nodes: "Efficiently + // Computing Static Single Assignment Form and the Control Dependence Graph" + // by Cytron et al. + // + // All the uses we need to rewrite along with the information about available + // definitions are stored in Rewrites. In a general case, to properly insert + // all necessary PHI-nodes, we need to compute IDF for iterated joins of all + // definitions for each instruction - the blocks in IDF will be the blocks + // where PHI-nodes need to be inserted. + // + // In the case of JumpThreading, all definitions for all instructions are + // located in just two basic blocks: BB and NewBB. We can exploit this + // information to avoid computing IDF for each instruction and compute it only + // once. UsingBlocks, which are used to limit the part of CFG for which IDF is + // computed, will be a union of UsingBlocks for all instructions. As a side + // effect of this, we might insert PHI-nodes without actual uses, but we can + // quickly wipe them out in the end. + DominatorTree *DT = &DDT->flush(); + + ForwardIDFCalculator IDF(*DT); + PredIteratorCache PredCache; + + // DefBlocks contain only two blocks: BB and its clone NewBB. + SmallPtrSet DefBlocks; + DefBlocks.insert(BB); + DefBlocks.insert(NewBB); + IDF.setDefiningBlocks(DefBlocks); + + // UsingBlocks contain all blocks with uses of instructions we are rewriting. + SmallPtrSet UsingBlocks; + for (auto R : Rewrites) { + for (auto U : R.Uses) + UsingBlocks.insert(getUserBB(U)); + } + + // Compute iterated dominance frontier, IDF. + SmallVector IDFBlocks; + SmallPtrSet LiveInBlocks; + ComputeLiveInBlocks(UsingBlocks, DefBlocks, LiveInBlocks); + IDF.resetLiveInBlocks(); + IDF.setLiveInBlocks(LiveInBlocks); + IDF.calculate(IDFBlocks); + + // Now, go through all instructions and perform actual IR updates. + for (auto R : Rewrites) { + DenseMap KnownValues; + + DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *R.I << "\n"); + + // Fill in known values for the initial blocks. + KnownValues[BB] = R.I; + KnownValues[NewBB] = ValueMapping[R.I]; + + // Insert PHIs into blocks from IDF and record them as known values. + SmallVector InsertedPHIs; + for (auto FrontierBB : IDFBlocks) { + IRBuilder<> B(FrontierBB, FrontierBB->begin()); + PHINode *PN = B.CreatePHI(R.I->getType(), 0, R.I->getName()); + KnownValues[FrontierBB] = PN; + InsertedPHIs.push_back(PN); + } + + // Fill in arguments of the inserted PHIs. + for (auto PN : InsertedPHIs) { + BasicBlock *PBB = PN->getParent(); + for (BasicBlock *Pred : PredCache.get(PBB)) + PN->addIncoming(computeValueAt(Pred, KnownValues, DT), Pred); + } + + // Rewrite actual uses with the inserted definitions. + for (auto U : R.Uses) { + Value *V = computeValueAt(getUserBB(U), KnownValues, DT); + Value *OldVal = U->get(); + // Notify that users of the existing value that it is being replaced. + if (OldVal != V && OldVal->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(OldVal, V); + U->set(V); + } + + // Clean up unused PHIs. + for (auto PN : InsertedPHIs) { + if (PN->use_empty()) + PN->eraseFromParent(); + } + } + // SSA Update is finished! + // At this point, the IR is fully up to date and consistent. Do a quick scan // over the new instructions and zap any that are constants or dead. This // frequently happens because of phi translation.