diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -143,7 +143,7 @@ /// recursively. Return true if any instructions were deleted. bool RecursivelyDeleteTriviallyDeadInstructions( Value *V, const TargetLibraryInfo *TLI = nullptr, - MemorySSAUpdater *MSSAU = nullptr); + MemorySSAUpdater *MSSAU = nullptr, AssumptionCache *AC = nullptr); /// Delete all of the instructions in `DeadInsts`, and all other instructions /// that deleting these in turn causes to be trivially dead. @@ -155,7 +155,8 @@ /// empty afterward. void RecursivelyDeleteTriviallyDeadInstructions( SmallVectorImpl &DeadInsts, - const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr); + const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr, + AssumptionCache *AC = nullptr); /// Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow /// instructions that are not trivially dead. These will be ignored. @@ -163,7 +164,8 @@ /// were found and deleted. bool RecursivelyDeleteTriviallyDeadInstructionsPermissive( SmallVectorImpl &DeadInsts, - const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr); + const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr, + AssumptionCache *AC = nullptr); /// If the specified value is an effectively dead PHI node, due to being a /// def-use chain of single-use nodes that either forms a cycle or is terminated @@ -242,7 +244,8 @@ /// branches to us and one of our successors, fold the setcc into the /// predecessor and use logical operations to pick the right destination. 
bool FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU = nullptr, - unsigned BonusInstThreshold = 1); + unsigned BonusInstThreshold = 1, + AssumptionCache *AC = nullptr); /// This function takes a virtual register computed by an Instruction and /// replaces it with a slot in the stack frame, allocated via alloca. diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -7987,7 +7987,7 @@ << ToBeDeletedInsts.size() << " instructions and " << ToBeChangedUses.size() << " uses\n"); - SmallVector DeadInsts; + SmallVector DeadInsts; SmallVector TerminatorsToFold; for (auto &It : ToBeChangedUses) { @@ -8072,7 +8072,11 @@ } } - RecursivelyDeleteTriviallyDeadInstructions(DeadInsts); + for (Instruction *I : DeadInsts) { + AssumptionCache *AC = + InfoCache.AG.getAnalysis(*I->getFunction()); + RecursivelyDeleteTriviallyDeadInstructions(I, nullptr, nullptr, AC); + } if (unsigned NumDeadBlocks = ToBeDeletedBlocks.size()) { SmallVector ToBeDeletedBBs; diff --git a/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp b/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp --- a/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp +++ b/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp @@ -66,7 +66,8 @@ } } } - RecursivelyDeleteTriviallyDeadInstructions(DeadInstsInBB, SQ.TLI); + RecursivelyDeleteTriviallyDeadInstructions(DeadInstsInBB, SQ.TLI, nullptr, + SQ.AC); } // Place the list of instructions to simplify on the next loop iteration diff --git a/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp --- a/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -152,7 +152,7 @@ // iteration over all instructions in all the loop blocks. 
if (!DeadInsts.empty()) { Changed = true; - RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, &TLI, MSSAU); + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, &TLI, MSSAU, &AC); } if (MSSAU && VerifyMemorySSA) diff --git a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp --- a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp @@ -243,7 +243,7 @@ WeakVH NewIExist = NewI; // If SeenExprs/NewIExist contains I's WeakTrackingVH/WeakVH, that // entry will be replaced with nullptr if deleted. - RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI); + RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI, nullptr, AC); if (!NewIExist) { // Rare occation where the new instruction (NewI) have been removed, // probably due to parts of the input code was dead from the diff --git a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp --- a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp +++ b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp @@ -134,8 +134,11 @@ auto addAttrList = [&](AttributeList AttrList) { for (unsigned Idx = AttributeList::FirstArgIndex; Idx < AttrList.getNumAttrSets(); Idx++) - for (Attribute Attr : AttrList.getAttributes(Idx)) + for (Attribute Attr : AttrList.getAttributes(Idx)) { + if (!Call->getArgOperand(Idx - 1)) + continue; addAttribute(Attr, Call->getArgOperand(Idx - 1)); + } for (Attribute Attr : AttrList.getFnAttributes()) addAttribute(Attr, nullptr); }; @@ -198,9 +201,18 @@ addAttr(Attribute::Alignment, Pointer, MA.valueOrOne().value()); } + bool instructionHasNullOperand(Instruction *I) { + for (Value *V : I->operand_values()) + if (!V) + return true; + return false; + } + void addInstruction(Instruction *I) { if (auto *Call = dyn_cast(I)) return addCall(Call); + if (instructionHasNullOperand(I)) + return; if (auto *Load = dyn_cast(I)) return addAccessedPtr(I, Load->getPointerOperand(), 
Load->getType(), Load->getAlign()); diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -78,6 +78,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include #include #include @@ -441,21 +442,22 @@ /// trivially dead, delete them too, recursively. Return true if any /// instructions were deleted. bool llvm::RecursivelyDeleteTriviallyDeadInstructions( - Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU) { + Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU, + AssumptionCache *AC) { Instruction *I = dyn_cast(V); if (!I || !isInstructionTriviallyDead(I, TLI)) return false; SmallVector DeadInsts; DeadInsts.push_back(I); - RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU); + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU, AC); return true; } bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive( SmallVectorImpl &DeadInsts, const TargetLibraryInfo *TLI, - MemorySSAUpdater *MSSAU) { + MemorySSAUpdater *MSSAU, AssumptionCache *AC) { unsigned S = 0, E = DeadInsts.size(), Alive = 0; for (; S != E; ++S) { auto *I = cast(DeadInsts[S]); @@ -466,13 +468,13 @@ } if (Alive == E) return false; - RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU); + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI, MSSAU, AC); return true; } void llvm::RecursivelyDeleteTriviallyDeadInstructions( SmallVectorImpl &DeadInsts, const TargetLibraryInfo *TLI, - MemorySSAUpdater *MSSAU) { + MemorySSAUpdater *MSSAU, AssumptionCache *AC) { // Process the dead instruction list until empty. while (!DeadInsts.empty()) { Value *V = DeadInsts.pop_back_val(); @@ -485,6 +487,7 @@ // Don't lose the debug info while deleting the instructions. 
salvageDebugInfo(*I); + salvageKnowledge(I, AC); // Null out all of the instruction's operands to see if any operand becomes // dead as we go. @@ -571,6 +574,7 @@ const TargetLibraryInfo *TLI) { if (isInstructionTriviallyDead(I, TLI)) { salvageDebugInfo(*I); + salvageKnowledge(I); // Null out all of the instruction's operands to see if any operand becomes // dead as we go. diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -215,7 +215,7 @@ while (!DeadInsts.empty()) { Value *V = DeadInsts.pop_back_val(); if (Instruction *Inst = dyn_cast_or_null(V)) - RecursivelyDeleteTriviallyDeadInstructions(Inst); + RecursivelyDeleteTriviallyDeadInstructions(Inst, nullptr, nullptr, AC); } } diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -203,10 +203,23 @@ bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder); bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder); bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder); + bool SimplifyCondBranchToTwoReturns(BranchInst *BI, IRBuilder<> &Builder); bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, IRBuilder<> &Builder); + bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI); + bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, + const TargetTransformInfo &TTI); + bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, + BasicBlock *TrueBB, BasicBlock *FalseBB, + uint32_t TrueWeight, uint32_t FalseWeight); + bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, + const DataLayout &DL); + bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select); + bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI); + bool 
TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder); + public: SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL, SmallPtrSetImpl *LoopHeaders, @@ -677,10 +690,8 @@ } }; -} // end anonymous namespace - -static void EraseTerminatorAndDCECond(Instruction *TI, - MemorySSAUpdater *MSSAU = nullptr) { +void EraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU, + AssumptionCache *AC) { Instruction *Cond = nullptr; if (SwitchInst *SI = dyn_cast(TI)) { Cond = dyn_cast(SI->getCondition()); @@ -864,7 +875,7 @@ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); - EraseTerminatorAndDCECond(TI); + EraseTerminatorAndDCECond(TI, nullptr, Options.AC); return true; } @@ -929,12 +940,10 @@ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); - EraseTerminatorAndDCECond(TI); + EraseTerminatorAndDCECond(TI, nullptr, Options.AC); return true; } -namespace { - /// This class implements a stable ordering of constant /// integers that does not depend on their address. This is important for /// applications that sort ConstantInt's to ensure uniqueness. @@ -1190,7 +1199,7 @@ setBranchWeights(NewSI, MDWeights); } - EraseTerminatorAndDCECond(PTI); + EraseTerminatorAndDCECond(PTI, nullptr, Options.AC); // Okay, last check. If BB is still a successor of PSI, then we must // have an infinite loop case. If so, add an infinitely looping block @@ -1236,8 +1245,8 @@ /// Given a conditional branch that goes to BB1 and BB2, hoist any common code /// in the two blocks up into the branch block. The caller of this function /// guarantees that BI's block dominates BB1 and BB2. -static bool HoistThenElseCodeToIf(BranchInst *BI, - const TargetTransformInfo &TTI) { +bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, + const TargetTransformInfo &TTI) { // This does very trivial matching, with limited scanning, to find identical // instructions in the two blocks. 
In particular, we don't want to get into // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As @@ -1427,7 +1436,7 @@ for (BasicBlock *Succ : successors(BB1)) AddPredecessorToBlock(Succ, BIParent, BB1); - EraseTerminatorAndDCECond(BI); + EraseTerminatorAndDCECond(BI, nullptr, Options.AC); return true; } @@ -1970,8 +1979,8 @@ /// \endcode /// /// \returns true if the conditional block is removed. -static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, - const TargetTransformInfo &TTI) { +bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, + const TargetTransformInfo &TTI) { // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); if (isa(BrCond)) @@ -2463,8 +2472,8 @@ /// If we found a conditional branch that goes to two returning blocks, /// try to merge them together into one return, /// introducing a select if the return values disagree. -static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, - IRBuilder<> &Builder) { +bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, + IRBuilder<> &Builder) { assert(BI->isConditional() && "Must be a conditional branch"); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); @@ -2487,7 +2496,7 @@ TrueSucc->removePredecessor(BI->getParent()); FalseSucc->removePredecessor(BI->getParent()); Builder.CreateRetVoid(); - EraseTerminatorAndDCECond(BI); + EraseTerminatorAndDCECond(BI, nullptr, Options.AC); return true; } @@ -2543,7 +2552,7 @@ << "\n " << *BI << "\nNewRet = " << *RI << "\nTRUEBLOCK: " << *TrueSucc << "\nFALSEBLOCK: " << *FalseSucc); - EraseTerminatorAndDCECond(BI); + EraseTerminatorAndDCECond(BI, nullptr, Options.AC); return true; } @@ -2588,495 +2597,188 @@ } } -/// If this basic block is simple enough, and if a predecessor branches to us -/// and one of our successors, fold the block into the predecessor and use -/// logical operations to pick the 
right destination. -bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, - unsigned BonusInstThreshold) { - BasicBlock *BB = BI->getParent(); +// If there is only one store in BB1 and BB2, return it, otherwise return +// nullptr. +static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) { + StoreInst *S = nullptr; + for (auto *BB : {BB1, BB2}) { + if (!BB) + continue; + for (auto &I : *BB) + if (auto *SI = dyn_cast(&I)) { + if (S) + // Multiple stores seen. + return nullptr; + else + S = SI; + } + } + return S; +} - const unsigned PredCount = pred_size(BB); +static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, + Value *AlternativeV = nullptr) { + // PHI is going to be a PHI node that allows the value V that is defined in + // BB to be referenced in BB's only successor. + // + // If AlternativeV is nullptr, the only value we care about in PHI is V. It + // doesn't matter to us what the other operand is (it'll never get used). We + // could just create a new PHI with an undef incoming value, but that could + // increase register pressure if EarlyCSE/InstCombine can't fold it with some + // other PHI. So here we directly look for some PHI in BB's successor with V + // as an incoming operand. If we find one, we use it, else we create a new + // one. + // + // If AlternativeV is not nullptr, we care about both incoming values in PHI. + // PHI must be exactly: phi [ %BB, %V ], [ %OtherBB, %AlternativeV] + // where OtherBB is the single other predecessor of BB's only successor. + PHINode *PHI = nullptr; + BasicBlock *Succ = BB->getSingleSuccessor(); - Instruction *Cond = nullptr; - if (BI->isConditional()) - Cond = dyn_cast(BI->getCondition()); - else { - // For unconditional branch, check for a simple CFG pattern, where - // BB has a single predecessor and BB's successor is also its predecessor's - // successor. If such pattern exists, check for CSE between BB and its - // predecessor. 
- if (BasicBlock *PB = BB->getSinglePredecessor()) - if (BranchInst *PBI = dyn_cast(PB->getTerminator())) - if (PBI->isConditional() && - (BI->getSuccessor(0) == PBI->getSuccessor(0) || - BI->getSuccessor(0) == PBI->getSuccessor(1))) { - for (auto I = BB->instructionsWithoutDebug().begin(), - E = BB->instructionsWithoutDebug().end(); - I != E;) { - Instruction *Curr = &*I++; - if (isa(Curr)) { - Cond = Curr; - break; - } - // Quit if we can't remove this instruction. - if (!tryCSEWithPredecessor(Curr, PB)) - return false; - } - } + for (auto I = Succ->begin(); isa(I); ++I) + if (cast(I)->getIncomingValueForBlock(BB) == V) { + PHI = cast(I); + if (!AlternativeV) + break; - if (!Cond) - return false; - } + assert(Succ->hasNPredecessors(2)); + auto PredI = pred_begin(Succ); + BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI; + if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV) + break; + PHI = nullptr; + } + if (PHI) + return PHI; - if (!Cond || (!isa(Cond) && !isa(Cond)) || - Cond->getParent() != BB || !Cond->hasOneUse()) - return false; + // If V is not an instruction defined in BB, just return it. + if (!AlternativeV && + (!isa(V) || cast(V)->getParent() != BB)) + return V; - // Make sure the instruction after the condition is the cond branch. - BasicBlock::iterator CondIt = ++Cond->getIterator(); + PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front()); + PHI->addIncoming(V, BB); + for (BasicBlock *PredBB : predecessors(Succ)) + if (PredBB != BB) + PHI->addIncoming( + AlternativeV ? AlternativeV : UndefValue::get(V->getType()), PredBB); + return PHI; +} - // Ignore dbg intrinsics. 
- while (isa(CondIt)) - ++CondIt; +static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, + BasicBlock *QTB, BasicBlock *QFB, + BasicBlock *PostBB, Value *Address, + bool InvertPCond, bool InvertQCond, + const DataLayout &DL, + const TargetTransformInfo &TTI) { + // For every pointer, there must be exactly two stores, one coming from + // PTB or PFB, and the other from QTB or QFB. We don't support more than one + // store (to any address) in PTB,PFB or QTB,QFB. + // FIXME: We could relax this restriction with a bit more work and performance + // testing. + StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB); + StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB); + if (!PStore || !QStore) + return false; - if (&*CondIt != BI) + // Now check the stores are compatible. + if (!QStore->isUnordered() || !PStore->isUnordered()) return false; - // Only allow this transformation if computing the condition doesn't involve - // too many instructions and these involved instructions can be executed - // unconditionally. We denote all involved instructions except the condition - // as "bonus instructions", and only allow this transformation when the - // number of the bonus instructions we'll need to create when cloning into - // each predecessor does not exceed a certain threshold. - unsigned NumBonusInsts = 0; - for (auto I = BB->begin(); Cond != &*I; ++I) { - // Ignore dbg intrinsics. - if (isa(I)) - continue; - if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(&*I)) + // Check that sinking the store won't cause program behavior changes. Sinking + // the store out of the Q blocks won't change any behavior as we're sinking + // from a block to its unconditional successor. But we're moving a store from + // the P blocks down through the middle block (QBI) and past both QFB and QTB. + // So we need to check that there are no aliasing loads or stores in + // QBI, QTB and QFB. 
We also need to check there are no conflicting memory + // operations between PStore and the end of its parent block. + // + // The ideal way to do this is to query AliasAnalysis, but we don't + // preserve AA currently so that is dangerous. Be super safe and just + // check there are no other memory operations at all. + for (auto &I : *QFB->getSinglePredecessor()) + if (I.mayReadOrWriteMemory()) return false; - // I has only one use and can be executed unconditionally. - Instruction *User = dyn_cast(I->user_back()); - if (User == nullptr || User->getParent() != BB) + for (auto &I : *QFB) + if (&I != QStore && I.mayReadOrWriteMemory()) + return false; + if (QTB) + for (auto &I : *QTB) + if (&I != QStore && I.mayReadOrWriteMemory()) + return false; + for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end(); + I != E; ++I) + if (&*I != PStore && I->mayReadOrWriteMemory()) return false; - // I is used in the same BB. Since BI uses Cond and doesn't have more slots - // to use any other instruction, User must be an instruction between next(I) - // and Cond. - // Account for the cost of duplicating this instruction into each - // predecessor. - NumBonusInsts += PredCount; - // Early exits once we reach the limit. - if (NumBonusInsts > BonusInstThreshold) + // If we're not in aggressive mode, we only optimize if we have some + // confidence that by optimizing we'll allow P and/or Q to be if-converted. + auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef FreeStores) { + if (!BB) + return true; + // Heuristic: if the block can be if-converted/phi-folded and the + // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to + // thread this store. + int BudgetRemaining = + PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; + for (auto &I : BB->instructionsWithoutDebug()) { + // Consider terminator instruction to be free. 
+ if (I.isTerminator()) + continue; + // If this is one of the stores that we want to speculate out of this BB, + // then don't count its cost, consider it to be free. + if (auto *S = dyn_cast(&I)) + if (llvm::find(FreeStores, S)) + continue; + // Else, we have a white-list of instructions that we are ok speculating. + if (!isa(I) && !isa(I)) + return false; // Not in white-list - not worthwhile folding. + // And finally, if this is a non-free instruction that we are okay + // speculating, ensure that we consider the speculation budget. + BudgetRemaining -= TTI.getUserCost(&I); + if (BudgetRemaining < 0) + return false; // Eagerly refuse to fold as soon as we're out of budget. + } + assert(BudgetRemaining >= 0 && + "When we run out of budget we will eagerly return from within the " + "per-instruction loop."); + return true; + }; + + const SmallVector FreeStores = {PStore, QStore}; + if (!MergeCondStoresAggressively && + (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) || + !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores))) + return false; + + // If PostBB has more than two predecessors, we need to split it so we can + // sink the store. + if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) { + // We know that QFB's only successor is PostBB. And QFB has a single + // predecessor. If QTB exists, then its only successor is also PostBB. + // If QTB does not exist, then QFB's only predecessor has a conditional + // branch to QFB and PostBB. + BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor(); + BasicBlock *NewBB = + SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split"); + if (!NewBB) return false; + PostBB = NewBB; } - // Cond is known to be a compare or binary operator. Check to make sure that - // neither operand is a potentially-trapping constant expression. 
- if (ConstantExpr *CE = dyn_cast(Cond->getOperand(0))) - if (CE->canTrap()) - return false; - if (ConstantExpr *CE = dyn_cast(Cond->getOperand(1))) - if (CE->canTrap()) - return false; + // OK, we're going to sink the stores to PostBB. The store has to be + // conditional though, so first create the predicate. + Value *PCond = cast(PFB->getSinglePredecessor()->getTerminator()) + ->getCondition(); + Value *QCond = cast(QFB->getSinglePredecessor()->getTerminator()) + ->getCondition(); - // Finally, don't infinitely unroll conditional loops. - BasicBlock *TrueDest = BI->getSuccessor(0); - BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr; - if (TrueDest == BB || FalseDest == BB) - return false; + Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(), + PStore->getParent()); + Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(), + QStore->getParent(), PPHI); - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *PredBlock = *PI; - BranchInst *PBI = dyn_cast(PredBlock->getTerminator()); - - // Check that we have two conditional branches. If there is a PHI node in - // the common successor, verify that the same value flows in from both - // blocks. - SmallVector PHIs; - if (!PBI || PBI->isUnconditional() || - (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) || - (!BI->isConditional() && - !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs))) - continue; - - // Determine if the two branches share a common destination. 
- Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd; - bool InvertPredCond = false; - - if (BI->isConditional()) { - if (PBI->getSuccessor(0) == TrueDest) { - Opc = Instruction::Or; - } else if (PBI->getSuccessor(1) == FalseDest) { - Opc = Instruction::And; - } else if (PBI->getSuccessor(0) == FalseDest) { - Opc = Instruction::And; - InvertPredCond = true; - } else if (PBI->getSuccessor(1) == TrueDest) { - Opc = Instruction::Or; - InvertPredCond = true; - } else { - continue; - } - } else { - if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest) - continue; - } - - LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); - IRBuilder<> Builder(PBI); - - // If we need to invert the condition in the pred block to match, do so now. - if (InvertPredCond) { - Value *NewCond = PBI->getCondition(); - - if (NewCond->hasOneUse() && isa(NewCond)) { - CmpInst *CI = cast(NewCond); - CI->setPredicate(CI->getInversePredicate()); - } else { - NewCond = - Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not"); - } - - PBI->setCondition(NewCond); - PBI->swapSuccessors(); - } - - // If we have bonus instructions, clone them into the predecessor block. - // Note that there may be multiple predecessor blocks, so we cannot move - // bonus instructions to a predecessor block. - ValueToValueMapTy VMap; // maps original values to cloned values - // We already make sure Cond is the last instruction before BI. Therefore, - // all instructions before Cond other than DbgInfoIntrinsic are bonus - // instructions. - for (auto BonusInst = BB->begin(); Cond != &*BonusInst; ++BonusInst) { - if (isa(BonusInst)) - continue; - Instruction *NewBonusInst = BonusInst->clone(); - RemapInstruction(NewBonusInst, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - VMap[&*BonusInst] = NewBonusInst; - - // If we moved a load, we cannot any longer claim any knowledge about - // its potential value. 
The previous information might have been valid - // only given the branch precondition. - // For an analogous reason, we must also drop all the metadata whose - // semantics we don't understand. - NewBonusInst->dropUnknownNonDebugMetadata(); - - PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst); - NewBonusInst->takeName(&*BonusInst); - BonusInst->setName(BonusInst->getName() + ".old"); - } - - // Clone Cond into the predecessor basic block, and or/and the - // two conditions together. - Instruction *CondInPred = Cond->clone(); - RemapInstruction(CondInPred, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - PredBlock->getInstList().insert(PBI->getIterator(), CondInPred); - CondInPred->takeName(Cond); - Cond->setName(CondInPred->getName() + ".old"); - - if (BI->isConditional()) { - Instruction *NewCond = cast( - Builder.CreateBinOp(Opc, PBI->getCondition(), CondInPred, "or.cond")); - PBI->setCondition(NewCond); - - uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; - bool HasWeights = - extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, - SuccTrueWeight, SuccFalseWeight); - SmallVector NewWeights; - - if (PBI->getSuccessor(0) == BB) { - if (HasWeights) { - // PBI: br i1 %x, BB, FalseDest - // BI: br i1 %y, TrueDest, FalseDest - // TrueWeight is TrueWeight for PBI * TrueWeight for BI. - NewWeights.push_back(PredTrueWeight * SuccTrueWeight); - // FalseWeight is FalseWeight for PBI * TotalWeight for BI + - // TrueWeight for PBI * FalseWeight for BI. - // We assume that total weights of a BranchInst can fit into 32 bits. - // Therefore, we will not have overflow using 64-bit arithmetic. 
- NewWeights.push_back(PredFalseWeight * - (SuccFalseWeight + SuccTrueWeight) + - PredTrueWeight * SuccFalseWeight); - } - AddPredecessorToBlock(TrueDest, PredBlock, BB, MSSAU); - PBI->setSuccessor(0, TrueDest); - } - if (PBI->getSuccessor(1) == BB) { - if (HasWeights) { - // PBI: br i1 %x, TrueDest, BB - // BI: br i1 %y, TrueDest, FalseDest - // TrueWeight is TrueWeight for PBI * TotalWeight for BI + - // FalseWeight for PBI * TrueWeight for BI. - NewWeights.push_back(PredTrueWeight * - (SuccFalseWeight + SuccTrueWeight) + - PredFalseWeight * SuccTrueWeight); - // FalseWeight is FalseWeight for PBI * FalseWeight for BI. - NewWeights.push_back(PredFalseWeight * SuccFalseWeight); - } - AddPredecessorToBlock(FalseDest, PredBlock, BB, MSSAU); - PBI->setSuccessor(1, FalseDest); - } - if (NewWeights.size() == 2) { - // Halve the weights if any of them cannot fit in an uint32_t - FitWeights(NewWeights); - - SmallVector MDWeights(NewWeights.begin(), - NewWeights.end()); - setBranchWeights(PBI, MDWeights[0], MDWeights[1]); - } else - PBI->setMetadata(LLVMContext::MD_prof, nullptr); - } else { - // Update PHI nodes in the common successors. 
- for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { - ConstantInt *PBI_C = cast( - PHIs[i]->getIncomingValueForBlock(PBI->getParent())); - assert(PBI_C->getType()->isIntegerTy(1)); - Instruction *MergedCond = nullptr; - if (PBI->getSuccessor(0) == TrueDest) { - // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value) - // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value) - // is false: !PBI_Cond and BI_Value - Instruction *NotCond = cast( - Builder.CreateNot(PBI->getCondition(), "not.cond")); - MergedCond = cast( - Builder.CreateBinOp(Instruction::And, NotCond, CondInPred, - "and.cond")); - if (PBI_C->isOne()) - MergedCond = cast(Builder.CreateBinOp( - Instruction::Or, PBI->getCondition(), MergedCond, "or.cond")); - } else { - // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C) - // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond) - // is false: PBI_Cond and BI_Value - MergedCond = cast(Builder.CreateBinOp( - Instruction::And, PBI->getCondition(), CondInPred, "and.cond")); - if (PBI_C->isOne()) { - Instruction *NotCond = cast( - Builder.CreateNot(PBI->getCondition(), "not.cond")); - MergedCond = cast(Builder.CreateBinOp( - Instruction::Or, NotCond, MergedCond, "or.cond")); - } - } - // Update PHI Node. - PHIs[i]->setIncomingValueForBlock(PBI->getParent(), MergedCond); - } - - // PBI is changed to branch to TrueDest below. Remove itself from - // potential phis from all other successors. - if (MSSAU) - MSSAU->changeCondBranchToUnconditionalTo(PBI, TrueDest); - - // Change PBI from Conditional to Unconditional. - BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI); - EraseTerminatorAndDCECond(PBI, MSSAU); - PBI = New_PBI; - } - - // If BI was a loop latch, it may have had associated loop metadata. - // We need to copy it to the new latch, that is, PBI. 
- if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop)) - PBI->setMetadata(LLVMContext::MD_loop, LoopMD); - - // TODO: If BB is reachable from all paths through PredBlock, then we - // could replace PBI's branch probabilities with BI's. - - // Copy any debug value intrinsics into the end of PredBlock. - for (Instruction &I : *BB) - if (isa(I)) - I.clone()->insertBefore(PBI); - - return true; - } - return false; -} - -// If there is only one store in BB1 and BB2, return it, otherwise return -// nullptr. -static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) { - StoreInst *S = nullptr; - for (auto *BB : {BB1, BB2}) { - if (!BB) - continue; - for (auto &I : *BB) - if (auto *SI = dyn_cast(&I)) { - if (S) - // Multiple stores seen. - return nullptr; - else - S = SI; - } - } - return S; -} - -static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, - Value *AlternativeV = nullptr) { - // PHI is going to be a PHI node that allows the value V that is defined in - // BB to be referenced in BB's only successor. - // - // If AlternativeV is nullptr, the only value we care about in PHI is V. It - // doesn't matter to us what the other operand is (it'll never get used). We - // could just create a new PHI with an undef incoming value, but that could - // increase register pressure if EarlyCSE/InstCombine can't fold it with some - // other PHI. So here we directly look for some PHI in BB's successor with V - // as an incoming operand. If we find one, we use it, else we create a new - // one. - // - // If AlternativeV is not nullptr, we care about both incoming values in PHI. - // PHI must be exactly: phi [ %BB, %V ], [ %OtherBB, %AlternativeV] - // where OtherBB is the single other predecessor of BB's only successor. 
- PHINode *PHI = nullptr; - BasicBlock *Succ = BB->getSingleSuccessor(); - - for (auto I = Succ->begin(); isa(I); ++I) - if (cast(I)->getIncomingValueForBlock(BB) == V) { - PHI = cast(I); - if (!AlternativeV) - break; - - assert(Succ->hasNPredecessors(2)); - auto PredI = pred_begin(Succ); - BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI; - if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV) - break; - PHI = nullptr; - } - if (PHI) - return PHI; - - // If V is not an instruction defined in BB, just return it. - if (!AlternativeV && - (!isa(V) || cast(V)->getParent() != BB)) - return V; - - PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front()); - PHI->addIncoming(V, BB); - for (BasicBlock *PredBB : predecessors(Succ)) - if (PredBB != BB) - PHI->addIncoming( - AlternativeV ? AlternativeV : UndefValue::get(V->getType()), PredBB); - return PHI; -} - -static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, - BasicBlock *QTB, BasicBlock *QFB, - BasicBlock *PostBB, Value *Address, - bool InvertPCond, bool InvertQCond, - const DataLayout &DL, - const TargetTransformInfo &TTI) { - // For every pointer, there must be exactly two stores, one coming from - // PTB or PFB, and the other from QTB or QFB. We don't support more than one - // store (to any address) in PTB,PFB or QTB,QFB. - // FIXME: We could relax this restriction with a bit more work and performance - // testing. - StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB); - StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB); - if (!PStore || !QStore) - return false; - - // Now check the stores are compatible. - if (!QStore->isUnordered() || !PStore->isUnordered()) - return false; - - // Check that sinking the store won't cause program behavior changes. Sinking - // the store out of the Q blocks won't change any behavior as we're sinking - // from a block to its unconditional successor. 
But we're moving a store from - // the P blocks down through the middle block (QBI) and past both QFB and QTB. - // So we need to check that there are no aliasing loads or stores in - // QBI, QTB and QFB. We also need to check there are no conflicting memory - // operations between PStore and the end of its parent block. - // - // The ideal way to do this is to query AliasAnalysis, but we don't - // preserve AA currently so that is dangerous. Be super safe and just - // check there are no other memory operations at all. - for (auto &I : *QFB->getSinglePredecessor()) - if (I.mayReadOrWriteMemory()) - return false; - for (auto &I : *QFB) - if (&I != QStore && I.mayReadOrWriteMemory()) - return false; - if (QTB) - for (auto &I : *QTB) - if (&I != QStore && I.mayReadOrWriteMemory()) - return false; - for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end(); - I != E; ++I) - if (&*I != PStore && I->mayReadOrWriteMemory()) - return false; - - // If we're not in aggressive mode, we only optimize if we have some - // confidence that by optimizing we'll allow P and/or Q to be if-converted. - auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef FreeStores) { - if (!BB) - return true; - // Heuristic: if the block can be if-converted/phi-folded and the - // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to - // thread this store. - int BudgetRemaining = - PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; - for (auto &I : BB->instructionsWithoutDebug()) { - // Consider terminator instruction to be free. - if (I.isTerminator()) - continue; - // If this is one the stores that we want to speculate out of this BB, - // then don't count it's cost, consider it to be free. - if (auto *S = dyn_cast(&I)) - if (llvm::find(FreeStores, S)) - continue; - // Else, we have a white-list of instructions that we are ak speculating. - if (!isa(I) && !isa(I)) - return false; // Not in white-list - not worthwhile folding. 
- // And finally, if this is a non-free instruction that we are okay - // speculating, ensure that we consider the speculation budget. - BudgetRemaining -= TTI.getUserCost(&I); - if (BudgetRemaining < 0) - return false; // Eagerly refuse to fold as soon as we're out of budget. - } - assert(BudgetRemaining >= 0 && - "When we run out of budget we will eagerly return from within the " - "per-instruction loop."); - return true; - }; - - const SmallVector FreeStores = {PStore, QStore}; - if (!MergeCondStoresAggressively && - (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) || - !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores))) - return false; - - // If PostBB has more than two predecessors, we need to split it so we can - // sink the store. - if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) { - // We know that QFB's only successor is PostBB. And QFB has a single - // predecessor. If QTB exists, then its only successor is also PostBB. - // If QTB does not exist, then QFB's only predecessor has a conditional - // branch to QFB and PostBB. - BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor(); - BasicBlock *NewBB = SplitBlockPredecessors(PostBB, { QFB, TruePred}, - "condstore.split"); - if (!NewBB) - return false; - PostBB = NewBB; - } - - // OK, we're going to sink the stores to PostBB. The store has to be - // conditional though, so first create the predicate. - Value *PCond = cast(PFB->getSinglePredecessor()->getTerminator()) - ->getCondition(); - Value *QCond = cast(QFB->getSinglePredecessor()->getTerminator()) - ->getCondition(); - - Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(), - PStore->getParent()); - Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(), - QStore->getParent(), PPHI); - - IRBuilder<> QB(&*PostBB->getFirstInsertionPt()); + IRBuilder<> QB(&*PostBB->getFirstInsertionPt()); Value *PPred = PStore->getParent() == PTB ? 
PCond : QB.CreateNot(PCond); Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond); @@ -3523,10 +3225,11 @@ // Takes care of updating the successors and removing the old terminator. // Also makes sure not to introduce new successors by assuming that edges to // non-successor TrueBBs and FalseBBs aren't reachable. -static bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, - BasicBlock *TrueBB, BasicBlock *FalseBB, - uint32_t TrueWeight, - uint32_t FalseWeight) { +bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, + Value *Cond, BasicBlock *TrueBB, + BasicBlock *FalseBB, + uint32_t TrueWeight, + uint32_t FalseWeight) { // Remove any superfluous successor edges from the CFG. // First, figure out which successors to preserve. // If TrueBB and FalseBB are equal, only try to preserve one copy of that @@ -3578,7 +3281,7 @@ Builder.CreateBr(FalseBB); } - EraseTerminatorAndDCECond(OldTerm); + EraseTerminatorAndDCECond(OldTerm, nullptr, Options.AC); return true; } @@ -3586,7 +3289,8 @@ // (switch (select cond, X, Y)) on constant X, Y // with a branch - conditional if X and Y lead to distinct BBs, // unconditional otherwise. -static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { +bool SimplifyCFGOpt::SimplifySwitchOnSelect(SwitchInst *SI, + SelectInst *Select) { // Check for constant integer values in the select. ConstantInt *TrueVal = dyn_cast(Select->getTrueValue()); ConstantInt *FalseVal = dyn_cast(Select->getFalseValue()); @@ -3622,7 +3326,8 @@ // blockaddress(@fn, BlockB))) // with // (br cond, BlockA, BlockB). -static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { +bool SimplifyCFGOpt::SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, + SelectInst *SI) { // Check that both operands of the select are block addresses. 
BlockAddress *TBA = dyn_cast(SI->getTrueValue()); BlockAddress *FBA = dyn_cast(SI->getFalseValue()); @@ -3757,8 +3462,9 @@ /// The specified branch is a conditional branch. /// Check to see if it is branching on an or/and chain of icmp instructions, and /// fold it into a switch instruction if so. -static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, - const DataLayout &DL) { +bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, + IRBuilder<> &Builder, + const DataLayout &DL) { Instruction *Cond = dyn_cast(BI->getCondition()); if (!Cond) return false; @@ -3866,7 +3572,7 @@ } // Erase the old branch instruction. - EraseTerminatorAndDCECond(BI); + EraseTerminatorAndDCECond(BI, nullptr, Options.AC); LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); return true; @@ -4307,7 +4013,7 @@ Builder.CreateAssumption(Cond); Builder.CreateBr(BI->getSuccessor(0)); } - EraseTerminatorAndDCECond(BI); + EraseTerminatorAndDCECond(BI, nullptr, Options.AC); Changed = true; } } else if (auto *SI = dyn_cast(TI)) { @@ -4390,7 +4096,7 @@ return true; } -static void createUnreachableSwitchDefault(SwitchInst *Switch) { +void createUnreachableSwitchDefault(SwitchInst *Switch, AssumptionCache *AC) { LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); BasicBlock *NewDefaultBlock = SplitBlockPredecessors(Switch->getDefaultDest(), Switch->getParent(), ""); @@ -4398,12 +4104,13 @@ SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front()); auto *NewTerminator = NewDefaultBlock->getTerminator(); new UnreachableInst(Switch->getContext(), NewTerminator); - EraseTerminatorAndDCECond(NewTerminator); + EraseTerminatorAndDCECond(NewTerminator, nullptr, AC); } /// Turn a switch with two reachable destinations into an integer range /// comparison and branch. 
-static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { +bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, + IRBuilder<> &Builder) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); bool HasDefault = @@ -4512,7 +4219,7 @@ // Clean up the default block - it may have phis or other instructions before // the unreachable terminator. if (!HasDefault) - createUnreachableSwitchDefault(SI); + createUnreachableSwitchDefault(SI, Options.AC); // Drop the switch. SI->eraseFromParent(); @@ -4558,7 +4265,7 @@ if (HasDefault && DeadCases.empty() && NumUnknownBits < 64 /* avoid overflow */ && SI->getNumCases() == (1ULL << NumUnknownBits)) { - createUnreachableSwitchDefault(SI); + createUnreachableSwitchDefault(SI, AC); return true; } @@ -5641,561 +5348,869 @@ if (isSwitchDense(Values)) return false; - // First, transform the values such that they start at zero and ascend. - int64_t Base = Values[0]; - for (auto &V : Values) - V -= (uint64_t)(Base); + // First, transform the values such that they start at zero and ascend. + int64_t Base = Values[0]; + for (auto &V : Values) + V -= (uint64_t)(Base); + + // Now we have signed numbers that have been shifted so that, given enough + // precision, there are no negative values. Since the rest of the transform + // is bitwise only, we switch now to an unsigned representation. + + // This transform can be done speculatively because it is so cheap - it + // results in a single rotate operation being inserted. + // FIXME: It's possible that optimizing a switch on powers of two might also + // be beneficial - flag values are often powers of two and we could use a CLZ + // as the key function. + + // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than + // one element and LLVM disallows duplicate cases, Shift is guaranteed to be + // less than 64. 
+ unsigned Shift = 64; + for (auto &V : Values) + Shift = std::min(Shift, countTrailingZeros((uint64_t)V)); + assert(Shift < 64); + if (Shift > 0) + for (auto &V : Values) + V = (int64_t)((uint64_t)V >> Shift); + + if (!isSwitchDense(Values)) + // Transform didn't create a dense switch. + return false; + + // The obvious transform is to shift the switch condition right and emit a + // check that the condition actually cleanly divided by GCD, i.e. + // C & (1 << Shift - 1) == 0 + // inserting a new CFG edge to handle the case where it didn't divide cleanly. + // + // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the + // shift and puts the shifted-off bits in the uppermost bits. If any of these + // are nonzero then the switch condition will be very large and will hit the + // default case. + + auto *Ty = cast(SI->getCondition()->getType()); + Builder.SetInsertPoint(SI); + auto *ShiftC = ConstantInt::get(Ty, Shift); + auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base)); + auto *LShr = Builder.CreateLShr(Sub, ShiftC); + auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift); + auto *Rot = Builder.CreateOr(LShr, Shl); + SI->replaceUsesOfWith(SI->getCondition(), Rot); + + for (auto Case : SI->cases()) { + auto *Orig = Case.getCaseValue(); + auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base); + Case.setValue( + cast(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue())))); + } + return true; +} + +bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { + BasicBlock *BB = SI->getParent(); + + if (isValueEqualityComparison(SI)) { + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this switch. 
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) + return requestResimplify(); + + Value *Cond = SI->getCondition(); + if (SelectInst *Select = dyn_cast(Cond)) + if (SimplifySwitchOnSelect(SI, Select)) + return requestResimplify(); + + // If the block only contains the switch, see if we can fold the block + // away into any preds. + if (SI == &*BB->instructionsWithoutDebug().begin()) + if (FoldValueComparisonIntoPredecessors(SI, Builder)) + return requestResimplify(); + } + + // Try to transform the switch into an icmp and a branch. + if (TurnSwitchRangeIntoICmp(SI, Builder)) + return requestResimplify(); + + // Remove unreachable cases. + if (eliminateDeadSwitchCases(SI, Options.AC, DL)) + return requestResimplify(); + + if (switchToSelect(SI, Builder, DL, TTI)) + return requestResimplify(); + + if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI)) + return requestResimplify(); + + // The conversion from switch to lookup tables results in difficult-to-analyze + // code and makes pruning branches much harder. This is a problem if the + // switch expression itself can still be restricted as a result of inlining or + // CVP. Therefore, only apply this transformation during late stages of the + // optimisation pipeline. + if (Options.ConvertSwitchToLookupTable && + SwitchToLookupTable(SI, Builder, DL, TTI)) + return requestResimplify(); + + if (ReduceSwitchRange(SI, Builder, DL, TTI)) + return requestResimplify(); + + return false; +} + +bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { + BasicBlock *BB = IBI->getParent(); + bool Changed = false; + + // Eliminate redundant destinations. 
+  SmallPtrSet Succs;
+  for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+    BasicBlock *Dest = IBI->getDestination(i);
+    if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
+      Dest->removePredecessor(BB);
+      IBI->removeDestination(i);
+      --i;
+      --e;
+      Changed = true;
+    }
+  }
+
+  if (IBI->getNumDestinations() == 0) {
+    // If the indirectbr has no successors, change it to unreachable.
+    new UnreachableInst(IBI->getContext(), IBI);
+    EraseTerminatorAndDCECond(IBI, nullptr, Options.AC);
+    return true;
+  }
+
+  if (IBI->getNumDestinations() == 1) {
+    // If the indirectbr has one successor, change it to a direct branch.
+    BranchInst::Create(IBI->getDestination(0), IBI);
+    EraseTerminatorAndDCECond(IBI, nullptr, Options.AC);
+    return true;
+  }
+
+  if (SelectInst *SI = dyn_cast(IBI->getAddress())) {
+    if (SimplifyIndirectBrOnSelect(IBI, SI))
+      return requestResimplify();
+  }
+  return Changed;
+}
+
+/// Given a block with only a single landing pad and an unconditional branch
+/// try to find another basic block which this one can be merged with. This
+/// handles cases where we have multiple invokes with unique landing pads, but
+/// a shared handler.
+///
+/// We specifically choose to not worry about merging non-empty blocks
+/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
+/// practice, the optimizer produces empty landing pad blocks quite frequently
+/// when dealing with exception dense code. (see: instcombine, gvn, if-else
+/// sinking in this file)
+///
+/// This is primarily a code size optimization. We need to avoid performing
+/// any transform which might inhibit optimization (such as our ability to
+/// specialize a particular handler via tail commoning). We do this by not
+/// merging any blocks which require us to introduce a phi. Since the same
+/// values are flowing through both blocks, we don't lose any ability to
+/// specialize. If anything, we make such specialization more likely.
+/// +/// TODO - This transformation could remove entries from a phi in the target +/// block when the inputs in the phi are the same for the two blocks being +/// merged. In some cases, this could result in removal of the PHI entirely. +static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, + BasicBlock *BB) { + auto Succ = BB->getUniqueSuccessor(); + assert(Succ); + // If there's a phi in the successor block, we'd likely have to introduce + // a phi into the merged landing pad block. + if (isa(*Succ->begin())) + return false; + + for (BasicBlock *OtherPred : predecessors(Succ)) { + if (BB == OtherPred) + continue; + BasicBlock::iterator I = OtherPred->begin(); + LandingPadInst *LPad2 = dyn_cast(I); + if (!LPad2 || !LPad2->isIdenticalTo(LPad)) + continue; + for (++I; isa(I); ++I) + ; + BranchInst *BI2 = dyn_cast(I); + if (!BI2 || !BI2->isIdenticalTo(BI)) + continue; + + // We've found an identical block. Update our predecessors to take that + // path instead and make ourselves dead. + SmallPtrSet Preds; + Preds.insert(pred_begin(BB), pred_end(BB)); + for (BasicBlock *Pred : Preds) { + InvokeInst *II = cast(Pred->getTerminator()); + assert(II->getNormalDest() != BB && II->getUnwindDest() == BB && + "unexpected successor"); + II->setUnwindDest(OtherPred); + } + + // The debug info in OtherPred doesn't cover the merged control flow that + // used to go through BB. We need to delete it or update it. + for (auto I = OtherPred->begin(), E = OtherPred->end(); I != E;) { + Instruction &Inst = *I; + I++; + if (isa(Inst)) + Inst.eraseFromParent(); + } + + SmallPtrSet Succs; + Succs.insert(succ_begin(BB), succ_end(BB)); + for (BasicBlock *Succ : Succs) { + Succ->removePredecessor(BB); + } - // Now we have signed numbers that have been shifted so that, given enough - // precision, there are no negative values. Since the rest of the transform - // is bitwise only, we switch now to an unsigned representation. 
+ IRBuilder<> Builder(BI); + Builder.CreateUnreachable(); + BI->eraseFromParent(); + return true; + } + return false; +} - // This transform can be done speculatively because it is so cheap - it - // results in a single rotate operation being inserted. - // FIXME: It's possible that optimizing a switch on powers of two might also - // be beneficial - flag values are often powers of two and we could use a CLZ - // as the key function. +bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) { + return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder) + : simplifyCondBranch(Branch, Builder); +} - // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than - // one element and LLVM disallows duplicate cases, Shift is guaranteed to be - // less than 64. - unsigned Shift = 64; - for (auto &V : Values) - Shift = std::min(Shift, countTrailingZeros((uint64_t)V)); - assert(Shift < 64); - if (Shift > 0) - for (auto &V : Values) - V = (int64_t)((uint64_t)V >> Shift); +bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, + IRBuilder<> &Builder) { + BasicBlock *BB = BI->getParent(); + BasicBlock *Succ = BI->getSuccessor(0); - if (!isSwitchDense(Values)) - // Transform didn't create a dense switch. - return false; + // If the Terminator is the only non-phi instruction, simplify the block. + // If LoopHeader is provided, check if the block or its successor is a loop + // header. (This is for early invocations before loop simplify and + // vectorization to keep canonical loop forms for nested loops. These blocks + // can be eliminated when the pass is invoked later in the back-end.) + // Note that if BB has only one predecessor then we do not introduce new + // backedge, so we can eliminate BB. 
+ bool NeedCanonicalLoop = + Options.NeedCanonicalLoop && + (LoopHeaders && BB->hasNPredecessorsOrMore(2) && + (LoopHeaders->count(BB) || LoopHeaders->count(Succ))); + BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); + if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && + !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB)) + return true; - // The obvious transform is to shift the switch condition right and emit a - // check that the condition actually cleanly divided by GCD, i.e. - // C & (1 << Shift - 1) == 0 - // inserting a new CFG edge to handle the case where it didn't divide cleanly. - // - // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the - // shift and puts the shifted-off bits in the uppermost bits. If any of these - // are nonzero then the switch condition will be very large and will hit the - // default case. + // If the only instruction in the block is a seteq/setne comparison against a + // constant, try to simplify the block. + if (ICmpInst *ICI = dyn_cast(I)) + if (ICI->isEquality() && isa(ICI->getOperand(1))) { + for (++I; isa(I); ++I) + ; + if (I->isTerminator() && + tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder)) + return true; + } - auto *Ty = cast(SI->getCondition()->getType()); - Builder.SetInsertPoint(SI); - auto *ShiftC = ConstantInt::get(Ty, Shift); - auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base)); - auto *LShr = Builder.CreateLShr(Sub, ShiftC); - auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift); - auto *Rot = Builder.CreateOr(LShr, Shl); - SI->replaceUsesOfWith(SI->getCondition(), Rot); + // See if we can merge an empty landing pad block with another which is + // equivalent. 
+ if (LandingPadInst *LPad = dyn_cast(I)) { + for (++I; isa(I); ++I) + ; + if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB)) + return true; + } - for (auto Case : SI->cases()) { - auto *Orig = Case.getCaseValue(); - auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base); - Case.setValue( - cast(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue())))); + // If this basic block is ONLY a compare and a branch, and if a predecessor + // branches to us and our successor, fold the comparison into the + // predecessor and use logical operations to update the incoming value + // for PHI nodes in common successor. + if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold)) + return requestResimplify(); + return false; +} + +static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) { + BasicBlock *PredPred = nullptr; + for (auto *P : predecessors(BB)) { + BasicBlock *PPred = P->getSinglePredecessor(); + if (!PPred || (PredPred && PredPred != PPred)) + return nullptr; + PredPred = PPred; } - return true; + return PredPred; } -bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { - BasicBlock *BB = SI->getParent(); +bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { + BasicBlock *BB = BI->getParent(); + const Function *Fn = BB->getParent(); + if (Fn && Fn->hasFnAttribute(Attribute::OptForFuzzing)) + return false; - if (isValueEqualityComparison(SI)) { + // Conditional branch + if (isValueEqualityComparison(BI)) { // If we only have one predecessor, and if it is a branch on this value, - // see if that predecessor totally determines the outcome of this switch. + // see if that predecessor totally determines the outcome of this + // switch. 
if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) - if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) + if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) return requestResimplify(); - Value *Cond = SI->getCondition(); - if (SelectInst *Select = dyn_cast(Cond)) - if (SimplifySwitchOnSelect(SI, Select)) + // This block must be empty, except for the setcond inst, if it exists. + // Ignore dbg intrinsics. + auto I = BB->instructionsWithoutDebug().begin(); + if (&*I == BI) { + if (FoldValueComparisonIntoPredecessors(BI, Builder)) return requestResimplify(); - - // If the block only contains the switch, see if we can fold the block - // away into any preds. - if (SI == &*BB->instructionsWithoutDebug().begin()) - if (FoldValueComparisonIntoPredecessors(SI, Builder)) + } else if (&*I == cast(BI->getCondition())) { + ++I; + if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) return requestResimplify(); + } } - // Try to transform the switch into an icmp and a branch. - if (TurnSwitchRangeIntoICmp(SI, Builder)) - return requestResimplify(); + // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. + if (SimplifyBranchOnICmpChain(BI, Builder, DL)) + return true; - // Remove unreachable cases. - if (eliminateDeadSwitchCases(SI, Options.AC, DL)) + // If this basic block has dominating predecessor blocks and the dominating + // blocks' conditions imply BI's condition, we know the direction of BI. + Optional Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL); + if (Imp) { + // Turn this into a branch on constant. + auto *OldCond = BI->getCondition(); + ConstantInt *TorF = *Imp ? 
ConstantInt::getTrue(BB->getContext()) + : ConstantInt::getFalse(BB->getContext()); + BI->setCondition(TorF); + RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr, nullptr, + Options.AC); return requestResimplify(); + } - if (switchToSelect(SI, Builder, DL, TTI)) + // If this basic block is ONLY a compare and a branch, and if a predecessor + // branches to us and one of our successors, fold the comparison into the + // predecessor and use logical operations to pick the right destination. + if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold)) return requestResimplify(); - if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI)) - return requestResimplify(); + // We have a conditional branch to two blocks that are only reachable + // from BI. We know that the condbr dominates the two blocks, so see if + // there is any identical code in the "then" and "else" blocks. If so, we + // can hoist it up to the branching block. + if (BI->getSuccessor(0)->getSinglePredecessor()) { + if (BI->getSuccessor(1)->getSinglePredecessor()) { + if (HoistThenElseCodeToIf(BI, TTI)) + return requestResimplify(); + } else { + // If Successor #1 has multiple preds, we may be able to conditionally + // execute Successor #0 if it branches to Successor #1. + Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator(); + if (Succ0TI->getNumSuccessors() == 1 && + Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI)) + return requestResimplify(); + } + } else if (BI->getSuccessor(1)->getSinglePredecessor()) { + // If Successor #0 has multiple preds, we may be able to conditionally + // execute Successor #1 if it branches to Successor #0. 
+ Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator(); + if (Succ1TI->getNumSuccessors() == 1 && + Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI)) + return requestResimplify(); + } + + // If this is a branch on a phi node in the current block, thread control + // through this block if any PHI node entries are constants. + if (PHINode *PN = dyn_cast(BI->getCondition())) + if (PN->getParent() == BI->getParent()) + if (FoldCondBranchOnPHI(BI, DL, Options.AC)) + return requestResimplify(); + + // Scan predecessor blocks for conditional branches. + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + if (BranchInst *PBI = dyn_cast((*PI)->getTerminator())) + if (PBI != BI && PBI->isConditional()) + if (SimplifyCondBranchToCondBranch(PBI, BI, DL, TTI)) + return requestResimplify(); + + // Look for diamond patterns. + if (MergeCondStores) + if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) + if (BranchInst *PBI = dyn_cast(PrevBB->getTerminator())) + if (PBI != BI && PBI->isConditional()) + if (mergeConditionalStores(PBI, BI, DL, TTI)) + return requestResimplify(); + + return false; +} + +/// Check if passing a value to an instruction will cause undefined behavior. +static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { + Constant *C = dyn_cast(V); + if (!C) + return false; + + if (I->use_empty()) + return false; + + if (C->isNullValue() || isa(C)) { + // Only look at the first use, avoid hurting compile time with long uselists + User *Use = *I->user_begin(); + + // Now make sure that there are no instructions in between that can alter + // control flow (eg. calls) + for (BasicBlock::iterator + i = ++BasicBlock::iterator(I), + UI = BasicBlock::iterator(dyn_cast(Use)); + i != UI; ++i) + if (i == I->getParent()->end() || i->mayHaveSideEffects()) + return false; + + // Look through GEPs. 
A load from a GEP derived from NULL is still undefined + if (GetElementPtrInst *GEP = dyn_cast(Use)) + if (GEP->getPointerOperand() == I) + return passingValueIsAlwaysUndefined(V, GEP); + + // Look through bitcasts. + if (BitCastInst *BC = dyn_cast(Use)) + return passingValueIsAlwaysUndefined(V, BC); + + // Load from null is undefined. + if (LoadInst *LI = dyn_cast(Use)) + if (!LI->isVolatile()) + return !NullPointerIsDefined(LI->getFunction(), + LI->getPointerAddressSpace()); - // The conversion from switch to lookup tables results in difficult-to-analyze - // code and makes pruning branches much harder. This is a problem if the - // switch expression itself can still be restricted as a result of inlining or - // CVP. Therefore, only apply this transformation during late stages of the - // optimisation pipeline. - if (Options.ConvertSwitchToLookupTable && - SwitchToLookupTable(SI, Builder, DL, TTI)) - return requestResimplify(); + // Store to null is undefined. + if (StoreInst *SI = dyn_cast(Use)) + if (!SI->isVolatile()) + return (!NullPointerIsDefined(SI->getFunction(), + SI->getPointerAddressSpace())) && + SI->getPointerOperand() == I; - if (ReduceSwitchRange(SI, Builder, DL, TTI)) - return requestResimplify(); + // A call to null is undefined. + if (auto CS = CallSite(Use)) + return !NullPointerIsDefined(CS->getFunction()) && + CS.getCalledValue() == I; + } + return false; +} + +/// If BB has an incoming value that will always trigger undefined behavior +/// (eg. null pointer dereference), remove the branch leading here. 
+static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
+  for (PHINode &PHI : BB->phis())
+    for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
+      if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
+        Instruction *T = PHI.getIncomingBlock(i)->getTerminator();
+        IRBuilder<> Builder(T);
+        if (BranchInst *BI = dyn_cast(T)) {
+          BB->removePredecessor(PHI.getIncomingBlock(i));
+          // Turn unconditional branches into unreachables and remove the dead
+          // destination from conditional branches.
+          if (BI->isUnconditional())
+            Builder.CreateUnreachable();
+          else
+            Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
+                                                       : BI->getSuccessor(0));
+          BI->eraseFromParent();
+          return true;
+        }
+        // TODO: SwitchInst.
+      }
 
   return false;
 }
 
-bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
-  BasicBlock *BB = IBI->getParent();
+bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
   bool Changed = false;
 
-  // Eliminate redundant destinations.
-  SmallPtrSet Succs;
-  for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
-    BasicBlock *Dest = IBI->getDestination(i);
-    if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
-      Dest->removePredecessor(BB);
-      IBI->removeDestination(i);
-      --i;
-      --e;
-      Changed = true;
-    }
-  }
-
-  if (IBI->getNumDestinations() == 0) {
-    // If the indirectbr has no successors, change it to unreachable.
-    new UnreachableInst(IBI->getContext(), IBI);
-    EraseTerminatorAndDCECond(IBI);
-    return true;
-  }
+  assert(BB && BB->getParent() && "Block not embedded in function!");
+  assert(BB->getTerminator() && "Degenerate basic block encountered!");
 
-  if (IBI->getNumDestinations() == 1) {
-    // If the indirectbr has one successor, change it to a direct branch.
-    BranchInst::Create(IBI->getDestination(0), IBI);
+  // Remove basic blocks that have no predecessors (except the entry block)...
+  // or that just have themselves as a predecessor. These are unreachable.
+ if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) || + BB->getSinglePredecessor() == BB) { + LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB); + DeleteDeadBlock(BB); return true; } - if (SelectInst *SI = dyn_cast(IBI->getAddress())) { - if (SimplifyIndirectBrOnSelect(IBI, SI)) - return requestResimplify(); - } - return Changed; -} - -/// Given an block with only a single landing pad and a unconditional branch -/// try to find another basic block which this one can be merged with. This -/// handles cases where we have multiple invokes with unique landing pads, but -/// a shared handler. -/// -/// We specifically choose to not worry about merging non-empty blocks -/// here. That is a PRE/scheduling problem and is best solved elsewhere. In -/// practice, the optimizer produces empty landing pad blocks quite frequently -/// when dealing with exception dense code. (see: instcombine, gvn, if-else -/// sinking in this file) -/// -/// This is primarily a code size optimization. We need to avoid performing -/// any transform which might inhibit optimization (such as our ability to -/// specialize a particular handler via tail commoning). We do this by not -/// merging any blocks which require us to introduce a phi. Since the same -/// values are flowing through both blocks, we don't lose any ability to -/// specialize. If anything, we make such specialization more likely. -/// -/// TODO - This transformation could remove entries from a phi in the target -/// block when the inputs in the phi are the same for the two blocks being -/// merged. In some cases, this could result in removal of the PHI entirely. -static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, - BasicBlock *BB) { - auto Succ = BB->getUniqueSuccessor(); - assert(Succ); - // If there's a phi in the successor block, we'd likely have to introduce - // a phi into the merged landing pad block. 
- if (isa(*Succ->begin())) - return false; - - for (BasicBlock *OtherPred : predecessors(Succ)) { - if (BB == OtherPred) - continue; - BasicBlock::iterator I = OtherPred->begin(); - LandingPadInst *LPad2 = dyn_cast(I); - if (!LPad2 || !LPad2->isIdenticalTo(LPad)) - continue; - for (++I; isa(I); ++I) - ; - BranchInst *BI2 = dyn_cast(I); - if (!BI2 || !BI2->isIdenticalTo(BI)) - continue; - - // We've found an identical block. Update our predecessors to take that - // path instead and make ourselves dead. - SmallPtrSet Preds; - Preds.insert(pred_begin(BB), pred_end(BB)); - for (BasicBlock *Pred : Preds) { - InvokeInst *II = cast(Pred->getTerminator()); - assert(II->getNormalDest() != BB && II->getUnwindDest() == BB && - "unexpected successor"); - II->setUnwindDest(OtherPred); - } + // Check to see if we can constant propagate this terminator instruction + // away... + Changed |= ConstantFoldTerminator(BB, true); - // The debug info in OtherPred doesn't cover the merged control flow that - // used to go through BB. We need to delete it or update it. - for (auto I = OtherPred->begin(), E = OtherPred->end(); I != E;) { - Instruction &Inst = *I; - I++; - if (isa(Inst)) - Inst.eraseFromParent(); - } + // Check for and eliminate duplicate PHI nodes in this block. + Changed |= EliminateDuplicatePHINodes(BB); - SmallPtrSet Succs; - Succs.insert(succ_begin(BB), succ_end(BB)); - for (BasicBlock *Succ : Succs) { - Succ->removePredecessor(BB); - } + // Check for and remove branches that will always cause undefined behavior. + Changed |= removeUndefIntroducingPredecessor(BB); - IRBuilder<> Builder(BI); - Builder.CreateUnreachable(); - BI->eraseFromParent(); + // Merge basic blocks into their predecessor if there is only one distinct + // pred, and if there is only one distinct successor of the predecessor, and + // if there are no PHI nodes. 
+ if (MergeBlockIntoPredecessor(BB)) return true; - } - return false; -} - -bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) { - return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder) - : simplifyCondBranch(Branch, Builder); -} -bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, - IRBuilder<> &Builder) { - BasicBlock *BB = BI->getParent(); - BasicBlock *Succ = BI->getSuccessor(0); + if (SinkCommon && Options.SinkCommonInsts) + Changed |= SinkCommonCodeFromPredecessors(BB); - // If the Terminator is the only non-phi instruction, simplify the block. - // If LoopHeader is provided, check if the block or its successor is a loop - // header. (This is for early invocations before loop simplify and - // vectorization to keep canonical loop forms for nested loops. These blocks - // can be eliminated when the pass is invoked later in the back-end.) - // Note that if BB has only one predecessor then we do not introduce new - // backedge, so we can eliminate BB. - bool NeedCanonicalLoop = - Options.NeedCanonicalLoop && - (LoopHeaders && BB->hasNPredecessorsOrMore(2) && - (LoopHeaders->count(BB) || LoopHeaders->count(Succ))); - BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); - if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && - !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB)) - return true; + IRBuilder<> Builder(BB); - // If the only instruction in the block is a seteq/setne comparison against a - // constant, try to simplify the block. - if (ICmpInst *ICI = dyn_cast(I)) - if (ICI->isEquality() && isa(ICI->getOperand(1))) { - for (++I; isa(I); ++I) - ; - if (I->isTerminator() && - tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder)) - return true; - } + // If there is a trivial two-entry PHI node in this basic block, and we can + // eliminate it, do so now. 
+ if (auto *PN = dyn_cast(BB->begin())) + if (PN->getNumIncomingValues() == 2) + Changed |= FoldTwoEntryPHINode(PN, TTI, DL); - // See if we can merge an empty landing pad block with another which is - // equivalent. - if (LandingPadInst *LPad = dyn_cast(I)) { - for (++I; isa(I); ++I) - ; - if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB)) - return true; + Instruction *Terminator = BB->getTerminator(); + Builder.SetInsertPoint(Terminator); + switch (Terminator->getOpcode()) { + case Instruction::Br: + Changed |= simplifyBranch(cast(Terminator), Builder); + break; + case Instruction::Ret: + Changed |= simplifyReturn(cast(Terminator), Builder); + break; + case Instruction::Resume: + Changed |= simplifyResume(cast(Terminator), Builder); + break; + case Instruction::CleanupRet: + Changed |= simplifyCleanupReturn(cast(Terminator)); + break; + case Instruction::Switch: + Changed |= simplifySwitch(cast(Terminator), Builder); + break; + case Instruction::Unreachable: + Changed |= simplifyUnreachable(cast(Terminator)); + break; + case Instruction::IndirectBr: + Changed |= simplifyIndirectBr(cast(Terminator)); + break; } - // If this basic block is ONLY a compare and a branch, and if a predecessor - // branches to us and our successor, fold the comparison into the - // predecessor and use logical operations to update the incoming value - // for PHI nodes in common successor. - if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold)) - return requestResimplify(); - return false; + return Changed; } -static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) { - BasicBlock *PredPred = nullptr; - for (auto *P : predecessors(BB)) { - BasicBlock *PPred = P->getSinglePredecessor(); - if (!PPred || (PredPred && PredPred != PPred)) - return nullptr; - PredPred = PPred; - } - return PredPred; +bool SimplifyCFGOpt::run(BasicBlock *BB) { + bool Changed = false; + + // Repeated simplify BB as long as resimplification is requested. 
+ do { + Resimplify = false; + + // Perform one round of simplification. Resimplify flag will be set if + // another iteration is requested. + Changed |= simplifyOnce(BB); + } while (Resimplify); + + return Changed; } -bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { +/// If this basic block is simple enough, and if a predecessor branches to us +/// and one of our successors, fold the block into the predecessor and use +/// logical operations to pick the right destination. +bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU, +                                  unsigned BonusInstThreshold, +                                  AssumptionCache *AC) { BasicBlock *BB = BI->getParent(); - const Function *Fn = BB->getParent(); - if (Fn && Fn->hasFnAttribute(Attribute::OptForFuzzing)) - return false; - - // Conditional branch - if (isValueEqualityComparison(BI)) { - // If we only have one predecessor, and if it is a branch on this value, - // see if that predecessor totally determines the outcome of this - // switch. - if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) - if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) - return requestResimplify(); - // This block must be empty, except for the setcond inst, if it exists. - // Ignore dbg intrinsics. - auto I = BB->instructionsWithoutDebug().begin(); - if (&*I == BI) { - if (FoldValueComparisonIntoPredecessors(BI, Builder)) - return requestResimplify(); - } else if (&*I == cast(BI->getCondition())) { - ++I; - if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) - return requestResimplify(); - } - } + const unsigned PredCount = pred_size(BB); - // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. 
- if (SimplifyBranchOnICmpChain(BI, Builder, DL)) - return true; + Instruction *Cond = nullptr; + if (BI->isConditional()) + Cond = dyn_cast(BI->getCondition()); + else { + // For unconditional branch, check for a simple CFG pattern, where + // BB has a single predecessor and BB's successor is also its predecessor's + // successor. If such pattern exists, check for CSE between BB and its + // predecessor. + if (BasicBlock *PB = BB->getSinglePredecessor()) + if (BranchInst *PBI = dyn_cast(PB->getTerminator())) + if (PBI->isConditional() && + (BI->getSuccessor(0) == PBI->getSuccessor(0) || + BI->getSuccessor(0) == PBI->getSuccessor(1))) { + for (auto I = BB->instructionsWithoutDebug().begin(), + E = BB->instructionsWithoutDebug().end(); + I != E;) { + Instruction *Curr = &*I++; + if (isa(Curr)) { + Cond = Curr; + break; + } + // Quit if we can't remove this instruction. + if (!tryCSEWithPredecessor(Curr, PB)) + return false; + } + } - // If this basic block has dominating predecessor blocks and the dominating - // blocks' conditions imply BI's condition, we know the direction of BI. - Optional Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL); - if (Imp) { - // Turn this into a branch on constant. - auto *OldCond = BI->getCondition(); - ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext()) - : ConstantInt::getFalse(BB->getContext()); - BI->setCondition(TorF); - RecursivelyDeleteTriviallyDeadInstructions(OldCond); - return requestResimplify(); + if (!Cond) + return false; } - // If this basic block is ONLY a compare and a branch, and if a predecessor - // branches to us and one of our successors, fold the comparison into the - // predecessor and use logical operations to pick the right destination. 
- if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold)) - return requestResimplify(); + if (!Cond || (!isa(Cond) && !isa(Cond)) || + Cond->getParent() != BB || !Cond->hasOneUse()) + return false; - // We have a conditional branch to two blocks that are only reachable - // from BI. We know that the condbr dominates the two blocks, so see if - // there is any identical code in the "then" and "else" blocks. If so, we - // can hoist it up to the branching block. - if (BI->getSuccessor(0)->getSinglePredecessor()) { - if (BI->getSuccessor(1)->getSinglePredecessor()) { - if (HoistThenElseCodeToIf(BI, TTI)) - return requestResimplify(); - } else { - // If Successor #1 has multiple preds, we may be able to conditionally - // execute Successor #0 if it branches to Successor #1. - Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator(); - if (Succ0TI->getNumSuccessors() == 1 && - Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI)) - return requestResimplify(); - } - } else if (BI->getSuccessor(1)->getSinglePredecessor()) { - // If Successor #0 has multiple preds, we may be able to conditionally - // execute Successor #1 if it branches to Successor #0. - Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator(); - if (Succ1TI->getNumSuccessors() == 1 && - Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI)) - return requestResimplify(); - } + // Make sure the instruction after the condition is the cond branch. + BasicBlock::iterator CondIt = ++Cond->getIterator(); - // If this is a branch on a phi node in the current block, thread control - // through this block if any PHI node entries are constants. - if (PHINode *PN = dyn_cast(BI->getCondition())) - if (PN->getParent() == BI->getParent()) - if (FoldCondBranchOnPHI(BI, DL, Options.AC)) - return requestResimplify(); + // Ignore dbg intrinsics. 
+ while (isa(CondIt)) + ++CondIt; - // Scan predecessor blocks for conditional branches. - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - if (BranchInst *PBI = dyn_cast((*PI)->getTerminator())) - if (PBI != BI && PBI->isConditional()) - if (SimplifyCondBranchToCondBranch(PBI, BI, DL, TTI)) - return requestResimplify(); + if (&*CondIt != BI) + return false; - // Look for diamond patterns. - if (MergeCondStores) - if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) - if (BranchInst *PBI = dyn_cast(PrevBB->getTerminator())) - if (PBI != BI && PBI->isConditional()) - if (mergeConditionalStores(PBI, BI, DL, TTI)) - return requestResimplify(); + // Only allow this transformation if computing the condition doesn't involve + // too many instructions and these involved instructions can be executed + // unconditionally. We denote all involved instructions except the condition + // as "bonus instructions", and only allow this transformation when the + // number of the bonus instructions we'll need to create when cloning into + // each predecessor does not exceed a certain threshold. + unsigned NumBonusInsts = 0; + for (auto I = BB->begin(); Cond != &*I; ++I) { + // Ignore dbg intrinsics. + if (isa(I)) + continue; + if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(&*I)) + return false; + // I has only one use and can be executed unconditionally. + Instruction *User = dyn_cast(I->user_back()); + if (User == nullptr || User->getParent() != BB) + return false; + // I is used in the same BB. Since BI uses Cond and doesn't have more slots + // to use any other instruction, User must be an instruction between next(I) + // and Cond. - return false; -} + // Account for the cost of duplicating this instruction into each + // predecessor. + NumBonusInsts += PredCount; + // Early exits once we reach the limit. 
+ if (NumBonusInsts > BonusInstThreshold) + return false; + } -/// Check if passing a value to an instruction will cause undefined behavior. -static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { - Constant *C = dyn_cast(V); - if (!C) - return false; + // Cond is known to be a compare or binary operator. Check to make sure that + // neither operand is a potentially-trapping constant expression. + if (ConstantExpr *CE = dyn_cast(Cond->getOperand(0))) + if (CE->canTrap()) + return false; + if (ConstantExpr *CE = dyn_cast(Cond->getOperand(1))) + if (CE->canTrap()) + return false; - if (I->use_empty()) + // Finally, don't infinitely unroll conditional loops. + BasicBlock *TrueDest = BI->getSuccessor(0); + BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr; + if (TrueDest == BB || FalseDest == BB) return false; - if (C->isNullValue() || isa(C)) { - // Only look at the first use, avoid hurting compile time with long uselists - User *Use = *I->user_begin(); - - // Now make sure that there are no instructions in between that can alter - // control flow (eg. calls) - for (BasicBlock::iterator - i = ++BasicBlock::iterator(I), - UI = BasicBlock::iterator(dyn_cast(Use)); - i != UI; ++i) - if (i == I->getParent()->end() || i->mayHaveSideEffects()) - return false; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *PredBlock = *PI; + BranchInst *PBI = dyn_cast(PredBlock->getTerminator()); - // Look through GEPs. A load from a GEP derived from NULL is still undefined - if (GetElementPtrInst *GEP = dyn_cast(Use)) - if (GEP->getPointerOperand() == I) - return passingValueIsAlwaysUndefined(V, GEP); + // Check that we have two conditional branches. If there is a PHI node in + // the common successor, verify that the same value flows in from both + // blocks. 
+ SmallVector PHIs; + if (!PBI || PBI->isUnconditional() || + (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) || + (!BI->isConditional() && + !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs))) + continue; - // Look through bitcasts. - if (BitCastInst *BC = dyn_cast(Use)) - return passingValueIsAlwaysUndefined(V, BC); + // Determine if the two branches share a common destination. + Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd; + bool InvertPredCond = false; - // Load from null is undefined. - if (LoadInst *LI = dyn_cast(Use)) - if (!LI->isVolatile()) - return !NullPointerIsDefined(LI->getFunction(), - LI->getPointerAddressSpace()); + if (BI->isConditional()) { + if (PBI->getSuccessor(0) == TrueDest) { + Opc = Instruction::Or; + } else if (PBI->getSuccessor(1) == FalseDest) { + Opc = Instruction::And; + } else if (PBI->getSuccessor(0) == FalseDest) { + Opc = Instruction::And; + InvertPredCond = true; + } else if (PBI->getSuccessor(1) == TrueDest) { + Opc = Instruction::Or; + InvertPredCond = true; + } else { + continue; + } + } else { + if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest) + continue; + } - // Store to null is undefined. - if (StoreInst *SI = dyn_cast(Use)) - if (!SI->isVolatile()) - return (!NullPointerIsDefined(SI->getFunction(), - SI->getPointerAddressSpace())) && - SI->getPointerOperand() == I; + LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); + IRBuilder<> Builder(PBI); - // A call to null is undefined. - if (auto CS = CallSite(Use)) - return !NullPointerIsDefined(CS->getFunction()) && - CS.getCalledValue() == I; - } - return false; -} + // If we need to invert the condition in the pred block to match, do so now. + if (InvertPredCond) { + Value *NewCond = PBI->getCondition(); -/// If BB has an incoming value that will always trigger undefined behavior -/// (eg. null pointer dereference), remove the branch leading here. 
-static bool removeUndefIntroducingPredecessor(BasicBlock *BB) { - for (PHINode &PHI : BB->phis()) - for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) - if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) { - Instruction *T = PHI.getIncomingBlock(i)->getTerminator(); - IRBuilder<> Builder(T); - if (BranchInst *BI = dyn_cast(T)) { - BB->removePredecessor(PHI.getIncomingBlock(i)); - // Turn uncoditional branches into unreachables and remove the dead - // destination from conditional branches. - if (BI->isUnconditional()) - Builder.CreateUnreachable(); - else - Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1) - : BI->getSuccessor(0)); - BI->eraseFromParent(); - return true; - } - // TODO: SwitchInst. + if (NewCond->hasOneUse() && isa(NewCond)) { + CmpInst *CI = cast(NewCond); + CI->setPredicate(CI->getInversePredicate()); + } else { + NewCond = + Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not"); } - return false; -} - -bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { - bool Changed = false; - - assert(BB && BB->getParent() && "Block not embedded in function!"); - assert(BB->getTerminator() && "Degenerate basic block encountered!"); + PBI->setCondition(NewCond); + PBI->swapSuccessors(); + } - // Remove basic blocks that have no predecessors (except the entry block)... - // or that just have themself as a predecessor. These are unreachable. - if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) || - BB->getSinglePredecessor() == BB) { - LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB); - DeleteDeadBlock(BB); - return true; - } + // If we have bonus instructions, clone them into the predecessor block. + // Note that there may be multiple predecessor blocks, so we cannot move + // bonus instructions to a predecessor block. + ValueToValueMapTy VMap; // maps original values to cloned values + // We already make sure Cond is the last instruction before BI. 
Therefore, + // all instructions before Cond other than DbgInfoIntrinsic are bonus + // instructions. + for (auto BonusInst = BB->begin(); Cond != &*BonusInst; ++BonusInst) { + if (isa(BonusInst)) + continue; + Instruction *NewBonusInst = BonusInst->clone(); + RemapInstruction(NewBonusInst, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + VMap[&*BonusInst] = NewBonusInst; - // Check to see if we can constant propagate this terminator instruction - // away... - Changed |= ConstantFoldTerminator(BB, true); + // If we moved a load, we cannot any longer claim any knowledge about + // its potential value. The previous information might have been valid + // only given the branch precondition. + // For an analogous reason, we must also drop all the metadata whose + // semantics we don't understand. + NewBonusInst->dropUnknownNonDebugMetadata(); - // Check for and eliminate duplicate PHI nodes in this block. - Changed |= EliminateDuplicatePHINodes(BB); + PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst); + NewBonusInst->takeName(&*BonusInst); + BonusInst->setName(BonusInst->getName() + ".old"); + } - // Check for and remove branches that will always cause undefined behavior. - Changed |= removeUndefIntroducingPredecessor(BB); + // Clone Cond into the predecessor basic block, and or/and the + // two conditions together. + Instruction *CondInPred = Cond->clone(); + RemapInstruction(CondInPred, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + PredBlock->getInstList().insert(PBI->getIterator(), CondInPred); + CondInPred->takeName(Cond); + Cond->setName(CondInPred->getName() + ".old"); - // Merge basic blocks into their predecessor if there is only one distinct - // pred, and if there is only one distinct successor of the predecessor, and - // if there are no PHI nodes. 
- if (MergeBlockIntoPredecessor(BB)) - return true; + if (BI->isConditional()) { + Instruction *NewCond = cast( + Builder.CreateBinOp(Opc, PBI->getCondition(), CondInPred, "or.cond")); + PBI->setCondition(NewCond); - if (SinkCommon && Options.SinkCommonInsts) - Changed |= SinkCommonCodeFromPredecessors(BB); + uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + bool HasWeights = + extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, + SuccTrueWeight, SuccFalseWeight); + SmallVector NewWeights; - IRBuilder<> Builder(BB); + if (PBI->getSuccessor(0) == BB) { + if (HasWeights) { + // PBI: br i1 %x, BB, FalseDest + // BI: br i1 %y, TrueDest, FalseDest + // TrueWeight is TrueWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * SuccTrueWeight); + // FalseWeight is FalseWeight for PBI * TotalWeight for BI + + // TrueWeight for PBI * FalseWeight for BI. + // We assume that total weights of a BranchInst can fit into 32 bits. + // Therefore, we will not have overflow using 64-bit arithmetic. + NewWeights.push_back(PredFalseWeight * + (SuccFalseWeight + SuccTrueWeight) + + PredTrueWeight * SuccFalseWeight); + } + AddPredecessorToBlock(TrueDest, PredBlock, BB, MSSAU); + PBI->setSuccessor(0, TrueDest); + } + if (PBI->getSuccessor(1) == BB) { + if (HasWeights) { + // PBI: br i1 %x, TrueDest, BB + // BI: br i1 %y, TrueDest, FalseDest + // TrueWeight is TrueWeight for PBI * TotalWeight for BI + + // FalseWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * + (SuccFalseWeight + SuccTrueWeight) + + PredFalseWeight * SuccTrueWeight); + // FalseWeight is FalseWeight for PBI * FalseWeight for BI. 
+ NewWeights.push_back(PredFalseWeight * SuccFalseWeight); + } + AddPredecessorToBlock(FalseDest, PredBlock, BB, MSSAU); + PBI->setSuccessor(1, FalseDest); + } + if (NewWeights.size() == 2) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(NewWeights); - // If there is a trivial two-entry PHI node in this basic block, and we can - // eliminate it, do so now. - if (auto *PN = dyn_cast(BB->begin())) - if (PN->getNumIncomingValues() == 2) - Changed |= FoldTwoEntryPHINode(PN, TTI, DL); + SmallVector MDWeights(NewWeights.begin(), + NewWeights.end()); + setBranchWeights(PBI, MDWeights[0], MDWeights[1]); + } else + PBI->setMetadata(LLVMContext::MD_prof, nullptr); + } else { + // Update PHI nodes in the common successors. + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + ConstantInt *PBI_C = cast( + PHIs[i]->getIncomingValueForBlock(PBI->getParent())); + assert(PBI_C->getType()->isIntegerTy(1)); + Instruction *MergedCond = nullptr; + if (PBI->getSuccessor(0) == TrueDest) { + // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value) + // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value) + // is false: !PBI_Cond and BI_Value + Instruction *NotCond = cast( + Builder.CreateNot(PBI->getCondition(), "not.cond")); + MergedCond = cast(Builder.CreateBinOp( + Instruction::And, NotCond, CondInPred, "and.cond")); + if (PBI_C->isOne()) + MergedCond = cast(Builder.CreateBinOp( + Instruction::Or, PBI->getCondition(), MergedCond, "or.cond")); + } else { + // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C) + // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond) + // is false: PBI_Cond and BI_Value + MergedCond = cast(Builder.CreateBinOp( + Instruction::And, PBI->getCondition(), CondInPred, "and.cond")); + if (PBI_C->isOne()) { + Instruction *NotCond = cast( + Builder.CreateNot(PBI->getCondition(), "not.cond")); + MergedCond = cast(Builder.CreateBinOp( + Instruction::Or, NotCond, MergedCond, "or.cond")); + } + } + // Update PHI Node. 
+ PHIs[i]->setIncomingValueForBlock(PBI->getParent(), MergedCond); + } - Instruction *Terminator = BB->getTerminator(); - Builder.SetInsertPoint(Terminator); - switch (Terminator->getOpcode()) { - case Instruction::Br: - Changed |= simplifyBranch(cast(Terminator), Builder); - break; - case Instruction::Ret: - Changed |= simplifyReturn(cast(Terminator), Builder); - break; - case Instruction::Resume: - Changed |= simplifyResume(cast(Terminator), Builder); - break; - case Instruction::CleanupRet: - Changed |= simplifyCleanupReturn(cast(Terminator)); - break; - case Instruction::Switch: - Changed |= simplifySwitch(cast(Terminator), Builder); - break; - case Instruction::Unreachable: - Changed |= simplifyUnreachable(cast(Terminator)); - break; - case Instruction::IndirectBr: - Changed |= simplifyIndirectBr(cast(Terminator)); - break; - } + // PBI is changed to branch to TrueDest below. Remove itself from + // potential phis from all other successors. + if (MSSAU) + MSSAU->changeCondBranchToUnconditionalTo(PBI, TrueDest); - return Changed; -} + // Change PBI from Conditional to Unconditional. + BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI); + EraseTerminatorAndDCECond(PBI, MSSAU, AC); + PBI = New_PBI; + } -bool SimplifyCFGOpt::run(BasicBlock *BB) { - bool Changed = false; + // If BI was a loop latch, it may have had associated loop metadata. + // We need to copy it to the new latch, that is, PBI. + if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop)) + PBI->setMetadata(LLVMContext::MD_loop, LoopMD); - // Repeated simplify BB as long as resimplification is requested. - do { - Resimplify = false; + // TODO: If BB is reachable from all paths through PredBlock, then we + // could replace PBI's branch probabilities with BI's. - // Perform one round of simplifcation. Resimplify flag will be set if - // another iteration is requested. - Changed |= simplifyOnce(BB); - } while (Resimplify); + // Copy any debug value intrinsics into the end of PredBlock. 
+ for (Instruction &I : *BB) + if (isa(I)) + I.clone()->insertBefore(PBI); - return Changed; + return true; + } + return false; } bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes -; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=3 < %s | FileCheck %s +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=3 < %s | FileCheck %s --check-prefixes=CHECK,NO_ASSUME +; RUN: opt -S -passes=attributor --enable-knowledge-retention -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=3 < %s | FileCheck %s --check-prefixes=CHECK,USE_ASSUME %struct.ss = type { i32, i64 } @@ -15,15 +16,26 @@ } define i32 @test(i32* %X) { -; CHECK-LABEL: define {{[^@]+}}@test -; CHECK-SAME: (i32* nocapture nofree readonly align 4 [[X:%.*]]) -; CHECK-NEXT: entry: -; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 -; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 -; CHECK-NEXT: ret i32 0 +; NO_ASSUME-LABEL: define {{[^@]+}}@test +; NO_ASSUME-SAME: (i32* nocapture nofree readonly align 4 [[X:%.*]]) +; NO_ASSUME-NEXT: entry: +; NO_ASSUME-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]] +; NO_ASSUME-NEXT: 
[[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; NO_ASSUME-NEXT: store i32 1, i32* [[TMP1]], align 8 +; NO_ASSUME-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; NO_ASSUME-NEXT: store i64 2, i64* [[TMP4]], align 4 +; NO_ASSUME-NEXT: ret i32 0 +; +; USE_ASSUME-LABEL: define {{[^@]+}}@test +; USE_ASSUME-SAME: (i32* nocapture nofree readonly align 4 [[X:%.*]]) +; USE_ASSUME-NEXT: entry: +; USE_ASSUME-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]] +; USE_ASSUME-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; USE_ASSUME-NEXT: store i32 1, i32* [[TMP1]], align 8 +; USE_ASSUME-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; USE_ASSUME-NEXT: store i64 2, i64* [[TMP4]], align 4 +; USE_ASSUME-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[X]], i64 4), "align"(%struct.ss* [[S]], i64 8), "dereferenceable"(i32* [[X]], i64 4), "dereferenceable"(%struct.ss* [[S]], i64 12), "nonnull"(%struct.ss* [[S]]), "nonnull"(i32* [[X]]) ] +; USE_ASSUME-NEXT: ret i32 0 ; entry: %S = alloca %struct.ss diff --git a/llvm/test/Transforms/InstSimplify/load.ll b/llvm/test/Transforms/InstSimplify/load.ll --- a/llvm/test/Transforms/InstSimplify/load.ll +++ b/llvm/test/Transforms/InstSimplify/load.ll @@ -1,20 +1,29 @@ -; NOTE: Assertions have been autogenerated by update_test_checks.py -; RUN: opt < %s -instsimplify -S | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instsimplify -S | FileCheck %s --check-prefixes=CHECK,NO_ASSUME +; RUN: opt < %s -instsimplify --enable-knowledge-retention -S | FileCheck %s --check-prefixes=CHECK,USE_ASSUME @zeroinit = constant {} zeroinitializer @undef = constant {} undef define i32 @crash_on_zeroinit() { -; CHECK-LABEL: @crash_on_zeroinit( -; CHECK: ret i32 0 +; NO_ASSUME-LABEL: @crash_on_zeroinit( +; NO_ASSUME-NEXT: ret i32 0 +; +; USE_ASSUME-LABEL: 
@crash_on_zeroinit( +; USE_ASSUME-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(i32* bitcast ({}* @zeroinit to i32*), i64 4), "nonnull"(i32* bitcast ({}* @zeroinit to i32*)) ] +; USE_ASSUME-NEXT: ret i32 0 ; %load = load i32, i32* bitcast ({}* @zeroinit to i32*) ret i32 %load } define i32 @crash_on_undef() { -; CHECK-LABEL: @crash_on_undef( -; CHECK: ret i32 undef +; NO_ASSUME-LABEL: @crash_on_undef( +; NO_ASSUME-NEXT: ret i32 undef +; +; USE_ASSUME-LABEL: @crash_on_undef( +; USE_ASSUME-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(i32* bitcast ({}* @undef to i32*), i64 4), "nonnull"(i32* bitcast ({}* @undef to i32*)) ] +; USE_ASSUME-NEXT: ret i32 undef ; %load = load i32, i32* bitcast ({}* @undef to i32*) ret i32 %load @@ -23,8 +32,13 @@ @GV = private constant [8 x i32] [i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49] define <8 x i32> @partial_load() { -; CHECK-LABEL: @partial_load( -; CHECK: ret <8 x i32> +; NO_ASSUME-LABEL: @partial_load( +; NO_ASSUME-NEXT: ret <8 x i32> +; +; USE_ASSUME-LABEL: @partial_load( +; USE_ASSUME-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(<8 x i32>* bitcast (i32* getelementptr ([8 x i32], [8 x i32]* @GV, i64 0, i64 -1) to <8 x i32>*), i64 32), "nonnull"(<8 x i32>* bitcast (i32* getelementptr ([8 x i32], [8 x i32]* @GV, i64 0, i64 -1) to <8 x i32>*)) ] +; USE_ASSUME-NEXT: ret <8 x i32> +; %load = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr ([8 x i32], [8 x i32]* @GV, i64 0, i64 -1) to <8 x i32>*) ret <8 x i32> %load } @@ -33,8 +47,13 @@ ; This does an out of bounds load from the global constant define <3 x float> @load_vec3() { -; CHECK-LABEL: @load_vec3( -; CHECK-NEXT: ret <3 x float> undef +; NO_ASSUME-LABEL: @load_vec3( +; NO_ASSUME-NEXT: ret <3 x float> undef +; +; USE_ASSUME-LABEL: @load_vec3( +; USE_ASSUME-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(<3 x float>* getelementptr inbounds (<3 x float>, <3 x float>* @constvec, i64 1), i64 12), "nonnull"(<3 
x float>* getelementptr inbounds (<3 x float>, <3 x float>* @constvec, i64 1)) ] +; USE_ASSUME-NEXT: ret <3 x float> undef +; %1 = load <3 x float>, <3 x float>* getelementptr inbounds (<3 x float>, <3 x float>* @constvec, i64 1) ret <3 x float> %1 } diff --git a/llvm/unittests/Transforms/Utils/LocalTest.cpp b/llvm/unittests/Transforms/Utils/LocalTest.cpp --- a/llvm/unittests/Transforms/Utils/LocalTest.cpp +++ b/llvm/unittests/Transforms/Utils/LocalTest.cpp @@ -25,12 +25,18 @@ TEST(Local, RecursivelyDeleteDeadPHINodes) { LLVMContext C; + auto M = std::make_unique<Module>("test", C); IRBuilder<> builder(C); + Function *F = Function::Create(FunctionType::get(Type::getVoidTy(C), false), + GlobalValue::ExternalLinkage, "", &*M); + // Make blocks BasicBlock *bb0 = BasicBlock::Create(C); BasicBlock *bb1 = BasicBlock::Create(C); + bb0->insertInto(F); + bb1->insertInto(F); builder.SetInsertPoint(bb0); PHINode *phi = builder.CreatePHI(Type::getInt32Ty(C), 2); @@ -59,11 +65,6 @@ builder.CreateAdd(phi, phi); EXPECT_TRUE(RecursivelyDeleteDeadPHINode(phi)); - - bb0->dropAllReferences(); - bb1->dropAllReferences(); - delete bb0; - delete bb1; } TEST(Local, RemoveDuplicatePHINodes) {