Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -216,7 +216,7 @@ // CFGSimplification - Merge basic blocks, eliminate unreachable blocks, // simplify terminator instructions, etc... // -FunctionPass *createCFGSimplificationPass(); +FunctionPass *createCFGSimplificationPass(int Threshold = -1); //===----------------------------------------------------------------------===// // Index: include/llvm/Transforms/Utils/Local.h =================================================================== --- include/llvm/Transforms/Utils/Local.h +++ include/llvm/Transforms/Utils/Local.h @@ -138,6 +138,7 @@ /// the basic block that was pointed to. /// bool SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, + unsigned BonusInstThreshold, const DataLayout *TD = nullptr, AssumptionTracker *AT = nullptr); @@ -151,7 +152,8 @@ /// and if a predecessor branches to us and one of our successors, fold the /// setcc into the predecessor and use logical operations to pick the right /// destination. -bool FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL = nullptr); +bool FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL = nullptr, + unsigned BonusInstThreshold = 1); /// DemoteRegToStack - This function takes a virtual register computed by an /// Instruction and replaces it with a slot in the stack frame, allocated via Index: lib/Transforms/Scalar/SimplifyCFGPass.cpp =================================================================== --- lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -35,17 +35,24 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "simplifycfg" +static cl::opt +UserBonusInstThreshold("bonus-inst-threshold", cl::Hidden, cl::init(1), + cl::desc("Control the number of bonus instructions (default = 1)")); + STATISTIC(NumSimpl, "Number of blocks simplified"); namespace { struct CFGSimplifyPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid - CFGSimplifyPass() : FunctionPass(ID) { + unsigned BonusInstThreshold; + CFGSimplifyPass(int T = -1) : FunctionPass(ID) { + BonusInstThreshold = (T == -1) ? UserBonusInstThreshold : unsigned(T); initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; @@ -66,8 +73,8 @@ false) // Public interface to the CFGSimplification pass -FunctionPass *llvm::createCFGSimplificationPass() { - return new CFGSimplifyPass(); +FunctionPass *llvm::createCFGSimplificationPass(int Threshold) { + return new CFGSimplifyPass(Threshold); } /// mergeEmptyReturnBlocks - If we have more than one empty (other than phi @@ -150,7 +157,8 @@ /// iterating until no more changes are made. static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, const DataLayout *DL, - AssumptionTracker *AT) { + AssumptionTracker *AT, + unsigned BonusInstThreshold) { bool Changed = false; bool LocalChange = true; while (LocalChange) { @@ -159,7 +167,7 @@ // Loop over all of the basic blocks and remove them if they are unneeded... // for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { - if (SimplifyCFG(BBIt++, TTI, DL, AT)) { + if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, DL, AT)) { LocalChange = true; ++NumSimpl; } @@ -182,7 +190,7 @@ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; bool EverChanged = removeUnreachableBlocks(F); EverChanged |= mergeEmptyReturnBlocks(F); - EverChanged |= iterativelySimplifyCFG(F, TTI, DL, AT); + EverChanged |= iterativelySimplifyCFG(F, TTI, DL, AT, BonusInstThreshold); // If neither pass changed anything, we're done. if (!EverChanged) return false; @@ -196,7 +204,7 @@ return true; do { - EverChanged = iterativelySimplifyCFG(F, TTI, DL, AT); + EverChanged = iterativelySimplifyCFG(F, TTI, DL, AT, BonusInstThreshold); EverChanged |= removeUnreachableBlocks(F); } while (EverChanged); Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -44,6 +44,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include #include #include @@ -93,6 +94,7 @@ class SimplifyCFGOpt { const TargetTransformInfo &TTI; + unsigned BonusInstThreshold; const DataLayout *const DL; AssumptionTracker *AT; Value *isValueEqualityComparison(TerminatorInst *TI); @@ -113,9 +115,9 @@ bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder); public: - SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *DL, - AssumptionTracker *AT) - : TTI(TTI), DL(DL), AT(AT) {} + SimplifyCFGOpt(const TargetTransformInfo &TTI, unsigned BonusInstThreshold, + const DataLayout *DL, AssumptionTracker *AT) + : TTI(TTI), BonusInstThreshold(BonusInstThreshold), DL(DL), AT(AT) {} bool run(BasicBlock *BB); }; } @@ -1973,7 +1975,8 @@ /// FoldBranchToCommonDest - If this basic block is simple enough, and if a /// predecessor branches to us and one of our successors, fold the block into /// the predecessor and use logical operations to pick the right destination. -bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL) { +bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL, + unsigned BonusInstThreshold) { BasicBlock *BB = BI->getParent(); Instruction *Cond = nullptr; @@ -2010,33 +2013,6 @@ Cond->getParent() != BB || !Cond->hasOneUse()) return false; - // Only allow this if the condition is a simple instruction that can be - // executed unconditionally. It must be in the same block as the branch, and - // must be at the front of the block. - BasicBlock::iterator FrontIt = BB->front(); - - // Ignore dbg intrinsics. - while (isa(FrontIt)) ++FrontIt; - - // Allow a single instruction to be hoisted in addition to the compare - // that feeds the branch. We later ensure that any values that _it_ uses - // were also live in the predecessor, so that we don't unnecessarily create - // register pressure or inhibit out-of-order execution. - Instruction *BonusInst = nullptr; - if (&*FrontIt != Cond && - FrontIt->hasOneUse() && FrontIt->user_back() == Cond && - isSafeToSpeculativelyExecute(FrontIt, DL)) { - BonusInst = &*FrontIt; - ++FrontIt; - - // Ignore dbg intrinsics. - while (isa(FrontIt)) ++FrontIt; - } - - // Only a single bonus inst is allowed. - if (&*FrontIt != Cond) - return false; - // Make sure the instruction after the condition is the cond branch. BasicBlock::iterator CondIt = Cond; ++CondIt; @@ -2046,6 +2022,31 @@ if (&*CondIt != BI) return false; + // Only allow this transformation if computing the condition doesn't involve + // too many instructions and these involved instructions can be executed + // unconditionally. We denote all involved instructions except the condition + // as "bonus instructions", and only allow this transformation when the + // number of the bonus instructions does not exceed a certain threshold. + unsigned NumBonusInsts = 0; + for (auto I = BB->begin(); Cond != I; ++I) { + // Ignore dbg intrinsics. + if (isa(I)) + continue; + if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I, DL)) + return false; + // I has only one use and can be executed unconditionally. + Instruction *User = dyn_cast(I->user_back()); + if (User == nullptr || User->getParent() != BB) + return false; + // I is used in the same BB. Since BI uses Cond and doesn't have more slots + // to use any other instruction, User must be an instruction between next(I) + // and Cond. + ++NumBonusInsts; + // Early exits once we reach the limit. + if (NumBonusInsts > BonusInstThreshold) + return false; + } + // Cond is known to be a compare or binary operator. Check to make sure that // neither operand is a potentially-trapping constant expression. if (ConstantExpr *CE = dyn_cast(Cond->getOperand(0))) @@ -2115,30 +2116,41 @@ PBI->swapSuccessors(); } - // If we have a bonus inst, clone it into the predecessor block. - Instruction *NewBonus = nullptr; - if (BonusInst) { - NewBonus = BonusInst->clone(); + // If we have bonus instructions, clone them into the predecessor block. + // Note that there may be mutliple predecessor blocks, so we cannot move + // bonus instructions to a predecessor block. + ValueToValueMapTy VMap; // maps original values to cloned values + // We already make sure Cond is the last instruction before BI. Therefore, + // every instructions before Cond other than DbgInfoIntrinsic are bonus + // instructions. + for (auto BonusInst = BB->begin(); Cond != BonusInst; ++BonusInst) { + if (isa(BonusInst)) + continue; + Instruction *NewBonusInst = BonusInst->clone(); + RemapInstruction(NewBonusInst, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); + VMap[BonusInst] = NewBonusInst; // If we moved a load, we cannot any longer claim any knowledge about // its potential value. The previous information might have been valid // only given the branch precondition. // For an analogous reason, we must also drop all the metadata whose // semantics we don't understand. - NewBonus->dropUnknownMetadata(LLVMContext::MD_dbg); + NewBonusInst->dropUnknownMetadata(LLVMContext::MD_dbg); - PredBlock->getInstList().insert(PBI, NewBonus); - NewBonus->takeName(BonusInst); - BonusInst->setName(BonusInst->getName()+".old"); + PredBlock->getInstList().insert(PBI, NewBonusInst); + NewBonusInst->takeName(BonusInst); + BonusInst->setName(BonusInst->getName() + ".old"); } // Clone Cond into the predecessor basic block, and or/and the // two conditions together. Instruction *New = Cond->clone(); - if (BonusInst) New->replaceUsesOfWith(BonusInst, NewBonus); + RemapInstruction(New, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); PredBlock->getInstList().insert(PBI, New); New->takeName(Cond); - Cond->setName(New->getName()+".old"); + Cond->setName(New->getName() + ".old"); if (BI->isConditional()) { Instruction *NewCond = @@ -2616,7 +2628,7 @@ /// the PHI, merging the third icmp into the switch. static bool TryToSimplifyUncondBranchWithICmpInIt( ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI, - const DataLayout *DL, AssumptionTracker *AT) { + unsigned BonusInstThreshold, const DataLayout *DL, AssumptionTracker *AT) { BasicBlock *BB = ICI->getParent(); // If the block has any PHIs in it or the icmp has multiple uses, it is too @@ -2649,7 +2661,7 @@ ICI->eraseFromParent(); } // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB, TTI, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; } // Ok, the block is reachable from the default dest. If the constant we're @@ -2665,7 +2677,7 @@ ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB, TTI, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; } // The use of the icmp has to be in the 'end' block, by the only PHI node in @@ -3900,12 +3912,12 @@ // see if that predecessor totally determines the outcome of this switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; Value *Cond = SI->getCondition(); if (SelectInst *Select = dyn_cast(Cond)) if (SimplifySwitchOnSelect(SI, Select)) - return SimplifyCFG(BB, TTI, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; // If the block only contains the switch, see if we can fold the block // away into any preds. @@ -3915,22 +3927,22 @@ ++BBI; if (SI == &*BBI) if (FoldValueComparisonIntoPredecessors(SI, Builder)) - return SimplifyCFG(BB, TTI, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; } // Try to transform the switch into an icmp and a branch. if (TurnSwitchRangeIntoICmp(SI, Builder)) - return SimplifyCFG(BB, TTI, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; // Remove unreachable cases. if (EliminateDeadSwitchCases(SI, DL, AT)) - return SimplifyCFG(BB, TTI, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; if (ForwardSwitchConditionToPHI(SI)) - return SimplifyCFG(BB, TTI, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; if (SwitchToLookupTable(SI, Builder, TTI, DL)) - return SimplifyCFG(BB, TTI, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; return false; } @@ -3967,7 +3979,7 @@ if (SelectInst *SI = dyn_cast(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) - return SimplifyCFG(BB, TTI, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; } return Changed; } @@ -3991,7 +4003,8 @@ for (++I; isa(I); ++I) ; if (I->isTerminator() && - TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, DL, AT)) + TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, + BonusInstThreshold, DL, AT)) return true; } @@ -3999,8 +4012,8 @@ // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. - if (FoldBranchToCommonDest(BI, DL)) - return SimplifyCFG(BB, TTI, DL, AT) | true; + if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; return false; } @@ -4015,7 +4028,7 @@ // switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics. @@ -4025,14 +4038,14 @@ ++I; if (&*I == BI) { if (FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; } else if (&*I == cast(BI->getCondition())){ ++I; // Ignore dbg intrinsics. while (isa(I)) ++I; if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; } } @@ -4043,8 +4056,8 @@ // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI, DL)) - return SimplifyCFG(BB, TTI, DL, AT) | true; + if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if @@ -4053,7 +4066,7 @@ if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { if (HoistThenElseCodeToIf(BI, DL)) - return SimplifyCFG(BB, TTI, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to Successor #1. @@ -4061,7 +4074,7 @@ if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL)) - return SimplifyCFG(BB, TTI, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; } } else if (BI->getSuccessor(1)->getSinglePredecessor()) { // If Successor #0 has multiple preds, we may be able to conditionally @@ -4070,7 +4083,7 @@ if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL)) - return SimplifyCFG(BB, TTI, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; } // If this is a branch on a phi node in the current block, thread control @@ -4078,14 +4091,14 @@ if (PHINode *PN = dyn_cast(BI->getCondition())) if (PN->getParent() == BI->getParent()) if (FoldCondBranchOnPHI(BI, DL)) - return SimplifyCFG(BB, TTI, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) if (SimplifyCondBranchToCondBranch(PBI, BI)) - return SimplifyCFG(BB, TTI, DL, AT) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AT) | true; return false; } @@ -4229,6 +4242,7 @@ /// of the CFG. It returns true if a modification was made. /// bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, + unsigned BonusInstThreshold, const DataLayout *DL, AssumptionTracker *AT) { - return SimplifyCFGOpt(TTI, DL, AT).run(BB); + return SimplifyCFGOpt(TTI, BonusInstThreshold, DL, AT).run(BB); } Index: test/Transforms/SimplifyCFG/branch-fold-threshold.ll =================================================================== --- /dev/null +++ test/Transforms/SimplifyCFG/branch-fold-threshold.ll @@ -0,0 +1,28 @@ +; RUN: opt %s -simplifycfg -S | FileCheck %s --check-prefix=NORMAL +; RUN: opt %s -simplifycfg -S -bonus-inst-threshold=2 | FileCheck %s --check-prefix=AGGRESSIVE + +define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) { +; NORMAL-LABEL: @foo( +; AGGRESSIVE-LABEL: @foo( +entry: + %cmp = icmp sgt i32 %d, 3 + br i1 %cmp, label %cond.end, label %lor.lhs.false +; NORMAL: br i1 +; AGGRESSIVE: br i1 + +lor.lhs.false: + %mul = shl i32 %c, 1 + %add = add nsw i32 %mul, %a + %cmp1 = icmp slt i32 %add, %b + br i1 %cmp1, label %cond.false, label %cond.end +; NORMAL: br i1 +; AGGRESSIVE-NOT: br i1 + +cond.false: + %0 = load i32* %input, align 4 + br label %cond.end + +cond.end: + %cond = phi i32 [ %0, %cond.false ], [ 0, %lor.lhs.false ], [ 0, %entry ] + ret i32 %cond +}