diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -184,6 +184,7 @@ private: friend class InstVisitor; + Cost estimateBasicBlocks(SmallVectorImpl &WorkList); Cost estimateSwitchInst(SwitchInst &I); Cost estimateBranchInst(BranchInst &I); diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -83,6 +83,11 @@ "The maximum number of incoming values a PHI node can have to be " "considered during the specialization bonus estimation")); +static cl::opt MaxBlockPredecessors( + "funcspec-max-block-predecessors", cl::init(2), cl::Hidden, cl::desc( + "The maximum number of predecessors a basic block can have to be " + "considered during the estimation of dead code")); + static cl::opt MinFunctionSize( "funcspec-min-function-size", cl::init(100), cl::Hidden, cl::desc( "Don't specialize functions that have less than this number of " @@ -101,16 +106,24 @@ "Enable specialization of functions that take a literal constant as an " "argument")); +static bool canEliminateSuccessor(BasicBlock *DeadBB, BasicBlock *SuccBB, + DenseSet &DeadBlocks) { + unsigned I = 0; + return all_of(predecessors(SuccBB), + [&I, DeadBB, SuccBB, &DeadBlocks] (BasicBlock *PredBB) { + return I++ < MaxBlockPredecessors && + (PredBB == DeadBB || PredBB == SuccBB || DeadBlocks.contains(PredBB)); + }); +} + // Estimates the codesize savings due to dead code after constant propagation. // \p WorkList represents the basic blocks of a specialization which will // eventually become dead once we replace instructions that are known to be // constants. The successors of such blocks are added to the list as long as // the \p Solver found they were executable prior to specialization, and only // if they have a unique predecessor. -static Cost estimateBasicBlocks(SmallVectorImpl &WorkList, - DenseSet &DeadBlocks, - ConstMap &KnownConstants, SCCPSolver &Solver, - TargetTransformInfo &TTI) { +Cost InstCostVisitor::estimateBasicBlocks( + SmallVectorImpl &WorkList) { Cost CodeSize = 0; // Accumulate the instruction cost of each basic block weighted by frequency. while (!WorkList.empty()) { @@ -141,8 +154,8 @@ // Keep adding dead successors to the list as long as they are // executable and they have a unique predecessor. for (BasicBlock *SuccBB : successors(BB)) - if (Solver.isBlockExecutable(SuccBB) && - SuccBB->getUniquePredecessor() == BB) + if (isBlockExecutable(SuccBB) && + canEliminateSuccessor(BB, SuccBB, DeadBlocks)) WorkList.push_back(SuccBB); } return CodeSize; @@ -185,9 +198,13 @@ C = visit(*User); if (!C) return {0, 0}; - KnownConstants.insert({User, C}); } + // Even though it doesn't make sense to bind switch and branch instructions + // with a constant, unlike any other instruction type, it prevents estimating + // their bonus multiple times. + KnownConstants.insert({User, C}); + CodeSize += TTI.getInstructionCost(User, TargetTransformInfo::TCK_CodeSize); uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() / @@ -226,13 +243,12 @@ SmallVector WorkList; for (const auto &Case : I.cases()) { BasicBlock *BB = Case.getCaseSuccessor(); - if (BB == Succ || !Solver.isBlockExecutable(BB) || - BB->getUniquePredecessor() != I.getParent()) - continue; - WorkList.push_back(BB); + if (BB != Succ && isBlockExecutable(BB) && + canEliminateSuccessor(I.getParent(), BB, DeadBlocks)) + WorkList.push_back(BB); } - return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, TTI); + return estimateBasicBlocks(WorkList); } Cost InstCostVisitor::estimateBranchInst(BranchInst &I) { @@ -245,11 +261,11 @@ // Initialize the worklist with the dead successor as long as // it is executable and has a unique predecessor. SmallVector WorkList; - if (Solver.isBlockExecutable(Succ) && - Succ->getUniquePredecessor() == I.getParent()) + if (isBlockExecutable(Succ) && + canEliminateSuccessor(I.getParent(), Succ, DeadBlocks)) WorkList.push_back(Succ); - return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, TTI); + return estimateBasicBlocks(WorkList); } Constant *InstCostVisitor::visitPHINode(PHINode &I) { diff --git a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp --- a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp +++ b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp @@ -194,16 +194,18 @@ entry: br label %loop loop: - br i1 %cond, label %bb0, label %bb2 + br i1 %cond, label %bb0, label %bb3 bb0: %0 = mul i32 %a, 2 %1 = sub i32 6, 5 - br label %bb1 + br i1 %cond, label %bb1, label %bb2 bb1: %2 = add i32 %0, %b %3 = sdiv i32 8, 2 - br label %loop + br label %bb2 bb2: + br label %loop + bb3: ret void } )"; @@ -220,14 +222,16 @@ BasicBlock &Loop = *++FuncIter; BasicBlock &BB0 = *++FuncIter; BasicBlock &BB1 = *++FuncIter; + BasicBlock &BB2 = *++FuncIter; Instruction &Branch = Loop.front(); Instruction &Mul = BB0.front(); Instruction &Sub = *++BB0.begin(); - Instruction &BrBB1 = BB0.back(); + Instruction &BrBB1BB2 = BB0.back(); Instruction &Add = BB1.front(); Instruction &Sdiv = *++BB1.begin(); - Instruction &BrLoop = BB1.back(); + Instruction &BrBB2 = BB1.back(); + Instruction &BrLoop = BB2.front(); // mul Bonus Ref = getInstCost(Mul); @@ -244,8 +248,9 @@ // branch + sub + br + sdiv + br Ref = getInstCost(Branch) + getInstCost(Sub, /*SizeOnly =*/ true) + - getInstCost(BrBB1, /*SizeOnly =*/ true) + + getInstCost(BrBB1BB2) + getInstCost(Sdiv, /*SizeOnly =*/ true) + + getInstCost(BrBB2, /*SizeOnly =*/ true) + getInstCost(BrLoop, /*SizeOnly =*/ true); Test = Specializer.getSpecializationBonus(F->getArg(2), False, Visitor); EXPECT_EQ(Test, Ref);