diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -176,14 +176,16 @@ return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(BB); } - Bonus getUserBonus(Instruction *User, Value *Use = nullptr, - Constant *C = nullptr); + Bonus getSpecializationBonus(Argument *A, Constant *C); Bonus getBonusFromPendingPHIs(); private: friend class InstVisitor; + Bonus getUserBonus(Instruction *User, Value *Use = nullptr, + Constant *C = nullptr); + Cost estimateBasicBlocks(SmallVectorImpl &WorkList); Cost estimateSwitchInst(SwitchInst &I); Cost estimateBranchInst(BranchInst &I); @@ -241,10 +243,6 @@ return InstCostVisitor(M.getDataLayout(), BFI, TTI, Solver); } - /// Compute a bonus for replacing argument \p A with constant \p C. - Bonus getSpecializationBonus(Argument *A, Constant *C, - InstCostVisitor &Visitor); - private: Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call); @@ -273,6 +271,9 @@ bool findSpecializations(Function *F, unsigned SpecCost, SmallVectorImpl &AllSpecs, SpecMap &SM); + /// Compute the inlining bonus for replacing argument \p A with constant \p C. 
+ unsigned getInliningBonus(Argument *A, Constant *C); + bool isCandidateFunction(Function *F); /// @brief Create a specialization of \p F and prime the SCCPSolver diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -93,6 +93,15 @@ "Don't specialize functions that have less than this number of " "instructions")); +static cl::opt<unsigned> CodeSizeRatio( + "funcspec-codesize-ratio", cl::init(5), cl::Hidden, cl::desc( + "Reject specializations whose codesize savings are a smaller fraction " + "of the original function size than this value")); + +static cl::opt<unsigned> BonusRatio( + "funcspec-bonus-ratio", cl::init(3), cl::Hidden, cl::desc( + "Ratio between codesize and latency savings")); + static cl::opt<bool> SpecializeOnAddress( "funcspec-on-address", cl::init(false), cl::Hidden, cl::desc( "Enable function specialization on the address of global values")); @@ -180,6 +189,22 @@ return B; } +/// Compute a bonus for replacing argument \p A with constant \p C. +Bonus InstCostVisitor::getSpecializationBonus(Argument *A, Constant *C) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " + << C->getNameOrAsOperand() << "\n"); + Bonus B; + for (auto *U : A->users()) + if (auto *UI = dyn_cast<Instruction>(U)) + if (isBlockExecutable(UI->getParent())) + B += getUserBonus(UI, A, C); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = " + << B.CodeSize << ", Latency = " << B.Latency + << "} for argument " << *A << "\n"); + return B; +} + Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) { // We have already propagated a constant for this user. if (KnownConstants.contains(User)) @@ -798,22 +823,32 @@ AllSpecs[Index].CallSites.push_back(&CS); } else { // Calculate the specialisation gain. 
- Bonus B; + Bonus VisitorBonus; + unsigned InliningBonus = 0; InstCostVisitor Visitor = getInstCostVisitorFor(F); - for (ArgInfo &A : S.Args) - B += getSpecializationBonus(A.Formal, A.Actual, Visitor); - B += Visitor.getBonusFromPendingPHIs(); + for (ArgInfo &A : S.Args) { + VisitorBonus += Visitor.getSpecializationBonus(A.Formal, A.Actual); + InliningBonus += getInliningBonus(A.Formal, A.Actual); + } + VisitorBonus += Visitor.getBonusFromPendingPHIs(); + - LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization score {CodeSize = " - << B.CodeSize << ", Latency = " << B.Latency + LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization bonus {CodeSize = " + << VisitorBonus.CodeSize << ", Latency = " + << VisitorBonus.Latency << ", Inlining = " + << InliningBonus << "}\n"); // Discard unprofitable specialisations. - if (!ForceSpecialization && B.Latency <= SpecCost - B.CodeSize) + if (!ForceSpecialization && + (VisitorBonus.CodeSize <= SpecCost / CodeSizeRatio || + VisitorBonus.Latency / BonusRatio < VisitorBonus.CodeSize) && + InliningBonus / BonusRatio < SpecCost) continue; // Create a new specialisation entry. - auto &Spec = AllSpecs.emplace_back(F, S, B.Latency); + unsigned Score = InliningBonus + VisitorBonus.Latency; + auto &Spec = AllSpecs.emplace_back(F, S, Score); if (CS.getFunction() != F) Spec.CallSites.push_back(&CS); const unsigned Index = AllSpecs.size() - 1; @@ -879,31 +914,14 @@ return Clone; } -/// Compute a bonus for replacing argument \p A with constant \p C. 
-Bonus FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C, - InstCostVisitor &Visitor) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " - << C->getNameOrAsOperand() << "\n"); - - Bonus B; - for (auto *U : A->users()) - if (auto *UI = dyn_cast<Instruction>(U)) - if (Visitor.isBlockExecutable(UI->getParent())) - B += Visitor.getUserBonus(UI, A, C); - - LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = " - << B.CodeSize << ", Latency = " << B.Latency - << "} for argument " << *A << "\n"); - +/// Compute the inlining bonus for replacing argument \p A with constant \p C. +unsigned FunctionSpecializer::getInliningBonus(Argument *A, Constant *C) { // The below heuristic is only concerned with exposing inlining // opportunities via indirect call promotion. If the argument is not a // (potentially casted) function pointer, give up. - // - // TODO: Perhaps we should consider checking such inlining opportunities - // while traversing the users of the specialization arguments ? Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts()); if (!CalledFunction) - return B; + return 0; // Get TTI for the called function (used for the inline cost). auto &CalleeTTI = (GetTTI)(*CalledFunction); @@ -948,7 +966,7 @@ << " for user " << *U << "\n"); } - return B += {0, InliningBonus}; + return InliningBonus > 0 ? 
static_cast<unsigned>(InliningBonus) : 0; } /// Determine if it is possible to specialise the function for constant values diff --git a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp --- a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp +++ b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp @@ -168,13 +168,13 @@ // mul Bonus Ref = getInstCost(Mul); - Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor); + Bonus Test = Visitor.getSpecializationBonus(F->getArg(0), One); EXPECT_EQ(Test, Ref); EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); // and + or + add Ref = getInstCost(And) + getInstCost(Or) + getInstCost(Add); - Test = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor); + Test = Visitor.getSpecializationBonus(F->getArg(1), One); EXPECT_EQ(Test, Ref); EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); @@ -183,7 +183,7 @@ getInstCost(Sdiv, /*SizeOnly =*/ true) + getInstCost(BrBB2, /*SizeOnly =*/ true) + getInstCost(BrLoop, /*SizeOnly =*/ true); - Test = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor); + Test = Visitor.getSpecializationBonus(F->getArg(2), One); EXPECT_EQ(Test, Ref); EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); } @@ -235,13 +235,13 @@ // mul Bonus Ref = getInstCost(Mul); - Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor); + Bonus Test = Visitor.getSpecializationBonus(F->getArg(0), One); EXPECT_EQ(Test, Ref); EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); // add Ref = getInstCost(Add); - Test = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor); + Test = Visitor.getSpecializationBonus(F->getArg(1), One); EXPECT_EQ(Test, Ref); EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); @@ -252,7 +252,7 @@ getInstCost(Sdiv, /*SizeOnly =*/ true) + getInstCost(BrBB2, /*SizeOnly =*/ true) + getInstCost(BrLoop, /*SizeOnly =*/ true); - Test = 
Specializer.getSpecializationBonus(F->getArg(2), False, Visitor); + Test = Visitor.getSpecializationBonus(F->getArg(2), False); EXPECT_EQ(Test, Ref); EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); } @@ -301,24 +301,24 @@ // icmp + zext Bonus Ref = getInstCost(Icmp) + getInstCost(Zext); - Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor); + Bonus Test = Visitor.getSpecializationBonus(F->getArg(0), One); EXPECT_EQ(Test, Ref); EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); // select Ref = getInstCost(Select); - Test = Specializer.getSpecializationBonus(F->getArg(1), True, Visitor); + Test = Visitor.getSpecializationBonus(F->getArg(1), True); EXPECT_EQ(Test, Ref); EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); // gep + load + freeze + smax Ref = getInstCost(Gep) + getInstCost(Load) + getInstCost(Freeze) + getInstCost(Smax); - Test = Specializer.getSpecializationBonus(F->getArg(2), GV, Visitor); + Test = Visitor.getSpecializationBonus(F->getArg(2), GV); EXPECT_EQ(Test, Ref); EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); - Test = Specializer.getSpecializationBonus(F->getArg(3), Undef, Visitor); + Test = Visitor.getSpecializationBonus(F->getArg(3), Undef); EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0); } @@ -369,17 +369,17 @@ Instruction &Icmp = *++BB.begin(); Instruction &Branch = BB.back(); - Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor); + Bonus Test = Visitor.getSpecializationBonus(F->getArg(0), One); EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0); - Test = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor); + Test = Visitor.getSpecializationBonus(F->getArg(1), One); EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0); // switch + phi + br Bonus Ref = getInstCost(Switch) + getInstCost(PhiCase2, /*SizeOnly =*/ true) + getInstCost(BrBB, /*SizeOnly =*/ true); - Test = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor); + Test = 
Visitor.getSpecializationBonus(F->getArg(2), One); EXPECT_EQ(Test, Ref); EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0);