diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -108,17 +108,49 @@ SpecSig Sig; // Profitability of the specialization. - Cost Score; + unsigned Score; // List of call sites, matching this specialization. SmallVector CallSites; - Spec(Function *F, const SpecSig &S, Cost Score) + Spec(Function *F, const SpecSig &S, unsigned Score) : F(F), Sig(S), Score(Score) {} - Spec(Function *F, const SpecSig &&S, Cost Score) + Spec(Function *F, const SpecSig &&S, unsigned Score) : F(F), Sig(S), Score(Score) {} }; +struct Bonus { + unsigned CodeSize = 0; + unsigned Latency = 0; + + Bonus() = default; + + Bonus(Cost CodeSize, Cost Latency) { + int64_t Sz = *CodeSize.getValue(); + int64_t Ltc = *Latency.getValue(); + + assert(Sz >= 0 && Ltc >= 0 && "CodeSize and Latency cannot be negative"); + // It is safe to down cast since we know the arguments + // cannot be negative and Cost is of type int64_t. + this->CodeSize = static_cast(Sz); + this->Latency = static_cast(Ltc); + } + + Bonus &operator+=(const Bonus RHS) { + CodeSize += RHS.CodeSize; + Latency += RHS.Latency; + return *this; + } + + Bonus operator+(const Bonus RHS) const { + return Bonus(CodeSize + RHS.CodeSize, Latency + RHS.Latency); + } + + bool operator==(const Bonus RHS) const { + return CodeSize == RHS.CodeSize && Latency == RHS.Latency; + } +}; + class InstCostVisitor : public InstVisitor { const DataLayout &DL; BlockFrequencyInfo &BFI; @@ -143,10 +175,10 @@ TargetTransformInfo &TTI, SCCPSolver &Solver) : DL(DL), BFI(BFI), TTI(TTI), Solver(Solver) {} - Cost getUserBonus(Instruction *User, Value *Use = nullptr, - Constant *C = nullptr); + Bonus getUserBonus(Instruction *User, Value *Use = nullptr, + Constant *C = nullptr); - Cost getBonusFromPendingPHIs(); + Bonus getBonusFromPendingPHIs(); private: friend class InstVisitor; @@ -208,8 +240,8 @@ } /// Compute a bonus for replacing argument \p A with constant \p C. - Cost getSpecializationBonus(Argument *A, Constant *C, - InstCostVisitor &Visitor); + Bonus getSpecializationBonus(Argument *A, Constant *C, + InstCostVisitor &Visitor); private: Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call); @@ -236,7 +268,7 @@ /// @param AllSpecs A vector to add potential specializations to. /// @param SM A map for a function's specialisation range /// @return True, if any potential specializations were found - bool findSpecializations(Function *F, Cost SpecCost, + bool findSpecializations(Function *F, unsigned SpecCost, SmallVectorImpl &AllSpecs, SpecMap &SM); bool isCandidateFunction(Function *F); diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -101,29 +101,21 @@ "Enable specialization of functions that take a literal constant as an " "argument")); -// Estimates the instruction cost of all the basic blocks in \p WorkList. -// The successors of such blocks are added to the list as long as they are -// executable and they have a unique predecessor. \p WorkList represents -// the basic blocks of a specialization which become dead once we replace -// instructions that are known to be constants. The aim here is to estimate -// the combination of size and latency savings in comparison to the non -// specialized version of the function. +// Estimates the codesize savings due to dead code after constant propagation. +// \p WorkList represents the basic blocks of a specialization which will +// eventually become dead once we replace instructions that are known to be +// constants. The successors of such blocks are added to the list as long as +// the \p Solver found they were executable prior to specialization, and only +// if they have a unique predecessor. static Cost estimateBasicBlocks(SmallVectorImpl &WorkList, DenseSet &DeadBlocks, ConstMap &KnownConstants, SCCPSolver &Solver, - BlockFrequencyInfo &BFI, TargetTransformInfo &TTI) { - Cost Bonus = 0; - + Cost CodeSize = 0; // Accumulate the instruction cost of each basic block weighted by frequency. while (!WorkList.empty()) { BasicBlock *BB = WorkList.pop_back_val(); - uint64_t Weight = BFI.getBlockFreq(BB).getFrequency() / - BFI.getEntryFreq(); - if (!Weight) - continue; - // These blocks are considered dead as far as the InstCostVisitor is // concerned. They haven't been proven dead yet by the Solver, but // may become if we propagate the constant specialization arguments. @@ -139,11 +131,11 @@ if (KnownConstants.contains(&I)) continue; - Bonus += Weight * - TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + Cost C = TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize); - LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus - << " after user " << I << "\n"); + LLVM_DEBUG(dbgs() << "FnSpecialization: CodeSize " << C + << " for user " << I << "\n"); + CodeSize += C; } // Keep adding dead successors to the list as long as they are @@ -153,7 +145,7 @@ SuccBB->getUniquePredecessor() == BB) WorkList.push_back(SuccBB); } - return Bonus; + return CodeSize; } static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) { @@ -164,49 +156,51 @@ return nullptr; } -Cost InstCostVisitor::getBonusFromPendingPHIs() { - Cost Bonus = 0; +Bonus InstCostVisitor::getBonusFromPendingPHIs() { + Bonus B; while (!PendingPHIs.empty()) { Instruction *Phi = PendingPHIs.pop_back_val(); - Bonus += getUserBonus(Phi); + B += getUserBonus(Phi); } - return Bonus; + return B; } -Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) { +Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) { // Cache the iterator before visiting. LastVisited = Use ? KnownConstants.insert({Use, C}).first : KnownConstants.end(); - if (auto *I = dyn_cast(User)) - return estimateSwitchInst(*I); - - if (auto *I = dyn_cast(User)) - return estimateBranchInst(*I); - - C = visit(*User); - if (!C) - return 0; + Cost CodeSize = 0; + if (auto *I = dyn_cast(User)) { + CodeSize = estimateSwitchInst(*I); + } else if (auto *I = dyn_cast(User)) { + CodeSize = estimateBranchInst(*I); + } else { + C = visit(*User); + if (!C) + return {0, 0}; + KnownConstants.insert({User, C}); + } - KnownConstants.insert({User, C}); + CodeSize += TTI.getInstructionCost(User, TargetTransformInfo::TCK_CodeSize); uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() / BFI.getEntryFreq(); - if (!Weight) - return 0; - Cost Bonus = Weight * - TTI.getInstructionCost(User, TargetTransformInfo::TCK_SizeAndLatency); + Cost Latency = Weight * + TTI.getInstructionCost(User, TargetTransformInfo::TCK_Latency); - LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus - << " for user " << *User << "\n"); + LLVM_DEBUG(dbgs() << "FnSpecialization: {CodeSize = " << CodeSize + << ", Latency = " << Latency << "} for user " + << *User << "\n"); + Bonus B(CodeSize, Latency); for (auto *U : User->users()) if (auto *UI = dyn_cast(U)) if (UI != User && Solver.isBlockExecutable(UI->getParent())) - Bonus += getUserBonus(UI, User, C); + B += getUserBonus(UI, User, C); - return Bonus; + return B; } Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) { @@ -232,8 +226,7 @@ WorkList.push_back(BB); } - return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI, - TTI); + return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, TTI); } Cost InstCostVisitor::estimateBranchInst(BranchInst &I) { @@ -250,8 +243,7 @@ Succ->getUniquePredecessor() == I.getParent()) WorkList.push_back(Succ); - return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI, - TTI); + return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, TTI); } Constant *InstCostVisitor::visitPHINode(PHINode &I) { @@ -572,13 +564,18 @@ if (!Inserted && !Metrics.isRecursive && !SpecializeLiteralConstant) continue; + int64_t Sz = *Metrics.NumInsts.getValue(); + assert(Sz > 0 && "CodeSize should be positive"); + // It is safe to down cast from int64_t, NumInsts is always positive. + unsigned SpecCost = static_cast(Sz); + LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for " - << F.getName() << " is " << Metrics.NumInsts << "\n"); + << F.getName() << " is " << SpecCost << "\n"); if (Inserted && Metrics.isRecursive) promoteConstantStackValues(&F); - if (!findSpecializations(&F, Metrics.NumInsts, AllSpecs, SM)) { + if (!findSpecializations(&F, SpecCost, AllSpecs, SM)) { LLVM_DEBUG( dbgs() << "FnSpecialization: No possible specializations found for " << F.getName() << "\n"); @@ -713,7 +710,7 @@ return Clone; } -bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost, +bool FunctionSpecializer::findSpecializations(Function *F, unsigned SpecCost, SmallVectorImpl &AllSpecs, SpecMap &SM) { // A mapping from a specialisation signature to the index of the respective @@ -779,21 +776,22 @@ AllSpecs[Index].CallSites.push_back(&CS); } else { // Calculate the specialisation gain. - Cost Score = 0; + Bonus B; InstCostVisitor Visitor = getInstCostVisitorFor(F); for (ArgInfo &A : S.Args) - Score += getSpecializationBonus(A.Formal, A.Actual, Visitor); - Score += Visitor.getBonusFromPendingPHIs(); + B += getSpecializationBonus(A.Formal, A.Actual, Visitor); + B += Visitor.getBonusFromPendingPHIs(); - LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization score = " - << Score << "\n"); + LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization score {CodeSize = " + << B.CodeSize << ", Latency = " << B.Latency + << "}\n"); // Discard unprofitable specialisations. - if (!ForceSpecialization && Score <= SpecCost) + if (!ForceSpecialization && B.Latency <= SpecCost - B.CodeSize) continue; // Create a new specialisation entry. - auto &Spec = AllSpecs.emplace_back(F, S, Score); + auto &Spec = AllSpecs.emplace_back(F, S, B.Latency); if (CS.getFunction() != F) Spec.CallSites.push_back(&CS); const unsigned Index = AllSpecs.size() - 1; @@ -860,19 +858,20 @@ } /// Compute a bonus for replacing argument \p A with constant \p C. -Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C, +Bonus FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C, InstCostVisitor &Visitor) { LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " << C->getNameOrAsOperand() << "\n"); - Cost TotalCost = 0; + Bonus B; for (auto *U : A->users()) if (auto *UI = dyn_cast(U)) if (Solver.isBlockExecutable(UI->getParent())) - TotalCost += Visitor.getUserBonus(UI, A, C); + B += Visitor.getUserBonus(UI, A, C); - LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated user bonus " - << TotalCost << " for argument " << *A << "\n"); + LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = " + << B.CodeSize << ", Latency = " << B.Latency + << "} for argument " << *A << "\n"); // The below heuristic is only concerned with exposing inlining // opportunities via indirect call promotion. If the argument is not a @@ -882,7 +881,7 @@ // while traversing the users of the specialization arguments ? Function *CalledFunction = dyn_cast(C->stripPointerCasts()); if (!CalledFunction) - return TotalCost; + return B; // Get TTI for the called function (used for the inline cost). auto &CalleeTTI = (GetTTI)(*CalledFunction); @@ -892,7 +891,7 @@ // calls to be promoted to direct calls. If the indirect call promotion // would likely enable the called function to be inlined, specializing is a // good idea. - int Bonus = 0; + int InliningBonus = 0; for (User *U : A->users()) { if (!isa(U) && !isa(U)) continue; @@ -919,15 +918,15 @@ // We clamp the bonus for this call to be between zero and the default // threshold. if (IC.isAlways()) - Bonus += Params.DefaultThreshold; + InliningBonus += Params.DefaultThreshold; else if (IC.isVariable() && IC.getCostDelta() > 0) - Bonus += IC.getCostDelta(); + InliningBonus += IC.getCostDelta(); - LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus + LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << InliningBonus << " for user " << *U << "\n"); } - return TotalCost + Bonus; + return B += {0, InliningBonus}; } /// Determine if it is possible to specialise the function for constant values diff --git a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp --- a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp +++ b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp @@ -81,12 +81,18 @@ GetAC); } - Cost getInstCost(Instruction &I) { + Bonus getInstCost(Instruction &I, bool SizeOnly = false) { auto &TTI = FAM.getResult(*I.getFunction()); auto &BFI = FAM.getResult(*I.getFunction()); - return BFI.getBlockFreq(I.getParent()).getFrequency() / BFI.getEntryFreq() * - TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + Cost CodeSize = + TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize); + + Cost Latency = SizeOnly ? 0 : + BFI.getBlockFreq(I.getParent()).getFrequency() / BFI.getEntryFreq() * + TTI.getInstructionCost(&I, TargetTransformInfo::TCK_Latency); + + return {CodeSize, Latency}; } }; @@ -130,12 +136,13 @@ Constant *One = ConstantInt::get(IntegerType::getInt32Ty(M.getContext()), 1); auto FuncIter = F->begin(); - ++FuncIter; + BasicBlock &Loop = *++FuncIter; BasicBlock &Case1 = *++FuncIter; BasicBlock &Case2 = *++FuncIter; BasicBlock &BB1 = *++FuncIter; BasicBlock &BB2 = *++FuncIter; + Instruction &Switch = Loop.front(); Instruction &Mul = Case1.front(); Instruction &And = Case2.front(); Instruction &Sdiv = *++Case2.begin(); @@ -145,22 +152,25 @@ Instruction &BrLoop = BB2.back(); // mul - Cost Ref = getInstCost(Mul); - Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); + Bonus Ref = getInstCost(Mul); + Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor); + EXPECT_EQ(Test, Ref); + EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); // and + or + add Ref = getInstCost(And) + getInstCost(Or) + getInstCost(Add); - Bonus = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); - - // sdiv + br + br - Ref = getInstCost(Sdiv) + getInstCost(BrBB2) + getInstCost(BrLoop); - Bonus = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); + Test = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor); + EXPECT_EQ(Test, Ref); + EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); + + // switch + sdiv + br + br + Ref = getInstCost(Switch) + + getInstCost(Sdiv, /*SizeOnly =*/ true) + + getInstCost(BrBB2, /*SizeOnly =*/ true) + + getInstCost(BrLoop, /*SizeOnly =*/ true); + Test = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor); + EXPECT_EQ(Test, Ref); + EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); } TEST_F(FunctionSpecializationTest, BranchInst) { @@ -192,10 +202,11 @@ Constant *False = ConstantInt::getFalse(M.getContext()); auto FuncIter = F->begin(); - ++FuncIter; + BasicBlock &Loop = *++FuncIter; BasicBlock &BB0 = *++FuncIter; BasicBlock &BB1 = *++FuncIter; + Instruction &Branch = Loop.front(); Instruction &Mul = BB0.front(); Instruction &Sub = *++BB0.begin(); Instruction &BrBB1 = BB0.back(); @@ -204,23 +215,26 @@ Instruction &BrLoop = BB1.back(); // mul - Cost Ref = getInstCost(Mul); - Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); + Bonus Ref = getInstCost(Mul); + Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor); + EXPECT_EQ(Test, Ref); + EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); // add Ref = getInstCost(Add); - Bonus = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); - - // sub + br + sdiv + br - Ref = getInstCost(Sub) + getInstCost(BrBB1) + getInstCost(Sdiv) + - getInstCost(BrLoop); - Bonus = Specializer.getSpecializationBonus(F->getArg(2), False, Visitor); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); + Test = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor); + EXPECT_EQ(Test, Ref); + EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); + + // branch + sub + br + sdiv + br + Ref = getInstCost(Branch) + + getInstCost(Sub, /*SizeOnly =*/ true) + + getInstCost(BrBB1, /*SizeOnly =*/ true) + + getInstCost(Sdiv, /*SizeOnly =*/ true) + + getInstCost(BrLoop, /*SizeOnly =*/ true); + Test = Specializer.getSpecializationBonus(F->getArg(2), False, Visitor); + EXPECT_EQ(Test, Ref); + EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); } TEST_F(FunctionSpecializationTest, Misc) { @@ -266,26 +280,26 @@ Instruction &Smax = *BlockIter++; // icmp + zext - Cost Ref = getInstCost(Icmp) + getInstCost(Zext); - Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); + Bonus Ref = getInstCost(Icmp) + getInstCost(Zext); + Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor); + EXPECT_EQ(Test, Ref); + EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); // select Ref = getInstCost(Select); - Bonus = Specializer.getSpecializationBonus(F->getArg(1), True, Visitor); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); + Test = Specializer.getSpecializationBonus(F->getArg(1), True, Visitor); + EXPECT_EQ(Test, Ref); + EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); // gep + load + freeze + smax Ref = getInstCost(Gep) + getInstCost(Load) + getInstCost(Freeze) + getInstCost(Smax); - Bonus = Specializer.getSpecializationBonus(F->getArg(2), GV, Visitor); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); + Test = Specializer.getSpecializationBonus(F->getArg(2), GV, Visitor); + EXPECT_EQ(Test, Ref); + EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); - Bonus = Specializer.getSpecializationBonus(F->getArg(3), Undef, Visitor); - EXPECT_TRUE(Bonus == 0); + Test = Specializer.getSpecializationBonus(F->getArg(3), Undef, Visitor); + EXPECT_TRUE(Test.CodeSize == 0 && Test.Latency == 0); } TEST_F(FunctionSpecializationTest, PhiNode) { @@ -327,16 +341,17 @@ Instruction &Phi = BB.front(); Instruction &Icmp = *++BB.begin(); + Instruction &Branch = BB.back(); - Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor) + + Bonus Test = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor) + Specializer.getSpecializationBonus(F->getArg(1), One, Visitor) + Specializer.getSpecializationBonus(F->getArg(2), One, Visitor); - EXPECT_TRUE(Bonus > 0); + EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); - // phi + icmp - Cost Ref = getInstCost(Phi) + getInstCost(Icmp); - Bonus = Visitor.getBonusFromPendingPHIs(); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); + // phi + icmp + branch + Bonus Ref = getInstCost(Phi) + getInstCost(Icmp) + getInstCost(Branch); + Test = Visitor.getBonusFromPendingPHIs(); + EXPECT_EQ(Test, Ref); + EXPECT_TRUE(Test.CodeSize > 0 && Test.Latency > 0); }