diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -125,6 +125,9 @@ TargetTransformInfo &TTI; SCCPSolver &Solver; + Cost CodeSize = 0; + Cost Latency = 0; + ConstMap KnownConstants; // Basic blocks known to be unreachable after constant propagation. DenseSet DeadBlocks; @@ -143,16 +146,23 @@ TargetTransformInfo &TTI, SCCPSolver &Solver) : DL(DL), BFI(BFI), TTI(TTI), Solver(Solver) {} - Cost getUserBonus(Instruction *User, Value *Use = nullptr, - Constant *C = nullptr); + void estimateSpecializationBonus(Argument *A, Constant *C); - Cost getBonusFromPendingPHIs(); + std::pair getSpecializationBonus() { + estimateBonusFromPendingPHIs(); + return {CodeSize, Latency}; + } private: friend class InstVisitor; - Cost estimateSwitchInst(SwitchInst &I); - Cost estimateBranchInst(BranchInst &I); + void estimateUserBonus(Instruction *User, Value *Use = nullptr, + Constant *C = nullptr); + + void estimateBonusFromPendingPHIs(); + + void estimateSwitchInst(SwitchInst &I); + void estimateBranchInst(BranchInst &I); Constant *visitInstruction(Instruction &I) { return nullptr; } Constant *visitPHINode(PHINode &I); @@ -207,11 +217,10 @@ return InstCostVisitor(M.getDataLayout(), BFI, TTI, Solver); } - /// Compute a bonus for replacing argument \p A with constant \p C. - Cost getSpecializationBonus(Argument *A, Constant *C, - InstCostVisitor &Visitor); - private: + /// Compute the inlining bonus for replacing argument \p A with constant \p C. + Cost getInliningBonus(Argument *A, Constant *C); + Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call); /// A constant stack value is an AllocaInst that has a single constant diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -101,24 +101,16 @@ // executable and they have a unique predecessor. \p WorkList represents // the basic blocks of a specialization which become dead once we replace // instructions that are known to be constants. The aim here is to estimate -// the combination of size and latency savings in comparison to the non -// specialized version of the function. +// the codesize savings in comparison to the non-specialized function. static Cost estimateBasicBlocks(SmallVectorImpl &WorkList, DenseSet &DeadBlocks, ConstMap &KnownConstants, SCCPSolver &Solver, - BlockFrequencyInfo &BFI, TargetTransformInfo &TTI) { - Cost Bonus = 0; - - // Accumulate the instruction cost of each basic block weighted by frequency. + Cost CodeSize = 0; + // Accumulate the instruction cost of each basic block. while (!WorkList.empty()) { BasicBlock *BB = WorkList.pop_back_val(); - uint64_t Weight = BFI.getBlockFreq(BB).getFrequency() / - BFI.getEntryFreq(); - if (!Weight) - continue; - if (!DeadBlocks.insert(BB).second) continue; @@ -131,11 +123,11 @@ if (KnownConstants.contains(&I)) continue; - Bonus += Weight * - TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + Cost C = TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize); - LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus - << " after user " << I << "\n"); + LLVM_DEBUG(dbgs() << "FnSpecialization: CodeSize " << C + << " for user " << I << "\n"); + CodeSize += C; } // Keep adding dead successors to the list as long as they are @@ -145,7 +137,7 @@ SuccBB->getUniquePredecessor() == BB) WorkList.push_back(SuccBB); } - return Bonus; + return CodeSize; } static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) { @@ -154,58 +146,69 @@ return nullptr; } -Cost InstCostVisitor::getBonusFromPendingPHIs() { - Cost Bonus = 0; +void InstCostVisitor::estimateBonusFromPendingPHIs() { while (!PendingPHIs.empty()) { Instruction *Phi = PendingPHIs.pop_back_val(); - Bonus += getUserBonus(Phi); + estimateUserBonus(Phi); } - return Bonus; } -Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) { +void InstCostVisitor::estimateSpecializationBonus(Argument *A, Constant *C) { + for (auto *U : A->users()) + if (auto *UI = dyn_cast(U)) + if (Solver.isBlockExecutable(UI->getParent())) + estimateUserBonus(UI, A, C); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated user bonus " + << "{CodeSize = " << CodeSize << ", Latency = " << Latency + << "} after argument " << *A << "\n"); +} + +void InstCostVisitor::estimateUserBonus(Instruction *User, Value *Use, + Constant *C) { // Cache the iterator before visiting. if (Use) LastVisited = KnownConstants.insert({Use, C}).first; if (auto *I = dyn_cast(User)) - return estimateSwitchInst(*I); - - if (auto *I = dyn_cast(User)) - return estimateBranchInst(*I); - - C = visit(*User); - if (!C) - return 0; + estimateSwitchInst(*I); + else if (auto *I = dyn_cast(User)) + estimateBranchInst(*I); + else { + C = visit(*User); + if (!C) + return; + KnownConstants.insert({User, C}); + } - KnownConstants.insert({User, C}); + Cost Sz = TTI.getInstructionCost(User, TargetTransformInfo::TCK_CodeSize); uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() / BFI.getEntryFreq(); - if (!Weight) - return 0; - Cost Bonus = Weight * - TTI.getInstructionCost(User, TargetTransformInfo::TCK_SizeAndLatency); + Cost Ltc = Weight * + TTI.getInstructionCost(User, TargetTransformInfo::TCK_Latency); - LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus - << " for user " << *User << "\n"); + LLVM_DEBUG(dbgs() << "FnSpecialization: {CodeSize = " << Sz + << ", Latency = " << Ltc << "} for user " + << *User << "\n"); + + CodeSize += Sz; + Latency += Ltc; for (auto *U : User->users()) if (auto *UI = dyn_cast(U)) if (UI != User && Solver.isBlockExecutable(UI->getParent())) - Bonus += getUserBonus(UI, User, C); - - return Bonus; + estimateUserBonus(UI, User, C); } -Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) { +void InstCostVisitor::estimateSwitchInst(SwitchInst &I) { if (I.getCondition() != LastVisited->first) - return 0; + return; auto *C = dyn_cast(LastVisited->second); if (!C) - return 0; + return; BasicBlock *Succ = I.findCaseValue(C)->getCaseSuccessor(); // Initialize the worklist with the dead basic blocks. These are the @@ -220,13 +223,13 @@ WorkList.push_back(BB); } - return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI, - TTI); + CodeSize += + estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, TTI); } -Cost InstCostVisitor::estimateBranchInst(BranchInst &I) { +void InstCostVisitor::estimateBranchInst(BranchInst &I) { if (I.getCondition() != LastVisited->first) - return 0; + return; BasicBlock *Succ = I.getSuccessor(LastVisited->second->isOneValue()); // Initialize the worklist with the dead successor as long as @@ -236,8 +239,8 @@ Succ->getUniquePredecessor() == I.getParent()) WorkList.push_back(Succ); - return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI, - TTI); + CodeSize += + estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, TTI); } Constant *InstCostVisitor::visitPHINode(PHINode &I) { @@ -766,18 +769,24 @@ const unsigned Index = It->second; AllSpecs[Index].CallSites.push_back(&CS); } else { - // Calculate the specialisation gain. Cost Score = 0; + // Calculate the specialisation gain. InstCostVisitor Visitor = getInstCostVisitorFor(F); - for (ArgInfo &A : S.Args) - Score += getSpecializationBonus(A.Formal, A.Actual, Visitor); - Score += Visitor.getBonusFromPendingPHIs(); + for (ArgInfo &A : S.Args) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " + << A.Actual->getNameOrAsOperand() << "\n"); + + Score += getInliningBonus(A.Formal, A.Actual); + Visitor.estimateSpecializationBonus(A.Formal, A.Actual); + } + auto [CodeSize, Latency] = Visitor.getSpecializationBonus(); + Score += Latency; LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization score = " << Score << "\n"); // Discard unprofitable specialisations. - if (!ForceSpecialization && Score <= SpecCost) + if (!ForceSpecialization && Score <= SpecCost - CodeSize) continue; // Create a new specialisation entry. @@ -848,26 +857,13 @@ } /// Compute a bonus for replacing argument \p A with constant \p C. -Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C, - InstCostVisitor &Visitor) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " - << C->getNameOrAsOperand() << "\n"); - - Cost TotalCost = 0; - for (auto *U : A->users()) - if (auto *UI = dyn_cast(U)) - if (Solver.isBlockExecutable(UI->getParent())) - TotalCost += Visitor.getUserBonus(UI, A, C); - - LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated user bonus " - << TotalCost << " for argument " << *A << "\n"); - +Cost FunctionSpecializer::getInliningBonus(Argument *A, Constant *C) { // The below heuristic is only concerned with exposing inlining // opportunities via indirect call promotion. If the argument is not a // (potentially casted) function pointer, give up. Function *CalledFunction = dyn_cast(C->stripPointerCasts()); if (!CalledFunction) - return TotalCost; + return 0; // Get TTI for the called function (used for the inline cost). auto &CalleeTTI = (GetTTI)(*CalledFunction); @@ -912,7 +908,7 @@ << " for user " << *U << "\n"); } - return TotalCost + Bonus; + return Bonus; } /// Determine if it is possible to specialise the function for constant values diff --git a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp --- a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp +++ b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp @@ -30,6 +30,9 @@ std::unique_ptr M; std::unique_ptr Solver; + Cost CodeSize = 0; + Cost Latency = 0; + FunctionSpecializationTest() { FAM.registerPass([&] { return TargetLibraryAnalysis(); }); FAM.registerPass([&] { return TargetIRAnalysis(); }); @@ -81,12 +84,15 @@ GetAC); } - Cost getInstCost(Instruction &I) { + void addInstCost(Instruction &I, bool SizeOnly = false) { auto &TTI = FAM.getResult(*I.getFunction()); auto &BFI = FAM.getResult(*I.getFunction()); - return BFI.getBlockFreq(I.getParent()).getFrequency() / BFI.getEntryFreq() * - TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + CodeSize += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize); + + Latency += SizeOnly ? 0 : + BFI.getBlockFreq(I.getParent()).getFrequency() / BFI.getEntryFreq() * + TTI.getInstructionCost(&I, TargetTransformInfo::TCK_Latency); } }; @@ -130,12 +136,13 @@ Constant *One = ConstantInt::get(IntegerType::getInt32Ty(M.getContext()), 1); auto FuncIter = F->begin(); - ++FuncIter; + BasicBlock &Loop = *++FuncIter; BasicBlock &Case1 = *++FuncIter; BasicBlock &Case2 = *++FuncIter; BasicBlock &BB1 = *++FuncIter; BasicBlock &BB2 = *++FuncIter; + Instruction &Switch = Loop.front(); Instruction &Mul = Case1.front(); Instruction &And = Case2.front(); Instruction &Sdiv = *++Case2.begin(); @@ -144,23 +151,24 @@ Instruction &Or = BB2.front(); Instruction &BrLoop = BB2.back(); - // mul - Cost Ref = getInstCost(Mul); - Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); - - // and + or + add - Ref = getInstCost(And) + getInstCost(Or) + getInstCost(Add); - Bonus = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); - - // sdiv + br + br - Ref = getInstCost(Sdiv) + getInstCost(BrBB2) + getInstCost(BrLoop); - Bonus = Specializer.getSpecializationBonus(F->getArg(2), One, Visitor); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); + Visitor.estimateSpecializationBonus(F->getArg(0), One); + Visitor.estimateSpecializationBonus(F->getArg(1), One); + Visitor.estimateSpecializationBonus(F->getArg(2), One); + auto [Sz, Ltc] = Visitor.getSpecializationBonus(); + + addInstCost(Mul); + addInstCost(And); + addInstCost(Or); + addInstCost(Add); + addInstCost(Switch); + addInstCost(Sdiv, /*SizeOnly = */ true); + addInstCost(BrBB2, /*SizeOnly = */ true); + addInstCost(BrLoop, /*SizeOnly = */ true); + + EXPECT_EQ(Sz, CodeSize); + EXPECT_EQ(Ltc, Latency); + + EXPECT_TRUE(Sz > 0 && Ltc > 0); } TEST_F(FunctionSpecializationTest, BranchInst) { @@ -192,10 +200,11 @@ Constant *False = ConstantInt::getFalse(M.getContext()); auto FuncIter = F->begin(); - ++FuncIter; + BasicBlock &Loop = *++FuncIter; BasicBlock &BB0 = *++FuncIter; BasicBlock &BB1 = *++FuncIter; + Instruction &Branch = Loop.front(); Instruction &Mul = BB0.front(); Instruction &Sub = *++BB0.begin(); Instruction &BrBB1 = BB0.back(); @@ -203,24 +212,23 @@ Instruction &Sdiv = *++BB1.begin(); Instruction &BrLoop = BB1.back(); - // mul - Cost Ref = getInstCost(Mul); - Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); - - // add - Ref = getInstCost(Add); - Bonus = Specializer.getSpecializationBonus(F->getArg(1), One, Visitor); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); - - // sub + br + sdiv + br - Ref = getInstCost(Sub) + getInstCost(BrBB1) + getInstCost(Sdiv) + - getInstCost(BrLoop); - Bonus = Specializer.getSpecializationBonus(F->getArg(2), False, Visitor); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); + Visitor.estimateSpecializationBonus(F->getArg(0), One); + Visitor.estimateSpecializationBonus(F->getArg(1), One); + Visitor.estimateSpecializationBonus(F->getArg(2), False); + auto [Sz, Ltc] = Visitor.getSpecializationBonus(); + + addInstCost(Mul); + addInstCost(Add); + addInstCost(Branch); + addInstCost(Sub, /*SizeOnly = */ true); + addInstCost(BrBB1, /*SizeOnly = */ true); + addInstCost(Sdiv, /*SizeOnly = */ true); + addInstCost(BrLoop, /*SizeOnly = */ true); + + EXPECT_EQ(Sz, CodeSize); + EXPECT_EQ(Ltc, Latency); + + EXPECT_TRUE(Sz > 0 && Ltc > 0); } TEST_F(FunctionSpecializationTest, Misc) { @@ -265,27 +273,24 @@ Instruction &Freeze = *BlockIter++; Instruction &Smax = *BlockIter++; - // icmp + zext - Cost Ref = getInstCost(Icmp) + getInstCost(Zext); - Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); - - // select - Ref = getInstCost(Select); - Bonus = Specializer.getSpecializationBonus(F->getArg(1), True, Visitor); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); - - // gep + load + freeze + smax - Ref = getInstCost(Gep) + getInstCost(Load) + getInstCost(Freeze) + - getInstCost(Smax); - Bonus = Specializer.getSpecializationBonus(F->getArg(2), GV, Visitor); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); - - Bonus = Specializer.getSpecializationBonus(F->getArg(3), Undef, Visitor); - EXPECT_TRUE(Bonus == 0); + Visitor.estimateSpecializationBonus(F->getArg(0), One); + Visitor.estimateSpecializationBonus(F->getArg(1), True); + Visitor.estimateSpecializationBonus(F->getArg(2), GV); + Visitor.estimateSpecializationBonus(F->getArg(3), Undef); + auto [Sz, Ltc] = Visitor.getSpecializationBonus(); + + addInstCost(Icmp); + addInstCost(Zext); + addInstCost(Select); + addInstCost(Gep); + addInstCost(Load); + addInstCost(Freeze); + addInstCost(Smax); + + EXPECT_EQ(Sz, CodeSize); + EXPECT_EQ(Ltc, Latency); + + EXPECT_TRUE(Sz > 0 && Ltc > 0); } TEST_F(FunctionSpecializationTest, PhiNode) { @@ -320,23 +325,35 @@ Constant *One = ConstantInt::get(IntegerType::getInt32Ty(M.getContext()), 1); auto FuncIter = F->begin(); - for (int I = 0; I < 4; ++I) - ++FuncIter; - - BasicBlock &BB = *FuncIter; + BasicBlock &Loop = *++FuncIter; + BasicBlock &Case1 = *++FuncIter; + BasicBlock &Case2 = *++FuncIter; + BasicBlock &BB = *++FuncIter; + Instruction &Switch = Loop.front(); + Instruction &Add = Case1.front(); + Instruction &Sub = Case2.front(); + Instruction &BrBB = Case2.back(); Instruction &Phi = BB.front(); Instruction &Icmp = *++BB.begin(); + Instruction &Branch = BB.back(); + + Visitor.estimateSpecializationBonus(F->getArg(0), One); + Visitor.estimateSpecializationBonus(F->getArg(1), One); + Visitor.estimateSpecializationBonus(F->getArg(2), One); + auto [Sz, Ltc] = Visitor.getSpecializationBonus(); + + addInstCost(Add); + addInstCost(Sub); + addInstCost(BrBB, /*SizeOnly = */ true); + addInstCost(Switch); + addInstCost(Phi); + addInstCost(Icmp); + addInstCost(Branch); - Cost Bonus = Specializer.getSpecializationBonus(F->getArg(0), One, Visitor) + - Specializer.getSpecializationBonus(F->getArg(1), One, Visitor) + - Specializer.getSpecializationBonus(F->getArg(2), One, Visitor); - EXPECT_TRUE(Bonus > 0); + EXPECT_EQ(Sz, CodeSize); + EXPECT_EQ(Ltc, Latency); - // phi + icmp - Cost Ref = getInstCost(Phi) + getInstCost(Icmp); - Bonus = Visitor.getBonusFromPendingPHIs(); - EXPECT_EQ(Bonus, Ref); - EXPECT_TRUE(Bonus > 0); + EXPECT_TRUE(Sz > 0 && Ltc > 0); }