diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -69,6 +69,9 @@ // Just a shorter abbreviation to improve indentation. using Cost = InstructionCost; +// Map of known constants found during the specialization bonus estimation. +using ConstMap = DenseMap; + // Specialization signature, used to uniquely designate a specialization within // a function. struct SpecSig { @@ -194,8 +197,18 @@ /// Compute and return the cost of specializing function \p F. Cost getSpecializationCost(Function *F); + Cost estimateSwitchInst(SwitchInst *I, Value *V, ConstantInt *C, + ConstMap &KnownConstants); + + Cost estimateBranchInst(BranchInst *I, Value *V, Constant *C, + ConstMap &KnownConstants); + + Cost getUserBonus(Instruction *User, Value *Use, Constant *C, + ConstMap &KnownConstants); + /// Compute a bonus for replacing argument \p A with constant \p C. - Cost getSpecializationBonus(Argument *A, Constant *C); + Cost getSpecializationBonus(Argument *A, Constant *C, + ConstMap &KnownConstants); /// Determine if it is possible to specialise the function for constant values /// of the formal parameter \p A. diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -48,7 +48,9 @@ #include "llvm/Transforms/IPO/FunctionSpecialization.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueLattice.h" #include "llvm/Analysis/ValueLatticeUtils.h" @@ -412,10 +414,6 @@ CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues); for (BasicBlock &BB : *F) Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues); - - LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function " - << F->getName() << " is " << Metrics.NumInsts - << " instructions\n"); } return Metrics; } @@ -496,8 +494,9 @@ } else { // Calculate the specialisation gain. Cost Score = 0 - SpecCost; + DenseMap KnownConstants; for (ArgInfo &A : S.Args) - Score += getSpecializationBonus(A.Formal, A.Actual); + Score += getSpecializationBonus(A.Formal, A.Actual, KnownConstants); // Discard unprofitable specialisations. if (!ForceSpecialization && Score <= 0) @@ -584,49 +583,230 @@ // Otherwise, set the specialization cost to be the cost of all the // instructions in the function. - return Metrics.NumInsts * InlineConstants::getInstrCost(); + return Metrics.NumInsts; } -static Cost getUserBonus(User *U, TargetTransformInfo &TTI, - BlockFrequencyInfo &BFI) { - auto *I = dyn_cast_or_null(U); - // If not an instruction we do not know how to evaluate. - // Keep minimum possible cost for now so that it doesnt affect - // specialization. - if (!I) - return 0; - - uint64_t Weight = BFI.getBlockFreq(I->getParent()).getFrequency() / - BFI.getEntryFreq(); - if (!Weight) - return 0; - - Cost Bonus = Weight * - TTI.getInstructionCost(U, TargetTransformInfo::TCK_SizeAndLatency); - - // Traverse recursively if there are more uses. - // TODO: Any other instructions to be added here? - if (I->mayReadFromMemory() || I->isCast()) - for (auto *User : I->users()) - Bonus += getUserBonus(User, TTI, BFI); +Cost FunctionSpecializer::estimateSwitchInst(SwitchInst *I, Value *V, + ConstantInt *C, + ConstMap &KnownConstants) { + Cost Bonus = 0; + if (I->getCondition() == V) { + Function *F = I->getFunction(); + auto &TTI = (GetTTI)(*F); + auto &BFI = (GetBFI)(*F); + BasicBlock *Succ = I->findCaseValue(C)->getCaseSuccessor(); + + // Initialize the worklist with the dead basic blocks. + SmallVector WorkList; + for (const auto &Case : I->cases()) { + BasicBlock *BB = Case.getCaseSuccessor(); + if (BB == Succ || !BB->hasNPredecessors(1) || + !Solver.isBlockExecutable(BB)) + continue; + WorkList.push_back(BB); + } + + while (!WorkList.empty()) { + BasicBlock *BB = WorkList.pop_back_val(); + + uint64_t Weight = BFI.getBlockFreq(BB).getFrequency() / + BFI.getEntryFreq(); + if (!Weight) + continue; + + for (Instruction &I : *BB) { + if (auto *II = dyn_cast(&I)) + if (II->getIntrinsicID() == Intrinsic::ssa_copy) + continue; + if (KnownConstants.find(&I) != KnownConstants.end()) + continue; + + Bonus += Weight * + TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus + << " after user " << I << "\n"); + } + + // Populate the worklist with dead successors. + for (BasicBlock *SuccBB : successors(BB)) + if (SuccBB->hasNPredecessors(1) && Solver.isBlockExecutable(SuccBB)) + WorkList.push_back(SuccBB); + } + } + return Bonus; +} + +Cost FunctionSpecializer::estimateBranchInst(BranchInst *I, Value *V, + Constant *C, + ConstMap &KnownConstants) { + Cost Bonus = 0; + if (I->getCondition() == V) { + Function *F = I->getFunction(); + auto &TTI = (GetTTI)(*F); + auto &BFI = (GetBFI)(*F); + BasicBlock *Succ = I->getSuccessor(C->isZeroValue()); + + // Initialize the worklist with the dead basic block. + SmallVector WorkList; + if (Succ->hasNPredecessors(1) && Solver.isBlockExecutable(Succ)) + WorkList.push_back(Succ); + + while (!WorkList.empty()) { + BasicBlock *BB = WorkList.pop_back_val(); + + uint64_t Weight = BFI.getBlockFreq(BB).getFrequency() / + BFI.getEntryFreq(); + if (!Weight) + continue; + + for (Instruction &I : *BB) { + if (auto *II = dyn_cast(&I)) + if (II->getIntrinsicID() == Intrinsic::ssa_copy) + continue; + if (KnownConstants.find(&I) != KnownConstants.end()) + continue; + + Bonus += Weight * + TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus + << " after user " << I << "\n"); + } + // Populate the worklist with dead successors. + for (BasicBlock *SuccBB : successors(BB)) + if (SuccBB->hasNPredecessors(1) && Solver.isBlockExecutable(SuccBB)) + WorkList.push_back(SuccBB); + } + } return Bonus; } +Cost FunctionSpecializer::getUserBonus(Instruction *User, Value *Use, + Constant *C, ConstMap &KnownConstants) { + KnownConstants.insert({Use, C}); + + if (auto *I = dyn_cast(User)) { + return estimateSwitchInst(I, Use, cast(C), KnownConstants); + } else if (auto *I = dyn_cast(User)) { + return estimateBranchInst(I, Use, C, KnownConstants); + } else if (auto *I = dyn_cast(User)) { + if (isa(C)) + C = nullptr; + else + C = ConstantFoldLoadFromConstPtr(C, I->getType(), M.getDataLayout()); + } else if (auto *I = dyn_cast(User)) { + SmallVector Operands; + Operands.reserve(I->getNumOperands()); + bool AllOperandsAreConst = true; + for (unsigned Idx = 0, E = I->getNumOperands(); Idx != E; ++Idx) { + Value *V = I->getOperand(Idx); + C = dyn_cast(V); + if (!C) + if (auto It = KnownConstants.find(V); It != KnownConstants.end()) + C = It->second; + if (!C) { + AllOperandsAreConst = false; + break; + } + Operands.push_back(C); + } + if (AllOperandsAreConst) { + Constant *Ptr = Operands[0]; + auto Ops = ArrayRef(Operands.begin() + 1, Operands.end()); + C = ConstantExpr::getGetElementPtr(I->getSourceElementType(), Ptr, Ops); + } + } else if (auto *I = dyn_cast(User)) { + if (I->getCondition() == Use) { + Value *V = C->isZeroValue() ? I->getFalseValue() : I->getTrueValue(); + C = dyn_cast(V); + if (!C) + if (auto It = KnownConstants.find(V); It != KnownConstants.end()) + C = It->second; + } else + C = nullptr; + } else if (auto *I = dyn_cast(User)) { + C = ConstantFoldCastOperand(I->getOpcode(), C, I->getType(), + M.getDataLayout()); + } else if (auto *I = dyn_cast(User)) { + bool Swap = I->getOperand(1) == Use; + Value *V = Swap ? I->getOperand(0) : I->getOperand(1); + auto *Other = dyn_cast(V); + if (!Other) + if (auto It = KnownConstants.find(V); It != KnownConstants.end()) + Other = It->second; + if (!Other) + C = nullptr; + else if (Swap) + C = ConstantFoldCompareInstOperands(I->getPredicate(), Other, C, + M.getDataLayout()); + else + C = ConstantFoldCompareInstOperands(I->getPredicate(), C, Other, + M.getDataLayout()); + } else if (Instruction::isUnaryOp(User->getOpcode())) { + C = ConstantFoldUnaryOpOperand(User->getOpcode(), C, M.getDataLayout()); + } else if (Instruction::isBinaryOp(User->getOpcode())) { + bool Swap = User->getOperand(1) == Use; + Value *V = Swap ? User->getOperand(0) : User->getOperand(1); + auto *Other = dyn_cast(V); + if (!Other) + if (auto It = KnownConstants.find(V); It != KnownConstants.end()) + Other = It->second; + if (!Other) + C = nullptr; + else + C = dyn_cast_or_null( + Swap ? simplifyBinOp(User->getOpcode(), Other, C, + SimplifyQuery(M.getDataLayout())) + : simplifyBinOp(User->getOpcode(), C, Other, + SimplifyQuery(M.getDataLayout()))); + } else { + C = nullptr; + } + + if (C) { + KnownConstants.insert({User, C}); + + Function *F = User->getFunction(); + auto &TTI = (GetTTI)(*F); + auto &BFI = (GetBFI)(*F); + + uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() / + BFI.getEntryFreq(); + if (!Weight) + return 0; + + Cost Bonus = Weight * + TTI.getInstructionCost(User, TargetTransformInfo::TCK_SizeAndLatency); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus + << " for user " << *User << "\n"); + + for (auto *U : User->users()) + if (auto *UI = dyn_cast(U)) + if (Solver.isBlockExecutable(UI->getParent())) + Bonus += getUserBonus(UI, User, C, KnownConstants); + + return Bonus; + } + return 0; +} + /// Compute a bonus for replacing argument \p A with constant \p C. -Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C) { - Function *F = A->getParent(); - auto &TTI = (GetTTI)(*F); - auto &BFI = (GetBFI)(*F); +Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C, + ConstMap &KnownConstants) { LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " << C->getNameOrAsOperand() << "\n"); Cost TotalCost = 0; - for (auto *U : A->users()) { - TotalCost += getUserBonus(U, TTI, BFI); - LLVM_DEBUG(dbgs() << "FnSpecialization: User cost "; - TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n"); - } + for (auto *U : A->users()) + if (auto *UI = dyn_cast(U)) + if (Solver.isBlockExecutable(UI->getParent())) + TotalCost += getUserBonus(UI, A, C, KnownConstants); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated user bonus " + << TotalCost << " for argument " << *A << "\n"); // The below heuristic is only concerned with exposing inlining // opportunities via indirect call promotion. If the argument is not a