diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -62,6 +62,11 @@ // This map's value is the beginning and the end of that range. using SpecMap = DenseMap>; +// Map of all the interesting call sites which reside in a function. Interesting +// in the sense that they may produce specializations of the called function. +using CallMap = DenseMap>; +using SmallCallMap = SmallDenseMap, 16>; + // Just a shorter abreviation. using Cost = InstructionCost; @@ -128,8 +133,10 @@ SmallPtrSet Specializations; SmallPtrSet FullySpecialized; + SmallVector Clones; DenseMap FunctionMetrics; DenseMap NumSpecs; + CallMap InterestingCalls; public: FunctionSpecializer( @@ -154,10 +161,9 @@ /// is a function argument. Constant *getConstantStackValue(CallInst *Call, Value *Val); - /// Iterate over the argument tracked functions see if there - /// are any new constant values for the call instruction via - /// stack variables. - void promoteConstantStackValues(); + /// See if there are any new constant values for the callers of \p F via + /// stack variables and promote them to global variables. + void promoteConstantStackValues(Function *F); /// Clean up fully specialized functions. void removeDeadFunctions(); @@ -165,12 +171,24 @@ /// Remove any ssa_copy intrinsics that may have been introduced. void cleanUpSSA(); + /// Find possible specializations examining the call sites which reside in + /// specializations which were created in the previous invocation of the + /// Specializer. + unsigned findNewCandidates(SpecMap &SM, SmallVectorImpl &AllSpecs); + + /// Find possible specializations examining the entire module. It runs only + /// in the first invocation of the Specializer. + unsigned findInitialCandidates(SpecMap &SM, SmallVectorImpl &AllSpecs); + /// @brief Find potential specialization opportunities. /// @param F Function to specialize + /// @param Metrics CodeMetrics for function F + /// @param Calls Interesting call sites for specialization /// @param AllSpecs A vector to add potential specializations to. /// @param SM A map for a function's specialisation range /// @return True, if any potential specializations were found - bool findSpecializations(Function *F, SpecMap &SM, + bool findSpecializations(Function *F, CodeMetrics &Metrics, + SmallVectorImpl &Calls, SpecMap &SM, SmallVectorImpl &AllSpecs); bool isCandidateFunction(Function *F); diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -137,49 +137,37 @@ // ret void // } // -void FunctionSpecializer::promoteConstantStackValues() { - // Iterate over the argument tracked functions see if there - // are any new constant values for the call instruction via - // stack variables. - for (Function &F : M) { - if (!Solver.isArgumentTrackedFunction(&F)) +// See if there are any new constant values for the callers of \p F via +// stack variables and promote them to global variables. +void FunctionSpecializer::promoteConstantStackValues(Function *F) { + for (User *U : F->users()) { + + auto *Call = dyn_cast(U); + if (!Call) continue; - for (auto *User : F.users()) { + if (!Solver.isBlockExecutable(Call->getParent())) + continue; - auto *Call = dyn_cast(User); - if (!Call) - continue; + for (const Use &U : Call->args()) { + unsigned Idx = Call->getArgOperandNo(&U); + Value *ArgOp = Call->getArgOperand(Idx); + Type *ArgOpType = ArgOp->getType(); - if (!Solver.isBlockExecutable(Call->getParent())) + if (!Call->onlyReadsMemory(Idx) || !ArgOpType->isPointerTy()) continue; - bool Changed = false; - for (const Use &U : Call->args()) { - unsigned Idx = Call->getArgOperandNo(&U); - Value *ArgOp = Call->getArgOperand(Idx); - Type *ArgOpType = ArgOp->getType(); - - if (!Call->onlyReadsMemory(Idx) || !ArgOpType->isPointerTy()) - continue; - - auto *ConstVal = getConstantStackValue(Call, ArgOp); - if (!ConstVal) - continue; - - Value *GV = new GlobalVariable(M, ConstVal->getType(), true, - GlobalValue::InternalLinkage, ConstVal, - "funcspec.arg"); - if (ArgOpType != ConstVal->getType()) - GV = ConstantExpr::getBitCast(cast(GV), ArgOpType); + auto *ConstVal = getConstantStackValue(Call, ArgOp); + if (!ConstVal) + continue; - Call->setArgOperand(Idx, GV); - Changed = true; - } + Value *GV = new GlobalVariable(M, ConstVal->getType(), true, + GlobalValue::InternalLinkage, ConstVal, + "funcspec.arg"); + if (ArgOpType != ConstVal->getType()) + GV = ConstantExpr::getBitCast(cast(GV), ArgOpType); - // Add the changed CallInst to Solver Worklist - if (Changed) - Solver.visitCall(*Call); + Call->setArgOperand(Idx, GV); } } } @@ -206,7 +194,6 @@ removeSSACopy(*F); } - template <> struct llvm::DenseMapInfo { static inline SpecSig getEmptyKey() { return {~0U, {}}; } @@ -231,29 +218,90 @@ cleanUpSSA(); } -/// Attempt to specialize functions in the module to enable constant -/// propagation across function boundaries. -/// -/// \returns true if at least one function is specialized. -bool FunctionSpecializer::run() { - // Find possible specializations for each function. - SpecMap SM; - SmallVector AllSpecs; +/// Find possible specializations examining the call sites which reside in +/// specializations which were created in the previous invocation of the +/// Specializer. +unsigned FunctionSpecializer::findNewCandidates(SpecMap &SM, + SmallVectorImpl &AllSpecs) { + unsigned NumCandidates = 0; + SmallCallMap CM; + for (Function *F : Clones) { + SmallVector &CallSites = InterestingCalls[F]; + for (CallBase *CS : CallSites) { + // Do not specialize a specialization. + Function *Callee = CS->getCalledFunction(); + if (any_of(Clones, [Callee](Function *Clone) { return Callee == Clone; })) + continue; + CM[Callee].push_back(CS); + } + } + for (auto [F, Calls] : CM) { + CodeMetrics &Metrics = FunctionMetrics[F]; + if (!findSpecializations(F, Metrics, Calls, SM, AllSpecs)) { + LLVM_DEBUG( + dbgs() << "FnSpecialization: No possible specializations found for " + << F->getName() << "\n"); + continue; + } + ++NumCandidates; + } + Clones.clear(); + return NumCandidates; +} + +/// Find possible specializations examining the entire module. It runs only +/// in the first invocation of the Specializer. +unsigned FunctionSpecializer::findInitialCandidates(SpecMap &SM, + SmallVectorImpl &AllSpecs) { unsigned NumCandidates = 0; for (Function &F : M) { if (!isCandidateFunction(&F)) continue; - if (!findSpecializations(&F, SM, AllSpecs)) { + auto [It, _] = FunctionMetrics.try_emplace(&F); + CodeMetrics &Metrics = It->second; + // Analyze the function. + SmallPtrSet EphValues; + CodeMetrics::collectEphemeralValues(&F, &(GetAC)(F), EphValues); + for (BasicBlock &BB : F) + Metrics.analyzeBasicBlock(&BB, (GetTTI)(F), EphValues); + // If the code metrics reveal that we shouldn't duplicate the function, we + // shouldn't specialize it. Set the specialization cost to Invalid. + // Or if the lines of codes implies that this function is easy to get + // inlined so that we shouldn't specialize it. + if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() || + (!ForceSpecialization && !F.hasFnAttribute(Attribute::NoInline) && + Metrics.NumInsts < MinFunctionSize)) { + FunctionMetrics.erase(It); + continue; + } + + if (Metrics.isRecursive) + promoteConstantStackValues(&F); + + SmallVector Calls; + if (!findSpecializations(&F, Metrics, Calls, SM, AllSpecs)) { LLVM_DEBUG( dbgs() << "FnSpecialization: No possible specializations found for " << F.getName() << "\n"); continue; } - ++NumCandidates; } + return NumCandidates; +} +/// Attempt to specialize functions in the module to enable constant +/// propagation across function boundaries. +/// +/// \returns true if at least one function is specialized. +bool FunctionSpecializer::run() { + // Find possible specializations for each function. + SpecMap SM; + SmallVector AllSpecs; + + unsigned NumCandidates = Clones.empty() ? findInitialCandidates(SM, AllSpecs) + : findNewCandidates(SM, AllSpecs); if (!NumCandidates) { LLVM_DEBUG( dbgs() @@ -299,7 +347,6 @@ // Create the chosen specializations. SmallPtrSet OriginalFuncs; - SmallVector Clones; for (unsigned I = 0; I < NSpecs; ++I) { Spec &S = AllSpecs[BestSpecs[I]]; S.Clone = createSpecialization(S.F, S.Sig); @@ -323,9 +370,11 @@ for (Function *F : OriginalFuncs) { auto [Begin, End] = SM[F]; updateCallSites(F, AllSpecs.begin() + Begin, AllSpecs.begin() + End); + + if (FunctionMetrics[F].isRecursive) + promoteConstantStackValues(F); } - promoteConstantStackValues(); return true; } @@ -342,34 +391,51 @@ /// Clone the function \p F and remove the ssa_copy intrinsics added by /// the SCCPSolver in the cloned version. -static Function *cloneCandidateFunction(Function *F) { +static Function *cloneCandidateFunction(Function *F, CallMap &CM) { ValueToValueMapTy Mappings; Function *Clone = CloneFunction(F, Mappings); + + // Copy the interesting call sites. + SmallVector &OldCallSites = CM[F]; + SmallVector &NewCallSites = CM[Clone]; + for (CallBase *CS : OldCallSites) + NewCallSites.push_back(cast(Mappings[CS])); + removeSSACopy(*Clone); return Clone; } -bool FunctionSpecializer::findSpecializations(Function *F, SpecMap &SM, - SmallVectorImpl &AllSpecs) { - // Analyze the function if not done yet. - auto [It, Inserted] = FunctionMetrics.try_emplace(F, CodeMetrics()); - CodeMetrics &Metrics = It->second; - if (Inserted) { - // The code metrics were not cached. - SmallPtrSet EphValues; - CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues); - for (BasicBlock &BB : *F) - Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues); +static bool findInterestingCalls(Function *F, SCCPSolver &Solver, CallMap &CM, + SmallVectorImpl &Calls) { + for (User *U : F->users()) { + if (!isa(U) && !isa(U)) + continue; + + auto *CS = cast(U); + + // The user instruction does not call our function. + if (CS->getCalledFunction() != F) + continue; + + // If the call site has attribute minsize set, that callsite won't be + // specialized. + if (CS->hasFnAttr(Attribute::MinSize)) + continue; + + // If the parent of the call site will never be executed, we don't need + // to worry about the passed value. + if (!Solver.isBlockExecutable(CS->getParent())) + continue; + + Calls.push_back(CS); + CM[CS->getFunction()].push_back(CS); } - // If the code metrics reveal that we shouldn't duplicate the function, we - // shouldn't specialize it. Set the specialization cost to Invalid. - // Or if the lines of codes implies that this function is easy to get - // inlined so that we shouldn't specialize it. - if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() || - (!ForceSpecialization && !F->hasFnAttribute(Attribute::NoInline) && - Metrics.NumInsts < MinFunctionSize)) - return false; + return !Calls.empty(); +} +bool FunctionSpecializer::findSpecializations(Function *F, CodeMetrics &Metrics, + SmallVectorImpl &Calls, SpecMap &SM, + SmallVectorImpl &AllSpecs) { // Get a list of interesting arguments. SmallVector Args; for (Argument &Arg : F->args()) @@ -379,39 +445,27 @@ if (Args.empty()) return false; + if (Calls.empty()) + if (!findInterestingCalls(F, Solver, InterestingCalls, Calls)) + return false; + // A mapping from a specialisation signature to the index of the respective // entry in the all specialisation array. Used to ensure uniqueness of // specialisations. - DenseMap UM; + SmallDenseMap UM; // LoopInfo is required for the bonus estimation of each argument's users. const LoopInfo &LI = Solver.getLoopInfo(*F); - bool Found = false; - for (User *U : F->users()) { - if (!isa(U) && !isa(U)) - continue; - auto &CS = *cast(U); - - // The user instruction does not call our function. - if (CS.getCalledFunction() != F) - continue; - - // If the call site has attribute minsize set, that callsite won't be - // specialized. - if (CS.hasFnAttr(Attribute::MinSize)) - continue; - - // If the parent of the call site will never be executed, we don't need - // to worry about the passed value. - if (!Solver.isBlockExecutable(CS.getParent())) + for (CallBase *CS : Calls) { + if (!Solver.isBlockExecutable(CS->getParent())) continue; // Examine arguments and create a specialisation candidate from the // constant operands of this call site. SpecSig S; for (Argument *A : Args) { - Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo())); + Constant *C = getCandidateConstant(CS->getArgOperand(A->getArgNo())); if (!C) continue; LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument " @@ -431,10 +485,10 @@ // the cloned instances of this call, which result from specialising // functions. Hence we don't rewrite the call directly, but match it with // the best specialisation once all specialisations are known. - if (CS.getFunction() == F) + if (CS->getFunction() == F) continue; const unsigned Index = It->second; - AllSpecs[Index].CallSites.push_back(&CS); + AllSpecs[Index].CallSites.push_back(CS); } else { // Calculate the specialisation gain. Cost Latency = 0; @@ -457,17 +511,15 @@ // Create a new specialisation entry. auto &Spec = AllSpecs.emplace_back(F, S, Score); - if (CS.getFunction() != F) - Spec.CallSites.push_back(&CS); + if (CS->getFunction() != F) + Spec.CallSites.push_back(CS); const unsigned Index = AllSpecs.size() - 1; UM[S] = Index; if (auto [It, Inserted] = SM.try_emplace(F, Index, Index + 1); !Inserted) It->second.second = Index + 1; - Found = true; } } - - return Found; + return !UM.empty(); } bool FunctionSpecializer::isCandidateFunction(Function *F) { @@ -480,10 +532,6 @@ if (!Solver.isArgumentTrackedFunction(F)) return false; - // Do not specialize the cloned function again. - if (Specializations.contains(F)) - return false; - // If we're optimizing the function for size, we shouldn't specialize it. if (F->hasOptSize() || shouldOptimizeForSize(F, nullptr, nullptr, PGSOQueryType::IRPass)) @@ -503,9 +551,9 @@ return true; } -Function *FunctionSpecializer::createSpecialization(Function *F, const SpecSig &S) { - Function *Clone = cloneCandidateFunction(F); - +Function *FunctionSpecializer::createSpecialization(Function *F, + const SpecSig &S) { + Function *Clone = cloneCandidateFunction(F, InterestingCalls); // Initialize the lattice state of the arguments of the function clone, // marking the argument on which we specialized the function constant // with the given value. diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp --- a/llvm/lib/Transforms/IPO/SCCP.cpp +++ b/llvm/lib/Transforms/IPO/SCCP.cpp @@ -43,7 +43,7 @@ "Number of instructions replaced with (simpler) instruction"); static cl::opt FuncSpecMaxIters( - "funcspec-max-iters", cl::init(1), cl::Hidden, cl::desc( + "funcspec-max-iters", cl::init(10), cl::Hidden, cl::desc( "The maximum number of iterations function specialization is run")); static void findReturnsToZap(Function &F,