diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -166,7 +166,6 @@
 void initializeForwardControlFlowIntegrityPass(PassRegistry&);
 void initializeFuncletLayoutPass(PassRegistry&);
 void initializeFunctionImportLegacyPassPass(PassRegistry&);
-void initializeFunctionSpecializationLegacyPassPass(PassRegistry &);
 void initializeGCMachineCodeAnalysisPass(PassRegistry&);
 void initializeGCModuleInfoPass(PassRegistry&);
 void initializeGVNHoistLegacyPassPass(PassRegistry&);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -231,7 +231,6 @@
       (void) llvm::createInjectTLIMappingsLegacyPass();
       (void) llvm::createUnifyLoopExitsPass();
       (void) llvm::createFixIrreduciblePass();
-      (void)llvm::createFunctionSpecializationPass();
       (void)llvm::createSelectOptimizePass();
 
       (void)new llvm::IntervalPartition();
diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h
--- a/llvm/include/llvm/Transforms/IPO.h
+++ b/llvm/include/llvm/Transforms/IPO.h
@@ -169,11 +169,6 @@
 ///
 ModulePass *createIPSCCPPass();
 
-//===----------------------------------------------------------------------===//
-/// createFunctionSpecializationPass - This pass propagates constants from call
-/// sites to the specialized version of the callee function.
-ModulePass *createFunctionSpecializationPass();
-
 //===----------------------------------------------------------------------===//
 //
 /// createLoopExtractorPass - This pass extracts all natural loops from the
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -0,0 +1,180 @@
+//===- FunctionSpecialization.h - Function Specialization -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This specialises functions with constant parameters. Constant parameters
+// like function pointers and constant globals are propagated to the callee by
+// specializing the function. The main benefit of this pass at the moment is
+// that indirect calls are transformed into direct calls, which provides inline
+// opportunities that the inliner would not have been able to achieve. That's
+// why function specialisation is run before the inliner in the optimisation
+// pipeline; that is by design. Otherwise, we would only benefit from constant
+// passing, which is a valid use-case too, but hasn't been explored much in
+// terms of performance uplifts, cost-model and compile-time impact.
+//
+// Current limitations:
+// - It does not yet handle integer ranges. We do support "literal constants",
+//   but that's off by default under an option.
+// - The cost-model could be further looked into (it mainly focuses on inlining
+//   benefits),
+//
+// Ideas:
+// - With a function specialization attribute for arguments, we could have
+//   a direct way to steer function specialization, avoiding the cost-model,
+//   and thus control compile-times / code-size.
+//
+// Todos:
+// - Specializing recursive functions relies on running the transformation a
+//   number of times, which is controlled by option
+//   `func-specialization-max-iters`. Thus, increasing this value and the
+//   number of iterations, will linearly increase the number of times recursive
+//   functions get specialized, see also the discussion in
+//   https://reviews.llvm.org/D106426 for details. Perhaps there is a
+//   compile-time friendlier way to control/limit the number of specialisations
+//   for recursive functions.
+// - Don't transform the function if function specialization does not trigger;
+//   the SCCPSolver may make IR changes.
+//
+// References:
+// - 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable
+//   it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
+#define LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
+
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Scalar/SCCP.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/SCCPSolver.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
+
+using namespace llvm;
+
+namespace llvm {
+// Bookkeeping struct to pass data from the analysis and profitability phase
+// to the actual transform helper functions.
+struct SpecializationInfo {
+  SmallVector<ArgInfo, 8> Args; // Stores the {formal,actual} argument pairs.
+  InstructionCost Gain;         // Profitability: Gain = Bonus - Cost.
+};
+
+using FuncList = SmallVectorImpl<Function *>;
+using CallArgBinding = std::pair<CallBase *, Constant *>;
+using CallSpecBinding = std::pair<CallBase *, SpecializationInfo>;
+// We are using MapVector because it guarantees deterministic iteration
+// order across executions.
+using SpecializationMap = SmallMapVector<CallBase *, SpecializationInfo, 8>;
+
+class FunctionSpecializer {
+
+  /// The IPSCCP Solver.
+  SCCPSolver &Solver;
+
+  Module &M;
+
+  /// Analyses used to help determine if a function should be specialized.
+  std::function<AssumptionCache &(Function &)> GetAC;
+  std::function<TargetTransformInfo &(Function &)> GetTTI;
+  std::function<const TargetLibraryInfo &(Function &)> GetTLI;
+
+  // The number of functions specialised, used for collecting statistics and
+  // also in the cost model.
+  unsigned NbFunctionsSpecialized = 0;
+
+  SmallPtrSet<Function *, 32> SpecializedFuncs;
+  SmallPtrSet<Function *, 32> FullySpecialized;
+  DenseMap<Function *, CodeMetrics> FunctionMetrics;
+
+public:
+  FunctionSpecializer(SCCPSolver &Solver, Module &M,
+                      std::function<AssumptionCache &(Function &)> GetAC,
+                      std::function<TargetTransformInfo &(Function &)> GetTTI,
+                      std::function<const TargetLibraryInfo &(Function &)> GetTLI)
+      : Solver(Solver), M(M), GetAC(GetAC), GetTTI(GetTTI), GetTLI(GetTLI) {}
+
+  ~FunctionSpecializer() { removeDeadFunctions(); }
+
+  bool specialize(FuncList &FuncDecls);
+
+private:
+  /// Iterate over the argument tracked functions see if there
+  /// are any new constant values for the call instruction via
+  /// stack variables.
+  void propagateConstantArgs(FuncList &WorkList);
+
+  /// Attempt to specialize functions in the module to enable constant
+  /// propagation across function boundaries.
+  ///
+  /// \returns true if at least one function is specialized.
+  bool specializeFunctions(FuncList &Candidates, FuncList &WorkList);
+
+  /// Clean up fully specialized functions.
+  void removeDeadFunctions();
+
+  // Compute the code metrics for function \p F.
+  CodeMetrics &analyzeFunction(Function *F);
+
+  /// This function decides whether it's worthwhile to specialize function
+  /// \p F based on the known constant values its arguments can take on. It
+  /// only discovers potential specialization opportunities without actually
+  /// applying them.
+  ///
+  /// \returns true if any specializations have been found.
+  bool calculateGains(Function *F, InstructionCost Cost,
+                      SmallVectorImpl<CallSpecBinding> &WorkList);
+
+  bool isCandidateFunction(Function *F);
+
+  void specializeFunction(Function *F, SpecializationInfo &S,
+                          FuncList &WorkList);
+
+  /// Compute and return the cost of specializing function \p F.
+  InstructionCost getSpecializationCost(Function *F);
+
+  /// Compute a bonus for replacing argument \p A with constant \p C.
+  InstructionCost getSpecializationBonus(Argument *A, Constant *C);
+
+  /// Determine if we should specialize a function based on the incoming values
+  /// of the given argument.
+  ///
+  /// This function implements the goal-directed heuristic. It determines if
+  /// specializing the function based on the incoming values of argument \p A
+  /// would result in any significant optimization opportunities. If
+  /// optimization opportunities exist, the constant values of \p A on which to
+  /// specialize the function are collected in \p Constants.
+  ///
+  /// \returns true if the function should be specialized on the given
+  /// argument.
+  bool isArgumentInteresting(Argument *A,
+                             SmallVectorImpl<CallArgBinding> &Constants);
+
+  /// Collect in \p Constants all the constant values that argument \p A can
+  /// take on.
+  void getPossibleConstants(Argument *A,
+                            SmallVectorImpl<CallArgBinding> &Constants);
+
+  /// Rewrite calls to function \p F to call function \p Clone instead.
+  ///
+  /// This function modifies calls to function \p F as long as the actual
+  /// arguments match those in \p Args. Note that for recursive calls we
+  /// need to compare against the cloned formal arguments.
+  ///
+  /// Callsites that have been marked with the MinSize function attribute won't
+  /// be specialized and rewritten.
+  void rewriteCallSites(Function *Clone, const SmallVectorImpl<ArgInfo> &Args,
+                        ValueToValueMapTy &Mappings);
+
+  void updateSpecializedFuncs(FuncList &Candidates, FuncList &WorkList);
+};
+} // namespace
+
+#endif // LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
diff --git a/llvm/include/llvm/Transforms/IPO/SCCP.h b/llvm/include/llvm/Transforms/IPO/SCCP.h
--- a/llvm/include/llvm/Transforms/IPO/SCCP.h
+++ b/llvm/include/llvm/Transforms/IPO/SCCP.h
@@ -32,14 +32,6 @@
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
 };
 
-/// Pass to perform interprocedural constant propagation by specializing
-/// functions
-class FunctionSpecializationPass
-    : public PassInfoMixin<FunctionSpecializationPass> {
-public:
-  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
-};
-
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_IPO_SCCP_H
diff --git a/llvm/include/llvm/Transforms/Scalar/SCCP.h b/llvm/include/llvm/Transforms/Scalar/SCCP.h
--- a/llvm/include/llvm/Transforms/Scalar/SCCP.h
+++ b/llvm/include/llvm/Transforms/Scalar/SCCP.h
@@ -42,14 +42,9 @@
 bool runIPSCCP(Module &M, const DataLayout &DL,
                std::function<const TargetLibraryInfo &(Function &)> GetTLI,
+               std::function<TargetTransformInfo &(Function &)> GetTTI,
+               std::function<AssumptionCache &(Function &)> GetAC,
                function_ref<AnalysisResultsForFn(Function &)> getAnalysis);
-
-bool runFunctionSpecialization(
-    Module &M, const DataLayout &DL,
-    std::function<TargetLibraryInfo &(Function &)> GetTLI,
-    std::function<TargetTransformInfo &(Function &)> GetTTI,
-    std::function<AssumptionCache &(Function &)> GetAC,
-    function_ref<AnalysisResultsForFn(Function &)> GetAnalysis);
 
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_SCALAR_SCCP_H
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -927,10 +927,6 @@
   for (auto &C : PipelineEarlySimplificationEPCallbacks)
     C(MPM, Level);
 
-  // Specialize functions with IPSCCP.
-  if (EnableFunctionSpecialization && Level == OptimizationLevel::O3)
-    MPM.addPass(FunctionSpecializationPass());
-
   // Interprocedural constant propagation now that basic cleanup has occurred
   // and prior to optimizing globals.
   // FIXME: This position in the pipeline hasn't been carefully considered in
@@ -1522,8 +1518,6 @@
   MPM.addPass(PGOIndirectCallPromotion(
       true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
 
-  if (EnableFunctionSpecialization && Level == OptimizationLevel::O3)
-    MPM.addPass(FunctionSpecializationPass());
-
   // Propagate constants at call sites into the functions they call. This
   // opens opportunities for globalopt (and inlining) by substituting function
   // pointers passed as arguments to direct uses of functions.
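The header comment in FunctionSpecialization.h above describes the main win as turning indirect calls into direct calls that the inliner can then consume. For illustration only, here is a small, self-contained C++ source example of the shape of code the pass targets; the function names are made up and the clone names mentioned in the comments are purely illustrative, not produced by this patch.

#include <cstdio>

static int add(int A, int B) { return A + B; }
static int mul(int A, int B) { return A * B; }

// 'compute' receives its callee as a constant function pointer at every call
// site. Specializing 'compute' per known callee replaces the indirect call
// 'Op(A, B)' with a direct call to 'add' or 'mul', which the inliner can fold.
static int compute(int (*Op)(int, int), int A, int B) { return Op(A, B); }

int main() {
  // Two call sites, each with a different constant callee: after
  // specialization there would be two clones of 'compute', one per callee
  // (clone names are illustrative only).
  std::printf("%d %d\n", compute(add, 2, 3), compute(mul, 2, 3));
  return 0;
}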
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -60,7 +60,6 @@ MODULE_PASS("extract-blocks", BlockExtractorPass()) MODULE_PASS("forceattrs", ForceFunctionAttrsPass()) MODULE_PASS("function-import", FunctionImportPass()) -MODULE_PASS("function-specialization", FunctionSpecializationPass()) MODULE_PASS("globaldce", GlobalDCEPass()) MODULE_PASS("globalopt", GlobalOptPass()) MODULE_PASS("globalsplit", GlobalSplitPass()) diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -53,6 +53,7 @@ #include "llvm/Analysis/ValueLattice.h" #include "llvm/Analysis/ValueLatticeUtils.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Transforms/IPO/FunctionSpecialization.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/SCCPSolver.h" @@ -108,22 +109,6 @@ cl::desc("Enable specialization of functions that take a literal constant " "as an argument.")); -namespace { -// Bookkeeping struct to pass data from the analysis and profitability phase -// to the actual transform helper functions. -struct SpecializationInfo { - SmallVector Args; // Stores the {formal,actual} argument pairs. - InstructionCost Gain; // Profitability: Gain = Bonus - Cost. -}; -} // Anonymous namespace - -using FuncList = SmallVectorImpl; -using CallArgBinding = std::pair; -using CallSpecBinding = std::pair; -// We are using MapVector because it guarantees deterministic iteration -// order across executions. -using SpecializationMap = SmallMapVector; - // Helper to check if \p LV is either a constant or a constant // range with a single element. This should cover exactly the same cases as the // old ValueLatticeElement::isConstant() and is intended to be used in the @@ -167,8 +152,7 @@ // A constant stack value is an AllocaInst that has a single constant // value stored to it. Return this constant if such an alloca stack value // is a function argument. -static Constant *getConstantStackValue(CallInst *Call, Value *Val, - SCCPSolver &Solver) { +static Constant *getConstantStackValue(CallInst *Call, Value *Val) { if (!Val) return nullptr; Val = Val->stripPointerCasts(); @@ -201,8 +185,7 @@ // ret void // } // -static void constantArgPropagation(FuncList &WorkList, Module &M, - SCCPSolver &Solver) { +void FunctionSpecializer::propagateConstantArgs(FuncList &WorkList) { // Iterate over the argument tracked functions see if there // are any new constant values for the call instruction via // stack variables. @@ -223,7 +206,7 @@ if (!Call->onlyReadsMemory(Idx) || !ArgOpType->isPointerTy()) continue; - auto *ConstVal = getConstantStackValue(Call, ArgOp, Solver); + auto *ConstVal = getConstantStackValue(Call, ArgOp); if (!ConstVal) continue; @@ -265,626 +248,527 @@ removeSSACopy(F); } -namespace { -class FunctionSpecializer { - - /// The IPSCCP Solver. - SCCPSolver &Solver; - - /// Analyses used to help determine if a function should be specialized. 
- std::function GetAC; - std::function GetTTI; - std::function GetTLI; - - SmallPtrSet SpecializedFuncs; - SmallPtrSet FullySpecialized; - SmallVector ReplacedWithConstant; - DenseMap FunctionMetrics; - -public: - FunctionSpecializer(SCCPSolver &Solver, - std::function GetAC, - std::function GetTTI, - std::function GetTLI) - : Solver(Solver), GetAC(GetAC), GetTTI(GetTTI), GetTLI(GetTLI) {} - - ~FunctionSpecializer() { - // Eliminate dead code. - removeDeadInstructions(); - removeDeadFunctions(); - } - - /// Attempt to specialize functions in the module to enable constant - /// propagation across function boundaries. - /// - /// \returns true if at least one function is specialized. - bool specializeFunctions(FuncList &Candidates, FuncList &WorkList) { - bool Changed = false; - for (auto *F : Candidates) { - if (!isCandidateFunction(F)) - continue; - - auto Cost = getSpecializationCost(F); - if (!Cost.isValid()) { - LLVM_DEBUG( - dbgs() << "FnSpecialization: Invalid specialization cost.\n"); - continue; - } +/// Attempt to specialize functions in the module to enable constant +/// propagation across function boundaries. +/// +/// \returns true if at least one function is specialized. +bool FunctionSpecializer::specializeFunctions(FuncList &Candidates, + FuncList &WorkList) { + bool Changed = false; + for (auto *F : Candidates) { + if (!isCandidateFunction(F)) + continue; - LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for " - << F->getName() << " is " << Cost << "\n"); + auto Cost = getSpecializationCost(F); + if (!Cost.isValid()) { + LLVM_DEBUG( + dbgs() << "FnSpecialization: Invalid specialization cost.\n"); + continue; + } - SmallVector Specializations; - if (!calculateGains(F, Cost, Specializations)) { - LLVM_DEBUG(dbgs() << "FnSpecialization: No possible constants found\n"); - continue; - } + LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for " + << F->getName() << " is " << Cost << "\n"); - Changed = true; - for (auto &Entry : Specializations) - specializeFunction(F, Entry.second, WorkList); + SmallVector Specializations; + if (!calculateGains(F, Cost, Specializations)) { + LLVM_DEBUG(dbgs() << "FnSpecialization: No possible constants found\n"); + continue; } - updateSpecializedFuncs(Candidates, WorkList); - NumFuncSpecialized += NbFunctionsSpecialized; - return Changed; + Changed = true; + for (auto &Entry : Specializations) + specializeFunction(F, Entry.second, WorkList); } - void removeDeadInstructions() { - for (auto *I : ReplacedWithConstant) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead instruction " << *I - << "\n"); - I->eraseFromParent(); - } - ReplacedWithConstant.clear(); - } + updateSpecializedFuncs(Candidates, WorkList); + NumFuncSpecialized += NbFunctionsSpecialized; + return Changed; +} - void removeDeadFunctions() { - for (auto *F : FullySpecialized) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead function " - << F->getName() << "\n"); - F->eraseFromParent(); - } - FullySpecialized.clear(); +void FunctionSpecializer::removeDeadFunctions() { + for (auto *F : FullySpecialized) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead function " + << F->getName() << "\n"); + F->eraseFromParent(); } + FullySpecialized.clear(); +} - bool tryToReplaceWithConstant(Value *V) { - if (!V->getType()->isSingleValueType() || isa(V) || - V->user_empty()) - return false; - - const ValueLatticeElement &IV = Solver.getLatticeValueFor(V); - if (isOverdefined(IV)) - return false; - auto *Const = - isConstant(IV) ? 
Solver.getConstant(IV) : UndefValue::get(V->getType()); - - LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing " << *V - << "\nFnSpecialization: with " << *Const << "\n"); - - // Record uses of V to avoid visiting irrelevant uses of const later. - SmallVector UseInsts; - for (auto *U : V->users()) - if (auto *I = dyn_cast(U)) - if (Solver.isBlockExecutable(I->getParent())) - UseInsts.push_back(I); - - V->replaceAllUsesWith(Const); - - for (auto *I : UseInsts) - Solver.visit(I); - - // Remove the instruction from Block and Solver. - if (auto *I = dyn_cast(V)) { - if (I->isSafeToRemove()) { - ReplacedWithConstant.push_back(I); - Solver.removeLatticeValueFor(I); - } - } - return true; - } +static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) { + if (!V->getType()->isSingleValueType() || isa(V) || + V->user_empty()) + return false; + + const ValueLatticeElement &IV = Solver.getLatticeValueFor(V); + if (isOverdefined(IV)) + return false; + auto *Const = + isConstant(IV) ? Solver.getConstant(IV) : UndefValue::get(V->getType()); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing " << *V + << "\nFnSpecialization: with " << *Const << "\n"); -private: - // The number of functions specialised, used for collecting statistics and - // also in the cost model. - unsigned NbFunctionsSpecialized = 0; - - // Compute the code metrics for function \p F. - CodeMetrics &analyzeFunction(Function *F) { - auto I = FunctionMetrics.insert({F, CodeMetrics()}); - CodeMetrics &Metrics = I.first->second; - if (I.second) { - // The code metrics were not cached. - SmallPtrSet EphValues; - CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues); - for (BasicBlock &BB : *F) - Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues); - - LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function " - << F->getName() << " is " << Metrics.NumInsts - << " instructions\n"); + // Record uses of V to avoid visiting irrelevant uses of const later. + SmallVector UseInsts; + for (auto *U : V->users()) + if (auto *I = dyn_cast(U)) + if (Solver.isBlockExecutable(I->getParent())) + UseInsts.push_back(I); + + V->replaceAllUsesWith(Const); + + for (auto *I : UseInsts) + Solver.visit(I); + + // Remove the instruction from Block and Solver. + if (auto *I = dyn_cast(V)) { + if (I->isSafeToRemove()) { + I->eraseFromParent(); + Solver.removeLatticeValueFor(I); } - return Metrics; } + return true; +} - /// Clone the function \p F and remove the ssa_copy intrinsics added by - /// the SCCPSolver in the cloned version. - Function *cloneCandidateFunction(Function *F, ValueToValueMapTy &Mappings) { - Function *Clone = CloneFunction(F, Mappings); - removeSSACopy(*Clone); - return Clone; +// Compute the code metrics for function \p F. +CodeMetrics &FunctionSpecializer::analyzeFunction(Function *F) { + auto I = FunctionMetrics.insert({F, CodeMetrics()}); + CodeMetrics &Metrics = I.first->second; + if (I.second) { + // The code metrics were not cached. + SmallPtrSet EphValues; + CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues); + for (BasicBlock &BB : *F) + Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function " + << F->getName() << " is " << Metrics.NumInsts + << " instructions\n"); } + return Metrics; +} - /// This function decides whether it's worthwhile to specialize function - /// \p F based on the known constant values its arguments can take on. 
It - /// only discovers potential specialization opportunities without actually - /// applying them. - /// - /// \returns true if any specializations have been found. - bool calculateGains(Function *F, InstructionCost Cost, - SmallVectorImpl &WorkList) { - SpecializationMap Specializations; - // Determine if we should specialize the function based on the values the - // argument can take on. If specialization is not profitable, we continue - // on to the next argument. - for (Argument &FormalArg : F->args()) { - // Determine if this argument is interesting. If we know the argument can - // take on any constant values, they are collected in Constants. - SmallVector ActualArgs; - if (!isArgumentInteresting(&FormalArg, ActualArgs)) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Argument " - << FormalArg.getNameOrAsOperand() - << " is not interesting\n"); - continue; - } +/// Clone the function \p F and remove the ssa_copy intrinsics added by +/// the SCCPSolver in the cloned version. +static Function *cloneCandidateFunction(Function *F, + ValueToValueMapTy &Mappings) { + Function *Clone = CloneFunction(F, Mappings); + removeSSACopy(*Clone); + return Clone; +} - for (const auto &Entry : ActualArgs) { - CallBase *Call = Entry.first; - Constant *ActualArg = Entry.second; +/// This function decides whether it's worthwhile to specialize function +/// \p F based on the known constant values its arguments can take on. It +/// only discovers potential specialization opportunities without actually +/// applying them. +/// +/// \returns true if any specializations have been found. +bool FunctionSpecializer::calculateGains(Function *F, InstructionCost Cost, + SmallVectorImpl &WorkList) { + SpecializationMap Specializations; + // Determine if we should specialize the function based on the values the + // argument can take on. If specialization is not profitable, we continue + // on to the next argument. + for (Argument &FormalArg : F->args()) { + // Determine if this argument is interesting. If we know the argument can + // take on any constant values, they are collected in Constants. + SmallVector ActualArgs; + if (!isArgumentInteresting(&FormalArg, ActualArgs)) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Argument " + << FormalArg.getNameOrAsOperand() + << " is not interesting\n"); + continue; + } - auto I = Specializations.insert({Call, SpecializationInfo()}); - SpecializationInfo &S = I.first->second; + for (const auto &Entry : ActualArgs) { + CallBase *Call = Entry.first; + Constant *ActualArg = Entry.second; - if (I.second) - S.Gain = ForceFunctionSpecialization ? 1 : 0 - Cost; - if (!ForceFunctionSpecialization) - S.Gain += getSpecializationBonus(&FormalArg, ActualArg); - S.Args.push_back({&FormalArg, ActualArg}); - } - } + auto I = Specializations.insert({Call, SpecializationInfo()}); + SpecializationInfo &S = I.first->second; - // Remove unprofitable specializations. - Specializations.remove_if( - [](const auto &Entry) { return Entry.second.Gain <= 0; }); - - // Clear the MapVector and return the underlying vector. - WorkList = Specializations.takeVector(); - - // Sort the candidates in descending order. - llvm::stable_sort(WorkList, [](const auto &L, const auto &R) { - return L.second.Gain > R.second.Gain; - }); - - // Truncate the worklist to 'MaxClonesThreshold' candidates if necessary. 
- if (WorkList.size() > MaxClonesThreshold) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed " - << "the maximum number of clones threshold.\n" - << "FnSpecialization: Truncating worklist to " - << MaxClonesThreshold << " candidates.\n"); - WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end()); + if (I.second) + S.Gain = ForceFunctionSpecialization ? 1 : 0 - Cost; + if (!ForceFunctionSpecialization) + S.Gain += getSpecializationBonus(&FormalArg, ActualArg); + S.Args.push_back({&FormalArg, ActualArg}); } - - LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function " - << F->getName() << "\n"; - for (const auto &Entry - : WorkList) { - dbgs() << "FnSpecialization: Gain = " << Entry.second.Gain - << "\n"; - for (const ArgInfo &Arg : Entry.second.Args) - dbgs() << "FnSpecialization: FormalArg = " - << Arg.Formal->getNameOrAsOperand() - << ", ActualArg = " - << Arg.Actual->getNameOrAsOperand() << "\n"; - }); - - return !WorkList.empty(); } - bool isCandidateFunction(Function *F) { - // Do not specialize the cloned function again. - if (SpecializedFuncs.contains(F)) - return false; + // Remove unprofitable specializations. + Specializations.remove_if( + [](const auto &Entry) { return Entry.second.Gain <= 0; }); + + // Clear the MapVector and return the underlying vector. + WorkList = Specializations.takeVector(); + + // Sort the candidates in descending order. + llvm::stable_sort(WorkList, [](const auto &L, const auto &R) { + return L.second.Gain > R.second.Gain; + }); + + // Truncate the worklist to 'MaxClonesThreshold' candidates if necessary. + if (WorkList.size() > MaxClonesThreshold) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed " + << "the maximum number of clones threshold.\n" + << "FnSpecialization: Truncating worklist to " + << MaxClonesThreshold << " candidates.\n"); + WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end()); + } - // If we're optimizing the function for size, we shouldn't specialize it. - if (F->hasOptSize() || - shouldOptimizeForSize(F, nullptr, nullptr, PGSOQueryType::IRPass)) - return false; + LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function " + << F->getName() << "\n"; + for (const auto &Entry + : WorkList) { + dbgs() << "FnSpecialization: Gain = " << Entry.second.Gain + << "\n"; + for (const ArgInfo &Arg : Entry.second.Args) + dbgs() << "FnSpecialization: FormalArg = " + << Arg.Formal->getNameOrAsOperand() + << ", ActualArg = " + << Arg.Actual->getNameOrAsOperand() << "\n"; + }); + + return !WorkList.empty(); +} - // Exit if the function is not executable. There's no point in specializing - // a dead function. - if (!Solver.isBlockExecutable(&F->getEntryBlock())) - return false; +bool FunctionSpecializer::isCandidateFunction(Function *F) { + // Do not specialize the cloned function again. + if (SpecializedFuncs.contains(F)) + return false; - // It wastes time to specialize a function which would get inlined finally. - if (F->hasFnAttribute(Attribute::AlwaysInline)) - return false; + // If we're optimizing the function for size, we shouldn't specialize it. + if (F->hasOptSize() || + shouldOptimizeForSize(F, nullptr, nullptr, PGSOQueryType::IRPass)) + return false; - LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName() - << "\n"); - return true; - } + // Exit if the function is not executable. There's no point in specializing + // a dead function. 
+ if (!Solver.isBlockExecutable(&F->getEntryBlock())) + return false; - void specializeFunction(Function *F, SpecializationInfo &S, - FuncList &WorkList) { - ValueToValueMapTy Mappings; - Function *Clone = cloneCandidateFunction(F, Mappings); - - // Rewrite calls to the function so that they call the clone instead. - rewriteCallSites(Clone, S.Args, Mappings); - - // Initialize the lattice state of the arguments of the function clone, - // marking the argument on which we specialized the function constant - // with the given value. - Solver.markArgInFuncSpecialization(Clone, S.Args); - - // Mark all the specialized functions - WorkList.push_back(Clone); - NbFunctionsSpecialized++; - - // If the function has been completely specialized, the original function - // is no longer needed. Mark it unreachable. - if (F->getNumUses() == 0 || all_of(F->users(), [F](User *U) { - if (auto *CS = dyn_cast(U)) - return CS->getFunction() == F; - return false; - })) { - Solver.markFunctionUnreachable(F); - FullySpecialized.insert(F); - } - } + // It wastes time to specialize a function which would get inlined finally. + if (F->hasFnAttribute(Attribute::AlwaysInline)) + return false; - /// Compute and return the cost of specializing function \p F. - InstructionCost getSpecializationCost(Function *F) { - CodeMetrics &Metrics = analyzeFunction(F); - // If the code metrics reveal that we shouldn't duplicate the function, we - // shouldn't specialize it. Set the specialization cost to Invalid. - // Or if the lines of codes implies that this function is easy to get - // inlined so that we shouldn't specialize it. - if (Metrics.notDuplicatable || - (!ForceFunctionSpecialization && - Metrics.NumInsts < SmallFunctionThreshold)) { - InstructionCost C{}; - C.setInvalid(); - return C; - } + LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName() + << "\n"); + return true; +} - // Otherwise, set the specialization cost to be the cost of all the - // instructions in the function and penalty for specializing more functions. - unsigned Penalty = NbFunctionsSpecialized + 1; - return Metrics.NumInsts * InlineConstants::InstrCost * Penalty; +void FunctionSpecializer::specializeFunction(Function *F, SpecializationInfo &S, + FuncList &WorkList) { + ValueToValueMapTy Mappings; + Function *Clone = cloneCandidateFunction(F, Mappings); + + // Rewrite calls to the function so that they call the clone instead. + rewriteCallSites(Clone, S.Args, Mappings); + + // Initialize the lattice state of the arguments of the function clone, + // marking the argument on which we specialized the function constant + // with the given value. + Solver.markArgInFuncSpecialization(Clone, S.Args); + + // Mark all the specialized functions + WorkList.push_back(Clone); + NbFunctionsSpecialized++; + + // If the function has been completely specialized, the original function + // is no longer needed. Mark it unreachable. + if (F->getNumUses() == 0 || all_of(F->users(), [F](User *U) { + if (auto *CS = dyn_cast(U)) + return CS->getFunction() == F; + return false; + })) { + Solver.markFunctionUnreachable(F); + FullySpecialized.insert(F); } +} - InstructionCost getUserBonus(User *U, llvm::TargetTransformInfo &TTI, - LoopInfo &LI) { - auto *I = dyn_cast_or_null(U); - // If not an instruction we do not know how to evaluate. - // Keep minimum possible cost for now so that it doesnt affect - // specialization. 
- if (!I) - return std::numeric_limits::min(); - - auto Cost = TTI.getUserCost(U, TargetTransformInfo::TCK_SizeAndLatency); - - // Traverse recursively if there are more uses. - // TODO: Any other instructions to be added here? - if (I->mayReadFromMemory() || I->isCast()) - for (auto *User : I->users()) - Cost += getUserBonus(User, TTI, LI); - - // Increase the cost if it is inside the loop. - auto LoopDepth = LI.getLoopDepth(I->getParent()); - Cost *= std::pow((double)AvgLoopIterationCount, LoopDepth); - return Cost; +/// Compute and return the cost of specializing function \p F. +InstructionCost FunctionSpecializer::getSpecializationCost(Function *F) { + CodeMetrics &Metrics = analyzeFunction(F); + // If the code metrics reveal that we shouldn't duplicate the function, we + // shouldn't specialize it. Set the specialization cost to Invalid. + // Or if the lines of codes implies that this function is easy to get + // inlined so that we shouldn't specialize it. + if (Metrics.notDuplicatable || + (!ForceFunctionSpecialization && + Metrics.NumInsts < SmallFunctionThreshold)) { + InstructionCost C{}; + C.setInvalid(); + return C; } - /// Compute a bonus for replacing argument \p A with constant \p C. - InstructionCost getSpecializationBonus(Argument *A, Constant *C) { - Function *F = A->getParent(); - DominatorTree DT(*F); - LoopInfo LI(DT); - auto &TTI = (GetTTI)(*F); - LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " - << C->getNameOrAsOperand() << "\n"); - - InstructionCost TotalCost = 0; - for (auto *U : A->users()) { - TotalCost += getUserBonus(U, TTI, LI); - LLVM_DEBUG(dbgs() << "FnSpecialization: User cost "; - TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n"); - } - - // The below heuristic is only concerned with exposing inlining - // opportunities via indirect call promotion. If the argument is not a - // (potentially casted) function pointer, give up. - Function *CalledFunction = dyn_cast(C->stripPointerCasts()); - if (!CalledFunction) - return TotalCost; - - // Get TTI for the called function (used for the inline cost). - auto &CalleeTTI = (GetTTI)(*CalledFunction); - - // Look at all the call sites whose called value is the argument. - // Specializing the function on the argument would allow these indirect - // calls to be promoted to direct calls. If the indirect call promotion - // would likely enable the called function to be inlined, specializing is a - // good idea. - int Bonus = 0; - for (User *U : A->users()) { - if (!isa(U) && !isa(U)) - continue; - auto *CS = cast(U); - if (CS->getCalledOperand() != A) - continue; + // Otherwise, set the specialization cost to be the cost of all the + // instructions in the function and penalty for specializing more functions. + unsigned Penalty = NbFunctionsSpecialized + 1; + return Metrics.NumInsts * InlineConstants::InstrCost * Penalty; +} - // Get the cost of inlining the called function at this call site. Note - // that this is only an estimate. The called function may eventually - // change in a way that leads to it not being inlined here, even though - // inlining looks profitable now. For example, one of its called - // functions may be inlined into it, making the called function too large - // to be inlined into this call site. - // - // We apply a boost for performing indirect call promotion by increasing - // the default threshold by the threshold for indirect calls. 
- auto Params = getInlineParams(); - Params.DefaultThreshold += InlineConstants::IndirectCallThreshold; - InlineCost IC = - getInlineCost(*CS, CalledFunction, Params, CalleeTTI, GetAC, GetTLI); - - // We clamp the bonus for this call to be between zero and the default - // threshold. - if (IC.isAlways()) - Bonus += Params.DefaultThreshold; - else if (IC.isVariable() && IC.getCostDelta() > 0) - Bonus += IC.getCostDelta(); - - LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus - << " for user " << *U << "\n"); - } +static InstructionCost getUserBonus(User *U, llvm::TargetTransformInfo &TTI, + LoopInfo &LI) { + auto *I = dyn_cast_or_null(U); + // If not an instruction we do not know how to evaluate. + // Keep minimum possible cost for now so that it doesnt affect + // specialization. + if (!I) + return std::numeric_limits::min(); + + auto Cost = TTI.getUserCost(U, TargetTransformInfo::TCK_SizeAndLatency); + + // Traverse recursively if there are more uses. + // TODO: Any other instructions to be added here? + if (I->mayReadFromMemory() || I->isCast()) + for (auto *User : I->users()) + Cost += getUserBonus(User, TTI, LI); + + // Increase the cost if it is inside the loop. + auto LoopDepth = LI.getLoopDepth(I->getParent()); + Cost *= std::pow((double)AvgLoopIterationCount, LoopDepth); + return Cost; +} - return TotalCost + Bonus; +/// Compute a bonus for replacing argument \p A with constant \p C. +InstructionCost FunctionSpecializer::getSpecializationBonus(Argument *A, + Constant *C) { + Function *F = A->getParent(); + DominatorTree DT(*F); + LoopInfo LI(DT); + auto &TTI = (GetTTI)(*F); + LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " + << C->getNameOrAsOperand() << "\n"); + + InstructionCost TotalCost = 0; + for (auto *U : A->users()) { + TotalCost += getUserBonus(U, TTI, LI); + LLVM_DEBUG(dbgs() << "FnSpecialization: User cost "; + TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n"); } - /// Determine if we should specialize a function based on the incoming values - /// of the given argument. - /// - /// This function implements the goal-directed heuristic. It determines if - /// specializing the function based on the incoming values of argument \p A - /// would result in any significant optimization opportunities. If - /// optimization opportunities exist, the constant values of \p A on which to - /// specialize the function are collected in \p Constants. - /// - /// \returns true if the function should be specialized on the given - /// argument. - bool isArgumentInteresting(Argument *A, - SmallVectorImpl &Constants) { - // For now, don't attempt to specialize functions based on the values of - // composite types. - if (!A->getType()->isSingleValueType() || A->user_empty()) - return false; - - // If the argument isn't overdefined, there's nothing to do. It should - // already be constant. - if (!Solver.getLatticeValueFor(A).isOverdefined()) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Nothing to do, argument " - << A->getNameOrAsOperand() - << " is already constant?\n"); - return false; - } + // The below heuristic is only concerned with exposing inlining + // opportunities via indirect call promotion. If the argument is not a + // (potentially casted) function pointer, give up. + Function *CalledFunction = dyn_cast(C->stripPointerCasts()); + if (!CalledFunction) + return TotalCost; + + // Get TTI for the called function (used for the inline cost). 
+ auto &CalleeTTI = (GetTTI)(*CalledFunction); + + // Look at all the call sites whose called value is the argument. + // Specializing the function on the argument would allow these indirect + // calls to be promoted to direct calls. If the indirect call promotion + // would likely enable the called function to be inlined, specializing is a + // good idea. + int Bonus = 0; + for (User *U : A->users()) { + if (!isa(U) && !isa(U)) + continue; + auto *CS = cast(U); + if (CS->getCalledOperand() != A) + continue; - // Collect the constant values that the argument can take on. If the - // argument can't take on any constant values, we aren't going to - // specialize the function. While it's possible to specialize the function - // based on non-constant arguments, there's likely not much benefit to - // constant propagation in doing so. - // - // TODO 1: currently it won't specialize if there are over the threshold of - // calls using the same argument, e.g foo(a) x 4 and foo(b) x 1, but it - // might be beneficial to take the occurrences into account in the cost - // model, so we would need to find the unique constants. + // Get the cost of inlining the called function at this call site. Note + // that this is only an estimate. The called function may eventually + // change in a way that leads to it not being inlined here, even though + // inlining looks profitable now. For example, one of its called + // functions may be inlined into it, making the called function too large + // to be inlined into this call site. // - // TODO 2: this currently does not support constants, i.e. integer ranges. - // - getPossibleConstants(A, Constants); + // We apply a boost for performing indirect call promotion by increasing + // the default threshold by the threshold for indirect calls. + auto Params = getInlineParams(); + Params.DefaultThreshold += InlineConstants::IndirectCallThreshold; + InlineCost IC = + getInlineCost(*CS, CalledFunction, Params, CalleeTTI, GetAC, GetTLI); + + // We clamp the bonus for this call to be between zero and the default + // threshold. + if (IC.isAlways()) + Bonus += Params.DefaultThreshold; + else if (IC.isVariable() && IC.getCostDelta() > 0) + Bonus += IC.getCostDelta(); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus + << " for user " << *U << "\n"); + } - if (Constants.empty()) - return false; + return TotalCost + Bonus; +} - LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument " - << A->getNameOrAsOperand() << "\n"); - return true; - } +/// Determine if we should specialize a function based on the incoming values +/// of the given argument. +/// +/// This function implements the goal-directed heuristic. It determines if +/// specializing the function based on the incoming values of argument \p A +/// would result in any significant optimization opportunities. If +/// optimization opportunities exist, the constant values of \p A on which to +/// specialize the function are collected in \p Constants. +/// +/// \returns true if the function should be specialized on the given +/// argument. +bool FunctionSpecializer::isArgumentInteresting(Argument *A, + SmallVectorImpl &Constants) { + // For now, don't attempt to specialize functions based on the values of + // composite types. + if (!A->getType()->isSingleValueType() || A->user_empty()) + return false; - /// Collect in \p Constants all the constant values that argument \p A can - /// take on. 
- void getPossibleConstants(Argument *A, - SmallVectorImpl &Constants) { - Function *F = A->getParent(); + // If the argument isn't overdefined, there's nothing to do. It should + // already be constant. + if (!Solver.getLatticeValueFor(A).isOverdefined()) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Nothing to do, argument " + << A->getNameOrAsOperand() + << " is already constant?\n"); + return false; + } - // Iterate over all the call sites of the argument's parent function. - for (User *U : F->users()) { - if (!isa(U) && !isa(U)) - continue; - auto &CS = *cast(U); - // If the call site has attribute minsize set, that callsite won't be - // specialized. - if (CS.hasFnAttr(Attribute::MinSize)) - continue; + // Collect the constant values that the argument can take on. If the + // argument can't take on any constant values, we aren't going to + // specialize the function. While it's possible to specialize the function + // based on non-constant arguments, there's likely not much benefit to + // constant propagation in doing so. + // + // TODO 1: currently it won't specialize if there are over the threshold of + // calls using the same argument, e.g foo(a) x 4 and foo(b) x 1, but it + // might be beneficial to take the occurrences into account in the cost + // model, so we would need to find the unique constants. + // + // TODO 2: this currently does not support constants, i.e. integer ranges. + // + getPossibleConstants(A, Constants); + + if (Constants.empty()) + return false; - // If the parent of the call site will never be executed, we don't need - // to worry about the passed value. - if (!Solver.isBlockExecutable(CS.getParent())) - continue; + LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument " + << A->getNameOrAsOperand() << "\n"); + return true; +} - auto *V = CS.getArgOperand(A->getArgNo()); - if (isa(V)) - return; +/// Collect in \p Constants all the constant values that argument \p A can +/// take on. +void FunctionSpecializer::getPossibleConstants(Argument *A, + SmallVectorImpl &Constants) { + Function *F = A->getParent(); - // For now, constant expressions are fine but only if they are function - // calls. - if (auto *CE = dyn_cast(V)) - if (!isa(CE->getOperand(0))) - return; + // Iterate over all the call sites of the argument's parent function. + for (User *U : F->users()) { + if (!isa(U) && !isa(U)) + continue; + auto &CS = *cast(U); + // If the call site has attribute minsize set, that callsite won't be + // specialized. + if (CS.hasFnAttr(Attribute::MinSize)) + continue; - // TrackValueOfGlobalVariable only tracks scalar global variables. - if (auto *GV = dyn_cast(V)) { - // Check if we want to specialize on the address of non-constant - // global values. - if (!GV->isConstant()) - if (!SpecializeOnAddresses) - return; + // If the parent of the call site will never be executed, we don't need + // to worry about the passed value. + if (!Solver.isBlockExecutable(CS.getParent())) + continue; - if (!GV->getValueType()->isSingleValueType()) - return; - } + auto *V = CS.getArgOperand(A->getArgNo()); + if (isa(V)) + return; - if (isa(V) && (Solver.getLatticeValueFor(V).isConstant() || - EnableSpecializationForLiteralConstant)) - Constants.push_back({&CS, cast(V)}); - } - } + // For now, constant expressions are fine but only if they are function + // calls. + if (auto *CE = dyn_cast(V)) + if (!isa(CE->getOperand(0))) + return; - /// Rewrite calls to function \p F to call function \p Clone instead. 
- /// - /// This function modifies calls to function \p F as long as the actual - /// arguments match those in \p Args. Note that for recursive calls we - /// need to compare against the cloned formal arguments. - /// - /// Callsites that have been marked with the MinSize function attribute won't - /// be specialized and rewritten. - void rewriteCallSites(Function *Clone, const SmallVectorImpl &Args, - ValueToValueMapTy &Mappings) { - assert(!Args.empty() && "Specialization without arguments"); - Function *F = Args[0].Formal->getParent(); - - SmallVector CallSitesToRewrite; - for (auto *U : F->users()) { - if (!isa(U) && !isa(U)) - continue; - auto &CS = *cast(U); - if (!CS.getCalledFunction() || CS.getCalledFunction() != F) - continue; - CallSitesToRewrite.push_back(&CS); - } + // TrackValueOfGlobalVariable only tracks scalar global variables. + if (auto *GV = dyn_cast(V)) { + // Check if we want to specialize on the address of non-constant + // global values. + if (!GV->isConstant()) + if (!SpecializeOnAddresses) + return; - LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing call sites of " - << F->getName() << " with " << Clone->getName() << "\n"); - - for (auto *CS : CallSitesToRewrite) { - LLVM_DEBUG(dbgs() << "FnSpecialization: " - << CS->getFunction()->getName() << " ->" << *CS - << "\n"); - if (/* recursive call */ - (CS->getFunction() == Clone && - all_of(Args, - [CS, &Mappings](const ArgInfo &Arg) { - unsigned ArgNo = Arg.Formal->getArgNo(); - return CS->getArgOperand(ArgNo) == Mappings[Arg.Formal]; - })) || - /* normal call */ - all_of(Args, [CS](const ArgInfo &Arg) { - unsigned ArgNo = Arg.Formal->getArgNo(); - return CS->getArgOperand(ArgNo) == Arg.Actual; - })) { - CS->setCalledFunction(Clone); - Solver.markOverdefined(CS); - } + if (!GV->getValueType()->isSingleValueType()) + return; } - } - void updateSpecializedFuncs(FuncList &Candidates, FuncList &WorkList) { - for (auto *F : WorkList) { - SpecializedFuncs.insert(F); - - // Initialize the state of the newly created functions, marking them - // argument-tracked and executable. - if (F->hasExactDefinition() && !F->hasFnAttribute(Attribute::Naked)) - Solver.addTrackedFunction(F); - - Solver.addArgumentTrackedFunction(F); - Candidates.push_back(F); - Solver.markBlockExecutable(&F->front()); - - // Replace the function arguments for the specialized functions. - for (Argument &Arg : F->args()) - if (!Arg.use_empty() && tryToReplaceWithConstant(&Arg)) - LLVM_DEBUG(dbgs() << "FnSpecialization: Replaced constant argument: " - << Arg.getNameOrAsOperand() << "\n"); - } + if (isa(V) && (Solver.getLatticeValueFor(V).isConstant() || + EnableSpecializationForLiteralConstant)) + Constants.push_back({&CS, cast(V)}); } -}; -} // namespace - -bool llvm::runFunctionSpecialization( - Module &M, const DataLayout &DL, - std::function GetTLI, - std::function GetTTI, - std::function GetAC, - function_ref GetAnalysis) { - SCCPSolver Solver(DL, GetTLI, M.getContext()); - FunctionSpecializer FS(Solver, GetAC, GetTTI, GetTLI); - bool Changed = false; +} - // Loop over all functions, marking arguments to those with their addresses - // taken or that are external as overdefined. - for (Function &F : M) { - if (F.isDeclaration()) +/// Rewrite calls to function \p F to call function \p Clone instead. +/// +/// This function modifies calls to function \p F as long as the actual +/// arguments match those in \p Args. Note that for recursive calls we +/// need to compare against the cloned formal arguments. 
+/// +/// Callsites that have been marked with the MinSize function attribute won't +/// be specialized and rewritten. +void FunctionSpecializer::rewriteCallSites(Function *Clone, + const SmallVectorImpl &Args, + ValueToValueMapTy &Mappings) { + assert(!Args.empty() && "Specialization without arguments"); + Function *F = Args[0].Formal->getParent(); + + SmallVector CallSitesToRewrite; + for (auto *U : F->users()) { + if (!isa(U) && !isa(U)) continue; - if (F.hasFnAttribute(Attribute::NoDuplicate)) + auto &CS = *cast(U); + if (!CS.getCalledFunction() || CS.getCalledFunction() != F) continue; + CallSitesToRewrite.push_back(&CS); + } - LLVM_DEBUG(dbgs() << "\nFnSpecialization: Analysing decl: " << F.getName() - << "\n"); - Solver.addAnalysis(F, GetAnalysis(F)); + LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing call sites of " + << F->getName() << " with " << Clone->getName() << "\n"); - // Determine if we can track the function's arguments. If so, add the - // function to the solver's set of argument-tracked functions. - if (canTrackArgumentsInterprocedurally(&F)) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Can track arguments\n"); - Solver.addArgumentTrackedFunction(&F); - continue; - } else { - LLVM_DEBUG(dbgs() << "FnSpecialization: Can't track arguments!\n" - << "FnSpecialization: Doesn't have local linkage, or " - << "has its address taken\n"); + for (auto *CS : CallSitesToRewrite) { + LLVM_DEBUG(dbgs() << "FnSpecialization: " + << CS->getFunction()->getName() << " ->" << *CS + << "\n"); + if (/* recursive call */ + (CS->getFunction() == Clone && + all_of(Args, + [CS, &Mappings](const ArgInfo &Arg) { + unsigned ArgNo = Arg.Formal->getArgNo(); + return CS->getArgOperand(ArgNo) == Mappings[Arg.Formal]; + })) || + /* normal call */ + all_of(Args, [CS](const ArgInfo &Arg) { + unsigned ArgNo = Arg.Formal->getArgNo(); + return CS->getArgOperand(ArgNo) == Arg.Actual; + })) { + CS->setCalledFunction(Clone); + Solver.markOverdefined(CS); } - - // Assume the function is called. - Solver.markBlockExecutable(&F.front()); - - // Assume nothing about the incoming arguments. - for (Argument &AI : F.args()) - Solver.markOverdefined(&AI); } +} - // Determine if we can track any of the module's global variables. If so, add - // the global variables we can track to the solver's set of tracked global - // variables. - for (GlobalVariable &G : M.globals()) { - G.removeDeadConstantUsers(); - if (canTrackGlobalVariableInterprocedurally(&G)) - Solver.trackValueOfGlobalVariable(&G); +void FunctionSpecializer::updateSpecializedFuncs(FuncList &Candidates, + FuncList &WorkList) { + for (auto *F : WorkList) { + SpecializedFuncs.insert(F); + + // Initialize the state of the newly created functions, marking them + // argument-tracked and executable. + if (F->hasExactDefinition() && !F->hasFnAttribute(Attribute::Naked)) + Solver.addTrackedFunction(F); + + Solver.addArgumentTrackedFunction(F); + Candidates.push_back(F); + Solver.markBlockExecutable(&F->front()); + + // Replace the function arguments for the specialized functions. + for (Argument &Arg : F->args()) + if (!Arg.use_empty() && tryToReplaceWithConstant(Solver, &Arg)) + LLVM_DEBUG(dbgs() << "FnSpecialization: Replaced constant argument: " + << Arg.getNameOrAsOperand() << "\n"); } +} - auto &TrackedFuncs = Solver.getArgumentTrackedFunctions(); - SmallVector FuncDecls(TrackedFuncs.begin(), - TrackedFuncs.end()); - - // No tracked functions, so nothing to do: don't run the solver and remove - // the ssa_copy intrinsics that may have been introduced. 
- if (TrackedFuncs.empty()) { - removeSSACopy(M); - return false; - } +bool FunctionSpecializer::specialize(FuncList &FuncDecls) { + bool Changed = false; // Solve for constants. auto RunSCCPSolver = [&](auto &WorkList) { @@ -911,7 +795,7 @@ // FIXME: The solver may make changes to the function here, so set // Changed, even if later function specialization does not trigger. for (auto &I : make_early_inc_range(BB)) - Changed |= FS.tryToReplaceWithConstant(&I); + Changed |= tryToReplaceWithConstant(Solver, &I); } } }; @@ -922,20 +806,17 @@ LLVM_DEBUG(dbgs() << "FnSpecialization: *) " << F->getName() << "\n"); #endif - // Initially resolve the constants in all the argument tracked functions. - RunSCCPSolver(FuncDecls); - SmallVector WorkList; unsigned I = 0; while (FuncSpecializationMaxIters != I++ && - FS.specializeFunctions(FuncDecls, WorkList)) { + specializeFunctions(FuncDecls, WorkList)) { LLVM_DEBUG(dbgs() << "FnSpecialization: Finished iteration " << I << "\n"); // Run the solver for the specialized functions. RunSCCPSolver(WorkList); // Replace some unresolved constant arguments. - constantArgPropagation(FuncDecls, M, Solver); + propagateConstantArgs(FuncDecls); WorkList.clear(); Changed = true; diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -32,7 +32,6 @@ initializeDAEPass(Registry); initializeDAHPass(Registry); initializeForceFunctionAttrsLegacyPassPass(Registry); - initializeFunctionSpecializationLegacyPassPass(Registry); initializeGlobalDCELegacyPassPass(Registry); initializeGlobalOptLegacyPassPass(Registry); initializeGlobalSplitPass(Registry); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -690,10 +690,6 @@ if (OptLevel > 2) MPM.add(createCallSiteSplittingPass()); - // Propage constant function arguments by specializing the functions. - if (OptLevel > 2 && EnableFunctionSpecialization) - MPM.add(createFunctionSpecializationPass()); - MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createCalledValuePropagationPass()); @@ -925,10 +921,6 @@ // Split call-site with more constrained arguments. PM.add(createCallSiteSplittingPass()); - // Propage constant function arguments by specializing the functions. - if (EnableFunctionSpecialization && OptLevel > 2) - PM.add(createFunctionSpecializationPass()); - // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. 
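The profitability logic moved above (getSpecializationCost, getUserBonus, getSpecializationBonus and calculateGains) reduces to comparing a size-based cost against a use-based bonus. The following standalone sketch restates that arithmetic with plain integers; the struct, the function and its fields are invented for illustration, and the real code works on InstructionCost values and per-use TTI queries rather than a single pre-computed bonus.

#include <cmath>
#include <cstdint>

// Invented container for the quantities the cost model reads.
struct SpecCandidate {
  int64_t NumInsts;            // CodeMetrics::NumInsts of the candidate.
  int64_t UserCostOfArgUses;   // Summed TTI user cost of the argument's uses.
  unsigned LoopDepth;          // Loop depth of those uses.
  int64_t InlineBonus;         // Bonus for promoting an indirect call site.
  unsigned AlreadySpecialized; // NbFunctionsSpecialized so far.
};

// Gain = Bonus - Cost; calculateGains only keeps candidates with Gain > 0 and
// truncates the sorted worklist to MaxClonesThreshold entries.
static int64_t specializationGain(const SpecCandidate &C, int64_t InstrCost,
                                  int64_t AvgLoopIterationCount) {
  // getSpecializationCost: function size times a penalty that grows with the
  // number of clones already created.
  int64_t Cost = C.NumInsts * InstrCost * (C.AlreadySpecialized + 1);

  // getUserBonus: uses inside loops are weighted by the assumed average trip
  // count raised to the loop depth.
  int64_t Bonus =
      C.UserCostOfArgUses *
      (int64_t)std::pow((double)AvgLoopIterationCount, C.LoopDepth);

  // getSpecializationBonus: add the inlining bonus when a constant function
  // pointer turns an indirect call site into a direct one.
  Bonus += C.InlineBonus;

  return Bonus - Cost;
}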
diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp
--- a/llvm/lib/Transforms/IPO/SCCP.cpp
+++ b/llvm/lib/Transforms/IPO/SCCP.cpp
@@ -28,6 +28,12 @@
   auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & {
     return FAM.getResult<TargetLibraryAnalysis>(F);
   };
+  auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
+    return FAM.getResult<TargetIRAnalysis>(F);
+  };
+  auto GetAC = [&FAM](Function &F) -> AssumptionCache & {
+    return FAM.getResult<AssumptionAnalysis>(F);
+  };
   auto getAnalysis = [&FAM](Function &F) -> AnalysisResultsForFn {
     DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
     return {
@@ -35,7 +41,7 @@
         &DT, FAM.getCachedResult<PostDominatorTreeAnalysis>(F)};
   };
 
-  if (!runIPSCCP(M, DL, GetTLI, getAnalysis))
+  if (!runIPSCCP(M, DL, GetTLI, GetTTI, GetAC, getAnalysis))
     return PreservedAnalyses::all();
 
   PreservedAnalyses PA;
@@ -63,10 +69,17 @@
   bool runOnModule(Module &M) override {
     if (skipModule(M))
       return false;
+
     const DataLayout &DL = M.getDataLayout();
     auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
       return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
     };
+    auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
+      return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+    };
+    auto GetAC = [this](Function &F) -> AssumptionCache & {
+      return this->getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+    };
     auto getAnalysis = [this](Function &F) -> AnalysisResultsForFn {
       DominatorTree &DT =
           this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
@@ -79,13 +92,14 @@
                    nullptr};  // manager, so set them to nullptr.
     };
 
-    return runIPSCCP(M, DL, GetTLI, getAnalysis);
+    return runIPSCCP(M, DL, GetTLI, GetTTI, GetAC, getAnalysis);
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
   }
 };
 
@@ -106,93 +120,3 @@
 // createIPSCCPPass - This is the public interface to this file.
 ModulePass *llvm::createIPSCCPPass() { return new IPSCCPLegacyPass(); }
 
-PreservedAnalyses FunctionSpecializationPass::run(Module &M,
-                                                  ModuleAnalysisManager &AM) {
-  const DataLayout &DL = M.getDataLayout();
-  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
-  auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
-    return FAM.getResult<TargetLibraryAnalysis>(F);
-  };
-  auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
-    return FAM.getResult<TargetIRAnalysis>(F);
-  };
-  auto GetAC = [&FAM](Function &F) -> AssumptionCache & {
-    return FAM.getResult<AssumptionAnalysis>(F);
-  };
-  auto GetAnalysis = [&FAM](Function &F) -> AnalysisResultsForFn {
-    DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
-    return {std::make_unique<PredicateInfo>(
-                F, DT, FAM.getResult<AssumptionAnalysis>(F)),
-            &DT, FAM.getCachedResult<PostDominatorTreeAnalysis>(F)};
-  };
-
-  if (!runFunctionSpecialization(M, DL, GetTLI, GetTTI, GetAC, GetAnalysis))
-    return PreservedAnalyses::all();
-
-  PreservedAnalyses PA;
-  PA.preserve<DominatorTreeAnalysis>();
-  PA.preserve<PostDominatorTreeAnalysis>();
-  PA.preserve<FunctionAnalysisManagerModuleProxy>();
-  return PA;
-}
-
-namespace {
-struct FunctionSpecializationLegacyPass : public ModulePass {
-  static char ID; // Pass identification, replacement for typeid
-  FunctionSpecializationLegacyPass() : ModulePass(ID) {}
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<AssumptionCacheTracker>();
-    AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addRequired<TargetTransformInfoWrapperPass>();
-    AU.addRequired<TargetLibraryInfoWrapperPass>();
-  }
-
-  virtual bool runOnModule(Module &M) override {
-    if (skipModule(M))
-      return false;
-
-    const DataLayout &DL = M.getDataLayout();
-    auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
-      return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
-    };
-    auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
-      return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-    };
-    auto GetAC = [this](Function &F) -> AssumptionCache & {
-      return this->getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-    };
-
-    auto GetAnalysis = [this](Function &F) -> AnalysisResultsForFn {
-      DominatorTree &DT =
-          this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
-      return {
-          std::make_unique<PredicateInfo>(
-              F, DT,
-              this->getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
-                  F)),
-          nullptr,  // We cannot preserve the DT or PDT with the legacy pass
-          nullptr}; // manager, so set them to nullptr.
-    };
-    return runFunctionSpecialization(M, DL, GetTLI, GetTTI, GetAC, GetAnalysis);
-  }
-};
-} // namespace
-
-char FunctionSpecializationLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(
-    FunctionSpecializationLegacyPass, "function-specialization",
-    "Propagate constant arguments by specializing the function", false, false)
-
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(FunctionSpecializationLegacyPass, "function-specialization",
-                    "Propagate constant arguments by specializing the function",
-                    false, false)
-
-ModulePass *llvm::createFunctionSpecializationPass() {
-  return new FunctionSpecializationLegacyPass();
-}
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -94,6 +94,7 @@
   Analysis
   Core
   InstCombine
+  IPO
   Support
   TransformUtils
   )
diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp
--- a/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -51,6 +51,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/FunctionSpecialization.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SCCPSolver.h"
@@ -74,6 +75,9 @@
     IPNumInstReplaced,
     "Number of instructions replaced with (simpler) instruction by IPSCCP");
 
+static cl::opt<bool> SpecializeFunctions("specialize-functions", cl::init(false),
+    cl::Hidden, cl::desc("Enable function specialization"));
+
 // Helper to check if \p LV is either a constant or a constant
 // range with a single element. This should cover exactly the same cases as the
 // old ValueLatticeElement::isConstant() and is intended to be used in the
@@ -421,8 +425,11 @@
 bool llvm::runIPSCCP(
     Module &M, const DataLayout &DL,
     std::function<const TargetLibraryInfo &(Function &)> GetTLI,
+    std::function<TargetTransformInfo &(Function &)> GetTTI,
+    std::function<AssumptionCache &(Function &)> GetAC,
     function_ref<AnalysisResultsForFn(Function &)> getAnalysis) {
   SCCPSolver Solver(DL, GetTLI, M.getContext());
+  FunctionSpecializer Specializer(Solver, M, GetAC, GetTTI, GetTLI);
 
   // Loop over all functions, marking arguments to those with their addresses
   // taken or that are external as overdefined.
@@ -553,14 +560,17 @@
     if (!DeadBB->hasAddressTaken())
       DTU.deleteBB(DeadBB);
 
-    for (BasicBlock &BB : F) {
-      for (Instruction &Inst : llvm::make_early_inc_range(BB)) {
-        if (Solver.getPredicateInfoFor(&Inst)) {
-          if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
-            if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
-              Value *Op = II->getOperand(0);
-              Inst.replaceAllUsesWith(Op);
-              Inst.eraseFromParent();
+    if (!SpecializeFunctions) {
+      // The Function Specializer will delete those after completion.
+      for (BasicBlock &BB : F) {
+        for (Instruction &Inst : llvm::make_early_inc_range(BB)) {
+          if (Solver.getPredicateInfoFor(&Inst)) {
+            if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
+              if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
+                Value *Op = II->getOperand(0);
+                Inst.replaceAllUsesWith(Op);
+                Inst.eraseFromParent();
+              }
             }
           }
         }
@@ -674,5 +684,14 @@
     ++IPNumGlobalConst;
   }
 
+  if (SpecializeFunctions) {
+    SmallVector<Function *> Candidates;
+    for (Function *F : Solver.getArgumentTrackedFunctions())
+      if (!F->hasFnAttribute(Attribute::NoDuplicate))
+        Candidates.push_back(F);
+
+    MadeChanges |= Specializer.specialize(Candidates);
+  }
+
   return MadeChanges;
 }
diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn
@@ -6,6 +6,7 @@
     "//llvm/lib/IR",
     "//llvm/lib/Support",
     "//llvm/lib/Transforms/AggressiveInstCombine",
+    "//llvm/lib/Transforms/IPO",
    "//llvm/lib/Transforms/InstCombine",
    "//llvm/lib/Transforms/Utils",
  ]
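To summarize the integration above: the specializer now shares the IPSCCP solver instead of owning a pass of its own, and it is only reached when the new, off-by-default `specialize-functions` flag is set (for example via opt -passes=ipsccp -specialize-functions). A minimal sketch of that wiring follows; specializeViaIPSCCP is a hypothetical helper name, and in the real patch the equivalent code sits at the end of runIPSCCP, after the solver has already been populated and run.

#include <functional>

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO/FunctionSpecialization.h"
#include "llvm/Transforms/Utils/SCCPSolver.h"

using namespace llvm;

// Hypothetical helper mirroring the tail of runIPSCCP in this patch.
static bool specializeViaIPSCCP(
    Module &M, std::function<const TargetLibraryInfo &(Function &)> GetTLI,
    std::function<TargetTransformInfo &(Function &)> GetTTI,
    std::function<AssumptionCache &(Function &)> GetAC) {
  // One solver drives both constant propagation and specialization. In the
  // real runIPSCCP the solver has been populated (argument-tracked functions,
  // executable blocks, tracked globals) and solved before this point.
  SCCPSolver Solver(M.getDataLayout(), GetTLI, M.getContext());
  FunctionSpecializer Specializer(Solver, M, GetAC, GetTTI, GetTLI);

  // As in the hunk above: only argument-tracked functions that may be
  // duplicated are considered as specialization candidates.
  SmallVector<Function *> Candidates;
  for (Function *F : Solver.getArgumentTrackedFunctions())
    if (!F->hasFnAttribute(Attribute::NoDuplicate))
      Candidates.push_back(F);

  return Specializer.specialize(Candidates);
}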