diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -155,7 +155,6 @@
 void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&);
 void initializeForwardControlFlowIntegrityPass(PassRegistry&);
 void initializeFuncletLayoutPass(PassRegistry&);
-void initializeFunctionSpecializationLegacyPassPass(PassRegistry &);
 void initializeGCMachineCodeAnalysisPass(PassRegistry&);
 void initializeGCModuleInfoPass(PassRegistry&);
 void initializeGVNHoistLegacyPassPass(PassRegistry&);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -219,7 +219,6 @@
       (void) llvm::createInjectTLIMappingsLegacyPass();
       (void) llvm::createUnifyLoopExitsPass();
       (void) llvm::createFixIrreduciblePass();
-      (void)llvm::createFunctionSpecializationPass();
       (void)llvm::createSelectOptimizePass();
 
       (void)new llvm::IntervalPartition();
diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h
--- a/llvm/include/llvm/Transforms/IPO.h
+++ b/llvm/include/llvm/Transforms/IPO.h
@@ -150,11 +150,6 @@
 ///
 ModulePass *createIPSCCPPass();
 
-//===----------------------------------------------------------------------===//
-/// createFunctionSpecializationPass - This pass propagates constants from call
-/// sites to the specialized version of the callee function.
-ModulePass *createFunctionSpecializationPass();
-
 //===----------------------------------------------------------------------===//
 //
 /// createLoopExtractorPass - This pass extracts all natural loops from the
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -0,0 +1,169 @@
+//===- FunctionSpecialization.h - Function Specialization -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This specialises functions with constant parameters. Constant parameters
+// like function pointers and constant globals are propagated to the callee by
+// specializing the function. The main benefit of this pass at the moment is
+// that indirect calls are transformed into direct calls, which provides inline
+// opportunities that the inliner would not have been able to achieve. That's
+// why function specialisation is run before the inliner in the optimisation
+// pipeline; that is by design. Otherwise, we would only benefit from constant
+// passing, which is a valid use-case too, but hasn't been explored much in
+// terms of performance uplifts, cost-model and compile-time impact.
+//
+// Current limitations:
+// - It does not yet handle integer ranges. We do support "literal constants",
+//   but that's off by default under an option.
+// - The cost-model could be further looked into (it mainly focuses on inlining
+//   benefits),
+//
+// Ideas:
+// - With a function specialization attribute for arguments, we could have
+//   a direct way to steer function specialization, avoiding the cost-model,
+//   and thus control compile-times / code-size.
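+//
+// As a purely illustrative example (the function names below are made up and
+// are not taken from this patch or its tests), given:
+//
+//   void apply(int *A, int N, void (*Op)(int &)) {
+//     for (int I = 0; I < N; ++I)
+//       Op(A[I]);                          // indirect call through Op
+//   }
+//   void negate(int &X) { X = -X; }
+//   void caller(int *A, int N) { apply(A, N, &negate); }
+//
+// specialisation can clone `apply` into a version in which `Op` is fixed to
+// `negate`, so the indirect call becomes a direct call that the inliner can
+// then inline into the clone.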
+//
+// Todos:
+// - Specializing recursive functions relies on running the transformation a
+//   number of times, which is controlled by option
+//   `func-specialization-max-iters`. Thus, increasing this value and the
+//   number of iterations will linearly increase the number of times recursive
+//   functions get specialized; see also the discussion in
+//   https://reviews.llvm.org/D106426 for details. Perhaps there is a
+//   compile-time friendlier way to control/limit the number of specialisations
+//   for recursive functions.
+// - Don't transform the function if function specialization does not trigger;
+//   the SCCPSolver may make IR changes.
+//
+// References:
+// - 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable
+//   it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
+#define LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
+
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Scalar/SCCP.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/SCCPSolver.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
+
+using namespace llvm;
+
+namespace llvm {
+// Bookkeeping struct to pass data from the analysis and profitability phase
+// to the actual transform helper functions.
+struct SpecializationInfo {
+  SmallVector<ArgInfo, 8> Args; // Stores the {formal,actual} argument pairs.
+  InstructionCost Gain;         // Profitability: Gain = Bonus - Cost.
+};
+
+using CallSpecBinding = std::pair<CallBase *, SpecializationInfo>;
+// We are using MapVector because it guarantees deterministic iteration
+// order across executions.
+using SpecializationMap = SmallMapVector<CallBase *, SpecializationInfo, 8>;
+
+class FunctionSpecializer {
+
+  /// The IPSCCP Solver.
+  SCCPSolver &Solver;
+
+  Module &M;
+
+  /// Analyses used to help determine if a function should be specialized.
+  std::function<const TargetLibraryInfo &(Function &)> GetTLI;
+  std::function<TargetTransformInfo &(Function &)> GetTTI;
+  std::function<AssumptionCache &(Function &)> GetAC;
+
+  // The number of functions specialised, used for collecting statistics and
+  // also in the cost model.
+  unsigned NbFunctionsSpecialized = 0;
+
+  SmallPtrSet<Function *, 32> SpecializedFuncs;
+  SmallPtrSet<Function *, 32> FullySpecialized;
+  DenseMap<Function *, CodeMetrics> FunctionMetrics;
+
+public:
+  FunctionSpecializer(SCCPSolver &Solver, Module &M,
+                      std::function<const TargetLibraryInfo &(Function &)> GetTLI,
+                      std::function<TargetTransformInfo &(Function &)> GetTTI,
+                      std::function<AssumptionCache &(Function &)> GetAC)
+      : Solver(Solver), M(M), GetTLI(GetTLI), GetTTI(GetTTI), GetAC(GetAC) {}
+
+  ~FunctionSpecializer() {
+    // Eliminate dead code.
+    removeDeadFunctions();
+    cleanUpSSA();
+  }
+
+  bool isClonedFunction(Function *F) { return SpecializedFuncs.count(F); }
+
+  bool specialize();
+
+private:
+  Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call);
+
+  /// A constant stack value is an AllocaInst that has a single constant
+  /// value stored to it. Return this constant if such an alloca stack value
+  /// is a function argument.
+  Constant *getConstantStackValue(CallInst *Call, Value *Val);
+
+  /// Iterate over the argument tracked functions to see if there
+  /// are any new constant values for the call instruction via
+  /// stack variables.
+  void promoteConstantStackValues();
+
+  /// Clean up fully specialized functions.
+  void removeDeadFunctions();
+
+  /// Remove any ssa_copy intrinsics that may have been introduced.
+  void cleanUpSSA();
+
+  // Compute the code metrics for function \p F.
+  CodeMetrics &analyzeFunction(Function *F);
+
+  /// This function decides whether it's worthwhile to specialize function
+  /// \p F based on the known constant values its arguments can take on. It
+  /// only discovers potential specialization opportunities without actually
+  /// applying them.
+  ///
+  /// \returns true if any specializations have been found.
+  bool findSpecializations(Function *F, InstructionCost Cost,
+                           SmallVectorImpl<CallSpecBinding> &WorkList);
+
+  bool isCandidateFunction(Function *F);
+
+  Function *createSpecialization(Function *F, CallSpecBinding &Specialization);
+
+  /// Compute and return the cost of specializing function \p F.
+  InstructionCost getSpecializationCost(Function *F);
+
+  /// Compute a bonus for replacing argument \p A with constant \p C.
+  InstructionCost getSpecializationBonus(Argument *A, Constant *C,
+                                         const LoopInfo &LI);
+
+  /// Determine if it is possible to specialise the function for constant values
+  /// of the formal parameter \p A.
+  bool isArgumentInteresting(Argument *A);
+
+  /// Check if the value \p V (an actual argument) is a constant or can only
+  /// have a constant value. Return that constant.
+  Constant *getCandidateConstant(Value *V);
+
+  /// Rewrite calls to function \p F to call \p Clones instead.
+  /// Assumes Clones[i] corresponds to Specializations[i].
+  void rewriteCallSites(Function *F, SmallVectorImpl<Function *> &Clones,
+                        SmallVectorImpl<CallSpecBinding> &Specializations);
+};
+} // namespace
+
+#endif // LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
diff --git a/llvm/include/llvm/Transforms/IPO/SCCP.h b/llvm/include/llvm/Transforms/IPO/SCCP.h
--- a/llvm/include/llvm/Transforms/IPO/SCCP.h
+++ b/llvm/include/llvm/Transforms/IPO/SCCP.h
@@ -32,14 +32,6 @@
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
 };
 
-/// Pass to perform interprocedural constant propagation by specializing
-/// functions
-class FunctionSpecializationPass
-    : public PassInfoMixin<FunctionSpecializationPass> {
-public:
-  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
-};
-
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_IPO_SCCP_H
diff --git a/llvm/include/llvm/Transforms/Scalar/SCCP.h b/llvm/include/llvm/Transforms/Scalar/SCCP.h
--- a/llvm/include/llvm/Transforms/Scalar/SCCP.h
+++ b/llvm/include/llvm/Transforms/Scalar/SCCP.h
@@ -42,14 +42,9 @@
 
 bool runIPSCCP(Module &M, const DataLayout &DL,
                std::function<const TargetLibraryInfo &(Function &)> GetTLI,
+               std::function<TargetTransformInfo &(Function &)> GetTTI,
+               std::function<AssumptionCache &(Function &)> GetAC,
               function_ref<AnalysisResultsForFn(Function &)> getAnalysis);
-
-bool runFunctionSpecialization(
-    Module &M, const DataLayout &DL,
-    std::function<TargetLibraryInfo &(Function &)> GetTLI,
-    std::function<TargetTransformInfo &(Function &)> GetTTI,
-    std::function<AssumptionCache &(Function &)> GetAC,
-    function_ref<AnalysisResultsForFn(Function &)> GetAnalysis);
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_SCALAR_SCCP_H
diff --git a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
--- a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
+++ b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
@@ -116,6 +116,10 @@
   /// should be rerun.
bool resolvedUndefsIn(Function &F); + void solveWhileResolvedUndefsIn(Module &M); + + void solveWhileResolvedUndefsIn(SmallVectorImpl &WorkList); + bool isBlockExecutable(BasicBlock *BB) const; // isEdgeFeasible - Return true if the control flow edge from the 'From' basic diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -206,7 +206,6 @@ namespace llvm { extern cl::opt MaxDevirtIterations; extern cl::opt EnableConstraintElimination; -extern cl::opt EnableFunctionSpecialization; extern cl::opt EnableGVNHoist; extern cl::opt EnableGVNSink; extern cl::opt EnableHotColdSplit; @@ -935,10 +934,6 @@ for (auto &C : PipelineEarlySimplificationEPCallbacks) C(MPM, Level); - // Specialize functions with IPSCCP. - if (EnableFunctionSpecialization && Level == OptimizationLevel::O3) - MPM.addPass(FunctionSpecializationPass()); - // Interprocedural constant propagation now that basic cleanup has occurred // and prior to optimizing globals. // FIXME: This position in the pipeline hasn't been carefully considered in @@ -1539,8 +1534,6 @@ MPM.addPass(PGOIndirectCallPromotion( true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); - if (EnableFunctionSpecialization && Level == OptimizationLevel::O3) - MPM.addPass(FunctionSpecializationPass()); // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -59,7 +59,6 @@ MODULE_PASS("extract-blocks", BlockExtractorPass()) MODULE_PASS("forceattrs", ForceFunctionAttrsPass()) MODULE_PASS("function-import", FunctionImportPass()) -MODULE_PASS("function-specialization", FunctionSpecializationPass()) MODULE_PASS("globaldce", GlobalDCEPass()) MODULE_PASS("globalopt", GlobalOptPass()) MODULE_PASS("globalsplit", GlobalSplitPass()) diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -53,6 +53,7 @@ #include "llvm/Analysis/ValueLattice.h" #include "llvm/Analysis/ValueLatticeUtils.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Transforms/IPO/FunctionSpecialization.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/SCCPSolver.h" @@ -70,11 +71,6 @@ cl::desc("Force function specialization for every call site with a " "constant argument")); -static cl::opt FuncSpecializationMaxIters( - "func-specialization-max-iters", cl::Hidden, - cl::desc("The maximum number of iterations function specialization is run"), - cl::init(1)); - static cl::opt MaxClonesThreshold( "func-specialization-max-clones", cl::Hidden, cl::desc("The maximum number of clones allowed for a single function " @@ -108,37 +104,8 @@ cl::desc("Enable specialization of functions that take a literal constant " "as an argument.")); -namespace { -// Bookkeeping struct to pass data from the analysis and profitability phase -// to the actual transform helper functions. -struct SpecializationInfo { - SmallVector Args; // Stores the {formal,actual} argument pairs. 
- InstructionCost Gain; // Profitability: Gain = Bonus - Cost. -}; -} // Anonymous namespace - -using FuncList = SmallVectorImpl; -using CallArgBinding = std::pair; -using CallSpecBinding = std::pair; -// We are using MapVector because it guarantees deterministic iteration -// order across executions. -using SpecializationMap = SmallMapVector; - -// Helper to check if \p LV is either a constant or a constant -// range with a single element. This should cover exactly the same cases as the -// old ValueLatticeElement::isConstant() and is intended to be used in the -// transition to ValueLatticeElement. -static bool isConstant(const ValueLatticeElement &LV) { - return LV.isConstant() || - (LV.isConstantRange() && LV.getConstantRange().isSingleElement()); -} - -// Helper to check if \p LV is either overdefined or a constant int. -static bool isOverdefined(const ValueLatticeElement &LV) { - return !LV.isUnknownOrUndef() && !isConstant(LV); -} - -static Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call) { +Constant *FunctionSpecializer::getPromotableAlloca(AllocaInst *Alloca, + CallInst *Call) { Value *StoreValue = nullptr; for (auto *User : Alloca->users()) { // We can't use llvm::isAllocaPromotable() as that would fail because of @@ -161,14 +128,14 @@ // Bail if there is any other unknown usage. return nullptr; } - return dyn_cast_or_null(StoreValue); + return getCandidateConstant(StoreValue); } // A constant stack value is an AllocaInst that has a single constant // value stored to it. Return this constant if such an alloca stack value // is a function argument. -static Constant *getConstantStackValue(CallInst *Call, Value *Val, - SCCPSolver &Solver) { +Constant *FunctionSpecializer::getConstantStackValue(CallInst *Call, + Value *Val) { if (!Val) return nullptr; Val = Val->stripPointerCasts(); @@ -201,19 +168,23 @@ // ret void // } // -static void constantArgPropagation(FuncList &WorkList, Module &M, - SCCPSolver &Solver) { +void FunctionSpecializer::promoteConstantStackValues() { // Iterate over the argument tracked functions see if there // are any new constant values for the call instruction via // stack variables. - for (auto *F : WorkList) { + for (Function &F : M) { + if (!Solver.isArgumentTrackedFunction(&F)) + continue; - for (auto *User : F->users()) { + for (auto *User : F.users()) { auto *Call = dyn_cast(User); if (!Call) continue; + if (!Solver.isBlockExecutable(Call->getParent())) + continue; + bool Changed = false; for (const Use &U : Call->args()) { unsigned Idx = Call->getArgOperandNo(&U); @@ -223,7 +194,7 @@ if (!Call->onlyReadsMemory(Idx) || !ArgOpType->isPointerTy()) continue; - auto *ConstVal = getConstantStackValue(Call, ArgOp, Solver); + auto *ConstVal = getConstantStackValue(Call, ArgOp); if (!ConstVal) continue; @@ -245,7 +216,7 @@ } // ssa_copy intrinsics are introduced by the SCCP solver. These intrinsics -// interfere with the constantArgPropagation optimization. +// interfere with the promoteConstantStackValues() optimization. static void removeSSACopy(Function &F) { for (BasicBlock &BB : F) { for (Instruction &Inst : llvm::make_early_inc_range(BB)) { @@ -260,685 +231,475 @@ } } -static void removeSSACopy(Module &M) { - for (Function &F : M) - removeSSACopy(F); +/// Remove any ssa_copy intrinsics that may have been introduced. +void FunctionSpecializer::cleanUpSSA() { + for (Function *F : SpecializedFuncs) + removeSSACopy(*F); } -namespace { -class FunctionSpecializer { - - /// The IPSCCP Solver. 
- SCCPSolver &Solver; - - /// Analyses used to help determine if a function should be specialized. - std::function GetAC; - std::function GetTTI; - std::function GetTLI; - - SmallPtrSet SpecializedFuncs; - SmallPtrSet FullySpecialized; - SmallVector ReplacedWithConstant; - DenseMap FunctionMetrics; - -public: - FunctionSpecializer(SCCPSolver &Solver, - std::function GetAC, - std::function GetTTI, - std::function GetTLI) - : Solver(Solver), GetAC(GetAC), GetTTI(GetTTI), GetTLI(GetTLI) {} - - ~FunctionSpecializer() { - // Eliminate dead code. - removeDeadInstructions(); - removeDeadFunctions(); - } - - /// Attempt to specialize functions in the module to enable constant - /// propagation across function boundaries. - /// - /// \returns true if at least one function is specialized. - bool specializeFunctions(FuncList &Candidates, FuncList &WorkList) { - bool Changed = false; - for (auto *F : Candidates) { - if (!isCandidateFunction(F)) - continue; - - auto Cost = getSpecializationCost(F); - if (!Cost.isValid()) { - LLVM_DEBUG( - dbgs() << "FnSpecialization: Invalid specialization cost.\n"); - continue; - } - - LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for " - << F->getName() << " is " << Cost << "\n"); +/// Attempt to specialize functions in the module to enable constant +/// propagation across function boundaries. +/// +/// \returns true if at least one function is specialized. +bool FunctionSpecializer::specialize() { + bool Changed = false; - SmallVector Specializations; - if (!findSpecializations(F, Cost, Specializations)) { - LLVM_DEBUG( - dbgs() << "FnSpecialization: No possible specializations found\n"); - continue; - } + for (Function &F : M) { + if (!isCandidateFunction(&F)) + continue; - Changed = true; - for (auto &Entry : Specializations) - specializeFunction(F, Entry.second, WorkList); + auto Cost = getSpecializationCost(&F); + if (!Cost.isValid()) { + LLVM_DEBUG( + dbgs() << "FnSpecialization: Invalid specialization cost.\n"); + continue; } - updateSpecializedFuncs(Candidates, WorkList); - NumFuncSpecialized += NbFunctionsSpecialized; - return Changed; - } + LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for " + << F.getName() << " is " << Cost << "\n"); - void removeDeadInstructions() { - for (auto *I : ReplacedWithConstant) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead instruction " << *I - << "\n"); - I->eraseFromParent(); + SmallVector Specializations; + if (!findSpecializations(&F, Cost, Specializations)) { + LLVM_DEBUG( + dbgs() << "FnSpecialization: No possible specializations found\n"); + continue; } - ReplacedWithConstant.clear(); - } - void removeDeadFunctions() { - for (auto *F : FullySpecialized) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead function " - << F->getName() << "\n"); - F->eraseFromParent(); - } - FullySpecialized.clear(); - } + Changed = true; - bool tryToReplaceWithConstant(Value *V) { - if (!V->getType()->isSingleValueType() || isa(V) || - V->user_empty()) - return false; - - const ValueLatticeElement &IV = Solver.getLatticeValueFor(V); - if (isOverdefined(IV)) - return false; - auto *Const = - isConstant(IV) ? Solver.getConstant(IV) : UndefValue::get(V->getType()); - - LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing " << *V - << "\nFnSpecialization: with " << *Const << "\n"); - - // Record uses of V to avoid visiting irrelevant uses of const later. 
- SmallVector UseInsts; - for (auto *U : V->users()) - if (auto *I = dyn_cast(U)) - if (Solver.isBlockExecutable(I->getParent())) - UseInsts.push_back(I); - - V->replaceAllUsesWith(Const); - - for (auto *I : UseInsts) - Solver.visit(I); - - // Remove the instruction from Block and Solver. - if (auto *I = dyn_cast(V)) { - if (I->isSafeToRemove()) { - ReplacedWithConstant.push_back(I); - Solver.removeLatticeValueFor(I); - } - } - return true; - } + SmallVector Clones; + for (CallSpecBinding &Specialization : Specializations) + Clones.push_back(createSpecialization(&F, Specialization)); -private: - // The number of functions specialised, used for collecting statistics and - // also in the cost model. - unsigned NbFunctionsSpecialized = 0; - - // Compute the code metrics for function \p F. - CodeMetrics &analyzeFunction(Function *F) { - auto I = FunctionMetrics.insert({F, CodeMetrics()}); - CodeMetrics &Metrics = I.first->second; - if (I.second) { - // The code metrics were not cached. - SmallPtrSet EphValues; - CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues); - for (BasicBlock &BB : *F) - Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues); - - LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function " - << F->getName() << " is " << Metrics.NumInsts - << " instructions\n"); - } - return Metrics; + Solver.solveWhileResolvedUndefsIn(Clones); + rewriteCallSites(&F, Clones, Specializations); } - /// Clone the function \p F and remove the ssa_copy intrinsics added by - /// the SCCPSolver in the cloned version. - Function *cloneCandidateFunction(Function *F, ValueToValueMapTy &Mappings) { - Function *Clone = CloneFunction(F, Mappings); - removeSSACopy(*Clone); - return Clone; - } + promoteConstantStackValues(); - /// This function decides whether it's worthwhile to specialize function - /// \p F based on the known constant values its arguments can take on. It - /// only discovers potential specialization opportunities without actually - /// applying them. - /// - /// \returns true if any specializations have been found. - bool findSpecializations(Function *F, InstructionCost Cost, - SmallVectorImpl &WorkList) { - // Get a list of interesting arguments. - SmallVector Args; - for (Argument &Arg : F->args()) - if (isArgumentInteresting(&Arg)) - Args.push_back(&Arg); - - if (!Args.size()) - return false; - - // Find all the call sites for the function. - SpecializationMap Specializations; - for (User *U : F->users()) { - if (!isa(U) && !isa(U)) - continue; - auto &CS = *cast(U); - // If the call site has attribute minsize set, that callsite won't be - // specialized. - if (CS.hasFnAttr(Attribute::MinSize)) - continue; + LLVM_DEBUG(if (NbFunctionsSpecialized) + dbgs() << "FnSpecialization: Specialized " + << NbFunctionsSpecialized << " functions in module " + << M.getName() << "\n"); - // If the parent of the call site will never be executed, we don't need - // to worry about the passed value. - if (!Solver.isBlockExecutable(CS.getParent())) - continue; - - // Examine arguments and create specialization candidates from call sites - // with constant arguments. 
- bool Added = false; - for (Argument *A : Args) { - Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo())); - if (!C) - continue; - - if (!Added) { - Specializations[&CS] = {{}, 0 - Cost}; - Added = true; - } + NumFuncSpecialized += NbFunctionsSpecialized; + return Changed; +} - SpecializationInfo &S = Specializations.back().second; - S.Gain += getSpecializationBonus(A, C, Solver.getLoopInfo(*F)); - S.Args.push_back({A, C}); - } - Added = false; - } - // Remove unprofitable specializations. - if (!ForceFunctionSpecialization) - Specializations.remove_if( - [](const auto &Entry) { return Entry.second.Gain <= 0; }); - - // Clear the MapVector and return the underlying vector. - WorkList = Specializations.takeVector(); - - // Sort the candidates in descending order. - llvm::stable_sort(WorkList, [](const auto &L, const auto &R) { - return L.second.Gain > R.second.Gain; - }); - - // Truncate the worklist to 'MaxClonesThreshold' candidates if necessary. - if (WorkList.size() > MaxClonesThreshold) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed " - << "the maximum number of clones threshold.\n" - << "FnSpecialization: Truncating worklist to " - << MaxClonesThreshold << " candidates.\n"); - WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end()); - } +void FunctionSpecializer::removeDeadFunctions() { + for (Function *F : FullySpecialized) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead function " + << F->getName() << "\n"); + F->eraseFromParent(); + } + FullySpecialized.clear(); +} - LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function " - << F->getName() << "\n"; - for (const auto &Entry - : WorkList) { - dbgs() << "FnSpecialization: Gain = " << Entry.second.Gain - << "\n"; - for (const ArgInfo &Arg : Entry.second.Args) - dbgs() << "FnSpecialization: FormalArg = " - << Arg.Formal->getNameOrAsOperand() - << ", ActualArg = " - << Arg.Actual->getNameOrAsOperand() << "\n"; - }); - - return !WorkList.empty(); +// Compute the code metrics for function \p F. +CodeMetrics &FunctionSpecializer::analyzeFunction(Function *F) { + auto I = FunctionMetrics.insert({F, CodeMetrics()}); + CodeMetrics &Metrics = I.first->second; + if (I.second) { + // The code metrics were not cached. + SmallPtrSet EphValues; + CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues); + for (BasicBlock &BB : *F) + Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function " + << F->getName() << " is " << Metrics.NumInsts + << " instructions\n"); } + return Metrics; +} - bool isCandidateFunction(Function *F) { - // Do not specialize the cloned function again. - if (SpecializedFuncs.contains(F)) - return false; +/// Clone the function \p F and remove the ssa_copy intrinsics added by +/// the SCCPSolver in the cloned version. +static Function *cloneCandidateFunction(Function *F) { + ValueToValueMapTy Mappings; + Function *Clone = CloneFunction(F, Mappings); + removeSSACopy(*Clone); + return Clone; +} - // If we're optimizing the function for size, we shouldn't specialize it. - if (F->hasOptSize() || - shouldOptimizeForSize(F, nullptr, nullptr, PGSOQueryType::IRPass)) - return false; +/// This function decides whether it's worthwhile to specialize function +/// \p F based on the known constant values its arguments can take on. It +/// only discovers potential specialization opportunities without actually +/// applying them. +/// +/// \returns true if any specializations have been found. 
+bool FunctionSpecializer::findSpecializations(Function *F, InstructionCost Cost, + SmallVectorImpl &WorkList) { + // Get a list of interesting arguments. + SmallVector Args; + for (Argument &Arg : F->args()) + if (isArgumentInteresting(&Arg)) + Args.push_back(&Arg); + + if (!Args.size()) + return false; - // Exit if the function is not executable. There's no point in specializing - // a dead function. - if (!Solver.isBlockExecutable(&F->getEntryBlock())) - return false; + // Find all the call sites for the function. + SpecializationMap Specializations; + for (User *U : F->users()) { + if (!isa(U) && !isa(U)) + continue; + auto &CS = *cast(U); - // It wastes time to specialize a function which would get inlined finally. - if (F->hasFnAttribute(Attribute::AlwaysInline)) - return false; + // Skip irrelevant users. + if (CS.getCalledFunction() != F) + continue; - LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName() - << "\n"); - return true; - } + // If the call site has attribute minsize set, that callsite won't be + // specialized. + if (CS.hasFnAttr(Attribute::MinSize)) + continue; - void specializeFunction(Function *F, SpecializationInfo &S, - FuncList &WorkList) { - ValueToValueMapTy Mappings; - Function *Clone = cloneCandidateFunction(F, Mappings); - - // Rewrite calls to the function so that they call the clone instead. - rewriteCallSites(Clone, S.Args, Mappings); - - // Initialize the lattice state of the arguments of the function clone, - // marking the argument on which we specialized the function constant - // with the given value. - Solver.markArgInFuncSpecialization(Clone, S.Args); - - // Mark all the specialized functions - WorkList.push_back(Clone); - NbFunctionsSpecialized++; - - // If the function has been completely specialized, the original function - // is no longer needed. Mark it unreachable. - if (F->getNumUses() == 0 || all_of(F->users(), [F](User *U) { - if (auto *CS = dyn_cast(U)) - return CS->getFunction() == F; - return false; - })) { - Solver.markFunctionUnreachable(F); - FullySpecialized.insert(F); - } - } + // If the parent of the call site will never be executed, we don't need + // to worry about the passed value. + if (!Solver.isBlockExecutable(CS.getParent())) + continue; - /// Compute and return the cost of specializing function \p F. - InstructionCost getSpecializationCost(Function *F) { - CodeMetrics &Metrics = analyzeFunction(F); - // If the code metrics reveal that we shouldn't duplicate the function, we - // shouldn't specialize it. Set the specialization cost to Invalid. - // Or if the lines of codes implies that this function is easy to get - // inlined so that we shouldn't specialize it. - if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() || - (!ForceFunctionSpecialization && - !F->hasFnAttribute(Attribute::NoInline) && - Metrics.NumInsts < SmallFunctionThreshold)) - return InstructionCost::getInvalid(); - - // Otherwise, set the specialization cost to be the cost of all the - // instructions in the function and penalty for specializing more functions. - unsigned Penalty = NbFunctionsSpecialized + 1; - return Metrics.NumInsts * InlineConstants::getInstrCost() * Penalty; - } + // Examine arguments and create specialization candidates from call sites + // with constant arguments. 
+ bool Added = false; + for (Argument *A : Args) { + Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo())); + if (!C) + continue; - InstructionCost getUserBonus(User *U, llvm::TargetTransformInfo &TTI, - const LoopInfo &LI) { - auto *I = dyn_cast_or_null(U); - // If not an instruction we do not know how to evaluate. - // Keep minimum possible cost for now so that it doesnt affect - // specialization. - if (!I) - return std::numeric_limits::min(); - - InstructionCost Cost = - TTI.getInstructionCost(U, TargetTransformInfo::TCK_SizeAndLatency); - - // Increase the cost if it is inside the loop. - unsigned LoopDepth = LI.getLoopDepth(I->getParent()); - Cost *= std::pow((double)AvgLoopIterationCount, LoopDepth); - - // Traverse recursively if there are more uses. - // TODO: Any other instructions to be added here? - if (I->mayReadFromMemory() || I->isCast()) - for (auto *User : I->users()) - Cost += getUserBonus(User, TTI, LI); - - return Cost; - } + if (!Added) { + Specializations[&CS] = {{}, 0 - Cost}; + Added = true; + } - /// Compute a bonus for replacing argument \p A with constant \p C. - InstructionCost getSpecializationBonus(Argument *A, Constant *C, - const LoopInfo &LI) { - Function *F = A->getParent(); - auto &TTI = (GetTTI)(*F); - LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " - << C->getNameOrAsOperand() << "\n"); - - InstructionCost TotalCost = 0; - for (auto *U : A->users()) { - TotalCost += getUserBonus(U, TTI, LI); - LLVM_DEBUG(dbgs() << "FnSpecialization: User cost "; - TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n"); + SpecializationInfo &S = Specializations.back().second; + S.Gain += getSpecializationBonus(A, C, Solver.getLoopInfo(*F)); + S.Args.push_back({A, C}); } + Added = false; + } - // The below heuristic is only concerned with exposing inlining - // opportunities via indirect call promotion. If the argument is not a - // (potentially casted) function pointer, give up. - Function *CalledFunction = dyn_cast(C->stripPointerCasts()); - if (!CalledFunction) - return TotalCost; - - // Get TTI for the called function (used for the inline cost). - auto &CalleeTTI = (GetTTI)(*CalledFunction); - - // Look at all the call sites whose called value is the argument. - // Specializing the function on the argument would allow these indirect - // calls to be promoted to direct calls. If the indirect call promotion - // would likely enable the called function to be inlined, specializing is a - // good idea. - int Bonus = 0; - for (User *U : A->users()) { - if (!isa(U) && !isa(U)) - continue; - auto *CS = cast(U); - if (CS->getCalledOperand() != A) - continue; + // Remove unprofitable specializations. + if (!ForceFunctionSpecialization) + Specializations.remove_if( + [](const auto &Entry) { return Entry.second.Gain <= 0; }); + + // Clear the MapVector and return the underlying vector. + WorkList = Specializations.takeVector(); + + // Sort the candidates in descending order. + llvm::stable_sort(WorkList, [](const auto &L, const auto &R) { + return L.second.Gain > R.second.Gain; + }); + + // Truncate the worklist to 'MaxClonesThreshold' candidates if necessary. 
+ if (WorkList.size() > MaxClonesThreshold) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed " + << "the maximum number of clones threshold.\n" + << "FnSpecialization: Truncating worklist to " + << MaxClonesThreshold << " candidates.\n"); + WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end()); + } - // Get the cost of inlining the called function at this call site. Note - // that this is only an estimate. The called function may eventually - // change in a way that leads to it not being inlined here, even though - // inlining looks profitable now. For example, one of its called - // functions may be inlined into it, making the called function too large - // to be inlined into this call site. - // - // We apply a boost for performing indirect call promotion by increasing - // the default threshold by the threshold for indirect calls. - auto Params = getInlineParams(); - Params.DefaultThreshold += InlineConstants::IndirectCallThreshold; - InlineCost IC = - getInlineCost(*CS, CalledFunction, Params, CalleeTTI, GetAC, GetTLI); - - // We clamp the bonus for this call to be between zero and the default - // threshold. - if (IC.isAlways()) - Bonus += Params.DefaultThreshold; - else if (IC.isVariable() && IC.getCostDelta() > 0) - Bonus += IC.getCostDelta(); - - LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus - << " for user " << *U << "\n"); - } + LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function " + << F->getName() << "\n"; + for (const auto &Entry + : WorkList) { + dbgs() << "FnSpecialization: Gain = " << Entry.second.Gain + << "\n"; + for (const ArgInfo &Arg : Entry.second.Args) + dbgs() << "FnSpecialization: FormalArg = " + << Arg.Formal->getNameOrAsOperand() + << ", ActualArg = " + << Arg.Actual->getNameOrAsOperand() << "\n"; + }); + + return !WorkList.empty(); +} - return TotalCost + Bonus; - } +bool FunctionSpecializer::isCandidateFunction(Function *F) { + if (F->isDeclaration()) + return false; - /// Determine if it is possible to specialise the function for constant values - /// of the formal parameter \p A. - bool isArgumentInteresting(Argument *A) { - // No point in specialization if the argument is unused. - if (A->user_empty()) - return false; - - // For now, don't attempt to specialize functions based on the values of - // composite types. - Type *ArgTy = A->getType(); - if (!ArgTy->isSingleValueType()) - return false; - - // Specialization of integer and floating point types needs to be explicitly - // enabled. - if (!EnableSpecializationForLiteralConstant && - (ArgTy->isIntegerTy() || ArgTy->isFloatingPointTy())) - return false; - - // SCCP solver does not record an argument that will be constructed on - // stack. - if (A->hasByValAttr() && !A->getParent()->onlyReadsMemory()) - return false; - - // Check the lattice value and decide if we should attemt to specialize, - // based on this argument. No point in specialization, if the lattice value - // is already a constant. 
- const ValueLatticeElement &LV = Solver.getLatticeValueFor(A); - if (LV.isUnknownOrUndef() || LV.isConstant() || - (LV.isConstantRange() && LV.getConstantRange().isSingleElement())) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Nothing to do, argument " - << A->getNameOrAsOperand() << " is already constant\n"); - return false; - } + if (F->hasFnAttribute(Attribute::NoDuplicate)) + return false; - return true; - } + if (!Solver.isArgumentTrackedFunction(F)) + return false; - /// Check if the valuy \p V (an actual argument) is a constant or can only - /// have a constant value. Return that constant. - Constant *getCandidateConstant(Value *V) { - if (isa(V)) - return nullptr; + // Do not specialize the cloned function again. + if (SpecializedFuncs.contains(F)) + return false; - // TrackValueOfGlobalVariable only tracks scalar global variables. - if (auto *GV = dyn_cast(V)) { - // Check if we want to specialize on the address of non-constant - // global values. - if (!GV->isConstant() && !SpecializeOnAddresses) - return nullptr; + // If we're optimizing the function for size, we shouldn't specialize it. + if (F->hasOptSize() || + shouldOptimizeForSize(F, nullptr, nullptr, PGSOQueryType::IRPass)) + return false; - if (!GV->getValueType()->isSingleValueType()) - return nullptr; - } + // Exit if the function is not executable. There's no point in specializing + // a dead function. + if (!Solver.isBlockExecutable(&F->getEntryBlock())) + return false; - // Select for possible specialisation values that are constants or - // are deduced to be constants or constant ranges with a single element. - Constant *C = dyn_cast(V); - if (!C) { - const ValueLatticeElement &LV = Solver.getLatticeValueFor(V); - if (LV.isConstant()) - C = LV.getConstant(); - else if (LV.isConstantRange() && - LV.getConstantRange().isSingleElement()) { - assert(V->getType()->isIntegerTy() && "Non-integral constant range"); - C = Constant::getIntegerValue( - V->getType(), *LV.getConstantRange().getSingleElement()); - } else - return nullptr; - } + // It wastes time to specialize a function which would get inlined finally. + if (F->hasFnAttribute(Attribute::AlwaysInline)) + return false; - LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument " - << V->getNameOrAsOperand() << "\n"); + LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName() + << "\n"); + return true; +} - return C; - } +Function *FunctionSpecializer::createSpecialization(Function *F, + CallSpecBinding &Specialization) { + Function *Clone = cloneCandidateFunction(F); - /// Rewrite calls to function \p F to call function \p Clone instead. - /// - /// This function modifies calls to function \p F as long as the actual - /// arguments match those in \p Args. Note that for recursive calls we - /// need to compare against the cloned formal arguments. - /// - /// Callsites that have been marked with the MinSize function attribute won't - /// be specialized and rewritten. 
- void rewriteCallSites(Function *Clone, const SmallVectorImpl &Args, - ValueToValueMapTy &Mappings) { - assert(!Args.empty() && "Specialization without arguments"); - Function *F = Args[0].Formal->getParent(); - - SmallVector CallSitesToRewrite; - for (auto *U : F->users()) { - if (!isa(U) && !isa(U)) - continue; - auto &CS = *cast(U); - if (!CS.getCalledFunction() || CS.getCalledFunction() != F) - continue; - CallSitesToRewrite.push_back(&CS); - } + // Initialize the lattice state of the arguments of the function clone, + // marking the argument on which we specialized the function constant + // with the given value. + Solver.markArgInFuncSpecialization(Clone, Specialization.second.Args); - LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing call sites of " - << F->getName() << " with " << Clone->getName() << "\n"); + Solver.addArgumentTrackedFunction(Clone); + Solver.markBlockExecutable(&Clone->front()); - for (auto *CS : CallSitesToRewrite) { - LLVM_DEBUG(dbgs() << "FnSpecialization: " - << CS->getFunction()->getName() << " ->" << *CS - << "\n"); - if (/* recursive call */ - (CS->getFunction() == Clone && - all_of(Args, - [CS, &Mappings](const ArgInfo &Arg) { - unsigned ArgNo = Arg.Formal->getArgNo(); - return CS->getArgOperand(ArgNo) == Mappings[Arg.Formal]; - })) || - /* normal call */ - all_of(Args, [CS](const ArgInfo &Arg) { - unsigned ArgNo = Arg.Formal->getArgNo(); - return CS->getArgOperand(ArgNo) == Arg.Actual; - })) { - CS->setCalledFunction(Clone); - Solver.markOverdefined(CS); - } - } - } + // Mark all the specialized functions + SpecializedFuncs.insert(Clone); + NbFunctionsSpecialized++; - void updateSpecializedFuncs(FuncList &Candidates, FuncList &WorkList) { - for (auto *F : WorkList) { - SpecializedFuncs.insert(F); + return Clone; +} - // Initialize the state of the newly created functions, marking them - // argument-tracked and executable. - if (F->hasExactDefinition() && !F->hasFnAttribute(Attribute::Naked)) - Solver.addTrackedFunction(F); +/// Compute and return the cost of specializing function \p F. +InstructionCost FunctionSpecializer::getSpecializationCost(Function *F) { + CodeMetrics &Metrics = analyzeFunction(F); + // If the code metrics reveal that we shouldn't duplicate the function, we + // shouldn't specialize it. Set the specialization cost to Invalid. + // Or if the lines of codes implies that this function is easy to get + // inlined so that we shouldn't specialize it. + if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() || + (!ForceFunctionSpecialization && + !F->hasFnAttribute(Attribute::NoInline) && + Metrics.NumInsts < SmallFunctionThreshold)) + return InstructionCost::getInvalid(); + + // Otherwise, set the specialization cost to be the cost of all the + // instructions in the function and penalty for specializing more functions. + unsigned Penalty = NbFunctionsSpecialized + 1; + return Metrics.NumInsts * InlineConstants::getInstrCost() * Penalty; +} - Solver.addArgumentTrackedFunction(F); - Candidates.push_back(F); - Solver.markBlockExecutable(&F->front()); +static InstructionCost getUserBonus(User *U, llvm::TargetTransformInfo &TTI, + const LoopInfo &LI) { + auto *I = dyn_cast_or_null(U); + // If not an instruction we do not know how to evaluate. + // Keep minimum possible cost for now so that it doesnt affect + // specialization. + if (!I) + return std::numeric_limits::min(); + + InstructionCost Cost = + TTI.getInstructionCost(U, TargetTransformInfo::TCK_SizeAndLatency); + + // Increase the cost if it is inside the loop. 
+ unsigned LoopDepth = LI.getLoopDepth(I->getParent()); + Cost *= std::pow((double)AvgLoopIterationCount, LoopDepth); + + // Traverse recursively if there are more uses. + // TODO: Any other instructions to be added here? + if (I->mayReadFromMemory() || I->isCast()) + for (auto *User : I->users()) + Cost += getUserBonus(User, TTI, LI); + + return Cost; +} - // Replace the function arguments for the specialized functions. - for (Argument &Arg : F->args()) - if (!Arg.use_empty() && tryToReplaceWithConstant(&Arg)) - LLVM_DEBUG(dbgs() << "FnSpecialization: Replaced constant argument: " - << Arg.getNameOrAsOperand() << "\n"); - } +/// Compute a bonus for replacing argument \p A with constant \p C. +InstructionCost FunctionSpecializer::getSpecializationBonus(Argument *A, + Constant *C, const LoopInfo &LI) { + Function *F = A->getParent(); + auto &TTI = (GetTTI)(*F); + LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " + << C->getNameOrAsOperand() << "\n"); + + InstructionCost TotalCost = 0; + for (auto *U : A->users()) { + TotalCost += getUserBonus(U, TTI, LI); + LLVM_DEBUG(dbgs() << "FnSpecialization: User cost "; + TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n"); } -}; -} // namespace - -bool llvm::runFunctionSpecialization( - Module &M, const DataLayout &DL, - std::function GetTLI, - std::function GetTTI, - std::function GetAC, - function_ref GetAnalysis) { - SCCPSolver Solver(DL, GetTLI, M.getContext()); - FunctionSpecializer FS(Solver, GetAC, GetTTI, GetTLI); - bool Changed = false; - // Loop over all functions, marking arguments to those with their addresses - // taken or that are external as overdefined. - for (Function &F : M) { - if (F.isDeclaration()) + // The below heuristic is only concerned with exposing inlining + // opportunities via indirect call promotion. If the argument is not a + // (potentially casted) function pointer, give up. + Function *CalledFunction = dyn_cast(C->stripPointerCasts()); + if (!CalledFunction) + return TotalCost; + + // Get TTI for the called function (used for the inline cost). + auto &CalleeTTI = (GetTTI)(*CalledFunction); + + // Look at all the call sites whose called value is the argument. + // Specializing the function on the argument would allow these indirect + // calls to be promoted to direct calls. If the indirect call promotion + // would likely enable the called function to be inlined, specializing is a + // good idea. + int Bonus = 0; + for (User *U : A->users()) { + if (!isa(U) && !isa(U)) continue; - if (F.hasFnAttribute(Attribute::NoDuplicate)) + auto *CS = cast(U); + if (CS->getCalledOperand() != A) continue; - LLVM_DEBUG(dbgs() << "\nFnSpecialization: Analysing decl: " << F.getName() - << "\n"); - Solver.addAnalysis(F, GetAnalysis(F)); + // Get the cost of inlining the called function at this call site. Note + // that this is only an estimate. The called function may eventually + // change in a way that leads to it not being inlined here, even though + // inlining looks profitable now. For example, one of its called + // functions may be inlined into it, making the called function too large + // to be inlined into this call site. + // + // We apply a boost for performing indirect call promotion by increasing + // the default threshold by the threshold for indirect calls. 
+ auto Params = getInlineParams(); + Params.DefaultThreshold += InlineConstants::IndirectCallThreshold; + InlineCost IC = + getInlineCost(*CS, CalledFunction, Params, CalleeTTI, GetAC, GetTLI); + + // We clamp the bonus for this call to be between zero and the default + // threshold. + if (IC.isAlways()) + Bonus += Params.DefaultThreshold; + else if (IC.isVariable() && IC.getCostDelta() > 0) + Bonus += IC.getCostDelta(); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus + << " for user " << *U << "\n"); + } - // Determine if we can track the function's arguments. If so, add the - // function to the solver's set of argument-tracked functions. - if (canTrackArgumentsInterprocedurally(&F)) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Can track arguments\n"); - Solver.addArgumentTrackedFunction(&F); - continue; - } else { - LLVM_DEBUG(dbgs() << "FnSpecialization: Can't track arguments!\n" - << "FnSpecialization: Doesn't have local linkage, or " - << "has its address taken\n"); - } + return TotalCost + Bonus; +} - // Assume the function is called. - Solver.markBlockExecutable(&F.front()); +/// Determine if it is possible to specialise the function for constant values +/// of the formal parameter \p A. +bool FunctionSpecializer::isArgumentInteresting(Argument *A) { + // No point in specialization if the argument is unused. + if (A->user_empty()) + return false; - // Assume nothing about the incoming arguments. - for (Argument &AI : F.args()) - Solver.markOverdefined(&AI); - } + // For now, don't attempt to specialize functions based on the values of + // composite types. + Type *ArgTy = A->getType(); + if (!ArgTy->isSingleValueType()) + return false; - // Determine if we can track any of the module's global variables. If so, add - // the global variables we can track to the solver's set of tracked global - // variables. - for (GlobalVariable &G : M.globals()) { - G.removeDeadConstantUsers(); - if (canTrackGlobalVariableInterprocedurally(&G)) - Solver.trackValueOfGlobalVariable(&G); - } + // Specialization of integer and floating point types needs to be explicitly + // enabled. + if (!EnableSpecializationForLiteralConstant && + (ArgTy->isIntegerTy() || ArgTy->isFloatingPointTy())) + return false; - auto &TrackedFuncs = Solver.getArgumentTrackedFunctions(); - SmallVector FuncDecls(TrackedFuncs.begin(), - TrackedFuncs.end()); + // SCCP solver does not record an argument that will be constructed on + // stack. + if (A->hasByValAttr() && !A->getParent()->onlyReadsMemory()) + return false; - // No tracked functions, so nothing to do: don't run the solver and remove - // the ssa_copy intrinsics that may have been introduced. - if (TrackedFuncs.empty()) { - removeSSACopy(M); + // Check the lattice value and decide if we should attemt to specialize, + // based on this argument. No point in specialization, if the lattice value + // is already a constant. + const ValueLatticeElement &LV = Solver.getLatticeValueFor(A); + if (LV.isUnknownOrUndef() || LV.isConstant() || + (LV.isConstantRange() && LV.getConstantRange().isSingleElement())) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Nothing to do, argument " + << A->getNameOrAsOperand() << " is already constant\n"); return false; } - // Solve for constants. - auto RunSCCPSolver = [&](auto &WorkList) { - bool ResolvedUndefs = true; - - while (ResolvedUndefs) { - // Not running the solver unnecessary is checked in regression test - // nothing-to-do.ll, so if this debug message is changed, this regression - // test needs updating too. 
- LLVM_DEBUG(dbgs() << "FnSpecialization: Running solver\n"); - - Solver.solve(); - LLVM_DEBUG(dbgs() << "FnSpecialization: Resolving undefs\n"); - ResolvedUndefs = false; - for (Function *F : WorkList) - if (Solver.resolvedUndefsIn(*F)) - ResolvedUndefs = true; - } + return true; +} - for (auto *F : WorkList) { - for (BasicBlock &BB : *F) { - if (!Solver.isBlockExecutable(&BB)) - continue; - // FIXME: The solver may make changes to the function here, so set - // Changed, even if later function specialization does not trigger. - for (auto &I : make_early_inc_range(BB)) - Changed |= FS.tryToReplaceWithConstant(&I); - } - } - }; +/// Check if the valuy \p V (an actual argument) is a constant or can only +/// have a constant value. Return that constant. +Constant *FunctionSpecializer::getCandidateConstant(Value *V) { + if (isa(V)) + return nullptr; + + // TrackValueOfGlobalVariable only tracks scalar global variables. + if (auto *GV = dyn_cast(V)) { + // Check if we want to specialize on the address of non-constant + // global values. + if (!GV->isConstant() && !SpecializeOnAddresses) + return nullptr; + + if (!GV->getValueType()->isSingleValueType()) + return nullptr; + } -#ifndef NDEBUG - LLVM_DEBUG(dbgs() << "FnSpecialization: Worklist fn decls:\n"); - for (auto *F : FuncDecls) - LLVM_DEBUG(dbgs() << "FnSpecialization: *) " << F->getName() << "\n"); -#endif + // Select for possible specialisation values that are constants or + // are deduced to be constants or constant ranges with a single element. + Constant *C = dyn_cast(V); + if (!C) { + const ValueLatticeElement &LV = Solver.getLatticeValueFor(V); + if (LV.isConstant()) + C = LV.getConstant(); + else if (LV.isConstantRange() && + LV.getConstantRange().isSingleElement()) { + assert(V->getType()->isIntegerTy() && "Non-integral constant range"); + C = Constant::getIntegerValue( + V->getType(), *LV.getConstantRange().getSingleElement()); + } else + return nullptr; + } - // Initially resolve the constants in all the argument tracked functions. - RunSCCPSolver(FuncDecls); + LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument " + << V->getNameOrAsOperand() << "\n"); - SmallVector WorkList; - unsigned I = 0; - while (FuncSpecializationMaxIters != I++ && - FS.specializeFunctions(FuncDecls, WorkList)) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Finished iteration " << I << "\n"); + return C; +} - // Run the solver for the specialized functions. - RunSCCPSolver(WorkList); +/// Rewrite calls to function \p F to call \p Clones instead. +/// Assumes Clones[i] corresponds to Specializations[i]. +void FunctionSpecializer::rewriteCallSites(Function *F, + SmallVectorImpl &Clones, + SmallVectorImpl &Specializations) { + SmallVector CallSitesToRewrite; + for (User *U : F->users()) + if (auto *CS = dyn_cast(U)) + if (CS->getCalledFunction() == F) + CallSitesToRewrite.push_back(CS); + + for (unsigned I = 0; I < Clones.size(); ++I) { + Function *Clone = Clones[I]; + CallSpecBinding &Specialization = Specializations[I]; - // Replace some unresolved constant arguments. 
- constantArgPropagation(FuncDecls, M, Solver); + LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing call sites of " + << F->getName() << " with " << Clone->getName() << "\n"); - WorkList.clear(); - Changed = true; + for (auto I = CallSitesToRewrite.begin(), + IE = CallSitesToRewrite.end(); I != IE; ++I) { + CallBase *CS = *I; + if (CS == Specialization.first || + all_of(Specialization.second.Args, [CS, this](const ArgInfo &Arg) { + unsigned ArgNo = Arg.Formal->getArgNo(); + return getCandidateConstant(CS->getArgOperand(ArgNo)) == Arg.Actual; + })) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing " << *CS << "\n"); + CS->setCalledFunction(Clone); + std::swap(*I--, *--IE); + CallSitesToRewrite.pop_back(); + } + } } - LLVM_DEBUG(dbgs() << "FnSpecialization: Number of specializations = " - << NumFuncSpecialized << "\n"); - - // Remove any ssa_copy intrinsics that may have been introduced. - removeSSACopy(M); - return Changed; + // If the function has been completely specialized, the original function + // is no longer needed. Mark it unreachable. + if (F->getNumUses() == 0 || + all_of(F->users(), [F](User *U) { + return isa(U) && + cast(U)->getFunction() == F; })) { + Solver.markFunctionUnreachable(F); + FullySpecialized.insert(F); + } } + diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -31,7 +31,6 @@ initializeDAEPass(Registry); initializeDAHPass(Registry); initializeForceFunctionAttrsLegacyPassPass(Registry); - initializeFunctionSpecializationLegacyPassPass(Registry); initializeGlobalDCELegacyPassPass(Registry); initializeGlobalOptLegacyPassPass(Registry); initializeGlobalSplitPass(Registry); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -147,10 +147,6 @@ cl::desc( "Enable pass to eliminate conditions based on linear constraints.")); -cl::opt EnableFunctionSpecialization( - "enable-function-specialization", cl::init(false), cl::Hidden, - cl::desc("Enable Function Specialization pass")); - cl::opt AttributorRun( "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass."), @@ -622,10 +618,6 @@ if (OptLevel > 2) MPM.add(createCallSiteSplittingPass()); - // Propage constant function arguments by specializing the functions. 
- if (OptLevel > 2 && EnableFunctionSpecialization) - MPM.add(createFunctionSpecializationPass()); - MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createCalledValuePropagationPass()); diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp --- a/llvm/lib/Transforms/IPO/SCCP.cpp +++ b/llvm/lib/Transforms/IPO/SCCP.cpp @@ -17,27 +17,36 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Utils/SCCPSolver.h" using namespace llvm; +extern cl::opt SpecializeFunctions; + PreservedAnalyses IPSCCPPass::run(Module &M, ModuleAnalysisManager &AM) { const DataLayout &DL = M.getDataLayout(); auto &FAM = AM.getResult(M).getManager(); auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & { return FAM.getResult(F); }; + auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { + return FAM.getResult(F); + }; + auto GetAC = [&FAM](Function &F) -> AssumptionCache & { + return FAM.getResult(F); + }; auto getAnalysis = [&FAM](Function &F) -> AnalysisResultsForFn { DominatorTree &DT = FAM.getResult(F); return { std::make_unique(F, DT, FAM.getResult(F)), &DT, FAM.getCachedResult(F), - nullptr}; + SpecializeFunctions ? &FAM.getResult(F) : nullptr }; }; - if (!runIPSCCP(M, DL, GetTLI, getAnalysis)) + if (!runIPSCCP(M, DL, GetTLI, GetTTI, GetAC, getAnalysis)) return PreservedAnalyses::all(); PreservedAnalyses PA; @@ -69,6 +78,12 @@ auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & { return this->getAnalysis().getTLI(F); }; + auto GetTTI = [this](Function &F) -> TargetTransformInfo & { + return this->getAnalysis().getTTI(F); + }; + auto GetAC = [this](Function &F) -> AssumptionCache & { + return this->getAnalysis().getAssumptionCache(F); + }; auto getAnalysis = [this](Function &F) -> AnalysisResultsForFn { DominatorTree &DT = this->getAnalysis(F).getDomTree(); @@ -82,13 +97,14 @@ nullptr}; }; - return runIPSCCP(M, DL, GetTLI, getAnalysis); + return runIPSCCP(M, DL, GetTLI, GetTTI, GetAC, getAnalysis); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); } }; @@ -108,96 +124,3 @@ // createIPSCCPPass - This is the public interface to this file. 
ModulePass *llvm::createIPSCCPPass() { return new IPSCCPLegacyPass(); } - -PreservedAnalyses FunctionSpecializationPass::run(Module &M, - ModuleAnalysisManager &AM) { - const DataLayout &DL = M.getDataLayout(); - auto &FAM = AM.getResult(M).getManager(); - auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { - return FAM.getResult(F); - }; - auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { - return FAM.getResult(F); - }; - auto GetAC = [&FAM](Function &F) -> AssumptionCache & { - return FAM.getResult(F); - }; - auto GetAnalysis = [&FAM](Function &F) -> AnalysisResultsForFn { - DominatorTree &DT = FAM.getResult(F); - return {std::make_unique( - F, DT, FAM.getResult(F)), - &DT, FAM.getCachedResult(F), - &FAM.getResult(F)}; - }; - - if (!runFunctionSpecialization(M, DL, GetTLI, GetTTI, GetAC, GetAnalysis)) - return PreservedAnalyses::all(); - - PreservedAnalyses PA; - PA.preserve(); - PA.preserve(); - PA.preserve(); - return PA; -} - -namespace { -struct FunctionSpecializationLegacyPass : public ModulePass { - static char ID; // Pass identification, replacement for typeid - FunctionSpecializationLegacyPass() : ModulePass(ID) {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - } - - bool runOnModule(Module &M) override { - if (skipModule(M)) - return false; - - const DataLayout &DL = M.getDataLayout(); - auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { - return this->getAnalysis().getTLI(F); - }; - auto GetTTI = [this](Function &F) -> TargetTransformInfo & { - return this->getAnalysis().getTTI(F); - }; - auto GetAC = [this](Function &F) -> AssumptionCache & { - return this->getAnalysis().getAssumptionCache(F); - }; - - auto GetAnalysis = [this](Function &F) -> AnalysisResultsForFn { - DominatorTree &DT = - this->getAnalysis(F).getDomTree(); - return { - std::make_unique( - F, DT, - this->getAnalysis().getAssumptionCache( - F)), - nullptr, // We cannot preserve the LI, DT, or PDT with the legacy pass - nullptr, // manager, so set them to nullptr. 
- nullptr}; - }; - return runFunctionSpecialization(M, DL, GetTLI, GetTTI, GetAC, GetAnalysis); - } -}; -} // namespace - -char FunctionSpecializationLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN( - FunctionSpecializationLegacyPass, "function-specialization", - "Propagate constant arguments by specializing the function", false, false) - -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(FunctionSpecializationLegacyPass, "function-specialization", - "Propagate constant arguments by specializing the function", - false, false) - -ModulePass *llvm::createFunctionSpecializationPass() { - return new FunctionSpecializationLegacyPass(); -} diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -94,6 +94,7 @@ Analysis Core InstCombine + IPO Support TransformUtils ) diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -51,6 +51,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/FunctionSpecialization.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SCCPSolver.h" @@ -74,6 +75,14 @@ IPNumInstReplaced, "Number of instructions replaced with (simpler) instruction by IPSCCP"); +cl::opt<bool> SpecializeFunctions("specialize-functions", cl::init(false), + cl::Hidden, cl::desc("Enable function specialization")); + +static cl::opt<unsigned> FuncSpecializationMaxIters( + "func-specialization-max-iters", cl::Hidden, + cl::desc("The maximum number of iterations function specialization is run"), + cl::init(1)); + // Helper to check if \p LV is either a constant or a constant // range with a single element.
This should cover exactly the same cases as the // old ValueLatticeElement::isConstant() and is intended to be used in the @@ -240,7 +249,7 @@ } static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB, - DomTreeUpdater &DTU, + DomTreeUpdater *DTU, BasicBlock *&NewUnreachableBB); // runSCCP() - Run the Sparse Conditional Constant Propagation algorithm, @@ -295,7 +304,7 @@ BasicBlock *NewUnreachableBB = nullptr; for (BasicBlock &BB : F) - MadeChanges |= removeNonFeasibleEdges(Solver, &BB, DTU, NewUnreachableBB); + MadeChanges |= removeNonFeasibleEdges(Solver, &BB, &DTU, NewUnreachableBB); for (BasicBlock *DeadBB : BlocksToErase) if (!DeadBB->hasAddressTaken()) @@ -420,7 +429,7 @@ } static bool removeNonFeasibleEdges(const SCCPSolver &Solver, BasicBlock *BB, - DomTreeUpdater &DTU, + DomTreeUpdater *DTU, BasicBlock *&NewUnreachableBB) { SmallPtrSet FeasibleSuccessors; bool HasNonFeasibleEdges = false; @@ -447,12 +456,13 @@ SmallVector Updates; for (BasicBlock *Succ : successors(BB)) { Succ->removePredecessor(BB); - if (SeenSuccs.insert(Succ).second) + if (DTU && SeenSuccs.insert(Succ).second) Updates.push_back({DominatorTree::Delete, BB, Succ}); } TI->eraseFromParent(); new UnreachableInst(BB->getContext(), BB); - DTU.applyUpdatesPermissive(Updates); + if (DTU) + DTU->applyUpdatesPermissive(Updates); } else if (FeasibleSuccessors.size() == 1) { // Replace with an unconditional branch to the only feasible successor. BasicBlock *OnlyFeasibleSuccessor = *FeasibleSuccessors.begin(); @@ -467,12 +477,14 @@ } Succ->removePredecessor(BB); - Updates.push_back({DominatorTree::Delete, BB, Succ}); + if (DTU) + Updates.push_back({DominatorTree::Delete, BB, Succ}); } BranchInst::Create(OnlyFeasibleSuccessor, BB); TI->eraseFromParent(); - DTU.applyUpdatesPermissive(Updates); + if (DTU) + DTU->applyUpdatesPermissive(Updates); } else if (FeasibleSuccessors.size() > 1) { SwitchInstProfUpdateWrapper SI(*cast(TI)); SmallVector Updates; @@ -489,8 +501,10 @@ } SI->setDefaultDest(NewUnreachableBB); - Updates.push_back({DominatorTree::Delete, BB, DefaultDest}); - Updates.push_back({DominatorTree::Insert, BB, NewUnreachableBB}); + if (DTU) { + Updates.push_back({DominatorTree::Delete, BB, DefaultDest}); + Updates.push_back({DominatorTree::Insert, BB, NewUnreachableBB}); + } } for (auto CI = SI->case_begin(); CI != SI->case_end();) { @@ -501,12 +515,14 @@ BasicBlock *Succ = CI->getCaseSuccessor(); Succ->removePredecessor(BB); - Updates.push_back({DominatorTree::Delete, BB, Succ}); + if (DTU) + Updates.push_back({DominatorTree::Delete, BB, Succ}); SI.removeCase(CI); // Don't increment CI, as we removed a case. } - DTU.applyUpdatesPermissive(Updates); + if (DTU) + DTU->applyUpdatesPermissive(Updates); } else { llvm_unreachable("Must have at least one feasible successor"); } @@ -516,8 +532,11 @@ bool llvm::runIPSCCP( Module &M, const DataLayout &DL, std::function GetTLI, + std::function GetTTI, + std::function GetAC, function_ref getAnalysis) { SCCPSolver Solver(DL, GetTLI, M.getContext()); + FunctionSpecializer Specializer(Solver, M, GetTLI, GetTTI, GetAC); // Loop over all functions, marking arguments to those with their addresses // taken or that are external as overdefined. @@ -556,21 +575,16 @@ Solver.trackValueOfGlobalVariable(&G); } + bool MadeChanges = false; + // Solve for constants. 
- bool ResolvedUndefs = true; - Solver.solve(); - while (ResolvedUndefs) { - LLVM_DEBUG(dbgs() << "RESOLVING UNDEFS\n"); - ResolvedUndefs = false; - for (Function &F : M) { - if (Solver.resolvedUndefsIn(F)) - ResolvedUndefs = true; - } - if (ResolvedUndefs) - Solver.solve(); - } + Solver.solveWhileResolvedUndefsIn(M); - bool MadeChanges = false; + if (SpecializeFunctions) { + unsigned Iters = 0; + while (Iters++ < FuncSpecializationMaxIters && + Specializer.specialize()); + } // Iterate over all of the instructions in the module, replacing them with // constants if we have found them to be of constant values. @@ -627,26 +641,36 @@ IPNumInstRemoved, IPNumInstReplaced); } - DomTreeUpdater DTU = Solver.getDTU(F); + Optional OptDTU = + SpecializeFunctions && Specializer.isClonedFunction(&F) ? + None : Optional(Solver.getDTU(F)); + + DomTreeUpdater *DTU = OptDTU ? &*OptDTU : nullptr; + // Change dead blocks to unreachable. We do it after replacing constants // in all executable blocks, because changeToUnreachable may remove PHI // nodes in executable blocks we found values for. The function's entry // block is not part of BlocksToErase, so we have to handle it separately. for (BasicBlock *BB : BlocksToErase) { NumInstRemoved += changeToUnreachable(BB->getFirstNonPHI(), - /*PreserveLCSSA=*/false, &DTU); + /*PreserveLCSSA=*/false, DTU); } if (!Solver.isBlockExecutable(&F.front())) NumInstRemoved += changeToUnreachable(F.front().getFirstNonPHI(), - /*PreserveLCSSA=*/false, &DTU); + /*PreserveLCSSA=*/false, DTU); BasicBlock *NewUnreachableBB = nullptr; for (BasicBlock &BB : F) MadeChanges |= removeNonFeasibleEdges(Solver, &BB, DTU, NewUnreachableBB); - for (BasicBlock *DeadBB : BlocksToErase) - if (!DeadBB->hasAddressTaken()) - DTU.deleteBB(DeadBB); + for (BasicBlock *DeadBB : BlocksToErase) { + if (!DeadBB->hasAddressTaken()) { + if (DTU) + DTU->deleteBB(DeadBB); + else + DeadBB->eraseFromParent(); + } + } for (BasicBlock &BB : F) { for (Instruction &Inst : llvm::make_early_inc_range(BB)) { diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -461,6 +461,26 @@ for (auto &BB : *F) BBExecutable.erase(&BB); } + + void solveWhileResolvedUndefsIn(Module &M) { + bool ResolvedUndefs = true; + while (ResolvedUndefs) { + solve(); + ResolvedUndefs = false; + for (Function &F : M) + ResolvedUndefs |= resolvedUndefsIn(F); + } + } + + void solveWhileResolvedUndefsIn(SmallVectorImpl &WorkList) { + bool ResolvedUndefs = true; + while (ResolvedUndefs) { + solve(); + ResolvedUndefs = false; + for (Function *F : WorkList) + ResolvedUndefs |= resolvedUndefsIn(*F); + } + } }; } // namespace llvm @@ -1507,6 +1527,9 @@ } } + LLVM_DEBUG(if (MadeChange) + dbgs() << "\nResolved undefs in " << F.getName() << '\n'); + return MadeChange; } @@ -1570,6 +1593,14 @@ return Visitor->resolvedUndefsIn(F); } +void SCCPSolver::solveWhileResolvedUndefsIn(Module &M) { + Visitor->solveWhileResolvedUndefsIn(M); +} + +void SCCPSolver::solveWhileResolvedUndefsIn(SmallVectorImpl &WorkList) { + Visitor->solveWhileResolvedUndefsIn(WorkList); +} + bool SCCPSolver::isBlockExecutable(BasicBlock *BB) const { return Visitor->isBlockExecutable(BB); } diff --git a/llvm/test/Transforms/FunctionSpecialization/bug52821-use-after-free.ll b/llvm/test/Transforms/FunctionSpecialization/bug52821-use-after-free.ll --- a/llvm/test/Transforms/FunctionSpecialization/bug52821-use-after-free.ll +++ 
b/llvm/test/Transforms/FunctionSpecialization/bug52821-use-after-free.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -function-specialization -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -S < %s | FileCheck %s %mystruct = type { i32, [2 x i64] } @@ -8,17 +8,11 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: br i1 true, label [[FOR_COND2:%.*]], label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: call void @callee(%mystruct* nonnull null) -; CHECK-NEXT: br label [[FOR_COND]] +; CHECK-NEXT: br label [[FOR_COND2:%.*]] ; CHECK: for.cond2: -; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[FOR_BODY2:%.*]] +; CHECK-NEXT: br label [[FOR_BODY2:%.*]] ; CHECK: for.body2: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[MYSTRUCT:%.*]], %mystruct* null, i64 0, i32 1, i64 3 ; CHECK-NEXT: br label [[FOR_COND2]] -; CHECK: for.end: -; CHECK-NEXT: ret %mystruct* [[ARG:%.*]] ; entry: br label %for.cond @@ -48,7 +42,7 @@ ; CHECK-LABEL: @caller( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = call %mystruct* @myfunc(%mystruct* undef) -; CHECK-NEXT: ret %mystruct* [[CALL]] +; CHECK-NEXT: ret %mystruct* undef ; entry: %call = call %mystruct* @myfunc(%mystruct* undef) @@ -56,3 +50,4 @@ } declare void @callee(%mystruct*) + diff --git a/llvm/test/Transforms/FunctionSpecialization/bug55000-read-uninitialized-value.ll b/llvm/test/Transforms/FunctionSpecialization/bug55000-read-uninitialized-value.ll --- a/llvm/test/Transforms/FunctionSpecialization/bug55000-read-uninitialized-value.ll +++ b/llvm/test/Transforms/FunctionSpecialization/bug55000-read-uninitialized-value.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -func-specialization-max-iters=2 -func-specialization-size-threshold=20 -func-specialization-avg-iters-cost=20 -function-specialization-for-literal-constant=true -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -func-specialization-max-iters=2 -func-specialization-max-clones=1 -function-specialization-for-literal-constant=true -S < %s | FileCheck %s declare hidden i1 @compare(ptr) align 2 declare hidden { i8, ptr } @getType(ptr) align 2 diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-always-inline.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-always-inline.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-always-inline.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-always-inline.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -func-specialization-avg-iters-cost=3 -func-specialization-size-threshold=10 -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -func-specialization-avg-iters-cost=3 -func-specialization-size-threshold=10 -S < %s | FileCheck %s ; CHECK-NOT: foo.{{[0-9]+}} @@ -58,4 +58,4 @@ return: %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ] ret i32 %retval.0 -} \ No newline at end of file +} diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll @@ -4,7 +4,7 @@ ; Note that this test case shows that function 
specialization pass would ; transform the function even if no specialization happened. -; RUN: opt -function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s %struct = type { i8, i16, i32, i64, i64} @Global = internal constant %struct {i8 0, i16 1, i32 2, i64 3, i64 4} @@ -18,8 +18,7 @@ define internal i64 @func(i64 *%x, i64 (i64*)* %binop) { ; CHECK-LABEL: @func( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 [[BINOP:%.*]](i64* [[X:%.*]]) -; CHECK-NEXT: ret i64 [[TMP0]] +; CHECK-NEXT: unreachable ; entry: %tmp0 = call i64 %binop(i64* %x) diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression2.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression2.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression2.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s ; Check that we don't crash and specialise on a constant expression. diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression3.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression3.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression3.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s define i32 @main() { ; CHECK-LABEL: @main( diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression4.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression4.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression4.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s ; Check that we don't crash and specialise on a function call with byval attribute. 
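All of the test updates above follow the same recipe: the old standalone -function-specialization invocation becomes IPSCCP plus the new -specialize-functions flag. For orientation only, here is a minimal sketch of a test written directly against the integrated pass; the function names, the expected clones, and the CHECK lines are illustrative and not taken from any test in this patch.

; RUN: opt -ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s
;
; %f is overdefined (two different callees reach it), so IPSCCP alone cannot
; fold the indirect call; the specializer is expected to create one clone per
; callee in which the indirect call becomes a direct one.
; CHECK-DAG: call i32 @plus1(
; CHECK-DAG: call i32 @minus1(

define i32 @plus1(i32 %x) {
entry:
  %add = add nsw i32 %x, 1
  ret i32 %add
}

define i32 @minus1(i32 %x) {
entry:
  %sub = sub nsw i32 %x, 1
  ret i32 %sub
}

define internal i32 @apply(i32 %x, ptr %f) noinline {
entry:
  %call = call i32 %f(i32 %x)
  ret i32 %call
}

define i32 @caller(i32 %x) {
entry:
  %a = call i32 @apply(i32 %x, ptr @plus1)
  %b = call i32 @apply(i32 %a, ptr @minus1)
  ret i32 %b
}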
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression5.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression5.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression5.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression5.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -function-specialization -force-function-specialization -func-specialization-on-address -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -func-specialization-on-address -S < %s | FileCheck %s ; Check that we don't crash and specialise on a scalar global variable with byval attribute. diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -function-specialization-for-literal-constant=true -func-specialization-size-threshold=10 -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -function-specialization-for-literal-constant=true -func-specialization-size-threshold=10 -S < %s | FileCheck %s ; Check that the literal constant parameter could be specialized. ; CHECK: @foo.1( @@ -41,4 +41,4 @@ %retval.2 = call i32 @foo(i1 0) %retval = add nsw i32 %retval.1, %retval.2 ret i32 %retval -} \ No newline at end of file +} diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -func-specialization-avg-iters-cost=5 -func-specialization-size-threshold=10 -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -func-specialization-avg-iters-cost=5 -func-specialization-size-threshold=10 -S < %s | FileCheck %s ; Check that the loop depth results in a larger specialization bonus. 
; CHECK: @foo.1( diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -S < %s | FileCheck %s ; CHECK-NOT: @compute.1 ; CHECK-NOT: @compute.2 diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize2.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize2.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize2.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -func-specialization-size-threshold=3 -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -func-specialization-size-threshold=3 -S < %s | FileCheck %s ; Checks for callsites that have been annotated with MinSize. No specialisation ; expected here: diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -func-specialization-size-threshold=3 -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -func-specialization-size-threshold=3 -S < %s | FileCheck %s ; Checks for callsites that have been annotated with MinSize. We only expect ; specialisation for the call that does not have the attribute: diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s ; Function @foo has function attribute 'noduplicate', so check that we don't ; specialize it: diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup2.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup2.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup2.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s ; Check that function foo does not gets specialised as it contains an intrinsic ; that is marked as NoDuplicate. 
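As a gloss on the two nodup tests just above: functions that may not be duplicated are skipped by the specializer even when constant function-pointer arguments would otherwise make cloning attractive. A small self-contained sketch under that assumption; the names and CHECK lines below are invented for illustration.

; RUN: opt -ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s
;
; @dispatch carries the noduplicate attribute, so no clone should be created
; even though %f only ever takes the two known callees.
; CHECK-NOT: @dispatch.1(
; CHECK-NOT: @dispatch.2(

declare i32 @ext0(i32)
declare i32 @ext1(i32)

define internal i32 @dispatch(i32 %x, ptr %f) noduplicate noinline {
entry:
  %call = call i32 %f(i32 %x)
  ret i32 %call
}

define i32 @main(i32 %x) {
entry:
  %a = call i32 @dispatch(i32 %x, ptr @ext0)
  %b = call i32 @dispatch(i32 %a, ptr @ext1)
  ret i32 %b
}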
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-noexec.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-noexec.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-noexec.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-noexec.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s ; The if.then block is not executed, so check that we don't specialise here. diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -function-specialization -force-function-specialization -S < %s | FileCheck %s -; RUN: opt -function-specialization -force-function-specialization -func-specialization-on-address=0 -S < %s | FileCheck %s -; RUN: opt -function-specialization -force-function-specialization -func-specialization-on-address=1 -S < %s | FileCheck %s --check-prefix=ON-ADDRESS +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -func-specialization-on-address=0 -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -func-specialization-on-address=1 -S < %s | FileCheck %s --check-prefix=ON-ADDRESS ; Global B is not constant. We do not specialise on addresses unless we ; enable that: diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nothing-todo.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nothing-todo.ll deleted file mode 100644 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nothing-todo.ll +++ /dev/null @@ -1,51 +0,0 @@ -; REQUIRES: asserts -; RUN: opt -function-specialization -debug -S < %s 2>&1 | FileCheck %s - -; The purpose of this test is to check that we don't run the solver as there's -; nothing to do here. For a test that doesn't trigger function specialisation, -; it is intentionally 'big' because we also want to check that the ssa.copy -; intrinsics that are introduced by the solver are cleaned up if we bail -; early. 
Thus, first check the debug messages for the introduction of these -; intrinsics: - -; CHECK: FnSpecialization: Analysing decl: foo -; CHECK: Found replacement{{.*}} call i32 @llvm.ssa.copy.i32 -; CHECK: Found replacement{{.*}} call i32 @llvm.ssa.copy.i32 - -; Then, make sure the solver didn't run: - -; CHECK-NOT: Running solver - -; Finally, check the absence and thus removal of these intrinsics: - -; CHECK-LABEL: @foo -; CHECK-NOT: call i32 @llvm.ssa.copy.i32 - -@N = external dso_local global i32, align 4 -@B = external dso_local global i32*, align 8 -@A = external dso_local global i32*, align 8 - -define dso_local i32 @foo() { -entry: - br label %for.cond - -for.cond: - %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %0 = load i32, i32* @N, align 4 - %cmp = icmp slt i32 %i.0, %0 - br i1 %cmp, label %for.body, label %for.cond.cleanup - -for.cond.cleanup: - ret i32 undef - -for.body: - %1 = load i32*, i32** @B, align 8 - %idxprom = sext i32 %i.0 to i64 - %arrayidx = getelementptr inbounds i32, i32* %1, i64 %idxprom - %2 = load i32, i32* %arrayidx, align 4 - %3 = load i32*, i32** @A, align 8 - %arrayidx2 = getelementptr inbounds i32, i32* %3, i64 %idxprom - store i32 %2, i32* %arrayidx2, align 4 - %inc = add nsw i32 %i.0, 1 - br label %for.cond -} diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-poison.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-poison.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-poison.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-poison.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s ; Check that we don't crash and specialise on a poison value. 
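The nonconst-glob RUN lines above exercise -func-specialization-on-address, which decides whether the address of a non-constant global may be used as a specialization constant. A rough self-contained sketch of that behaviour follows; the globals, the function, and the expected clone name are invented for illustration.

; RUN: opt -ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s --check-prefix=OFF
; RUN: opt -ipsccp -specialize-functions -force-function-specialization -func-specialization-on-address=1 -S < %s | FileCheck %s --check-prefix=ON
;
; @G0 and @G1 are not constant, so their addresses only become specialization
; candidates when -func-specialization-on-address is enabled.
; OFF-NOT: @read.1(
; ON: @read.1(

@G0 = global i32 1, align 4
@G1 = global i32 2, align 4

define internal i32 @read(ptr %p) noinline {
entry:
  %v = load i32, ptr %p, align 4
  ret i32 %v
}

define i32 @main() {
entry:
  %a = call i32 @read(ptr @G0)
  %b = call i32 @read(ptr @G1)
  %add = add nsw i32 %a, %b
  ret i32 %add
}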
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll @@ -1,6 +1,6 @@ -; RUN: opt -function-specialization -force-function-specialization -func-specialization-max-iters=2 -inline -instcombine -S < %s | FileCheck %s --check-prefix=ITERS2 -; RUN: opt -function-specialization -force-function-specialization -func-specialization-max-iters=3 -inline -instcombine -S < %s | FileCheck %s --check-prefix=ITERS3 -; RUN: opt -function-specialization -force-function-specialization -func-specialization-max-iters=4 -inline -instcombine -S < %s | FileCheck %s --check-prefix=ITERS4 +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -func-specialization-max-iters=2 -inline -instcombine -S < %s | FileCheck %s --check-prefix=ITERS2 +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -func-specialization-max-iters=3 -inline -instcombine -S < %s | FileCheck %s --check-prefix=ITERS3 +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -func-specialization-max-iters=4 -inline -instcombine -S < %s | FileCheck %s --check-prefix=ITERS4 @low = internal constant i32 0, align 4 @high = internal constant i32 6, align 4 diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive2.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive2.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive2.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -force-function-specialization -func-specialization-max-iters=2 -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -func-specialization-max-iters=2 -S < %s | FileCheck %s ; Volatile store preventing recursive specialisation: ; diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive3.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive3.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive3.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -force-function-specialization -func-specialization-max-iters=2 -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -func-specialization-max-iters=2 -S < %s | FileCheck %s ; Duplicate store preventing recursive specialisation: ; diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive4.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive4.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive4.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -force-function-specialization -func-specialization-max-iters=2 -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -func-specialization-max-iters=2 -S < %s | FileCheck %s ; Alloca is not an integer type: ; diff --git 
a/llvm/test/Transforms/FunctionSpecialization/function-specialization-stats.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-stats.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-stats.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-stats.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -stats -function-specialization -S -force-function-specialization < %s 2>&1 | FileCheck %s +; RUN: opt -stats -ipsccp -specialize-functions -S -force-function-specialization < %s 2>&1 | FileCheck %s ; CHECK: 2 function-specialization - Number of functions specialized diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -func-specialization-size-threshold=3 -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -func-specialization-size-threshold=3 -S < %s | FileCheck %s define i64 @main(i64 %x, i1 %flag) { ; diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -function-specialization -deadargelim -force-function-specialization -S < %s | FileCheck %s -; RUN: opt -function-specialization -func-specialization-max-iters=1 -deadargelim -force-function-specialization -S < %s | FileCheck %s -; RUN: opt -function-specialization -func-specialization-max-iters=0 -deadargelim -force-function-specialization -S < %s | FileCheck %s --check-prefix=DISABLED -; RUN: opt -function-specialization -func-specialization-avg-iters-cost=1 -deadargelim -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -deadargelim -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -func-specialization-max-iters=1 -deadargelim -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -func-specialization-max-iters=0 -deadargelim -force-function-specialization -S < %s | FileCheck %s --check-prefix=DISABLED +; RUN: opt -ipsccp -specialize-functions -func-specialization-avg-iters-cost=1 -deadargelim -force-function-specialization -S < %s | FileCheck %s ; DISABLED-NOT: @func.1( ; DISABLED-NOT: @func.2( @@ -43,10 +43,11 @@ } define i32 @main(i32* %0, i32 %1) { -; CHECK: [[TMP3:%.*]] = call i32 @func.2(i32* [[TMP0:%.*]], i32 [[TMP1:%.*]]) +; CHECK: call void @func.2(i32* [[TMP0:%.*]], i32 [[TMP1:%.*]]) %3 = call i32 @func(i32* %0, i32 %1, void (i32*)* nonnull @increment) -; CHECK: [[TMP4:%.*]] = call i32 @func.1(i32* [[TMP0]], i32 [[TMP3]]) +; CHECK: call void @func.1(i32* [[TMP0]], i32 0) %4 = call i32 @func(i32* %0, i32 %3, void (i32*)* nonnull @decrement) +; CHECK: ret i32 0 ret i32 %4 } @@ -63,10 +64,10 @@ ; CHECK: call void @decrement(i32* [[TMP9]]) ; CHECK: [[TMP10:%.*]] = load i32, i32* [[TMP3]], align 4 ; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1 -; CHECK: [[TMP12:%.*]] = call i32 @func.1(i32* [[TMP0]], i32 [[TMP11]]) -; CHECK: br label [[TMP13]] -; CHECK: 
13: -; CHECK: ret i32 0 +; CHECK: call void @func.1(i32* [[TMP0]], i32 [[TMP11]]) +; CHECK: br label [[TMP12:%.*]] +; CHECK: 12: +; CHECK: ret void ; ; ; CHECK: @func.2( @@ -82,6 +83,7 @@ ; CHECK: call void @increment(i32* [[TMP9]]) ; CHECK: [[TMP10:%.*]] = load i32, i32* [[TMP3]], align 4 ; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1 -; CHECK: [[TMP12:%.*]] = call i32 @func.2(i32* [[TMP0]], i32 [[TMP11]]) -; CHECK: br label [[TMP13]] -; CHECK: ret i32 0 +; CHECK: call void @func.2(i32* [[TMP0]], i32 [[TMP11]]) +; CHECK: br label [[TMP12:%.*]] +; CHECK: 12: +; CHECK: ret void diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll @@ -1,8 +1,8 @@ -; RUN: opt -function-specialization -func-specialization-avg-iters-cost=3 -S < %s | \ +; RUN: opt -ipsccp -specialize-functions -func-specialization-avg-iters-cost=3 -S < %s | \ ; RUN: FileCheck %s --check-prefixes=COMMON,DISABLED -; RUN: opt -function-specialization -force-function-specialization -S < %s | \ +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -S < %s | \ ; RUN: FileCheck %s --check-prefixes=COMMON,FORCE -; RUN: opt -function-specialization -func-specialization-avg-iters-cost=3 -force-function-specialization -S < %s | \ +; RUN: opt -ipsccp -specialize-functions -func-specialization-avg-iters-cost=3 -force-function-specialization -S < %s | \ ; RUN: FileCheck %s --check-prefixes=COMMON,FORCE ; Test for specializing a constant global. diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll @@ -1,7 +1,7 @@ -; RUN: opt -function-specialization -force-function-specialization \ +; RUN: opt -ipsccp -specialize-functions -force-function-specialization \ ; RUN: -func-specialization-max-clones=2 -S < %s | FileCheck %s -; RUN: opt -function-specialization -force-function-specialization \ +; RUN: opt -ipsccp -specialize-functions -force-function-specialization \ ; RUN: -func-specialization-max-clones=1 -S < %s | FileCheck %s --check-prefix=CONST1 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization5.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization5.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization5.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization5.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s ; There's nothing to specialize here as both calls are the same, so check that: ; diff --git a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll --- a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll +++ b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes=function-specialization < %s | FileCheck %s +; RUN: opt -S 
--passes=ipsccp -specialize-functions < %s | FileCheck %s define dso_local i32 @p0(i32 noundef %x) { entry: %add = add nsw i32 %x, 1 @@ -11,6 +11,37 @@ ret i32 %sub } +; CHECK-LABEL: define dso_local i32 @f0 +; CHECK: tail call fastcc i32 @g.[[#A:]]({{.*}}@p0) +; +define dso_local i32 @f0(i32 noundef %x) { +entry: + %call = tail call fastcc i32 @g(i32 noundef %x, ptr noundef nonnull @p0) + ret i32 %call +} + +; CHECK-LABEL: define dso_local i32 @f1 +; CHECK: tail call fastcc i32 @g.[[#B:]]({{.*}}@p1) +; +define dso_local i32 @f1(i32 noundef %x) { +entry: + %call = tail call fastcc i32 @g(i32 noundef %x, ptr noundef nonnull @p1) + ret i32 %call +} + +; @g gets fully specialized +; CHECK-NOT: define internal fastcc i32 @g( + +define internal fastcc i32 @g(i32 noundef %x, ptr nocapture noundef readonly %p) noinline { +entry: + %pcall = tail call i32 %p(i32 noundef %x) + %fcall = tail call fastcc i32 @f(i32 noundef %pcall, ptr noundef nonnull %p) + ret i32 %fcall +} + +; CHECK-LABEL: define dso_local i32 @g0 +; CHECK: tail call fastcc i32 @f.[[#C:]]({{.*}}@p0) +; define dso_local i32 @g0(i32 noundef %x) { entry: %call = tail call fastcc i32 @f(i32 noundef %x, ptr noundef nonnull @p0) @@ -24,6 +55,9 @@ ret i32 %add } +; CHECK-LABEL: define dso_local i32 @g1 +; CHECK: tail call fastcc i32 @f.[[#D:]]({{.*}}@p1) +; define dso_local i32 @g1(i32 noundef %x) { entry: %call = tail call fastcc i32 @f(i32 noundef %x, ptr noundef nonnull @p1) @@ -38,5 +72,11 @@ ; Check that a single argument, that cannot be used for specialisation, does not ; prevent specialisation based on other arguments. -; CHECK: @f.1 -; CHECK: @f.2 +; +; Also check that for callsites which reside in the body of newly created +; (specialized) functions, the lattice value of the arguments is known. 
+; +; CHECK-DAG: define internal fastcc i32 @g.[[#A]] +; CHECK-DAG: define internal fastcc i32 @g.[[#B]] +; CHECK-DAG: define internal fastcc i32 @f.[[#C]] +; CHECK-DAG: define internal fastcc i32 @f.[[#D]] diff --git a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll --- a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll +++ b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s define i64 @main(i64 %x, i64 %y, i1 %flag) { ; CHECK-LABEL: @main( @@ -70,7 +70,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP0:%.*]] = call i64 @minus(i64 [[X:%.*]], i64 [[Y:%.*]]) ; CHECK-NEXT: [[CMP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]]) -; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @minus, i64 (i64, i64)* @plus) +; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.2(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @minus, i64 (i64, i64)* @plus) ; CHECK-LABEL: @compute.3 ; CHECK-NEXT: entry: diff --git a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll --- a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll +++ b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll @@ -1,6 +1,6 @@ -; RUN: opt -S --passes=function-specialization \ +; RUN: opt -S --passes=ipsccp -specialize-functions \ ; RUN: -force-function-specialization < %s | FileCheck %s -check-prefix CHECK-NOLIT -; RUN: opt -S --passes=function-specialization \ +; RUN: opt -S --passes=ipsccp -specialize-functions \ ; RUN: -function-specialization-for-literal-constant \ ; RUN: -force-function-specialization < %s | FileCheck %s -check-prefix CHECK-LIT diff --git a/llvm/test/Transforms/FunctionSpecialization/no-spec-unused-arg.ll b/llvm/test/Transforms/FunctionSpecialization/no-spec-unused-arg.ll --- a/llvm/test/Transforms/FunctionSpecialization/no-spec-unused-arg.ll +++ b/llvm/test/Transforms/FunctionSpecialization/no-spec-unused-arg.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes=function-specialization -force-function-specialization -function-specialization-for-literal-constant < %s | FileCheck %s +; RUN: opt -S --passes=ipsccp -specialize-functions -force-function-specialization -function-specialization-for-literal-constant < %s | FileCheck %s define internal i32 @f(i32 %x, i32 %y) noinline { ret i32 %x } @@ -17,4 +17,4 @@ ; to be a constant without the need for function specialisation and ; the second parameter is unused. -; CHECK-NOT: @f. \ No newline at end of file +; CHECK-NOT: @f. 
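The get-possible-constants checks rely on FileCheck numeric variables ([[#A:]], [[#B:]]) so that the test does not hard-code clone suffixes, which are an implementation detail of the specializer. A minimal sketch of the same idiom with made-up functions; the exact clone numbering and CHECK lines are illustrative only.

; RUN: opt -S --passes=ipsccp -specialize-functions -force-function-specialization < %s | FileCheck %s
;
; Capture the clone suffixes instead of hard-coding @sel.1 / @sel.2, then
; check that both captured clones are actually defined.
; CHECK-LABEL: define i32 @user0
; CHECK: call i32 @sel.[[#A:]](
; CHECK-LABEL: define i32 @user1
; CHECK: call i32 @sel.[[#B:]](
; CHECK-DAG: define internal i32 @sel.[[#A]]
; CHECK-DAG: define internal i32 @sel.[[#B]]

define i32 @p0(i32 %x) {
entry:
  %add = add nsw i32 %x, 1
  ret i32 %add
}

define i32 @p1(i32 %x) {
entry:
  %sub = sub nsw i32 %x, 1
  ret i32 %sub
}

define internal i32 @sel(i32 %x, ptr %p) noinline {
entry:
  %call = call i32 %p(i32 %x)
  ret i32 %call
}

define i32 @user0(i32 %x) {
entry:
  %call = call i32 @sel(i32 %x, ptr @p0)
  ret i32 %call
}

define i32 @user1(i32 %x) {
entry:
  %call = call i32 @sel(i32 %x, ptr @p1)
  ret i32 %call
}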
diff --git a/llvm/test/Transforms/FunctionSpecialization/noinline.ll b/llvm/test/Transforms/FunctionSpecialization/noinline.ll --- a/llvm/test/Transforms/FunctionSpecialization/noinline.ll +++ b/llvm/test/Transforms/FunctionSpecialization/noinline.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes=function-specialization < %s | FileCheck %s +; RUN: opt -S --passes=ipsccp -specialize-functions < %s | FileCheck %s define dso_local i32 @p0(i32 noundef %x) { entry: %add = add nsw i32 %x, 1 diff --git a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll --- a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll +++ b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -func-specialization-size-threshold=3 -S < %s | FileCheck %s +; RUN: opt -ipsccp -specialize-functions -func-specialization-size-threshold=3 -S < %s | FileCheck %s define i64 @main(i64 %x, i1 %flag) { entry: diff --git a/llvm/test/Transforms/FunctionSpecialization/specialization-order.ll b/llvm/test/Transforms/FunctionSpecialization/specialization-order.ll --- a/llvm/test/Transforms/FunctionSpecialization/specialization-order.ll +++ b/llvm/test/Transforms/FunctionSpecialization/specialization-order.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes=function-specialization,deadargelim -force-function-specialization < %s | FileCheck %s +; RUN: opt -S --passes=ipsccp,deadargelim -specialize-functions -force-function-specialization < %s | FileCheck %s define dso_local i32 @add(i32 %x, i32 %y) { entry: %add = add nsw i32 %y, %x diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll --- a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll +++ b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -function-specialization -func-specialization-max-clones=0 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=NONE -; RUN: opt -function-specialization -func-specialization-max-clones=1 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=ONE -; RUN: opt -function-specialization -func-specialization-max-clones=2 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=TWO -; RUN: opt -function-specialization -func-specialization-max-clones=3 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=THREE +; RUN: opt -ipsccp -specialize-functions -func-specialization-max-clones=0 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=NONE +; RUN: opt -ipsccp -specialize-functions -func-specialization-max-clones=1 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=ONE +; RUN: opt -ipsccp -specialize-functions -func-specialization-max-clones=2 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=TWO +; RUN: opt -ipsccp -specialize-functions -func-specialization-max-clones=3 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=THREE ; Make sure that we iterate correctly after sorting the specializations: ; FnSpecialization: Specializations for function compute diff --git 
a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn @@ -6,6 +6,7 @@ "//llvm/lib/IR", "//llvm/lib/Support", "//llvm/lib/Transforms/AggressiveInstCombine", + "//llvm/lib/Transforms/IPO", "//llvm/lib/Transforms/InstCombine", "//llvm/lib/Transforms/Utils", ]