diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -146,7 +146,6 @@ void initializeFloat2IntLegacyPassPass(PassRegistry&); void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&); void initializeFuncletLayoutPass(PassRegistry&); -void initializeFunctionSpecializationLegacyPassPass(PassRegistry &); void initializeGCMachineCodeAnalysisPass(PassRegistry&); void initializeGCModuleInfoPass(PassRegistry&); void initializeGVNHoistLegacyPassPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -217,7 +217,6 @@ (void) llvm::createInjectTLIMappingsLegacyPass(); (void) llvm::createUnifyLoopExitsPass(); (void) llvm::createFixIrreduciblePass(); - (void)llvm::createFunctionSpecializationPass(); (void)llvm::createSelectOptimizePass(); (void)new llvm::IntervalPartition(); diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -146,11 +146,6 @@ /// ModulePass *createIPSCCPPass(); -//===----------------------------------------------------------------------===// -/// createFunctionSpecializationPass - This pass propagates constants from call -/// sites to the specialized version of the callee function. 
-ModulePass *createFunctionSpecializationPass(); - //===----------------------------------------------------------------------===// // /// createLoopExtractorPass - This pass extracts all natural loops from the diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -0,0 +1,174 @@ +//===- FunctionSpecialization.h - Function Specialization -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This specialises functions with constant parameters. Constant parameters +// like function pointers and constant globals are propagated to the callee by +// specializing the function. The main benefit of this pass at the moment is +// that indirect calls are transformed into direct calls, which provides inline +// opportunities that the inliner would not have been able to achieve. That's +// why function specialisation is run before the inliner in the optimisation +// pipeline; that is by design. Otherwise, we would only benefit from constant +// passing, which is a valid use-case too, but hasn't been explored much in +// terms of performance uplifts, cost-model and compile-time impact. +// +// Current limitations: +// - It does not yet handle integer ranges. We do support "literal constants", +// but that's off by default under an option. 
+// - The cost-model could be further looked into (it mainly focuses on inlining +// benefits), +// +// Ideas: +// - With a function specialization attribute for arguments, we could have +// a direct way to steer function specialization, avoiding the cost-model, +// and thus control compile-times / code-size. +// +// Todos: +// - Specializing recursive functions relies on running the transformation a +// number of times, which is controlled by option +// `func-specialization-max-iters`. Thus, increasing this value and the +// number of iterations, will linearly increase the number of times recursive +// functions get specialized, see also the discussion in +// https://reviews.llvm.org/D106426 for details. Perhaps there is a +// compile-time friendlier way to control/limit the number of specialisations +// for recursive functions. +// - Don't transform the function if function specialization does not trigger; +// the SCCPSolver may make IR changes. +// +// References: +// - 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable +// it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H +#define LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H + +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Transforms/Scalar/SCCP.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/SCCPSolver.h" +#include "llvm/Transforms/Utils/SizeOpts.h" + +using namespace llvm; + +namespace llvm { +// Bookkeeping struct to pass data from the analysis and profitability phase +// to the actual transform helper functions. +struct SpecializationInfo { + SmallVector Args; // Stores the {formal,actual} argument pairs. + InstructionCost Gain; // Profitability: Gain = Bonus - Cost. 
+ Function *Clone; // The definition of the specialized function. +}; + +using CallSpecBinding = std::pair; +// We are using MapVector because it guarantees deterministic iteration +// order across executions. +using SpecializationMap = SmallMapVector; + +class FunctionSpecializer { + + /// The IPSCCP Solver. + SCCPSolver &Solver; + + Module &M; + + /// Analysis manager, needed to invalidate analyses. + FunctionAnalysisManager *FAM; + + /// Analyses used to help determine if a function should be specialized. + std::function GetTLI; + std::function GetTTI; + std::function GetAC; + + // The number of functions specialised, used for collecting statistics and + // also in the cost model. + unsigned NbFunctionsSpecialized = 0; + + SmallPtrSet SpecializedFuncs; + SmallPtrSet FullySpecialized; + DenseMap FunctionMetrics; + +public: + FunctionSpecializer( + SCCPSolver &Solver, Module &M, FunctionAnalysisManager *FAM, + std::function GetTLI, + std::function GetTTI, + std::function GetAC) + : Solver(Solver), M(M), FAM(FAM), GetTLI(GetTLI), GetTTI(GetTTI), + GetAC(GetAC) {} + + ~FunctionSpecializer() { + // Eliminate dead code. + removeDeadFunctions(); + cleanUpSSA(); + } + + bool isClonedFunction(Function *F) { return SpecializedFuncs.count(F); } + + bool run(); + +private: + Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call); + + /// A constant stack value is an AllocaInst that has a single constant + /// value stored to it. Return this constant if such an alloca stack value + /// is a function argument. + Constant *getConstantStackValue(CallInst *Call, Value *Val); + + /// Iterate over the argument tracked functions see if there + /// are any new constant values for the call instruction via + /// stack variables. + void promoteConstantStackValues(); + + /// Clean up fully specialized functions. + void removeDeadFunctions(); + + /// Remove any ssa_copy intrinsics that may have been introduced. 
+ void cleanUpSSA(); + + // Compute the code metrics for function \p F. + CodeMetrics &analyzeFunction(Function *F); + + /// This function decides whether it's worthwhile to specialize function + /// \p F based on the known constant values its arguments can take on. It + /// only discovers potential specialization opportunities without actually + /// applying them. + /// + /// \returns true if any specializations have been found. + bool findSpecializations(Function *F, InstructionCost Cost, + SmallVectorImpl &WorkList); + + bool isCandidateFunction(Function *F); + + Function *createSpecialization(Function *F, CallSpecBinding &Specialization); + + /// Compute and return the cost of specializing function \p F. + InstructionCost getSpecializationCost(Function *F); + + /// Compute a bonus for replacing argument \p A with constant \p C. + InstructionCost getSpecializationBonus(Argument *A, Constant *C, + const LoopInfo &LI); + + /// Determine if it is possible to specialise the function for constant values + /// of the formal parameter \p A. + bool isArgumentInteresting(Argument *A); + + /// Check if the value \p V (an actual argument) is a constant or can only + /// have a constant value. Return that constant. + Constant *getCandidateConstant(Value *V); + + /// Redirects callsites of function \p F to its specialized copies. 
+ void updateCallSites(Function *F, + SmallVectorImpl &Specializations); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H diff --git a/llvm/include/llvm/Transforms/IPO/SCCP.h b/llvm/include/llvm/Transforms/IPO/SCCP.h --- a/llvm/include/llvm/Transforms/IPO/SCCP.h +++ b/llvm/include/llvm/Transforms/IPO/SCCP.h @@ -32,14 +32,6 @@ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; -/// Pass to perform interprocedural constant propagation by specializing -/// functions -class FunctionSpecializationPass - : public PassInfoMixin { -public: - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); -}; - } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_SCCP_H diff --git a/llvm/include/llvm/Transforms/Scalar/SCCP.h b/llvm/include/llvm/Transforms/Scalar/SCCP.h --- a/llvm/include/llvm/Transforms/Scalar/SCCP.h +++ b/llvm/include/llvm/Transforms/Scalar/SCCP.h @@ -40,12 +40,6 @@ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; -bool runFunctionSpecialization( - Module &M, FunctionAnalysisManager *FAM, const DataLayout &DL, - std::function GetTLI, - std::function GetTTI, - std::function GetAC, - function_ref GetAnalysis); } // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_SCCP_H diff --git a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h --- a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h +++ b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h @@ -118,6 +118,10 @@ /// should be rerun. 
bool resolvedUndefsIn(Function &F); + void solveWhileResolvedUndefsIn(Module &M); + + void solveWhileResolvedUndefsIn(SmallVectorImpl &WorkList); + bool isBlockExecutable(BasicBlock *BB) const; // isEdgeFeasible - Return true if the control flow edge from the 'From' basic diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -267,10 +267,6 @@ cl::desc( "Enable pass to eliminate conditions based on linear constraints")); -static cl::opt EnableFunctionSpecialization( - "enable-function-specialization", cl::init(false), cl::Hidden, - cl::desc("Enable Function Specialization pass")); - static cl::opt AttributorRun( "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), @@ -1016,10 +1012,6 @@ for (auto &C : PipelineEarlySimplificationEPCallbacks) C(MPM, Level); - // Specialize functions with IPSCCP. - if (EnableFunctionSpecialization && Level == OptimizationLevel::O3) - MPM.addPass(FunctionSpecializationPass()); - // Interprocedural constant propagation now that basic cleanup has occurred // and prior to optimizing globals. // FIXME: This position in the pipeline hasn't been carefully considered in @@ -1628,8 +1620,6 @@ MPM.addPass(PGOIndirectCallPromotion( true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); - if (EnableFunctionSpecialization && Level == OptimizationLevel::O3) - MPM.addPass(FunctionSpecializationPass()); // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. 
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -59,7 +59,6 @@ MODULE_PASS("extract-blocks", BlockExtractorPass()) MODULE_PASS("forceattrs", ForceFunctionAttrsPass()) MODULE_PASS("function-import", FunctionImportPass()) -MODULE_PASS("function-specialization", FunctionSpecializationPass()) MODULE_PASS("globaldce", GlobalDCEPass()) MODULE_PASS("globalopt", GlobalOptPass()) MODULE_PASS("globalsplit", GlobalSplitPass()) diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -45,6 +45,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/IPO/FunctionSpecialization.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InlineCost.h" @@ -70,11 +71,6 @@ cl::desc("Force function specialization for every call site with a " "constant argument")); -static cl::opt FuncSpecializationMaxIters( - "func-specialization-max-iters", cl::Hidden, - cl::desc("The maximum number of iterations function specialization is run"), - cl::init(1)); - static cl::opt MaxClonesThreshold( "func-specialization-max-clones", cl::Hidden, cl::desc("The maximum number of clones allowed for a single function " @@ -97,9 +93,6 @@ cl::desc("Enable function specialization on the address of global values")); // Disabled by default as it can significantly increase compilation times. -// Running nikic's compile time tracker on x86 with instruction count as the -// metric shows 3-4% regression for SPASS while being neutral for all other -// benchmarks of the llvm test suite. 
// // https://llvm-compile-time-tracker.com // https://github.com/nikic/llvm-compile-time-tracker @@ -108,23 +101,8 @@ cl::desc("Enable specialization of functions that take a literal constant " "as an argument.")); -namespace { -// Bookkeeping struct to pass data from the analysis and profitability phase -// to the actual transform helper functions. -struct SpecializationInfo { - SmallVector Args; // Stores the {formal,actual} argument pairs. - InstructionCost Gain; // Profitability: Gain = Bonus - Cost. -}; -} // Anonymous namespace - -using FuncList = SmallVectorImpl; -using CallArgBinding = std::pair; -using CallSpecBinding = std::pair; -// We are using MapVector because it guarantees deterministic iteration -// order across executions. -using SpecializationMap = SmallMapVector; - -static Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call) { +Constant *FunctionSpecializer::getPromotableAlloca(AllocaInst *Alloca, + CallInst *Call) { Value *StoreValue = nullptr; for (auto *User : Alloca->users()) { // We can't use llvm::isAllocaPromotable() as that would fail because of @@ -147,14 +125,14 @@ // Bail if there is any other unknown usage. return nullptr; } - return dyn_cast_or_null(StoreValue); + return getCandidateConstant(StoreValue); } // A constant stack value is an AllocaInst that has a single constant // value stored to it. Return this constant if such an alloca stack value // is a function argument. 
-static Constant *getConstantStackValue(CallInst *Call, Value *Val, - SCCPSolver &Solver) { +Constant *FunctionSpecializer::getConstantStackValue(CallInst *Call, + Value *Val) { if (!Val) return nullptr; Val = Val->stripPointerCasts(); @@ -187,19 +165,23 @@ // ret void // } // -static void constantArgPropagation(FuncList &WorkList, Module &M, - SCCPSolver &Solver) { +void FunctionSpecializer::promoteConstantStackValues() { // Iterate over the argument tracked functions see if there // are any new constant values for the call instruction via // stack variables. - for (auto *F : WorkList) { + for (Function &F : M) { + if (!Solver.isArgumentTrackedFunction(&F)) + continue; - for (auto *User : F->users()) { + for (auto *User : F.users()) { auto *Call = dyn_cast(User); if (!Call) continue; + if (!Solver.isBlockExecutable(Call->getParent())) + continue; + bool Changed = false; for (const Use &U : Call->args()) { unsigned Idx = Call->getArgOperandNo(&U); @@ -209,7 +191,7 @@ if (!Call->onlyReadsMemory(Idx) || !ArgOpType->isPointerTy()) continue; - auto *ConstVal = getConstantStackValue(Call, ArgOp, Solver); + auto *ConstVal = getConstantStackValue(Call, ArgOp); if (!ConstVal) continue; @@ -231,7 +213,7 @@ } // ssa_copy intrinsics are introduced by the SCCP solver. These intrinsics -// interfere with the constantArgPropagation optimization. +// interfere with the promoteConstantStackValues() optimization. static void removeSSACopy(Function &F) { for (BasicBlock &BB : F) { for (Instruction &Inst : llvm::make_early_inc_range(BB)) { @@ -246,691 +228,475 @@ } } -static void removeSSACopy(Module &M) { - for (Function &F : M) - removeSSACopy(F); +/// Remove any ssa_copy intrinsics that may have been introduced. +void FunctionSpecializer::cleanUpSSA() { + for (Function *F : SpecializedFuncs) + removeSSACopy(*F); } -namespace { -class FunctionSpecializer { - - /// The IPSCCP Solver. - SCCPSolver &Solver; - - /// Analysis manager, needed to invalidate analyses. 
- FunctionAnalysisManager *FAM; +/// Attempt to specialize functions in the module to enable constant +/// propagation across function boundaries. +/// +/// \returns true if at least one function is specialized. +bool FunctionSpecializer::run() { + bool Changed = false; - /// Analyses used to help determine if a function should be specialized. - std::function GetAC; - std::function GetTTI; - std::function GetTLI; + for (Function &F : M) { + if (!isCandidateFunction(&F)) + continue; - SmallPtrSet SpecializedFuncs; - SmallPtrSet FullySpecialized; - SmallVector ReplacedWithConstant; - DenseMap FunctionMetrics; + auto Cost = getSpecializationCost(&F); + if (!Cost.isValid()) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialization cost.\n"); + continue; + } -public: - FunctionSpecializer(SCCPSolver &Solver, FunctionAnalysisManager *FAM, - std::function GetAC, - std::function GetTTI, - std::function GetTLI) - : Solver(Solver), FAM(FAM), GetAC(GetAC), GetTTI(GetTTI), GetTLI(GetTLI) { - } + LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for " + << F.getName() << " is " << Cost << "\n"); - ~FunctionSpecializer() { - // Eliminate dead code. - removeDeadInstructions(); - removeDeadFunctions(); - } + SmallVector Specializations; + if (!findSpecializations(&F, Cost, Specializations)) { + LLVM_DEBUG( + dbgs() << "FnSpecialization: No possible specializations found\n"); + continue; + } - /// Attempt to specialize functions in the module to enable constant - /// propagation across function boundaries. - /// - /// \returns true if at least one function is specialized. 
- bool specializeFunctions(FuncList &Candidates, FuncList &WorkList) { - bool Changed = false; - for (auto *F : Candidates) { - if (!isCandidateFunction(F)) - continue; + Changed = true; - auto Cost = getSpecializationCost(F); - if (!Cost.isValid()) { - LLVM_DEBUG( - dbgs() << "FnSpecialization: Invalid specialization cost.\n"); - continue; - } + SmallVector Clones; + for (CallSpecBinding &Specialization : Specializations) + Clones.push_back(createSpecialization(&F, Specialization)); - LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for " - << F->getName() << " is " << Cost << "\n"); + Solver.solveWhileResolvedUndefsIn(Clones); + updateCallSites(&F, Specializations); + } - SmallVector Specializations; - if (!findSpecializations(F, Cost, Specializations)) { - LLVM_DEBUG( - dbgs() << "FnSpecialization: No possible specializations found\n"); - continue; - } + promoteConstantStackValues(); - Changed = true; - for (auto &Entry : Specializations) - specializeFunction(F, Entry.second, WorkList); - } + LLVM_DEBUG(if (NbFunctionsSpecialized) dbgs() + << "FnSpecialization: Specialized " << NbFunctionsSpecialized + << " functions in module " << M.getName() << "\n"); - updateSpecializedFuncs(Candidates, WorkList); - NumFuncSpecialized += NbFunctionsSpecialized; - return Changed; - } + NumFuncSpecialized += NbFunctionsSpecialized; + return Changed; +} - void removeDeadInstructions() { - for (auto *I : ReplacedWithConstant) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead instruction " << *I - << "\n"); - I->eraseFromParent(); - } - ReplacedWithConstant.clear(); +void FunctionSpecializer::removeDeadFunctions() { + for (Function *F : FullySpecialized) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead function " + << F->getName() << "\n"); + if (FAM) + FAM->clear(*F, F->getName()); + F->eraseFromParent(); } + FullySpecialized.clear(); +} - void removeDeadFunctions() { - for (auto *F : FullySpecialized) { - LLVM_DEBUG(dbgs() << "FnSpecialization: 
Removing dead function " - << F->getName() << "\n"); - if (FAM) - FAM->clear(*F, F->getName()); - F->eraseFromParent(); - } - FullySpecialized.clear(); +// Compute the code metrics for function \p F. +CodeMetrics &FunctionSpecializer::analyzeFunction(Function *F) { + auto I = FunctionMetrics.insert({F, CodeMetrics()}); + CodeMetrics &Metrics = I.first->second; + if (I.second) { + // The code metrics were not cached. + SmallPtrSet EphValues; + CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues); + for (BasicBlock &BB : *F) + Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function " + << F->getName() << " is " << Metrics.NumInsts + << " instructions\n"); } + return Metrics; +} - bool tryToReplaceWithConstant(Value *V) { - if (!V->getType()->isSingleValueType() || isa(V) || - V->user_empty()) - return false; - - const ValueLatticeElement &IV = Solver.getLatticeValueFor(V); - if (isOverdefined(IV)) - return false; - auto *Const = - isConstant(IV) ? Solver.getConstant(IV) : UndefValue::get(V->getType()); - - LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing " << *V - << "\nFnSpecialization: with " << *Const << "\n"); - - // Record uses of V to avoid visiting irrelevant uses of const later. - SmallVector UseInsts; - for (auto *U : V->users()) - if (auto *I = dyn_cast(U)) - if (Solver.isBlockExecutable(I->getParent())) - UseInsts.push_back(I); - - V->replaceAllUsesWith(Const); - - for (auto *I : UseInsts) - Solver.visit(I); - - // Remove the instruction from Block and Solver. - if (auto *I = dyn_cast(V)) { - if (I->isSafeToRemove()) { - ReplacedWithConstant.push_back(I); - Solver.removeLatticeValueFor(I); - } - } - return true; - } +/// Clone the function \p F and remove the ssa_copy intrinsics added by +/// the SCCPSolver in the cloned version. 
+static Function *cloneCandidateFunction(Function *F) { + ValueToValueMapTy Mappings; + Function *Clone = CloneFunction(F, Mappings); + removeSSACopy(*Clone); + return Clone; +} -private: - // The number of functions specialised, used for collecting statistics and - // also in the cost model. - unsigned NbFunctionsSpecialized = 0; - - // Compute the code metrics for function \p F. - CodeMetrics &analyzeFunction(Function *F) { - auto I = FunctionMetrics.insert({F, CodeMetrics()}); - CodeMetrics &Metrics = I.first->second; - if (I.second) { - // The code metrics were not cached. - SmallPtrSet EphValues; - CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues); - for (BasicBlock &BB : *F) - Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues); - - LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function " - << F->getName() << " is " << Metrics.NumInsts - << " instructions\n"); - } - return Metrics; - } +/// This function decides whether it's worthwhile to specialize function +/// \p F based on the known constant values its arguments can take on. It +/// only discovers potential specialization opportunities without actually +/// applying them. +/// +/// \returns true if any specializations have been found. +bool FunctionSpecializer::findSpecializations( + Function *F, InstructionCost Cost, + SmallVectorImpl &WorkList) { + // Get a list of interesting arguments. + SmallVector Args; + for (Argument &Arg : F->args()) + if (isArgumentInteresting(&Arg)) + Args.push_back(&Arg); + + if (!Args.size()) + return false; - /// Clone the function \p F and remove the ssa_copy intrinsics added by - /// the SCCPSolver in the cloned version. - Function *cloneCandidateFunction(Function *F, ValueToValueMapTy &Mappings) { - Function *Clone = CloneFunction(F, Mappings); - removeSSACopy(*Clone); - return Clone; - } + // Find all the call sites for the function. 
+ SpecializationMap Specializations; + for (User *U : F->users()) { + if (!isa(U) && !isa(U)) + continue; + auto &CS = *cast(U); - /// This function decides whether it's worthwhile to specialize function - /// \p F based on the known constant values its arguments can take on. It - /// only discovers potential specialization opportunities without actually - /// applying them. - /// - /// \returns true if any specializations have been found. - bool findSpecializations(Function *F, InstructionCost Cost, - SmallVectorImpl &WorkList) { - // Get a list of interesting arguments. - SmallVector Args; - for (Argument &Arg : F->args()) - if (isArgumentInteresting(&Arg)) - Args.push_back(&Arg); - - if (!Args.size()) - return false; - - // Find all the call sites for the function. - SpecializationMap Specializations; - for (User *U : F->users()) { - if (!isa(U) && !isa(U)) - continue; - auto &CS = *cast(U); - // If the call site has attribute minsize set, that callsite won't be - // specialized. - if (CS.hasFnAttr(Attribute::MinSize)) - continue; + // Skip irrelevant users. + if (CS.getCalledFunction() != F) + continue; - // If the parent of the call site will never be executed, we don't need - // to worry about the passed value. - if (!Solver.isBlockExecutable(CS.getParent())) - continue; + // If the call site has attribute minsize set, that callsite won't be + // specialized. + if (CS.hasFnAttr(Attribute::MinSize)) + continue; - // Examine arguments and create specialization candidates from call sites - // with constant arguments. - bool Added = false; - for (Argument *A : Args) { - Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo())); - if (!C) - continue; + // If the parent of the call site will never be executed, we don't need + // to worry about the passed value. 
+ if (!Solver.isBlockExecutable(CS.getParent())) + continue; - if (!Added) { - Specializations[&CS] = {{}, 0 - Cost}; - Added = true; - } + // Examine arguments and create specialization candidates from call sites + // with constant arguments. + bool Added = false; + for (Argument *A : Args) { + Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo())); + if (!C) + continue; - SpecializationInfo &S = Specializations.back().second; - S.Gain += getSpecializationBonus(A, C, Solver.getLoopInfo(*F)); - S.Args.push_back({A, C}); + if (!Added) { + Specializations[&CS] = {{}, 0 - Cost, nullptr}; + Added = true; } - Added = false; - } - // Remove unprofitable specializations. - if (!ForceFunctionSpecialization) - Specializations.remove_if( - [](const auto &Entry) { return Entry.second.Gain <= 0; }); - - // Clear the MapVector and return the underlying vector. - WorkList = Specializations.takeVector(); - - // Sort the candidates in descending order. - llvm::stable_sort(WorkList, [](const auto &L, const auto &R) { - return L.second.Gain > R.second.Gain; - }); - - // Truncate the worklist to 'MaxClonesThreshold' candidates if necessary. 
- if (WorkList.size() > MaxClonesThreshold) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed " - << "the maximum number of clones threshold.\n" - << "FnSpecialization: Truncating worklist to " - << MaxClonesThreshold << " candidates.\n"); - WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end()); + SpecializationInfo &S = Specializations.back().second; + S.Gain += getSpecializationBonus(A, C, Solver.getLoopInfo(*F)); + S.Args.push_back({A, C}); } - - LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function " - << F->getName() << "\n"; - for (const auto &Entry - : WorkList) { - dbgs() << "FnSpecialization: Gain = " << Entry.second.Gain - << "\n"; - for (const ArgInfo &Arg : Entry.second.Args) - dbgs() << "FnSpecialization: FormalArg = " - << Arg.Formal->getNameOrAsOperand() - << ", ActualArg = " - << Arg.Actual->getNameOrAsOperand() << "\n"; - }); - - return !WorkList.empty(); + Added = false; } - bool isCandidateFunction(Function *F) { - // Do not specialize the cloned function again. - if (SpecializedFuncs.contains(F)) - return false; - - // If we're optimizing the function for size, we shouldn't specialize it. - if (F->hasOptSize() || - shouldOptimizeForSize(F, nullptr, nullptr, PGSOQueryType::IRPass)) - return false; + // Remove unprofitable specializations. + if (!ForceFunctionSpecialization) + Specializations.remove_if( + [](const auto &Entry) { return Entry.second.Gain <= 0; }); + + // Clear the MapVector and return the underlying vector. + WorkList = Specializations.takeVector(); + + // Sort the candidates in descending order. + llvm::stable_sort(WorkList, [](const auto &L, const auto &R) { + return L.second.Gain > R.second.Gain; + }); + + // Truncate the worklist to 'MaxClonesThreshold' candidates if necessary. 
+ if (WorkList.size() > MaxClonesThreshold) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed " + << "the maximum number of clones threshold.\n" + << "FnSpecialization: Truncating worklist to " + << MaxClonesThreshold << " candidates.\n"); + WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end()); + } - // Exit if the function is not executable. There's no point in specializing - // a dead function. - if (!Solver.isBlockExecutable(&F->getEntryBlock())) - return false; + LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function " + << F->getName() << "\n"; + for (const auto &Entry + : WorkList) { + dbgs() << "FnSpecialization: Gain = " << Entry.second.Gain + << "\n"; + for (const ArgInfo &Arg : Entry.second.Args) + dbgs() << "FnSpecialization: FormalArg = " + << Arg.Formal->getNameOrAsOperand() + << ", ActualArg = " << Arg.Actual->getNameOrAsOperand() + << "\n"; + }); - // It wastes time to specialize a function which would get inlined finally. - if (F->hasFnAttribute(Attribute::AlwaysInline)) - return false; + return !WorkList.empty(); +} - LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName() - << "\n"); - return true; - } +bool FunctionSpecializer::isCandidateFunction(Function *F) { + if (F->isDeclaration()) + return false; - void specializeFunction(Function *F, SpecializationInfo &S, - FuncList &WorkList) { - ValueToValueMapTy Mappings; - Function *Clone = cloneCandidateFunction(F, Mappings); - - // Rewrite calls to the function so that they call the clone instead. - rewriteCallSites(Clone, S.Args, Mappings); - - // Initialize the lattice state of the arguments of the function clone, - // marking the argument on which we specialized the function constant - // with the given value. 
- Solver.markArgInFuncSpecialization(Clone, S.Args); - - // Mark all the specialized functions - WorkList.push_back(Clone); - NbFunctionsSpecialized++; - - // If the function has been completely specialized, the original function - // is no longer needed. Mark it unreachable. - if (F->getNumUses() == 0 || all_of(F->users(), [F](User *U) { - if (auto *CS = dyn_cast(U)) - return CS->getFunction() == F; - return false; - })) { - Solver.markFunctionUnreachable(F); - FullySpecialized.insert(F); - } - } + if (F->hasFnAttribute(Attribute::NoDuplicate)) + return false; - /// Compute and return the cost of specializing function \p F. - InstructionCost getSpecializationCost(Function *F) { - CodeMetrics &Metrics = analyzeFunction(F); - // If the code metrics reveal that we shouldn't duplicate the function, we - // shouldn't specialize it. Set the specialization cost to Invalid. - // Or if the lines of codes implies that this function is easy to get - // inlined so that we shouldn't specialize it. - if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() || - (!ForceFunctionSpecialization && - !F->hasFnAttribute(Attribute::NoInline) && - Metrics.NumInsts < SmallFunctionThreshold)) - return InstructionCost::getInvalid(); - - // Otherwise, set the specialization cost to be the cost of all the - // instructions in the function and penalty for specializing more functions. - unsigned Penalty = NbFunctionsSpecialized + 1; - return Metrics.NumInsts * InlineConstants::getInstrCost() * Penalty; - } + if (!Solver.isArgumentTrackedFunction(F)) + return false; - InstructionCost getUserBonus(User *U, llvm::TargetTransformInfo &TTI, - const LoopInfo &LI) { - auto *I = dyn_cast_or_null(U); - // If not an instruction we do not know how to evaluate. - // Keep minimum possible cost for now so that it doesnt affect - // specialization. 
- if (!I) - return std::numeric_limits::min(); - - InstructionCost Cost = - TTI.getInstructionCost(U, TargetTransformInfo::TCK_SizeAndLatency); - - // Increase the cost if it is inside the loop. - unsigned LoopDepth = LI.getLoopDepth(I->getParent()); - Cost *= std::pow((double)AvgLoopIterationCount, LoopDepth); - - // Traverse recursively if there are more uses. - // TODO: Any other instructions to be added here? - if (I->mayReadFromMemory() || I->isCast()) - for (auto *User : I->users()) - Cost += getUserBonus(User, TTI, LI); - - return Cost; - } + // Do not specialize the cloned function again. + if (SpecializedFuncs.contains(F)) + return false; - /// Compute a bonus for replacing argument \p A with constant \p C. - InstructionCost getSpecializationBonus(Argument *A, Constant *C, - const LoopInfo &LI) { - Function *F = A->getParent(); - auto &TTI = (GetTTI)(*F); - LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " - << C->getNameOrAsOperand() << "\n"); - - InstructionCost TotalCost = 0; - for (auto *U : A->users()) { - TotalCost += getUserBonus(U, TTI, LI); - LLVM_DEBUG(dbgs() << "FnSpecialization: User cost "; - TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n"); - } + // If we're optimizing the function for size, we shouldn't specialize it. + if (F->hasOptSize() || + shouldOptimizeForSize(F, nullptr, nullptr, PGSOQueryType::IRPass)) + return false; - // The below heuristic is only concerned with exposing inlining - // opportunities via indirect call promotion. If the argument is not a - // (potentially casted) function pointer, give up. - Function *CalledFunction = dyn_cast(C->stripPointerCasts()); - if (!CalledFunction) - return TotalCost; - - // Get TTI for the called function (used for the inline cost). - auto &CalleeTTI = (GetTTI)(*CalledFunction); - - // Look at all the call sites whose called value is the argument. 
- // Specializing the function on the argument would allow these indirect - // calls to be promoted to direct calls. If the indirect call promotion - // would likely enable the called function to be inlined, specializing is a - // good idea. - int Bonus = 0; - for (User *U : A->users()) { - if (!isa(U) && !isa(U)) - continue; - auto *CS = cast(U); - if (CS->getCalledOperand() != A) - continue; + // Exit if the function is not executable. There's no point in specializing + // a dead function. + if (!Solver.isBlockExecutable(&F->getEntryBlock())) + return false; - // Get the cost of inlining the called function at this call site. Note - // that this is only an estimate. The called function may eventually - // change in a way that leads to it not being inlined here, even though - // inlining looks profitable now. For example, one of its called - // functions may be inlined into it, making the called function too large - // to be inlined into this call site. - // - // We apply a boost for performing indirect call promotion by increasing - // the default threshold by the threshold for indirect calls. - auto Params = getInlineParams(); - Params.DefaultThreshold += InlineConstants::IndirectCallThreshold; - InlineCost IC = - getInlineCost(*CS, CalledFunction, Params, CalleeTTI, GetAC, GetTLI); - - // We clamp the bonus for this call to be between zero and the default - // threshold. - if (IC.isAlways()) - Bonus += Params.DefaultThreshold; - else if (IC.isVariable() && IC.getCostDelta() > 0) - Bonus += IC.getCostDelta(); - - LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus - << " for user " << *U << "\n"); - } + // It wastes time to specialize a function which would get inlined finally. 
+ if (F->hasFnAttribute(Attribute::AlwaysInline)) + return false; - return TotalCost + Bonus; - } + LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName() + << "\n"); + return true; +} - /// Determine if it is possible to specialise the function for constant values - /// of the formal parameter \p A. - bool isArgumentInteresting(Argument *A) { - // No point in specialization if the argument is unused. - if (A->user_empty()) - return false; - - // For now, don't attempt to specialize functions based on the values of - // composite types. - Type *ArgTy = A->getType(); - if (!ArgTy->isSingleValueType()) - return false; - - // Specialization of integer and floating point types needs to be explicitly - // enabled. - if (!EnableSpecializationForLiteralConstant && - (ArgTy->isIntegerTy() || ArgTy->isFloatingPointTy())) - return false; - - // SCCP solver does not record an argument that will be constructed on - // stack. - if (A->hasByValAttr() && !A->getParent()->onlyReadsMemory()) - return false; - - // Check the lattice value and decide if we should attemt to specialize, - // based on this argument. No point in specialization, if the lattice value - // is already a constant. - const ValueLatticeElement &LV = Solver.getLatticeValueFor(A); - if (LV.isUnknownOrUndef() || LV.isConstant() || - (LV.isConstantRange() && LV.getConstantRange().isSingleElement())) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Nothing to do, argument " - << A->getNameOrAsOperand() << " is already constant\n"); - return false; - } +Function * +FunctionSpecializer::createSpecialization(Function *F, + CallSpecBinding &Specialization) { + Function *Clone = cloneCandidateFunction(F); + Specialization.second.Clone = Clone; - return true; - } + // Initialize the lattice state of the arguments of the function clone, + // marking the argument on which we specialized the function constant + // with the given value. 
+ Solver.markArgInFuncSpecialization(Clone, Specialization.second.Args); - /// Check if the valuy \p V (an actual argument) is a constant or can only - /// have a constant value. Return that constant. - Constant *getCandidateConstant(Value *V) { - if (isa(V)) - return nullptr; + Solver.addArgumentTrackedFunction(Clone); + Solver.markBlockExecutable(&Clone->front()); - // TrackValueOfGlobalVariable only tracks scalar global variables. - if (auto *GV = dyn_cast(V)) { - // Check if we want to specialize on the address of non-constant - // global values. - if (!GV->isConstant() && !SpecializeOnAddresses) - return nullptr; + // Mark all the specialized functions + SpecializedFuncs.insert(Clone); + NbFunctionsSpecialized++; - if (!GV->getValueType()->isSingleValueType()) - return nullptr; - } + return Clone; +} - // Select for possible specialisation values that are constants or - // are deduced to be constants or constant ranges with a single element. - Constant *C = dyn_cast(V); - if (!C) { - const ValueLatticeElement &LV = Solver.getLatticeValueFor(V); - if (LV.isConstant()) - C = LV.getConstant(); - else if (LV.isConstantRange() && - LV.getConstantRange().isSingleElement()) { - assert(V->getType()->isIntegerTy() && "Non-integral constant range"); - C = Constant::getIntegerValue( - V->getType(), *LV.getConstantRange().getSingleElement()); - } else - return nullptr; - } +/// Compute and return the cost of specializing function \p F. +InstructionCost FunctionSpecializer::getSpecializationCost(Function *F) { + CodeMetrics &Metrics = analyzeFunction(F); + // If the code metrics reveal that we shouldn't duplicate the function, we + // shouldn't specialize it. Set the specialization cost to Invalid. + // Or if the lines of codes implies that this function is easy to get + // inlined so that we shouldn't specialize it. 
+ if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() || + (!ForceFunctionSpecialization && + !F->hasFnAttribute(Attribute::NoInline) && + Metrics.NumInsts < SmallFunctionThreshold)) + return InstructionCost::getInvalid(); + + // Otherwise, set the specialization cost to be the cost of all the + // instructions in the function and penalty for specializing more functions. + unsigned Penalty = NbFunctionsSpecialized + 1; + return Metrics.NumInsts * InlineConstants::getInstrCost() * Penalty; +} - LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument " - << V->getNameOrAsOperand() << "\n"); +static InstructionCost getUserBonus(User *U, llvm::TargetTransformInfo &TTI, + const LoopInfo &LI) { + auto *I = dyn_cast_or_null(U); + // If not an instruction we do not know how to evaluate. + // Keep minimum possible cost for now so that it doesn't affect + // specialization. + if (!I) + return std::numeric_limits::min(); + + InstructionCost Cost = + TTI.getInstructionCost(U, TargetTransformInfo::TCK_SizeAndLatency); + + // Increase the cost if it is inside the loop. + unsigned LoopDepth = LI.getLoopDepth(I->getParent()); + Cost *= std::pow((double)AvgLoopIterationCount, LoopDepth); + + // Traverse recursively if there are more uses. + // TODO: Any other instructions to be added here? + if (I->mayReadFromMemory() || I->isCast()) + for (auto *User : I->users()) + Cost += getUserBonus(User, TTI, LI); + + return Cost; +} - return C; +/// Compute a bonus for replacing argument \p A with constant \p C. 
+InstructionCost +FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C, + const LoopInfo &LI) { + Function *F = A->getParent(); + auto &TTI = (GetTTI)(*F); + LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " + << C->getNameOrAsOperand() << "\n"); + + InstructionCost TotalCost = 0; + for (auto *U : A->users()) { + TotalCost += getUserBonus(U, TTI, LI); + LLVM_DEBUG(dbgs() << "FnSpecialization: User cost "; + TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n"); } - /// Rewrite calls to function \p F to call function \p Clone instead. - /// - /// This function modifies calls to function \p F as long as the actual - /// arguments match those in \p Args. Note that for recursive calls we - /// need to compare against the cloned formal arguments. - /// - /// Callsites that have been marked with the MinSize function attribute won't - /// be specialized and rewritten. - void rewriteCallSites(Function *Clone, const SmallVectorImpl &Args, - ValueToValueMapTy &Mappings) { - assert(!Args.empty() && "Specialization without arguments"); - Function *F = Args[0].Formal->getParent(); - - SmallVector CallSitesToRewrite; - for (auto *U : F->users()) { - if (!isa(U) && !isa(U)) - continue; - auto &CS = *cast(U); - if (!CS.getCalledFunction() || CS.getCalledFunction() != F) - continue; - CallSitesToRewrite.push_back(&CS); - } + // The below heuristic is only concerned with exposing inlining + // opportunities via indirect call promotion. If the argument is not a + // (potentially casted) function pointer, give up. + Function *CalledFunction = dyn_cast(C->stripPointerCasts()); + if (!CalledFunction) + return TotalCost; + + // Get TTI for the called function (used for the inline cost). + auto &CalleeTTI = (GetTTI)(*CalledFunction); + + // Look at all the call sites whose called value is the argument. + // Specializing the function on the argument would allow these indirect + // calls to be promoted to direct calls. 
If the indirect call promotion + // would likely enable the called function to be inlined, specializing is a + // good idea. + int Bonus = 0; + for (User *U : A->users()) { + if (!isa(U) && !isa(U)) + continue; + auto *CS = cast(U); + if (CS->getCalledOperand() != A) + continue; - LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing call sites of " - << F->getName() << " with " << Clone->getName() << "\n"); - - for (auto *CS : CallSitesToRewrite) { - LLVM_DEBUG(dbgs() << "FnSpecialization: " - << CS->getFunction()->getName() << " ->" << *CS - << "\n"); - if (/* recursive call */ - (CS->getFunction() == Clone && - all_of(Args, - [CS, &Mappings](const ArgInfo &Arg) { - unsigned ArgNo = Arg.Formal->getArgNo(); - return CS->getArgOperand(ArgNo) == Mappings[Arg.Formal]; - })) || - /* normal call */ - all_of(Args, [CS](const ArgInfo &Arg) { - unsigned ArgNo = Arg.Formal->getArgNo(); - return CS->getArgOperand(ArgNo) == Arg.Actual; - })) { - CS->setCalledFunction(Clone); - Solver.markOverdefined(CS); - } - } + // Get the cost of inlining the called function at this call site. Note + // that this is only an estimate. The called function may eventually + // change in a way that leads to it not being inlined here, even though + // inlining looks profitable now. For example, one of its called + // functions may be inlined into it, making the called function too large + // to be inlined into this call site. + // + // We apply a boost for performing indirect call promotion by increasing + // the default threshold by the threshold for indirect calls. + auto Params = getInlineParams(); + Params.DefaultThreshold += InlineConstants::IndirectCallThreshold; + InlineCost IC = + getInlineCost(*CS, CalledFunction, Params, CalleeTTI, GetAC, GetTLI); + + // We clamp the bonus for this call to be between zero and the default + // threshold. 
+ if (IC.isAlways()) + Bonus += Params.DefaultThreshold; + else if (IC.isVariable() && IC.getCostDelta() > 0) + Bonus += IC.getCostDelta(); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus + << " for user " << *U << "\n"); } - void updateSpecializedFuncs(FuncList &Candidates, FuncList &WorkList) { - for (auto *F : WorkList) { - SpecializedFuncs.insert(F); + return TotalCost + Bonus; +} - // Initialize the state of the newly created functions, marking them - // argument-tracked and executable. - if (F->hasExactDefinition() && !F->hasFnAttribute(Attribute::Naked)) - Solver.addTrackedFunction(F); +/// Determine if it is possible to specialise the function for constant values +/// of the formal parameter \p A. +bool FunctionSpecializer::isArgumentInteresting(Argument *A) { + // No point in specialization if the argument is unused. + if (A->user_empty()) + return false; - Solver.addArgumentTrackedFunction(F); - Candidates.push_back(F); - Solver.markBlockExecutable(&F->front()); + // For now, don't attempt to specialize functions based on the values of + // composite types. + Type *ArgTy = A->getType(); + if (!ArgTy->isSingleValueType()) + return false; - // Replace the function arguments for the specialized functions. - for (Argument &Arg : F->args()) - if (!Arg.use_empty() && tryToReplaceWithConstant(&Arg)) - LLVM_DEBUG(dbgs() << "FnSpecialization: Replaced constant argument: " - << Arg.getNameOrAsOperand() << "\n"); - } - } -}; -} // namespace - -bool llvm::runFunctionSpecialization( - Module &M, FunctionAnalysisManager *FAM, const DataLayout &DL, - std::function GetTLI, - std::function GetTTI, - std::function GetAC, - function_ref GetAnalysis) { - SCCPSolver Solver(DL, GetTLI, M.getContext()); - FunctionSpecializer FS(Solver, FAM, GetAC, GetTTI, GetTLI); - bool Changed = false; + // Specialization of integer and floating point types needs to be explicitly + // enabled. 
+ if (!EnableSpecializationForLiteralConstant && + (ArgTy->isIntegerTy() || ArgTy->isFloatingPointTy())) + return false; - // Loop over all functions, marking arguments to those with their addresses - // taken or that are external as overdefined. - for (Function &F : M) { - if (F.isDeclaration()) - continue; - if (F.hasFnAttribute(Attribute::NoDuplicate)) - continue; + // SCCP solver does not record an argument that will be constructed on + // stack. + if (A->hasByValAttr() && !A->getParent()->onlyReadsMemory()) + return false; - LLVM_DEBUG(dbgs() << "\nFnSpecialization: Analysing decl: " << F.getName() - << "\n"); - Solver.addAnalysis(F, GetAnalysis(F)); + // Check the lattice value and decide if we should attempt to specialize, + // based on this argument. No point in specialization, if the lattice value + // is already a constant. + const ValueLatticeElement &LV = Solver.getLatticeValueFor(A); + if (LV.isUnknownOrUndef() || LV.isConstant() || + (LV.isConstantRange() && LV.getConstantRange().isSingleElement())) { + LLVM_DEBUG(dbgs() << "FnSpecialization: Nothing to do, argument " + << A->getNameOrAsOperand() << " is already constant\n"); + return false; + } - // Determine if we can track the function's arguments. If so, add the - // function to the solver's set of argument-tracked functions. - if (canTrackArgumentsInterprocedurally(&F)) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Can track arguments\n"); - Solver.addArgumentTrackedFunction(&F); - continue; - } else { - LLVM_DEBUG(dbgs() << "FnSpecialization: Can't track arguments!\n" - << "FnSpecialization: Doesn't have local linkage, or " - << "has its address taken\n"); - } + return true; +} - // Assume the function is called. - Solver.markBlockExecutable(&F.front()); +/// Check if the value \p V (an actual argument) is a constant or can only +/// have a constant value. Return that constant. 
+Constant *FunctionSpecializer::getCandidateConstant(Value *V) { + if (isa(V)) + return nullptr; - // Assume nothing about the incoming arguments. - for (Argument &AI : F.args()) - Solver.markOverdefined(&AI); - } + // TrackValueOfGlobalVariable only tracks scalar global variables. + if (auto *GV = dyn_cast(V)) { + // Check if we want to specialize on the address of non-constant + // global values. + if (!GV->isConstant() && !SpecializeOnAddresses) + return nullptr; - // Determine if we can track any of the module's global variables. If so, add - // the global variables we can track to the solver's set of tracked global - // variables. - for (GlobalVariable &G : M.globals()) { - G.removeDeadConstantUsers(); - if (canTrackGlobalVariableInterprocedurally(&G)) - Solver.trackValueOfGlobalVariable(&G); + if (!GV->getValueType()->isSingleValueType()) + return nullptr; } - auto &TrackedFuncs = Solver.getArgumentTrackedFunctions(); - SmallVector FuncDecls(TrackedFuncs.begin(), - TrackedFuncs.end()); - - // No tracked functions, so nothing to do: don't run the solver and remove - // the ssa_copy intrinsics that may have been introduced. - if (TrackedFuncs.empty()) { - removeSSACopy(M); - return false; + // Select for possible specialisation values that are constants or + // are deduced to be constants or constant ranges with a single element. + Constant *C = dyn_cast(V); + if (!C) { + const ValueLatticeElement &LV = Solver.getLatticeValueFor(V); + if (LV.isConstant()) + C = LV.getConstant(); + else if (LV.isConstantRange() && LV.getConstantRange().isSingleElement()) { + assert(V->getType()->isIntegerTy() && "Non-integral constant range"); + C = Constant::getIntegerValue(V->getType(), + *LV.getConstantRange().getSingleElement()); + } else + return nullptr; } - // Solve for constants. 
- auto RunSCCPSolver = [&](auto &WorkList) { - bool ResolvedUndefs = true; - - while (ResolvedUndefs) { - // Not running the solver unnecessary is checked in regression test - // nothing-to-do.ll, so if this debug message is changed, this regression - // test needs updating too. - LLVM_DEBUG(dbgs() << "FnSpecialization: Running solver\n"); - - Solver.solve(); - LLVM_DEBUG(dbgs() << "FnSpecialization: Resolving undefs\n"); - ResolvedUndefs = false; - for (Function *F : WorkList) - if (Solver.resolvedUndefsIn(*F)) - ResolvedUndefs = true; - } - - for (auto *F : WorkList) { - for (BasicBlock &BB : *F) { - if (!Solver.isBlockExecutable(&BB)) - continue; - // FIXME: The solver may make changes to the function here, so set - // Changed, even if later function specialization does not trigger. - for (auto &I : make_early_inc_range(BB)) - Changed |= FS.tryToReplaceWithConstant(&I); - } - } - }; + LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument " + << V->getNameOrAsOperand() << "\n"); -#ifndef NDEBUG - LLVM_DEBUG(dbgs() << "FnSpecialization: Worklist fn decls:\n"); - for (auto *F : FuncDecls) - LLVM_DEBUG(dbgs() << "FnSpecialization: *) " << F->getName() << "\n"); -#endif + return C; +} - // Initially resolve the constants in all the argument tracked functions. - RunSCCPSolver(FuncDecls); +/// Redirects callsites of function \p F to its specialized copies. 
+void FunctionSpecializer::updateCallSites( + Function *F, SmallVectorImpl &Specializations) { + SmallVector ToUpdate; + for (User *U : F->users()) { + if (auto *CS = dyn_cast(U)) + if (CS->getCalledFunction() == F && + Solver.isBlockExecutable(CS->getParent())) + ToUpdate.push_back(CS); + } - SmallVector WorkList; - unsigned I = 0; - while (FuncSpecializationMaxIters != I++ && - FS.specializeFunctions(FuncDecls, WorkList)) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Finished iteration " << I << "\n"); + unsigned NCallsLeft = ToUpdate.size(); + for (CallBase *CS : ToUpdate) { + // Decrement the counter if the callsite is either recursive or updated. + bool ShouldDecrementCount = CS->getFunction() == F; + for (CallSpecBinding &Specialization : Specializations) { + Function *Clone = Specialization.second.Clone; + SmallVectorImpl &Args = Specialization.second.Args; - // Run the solver for the specialized functions. - RunSCCPSolver(WorkList); + if (any_of(Args, [CS, this](const ArgInfo &Arg) { + unsigned ArgNo = Arg.Formal->getArgNo(); + return getCandidateConstant(CS->getArgOperand(ArgNo)) != Arg.Actual; + })) + continue; - // Replace some unresolved constant arguments. - constantArgPropagation(FuncDecls, M, Solver); + LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing call site " << *CS + << " with " << Clone->getName() << "\n"); - WorkList.clear(); - Changed = true; + CS->setCalledFunction(Clone); + ShouldDecrementCount = true; + break; + } + if (ShouldDecrementCount) + --NCallsLeft; } - LLVM_DEBUG(dbgs() << "FnSpecialization: Number of specializations = " - << NumFuncSpecialized << "\n"); - - // Remove any ssa_copy intrinsics that may have been introduced. - removeSSACopy(M); - return Changed; + // If the function has been completely specialized, the original function + // is no longer needed. Mark it unreachable. 
+ if (NCallsLeft == 0) { + Solver.markFunctionUnreachable(F); + FullySpecialized.insert(F); + } } diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -30,7 +30,6 @@ initializeDAEPass(Registry); initializeDAHPass(Registry); initializeForceFunctionAttrsLegacyPassPass(Registry); - initializeFunctionSpecializationLegacyPassPass(Registry); initializeGlobalDCELegacyPassPass(Registry); initializeGlobalOptLegacyPassPass(Registry); initializeGlobalSplitPass(Registry); diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp --- a/llvm/lib/Transforms/IPO/SCCP.cpp +++ b/llvm/lib/Transforms/IPO/SCCP.cpp @@ -23,8 +23,10 @@ #include "llvm/InitializePasses.h" #include "llvm/IR/Constants.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ModRef.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/FunctionSpecialization.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SCCPSolver.h" @@ -40,6 +42,13 @@ STATISTIC(NumInstReplaced, "Number of instructions replaced with (simpler) instruction"); +static cl::opt SpecializeFunctions("specialize-functions", + cl::init(false), cl::Hidden, cl::desc("Enable function specialization")); + +static cl::opt FuncSpecializationMaxIters( + "func-specialization-max-iters", cl::init(1), cl::Hidden, cl::desc( + "The maximum number of iterations function specialization is run")); + static void findReturnsToZap(Function &F, SmallVector &ReturnsToZap, SCCPSolver &Solver) { @@ -93,10 +102,13 @@ } static bool runIPSCCP( - Module &M, const DataLayout &DL, + Module &M, const DataLayout &DL, FunctionAnalysisManager *FAM, std::function GetTLI, + std::function GetTTI, + std::function GetAC, function_ref getAnalysis) { SCCPSolver Solver(DL, GetTLI, M.getContext()); + FunctionSpecializer Specializer(Solver, M, 
FAM, GetTLI, GetTTI, GetAC); // Loop over all functions, marking arguments to those with their addresses // taken or that are external as overdefined. @@ -136,24 +148,16 @@ } // Solve for constants. - bool ResolvedUndefs = true; - Solver.solve(); - while (ResolvedUndefs) { - LLVM_DEBUG(dbgs() << "RESOLVING UNDEFS\n"); - ResolvedUndefs = false; - for (Function &F : M) { - if (Solver.resolvedUndefsIn(F)) - ResolvedUndefs = true; - } - if (ResolvedUndefs) - Solver.solve(); - } + Solver.solveWhileResolvedUndefsIn(M); - bool MadeChanges = false; + if (SpecializeFunctions) { + unsigned Iters = 0; + while (Iters++ < FuncSpecializationMaxIters && Specializer.run()); + } // Iterate over all of the instructions in the module, replacing them with // constants if we have found them to be of constant values. - + bool MadeChanges = false; for (Function &F : M) { if (F.isDeclaration()) continue; @@ -213,7 +217,10 @@ NumInstRemoved, NumInstReplaced); } - DomTreeUpdater DTU = Solver.getDTU(F); + DomTreeUpdater DTU = SpecializeFunctions && Specializer.isClonedFunction(&F) + ? DomTreeUpdater(DomTreeUpdater::UpdateStrategy::Lazy) + : Solver.getDTU(F); + // Change dead blocks to unreachable. We do it after replacing constants // in all executable blocks, because changeToUnreachable may remove PHI // nodes in executable blocks we found values for. The function's entry @@ -364,15 +371,21 @@ auto GetTLI = [&FAM](Function &F) -> const TargetLibraryInfo & { return FAM.getResult(F); }; + auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { + return FAM.getResult(F); + }; + auto GetAC = [&FAM](Function &F) -> AssumptionCache & { + return FAM.getResult(F); + }; auto getAnalysis = [&FAM](Function &F) -> AnalysisResultsForFn { DominatorTree &DT = FAM.getResult(F); return { std::make_unique(F, DT, FAM.getResult(F)), &DT, FAM.getCachedResult(F), - nullptr}; + SpecializeFunctions ? 
&FAM.getResult(F) : nullptr }; }; - if (!runIPSCCP(M, DL, GetTLI, getAnalysis)) + if (!runIPSCCP(M, DL, &FAM, GetTLI, GetTTI, GetAC, getAnalysis)) return PreservedAnalyses::all(); PreservedAnalyses PA; @@ -404,6 +417,12 @@ auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & { return this->getAnalysis().getTLI(F); }; + auto GetTTI = [this](Function &F) -> TargetTransformInfo & { + return this->getAnalysis().getTTI(F); + }; + auto GetAC = [this](Function &F) -> AssumptionCache & { + return this->getAnalysis().getAssumptionCache(F); + }; auto getAnalysis = [this](Function &F) -> AnalysisResultsForFn { DominatorTree &DT = this->getAnalysis(F).getDomTree(); @@ -417,13 +436,14 @@ nullptr}; }; - return runIPSCCP(M, DL, GetTLI, getAnalysis); + return runIPSCCP(M, DL, nullptr, GetTLI, GetTTI, GetAC, getAnalysis); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); } }; @@ -444,95 +464,3 @@ // createIPSCCPPass - This is the public interface to this file. 
ModulePass *llvm::createIPSCCPPass() { return new IPSCCPLegacyPass(); } -PreservedAnalyses FunctionSpecializationPass::run(Module &M, - ModuleAnalysisManager &AM) { - const DataLayout &DL = M.getDataLayout(); - auto &FAM = AM.getResult(M).getManager(); - auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { - return FAM.getResult(F); - }; - auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { - return FAM.getResult(F); - }; - auto GetAC = [&FAM](Function &F) -> AssumptionCache & { - return FAM.getResult(F); - }; - auto GetAnalysis = [&FAM](Function &F) -> AnalysisResultsForFn { - DominatorTree &DT = FAM.getResult(F); - return {std::make_unique( - F, DT, FAM.getResult(F)), - &DT, FAM.getCachedResult(F), - &FAM.getResult(F)}; - }; - - if (!runFunctionSpecialization(M, &FAM, DL, GetTLI, GetTTI, GetAC, GetAnalysis)) - return PreservedAnalyses::all(); - - PreservedAnalyses PA; - PA.preserve(); - PA.preserve(); - PA.preserve(); - return PA; -} - -namespace { -struct FunctionSpecializationLegacyPass : public ModulePass { - static char ID; // Pass identification, replacement for typeid - FunctionSpecializationLegacyPass() : ModulePass(ID) {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - } - - bool runOnModule(Module &M) override { - if (skipModule(M)) - return false; - - const DataLayout &DL = M.getDataLayout(); - auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { - return this->getAnalysis().getTLI(F); - }; - auto GetTTI = [this](Function &F) -> TargetTransformInfo & { - return this->getAnalysis().getTTI(F); - }; - auto GetAC = [this](Function &F) -> AssumptionCache & { - return this->getAnalysis().getAssumptionCache(F); - }; - - auto GetAnalysis = [this](Function &F) -> AnalysisResultsForFn { - DominatorTree &DT = - this->getAnalysis(F).getDomTree(); - return { - std::make_unique( - F, DT, - this->getAnalysis().getAssumptionCache( - F)), - nullptr, // We 
cannot preserve the LI, DT, or PDT with the legacy pass - nullptr, // manager, so set them to nullptr. - nullptr}; - }; - return runFunctionSpecialization(M, nullptr, DL, GetTLI, GetTTI, GetAC, GetAnalysis); - } -}; -} // namespace - -char FunctionSpecializationLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN( - FunctionSpecializationLegacyPass, "function-specialization", - "Propagate constant arguments by specializing the function", false, false) - -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(FunctionSpecializationLegacyPass, "function-specialization", - "Propagate constant arguments by specializing the function", - false, false) - -ModulePass *llvm::createFunctionSpecializationPass() { - return new FunctionSpecializationLegacyPass(); -} diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -704,6 +704,26 @@ for (auto &BB : *F) BBExecutable.erase(&BB); } + + void solveWhileResolvedUndefsIn(Module &M) { + bool ResolvedUndefs = true; + while (ResolvedUndefs) { + solve(); + ResolvedUndefs = false; + for (Function &F : M) + ResolvedUndefs |= resolvedUndefsIn(F); + } + } + + void solveWhileResolvedUndefsIn(SmallVectorImpl &WorkList) { + bool ResolvedUndefs = true; + while (ResolvedUndefs) { + solve(); + ResolvedUndefs = false; + for (Function *F : WorkList) + ResolvedUndefs |= resolvedUndefsIn(*F); + } + } }; } // namespace llvm @@ -1771,6 +1791,9 @@ } } + LLVM_DEBUG(if (MadeChange) dbgs() + << "\nResolved undefs in " << F.getName() << '\n'); + return MadeChange; } @@ -1834,6 +1857,15 @@ return Visitor->resolvedUndefsIn(F); } +void SCCPSolver::solveWhileResolvedUndefsIn(Module &M) { + Visitor->solveWhileResolvedUndefsIn(M); +} + +void 
+SCCPSolver::solveWhileResolvedUndefsIn(SmallVectorImpl &WorkList) { + Visitor->solveWhileResolvedUndefsIn(WorkList); +} + bool SCCPSolver::isBlockExecutable(BasicBlock *BB) const { return Visitor->isBlockExecutable(BB); } diff --git a/llvm/test/Transforms/FunctionSpecialization/bug52821-use-after-free.ll b/llvm/test/Transforms/FunctionSpecialization/bug52821-use-after-free.ll --- a/llvm/test/Transforms/FunctionSpecialization/bug52821-use-after-free.ll +++ b/llvm/test/Transforms/FunctionSpecialization/bug52821-use-after-free.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=function-specialization -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -S < %s | FileCheck %s %mystruct = type { i32, [2 x i64] } @@ -8,17 +8,11 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: br i1 true, label [[FOR_COND2:%.*]], label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: call void @callee(ptr nonnull null) -; CHECK-NEXT: br label [[FOR_COND]] +; CHECK-NEXT: br label [[FOR_COND2:%.*]] ; CHECK: for.cond2: -; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[FOR_BODY2:%.*]] +; CHECK-NEXT: br label [[FOR_BODY2:%.*]] ; CHECK: for.body2: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[MYSTRUCT:%.*]], ptr null, i64 0, i32 1, i64 3 ; CHECK-NEXT: br label [[FOR_COND2]] -; CHECK: for.end: -; CHECK-NEXT: ret ptr [[ARG:%.*]] ; entry: br label %for.cond @@ -48,7 +42,7 @@ ; CHECK-LABEL: @caller( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = call ptr @myfunc(ptr undef) -; CHECK-NEXT: ret ptr [[CALL]] +; CHECK-NEXT: ret ptr undef ; entry: %call = call ptr @myfunc(ptr undef) @@ -56,3 +50,4 @@ } declare void @callee(ptr) + diff --git a/llvm/test/Transforms/FunctionSpecialization/bug55000-read-uninitialized-value.ll b/llvm/test/Transforms/FunctionSpecialization/bug55000-read-uninitialized-value.ll --- 
a/llvm/test/Transforms/FunctionSpecialization/bug55000-read-uninitialized-value.ll +++ b/llvm/test/Transforms/FunctionSpecialization/bug55000-read-uninitialized-value.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=function-specialization -func-specialization-max-iters=2 -func-specialization-size-threshold=20 -func-specialization-avg-iters-cost=20 -function-specialization-for-literal-constant=true -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -func-specialization-max-iters=2 -func-specialization-max-clones=1 -function-specialization-for-literal-constant=true -S < %s | FileCheck %s declare hidden i1 @compare(ptr) align 2 declare hidden { i8, ptr } @getType(ptr) align 2 diff --git a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll --- a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll +++ b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes='default' -enable-function-specialization < %s | FileCheck %s +; RUN: opt -S --passes='default' -specialize-functions < %s | FileCheck %s define dso_local i32 @g0(i32 noundef %x) local_unnamed_addr { entry: diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-always-inline.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-always-inline.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-always-inline.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-always-inline.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=function-specialization -func-specialization-avg-iters-cost=3 -func-specialization-size-threshold=10 -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -func-specialization-avg-iters-cost=3 -func-specialization-size-threshold=10 -S < %s | FileCheck %s ; CHECK-NOT: foo.{{[0-9]+}} diff --git 
a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll @@ -4,7 +4,7 @@ ; Note that this test case shows that function specialization pass would ; transform the function even if no specialization happened. -; RUN: opt -passes=function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s %struct = type { i8, i16, i32, i64, i64} @Global = internal constant %struct {i8 0, i16 1, i32 2, i64 3, i64 4} @@ -18,8 +18,7 @@ define internal i64 @func(ptr %x, ptr %binop) { ; CHECK-LABEL: @func( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 [[BINOP:%.*]](ptr [[X:%.*]]) -; CHECK-NEXT: ret i64 [[TMP0]] +; CHECK-NEXT: unreachable ; entry: %tmp0 = call i64 %binop(ptr %x) diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression2.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression2.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression2.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s ; Check that we don't crash and specialise on a constant expression. 
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression3.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression3.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression3.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s define i32 @main() { ; CHECK-LABEL: @main( diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression4.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression4.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression4.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s ; Check that we don't crash and specialise on a function call with byval attribute. 
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression5.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression5.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression5.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression5.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=function-specialization -force-function-specialization -func-specialization-on-address -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -func-specialization-on-address -S < %s | FileCheck %s ; Check that we don't crash and specialise on a scalar global variable with byval attribute. diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=function-specialization -function-specialization-for-literal-constant=true -func-specialization-size-threshold=10 -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -function-specialization-for-literal-constant=true -func-specialization-size-threshold=10 -S < %s | FileCheck %s ; Check that the literal constant parameter could be specialized. 
; CHECK: @foo.1( diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=function-specialization -func-specialization-avg-iters-cost=5 -func-specialization-size-threshold=10 -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -func-specialization-avg-iters-cost=5 -func-specialization-size-threshold=10 -S < %s | FileCheck %s ; Check that the loop depth results in a larger specialization bonus. ; CHECK: @foo.1( diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=function-specialization -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -S < %s | FileCheck %s ; CHECK-NOT: @compute.1 ; CHECK-NOT: @compute.2 diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize2.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize2.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize2.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=function-specialization -func-specialization-size-threshold=3 -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -func-specialization-size-threshold=3 -S < %s | FileCheck %s ; Checks for callsites that have been annotated with MinSize. 
No specialisation ; expected here: diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=function-specialization -func-specialization-size-threshold=3 -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -func-specialization-size-threshold=3 -S < %s | FileCheck %s ; Checks for callsites that have been annotated with MinSize. We only expect ; specialisation for the call that does not have the attribute: diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s ; Function @foo has function attribute 'noduplicate', so check that we don't ; specialize it: diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup2.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup2.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nodup2.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s ; Check that function foo does not gets 
specialised as it contains an intrinsic ; that is marked as NoDuplicate. diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-noexec.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-noexec.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-noexec.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-noexec.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s ; The if.then block is not executed, so check that we don't specialise here. diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=function-specialization -force-function-specialization -S < %s | FileCheck %s -; RUN: opt -passes=function-specialization -force-function-specialization -func-specialization-on-address=0 -S < %s | FileCheck %s -; RUN: opt -passes=function-specialization -force-function-specialization -func-specialization-on-address=1 -S < %s | FileCheck %s --check-prefix=ON-ADDRESS +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -func-specialization-on-address=0 -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -func-specialization-on-address=1 -S < %s | FileCheck %s --check-prefix=ON-ADDRESS ; Global B is not constant. 
We do not specialise on addresses unless we ; enable that: diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nothing-todo.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nothing-todo.ll deleted file mode 100644 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nothing-todo.ll +++ /dev/null @@ -1,51 +0,0 @@ -; REQUIRES: asserts -; RUN: opt -passes=function-specialization -debug -S < %s 2>&1 | FileCheck %s - -; The purpose of this test is to check that we don't run the solver as there's -; nothing to do here. For a test that doesn't trigger function specialisation, -; it is intentionally 'big' because we also want to check that the ssa.copy -; intrinsics that are introduced by the solver are cleaned up if we bail -; early. Thus, first check the debug messages for the introduction of these -; intrinsics: - -; CHECK: FnSpecialization: Analysing decl: foo -; CHECK: Found replacement{{.*}} call i32 @llvm.ssa.copy.i32 -; CHECK: Found replacement{{.*}} call i32 @llvm.ssa.copy.i32 - -; Then, make sure the solver didn't run: - -; CHECK-NOT: Running solver - -; Finally, check the absence and thus removal of these intrinsics: - -; CHECK-LABEL: @foo -; CHECK-NOT: call i32 @llvm.ssa.copy.i32 - -@N = external dso_local global i32, align 4 -@B = external dso_local global ptr, align 8 -@A = external dso_local global ptr, align 8 - -define dso_local i32 @foo() { -entry: - br label %for.cond - -for.cond: - %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %0 = load i32, ptr @N, align 4 - %cmp = icmp slt i32 %i.0, %0 - br i1 %cmp, label %for.body, label %for.cond.cleanup - -for.cond.cleanup: - ret i32 undef - -for.body: - %1 = load ptr, ptr @B, align 8 - %idxprom = sext i32 %i.0 to i64 - %arrayidx = getelementptr inbounds i32, ptr %1, i64 %idxprom - %2 = load i32, ptr %arrayidx, align 4 - %3 = load ptr, ptr @A, align 8 - %arrayidx2 = getelementptr inbounds i32, ptr %3, i64 %idxprom - store i32 %2, ptr 
%arrayidx2, align 4 - %inc = add nsw i32 %i.0, 1 - br label %for.cond -} diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-poison.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-poison.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-poison.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-poison.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s ; Check that we don't crash and specialise on a poison value. diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll @@ -1,6 +1,6 @@ -; RUN: opt -passes=function-specialization,inline,instcombine -force-function-specialization -func-specialization-max-iters=2 -S < %s | FileCheck %s --check-prefix=ITERS2 -; RUN: opt -passes=function-specialization,inline,instcombine -force-function-specialization -func-specialization-max-iters=3 -S < %s | FileCheck %s --check-prefix=ITERS3 -; RUN: opt -passes=function-specialization,inline,instcombine -force-function-specialization -func-specialization-max-iters=4 -S < %s | FileCheck %s --check-prefix=ITERS4 +; RUN: opt -passes=ipsccp,inline,instcombine -specialize-functions -force-function-specialization -func-specialization-max-iters=2 -S < %s | FileCheck %s --check-prefix=ITERS2 +; RUN: opt -passes=ipsccp,inline,instcombine -specialize-functions -force-function-specialization -func-specialization-max-iters=3 -S < %s | FileCheck %s 
--check-prefix=ITERS3 +; RUN: opt -passes=ipsccp,inline,instcombine -specialize-functions -force-function-specialization -func-specialization-max-iters=4 -S < %s | FileCheck %s --check-prefix=ITERS4 @low = internal constant i32 0, align 4 @high = internal constant i32 6, align 4 diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive2.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive2.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive2.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=function-specialization -force-function-specialization -func-specialization-max-iters=2 -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -func-specialization-max-iters=2 -S < %s | FileCheck %s ; Volatile store preventing recursive specialisation: ; diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive3.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive3.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive3.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=function-specialization -force-function-specialization -func-specialization-max-iters=2 -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -func-specialization-max-iters=2 -S < %s | FileCheck %s ; Duplicate store preventing recursive specialisation: ; diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive4.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive4.ll +++ 
b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive4.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=function-specialization -force-function-specialization -func-specialization-max-iters=2 -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -func-specialization-max-iters=2 -S < %s | FileCheck %s ; Alloca is not an integer type: ; diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-stats.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-stats.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-stats.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-stats.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt -stats -passes=function-specialization -S -force-function-specialization < %s 2>&1 | FileCheck %s +; RUN: opt -stats -passes=ipsccp -specialize-functions -S -force-function-specialization < %s 2>&1 | FileCheck %s ; CHECK: 2 function-specialization - Number of functions specialized diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=function-specialization -func-specialization-size-threshold=3 -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -func-specialization-size-threshold=3 -S < %s | FileCheck %s define i64 @main(i64 %x, i1 %flag) { ; diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions 
have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=function-specialization,deadargelim -force-function-specialization -S < %s | FileCheck %s -; RUN: opt -passes=function-specialization,deadargelim -func-specialization-max-iters=1 -force-function-specialization -S < %s | FileCheck %s -; RUN: opt -passes=function-specialization,deadargelim -func-specialization-max-iters=0 -force-function-specialization -S < %s | FileCheck %s --check-prefix=DISABLED -; RUN: opt -passes=function-specialization,deadargelim -func-specialization-avg-iters-cost=1 -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp,deadargelim -specialize-functions -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp,deadargelim -specialize-functions -func-specialization-max-iters=1 -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp,deadargelim -specialize-functions -func-specialization-max-iters=0 -force-function-specialization -S < %s | FileCheck %s --check-prefix=DISABLED +; RUN: opt -passes=ipsccp,deadargelim -specialize-functions -func-specialization-avg-iters-cost=1 -force-function-specialization -S < %s | FileCheck %s ; DISABLED-NOT: @func.1( ; DISABLED-NOT: @func.2( @@ -43,10 +43,11 @@ } define i32 @main(ptr %0, i32 %1) { -; CHECK: [[TMP3:%.*]] = call i32 @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) +; CHECK: call void @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) %3 = call i32 @func(ptr %0, i32 %1, ptr nonnull @increment) -; CHECK: [[TMP4:%.*]] = call i32 @func.1(ptr [[TMP0]], i32 [[TMP3]]) +; CHECK: call void @func.1(ptr [[TMP0]], i32 0) %4 = call i32 @func(ptr %0, i32 %3, ptr nonnull @decrement) +; CHECK: ret i32 0 ret i32 %4 } @@ -63,10 +64,10 @@ ; CHECK: call void @decrement(ptr [[TMP9]]) ; CHECK: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4 ; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1 -; CHECK: [[TMP12:%.*]] = call i32 @func.1(ptr [[TMP0]], i32 [[TMP11]]) -; CHECK: br label 
[[TMP13]] -; CHECK: 13: -; CHECK: ret i32 0 +; CHECK: call void @func.1(ptr [[TMP0]], i32 [[TMP11]]) +; CHECK: br label [[TMP12:%.*]] +; CHECK: 12: +; CHECK: ret void ; ; ; CHECK: @func.2( @@ -82,6 +83,7 @@ ; CHECK: call void @increment(ptr [[TMP9]]) ; CHECK: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4 ; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1 -; CHECK: [[TMP12:%.*]] = call i32 @func.2(ptr [[TMP0]], i32 [[TMP11]]) -; CHECK: br label [[TMP13]] -; CHECK: ret i32 0 +; CHECK: call void @func.2(ptr [[TMP0]], i32 [[TMP11]]) +; CHECK: br label [[TMP12:%.*]] +; CHECK: 12: +; CHECK: ret void diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll @@ -1,8 +1,8 @@ -; RUN: opt -passes=function-specialization -func-specialization-avg-iters-cost=3 -S < %s | \ +; RUN: opt -passes=ipsccp -specialize-functions -func-specialization-avg-iters-cost=3 -S < %s | \ ; RUN: FileCheck %s --check-prefixes=COMMON,DISABLED -; RUN: opt -passes=function-specialization -force-function-specialization -S < %s | \ +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -S < %s | \ ; RUN: FileCheck %s --check-prefixes=COMMON,FORCE -; RUN: opt -passes=function-specialization -func-specialization-avg-iters-cost=3 -force-function-specialization -S < %s | \ +; RUN: opt -passes=ipsccp -specialize-functions -func-specialization-avg-iters-cost=3 -force-function-specialization -S < %s | \ ; RUN: FileCheck %s --check-prefixes=COMMON,FORCE ; Test for specializing a constant global. 
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll @@ -1,7 +1,7 @@ -; RUN: opt -passes=function-specialization -force-function-specialization \ +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization \ ; RUN: -func-specialization-max-clones=2 -S < %s | FileCheck %s -; RUN: opt -passes=function-specialization -force-function-specialization \ +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization \ ; RUN: -func-specialization-max-clones=1 -S < %s | FileCheck %s --check-prefix=CONST1 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization5.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization5.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization5.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization5.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s ; There's nothing to specialize here as both calls are the same, so check that: ; diff --git a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll --- a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll +++ b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes=function-specialization < %s | FileCheck %s +; RUN: opt -S --passes=ipsccp -specialize-functions < %s | FileCheck %s define dso_local i32 @p0(i32 noundef %x) { 
entry: %add = add nsw i32 %x, 1 @@ -11,6 +11,37 @@ ret i32 %sub } +; CHECK-LABEL: define dso_local i32 @f0 +; CHECK: tail call fastcc i32 @g.[[#A:]]({{.*}}@p0) +; +define dso_local i32 @f0(i32 noundef %x) { +entry: + %call = tail call fastcc i32 @g(i32 noundef %x, ptr noundef nonnull @p0) + ret i32 %call +} + +; CHECK-LABEL: define dso_local i32 @f1 +; CHECK: tail call fastcc i32 @g.[[#B:]]({{.*}}@p1) +; +define dso_local i32 @f1(i32 noundef %x) { +entry: + %call = tail call fastcc i32 @g(i32 noundef %x, ptr noundef nonnull @p1) + ret i32 %call +} + +; @g gets fully specialized +; CHECK-NOT: define internal fastcc i32 @g( + +define internal fastcc i32 @g(i32 noundef %x, ptr nocapture noundef readonly %p) noinline { +entry: + %pcall = tail call i32 %p(i32 noundef %x) + %fcall = tail call fastcc i32 @f(i32 noundef %pcall, ptr noundef nonnull %p) + ret i32 %fcall +} + +; CHECK-LABEL: define dso_local i32 @g0 +; CHECK: tail call fastcc i32 @f.[[#C:]]({{.*}}@p0) +; define dso_local i32 @g0(i32 noundef %x) { entry: %call = tail call fastcc i32 @f(i32 noundef %x, ptr noundef nonnull @p0) @@ -24,6 +55,9 @@ ret i32 %add } +; CHECK-LABEL: define dso_local i32 @g1 +; CHECK: tail call fastcc i32 @f.[[#D:]]({{.*}}@p1) +; define dso_local i32 @g1(i32 noundef %x) { entry: %call = tail call fastcc i32 @f(i32 noundef %x, ptr noundef nonnull @p1) @@ -38,5 +72,11 @@ ; Check that a single argument, that cannot be used for specialisation, does not ; prevent specialisation based on other arguments. -; CHECK: @f.1 -; CHECK: @f.2 +; +; Also check that for callsites which reside in the body of newly created +; (specialized) functions, the lattice value of the arguments is known. 
+; +; CHECK-DAG: define internal fastcc i32 @g.[[#A]] +; CHECK-DAG: define internal fastcc i32 @g.[[#B]] +; CHECK-DAG: define internal fastcc i32 @f.[[#C]] +; CHECK-DAG: define internal fastcc i32 @f.[[#D]] diff --git a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll --- a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll +++ b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=function-specialization -force-function-specialization -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -force-function-specialization -S < %s | FileCheck %s define i64 @main(i64 %x, i64 %y, i1 %flag) { ; CHECK-LABEL: @main( @@ -70,7 +70,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP0:%.*]] = call i64 @minus(i64 [[X:%.*]], i64 [[Y:%.*]]) ; CHECK-NEXT: [[CMP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]]) -; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus) +; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.2(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus) ; CHECK-LABEL: @compute.3 ; CHECK-NEXT: entry: diff --git a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll --- a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll +++ b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll @@ -1,6 +1,6 @@ -; RUN: opt -S --passes=function-specialization \ +; RUN: opt -S --passes=ipsccp -specialize-functions \ ; RUN: -force-function-specialization < %s | FileCheck %s -check-prefix CHECK-NOLIT -; RUN: opt -S --passes=function-specialization \ +; RUN: opt -S --passes=ipsccp -specialize-functions \ ; RUN: -function-specialization-for-literal-constant \ ; RUN: -force-function-specialization < %s | FileCheck %s 
-check-prefix CHECK-LIT diff --git a/llvm/test/Transforms/FunctionSpecialization/no-spec-unused-arg.ll b/llvm/test/Transforms/FunctionSpecialization/no-spec-unused-arg.ll --- a/llvm/test/Transforms/FunctionSpecialization/no-spec-unused-arg.ll +++ b/llvm/test/Transforms/FunctionSpecialization/no-spec-unused-arg.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes=function-specialization -force-function-specialization -function-specialization-for-literal-constant < %s | FileCheck %s +; RUN: opt -S --passes=ipsccp -specialize-functions -force-function-specialization -function-specialization-for-literal-constant < %s | FileCheck %s define internal i32 @f(i32 %x, i32 %y) noinline { ret i32 %x } @@ -17,4 +17,4 @@ ; to be a constant without the need for function specialisation and ; the second parameter is unused. -; CHECK-NOT: @f. \ No newline at end of file +; CHECK-NOT: @f. diff --git a/llvm/test/Transforms/FunctionSpecialization/noinline.ll b/llvm/test/Transforms/FunctionSpecialization/noinline.ll --- a/llvm/test/Transforms/FunctionSpecialization/noinline.ll +++ b/llvm/test/Transforms/FunctionSpecialization/noinline.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes=function-specialization < %s | FileCheck %s +; RUN: opt -S --passes=ipsccp -specialize-functions < %s | FileCheck %s define dso_local i32 @p0(i32 noundef %x) { entry: %add = add nsw i32 %x, 1 diff --git a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll --- a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll +++ b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=function-specialization -func-specialization-size-threshold=3 -S < %s | FileCheck %s +; RUN: opt -passes=ipsccp -specialize-functions -func-specialization-size-threshold=3 -S < %s | FileCheck %s define i64 @main(i64 %x, i1 %flag) { entry: diff --git 
a/llvm/test/Transforms/FunctionSpecialization/specialization-order.ll b/llvm/test/Transforms/FunctionSpecialization/specialization-order.ll --- a/llvm/test/Transforms/FunctionSpecialization/specialization-order.ll +++ b/llvm/test/Transforms/FunctionSpecialization/specialization-order.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes=function-specialization,deadargelim -force-function-specialization < %s | FileCheck %s +; RUN: opt -S --passes=ipsccp,deadargelim -specialize-functions -force-function-specialization < %s | FileCheck %s define dso_local i32 @add(i32 %x, i32 %y) { entry: %add = add nsw i32 %y, %x diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll --- a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll +++ b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=function-specialization -func-specialization-max-clones=0 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=NONE -; RUN: opt -passes=function-specialization -func-specialization-max-clones=1 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=ONE -; RUN: opt -passes=function-specialization -func-specialization-max-clones=2 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=TWO -; RUN: opt -passes=function-specialization -func-specialization-max-clones=3 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=THREE +; RUN: opt -passes=ipsccp -specialize-functions -func-specialization-max-clones=0 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=NONE +; RUN: opt -passes=ipsccp -specialize-functions -func-specialization-max-clones=1 -func-specialization-size-threshold=14 -S < %s | FileCheck %s 
--check-prefix=ONE +; RUN: opt -passes=ipsccp -specialize-functions -func-specialization-max-clones=2 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=TWO +; RUN: opt -passes=ipsccp -specialize-functions -func-specialization-max-clones=3 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=THREE ; Make sure that we iterate correctly after sorting the specializations: ; FnSpecialization: Specializations for function compute