diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h --- a/llvm/include/llvm/IR/Value.h +++ b/llvm/include/llvm/IR/Value.h @@ -290,9 +290,7 @@ /// \note It is an error to call V->takeName(V). void takeName(Value *V); -#ifndef NDEBUG std::string getNameOrAsOperand() const; -#endif /// Change all uses of this to point to a new Value. /// diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -62,16 +62,39 @@ namespace llvm { // Bookkeeping struct to pass data from the analysis and profitability phase // to the actual transform helper functions. -struct SpecializationInfo { +struct SpecInfo { SmallVector Args; // Stores the {formal,actual} argument pairs. InstructionCost Gain; // Profitability: Gain = Bonus - Cost. Function *Clone; // The definition of the specialized function. + + bool operator<(const SpecInfo &Other) const { + if (Gain == Other.Gain) { + if (Args.size() == Other.Args.size()) { + for (size_t I = 0; I < Args.size(); ++I) + if (Args[I] < Other.Args[I]) + return true; + return false; + } + return Args.size() < Other.Args.size(); + } + return Gain < Other.Gain; + } + + bool operator>(const SpecInfo &Other) const { return Other < *this; } +}; + +using SpecSet = std::set>; + +struct SpecMapEntry { + Function *F; + SpecSet Specs; + SpecSet::iterator IterEnd; + + SpecMapEntry(Function *F_, SpecSet &&Specs_) + : F(F_), Specs(Specs_), IterEnd(Specs.begin()) {} }; -using CallSpecBinding = std::pair; -// We are using MapVector because it guarantees deterministic iteration -// order across executions. 
-using SpecializationMap = SmallMapVector; +using SpecMap = SmallVector; class FunctionSpecializer { @@ -88,10 +111,6 @@ std::function GetTTI; std::function GetAC; - // The number of functions specialised, used for collecting statistics and - // also in the cost model. - unsigned NbFunctionsSpecialized = 0; - SmallPtrSet SpecializedFuncs; SmallPtrSet FullySpecialized; DenseMap FunctionMetrics; @@ -143,12 +162,11 @@ /// applying them. /// /// \returns true if any specializations have been found. - bool findSpecializations(Function *F, InstructionCost Cost, - SmallVectorImpl &WorkList); + bool findSpecializations(Function *F, InstructionCost Cost, SpecSet &Specs); bool isCandidateFunction(Function *F); - Function *createSpecialization(Function *F, CallSpecBinding &Specialization); + Function *createSpecialization(Function *F, SpecInfo &Specialization); /// Compute and return the cost of specializing function \p F. InstructionCost getSpecializationCost(Function *F); @@ -166,8 +184,7 @@ Constant *getCandidateConstant(Value *V); /// Redirects callsites of function \p F to its specialized copies. - void updateCallSites(Function *F, - SmallVectorImpl &Specializations); + void updateCallSites(Function *F, SpecSet &Specs, SpecSet::iterator IterEnd); }; } // namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h --- a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h +++ b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h @@ -17,6 +17,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constants.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Transforms/Utils/PredicateInfo.h" #include @@ -52,7 +53,51 @@ Argument *Formal; // The Formal argument being analysed. Constant *Actual; // A corresponding actual constant argument. 
- ArgInfo(Argument *F, Constant *A) : Formal(F), Actual(A){}; + ArgInfo(Argument *F, Constant *A) : Formal(F), Actual(A) {}; + + static bool LessThan(const Value *LHS, const Value *RHS) { + // First compare ConstantInt/ConstantFP. + auto *Int1 = dyn_cast(LHS); + auto *Int2 = dyn_cast(RHS); + if (Int1 && Int2) + return Int1->getValue().ult(Int2->getValue()); + auto *FP1 = dyn_cast(LHS); + auto *FP2 = dyn_cast(RHS); + if (FP1 && FP2) + return FP1->getValue() < FP2->getValue(); + if (Int1 && FP2) + return Int1->getValue().ult(FP2->getValue().bitcastToAPInt()); + if (FP1 && Int2) + return FP1->getValue().bitcastToAPInt().ult(Int2->getValue()); + // Let ConstantInt and ConstantFP be 'smaller' than anything else. + if (Int1 || FP1) + return true; + if (Int2 || FP2) + return false; + // Compare names if possible. + if (LHS->hasName() && RHS->hasName()) + return LHS->getName() < RHS->getName(); + // Compare operands (handles constant expressions). + auto *User1 = dyn_cast(LHS); + auto *User2 = dyn_cast(RHS); + if (User1 && User2) { + if (User1->getNumOperands() == User2->getNumOperands()) { + for (unsigned I = 0; I < User1->getNumOperands(); ++I) + if (LessThan(User1->getOperand(I), User2->getOperand(I))) + return true; + return false; + } + return User1->getNumOperands() < User2->getNumOperands(); + } + // Last and most expensive comparison. 
+ return LHS->getNameOrAsOperand() < RHS->getNameOrAsOperand(); + } + + bool operator<(const ArgInfo &Other) const { + if (Formal == Other.Formal) + return LessThan(Actual, Other.Actual); + return Formal->getArgNo() < Other.Formal->getArgNo(); + } }; class SCCPInstVisitor; diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -439,7 +439,6 @@ ST->reinsertValue(this); } -#ifndef NDEBUG std::string Value::getNameOrAsOperand() const { if (!getName().empty()) return std::string(getName()); @@ -449,7 +448,6 @@ printAsOperand(OS, false); return OS.str(); } -#endif void Value::assertModuleIsMaterializedImpl() const { #ifndef NDEBUG diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -64,7 +64,7 @@ #define DEBUG_TYPE "function-specialization" -STATISTIC(NumFuncSpecialized, "Number of functions specialized"); +STATISTIC(NumSpecsCreated, "Number of specializations created"); static cl::opt ForceFunctionSpecialization( "force-function-specialization", cl::init(false), cl::Hidden, @@ -239,46 +239,74 @@ /// /// \returns true if at least one function is specialized. 
bool FunctionSpecializer::run() { - bool Changed = false; - + SpecMap SM; for (Function &F : M) { if (!isCandidateFunction(&F)) continue; auto Cost = getSpecializationCost(&F); if (!Cost.isValid()) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialization cost.\n"); + LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialization cost for " + << F.getName() << "\n"); continue; } LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for " << F.getName() << " is " << Cost << "\n"); - SmallVector Specializations; - if (!findSpecializations(&F, Cost, Specializations)) { + SpecSet Specs; + if (!findSpecializations(&F, Cost, Specs)) { LLVM_DEBUG( - dbgs() << "FnSpecialization: No possible specializations found\n"); + dbgs() << "FnSpecialization: No possible specializations found for " + << F.getName() << "\n"); continue; } + SM.emplace_back(&F, std::move(Specs)); + } - Changed = true; - - SmallVector Clones; - for (CallSpecBinding &Specialization : Specializations) - Clones.push_back(createSpecialization(&F, Specialization)); + if (SM.size() == 0) + return false; - Solver.solveWhileResolvedUndefsIn(Clones); - updateCallSites(&F, Specializations); + unsigned NSpecsFound = 0; + for (auto &[_0, Specs, _1] : SM) + NSpecsFound += Specs.size(); + + // Truncate the total number of specializations. 
+ const auto MaxClonesPerModule = SM.size() * MaxClonesThreshold; + if (NSpecsFound > MaxClonesPerModule) { + for (unsigned I = 0; I < MaxClonesPerModule; ++I) { + auto It = std::max_element(SM.begin(), SM.end(), + [](const SpecMapEntry &LHS, const SpecMapEntry &RHS) { + if (LHS.IterEnd == LHS.Specs.end()) + return RHS.IterEnd != RHS.Specs.end(); + if (RHS.IterEnd == RHS.Specs.end()) + return false; + return *LHS.IterEnd < *RHS.IterEnd; + }); + ++(It->IterEnd); + } + } else { + for (auto &[_, Specs, IterEnd] : SM) + IterEnd = Specs.end(); } + SmallVector Clones; + for (auto &[F, Specs, IterEnd] : SM) + for (auto I = Specs.begin(); I != IterEnd; ++I) + Clones.push_back(createSpecialization(F, const_cast(*I))); + + Solver.solveWhileResolvedUndefsIn(Clones); + + for (auto &[F, Specs, IterEnd] : SM) + updateCallSites(F, Specs, IterEnd); + promoteConstantStackValues(); - LLVM_DEBUG(if (NbFunctionsSpecialized) dbgs() - << "FnSpecialization: Specialized " << NbFunctionsSpecialized - << " functions in module " << M.getName() << "\n"); + LLVM_DEBUG(dbgs() << "FnSpecialization: Created " << Clones.size() + << " specializations in module " << M.getName() << "\n"); - NumFuncSpecialized += NbFunctionsSpecialized; - return Changed; + NumSpecsCreated += Clones.size(); + return true; } void FunctionSpecializer::removeDeadFunctions() { @@ -325,26 +353,25 @@ /// applying them. /// /// \returns true if any specializations have been found. -bool FunctionSpecializer::findSpecializations( - Function *F, InstructionCost Cost, - SmallVectorImpl &WorkList) { +bool +FunctionSpecializer::findSpecializations(Function *F, InstructionCost Cost, + SpecSet &Specs) { // Get a list of interesting arguments. SmallVector Args; for (Argument &Arg : F->args()) if (isArgumentInteresting(&Arg)) Args.push_back(&Arg); - if (!Args.size()) + if (Args.empty()) return false; // Find all the call sites for the function. 
- SpecializationMap Specializations; for (User *U : F->users()) { if (!isa(U) && !isa(U)) continue; auto &CS = *cast(U); - // Skip irrelevant users. + // The user instruction does not call our function. if (CS.getCalledFunction() != F) continue; @@ -358,62 +385,37 @@ if (!Solver.isBlockExecutable(CS.getParent())) continue; + LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting CallSite " + << CS << "\n"); + // Examine arguments and create specialization candidates from call sites // with constant arguments. - bool Added = false; + SpecInfo Spec{{}, 0 - Cost, /*Clone=*/nullptr}; for (Argument *A : Args) { Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo())); if (!C) continue; - if (!Added) { - Specializations[&CS] = {{}, 0 - Cost, nullptr}; - Added = true; - } - - SpecializationInfo &S = Specializations.back().second; - S.Gain += getSpecializationBonus(A, C, Solver.getLoopInfo(*F)); - S.Args.push_back({A, C}); + Spec.Gain += getSpecializationBonus(A, C, Solver.getLoopInfo(*F)); + Spec.Args.push_back({A, C}); } - Added = false; - } + if (Spec.Args.empty() || (Spec.Gain <= 0 && !ForceFunctionSpecialization)) + continue; + auto Res = Specs.insert(std::move(Spec)); + LLVM_DEBUG( + if (Res.second) { + dbgs() << "FnSpecialization: Found Specialization for function " + << F->getName() << "\n"; + dbgs() << "FnSpecialization: Gain = " << Res.first->Gain << "\n"; + for (const ArgInfo &Arg : Res.first->Args) + dbgs() << "FnSpecialization: FormalArg = " + << Arg.Formal->getNameOrAsOperand() + << ", ActualArg = " << Arg.Actual->getNameOrAsOperand() + << "\n"; + }); - // Remove unprofitable specializations. - if (!ForceFunctionSpecialization) - Specializations.remove_if( - [](const auto &Entry) { return Entry.second.Gain <= 0; }); - - // Clear the MapVector and return the underlying vector. - WorkList = Specializations.takeVector(); - - // Sort the candidates in descending order. 
- llvm::stable_sort(WorkList, [](const auto &L, const auto &R) { - return L.second.Gain > R.second.Gain; - }); - - // Truncate the worklist to 'MaxClonesThreshold' candidates if necessary. - if (WorkList.size() > MaxClonesThreshold) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed " - << "the maximum number of clones threshold.\n" - << "FnSpecialization: Truncating worklist to " - << MaxClonesThreshold << " candidates.\n"); - WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end()); } - - LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function " - << F->getName() << "\n"; - for (const auto &Entry - : WorkList) { - dbgs() << "FnSpecialization: Gain = " << Entry.second.Gain - << "\n"; - for (const ArgInfo &Arg : Entry.second.Args) - dbgs() << "FnSpecialization: FormalArg = " - << Arg.Formal->getNameOrAsOperand() - << ", ActualArg = " << Arg.Actual->getNameOrAsOperand() - << "\n"; - }); - - return !WorkList.empty(); + return !Specs.empty(); } bool FunctionSpecializer::isCandidateFunction(Function *F) { @@ -451,21 +453,23 @@ Function * FunctionSpecializer::createSpecialization(Function *F, - CallSpecBinding &Specialization) { + SpecInfo &Specialization) { Function *Clone = cloneCandidateFunction(F); - Specialization.second.Clone = Clone; + Specialization.Clone = Clone; + + LLVM_DEBUG(dbgs() << "FnSpecialization: Creating clone " << Clone->getName() + << "\n"); // Initialize the lattice state of the arguments of the function clone, // marking the argument on which we specialized the function constant // with the given value. 
- Solver.markArgInFuncSpecialization(Clone, Specialization.second.Args); + Solver.markArgInFuncSpecialization(Clone, Specialization.Args); Solver.addArgumentTrackedFunction(Clone); Solver.markBlockExecutable(&Clone->front()); // Mark all the specialized functions SpecializedFuncs.insert(Clone); - NbFunctionsSpecialized++; return Clone; } @@ -484,9 +488,8 @@ return InstructionCost::getInvalid(); // Otherwise, set the specialization cost to be the cost of all the - // instructions in the function and penalty for specializing more functions. - unsigned Penalty = NbFunctionsSpecialized + 1; - return Metrics.NumInsts * InlineConstants::getInstrCost() * Penalty; + // instructions in the function. (FIXME: NumInsts contains cost already) + return Metrics.NumInsts * InlineConstants::getInstrCost(); } static InstructionCost getUserBonus(User *U, llvm::TargetTransformInfo &TTI, @@ -658,8 +661,8 @@ } /// Redirects callsites of function \p F to its specialized copies. -void FunctionSpecializer::updateCallSites( - Function *F, SmallVectorImpl &Specializations) { +void FunctionSpecializer::updateCallSites(Function *F, SpecSet &Specs, + SpecSet::iterator IterEnd) { SmallVector ToUpdate; for (User *U : F->users()) { if (auto *CS = dyn_cast(U)) @@ -672,9 +675,9 @@ for (CallBase *CS : ToUpdate) { // Decrement the counter if the callsite is either recursive or updated. 
bool ShouldDecrementCount = CS->getFunction() == F; - for (CallSpecBinding &Specialization : Specializations) { - Function *Clone = Specialization.second.Clone; - SmallVectorImpl &Args = Specialization.second.Args; + for (auto I = Specs.begin(); I != IterEnd; ++I) { + Function *Clone = I->Clone; + const SmallVectorImpl &Args = I->Args; if (any_of(Args, [CS, this](const ArgInfo &Arg) { unsigned ArgNo = Arg.Formal->getArgNo(); diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp --- a/llvm/lib/Transforms/IPO/SCCP.cpp +++ b/llvm/lib/Transforms/IPO/SCCP.cpp @@ -43,7 +43,7 @@ "Number of instructions replaced with (simpler) instruction"); static cl::opt SpecializeFunctions("specialize-functions", - cl::init(false), cl::Hidden, cl::desc("Enable function specialization")); + cl::init(true), cl::Hidden, cl::desc("Enable function specialization")); static cl::opt FuncSpecializationMaxIters( "func-specialization-max-iters", cl::init(1), cl::Hidden, cl::desc( diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll @@ -7,8 +7,8 @@ ; Global B is not constant. 
We do not specialise on addresses unless we ; enable that: -; ON-ADDRESS: call i32 @foo.1(i32 %x, ptr @A) -; ON-ADDRESS: call i32 @foo.2(i32 %y, ptr @B) +; ON-ADDRESS: call i32 @foo.2(i32 %x, ptr @A) +; ON-ADDRESS: call i32 @foo.1(i32 %y, ptr @B) target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-stats.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-stats.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-stats.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-stats.ll @@ -1,7 +1,7 @@ ; REQUIRES: asserts ; RUN: opt -stats -passes=ipsccp -specialize-functions -S -force-function-specialization < %s 2>&1 | FileCheck %s -; CHECK: 2 function-specialization - Number of functions specialized +; CHECK: 2 function-specialization - Number of specializations created define i64 @main(i64 %x, i1 %flag) { entry: diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll @@ -43,9 +43,9 @@ } define i32 @main(ptr %0, i32 %1) { -; CHECK: call void @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) +; CHECK: call void @func.1(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) %3 = call i32 @func(ptr %0, i32 %1, ptr nonnull @increment) -; CHECK: call void @func.1(ptr [[TMP0]], i32 0) +; CHECK: call void @func.2(ptr [[TMP0]], i32 0) %4 = call i32 @func(ptr %0, i32 %3, ptr nonnull @decrement) ; CHECK: ret i32 0 ret i32 %4 @@ -61,7 +61,7 @@ ; CHECK: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4 ; CHECK: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 ; CHECK: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]] -; CHECK: call void @decrement(ptr [[TMP9]]) +; CHECK: 
call void @increment(ptr [[TMP9]]) ; CHECK: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4 ; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1 ; CHECK: call void @func.1(ptr [[TMP0]], i32 [[TMP11]]) @@ -80,7 +80,7 @@ ; CHECK: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4 ; CHECK: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 ; CHECK: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]] -; CHECK: call void @increment(ptr [[TMP9]]) +; CHECK: call void @decrement(ptr [[TMP9]]) ; CHECK: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4 ; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1 ; CHECK: call void @func.2(ptr [[TMP0]], i32 [[TMP11]]) diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll @@ -14,8 +14,8 @@ define dso_local i32 @bar(i32 %x, i32 %y) { ; COMMON-LABEL: @bar -; FORCE: %call = call i32 @foo.1(i32 %x, ptr @A) -; FORCE: %call1 = call i32 @foo.2(i32 %y, ptr @B) +; FORCE: %call = call i32 @foo.2(i32 %x, ptr @A) +; FORCE: %call1 = call i32 @foo.1(i32 %y, ptr @B) ; DISABLED-NOT: %call1 = call i32 @foo.1( entry: %tobool = icmp ne i32 %x, 0 @@ -38,14 +38,14 @@ ; ; FORCE: define internal i32 @foo.1(i32 %x, ptr %b) { ; FORCE-NEXT: entry: -; FORCE-NEXT: %0 = load i32, ptr @A, align 4 +; FORCE-NEXT: %0 = load i32, ptr @B, align 4 ; FORCE-NEXT: %add = add nsw i32 %x, %0 ; FORCE-NEXT: ret i32 %add ; FORCE-NEXT: } ; ; FORCE: define internal i32 @foo.2(i32 %x, ptr %b) { ; FORCE-NEXT: entry: -; FORCE-NEXT: %0 = load i32, ptr @B, align 4 +; FORCE-NEXT: %0 = load i32, ptr @A, align 4 ; FORCE-NEXT: %add = add nsw i32 %x, %0 ; FORCE-NEXT: ret i32 %add ; FORCE-NEXT: } diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll 
b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll @@ -44,18 +44,18 @@ ; CHECK: define internal i32 @foo.1(i32 %x, ptr %b, ptr %c) { ; CHECK-NEXT: entry: -; CHECK-NEXT: %0 = load i32, ptr @A, align 4 +; CHECK-NEXT: %0 = load i32, ptr @B, align 4 ; CHECK-NEXT: %add = add nsw i32 %x, %0 -; CHECK-NEXT: %1 = load i32, ptr @C, align 4 +; CHECK-NEXT: %1 = load i32, ptr @D, align 4 ; CHECK-NEXT: %add1 = add nsw i32 %add, %1 ; CHECK-NEXT: ret i32 %add1 ; CHECK-NEXT: } ; CHECK: define internal i32 @foo.2(i32 %x, ptr %b, ptr %c) { ; CHECK-NEXT: entry: -; CHECK-NEXT: %0 = load i32, ptr @B, align 4 +; CHECK-NEXT: %0 = load i32, ptr @A, align 4 ; CHECK-NEXT: %add = add nsw i32 %x, %0 -; CHECK-NEXT: %1 = load i32, ptr @D, align 4 +; CHECK-NEXT: %1 = load i32, ptr @C, align 4 ; CHECK-NEXT: %add1 = add nsw i32 %add, %1 ; CHECK-NEXT: ret i32 %add1 ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll @@ -0,0 +1,51 @@ +; RUN: opt -S --passes=ipsccp -specialize-functions -func-specialization-max-clones=1 < %s | FileCheck %s +define internal i32 @f(i32 noundef %x, ptr nocapture noundef readonly %p, ptr nocapture noundef readonly %q) noinline { +entry: + %call = tail call i32 %p(i32 noundef %x) + %call1 = tail call i32 %q(i32 noundef %x) + %add = add nsw i32 %call1, %call + ret i32 %add +} + +define internal i32 @g(i32 noundef %x, ptr nocapture noundef readonly %p, ptr nocapture noundef readonly %q) noinline { +entry: + %call = tail call i32 %p(i32 noundef %x) + %call1 = tail call i32 %q(i32 noundef %x) + %sub = sub nsw i32 %call, %call1 + ret i32 %sub +} + +define i32 @h0(i32 noundef %x) { +entry: + %call = tail call 
i32 @f(i32 noundef %x, ptr noundef nonnull @pp, ptr noundef nonnull @qq) + ret i32 %call +} + +define i32 @h1(i32 noundef %x) { +entry: + %call = tail call i32 @f(i32 noundef %x, ptr noundef nonnull @qq, ptr noundef nonnull @pp) + ret i32 %call +} + +define i32 @h2(i32 noundef %x, ptr nocapture noundef readonly %p) { +entry: + %call = tail call i32 @g(i32 noundef %x, ptr noundef %p, ptr noundef nonnull @pp) + ret i32 %call +} + +define i32 @h3(i32 noundef %x, ptr nocapture noundef readonly %p) { +entry: + %call = tail call i32 @g(i32 noundef %x, ptr noundef %p, ptr noundef nonnull @qq) + ret i32 %call +} + +declare i32 @pp(i32 noundef) +declare i32 @qq(i32 noundef) + + +; Check that the global ranking causes two specialisations of +; `f` to be chosen, whereas the old algorithm would choose +; one specialisation of `f` and one of `g`. + +; CHECK-DAG: define internal i32 @f.1 +; CHECK-DAG: define internal i32 @f.2 diff --git a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll --- a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll +++ b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll @@ -6,14 +6,14 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]] ; CHECK: plus: -; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.1(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @plus, ptr @minus) +; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @plus, ptr @minus) ; CHECK-NEXT: br label [[MERGE:%.*]] ; CHECK: minus: -; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.2(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus) +; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus) ; CHECK-NEXT: br label [[MERGE]] ; CHECK: merge: ; CHECK-NEXT: [[PH:%.*]] = phi i64 [ [[CMP0]], [[PLUS]] ], [ [[CMP1]], [[MINUS]] ] -; CHECK-NEXT: [[CMP2:%.*]] = call i64
@compute.1(i64 [[PH]], i64 42, ptr @plus, ptr @minus) +; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.2(i64 [[PH]], i64 42, ptr @plus, ptr @minus) ; CHECK-NEXT: ret i64 [[CMP2]] ; entry: @@ -60,20 +60,20 @@ ret i64 %sub } -; CHECK-LABEL: @compute.1 -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 [[Y:%.*]]) -; CHECK-NEXT: [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]]) -; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus) - -; CHECK-LABEL: @compute.2 +; CHECK: define internal i64 @compute.1 ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP0:%.*]] = call i64 @minus(i64 [[X:%.*]], i64 [[Y:%.*]]) ; CHECK-NEXT: [[CMP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]]) -; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.2(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus) +; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus) -; CHECK-LABEL: @compute.3 +; CHECK: define internal i64 @compute.2 ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 [[Y:%.*]]) ; CHECK-NEXT: [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]]) -; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus) +; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus) + +; CHECK: define internal i64 @compute.3 +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP0:%.*]] = call i64 %binop1(i64 [[X:%.*]], i64 [[Y:%.*]]) +; CHECK-NEXT: [[CMP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]]) +; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y]], ptr %binop1, ptr @plus)