diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -29,15 +29,37 @@ // // Todos: // - Specializing recursive functions relies on running the transformation a -// number of times, which is controlled by option -// `func-specialization-max-iters`. Thus, increasing this value and the -// number of iterations, will linearly increase the number of times recursive -// functions get specialized, see also the discussion in -// https://reviews.llvm.org/D106426 for details. Perhaps there is a -// compile-time friendlier way to control/limit the number of specialisations -// for recursive functions. -// - Don't transform the function if function specialization does not trigger; -// the SCCPSolver may make IR changes. +// number of times, which is controlled by the option `funcspec-max-iters`. +// Perhaps there is a compile-time friendlier way for such specializations. +// (see also the discussion in https://reviews.llvm.org/D106426 for details) +// +// Cost Model: +// ----------- +// To determine whether a specialization is profitable, we compute its score as +// the ratio of latency savings over codesize increase. The score must exceed +// a minimum threshold, which is controlled by the option `funcspec-min-score`. +// +// To compute the latency savings attributed to each of the constant arguments, +// we traverse the use-def chain (ignoring ssa copies) up to a maximum depth +// controlled by the option `funcspec-max-user-depth`. The latency of a user is +// exponentiated if inside a loop nest, but the deeper in the use-def chain we +// are the less the latency is "worth". The average loop iteration count is +// controlled by the option `funcspec-avg-loop-iters`. Inlining benefits are +// estimated as part of the latency savings. 
+// +// To compute the codesize increase, we first estimate the initial codesize of +// the function as [number of instructions] + [number of inline candidates] x +// [small function size]. The "small function size" corresponds to the minimum +// size a function needs to have to be considered for specialization. This is +// controlled by the option `funcspec-min-function-size`. Similarly to latency +// savings, we accumulate the codesize reduction attributed to the users of a +// constant argument. To prevent a function from being specialized linearly to +// the number of times the Specializer runs (controlled by `funcspec-max-iters`) +// we penalize specialization on the same argument. The penalty is reflected on +// the codesize increase. +// +// When accumulating latency and codesize savings of constant arguments, we keep +// a set of visited users to avoid accounting for the same gains multiple times. // // References: // - 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable @@ -60,6 +82,15 @@ using namespace llvm; namespace llvm { +// Map of potential specializations for each function. The FunctionSpecializer +// keeps the discovered specialisation opportunities for the module in a single +// vector, where the specialisations of each function form a contiguous range. +// This map's value is the beginning and the end of that range. +using SpecMap = DenseMap>; + +// Just a shorter abbreviation. +using Cost = InstructionCost; + // Specialization signature, used to uniquely designate a specialization within // a function. struct SpecSig { @@ -95,23 +126,17 @@ SpecSig Sig; // Profitability of the specialization. - InstructionCost Gain; + Cost Score; // List of call sites, matching this specialization. 
SmallVector CallSites; - Spec(Function *F, const SpecSig &S, InstructionCost G) - : F(F), Sig(S), Gain(G) {} - Spec(Function *F, const SpecSig &&S, InstructionCost G) - : F(F), Sig(S), Gain(G) {} + Spec(Function *F, const SpecSig &S, Cost Score) + : F(F), Sig(S), Score(Score) {} + Spec(Function *F, const SpecSig &&S, Cost Score) + : F(F), Sig(S), Score(Score) {} }; -// Map of potential specializations for each function. The FunctionSpecializer -// keeps the discovered specialisation opportunities for the module in a single -// vector, where the specialisations of each function form a contiguous range. -// This map's value is the beginning and the end of that range. -using SpecMap = DenseMap>; - class FunctionSpecializer { /// The IPSCCP Solver. @@ -130,6 +155,7 @@ SmallPtrSet Specializations; SmallPtrSet FullySpecialized; DenseMap FunctionMetrics; + DenseMap NumSpecs; public: FunctionSpecializer( @@ -165,18 +191,13 @@ /// Remove any ssa_copy intrinsics that may have been introduced. void cleanUpSSA(); - // Compute the code metrics for function \p F. - CodeMetrics &analyzeFunction(Function *F); - /// @brief Find potential specialization opportunities. /// @param F Function to specialize - /// @param Cost Cost of specializing a function. Final gain is this cost - /// minus benefit /// @param AllSpecs A vector to add potential specializations to. /// @param SM A map for a function's specialisation range /// @return True, if any potential specializations were found - bool findSpecializations(Function *F, InstructionCost Cost, - SmallVectorImpl &AllSpecs, SpecMap &SM); + bool findSpecializations(Function *F, SpecMap &SM, + SmallVectorImpl &AllSpecs); bool isCandidateFunction(Function *F); @@ -186,12 +207,10 @@ /// @return The new, cloned function Function *createSpecialization(Function *F, const SpecSig &S); - /// Compute and return the cost of specializing function \p F. 
- InstructionCost getSpecializationCost(Function *F); - /// Compute a bonus for replacing argument \p A with constant \p C. - InstructionCost getSpecializationBonus(Argument *A, Constant *C, - const LoopInfo &LI); + void getSpecializationBonus(Argument *A, Constant *C, const LoopInfo &LI, + Cost &Latency, Cost &CodeSize, + DenseSet &Visited); /// Determine if it is possible to specialise the function for constant values /// of the formal parameter \p A. diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -5,45 +5,6 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This specialises functions with constant parameters. Constant parameters -// like function pointers and constant globals are propagated to the callee by -// specializing the function. The main benefit of this pass at the moment is -// that indirect calls are transformed into direct calls, which provides inline -// opportunities that the inliner would not have been able to achieve. That's -// why function specialisation is run before the inliner in the optimisation -// pipeline; that is by design. Otherwise, we would only benefit from constant -// passing, which is a valid use-case too, but hasn't been explored much in -// terms of performance uplifts, cost-model and compile-time impact. -// -// Current limitations: -// - It does not yet handle integer ranges. We do support "literal constants", -// but that's off by default under an option. 
-// - The cost-model could be further looked into (it mainly focuses on inlining -// benefits), -// -// Ideas: -// - With a function specialization attribute for arguments, we could have -// a direct way to steer function specialization, avoiding the cost-model, -// and thus control compile-times / code-size. -// -// Todos: -// - Specializing recursive functions relies on running the transformation a -// number of times, which is controlled by option -// `func-specialization-max-iters`. Thus, increasing this value and the -// number of iterations, will linearly increase the number of times recursive -// functions get specialized, see also the discussion in -// https://reviews.llvm.org/D106426 for details. Perhaps there is a -// compile-time friendlier way to control/limit the number of specialisations -// for recursive functions. -// - Don't transform the function if function specialization does not trigger; -// the SCCPSolver may make IR changes. -// -// References: -// - 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable -// it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q -// -//===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/FunctionSpecialization.h" #include "llvm/ADT/Statistic.h" @@ -76,6 +37,16 @@ "The maximum number of clones allowed for a single function " "specialization")); +static cl::opt MaxUserDepth( + "funcspec-max-user-depth", cl::init(3), cl::Hidden, cl::desc( + "The maximum recursion depth on a use-def chain for calculating " + "the specialization bonus of a constant argument")); + +static cl::opt MinScore( + "funcspec-min-score", cl::init(80), cl::Hidden, cl::desc( + "Do not specialize functions with score lower than this value " + "(the ratio of latency gains over codesize increase)")); + static cl::opt MinFunctionSize( "funcspec-min-function-size", cl::init(100), cl::Hidden, cl::desc( "Don't specialize functions that have less than this number of " 
@@ -273,17 +244,7 @@ if (!isCandidateFunction(&F)) continue; - auto Cost = getSpecializationCost(&F); - if (!Cost.isValid()) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialization cost for " - << F.getName() << "\n"); - continue; - } - - LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for " - << F.getName() << " is " << Cost << "\n"); - - if (!findSpecializations(&F, Cost, AllSpecs, SM)) { + if (!findSpecializations(&F, SM, AllSpecs)) { LLVM_DEBUG( dbgs() << "FnSpecialization: No possible specializations found for " << F.getName() << "\n"); @@ -303,8 +264,8 @@ // Choose the most profitable specialisations, which fit in the module // specialization budget, which is derived from maximum number of // specializations per specialization candidate function. - auto CompareGain = [&AllSpecs](unsigned I, unsigned J) { - return AllSpecs[I].Gain > AllSpecs[J].Gain; + auto CompareScore = [&AllSpecs](unsigned I, unsigned J) { + return AllSpecs[I].Score > AllSpecs[J].Score; }; const unsigned NSpecs = std::min(NumCandidates * MaxClones, unsigned(AllSpecs.size())); @@ -316,11 +277,11 @@ << "FnSpecialization: Specializing the " << NSpecs << " most profitable candidates.\n"); - std::make_heap(BestSpecs.begin(), BestSpecs.begin() + NSpecs, CompareGain); + std::make_heap(BestSpecs.begin(), BestSpecs.begin() + NSpecs, CompareScore); for (unsigned I = NSpecs, N = AllSpecs.size(); I < N; ++I) { BestSpecs[NSpecs] = I; - std::push_heap(BestSpecs.begin(), BestSpecs.end(), CompareGain); - std::pop_heap(BestSpecs.begin(), BestSpecs.end(), CompareGain); + std::push_heap(BestSpecs.begin(), BestSpecs.end(), CompareScore); + std::pop_heap(BestSpecs.begin(), BestSpecs.end(), CompareScore); } } @@ -328,7 +289,7 @@ for (unsigned I = 0; I < NSpecs; ++I) { const Spec &S = AllSpecs[BestSpecs[I]]; dbgs() << "FnSpecialization: Function " << S.F->getName() - << " , gain " << S.Gain << "\n"; + << " , score " << S.Score << "\n"; for (const ArgInfo &Arg : S.Sig.Args) dbgs() << 
"FnSpecialization: FormalArg = " << Arg.Formal->getNameOrAsOperand() @@ -383,24 +344,6 @@ FullySpecialized.clear(); } -// Compute the code metrics for function \p F. -CodeMetrics &FunctionSpecializer::analyzeFunction(Function *F) { - auto I = FunctionMetrics.insert({F, CodeMetrics()}); - CodeMetrics &Metrics = I.first->second; - if (I.second) { - // The code metrics were not cached. - SmallPtrSet EphValues; - CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues); - for (BasicBlock &BB : *F) - Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues); - - LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function " - << F->getName() << " is " << Metrics.NumInsts - << " instructions\n"); - } - return Metrics; -} - /// Clone the function \p F and remove the ssa_copy intrinsics added by /// the SCCPSolver in the cloned version. static Function *cloneCandidateFunction(Function *F) { @@ -410,13 +353,26 @@ return Clone; } -bool FunctionSpecializer::findSpecializations(Function *F, InstructionCost Cost, - SmallVectorImpl &AllSpecs, - SpecMap &SM) { - // A mapping from a specialisation signature to the index of the respective - // entry in the all specialisation array. Used to ensure uniqueness of - // specialisations. - DenseMap UM; +bool FunctionSpecializer::findSpecializations(Function *F, SpecMap &SM, + SmallVectorImpl &AllSpecs) { + // Analyze the function if not done yet. + auto [It, Inserted] = FunctionMetrics.try_emplace(F, CodeMetrics()); + CodeMetrics &Metrics = It->second; + if (Inserted) { + // The code metrics were not cached. + SmallPtrSet EphValues; + CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues); + for (BasicBlock &BB : *F) + Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues); + } + // If the code metrics reveal that we shouldn't duplicate the function, we + // shouldn't specialize it. Set the specialization cost to Invalid. 
+ // Or if the lines of codes implies that this function is easy to get + // inlined so that we shouldn't specialize it. + if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() || + (!ForceSpecialization && !F->hasFnAttribute(Attribute::NoInline) && + Metrics.NumInsts < MinFunctionSize)) + return false; // Get a list of interesting arguments. SmallVector Args; @@ -427,6 +383,14 @@ if (Args.empty()) return false; + // A mapping from a specialisation signature to the index of the respective + // entry in the all specialisation array. Used to ensure uniqueness of + // specialisations. + DenseMap UM; + + // LoopInfo is required for the bonus estimation of each argument's users. + const LoopInfo &LI = Solver.getLoopInfo(*F); + bool Found = false; for (User *U : F->users()) { if (!isa(U) && !isa(U)) @@ -477,17 +441,26 @@ AllSpecs[Index].CallSites.push_back(&CS); } else { // Calculate the specialisation gain. - InstructionCost Gain = 0 - Cost; + Cost Latency = 0; + Cost CodeSize = Metrics.NumInsts + + Metrics.NumInlineCandidates * MinFunctionSize; + DenseSet Visited; for (ArgInfo &A : S.Args) - Gain += - getSpecializationBonus(A.Formal, A.Actual, Solver.getLoopInfo(*F)); - + getSpecializationBonus(A.Formal, A.Actual, LI, Latency, CodeSize, + Visited); + assert (CodeSize >= 0 && + "The code size bonus cannot be larger than the function"); + Cost Score = Latency / (CodeSize + 1); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Score {Latency " << Latency + << ", CodeSize " << CodeSize << "} = " << Score + << "\n"); // Discard unprofitable specialisations. - if (!ForceSpecialization && Gain <= 0) + if (!ForceSpecialization && Score < MinScore) continue; // Create a new specialisation entry. 
- auto &Spec = AllSpecs.emplace_back(F, S, Gain); + auto &Spec = AllSpecs.emplace_back(F, S, Score); if (CS.getFunction() != F) Spec.CallSites.push_back(&CS); const unsigned Index = AllSpecs.size() - 1; @@ -542,67 +515,72 @@ // Mark all the specialized functions Specializations.insert(Clone); - ++NumSpecsCreated; - return Clone; -} + // Update the cost model. + for (const ArgInfo &A : S.Args) + ++NumSpecs[A.Formal]; -/// Compute and return the cost of specializing function \p F. -InstructionCost FunctionSpecializer::getSpecializationCost(Function *F) { - CodeMetrics &Metrics = analyzeFunction(F); - // If the code metrics reveal that we shouldn't duplicate the function, we - // shouldn't specialize it. Set the specialization cost to Invalid. - // Or if the lines of codes implies that this function is easy to get - // inlined so that we shouldn't specialize it. - if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() || - (!ForceSpecialization && !F->hasFnAttribute(Attribute::NoInline) && - Metrics.NumInsts < MinFunctionSize)) - return InstructionCost::getInvalid(); + ++NumSpecsCreated; - // Otherwise, set the specialization cost to be the cost of all the - // instructions in the function. - return Metrics.NumInsts * InlineConstants::getInstrCost(); + return Clone; } -static InstructionCost getUserBonus(User *U, llvm::TargetTransformInfo &TTI, - const LoopInfo &LI) { +static void getUserBonus(User *U, TargetTransformInfo &TTI, + const LoopInfo &LI, unsigned UserDepth, + Cost &Latency, Cost &CodeSize, + DenseSet &Visited) { + // If the user is not an instruction we do not know how to evaluate. + // If we have already visited this user there's nothing to do. + // If the user is deep in the use-def chain then stop traversing. + auto [It, Inserted] = Visited.insert(U); auto *I = dyn_cast_or_null(U); // If not an instruction we do not know how to evaluate. // Keep minimum possible cost for now so that it doesnt affect // specialization. 
- if (!I) - return std::numeric_limits::min(); - - InstructionCost Cost = - TTI.getInstructionCost(U, TargetTransformInfo::TCK_SizeAndLatency); - - // Increase the cost if it is inside the loop. - unsigned LoopDepth = LI.getLoopDepth(I->getParent()); - Cost *= std::pow((double)AvgLoopIters, LoopDepth); - + if (!I || !Inserted || UserDepth > MaxUserDepth) + return; + + // Ignore SSA copies. + auto *II = dyn_cast(I); + bool IsSSACopy = II && II->getIntrinsicID() == Intrinsic::ssa_copy; + + if (!IsSSACopy) { + // Increase the Latency if inside a loop and modulate by UserDepth. + unsigned LoopDepth = LI.getLoopDepth(I->getParent()); + Latency += (TTI.getInstructionCost(U, TargetTransformInfo::TCK_Latency) * + std::pow((double)AvgLoopIters, LoopDepth)) / UserDepth; + CodeSize += TTI.getInstructionCost(U, TargetTransformInfo::TCK_CodeSize); + + LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus { Latency = " << Latency + << ", CodeSize = " << CodeSize << "} after user " << *U + << "\n"); + ++UserDepth; + } // Traverse recursively if there are more uses. - // TODO: Any other instructions to be added here? - if (I->mayReadFromMemory() || I->isCast()) - for (auto *User : I->users()) - Cost += getUserBonus(User, TTI, LI); - - return Cost; + for (User *User : I->users()) + getUserBonus(User, TTI, LI, UserDepth, Latency, CodeSize, Visited); } /// Compute a bonus for replacing argument \p A with constant \p C. -InstructionCost -FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C, - const LoopInfo &LI) { +void FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C, + const LoopInfo &LI, Cost &Latency, Cost &CodeSize, + DenseSet &Visited) { Function *F = A->getParent(); auto &TTI = (GetTTI)(*F); LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: " << C->getNameOrAsOperand() << "\n"); + // The more we specialize an argument, the more expensive it gets. 
+ CodeSize *= NumSpecs[A] + 1; - InstructionCost TotalCost = 0; - for (auto *U : A->users()) { - TotalCost += getUserBonus(U, TTI, LI); - LLVM_DEBUG(dbgs() << "FnSpecialization: User cost "; - TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n"); + for (User *U : A->users()) { + Cost UserLatency = 0; + Cost UserSize = 0; + getUserBonus(U, TTI, LI, /*UserDepth=*/1, UserLatency, UserSize, Visited); + LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus { Latency = " + << UserLatency << ", CodeSize = " << UserSize + << "} for user " << *U << "\n"); + Latency += UserLatency; + CodeSize -= UserSize; } // The below heuristic is only concerned with exposing inlining @@ -610,7 +588,7 @@ // (potentially casted) function pointer, give up. Function *CalledFunction = dyn_cast(C->stripPointerCasts()); if (!CalledFunction) - return TotalCost; + return; // Get TTI for the called function (used for the inline cost). auto &CalleeTTI = (GetTTI)(*CalledFunction); @@ -653,9 +631,8 @@ LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus << " for user " << *U << "\n"); + Latency += Bonus; } - - return TotalCost + Bonus; } static bool isValidArgumentType(Type *Ty) { @@ -745,7 +722,7 @@ // Find the best matching specialisation. 
const Spec *BestSpec = nullptr; for (const Spec &S : make_range(Begin, End)) { - if (!S.Clone || (BestSpec && S.Gain <= BestSpec->Gain)) + if (!S.Clone || (BestSpec && S.Score <= BestSpec->Score)) continue; if (any_of(S.Sig.Args, [CS, this](const ArgInfo &Arg) { diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes="ipsccp" -funcspec-for-literal-constant=true -funcspec-min-function-size=10 -S < %s | FileCheck %s +; RUN: opt -passes="ipsccp" -funcspec-for-literal-constant=true -force-specialization -S < %s | FileCheck %s ; Check that the literal constant parameter could be specialized. ; CHECK: @foo.1( diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes="ipsccp" -funcspec-avg-loop-iters=5 -funcspec-min-function-size=10 -S < %s | FileCheck %s +; RUN: opt -passes="ipsccp" -funcspec-avg-loop-iters=11 -funcspec-min-function-size=10 -S < %s | FileCheck %s ; Check that the loop depth results in a larger specialization bonus. 
; CHECK: @foo.1( diff --git a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll --- a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll +++ b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes="ipsccp" < %s | FileCheck %s +; RUN: opt -S --passes="ipsccp" -force-specialization < %s | FileCheck %s define dso_local i32 @p0(i32 noundef %x) { entry: %add = add nsw i32 %x, 1 diff --git a/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll b/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll @@ -0,0 +1,64 @@ +; REQUIRES: asserts +; RUN: opt -passes="ipsccp,inline,instcombine,simplifycfg" -S \ +; RUN: -funcspec-min-function-size=23 -funcspec-max-iters=100 \ +; RUN: -debug-only=function-specialization < %s 2>&1 | FileCheck %s + +; Make sure the number of specializations created are not +; linear to the number of iterations (funcspec-max-iters). 
+ +; CHECK: FnSpecialization: Created 10 specializations in module + +@Global = internal constant i32 1, align 4 + +define internal void @recursiveFunc(ptr readonly %arg) { + %temp = alloca i32, align 4 + %arg.load = load i32, ptr %arg, align 4 + %arg.cmp = icmp slt i32 %arg.load, 10000 + br i1 %arg.cmp, label %loop1, label %ret.block + +loop1: + br label %loop2 + +loop2: + br label %loop3 + +loop3: + br label %loop4 + +loop4: + br label %block6 + +block6: + call void @print_val(i32 %arg.load) + %arg.add = add nsw i32 %arg.load, 1 + store i32 %arg.add, ptr %temp, align 4 + call void @recursiveFunc(ptr %temp) + br label %loop4.end + +loop4.end: + %exit_cond1 = call i1 @exit_cond() + br i1 %exit_cond1, label %loop4, label %loop3.end + +loop3.end: + %exit_cond2 = call i1 @exit_cond() + br i1 %exit_cond2, label %loop3, label %loop2.end + +loop2.end: + %exit_cond3 = call i1 @exit_cond() + br i1 %exit_cond3, label %loop2, label %loop1.end + +loop1.end: + %exit_cond4 = call i1 @exit_cond() + br i1 %exit_cond4, label %loop1, label %ret.block + +ret.block: + ret void +} + +define i32 @main() { + call void @recursiveFunc(ptr @Global) + ret i32 0 +} + +declare dso_local void @print_val(i32) +declare dso_local i1 @exit_cond() diff --git a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll --- a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll +++ b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes="ipsccp" -funcspec-min-function-size=3 -S < %s | FileCheck %s +; RUN: opt -passes="ipsccp" -force-specialization -S < %s | FileCheck %s define i64 @main(i64 %x, i1 %flag) { entry: