diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -29,13 +29,9 @@
 //
 // Todos:
 // - Specializing recursive functions relies on running the transformation a
-//   number of times, which is controlled by option
-//   `func-specialization-max-iters`. Thus, increasing this value and the
-//   number of iterations, will linearly increase the number of times recursive
-//   functions get specialized, see also the discussion in
-//   https://reviews.llvm.org/D106426 for details. Perhaps there is a
-//   compile-time friendlier way to control/limit the number of specialisations
-//   for recursive functions.
+//   number of times, which is controlled by the option `funcspec-max-iters`.
+//   Perhaps there is a compile-time friendlier way for such specializations.
+//   (see also the discussion in https://reviews.llvm.org/D106426 for details)
 // - Don't transform the function if function specialization does not trigger;
 //   the SCCPSolver may make IR changes.
 //
@@ -60,6 +56,15 @@
 using namespace llvm;
 
 namespace llvm {
+// Map of potential specializations for each function. The FunctionSpecializer
+// keeps the discovered specialisation opportunities for the module in a single
+// vector, where the specialisations of each function form a contiguous range.
+// This map's value is the beginning and the end of that range.
+using SpecMap = DenseMap<Function *, std::pair<unsigned, unsigned>>;
+
+// Just a shorter abbreviation.
+using Cost = InstructionCost;
+
 // Specialization signature, used to uniquely designate a specialization within
 // a function.
 struct SpecSig {
@@ -95,23 +100,17 @@
   SpecSig Sig;
 
   // Profitability of the specialization.
-  InstructionCost Gain;
+  Cost Score;
 
   // List of call sites, matching this specialization.
   SmallVector<CallBase *> CallSites;
 
-  Spec(Function *F, const SpecSig &S, InstructionCost G)
-      : F(F), Sig(S), Gain(G) {}
-  Spec(Function *F, const SpecSig &&S, InstructionCost G)
-      : F(F), Sig(S), Gain(G) {}
+  Spec(Function *F, const SpecSig &S, Cost Score)
+      : F(F), Sig(S), Score(Score) {}
+  Spec(Function *F, const SpecSig &&S, Cost Score)
+      : F(F), Sig(S), Score(Score) {}
 };
 
-// Map of potential specializations for each function. The FunctionSpecializer
-// keeps the discovered specialisation opportunities for the module in a single
-// vector, where the specialisations of each function form a contiguous range.
-// This map's value is the beginning and the end of that range.
-using SpecMap = DenseMap<Function *, std::pair<unsigned, unsigned>>;
-
 class FunctionSpecializer {
 
   /// The IPSCCP Solver.
@@ -130,6 +129,7 @@
   SmallPtrSet<Function *, 32> Specializations;
   SmallPtrSet<Function *, 32> FullySpecialized;
   DenseMap<Function *, CodeMetrics> FunctionMetrics;
+  DenseMap<Argument *, unsigned> NumSpecs;
 
 public:
   FunctionSpecializer(
@@ -165,18 +165,13 @@
   /// Remove any ssa_copy intrinsics that may have been introduced.
   void cleanUpSSA();
 
-  // Compute the code metrics for function \p F.
-  CodeMetrics &analyzeFunction(Function *F);
-
   /// @brief  Find potential specialization opportunities.
   /// @param F Function to specialize
-  /// @param Cost Cost of specializing a function. Final gain is this cost
-  /// minus benefit
   /// @param AllSpecs A vector to add potential specializations to.
   /// @param SM  A map for a function's specialisation range
   /// @return True, if any potential specializations were found
-  bool findSpecializations(Function *F, InstructionCost Cost,
-                           SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);
+  bool findSpecializations(Function *F, SpecMap &SM,
+                           SmallVectorImpl<Spec> &AllSpecs);
 
   bool isCandidateFunction(Function *F);
 
@@ -186,12 +181,10 @@
   /// @return The new, cloned function
   Function *createSpecialization(Function *F, const SpecSig &S);
 
-  /// Compute and return the cost of specializing function \p F.
-  InstructionCost getSpecializationCost(Function *F);
-
   /// Compute a bonus for replacing argument \p A with constant \p C.
-  InstructionCost getSpecializationBonus(Argument *A, Constant *C,
-                                         const LoopInfo &LI);
+  void getSpecializationBonus(Argument *A, Constant *C, const LoopInfo &LI,
+                              Cost &Latency, Cost &CodeSize,
+                              DenseSet<User *> &Visited);
 
   /// Determine if it is possible to specialise the function for constant values
   /// of the formal parameter \p A.
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -5,45 +5,6 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-//
-// This specialises functions with constant parameters. Constant parameters
-// like function pointers and constant globals are propagated to the callee by
-// specializing the function. The main benefit of this pass at the moment is
-// that indirect calls are transformed into direct calls, which provides inline
-// opportunities that the inliner would not have been able to achieve. That's
-// why function specialisation is run before the inliner in the optimisation
-// pipeline; that is by design. Otherwise, we would only benefit from constant
-// passing, which is a valid use-case too, but hasn't been explored much in
-// terms of performance uplifts, cost-model and compile-time impact.
-//
-// Current limitations:
-// - It does not yet handle integer ranges. We do support "literal constants",
-//   but that's off by default under an option.
-// - The cost-model could be further looked into (it mainly focuses on inlining
-//   benefits),
-//
-// Ideas:
-// - With a function specialization attribute for arguments, we could have
-//   a direct way to steer function specialization, avoiding the cost-model,
-//   and thus control compile-times / code-size.
-//
-// Todos:
-// - Specializing recursive functions relies on running the transformation a
-//   number of times, which is controlled by option
-//   `func-specialization-max-iters`. Thus, increasing this value and the
-//   number of iterations, will linearly increase the number of times recursive
-//   functions get specialized, see also the discussion in
-//   https://reviews.llvm.org/D106426 for details. Perhaps there is a
-//   compile-time friendlier way to control/limit the number of specialisations
-//   for recursive functions.
-// - Don't transform the function if function specialization does not trigger;
-//   the SCCPSolver may make IR changes.
-//
-// References:
-// - 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable
-//   it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q
-//
-//===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/IPO/FunctionSpecialization.h"
 #include "llvm/ADT/Statistic.h"
@@ -76,6 +37,16 @@
     "The maximum number of clones allowed for a single function "
     "specialization"));
 
+static cl::opt<unsigned> MaxUserDepth(
+    "funcspec-max-user-depth", cl::init(3), cl::Hidden, cl::desc(
+    "The maximum recursion depth on a use-def chain for calculating "
+    "the specialization bonus of a constant argument"));
+
+static cl::opt<unsigned> MinScore(
+    "funcspec-min-score", cl::init(80), cl::Hidden, cl::desc(
+    "Do not specialize functions with score lower than this value "
+    "(the ratio of latency gains over codesize increase)"));
+
 static cl::opt<unsigned> MinFunctionSize(
     "funcspec-min-function-size", cl::init(100), cl::Hidden, cl::desc(
     "Don't specialize functions that have less than this number of "
@@ -273,17 +244,7 @@
     if (!isCandidateFunction(&F))
       continue;
 
-    auto Cost = getSpecializationCost(&F);
-    if (!Cost.isValid()) {
-      LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialization cost for "
-                        << F.getName() << "\n");
-      continue;
-    }
-
-    LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
-                      << F.getName() << " is " << Cost << "\n");
-
-    if (!findSpecializations(&F, Cost, AllSpecs, SM)) {
+    if (!findSpecializations(&F, SM, AllSpecs)) {
       LLVM_DEBUG(
           dbgs() << "FnSpecialization: No possible specializations found for "
                  << F.getName() << "\n");
@@ -303,8 +264,8 @@
   // Choose the most profitable specialisations, which fit in the module
   // specialization budget, which is derived from maximum number of
   // specializations per specialization candidate function.
-  auto CompareGain = [&AllSpecs](unsigned I, unsigned J) {
-    return AllSpecs[I].Gain > AllSpecs[J].Gain;
+  auto CompareScore = [&AllSpecs](unsigned I, unsigned J) {
+    return AllSpecs[I].Score > AllSpecs[J].Score;
   };
   const unsigned NSpecs =
       std::min(NumCandidates * MaxClones, unsigned(AllSpecs.size()));
@@ -316,11 +277,11 @@
                       << "FnSpecialization: Specializing the "
                       << NSpecs
                       << " most profitable candidates.\n");
-    std::make_heap(BestSpecs.begin(), BestSpecs.begin() + NSpecs, CompareGain);
+    std::make_heap(BestSpecs.begin(), BestSpecs.begin() + NSpecs, CompareScore);
     for (unsigned I = NSpecs, N = AllSpecs.size(); I < N; ++I) {
       BestSpecs[NSpecs] = I;
-      std::push_heap(BestSpecs.begin(), BestSpecs.end(), CompareGain);
-      std::pop_heap(BestSpecs.begin(), BestSpecs.end(), CompareGain);
+      std::push_heap(BestSpecs.begin(), BestSpecs.end(), CompareScore);
+      std::pop_heap(BestSpecs.begin(), BestSpecs.end(), CompareScore);
     }
   }
 
@@ -328,7 +289,7 @@
              for (unsigned I = 0; I < NSpecs; ++I) {
                const Spec &S = AllSpecs[BestSpecs[I]];
                dbgs() << "FnSpecialization: Function " << S.F->getName()
-                      << " , gain " << S.Gain << "\n";
+                      << " , score " << S.Score << "\n";
                for (const ArgInfo &Arg : S.Sig.Args)
                  dbgs() << "FnSpecialization:   FormalArg = "
                         << Arg.Formal->getNameOrAsOperand()
@@ -379,24 +340,6 @@
   FullySpecialized.clear();
 }
 
-// Compute the code metrics for function \p F.
-CodeMetrics &FunctionSpecializer::analyzeFunction(Function *F) {
-  auto I = FunctionMetrics.insert({F, CodeMetrics()});
-  CodeMetrics &Metrics = I.first->second;
-  if (I.second) {
-    // The code metrics were not cached.
-    SmallPtrSet<const Value *, 32> EphValues;
-    CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues);
-    for (BasicBlock &BB : *F)
-      Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues);
-
-    LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function "
-                      << F->getName() << " is " << Metrics.NumInsts
-                      << " instructions\n");
-  }
-  return Metrics;
-}
-
 /// Clone the function \p F and remove the ssa_copy intrinsics added by
 /// the SCCPSolver in the cloned version.
 static Function *cloneCandidateFunction(Function *F) {
@@ -406,13 +349,26 @@
   return Clone;
 }
 
-bool FunctionSpecializer::findSpecializations(Function *F, InstructionCost Cost,
-                                              SmallVectorImpl<Spec> &AllSpecs,
-                                              SpecMap &SM) {
-  // A mapping from a specialisation signature to the index of the respective
-  // entry in the all specialisation array. Used to ensure uniqueness of
-  // specialisations.
-  DenseMap<SpecSig, unsigned> UM;
+bool FunctionSpecializer::findSpecializations(Function *F, SpecMap &SM,
+                                              SmallVectorImpl<Spec> &AllSpecs) {
+  // Analyze the function if not done yet.
+  auto [It, Inserted] = FunctionMetrics.try_emplace(F, CodeMetrics());
+  CodeMetrics &Metrics = It->second;
+  if (Inserted) {
+    // The code metrics were not cached.
+    SmallPtrSet<const Value *, 32> EphValues;
+    CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues);
+    for (BasicBlock &BB : *F)
+      Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues);
+  }
+  // If the code metrics reveal that we shouldn't duplicate the function, we
+  // shouldn't specialize it. Set the specialization cost to Invalid.
+  // Or if the lines of code imply that this function is easy to get
+  // inlined, so that we shouldn't specialize it.
+  if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
+      (!ForceSpecialization && !F->hasFnAttribute(Attribute::NoInline) &&
+       Metrics.NumInsts < MinFunctionSize))
+    return false;
 
   // Get a list of interesting arguments.
   SmallVector<Argument *> Args;
@@ -423,6 +379,14 @@
   if (Args.empty())
     return false;
 
+  // A mapping from a specialisation signature to the index of the respective
+  // entry in the all specialisation array. Used to ensure uniqueness of
+  // specialisations.
+  DenseMap<SpecSig, unsigned> UM;
+
+  // LoopInfo is required for the bonus estimation of each argument's users.
+  const LoopInfo &LI = Solver.getLoopInfo(*F);
+
   bool Found = false;
   for (User *U : F->users()) {
     if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
@@ -473,17 +437,26 @@
       AllSpecs[Index].CallSites.push_back(&CS);
     } else {
       // Calculate the specialisation gain.
-      InstructionCost Gain = 0 - Cost;
+      Cost Latency = 0;
+      Cost CodeSize = Metrics.NumInsts +
+                      Metrics.NumInlineCandidates * MinFunctionSize;
+      DenseSet<User *> Visited;
       for (ArgInfo &A : S.Args)
-        Gain +=
-            getSpecializationBonus(A.Formal, A.Actual, Solver.getLoopInfo(*F));
-
+        getSpecializationBonus(A.Formal, A.Actual, LI, Latency, CodeSize,
+                               Visited);
+      assert (CodeSize >= 0 &&
+              "The code size bonus cannot be larger than the function");
+      Cost Score = Latency / (CodeSize + 1);
+
+      LLVM_DEBUG(dbgs() << "FnSpecialization: Score {Latency " << Latency
+                        << ", CodeSize " << CodeSize << "} = " << Score
+                        << "\n");
       // Discard unprofitable specialisations.
-      if (!ForceSpecialization && Gain <= 0)
+      if (!ForceSpecialization && Score < MinScore)
         continue;
 
       // Create a new specialisation entry.
-      auto &Spec = AllSpecs.emplace_back(F, S, Gain);
+      auto &Spec = AllSpecs.emplace_back(F, S, Score);
       if (CS.getFunction() != F)
         Spec.CallSites.push_back(&CS);
       const unsigned Index = AllSpecs.size() - 1;
@@ -543,67 +516,72 @@
 
   // Mark all the specialized functions
   Specializations.insert(Clone);
-  ++NumSpecsCreated;
 
-  return Clone;
-}
+  // Update the cost model.
+  for (const ArgInfo &A : S.Args)
+    ++NumSpecs[A.Formal];
 
-/// Compute and return the cost of specializing function \p F.
-InstructionCost FunctionSpecializer::getSpecializationCost(Function *F) {
-  CodeMetrics &Metrics = analyzeFunction(F);
-  // If the code metrics reveal that we shouldn't duplicate the function, we
-  // shouldn't specialize it. Set the specialization cost to Invalid.
-  // Or if the lines of codes implies that this function is easy to get
-  // inlined so that we shouldn't specialize it.
-  if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
-      (!ForceSpecialization && !F->hasFnAttribute(Attribute::NoInline) &&
-       Metrics.NumInsts < MinFunctionSize))
-    return InstructionCost::getInvalid();
+  ++NumSpecsCreated;
 
-  // Otherwise, set the specialization cost to be the cost of all the
-  // instructions in the function.
-  return Metrics.NumInsts * InlineConstants::getInstrCost();
+  return Clone;
 }
 
-static InstructionCost getUserBonus(User *U, llvm::TargetTransformInfo &TTI,
-                                    const LoopInfo &LI) {
+static void getUserBonus(User *U, TargetTransformInfo &TTI,
+                         const LoopInfo &LI, unsigned UserDepth,
+                         Cost &Latency, Cost &CodeSize,
+                         DenseSet<User *> &Visited) {
+  // If the user is not an instruction we do not know how to evaluate it.
+  // If we have already visited this user there's nothing to do.
+  // If the user is deep in the use-def chain then stop traversing.
+  auto [It, Inserted] = Visited.insert(U);
   auto *I = dyn_cast_or_null<Instruction>(U);
   // If not an instruction we do not know how to evaluate.
   // Keep minimum possible cost for now so that it doesnt affect
   // specialization.
-  if (!I)
-    return std::numeric_limits<unsigned>::min();
-
-  InstructionCost Cost =
-      TTI.getInstructionCost(U, TargetTransformInfo::TCK_SizeAndLatency);
-
-  // Increase the cost if it is inside the loop.
-  unsigned LoopDepth = LI.getLoopDepth(I->getParent());
-  Cost *= std::pow((double)AvgLoopIters, LoopDepth);
-
+  if (!I || !Inserted || UserDepth > MaxUserDepth)
+    return;
+
+  // Ignore SSA copies.
+  auto *II = dyn_cast<IntrinsicInst>(I);
+  bool IsSSACopy = II && II->getIntrinsicID() == Intrinsic::ssa_copy;
+
+  if (!IsSSACopy) {
+    // Increase the Latency if inside a loop and modulate by UserDepth.
+    unsigned LoopDepth = LI.getLoopDepth(I->getParent());
+    Latency += (TTI.getInstructionCost(U, TargetTransformInfo::TCK_Latency) *
+                std::pow((double)AvgLoopIters, LoopDepth)) / UserDepth;
+    CodeSize += TTI.getInstructionCost(U, TargetTransformInfo::TCK_CodeSize);
+
+    LLVM_DEBUG(dbgs() << "FnSpecialization:     Bonus { Latency = " << Latency
+                      << ", CodeSize = " << CodeSize << "} after user " << *U
+                      << "\n");
+    ++UserDepth;
+  }
   // Traverse recursively if there are more uses.
-  // TODO: Any other instructions to be added here?
-  if (I->mayReadFromMemory() || I->isCast())
-    for (auto *User : I->users())
-      Cost += getUserBonus(User, TTI, LI);
-
-  return Cost;
+  for (User *User : I->users())
+    getUserBonus(User, TTI, LI, UserDepth, Latency, CodeSize, Visited);
 }
 
 /// Compute a bonus for replacing argument \p A with constant \p C.
-InstructionCost
-FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
-                                            const LoopInfo &LI) {
+void FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
+                              const LoopInfo &LI, Cost &Latency, Cost &CodeSize,
+                              DenseSet<User *> &Visited) {
   Function *F = A->getParent();
   auto &TTI = (GetTTI)(*F);
   LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
                     << C->getNameOrAsOperand() << "\n");
+  // The more we specialize an argument, the more expensive it gets.
+  CodeSize *= NumSpecs[A] + 1;
 
-  InstructionCost TotalCost = 0;
-  for (auto *U : A->users()) {
-    TotalCost += getUserBonus(U, TTI, LI);
-    LLVM_DEBUG(dbgs() << "FnSpecialization:   User cost ";
-               TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n");
+  for (User *U : A->users()) {
+    Cost UserLatency = 0;
+    Cost UserSize = 0;
+    getUserBonus(U, TTI, LI, /*UserDepth=*/1, UserLatency, UserSize, Visited);
+    LLVM_DEBUG(dbgs() << "FnSpecialization:   Accumulated bonus { Latency = "
+                      << UserLatency << ", CodeSize = " << UserSize
+                      << "} for user " << *U << "\n");
+    Latency += UserLatency;
+    CodeSize -= UserSize;
   }
 
   // The below heuristic is only concerned with exposing inlining
@@ -611,7 +589,7 @@
   // (potentially casted) function pointer, give up.
   Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts());
   if (!CalledFunction)
-    return TotalCost;
+    return;
 
   // Get TTI for the called function (used for the inline cost).
   auto &CalleeTTI = (GetTTI)(*CalledFunction);
@@ -654,9 +632,8 @@
 
     LLVM_DEBUG(dbgs() << "FnSpecialization:   Inlining bonus " << Bonus
                       << " for user " << *U << "\n");
+    Latency += Bonus;
   }
-
-  return TotalCost + Bonus;
 }
 
 static bool isValidArgumentType(Type *Ty) {
@@ -739,7 +716,7 @@
     // Find the best matching specialisation.
     const Spec *BestSpec = nullptr;
     for (const Spec &S : make_range(Begin, End)) {
-      if (!S.Clone || (BestSpec && S.Gain <= BestSpec->Gain))
+      if (!S.Clone || (BestSpec && S.Score <= BestSpec->Score))
         continue;
 
       if (any_of(S.Sig.Args, [CS, this](const ArgInfo &Arg) {
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll
@@ -1,4 +1,4 @@
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-for-literal-constant=true -funcspec-min-function-size=10 -S < %s | FileCheck %s
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-for-literal-constant=true -force-specialization -S < %s | FileCheck %s
 
 ; Check that the literal constant parameter could be specialized.
 ; CHECK: @foo.1(
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll
@@ -1,4 +1,4 @@
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-avg-loop-iters=5 -funcspec-min-function-size=10 -S < %s | FileCheck %s
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-avg-loop-iters=11 -funcspec-min-function-size=10 -S < %s | FileCheck %s
 
 ; Check that the loop depth results in a larger specialization bonus.
 ; CHECK: @foo.1(
diff --git a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
--- a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S --passes="ipsccp<func-spec>" < %s | FileCheck %s
+; RUN: opt -S --passes="ipsccp<func-spec>" -force-specialization < %s | FileCheck %s
 define dso_local i32 @p0(i32 noundef %x) {
 entry:
   %add = add nsw i32 %x, 1
diff --git a/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll b/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll
@@ -0,0 +1,64 @@
+; REQUIRES: asserts
+; RUN: opt -passes="ipsccp<func-spec>,inline,instcombine,simplifycfg" -S \
+; RUN:     -funcspec-min-function-size=23 -funcspec-max-iters=100 \
+; RUN:     -debug-only=function-specialization < %s 2>&1 | FileCheck %s
+
+; Make sure the number of specializations created are not
+; linear to the number of iterations (funcspec-max-iters).
+
+; CHECK: FnSpecialization: Created 8 specializations in module
+
+@Global = internal constant i32 1, align 4
+
+define internal void @recursiveFunc(ptr readonly %arg) {
+  %temp = alloca i32, align 4
+  %arg.load = load i32, ptr %arg, align 4
+  %arg.cmp = icmp slt i32 %arg.load, 10000
+  br i1 %arg.cmp, label %loop1, label %ret.block
+
+loop1:
+  br label %loop2
+
+loop2:
+  br label %loop3
+
+loop3:
+  br label %loop4
+
+loop4:
+  br label %block6
+
+block6:
+  call void @print_val(i32 %arg.load)
+  %arg.add = add nsw i32 %arg.load, 1
+  store i32 %arg.add, ptr %temp, align 4
+  call void @recursiveFunc(ptr %temp)
+  br label %loop4.end
+
+loop4.end:
+  %exit_cond1 = call i1 @exit_cond()
+  br i1 %exit_cond1, label %loop4, label %loop3.end
+
+loop3.end:
+  %exit_cond2 = call i1 @exit_cond()
+  br i1 %exit_cond2, label %loop3, label %loop2.end
+
+loop2.end:
+  %exit_cond3 = call i1 @exit_cond()
+  br i1 %exit_cond3, label %loop2, label %loop1.end
+
+loop1.end:
+  %exit_cond4 = call i1 @exit_cond()
+  br i1 %exit_cond4, label %loop1, label %ret.block
+
+ret.block:
+  ret void
+}
+
+define i32 @main() {
+  call void @recursiveFunc(ptr @Global)
+  ret i32 0
+}
+
+declare dso_local void @print_val(i32)
+declare dso_local i1 @exit_cond()
diff --git a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll
--- a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll
@@ -1,4 +1,4 @@
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
 
 define i64 @main(i64 %x, i1 %flag) {
 entry: