diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -60,18 +60,57 @@
 using namespace llvm;
 
 namespace llvm {
-// Bookkeeping struct to pass data from the analysis and profitability phase
-// to the actual transform helper functions.
-struct SpecializationInfo {
-  SmallVector<ArgInfo, 8> Args; // Stores the {formal,actual} argument pairs.
-  InstructionCost Gain;         // Profitability: Gain = Bonus - Cost.
-  Function *Clone;              // The definition of the specialized function.
+// Specialization signature, used to uniquely designate a specialization within
+// a function.
+struct SpecSig {
+  // Hashing support, used to distinguish between ordinary, empty, or tombstone
+  // keys. Default-constructed (ordinary) signatures use 0.
+  unsigned Key = 0;
+
+  // The {formal, actual} argument pairs that make up the signature.
+  SmallVector<ArgInfo, 4> Args;
+
+  // Signatures are equal when the keys match and the argument lists hold the
+  // same pairs in the same order.
+  bool operator==(const SpecSig &Other) const {
+    return Key == Other.Key && Args == Other.Args;
+  }
+
+  // Combines the key with the hash of every argument pair.
+  friend hash_code hash_value(const SpecSig &S) {
+    return hash_combine(hash_value(S.Key),
+                        hash_combine_range(S.Args.begin(), S.Args.end()));
+  }
+};
+
+// Specialization instance.
+struct Spec {
+  // Original function.
+  Function *F;
+
+  // Cloned function, a specialized version of the original one.
+  Function *Clone = nullptr;
+
+  // Specialization signature.
+  SpecSig Sig;
+
+  // Profitability of the specialization.
+  InstructionCost Gain;
+
+  // List of call sites, matching this specialization.
+  SmallVector<CallBase *> CallSites;
+
+  Spec(Function *F, const SpecSig &S, InstructionCost G)
+      : F(F), Sig(S), Gain(G) {}
+  Spec(Function *F, SpecSig &&S, InstructionCost G)
+      : F(F), Sig(std::move(S)), Gain(G) {} // Moves from S, no copy.
 };
 
-using CallSpecBinding = std::pair<CallBase *, SpecializationInfo>;
-// We are using MapVector because it guarantees deterministic iteration
-// order across executions.
-using SpecializationMap = SmallMapVector<CallBase *, SpecializationInfo, 8>;
+// Map of potential specializations for each function. The FunctionSpecializer
+// keeps the discovered specialisation opportunities for the module in a single
+// vector, where the specialisations of each function form a contiguous range.
+// This map's value is the beginning and the end of that range.
+using SpecMap = DenseMap<Function *, std::pair<unsigned, unsigned>>;
 
 class FunctionSpecializer {
 
@@ -137,18 +176,23 @@
   // Compute the code metrics for function \p F.
   CodeMetrics &analyzeFunction(Function *F);
 
-  /// This function decides whether it's worthwhile to specialize function
-  /// \p F based on the known constant values its arguments can take on. It
-  /// only discovers potential specialization opportunities without actually
-  /// applying them.
-  ///
-  /// \returns true if any specializations have been found.
+  /// @brief Find potential specialization opportunities.
+  /// @param F Function to specialize
+  /// @param Cost Cost of specializing a function. Final gain is the sum of the
+  /// per-argument bonuses minus this cost
+  /// @param AllSpecs A vector to add potential specializations to.
+  /// @param SM A map from a function to its range of entries in \p AllSpecs
+  /// @return True, if any potential specializations were found
   bool findSpecializations(Function *F, InstructionCost Cost,
-                           SmallVectorImpl<CallSpecBinding> &WorkList);
+                           SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);
 
   bool isCandidateFunction(Function *F);
 
-  Function *createSpecialization(Function *F, CallSpecBinding &Specialization);
+  /// @brief Create a specialization of \p F and prime the SCCPSolver
+  /// @param F Function to specialize
+  /// @param S Which specialization to create
+  /// @return The new, cloned function
+  Function *createSpecialization(Function *F, const SpecSig &S);
 
   /// Compute and return the cost of specializing function \p F.
   InstructionCost getSpecializationCost(Function *F);
@@ -165,9 +209,11 @@
   /// have a constant value. Return that constant.
   Constant *getCandidateConstant(Value *V);
 
-  /// Redirects callsites of function \p F to its specialized copies.
-  void updateCallSites(Function *F,
-                       SmallVectorImpl<CallSpecBinding> &Specializations);
+  /// @brief Find and update calls to \p F, which match a specialization
+  /// @param F Original function
+  /// @param Begin Start of a range of possibly matching specialisations
+  /// @param End End of a range (exclusive) of possibly matching specialisations
+  void updateCallSites(Function *F, const Spec *Begin, const Spec *End);
 };
 } // namespace llvm
 
diff --git a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
--- a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
+++ b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
@@ -52,7 +52,17 @@
   Argument *Formal; // The Formal argument being analysed.
   Constant *Actual; // A corresponding actual constant argument.
 
-  ArgInfo(Argument *F, Constant *A) : Formal(F), Actual(A){};
+  ArgInfo(Argument *F, Constant *A) : Formal(F), Actual(A) {}
+
+  bool operator==(const ArgInfo &Other) const {
+    return Formal == Other.Formal && Actual == Other.Actual;
+  }
+
+  bool operator!=(const ArgInfo &Other) const { return !(*this == Other); }
+
+  friend hash_code hash_value(const ArgInfo &A) {
+    return hash_combine(hash_value(A.Formal), hash_value(A.Actual));
+  }
 };
 
 class SCCPInstVisitor;
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -234,51 +234,132 @@
     removeSSACopy(*F);
 }
 
+// DenseMap traits for SpecSig; Key ~0U is the empty key, ~1U the tombstone.
+template <> struct llvm::DenseMapInfo<SpecSig> {
+  static inline SpecSig getEmptyKey() { return {~0U, {}}; }
+
+  static inline SpecSig getTombstoneKey() { return {~1U, {}}; }
+
+  static unsigned getHashValue(const SpecSig &S) {
+    return static_cast<unsigned>(hash_value(S));
+  }
+
+  static bool isEqual(const SpecSig &LHS, const SpecSig &RHS) {
+    return LHS == RHS;
+  }
+};
+
 /// Attempt to specialize functions in the module to enable constant
 /// propagation across function boundaries.
 ///
 /// \returns true if at least one function is specialized.
 bool FunctionSpecializer::run() {
-  bool Changed = false;
-
+  // Find possible specializations for each function.
+  SpecMap SM;
+  SmallVector<Spec, 32> AllSpecs;
+  unsigned NumCandidates = 0;
   for (Function &F : M) {
     if (!isCandidateFunction(&F))
       continue;
 
     auto Cost = getSpecializationCost(&F);
     if (!Cost.isValid()) {
-      LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialization cost.\n");
+      LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialization cost for "
+                        << F.getName() << "\n");
       continue;
     }
 
     LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
                       << F.getName() << " is " << Cost << "\n");
 
-    SmallVector<CallSpecBinding, 8> Specializations;
-    if (!findSpecializations(&F, Cost, Specializations)) {
+    if (!findSpecializations(&F, Cost, AllSpecs, SM)) {
       LLVM_DEBUG(
-          dbgs() << "FnSpecialization: No possible specializations found\n");
+          dbgs() << "FnSpecialization: No possible specializations found for "
+                 << F.getName() << "\n");
       continue;
     }
 
-    Changed = true;
+    ++NumCandidates;
+  }
+
+  if (!NumCandidates) {
+    LLVM_DEBUG(
+        dbgs()
+        << "FnSpecialization: No possible specializations found in module\n");
+    return false;
+  }
+
+  // Choose the most profitable specialisations that fit in the module-wide
+  // budget of MaxClonesThreshold specialisations per candidate function. A
+  // min-heap keyed on gain tracks the best NSpecs candidates seen so far.
+  auto CompareGain = [&AllSpecs](unsigned I, unsigned J) {
+    return AllSpecs[I].Gain > AllSpecs[J].Gain;
+  };
+  const unsigned NSpecs =
+      std::min(NumCandidates * MaxClonesThreshold, unsigned(AllSpecs.size()));
+  SmallVector<unsigned> BestSpecs(NSpecs + 1);
+  std::iota(BestSpecs.begin(), BestSpecs.begin() + NSpecs, 0);
+  if (AllSpecs.size() > NSpecs) {
+    LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed "
+                      << "the maximum number of clones threshold.\n"
+                      << "FnSpecialization: Specializing the "
+                      << NSpecs
+                      << " most profitable candidates.\n");
+    std::make_heap(BestSpecs.begin(), BestSpecs.begin() + NSpecs, CompareGain);
+    for (unsigned I = NSpecs, N = AllSpecs.size(); I < N; ++I) {
+      BestSpecs[NSpecs] = I;
+      std::push_heap(BestSpecs.begin(), BestSpecs.end(), CompareGain);
+      std::pop_heap(BestSpecs.begin(), BestSpecs.end(), CompareGain);
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << "FnSpecialization: List of specializations \n";
+             for (unsigned I = 0; I < NSpecs; ++I) {
+               const Spec &S = AllSpecs[BestSpecs[I]];
+               dbgs() << "FnSpecialization: Function " << S.F->getName()
+                      << " , gain " << S.Gain << "\n";
+               for (const ArgInfo &Arg : S.Sig.Args)
+                 dbgs() << "FnSpecialization:   FormalArg = "
+                        << Arg.Formal->getNameOrAsOperand()
+                        << ", ActualArg = " << Arg.Actual->getNameOrAsOperand()
+                        << "\n";
+             });
+
+  // Create the chosen specializations.
+  SmallPtrSet<Function *, 8> OriginalFuncs;
+  SmallVector<Function *> Clones;
+  for (unsigned I = 0; I < NSpecs; ++I) {
+    Spec &S = AllSpecs[BestSpecs[I]];
+    S.Clone = createSpecialization(S.F, S.Sig);
+
+    // Update the known call sites to call the clone.
+    for (CallBase *Call : S.CallSites) {
+      LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
+                        << " to call " << S.Clone->getName() << "\n");
+      Call->setCalledFunction(S.Clone);
+    }
+
+    Clones.push_back(S.Clone);
+    OriginalFuncs.insert(S.F);
+  }
 
-    SmallVector<Function *, 4> Clones;
-    for (CallSpecBinding &Specialization : Specializations)
-      Clones.push_back(createSpecialization(&F, Specialization));
+  Solver.solveWhileResolvedUndefsIn(Clones);
 
-    Solver.solveWhileResolvedUndefsIn(Clones);
-    updateCallSites(&F, Specializations);
+  // Update the rest of the call sites - these are the recursive calls, calls
+  // to discarded specialisations and calls that may match a specialisation
+  // after the solver runs.
+  for (Function *F : OriginalFuncs) {
+    auto [Begin, End] = SM[F];
+    updateCallSites(F, AllSpecs.begin() + Begin, AllSpecs.begin() + End);
   }
 
   promoteConstantStackValues();
-
   LLVM_DEBUG(if (NbFunctionsSpecialized) dbgs()
              << "FnSpecialization: Specialized " << NbFunctionsSpecialized
              << " functions in module " << M.getName() << "\n");
 
   NumFuncSpecialized += NbFunctionsSpecialized;
-  return Changed;
+  return true;
 }
 
 void FunctionSpecializer::removeDeadFunctions() {
@@ -319,32 +400,30 @@
   return Clone;
 }
 
-/// This function decides whether it's worthwhile to specialize function
-/// \p F based on the known constant values its arguments can take on. It
-/// only discovers potential specialization opportunities without actually
-/// applying them.
-///
-/// \returns true if any specializations have been found.
-bool FunctionSpecializer::findSpecializations(
-    Function *F, InstructionCost Cost,
-    SmallVectorImpl<CallSpecBinding> &WorkList) {
+bool FunctionSpecializer::findSpecializations(Function *F, InstructionCost Cost,
+                                              SmallVectorImpl<Spec> &AllSpecs,
+                                              SpecMap &SM) {
+  // A mapping from a specialisation signature to the index of the respective
+  // entry in the all specialisation array. Used to ensure uniqueness of
+  // specialisations.
+  DenseMap<SpecSig, unsigned> UM;
+
   // Get a list of interesting arguments.
-  SmallVector<Argument *, 4> Args;
+  SmallVector<Argument *> Args;
   for (Argument &Arg : F->args())
     if (isArgumentInteresting(&Arg))
       Args.push_back(&Arg);
 
-  if (!Args.size())
+  if (Args.empty())
     return false;
 
-  // Find all the call sites for the function.
-  SpecializationMap Specializations;
+  bool Found = false;
   for (User *U : F->users()) {
     if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
       continue;
     auto &CS = *cast<CallBase>(U);
 
-    // Skip irrelevant users.
+    // The user instruction does not call our function.
     if (CS.getCalledFunction() != F)
       continue;
 
@@ -358,62 +437,58 @@
     if (!Solver.isBlockExecutable(CS.getParent()))
       continue;
 
-    // Examine arguments and create specialization candidates from call sites
-    // with constant arguments.
-    bool Added = false;
+    // Examine arguments and create a specialisation candidate from the
+    // constant operands of this call site.
+    SpecSig S;
     for (Argument *A : Args) {
       Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo()));
       if (!C)
         continue;
-
-      if (!Added) {
-        Specializations[&CS] = {{}, 0 - Cost, nullptr};
-        Added = true;
-      }
-
-      SpecializationInfo &S = Specializations.back().second;
-      S.Gain += getSpecializationBonus(A, C, Solver.getLoopInfo(*F));
+      LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument "
+                        << A->getName() << " : " << C->getNameOrAsOperand()
+                        << "\n");
       S.Args.push_back({A, C});
     }
-    Added = false;
-  }
 
-  // Remove unprofitable specializations.
-  if (!ForceFunctionSpecialization)
-    Specializations.remove_if(
-        [](const auto &Entry) { return Entry.second.Gain <= 0; });
-
-  // Clear the MapVector and return the underlying vector.
-  WorkList = Specializations.takeVector();
+    if (S.Args.empty())
+      continue;
 
-  // Sort the candidates in descending order.
-  llvm::stable_sort(WorkList, [](const auto &L, const auto &R) {
-    return L.second.Gain > R.second.Gain;
-  });
+    // Check if we have encountered the same specialisation already.
+    if (auto It = UM.find(S); It != UM.end()) {
+      // Existing specialisation. Add the call to the list to rewrite, unless
+      // it's a recursive call. A specialisation, generated because of a
+      // recursive call may end up as not the best specialisation for all
+      // the cloned instances of this call, which result from specialising
+      // functions. Hence we don't rewrite the call directly, but match it with
+      // the best specialisation once all specialisations are known.
+      if (CS.getFunction() == F)
+        continue;
+      const unsigned Index = It->second;
+      AllSpecs[Index].CallSites.push_back(&CS);
+    } else {
+      // Calculate the specialisation gain.
+      InstructionCost Gain = 0 - Cost;
+      for (ArgInfo &A : S.Args)
+        Gain +=
+            getSpecializationBonus(A.Formal, A.Actual, Solver.getLoopInfo(*F));
+
+      // Discard unprofitable specialisations.
+      if (!ForceFunctionSpecialization && Gain <= 0)
+        continue;
 
-  // Truncate the worklist to 'MaxClonesThreshold' candidates if necessary.
-  if (WorkList.size() > MaxClonesThreshold) {
-    LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed "
-                      << "the maximum number of clones threshold.\n"
-                      << "FnSpecialization: Truncating worklist to "
-                      << MaxClonesThreshold << " candidates.\n");
-    WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end());
+      // Create a new specialisation entry.
+      auto &Spec = AllSpecs.emplace_back(F, S, Gain);
+      if (CS.getFunction() != F)
+        Spec.CallSites.push_back(&CS);
+      const unsigned Index = AllSpecs.size() - 1;
+      UM[S] = Index;
+      if (auto [It, Inserted] = SM.try_emplace(F, Index, Index + 1); !Inserted)
+        It->second.second = Index + 1;
+      Found = true;
+    }
   }
 
-  LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function "
-                    << F->getName() << "\n";
-             for (const auto &Entry
-                  : WorkList) {
-               dbgs() << "FnSpecialization:   Gain = " << Entry.second.Gain
-                      << "\n";
-               for (const ArgInfo &Arg : Entry.second.Args)
-                 dbgs() << "FnSpecialization:   FormalArg = "
-                        << Arg.Formal->getNameOrAsOperand()
-                        << ", ActualArg = " << Arg.Actual->getNameOrAsOperand()
-                        << "\n";
-             });
-
-  return !WorkList.empty();
+  return Found;
 }
 
 bool FunctionSpecializer::isCandidateFunction(Function *F) {
@@ -449,16 +524,13 @@
   return true;
 }
 
-Function *
-FunctionSpecializer::createSpecialization(Function *F,
-                                          CallSpecBinding &Specialization) {
+Function *FunctionSpecializer::createSpecialization(Function *F, const SpecSig &S) {
   Function *Clone = cloneCandidateFunction(F);
-  Specialization.second.Clone = Clone;
 
   // Initialize the lattice state of the arguments of the function clone,
   // marking the argument on which we specialized the function constant
   // with the given value.
-  Solver.markArgInFuncSpecialization(Clone, Specialization.second.Args);
+  Solver.markArgInFuncSpecialization(Clone, S.Args);
 
   Solver.addArgumentTrackedFunction(Clone);
   Solver.markBlockExecutable(&Clone->front());
@@ -484,9 +556,8 @@
     return InstructionCost::getInvalid();
 
   // Otherwise, set the specialization cost to be the cost of all the
-  // instructions in the function and penalty for specializing more functions.
-  unsigned Penalty = NbFunctionsSpecialized + 1;
-  return Metrics.NumInsts * InlineConstants::getInstrCost() * Penalty;
+  // instructions in the function.
+  return Metrics.NumInsts * InlineConstants::getInstrCost();
 }
 
 static InstructionCost getUserBonus(User *U, llvm::TargetTransformInfo &TTI,
@@ -611,11 +682,14 @@
   const ValueLatticeElement &LV = Solver.getLatticeValueFor(A);
   if (LV.isUnknownOrUndef() || LV.isConstant() ||
       (LV.isConstantRange() && LV.getConstantRange().isSingleElement())) {
-    LLVM_DEBUG(dbgs() << "FnSpecialization: Nothing to do, argument "
+    LLVM_DEBUG(dbgs() << "FnSpecialization: Nothing to do, parameter "
                       << A->getNameOrAsOperand() << " is already constant\n");
     return false;
   }
 
+  LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting parameter "
+                    << A->getNameOrAsOperand() << "\n");
+
   return true;
 }
 
@@ -651,44 +725,45 @@
       return nullptr;
   }
 
-  LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument "
-                    << V->getNameOrAsOperand() << "\n");
-
   return C;
 }
 
-/// Redirects callsites of function \p F to its specialized copies.
-void FunctionSpecializer::updateCallSites(
-    Function *F, SmallVectorImpl<CallSpecBinding> &Specializations) {
-  SmallVector<CallBase *, 8> ToUpdate;
-  for (User *U : F->users()) {
-    if (auto *CS = dyn_cast<CallBase>(U))
-      if (CS->getCalledFunction() == F &&
-          Solver.isBlockExecutable(CS->getParent()))
-        ToUpdate.push_back(CS);
-  }
+void FunctionSpecializer::updateCallSites(Function *F, const Spec *Begin,
+                                          const Spec *End) {
+  // Collect the call sites that need updating.
+  SmallVector<CallBase *> ToUpdate;
+  for (User *U : F->users())
+    if (auto *CS = dyn_cast<CallBase>(U);
+        CS && CS->getCalledFunction() == F &&
+        Solver.isBlockExecutable(CS->getParent()))
+      ToUpdate.push_back(CS);
 
   unsigned NCallsLeft = ToUpdate.size();
   for (CallBase *CS : ToUpdate) {
-    // Decrement the counter if the callsite is either recursive or updated.
     bool ShouldDecrementCount = CS->getFunction() == F;
-    for (CallSpecBinding &Specialization : Specializations) {
-      Function *Clone = Specialization.second.Clone;
-      SmallVectorImpl<ArgInfo> &Args = Specialization.second.Args;
 
-      if (any_of(Args, [CS, this](const ArgInfo &Arg) {
+    // Find the best matching specialisation.
+    const Spec *BestSpec = nullptr;
+    for (const Spec &S : make_range(Begin, End)) {
+      if (!S.Clone || (BestSpec && S.Gain <= BestSpec->Gain))
+        continue;
+
+      if (any_of(S.Sig.Args, [CS, this](const ArgInfo &Arg) {
             unsigned ArgNo = Arg.Formal->getArgNo();
             return getCandidateConstant(CS->getArgOperand(ArgNo)) != Arg.Actual;
           }))
         continue;
 
-      LLVM_DEBUG(dbgs() << "FnSpecialization: Replacing call site " << *CS
-                        << " with " << Clone->getName() << "\n");
+      BestSpec = &S;
+    }
 
-      CS->setCalledFunction(Clone);
+    if (BestSpec) {
+      LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *CS
+                        << " to call " << BestSpec->Clone->getName() << "\n");
+      CS->setCalledFunction(BestSpec->Clone);
       ShouldDecrementCount = true;
-      break;
     }
+
     if (ShouldDecrementCount)
       --NCallsLeft;
   }
diff --git a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
@@ -0,0 +1,51 @@
+; RUN: opt -S --passes=ipsccp  -specialize-functions -func-specialization-max-clones=1 < %s | FileCheck %s
+define internal i32 @f(i32 noundef %x, ptr nocapture noundef readonly %p, ptr nocapture noundef readonly %q) noinline {
+entry:
+  %call = tail call i32 %p(i32 noundef %x)
+  %call1 = tail call i32 %q(i32 noundef %x)
+  %add = add nsw i32 %call1, %call
+  ret i32 %add
+}
+
+define internal i32 @g(i32 noundef %x, ptr nocapture noundef readonly %p, ptr nocapture noundef readonly %q) noinline {
+entry:
+  %call = tail call i32 %p(i32 noundef %x)
+  %call1 = tail call i32 %q(i32 noundef %x)
+  %sub = sub nsw i32 %call, %call1
+  ret i32 %sub
+}
+
+define i32 @h0(i32 noundef %x) {
+entry:
+  %call = tail call i32 @f(i32 noundef %x, ptr noundef nonnull @pp, ptr noundef nonnull @qq)
+  ret i32 %call
+}
+
+define i32 @h1(i32 noundef %x) {
+entry:
+  %call = tail call i32 @f(i32 noundef %x, ptr noundef nonnull @qq, ptr noundef nonnull @pp)
+  ret i32 %call
+}
+
+define i32 @h2(i32 noundef %x, ptr nocapture noundef readonly %p) {
+entry:
+  %call = tail call i32 @g(i32 noundef %x, ptr noundef %p, ptr noundef nonnull @pp)
+  ret i32 %call
+}
+
+define i32 @h3(i32 noundef %x, ptr nocapture noundef readonly %p) {
+entry:
+  %call = tail call i32 @g(i32 noundef %x, ptr noundef %p, ptr noundef nonnull @qq)
+  ret i32 %call
+}
+
+declare i32 @pp(i32 noundef)
+declare i32 @qq(i32 noundef)
+
+
+; Check that the global ranking causes two specialisations of
+; `f` to be chosen, whereas the old algorithm would choose
+; one specialisation of `f` and one of `g`.
+
+; CHECK-DAG: define internal i32 @f.1
+; CHECK-DAG: define internal i32 @f.2
diff --git a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
--- a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
@@ -6,14 +6,14 @@
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
 ; CHECK:       plus:
-; CHECK-NEXT:    [[CMP0:%.*]] = call i64 @compute.1(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @plus, ptr @minus)
+; CHECK-NEXT:    [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @plus, ptr @minus)
 ; CHECK-NEXT:    br label [[MERGE:%.*]]
 ; CHECK:       minus:
-; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @compute.2(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
+; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
 ; CHECK-NEXT:    br label [[MERGE]]
 ; CHECK:       merge:
 ; CHECK-NEXT:    [[PH:%.*]] = phi i64 [ [[CMP0]], [[PLUS]] ], [ [[CMP1]], [[MINUS]] ]
-; CHECK-NEXT:    [[CMP2:%.*]] = call i64 @compute.1(i64 [[PH]], i64 42, ptr @plus, ptr @minus)
+; CHECK-NEXT:    [[CMP2:%.*]] = call i64 @compute.2(i64 [[PH]], i64 42, ptr @plus, ptr @minus)
 ; CHECK-NEXT:    ret i64 [[CMP2]]
 ;
 entry:
@@ -62,18 +62,18 @@
 
 ; CHECK-LABEL: @compute.1
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 [[Y:%.*]])
-; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
-; CHECK-NEXT:    [[CMP2:%.*]] = call i64 @compute(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
+; CHECK-NEXT:    [[CMP0:%.*]] = call i64 %binop1(i64 [[X:%.*]], i64 [[Y:%.*]])
+; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT:    [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr %binop1, ptr @plus)
 
 ; CHECK-LABEL: @compute.2
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP0:%.*]] = call i64 @minus(i64 [[X:%.*]], i64 [[Y:%.*]])
-; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
-; CHECK-NEXT:    [[CMP2:%.*]] = call i64 @compute.2(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
+; CHECK-NEXT:    [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 [[Y:%.*]])
+; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT:    [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
 
 ; CHECK-LABEL: @compute.3
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 [[Y:%.*]])
-; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
-; CHECK-NEXT:    [[CMP2:%.*]] = call i64 @compute(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
+; CHECK-NEXT:    [[CMP0:%.*]] = call i64 @minus(i64 [[X:%.*]], i64 [[Y:%.*]])
+; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT:    [[CMP2:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
diff --git a/llvm/test/Transforms/FunctionSpecialization/specialization-order.ll b/llvm/test/Transforms/FunctionSpecialization/specialization-order.ll
--- a/llvm/test/Transforms/FunctionSpecialization/specialization-order.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/specialization-order.ll
@@ -21,7 +21,7 @@
 
 define dso_local i32 @g0(i32 %x, i32 %y) {
 ; CHECK-LABEL: @g0
-; CHECK:       call i32 @f.2(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK:       call i32 @f.3(i32 [[X:%.*]], i32 [[Y:%.*]])
 entry:
   %call = tail call i32 @f(i32 %x, i32 %y, ptr @add, ptr @add)
   ret i32 %call
@@ -30,7 +30,7 @@
 
 define dso_local i32 @g1(i32 %x, i32 %y) {
 ; CHECK-LABEL: @g1(
-; CHECK:       call i32 @f.1(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK:       call i32 @f.2(i32 [[X:%.*]], i32 [[Y:%.*]])
 entry:
   %call = tail call i32 @f(i32 %x, i32 %y, ptr @sub, ptr @add)
   ret i32 %call
@@ -38,7 +38,7 @@
 
 define dso_local i32 @g2(i32 %x, i32 %y, ptr %v) {
 ; CHECK-LABEL @g2
-; CHECK       call i32 @f.3(i32 [[X:%.*]], i32 [[Y:%.*]], ptr [[V:%.*]])
+; CHECK:       call i32 @f.1(i32 [[X:%.*]], i32 [[Y:%.*]], ptr [[V:%.*]])
 entry:
   %call = tail call i32 @f(i32 %x, i32 %y, ptr @sub, ptr %v)
   ret i32 %call
@@ -46,13 +46,13 @@
 
 ; CHECK-LABEL: define {{.*}} i32 @f.1
 ; CHECK:       call i32 @sub(i32 %x, i32 %y)
-; CHECK-NEXT:  call i32 @add(i32 %x, i32 %y)
+; CHECK-NEXT:  call i32 %v(i32 %x, i32 %y)
 
 ; CHECK-LABEL: define {{.*}} i32 @f.2
-; CHECK:       call i32 @add(i32 %x, i32 %y)
-; CHECK-NEXT   call i32 @add(i32 %x, i32 %y)
+; CHECK:       call i32 @sub(i32 %x, i32 %y)
+; CHECK-NEXT:  call i32 @add(i32 %x, i32 %y)
 
 ; CHECK-LABEL: define {{.*}} i32 @f.3
-; CHECK:       call i32 @sub(i32 %x, i32 %y)
-; CHECK-NEXT:  call i32 %v(i32 %x, i32 %y)
+; CHECK:       call i32 @add(i32 %x, i32 %y)
+; CHECK-NEXT:  call i32 @add(i32 %x, i32 %y)
 
diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
--- a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
@@ -52,25 +52,25 @@
 ; TWO-NEXT:    [[TMP0:%.*]] = call i64 @compute(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @power, ptr @mul)
 ; TWO-NEXT:    br label [[MERGE:%.*]]
 ; TWO:       minus:
-; TWO-NEXT:    [[TMP1:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @minus)
+; TWO-NEXT:    [[TMP1:%.*]] = call i64 @compute.2(i64 [[X]], i64 [[Y]], ptr @plus, ptr @minus)
 ; TWO-NEXT:    br label [[MERGE]]
 ; TWO:       merge:
 ; TWO-NEXT:    [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ]
-; TWO-NEXT:    [[TMP3:%.*]] = call i64 @compute.2(i64 [[TMP2]], i64 42, ptr @minus, ptr @power)
+; TWO-NEXT:    [[TMP3:%.*]] = call i64 @compute.1(i64 [[TMP2]], i64 42, ptr @minus, ptr @power)
 ; TWO-NEXT:    ret i64 [[TMP3]]
 ;
 ; THREE-LABEL: @main(
 ; THREE-NEXT:  entry:
 ; THREE-NEXT:    br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
 ; THREE:       plus:
-; THREE-NEXT:    [[TMP0:%.*]] = call i64 @compute.3(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @power, ptr @mul)
+; THREE-NEXT:    [[TMP0:%.*]] = call i64 @compute.1(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @power, ptr @mul)
 ; THREE-NEXT:    br label [[MERGE:%.*]]
 ; THREE:       minus:
-; THREE-NEXT:    [[TMP1:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @minus)
+; THREE-NEXT:    [[TMP1:%.*]] = call i64 @compute.2(i64 [[X]], i64 [[Y]], ptr @plus, ptr @minus)
 ; THREE-NEXT:    br label [[MERGE]]
 ; THREE:       merge:
 ; THREE-NEXT:    [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ]
-; THREE-NEXT:    [[TMP3:%.*]] = call i64 @compute.2(i64 [[TMP2]], i64 42, ptr @minus, ptr @power)
+; THREE-NEXT:    [[TMP3:%.*]] = call i64 @compute.3(i64 [[TMP2]], i64 42, ptr @minus, ptr @power)
 ; THREE-NEXT:    ret i64 [[TMP3]]
 ;
 entry:
@@ -94,8 +94,8 @@
 ;
 ; THREE-LABEL: define internal i64 @compute.1(i64 %x, i64 %y, ptr %binop1, ptr %binop2) {
 ; THREE-NEXT:  entry:
-; THREE-NEXT:    [[TMP0:%.+]] = call i64 @plus(i64 %x, i64 %y)
-; THREE-NEXT:    [[TMP1:%.+]] = call i64 @minus(i64 %x, i64 %y)
+; THREE-NEXT:    [[TMP0:%.+]] = call i64 @power(i64 %x, i64 %y)
+; THREE-NEXT:    [[TMP1:%.+]] = call i64 @mul(i64 %x, i64 %y)
 ; THREE-NEXT:    [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]]
 ; THREE-NEXT:    [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x
 ; THREE-NEXT:    [[TMP4:%.+]] = sub i64 [[TMP3]], %y
@@ -105,8 +105,8 @@
 ;
 ; THREE-LABEL: define internal i64 @compute.2(i64 %x, i64 %y, ptr %binop1, ptr %binop2) {
 ; THREE-NEXT:  entry:
-; THREE-NEXT:    [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 %y)
-; THREE-NEXT:    [[TMP1:%.+]] = call i64 @power(i64 %x, i64 %y)
+; THREE-NEXT:    [[TMP0:%.+]] = call i64 @plus(i64 %x, i64 %y)
+; THREE-NEXT:    [[TMP1:%.+]] = call i64 @minus(i64 %x, i64 %y)
 ; THREE-NEXT:    [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]]
 ; THREE-NEXT:    [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x
 ; THREE-NEXT:    [[TMP4:%.+]] = sub i64 [[TMP3]], %y
@@ -116,8 +116,8 @@
 ;
 ; THREE-LABEL: define internal i64 @compute.3(i64 %x, i64 %y, ptr %binop1, ptr %binop2) {
 ; THREE-NEXT:  entry:
-; THREE-NEXT:    [[TMP0:%.+]] = call i64 @power(i64 %x, i64 %y)
-; THREE-NEXT:    [[TMP1:%.+]] = call i64 @mul(i64 %x, i64 %y)
+; THREE-NEXT:    [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 %y)
+; THREE-NEXT:    [[TMP1:%.+]] = call i64 @power(i64 %x, i64 %y)
 ; THREE-NEXT:    [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]]
 ; THREE-NEXT:    [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x
 ; THREE-NEXT:    [[TMP4:%.+]] = sub i64 [[TMP3]], %y