diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -290,9 +290,7 @@
   /// \note It is an error to call V->takeName(V).
   void takeName(Value *V);
 
-#ifndef NDEBUG
   std::string getNameOrAsOperand() const;
-#endif
 
   /// Change all uses of this to point to a new Value.
   ///
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -62,16 +62,39 @@
 namespace llvm {
 // Bookkeeping struct to pass data from the analysis and profitability phase
 // to the actual transform helper functions.
-struct SpecializationInfo {
+struct SpecInfo {
   SmallVector<ArgInfo, 8> Args; // Stores the {formal,actual} argument pairs.
   InstructionCost Gain;         // Profitability: Gain = Bonus - Cost.
   Function *Clone;              // The definition of the specialized function.
+
+  /// Strict weak ordering: by Gain, then by argument count, then by the
+  /// argument lists compared lexicographically. Clone takes no part in it.
+  bool operator<(const SpecInfo &Other) const {
+    if (Gain != Other.Gain)
+      return Gain < Other.Gain;
+    if (Args.size() != Other.Args.size())
+      return Args.size() < Other.Args.size();
+    for (size_t I = 0; I < Args.size(); ++I)
+      if (bool Lt = Args[I] < Other.Args[I]; Lt || Other.Args[I] < Args[I])
+        return Lt; // The first pair that differs decides.
+    return false;
+  }
+  /// Reverse ordering, required by std::greater.
+  bool operator>(const SpecInfo &Other) const { return Other < *this; }
+};
+
+using SpecSet = std::set<SpecInfo, std::greater<SpecInfo>>;
+
+struct SpecMapEntry {
+  Function *F;               // The function being specialized.
+  SpecSet Specs;             // Candidates, best first; must precede IterEnd.
+  SpecSet::iterator IterEnd; // One past the last candidate selected so far.
+  // Takes ownership of Specs_; a named rvalue reference must be std::move'd.
+  SpecMapEntry(Function *F_, SpecSet &&Specs_)
+    : F(F_), Specs(std::move(Specs_)), IterEnd(Specs.begin()) {}
 };
 
-using CallSpecBinding = std::pair<CallBase *, SpecializationInfo>;
-// We are using MapVector because it guarantees deterministic iteration
-// order across executions.
-using SpecializationMap = SmallMapVector<CallBase *, SpecializationInfo, 8>;
+using SpecMap = SmallVector<SpecMapEntry>;
 
 class FunctionSpecializer {
 
@@ -88,10 +111,6 @@
   std::function<TargetTransformInfo &(Function &)> GetTTI;
   std::function<AssumptionCache &(Function &)> GetAC;
 
-  // The number of functions specialised, used for collecting statistics and
-  // also in the cost model.
-  unsigned NbFunctionsSpecialized = 0;
-
   SmallPtrSet<Function *, 32> SpecializedFuncs;
   SmallPtrSet<Function *, 32> FullySpecialized;
   DenseMap<Function *, CodeMetrics> FunctionMetrics;
@@ -143,12 +162,11 @@
   /// applying them.
   ///
   /// \returns true if any specializations have been found.
-  bool findSpecializations(Function *F, InstructionCost Cost,
-                           SmallVectorImpl<CallSpecBinding> &WorkList);
+  bool findSpecializations(Function *F, InstructionCost Cost, SpecSet &Specs);
 
   bool isCandidateFunction(Function *F);
 
-  Function *createSpecialization(Function *F, CallSpecBinding &Specialization);
+  Function *createSpecialization(Function *F, SpecInfo &Specialization);
 
   /// Compute and return the cost of specializing function \p F.
   InstructionCost getSpecializationCost(Function *F);
@@ -166,8 +184,7 @@
   Constant *getCandidateConstant(Value *V);
 
   /// Redirects callsites of function \p F to its specialized copies.
-  void updateCallSites(Function *F,
-                       SmallVectorImpl<CallSpecBinding> &Specializations);
+  void updateCallSites(Function *F, SpecSet &Specs, SpecSet::iterator IterEnd);
 };
 } // namespace llvm
 
diff --git a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
--- a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
+++ b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
@@ -17,6 +17,7 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Transforms/Utils/PredicateInfo.h"
 #include <vector>
@@ -52,7 +53,51 @@
   Argument *Formal; // The Formal argument being analysed.
   Constant *Actual; // A corresponding actual constant argument.
 
-  ArgInfo(Argument *F, Constant *A) : Formal(F), Actual(A){};
+  ArgInfo(Argument *F, Constant *A) : Formal(F), Actual(A) {}
+
+  /// Ordering on constants that lets ArgInfo be used as a sorted-set key.
+  static bool LessThan(const Value *LHS, const Value *RHS) {
+    // Rank ConstantInt/ConstantFP by width, then by unsigned bit pattern.
+    // (APInt::ult asserts equal widths; FP '<' leaves NaNs unordered.)
+    auto IsNum = [](const Value *V) { return isa<ConstantInt, ConstantFP>(V); };
+    auto Bits = [](const Value *V) {
+      if (auto *Int = dyn_cast<ConstantInt>(V))
+        return Int->getValue();
+      return cast<ConstantFP>(V)->getValue().bitcastToAPInt();
+    };
+    if (IsNum(LHS) && IsNum(RHS)) {
+      APInt L = Bits(LHS), R = Bits(RHS);
+      if (L.getBitWidth() != R.getBitWidth())
+        return L.getBitWidth() < R.getBitWidth();
+      return L.ult(R);
+    }
+    // Let ConstantInt and ConstantFP be 'smaller' than anything else.
+    if (IsNum(LHS) || IsNum(RHS))
+      return IsNum(LHS);
+    // Compare names if possible.
+    if (LHS->hasName() && RHS->hasName())
+      return LHS->getName() < RHS->getName();
+    // Compare operands lexicographically (handles constant expressions).
+    auto *User1 = dyn_cast<User>(LHS), *User2 = dyn_cast<User>(RHS);
+    if (User1 && User2) {
+      if (User1->getNumOperands() != User2->getNumOperands())
+        return User1->getNumOperands() < User2->getNumOperands();
+      for (unsigned I = 0; I < User1->getNumOperands(); ++I) {
+        bool Lt = LessThan(User1->getOperand(I), User2->getOperand(I));
+        if (Lt || LessThan(User2->getOperand(I), User1->getOperand(I)))
+          return Lt; // The first differing operand decides.
+      }
+      return false;
+    }
+    // Last and most expensive comparison.
+    return LHS->getNameOrAsOperand() < RHS->getNameOrAsOperand();
+  }
+
+  bool operator<(const ArgInfo &Other) const {
+    return Formal != Other.Formal
+               ? Formal->getArgNo() < Other.Formal->getArgNo() // by position
+               : LessThan(Actual, Other.Actual);               // by constant
+  }
 };
 
 class SCCPInstVisitor;
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -439,7 +439,6 @@
     ST->reinsertValue(this);
 }
 
-#ifndef NDEBUG
 std::string Value::getNameOrAsOperand() const {
   if (!getName().empty())
     return std::string(getName());
@@ -449,7 +448,6 @@
   printAsOperand(OS, false);
   return OS.str();
 }
-#endif
 
 void Value::assertModuleIsMaterializedImpl() const {
 #ifndef NDEBUG
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -64,7 +64,7 @@
 
 #define DEBUG_TYPE "function-specialization"
 
-STATISTIC(NumFuncSpecialized, "Number of functions specialized");
+STATISTIC(NumSpecsCreated, "Number of specializations created");
 
 static cl::opt<bool> ForceFunctionSpecialization(
     "force-function-specialization", cl::init(false), cl::Hidden,
@@ -239,46 +239,74 @@
 ///
 /// \returns true if at least one function is specialized.
 bool FunctionSpecializer::run() {
-  bool Changed = false;
-
+  SpecMap SM; // One entry per function with at least one candidate.
   for (Function &F : M) {
     if (!isCandidateFunction(&F))
       continue;
 
     auto Cost = getSpecializationCost(&F);
     if (!Cost.isValid()) {
-      LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialization cost.\n");
+      LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialization cost for "
+                        << F.getName() << "\n");
       continue;
     }
 
     LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
                       << F.getName() << " is " << Cost << "\n");
 
-    SmallVector<CallSpecBinding, 8> Specializations;
-    if (!findSpecializations(&F, Cost, Specializations)) {
+    SpecSet Specs;
+    if (!findSpecializations(&F, Cost, Specs)) {
       LLVM_DEBUG(
-          dbgs() << "FnSpecialization: No possible specializations found\n");
+          dbgs() << "FnSpecialization: No possible specializations found for "
+                 << F.getName() << "\n");
       continue;
     }
+    SM.emplace_back(&F, std::move(Specs));
+  }
 
-    Changed = true;
-
-    SmallVector<Function *, 4> Clones;
-    for (CallSpecBinding &Specialization : Specializations)
-      Clones.push_back(createSpecialization(&F, Specialization));
+  if (SM.size() == 0)
+    return false;
 
-    Solver.solveWhileResolvedUndefsIn(Clones);
-    updateCallSites(&F, Specializations);
+  unsigned NSpecsFound = 0;
+  for (auto &[_0, Specs, _1] : SM)
+    NSpecsFound += Specs.size();
+
+  // Cap the module total at MaxClonesThreshold clones per candidate function.
+  const auto MaxClonesPerModule = SM.size() * MaxClonesThreshold;
+  if (NSpecsFound > MaxClonesPerModule) {
+    for (unsigned I = 0; I < MaxClonesPerModule; ++I) { // One pick per round.
+      auto It = std::max_element(SM.begin(), SM.end(),
+        [](const SpecMapEntry &LHS, const SpecMapEntry &RHS) {
+          if (LHS.IterEnd == LHS.Specs.end()) // Exhausted entries rank lowest.
+            return RHS.IterEnd != RHS.Specs.end();
+          if (RHS.IterEnd == RHS.Specs.end())
+            return false;
+          return *LHS.IterEnd < *RHS.IterEnd;
+        });
+      ++(It->IterEnd); // Admit the best remaining candidate into its range.
+    }
+  } else {
+    for (auto &[_, Specs, IterEnd] : SM)
+      IterEnd = Specs.end(); // Within budget: keep every candidate.
   }
 
+  SmallVector<Function *, 16> Clones;
+  for (auto &[F, Specs, IterEnd] : SM)
+    for (auto I = Specs.begin(); I != IterEnd; ++I)
+      Clones.push_back(createSpecialization(F, const_cast<SpecInfo &>(*I)));
+
+  Solver.solveWhileResolvedUndefsIn(Clones);
+
+  for (auto &[F, Specs, IterEnd] : SM)
+    updateCallSites(F, Specs, IterEnd);
+
   promoteConstantStackValues();
 
-  LLVM_DEBUG(if (NbFunctionsSpecialized) dbgs()
-             << "FnSpecialization: Specialized " << NbFunctionsSpecialized
-             << " functions in module " << M.getName() << "\n");
+  LLVM_DEBUG(dbgs() << "FnSpecialization: Created " << Clones.size()
+             << " specializations in module " << M.getName() << "\n");
 
-  NumFuncSpecialized += NbFunctionsSpecialized;
-  return Changed;
+  NumSpecsCreated += Clones.size();
+  return true;
 }
 
 void FunctionSpecializer::removeDeadFunctions() {
@@ -325,26 +353,25 @@
 /// applying them.
 ///
 /// \returns true if any specializations have been found.
-bool FunctionSpecializer::findSpecializations(
-    Function *F, InstructionCost Cost,
-    SmallVectorImpl<CallSpecBinding> &WorkList) {
+bool
+FunctionSpecializer::findSpecializations(Function *F, InstructionCost Cost,
+                                         SpecSet &Specs) {
   // Get a list of interesting arguments.
   SmallVector<Argument *, 4> Args;
   for (Argument &Arg : F->args())
     if (isArgumentInteresting(&Arg))
       Args.push_back(&Arg);
 
-  if (!Args.size())
+  if (Args.empty())
     return false;
 
   // Find all the call sites for the function.
-  SpecializationMap Specializations;
   for (User *U : F->users()) {
     if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
       continue;
     auto &CS = *cast<CallBase>(U);
 
-    // Skip irrelevant users.
+    // The user instruction does not call our function.
     if (CS.getCalledFunction() != F)
       continue;
 
@@ -358,62 +385,37 @@
     if (!Solver.isBlockExecutable(CS.getParent()))
       continue;
 
+    LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting CallSite "
+                      << CS << "\n");
+
     // Examine arguments and create specialization candidates from call sites
     // with constant arguments.
-    bool Added = false;
+    SpecInfo Spec{{}, 0 - Cost, /*Clone=*/nullptr};
     for (Argument *A : Args) {
       Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo()));
       if (!C)
         continue;
 
-      if (!Added) {
-        Specializations[&CS] = {{}, 0 - Cost, nullptr};
-        Added = true;
-      }
-
-      SpecializationInfo &S = Specializations.back().second;
-      S.Gain += getSpecializationBonus(A, C, Solver.getLoopInfo(*F));
-      S.Args.push_back({A, C});
+      Spec.Gain += getSpecializationBonus(A, C, Solver.getLoopInfo(*F));
+      Spec.Args.push_back({A, C});
     }
-    Added = false;
-  }
+    if (Spec.Args.empty() || (Spec.Gain <= 0 && !ForceFunctionSpecialization))
+      continue;
+    auto Res = Specs.insert(std::move(Spec));
+    LLVM_DEBUG(
+        if (Res.second) {
+          dbgs() << "FnSpecialization: Found Specialization for function "
+                 << F->getName() << "\n";
+          dbgs() << "FnSpecialization:   Gain = " << Res.first->Gain << "\n";
+          for (const ArgInfo &Arg : Res.first->Args)
+            dbgs() << "FnSpecialization:   FormalArg = "
+                   << Arg.Formal->getNameOrAsOperand()
+                   << ", ActualArg = " << Arg.Actual->getNameOrAsOperand()
+                   << "\n";
+        });
 
-  // Remove unprofitable specializations.
-  if (!ForceFunctionSpecialization)
-    Specializations.remove_if(
-        [](const auto &Entry) { return Entry.second.Gain <= 0; });
-
-  // Clear the MapVector and return the underlying vector.
-  WorkList = Specializations.takeVector();
-
-  // Sort the candidates in descending order.
-  llvm::stable_sort(WorkList, [](const auto &L, const auto &R) {
-    return L.second.Gain > R.second.Gain;
-  });
-
-  // Truncate the worklist to 'MaxClonesThreshold' candidates if necessary.
-  if (WorkList.size() > MaxClonesThreshold) {
-    LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed "
-                      << "the maximum number of clones threshold.\n"
-                      << "FnSpecialization: Truncating worklist to "
-                      << MaxClonesThreshold << " candidates.\n");
-    WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end());
   }
-
-  LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function "
-                    << F->getName() << "\n";
-             for (const auto &Entry
-                  : WorkList) {
-               dbgs() << "FnSpecialization:   Gain = " << Entry.second.Gain
-                      << "\n";
-               for (const ArgInfo &Arg : Entry.second.Args)
-                 dbgs() << "FnSpecialization:   FormalArg = "
-                        << Arg.Formal->getNameOrAsOperand()
-                        << ", ActualArg = " << Arg.Actual->getNameOrAsOperand()
-                        << "\n";
-             });
-
-  return !WorkList.empty();
+  return !Specs.empty();
 }
 
 bool FunctionSpecializer::isCandidateFunction(Function *F) {
@@ -451,21 +453,23 @@
 
 Function *
 FunctionSpecializer::createSpecialization(Function *F,
-                                          CallSpecBinding &Specialization) {
+                                          SpecInfo &Specialization) {
   Function *Clone = cloneCandidateFunction(F);
-  Specialization.second.Clone = Clone;
+  Specialization.Clone = Clone; // Read back later by updateCallSites().
+
+  LLVM_DEBUG(dbgs() << "FnSpecialization: Creating clone " << Clone->getName()
+                    << "\n");
 
   // Initialize the lattice state of the arguments of the function clone,
   // marking the argument on which we specialized the function constant
   // with the given value.
-  Solver.markArgInFuncSpecialization(Clone, Specialization.second.Args);
+  Solver.markArgInFuncSpecialization(Clone, Specialization.Args);
 
   Solver.addArgumentTrackedFunction(Clone);
   Solver.markBlockExecutable(&Clone->front());
 
   // Mark all the specialized functions
   SpecializedFuncs.insert(Clone);
-  NbFunctionsSpecialized++;
 
   return Clone;
 }
@@ -484,9 +488,8 @@
     return InstructionCost::getInvalid();
 
   // Otherwise, set the specialization cost to be the cost of all the
-  // instructions in the function and penalty for specializing more functions.
-  unsigned Penalty = NbFunctionsSpecialized + 1;
-  return Metrics.NumInsts * InlineConstants::getInstrCost() * Penalty;
+  // instructions in the function. (FIXME: NumInsts contains cost already)
+  return Metrics.NumInsts * InlineConstants::getInstrCost();
 }
 
 static InstructionCost getUserBonus(User *U, llvm::TargetTransformInfo &TTI,
@@ -658,8 +661,8 @@
 }
 
 /// Redirects callsites of function \p F to its specialized copies.
-void FunctionSpecializer::updateCallSites(
-    Function *F, SmallVectorImpl<CallSpecBinding> &Specializations) {
+void FunctionSpecializer::updateCallSites(Function *F, SpecSet &Specs,
+                                          SpecSet::iterator IterEnd) {
   SmallVector<CallBase *, 8> ToUpdate;
   for (User *U : F->users()) {
     if (auto *CS = dyn_cast<CallBase>(U))
@@ -672,9 +675,9 @@
   for (CallBase *CS : ToUpdate) {
     // Decrement the counter if the callsite is either recursive or updated.
     bool ShouldDecrementCount = CS->getFunction() == F;
-    for (CallSpecBinding &Specialization : Specializations) {
-      Function *Clone = Specialization.second.Clone;
-      SmallVectorImpl<ArgInfo> &Args = Specialization.second.Args;
+    for (auto I = Specs.begin(); I != IterEnd; ++I) {
+      Function *Clone = I->Clone;
+      const SmallVectorImpl<ArgInfo> &Args = I->Args;
 
       if (any_of(Args, [CS, this](const ArgInfo &Arg) {
             unsigned ArgNo = Arg.Formal->getArgNo();
diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp
--- a/llvm/lib/Transforms/IPO/SCCP.cpp
+++ b/llvm/lib/Transforms/IPO/SCCP.cpp
@@ -43,7 +43,7 @@
           "Number of instructions replaced with (simpler) instruction");
 
 static cl::opt<bool> SpecializeFunctions("specialize-functions",
-    cl::init(false), cl::Hidden, cl::desc("Enable function specialization"));
+    cl::init(true), cl::Hidden, cl::desc("Enable function specialization"));
 
 static cl::opt<unsigned> FuncSpecializationMaxIters(
     "func-specialization-max-iters", cl::init(1), cl::Hidden, cl::desc(
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-nonconst-glob.ll
@@ -7,8 +7,8 @@
 ; Global B is not constant. We do not specialise on addresses unless we
 ; enable that:
 
-; ON-ADDRESS: call i32 @foo.1(i32 %x, ptr @A)
-; ON-ADDRESS: call i32 @foo.2(i32 %y, ptr @B)
+; ON-ADDRESS: call i32 @foo.2(i32 %x, ptr @A)
+; ON-ADDRESS: call i32 @foo.1(i32 %y, ptr @B)
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-stats.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-stats.ll
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-stats.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-stats.ll
@@ -1,7 +1,7 @@
 ; REQUIRES: asserts
 ; RUN: opt -stats -passes=ipsccp -specialize-functions -S -force-function-specialization < %s 2>&1 | FileCheck %s
 
-; CHECK: 2 function-specialization - Number of functions specialized
+; CHECK: 2 function-specialization - Number of specializations created
 
 define i64 @main(i64 %x, i1 %flag) {
 entry:
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
@@ -43,9 +43,9 @@
 }
 
 define i32 @main(ptr %0, i32 %1) {
-; CHECK:    call void @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]])
+; CHECK:    call void @func.1(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]])
   %3 = call i32 @func(ptr %0, i32 %1, ptr nonnull @increment)
-; CHECK:    call void @func.1(ptr [[TMP0]], i32 0)
+; CHECK:    call void @func.2(ptr [[TMP0]], i32 0)
   %4 = call i32 @func(ptr %0, i32 %3, ptr nonnull @decrement)
 ; CHECK:    ret i32 0
   ret i32 %4
@@ -61,7 +61,7 @@
 ; CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
 ; CHECK:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
 ; CHECK:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
-; CHECK:    call void @decrement(ptr [[TMP9]])
+; CHECK:    call void @increment(ptr [[TMP9]])
 ; CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
 ; CHECK:    [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
 ; CHECK:    call void @func.1(ptr [[TMP0]], i32 [[TMP11]])
@@ -80,7 +80,7 @@
 ; CHECK:    [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
 ; CHECK:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
 ; CHECK:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
-; CHECK:    call void @increment(ptr [[TMP9]])
+; CHECK:    call void @decrement(ptr [[TMP9]])
 ; CHECK:    [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
 ; CHECK:    [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
 ; CHECK:    call void @func.2(ptr [[TMP0]], i32 [[TMP11]])
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll
@@ -14,8 +14,8 @@
 
 define dso_local i32 @bar(i32 %x, i32 %y) {
 ; COMMON-LABEL: @bar
-; FORCE:        %call = call i32 @foo.1(i32 %x, ptr @A)
-; FORCE:        %call1 = call i32 @foo.2(i32 %y, ptr @B)
+; FORCE:        %call = call i32 @foo.2(i32 %x, ptr @A)
+; FORCE:        %call1 = call i32 @foo.1(i32 %y, ptr @B)
 ; DISABLED-NOT: %call1 = call i32 @foo.1(
 entry:
   %tobool = icmp ne i32 %x, 0
@@ -38,14 +38,14 @@
 ;
 ; FORCE:      define internal i32 @foo.1(i32 %x, ptr %b) {
 ; FORCE-NEXT: entry:
-; FORCE-NEXT:   %0 = load i32, ptr @A, align 4
+; FORCE-NEXT:   %0 = load i32, ptr @B, align 4
 ; FORCE-NEXT:   %add = add nsw i32 %x, %0
 ; FORCE-NEXT:   ret i32 %add
 ; FORCE-NEXT: }
 ;
 ; FORCE:      define internal i32 @foo.2(i32 %x, ptr %b) {
 ; FORCE-NEXT: entry:
-; FORCE-NEXT:   %0 = load i32, ptr @B, align 4
+; FORCE-NEXT:   %0 = load i32, ptr @A, align 4
 ; FORCE-NEXT:   %add = add nsw i32 %x, %0
 ; FORCE-NEXT:   ret i32 %add
 ; FORCE-NEXT: }
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
@@ -44,18 +44,18 @@
 
 ; CHECK:        define internal i32 @foo.1(i32 %x, ptr %b, ptr %c) {
 ; CHECK-NEXT:   entry:
-; CHECK-NEXT:     %0 = load i32, ptr @A, align 4
+; CHECK-NEXT:     %0 = load i32, ptr @B, align 4
 ; CHECK-NEXT:     %add = add nsw i32 %x, %0
-; CHECK-NEXT:     %1 = load i32, ptr @C, align 4
+; CHECK-NEXT:     %1 = load i32, ptr @D, align 4
 ; CHECK-NEXT:     %add1 = add nsw i32 %add, %1
 ; CHECK-NEXT:     ret i32 %add1
 ; CHECK-NEXT:   }
 
 ; CHECK: define internal i32 @foo.2(i32 %x, ptr %b, ptr %c) {
 ; CHECK-NEXT:   entry:
-; CHECK-NEXT:     %0 = load i32, ptr @B, align 4
+; CHECK-NEXT:     %0 = load i32, ptr @A, align 4
 ; CHECK-NEXT:     %add = add nsw i32 %x, %0
-; CHECK-NEXT:     %1 = load i32, ptr @D, align 4
+; CHECK-NEXT:     %1 = load i32, ptr @C, align 4
 ; CHECK-NEXT:     %add1 = add nsw i32 %add, %1
 ; CHECK-NEXT:     ret i32 %add1
 ; CHECK-NEXT:   }
diff --git a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
@@ -0,0 +1,51 @@
+; RUN: opt -S --passes=ipsccp  -specialize-functions -func-specialization-max-clones=1 < %s | FileCheck %s
+define internal i32 @f(i32 noundef %x, ptr nocapture noundef readonly %p, ptr nocapture noundef readonly %q) noinline {
+entry:
+  %call = tail call i32 %p(i32 noundef %x)
+  %call1 = tail call i32 %q(i32 noundef %x)
+  %add = add nsw i32 %call1, %call
+  ret i32 %add
+}
+
+define internal i32 @g(i32 noundef %x, ptr nocapture noundef readonly %p, ptr nocapture noundef readonly %q) noinline {
+entry:
+  %call = tail call i32 %p(i32 noundef %x)
+  %call1 = tail call i32 %q(i32 noundef %x)
+  %sub = sub nsw i32 %call, %call1
+  ret i32 %sub
+}
+
+define i32 @h0(i32 noundef %x) {
+entry:
+  %call = tail call i32 @f(i32 noundef %x, ptr noundef nonnull @pp, ptr noundef nonnull @qq)
+  ret i32 %call
+}
+
+define i32 @h1(i32 noundef %x) {
+entry:
+  %call = tail call i32 @f(i32 noundef %x, ptr noundef nonnull @qq, ptr noundef nonnull @pp)
+  ret i32 %call
+}
+
+define i32 @h2(i32 noundef %x, ptr nocapture noundef readonly %p) {
+entry:
+  %call = tail call i32 @g(i32 noundef %x, ptr noundef %p, ptr noundef nonnull @pp)
+  ret i32 %call
+}
+
+define i32 @h3(i32 noundef %x, ptr nocapture noundef readonly %p) {
+entry:
+  %call = tail call i32 @g(i32 noundef %x, ptr noundef %p, ptr noundef nonnull @qq)
+  ret i32 %call
+}
+
+declare i32 @pp(i32 noundef)
+declare i32 @qq(i32 noundef)
+
+
+; Check that the global ranking causes two specialisations of
+; `f` to be chosen, whereas the old algorithm would choose
+; one specialisation of `f` and one of `g`.
+
+; CHECK-DAG: define internal i32 @f.1
+; CHECK-DAG: define internal i32 @f.2
diff --git a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
--- a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
@@ -6,14 +6,14 @@
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
 ; CHECK:       plus:
-; CHECK-NEXT:    [[CMP0:%.*]] = call i64 @compute.1(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @plus, ptr @minus)
+; CHECK-NEXT:    [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @plus, ptr @minus)
 ; CHECK-NEXT:    br label [[MERGE:%.*]]
 ; CHECK:       minus:
-; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @compute.2(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
+; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
 ; CHECK-NEXT:    br label [[MERGE]]
 ; CHECK:       merge:
 ; CHECK-NEXT:    [[PH:%.*]] = phi i64 [ [[CMP0]], [[PLUS]] ], [ [[CMP1]], [[MINUS]] ]
-; CHECK-NEXT:    [[CMP2:%.*]] = call i64 @compute.1(i64 [[PH]], i64 42, ptr @plus, ptr @minus)
+; CHECK-NEXT:    [[CMP2:%.*]] = call i64 @compute.2(i64 [[PH]], i64 42, ptr @plus, ptr @minus)
 ; CHECK-NEXT:    ret i64 [[CMP2]]
 ;
 entry:
@@ -60,20 +60,20 @@
   ret i64 %sub
 }
 
-; CHECK-LABEL: @compute.1
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 [[Y:%.*]])
-; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
-; CHECK-NEXT:    [[CMP2:%.*]] = call i64 @compute(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
-
-; CHECK-LABEL: @compute.2
+; CHECK: define internal i64 @compute.1
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP0:%.*]] = call i64 @minus(i64 [[X:%.*]], i64 [[Y:%.*]])
 ; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
-; CHECK-NEXT:    [[CMP2:%.*]] = call i64 @compute.2(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
+; CHECK-NEXT:    [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
 
-; CHECK-LABEL: @compute.3
+; CHECK: define internal i64 @compute.2
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 [[Y:%.*]])
 ; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
-; CHECK-NEXT:    [[CMP2:%.*]] = call i64 @compute(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
+; CHECK-NEXT:    [[CMP2:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
+
+; CHECK: define internal i64 @compute.3
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP0:%.*]] = call i64 %binop1(i64 [[X:%.*]], i64 [[Y:%.*]])
+; CHECK-NEXT:    [[CMP1:%.*]] = call i64 @plus(i64 [[X]], i64 [[Y]])
+; CHECK-NEXT:    [[CMP2:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y]], ptr %binop1, ptr @plus)