diff --git a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
--- a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
+++ b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
@@ -151,13 +151,14 @@
   /// Return a reference to the set of argument tracked functions.
   SmallPtrSetImpl<Function *> &getArgumentTrackedFunctions();
 
-  /// Mark the constant argument of a new function specialization. \p F points
-  /// to the cloned function and \p Arg represents the constant argument as a
-  /// pair of {formal,actual} values (the formal argument is associated with the
-  /// original function definition). All other arguments of the specialization
-  /// inherit the lattice state of their corresponding values in the original
-  /// function.
-  void markArgInFuncSpecialization(Function *F, const ArgInfo &Arg);
+  /// Mark the constant arguments of a new function specialization. \p F points
+  /// to the cloned function and \p Args contains a list of constant arguments
+  /// represented as pairs of {formal,actual} values (the formal argument is
+  /// associated with the original function definition). All other arguments of
+  /// the specialization inherit the lattice state of their corresponding values
+  /// in the original function.
+  void markArgInFuncSpecialization(Function *F,
+                                   const SmallVectorImpl<ArgInfo> &Args);
 
-  /// Mark all of the blocks in function \p F non-executable. Clients can used
+  /// Mark all of the blocks in function \p F non-executable. Clients can use
   /// this method to erase a function from the module (e.g., if it has been
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -87,6 +87,12 @@
              "specialization"),
     cl::init(3));
 
+static cl::opt<unsigned> MinGainThreshold(
+    "func-specialization-min-gain", cl::Hidden,
+    cl::desc("The minimum gain for a specialization to be considered "
+             "profitable"),
+    cl::init(10000));
+
 static cl::opt<unsigned> SmallFunctionThreshold(
     "func-specialization-size-threshold", cl::Hidden,
     cl::desc("Don't specialize functions that have less than this theshold "
@@ -102,8 +108,10 @@
     "func-specialization-on-address", cl::init(false), cl::Hidden,
     cl::desc("Enable function specialization on the address of global values"));
 
-// TODO: This needs checking to see the impact on compile-times, which is why
-// this is off by default for now.
+// Disabled by default as it can significantly increase compilation times.
+// Running nikic's compile time tracker on x86 with instruction count as the
+// metric shows 4% regression for SPASS while being neutral for all other
+// benchmarks of the llvm test suite.
 static cl::opt<bool> EnableSpecializationForLiteralConstant(
     "function-specialization-for-literal-constant", cl::init(false), cl::Hidden,
     cl::desc("Enable specialization of functions that take a literal constant "
@@ -113,17 +121,14 @@
 // Bookkeeping struct to pass data from the analysis and profitability phase
 // to the actual transform helper functions.
 struct SpecializationInfo {
-  ArgInfo Arg;          // Stores the {formal,actual} argument pair.
-  InstructionCost Gain; // Profitability: Gain = Bonus - Cost.
-
-  SpecializationInfo(Argument *A, Constant *C, InstructionCost G)
-      : Arg(A, C), Gain(G){};
+  SmallVector<ArgInfo, 8> Args; // Stores the {formal,actual} argument pairs.
+  InstructionCost Gain = 0;     // Profitability: Gain = Bonus - Cost.
 };
 } // Anonymous namespace
 
 using FuncList = SmallVectorImpl<Function *>;
-using ConstList = SmallVector<Constant *>;
-using SpecializationList = SmallVector<SpecializationInfo>;
+using ConstList = SmallVector<std::pair<CallBase *, Constant *>>;
+using SpecializationMap = SmallMapVector<CallBase *, SpecializationInfo, 8>;
 
 // Helper to check if \p LV is either a constant or a constant
 // range with a single element. This should cover exactly the same cases as the
@@ -310,17 +315,15 @@
       LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
                         << F->getName() << " is " << Cost << "\n");
 
-      SpecializationList Specializations;
+      SpecializationMap Specializations;
       calculateGains(F, Cost, Specializations);
       if (Specializations.empty()) {
         LLVM_DEBUG(dbgs() << "FnSpecialization: no possible constants found\n");
         continue;
       }
 
-      for (SpecializationInfo &S : Specializations) {
-        specializeFunction(F, S, WorkList);
-        Changed = true;
-      }
+      specializeFunction(F, Specializations, WorkList);
+      Changed = true;
     }
 
     updateSpecializedFuncs(Candidates, WorkList);
@@ -398,11 +401,9 @@
   /// This function decides whether it's worthwhile to specialize function \p F
   /// based on the known constant values its arguments can take on, i.e. it
   /// calculates a gain and returns a list of actual arguments that are deemed
-  /// profitable to specialize. Specialization is performed on the first
-  /// interesting argument. Specializations based on additional arguments will
-  /// be evaluated on following iterations of the main IPSCCP solve loop.
+  /// profitable to specialize.
   void calculateGains(Function *F, InstructionCost Cost,
-                      SpecializationList &WorkList) {
+                      SpecializationMap &Specializations) {
     // Determine if we should specialize the function based on the values the
     // argument can take on. If specialization is not profitable, we continue
     // on to the next argument.
@@ -417,49 +418,55 @@
         continue;
       }
 
-      for (auto *ActualArg : ActualArgs) {
-        InstructionCost Gain =
-            ForceFunctionSpecialization
-                ? 1
-                : getSpecializationBonus(&FormalArg, ActualArg) - Cost;
+      for (auto &Entry : ActualArgs) {
+        CallBase *Call = Entry.first;
+        Constant *ActualArg = Entry.second;
 
-        if (Gain <= 0)
-          continue;
-        WorkList.push_back({&FormalArg, ActualArg, Gain});
+        SpecializationInfo &S = Specializations[Call];
+        if (!ForceFunctionSpecialization)
+          S.Gain += getSpecializationBonus(&FormalArg, ActualArg);
+        S.Args.push_back({&FormalArg, ActualArg});
       }
+    }
 
-      if (WorkList.empty())
-        continue;
+    if (Specializations.empty())
+      return;
+
+    if (ForceFunctionSpecialization) {
+      // The cost model is disregarded, so keep at most MaxClonesThreshold
+      // specializations per function.
+      while (Specializations.size() > MaxClonesThreshold)
+        Specializations.erase(Specializations.begin());
+    } else {
+      // Charge the specialization Cost against the gain of each candidate.
+      SmallVector<CallBase *, 8> KeysToRemove;
 
-      // Sort the candidates in descending order.
-      llvm::stable_sort(WorkList, [](const SpecializationInfo &L,
-                                     const SpecializationInfo &R) {
-        return L.Gain > R.Gain;
-      });
-
-      // Truncate the worklist to 'MaxClonesThreshold' candidates if
-      // necessary.
-      if (WorkList.size() > MaxClonesThreshold) {
-        LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed "
-                          << "the maximum number of clones threshold.\n"
-                          << "FnSpecialization: Truncating worklist to "
-                          << MaxClonesThreshold << " candidates.\n");
-        WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end());
+      for (auto &Entry : Specializations) {
+        CallBase *Call = Entry.first;
+        SpecializationInfo &S = Entry.second;
+
+        S.Gain -= Cost;
+        if (S.Gain < MinGainThreshold)
+          KeysToRemove.push_back(Call);
       }
+      // Remove unprofitable specializations.
+      for (CallBase *Call : KeysToRemove)
+        Specializations.erase(Call);
+    }
 
-      LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function "
+    LLVM_DEBUG(if (!Specializations.empty())
+                 dbgs() << "FnSpecialization: Specializations for function "
                         << F->getName() << "\n";
-                 for (SpecializationInfo &S : WorkList) {
-                   dbgs() << "FnSpecialization:   FormalArg = "
-                          << S.Arg.Formal->getNameOrAsOperand()
-                          << ", ActualArg = "
-                          << S.Arg.Actual->getNameOrAsOperand()
-                          << ", Gain = " << S.Gain << "\n";
-                 });
+               for (auto &Entry : Specializations) {
+                 SpecializationInfo &S = Entry.second;
 
-      // FIXME: Only one argument per function.
-      break;
-    }
+                 dbgs() << "FnSpecialization:   Gain = " << S.Gain << "\n";
+                 for (ArgInfo &Arg : S.Args)
+                   dbgs() << "FnSpecialization:   - FormalArg = "
+                          << Arg.Formal->getNameOrAsOperand()
+                          << ", ActualArg = "
+                          << Arg.Actual->getNameOrAsOperand() << "\n";
+               });
   }
 
   bool isCandidateFunction(Function *F) {
@@ -486,32 +493,36 @@
     return true;
   }
 
-  void specializeFunction(Function *F, SpecializationInfo &S,
+  void specializeFunction(Function *F, SpecializationMap &Specializations,
                           FuncList &WorkList) {
-    ValueToValueMapTy Mappings;
-    Function *Clone = cloneCandidateFunction(F, Mappings);
-
-    // Rewrite calls to the function so that they call the clone instead.
-    rewriteCallSites(Clone, S.Arg, Mappings);
-
-    // Initialize the lattice state of the arguments of the function clone,
-    // marking the argument on which we specialized the function constant
-    // with the given value.
-    Solver.markArgInFuncSpecialization(Clone, S.Arg);
-
-    // Mark all the specialized functions
-    WorkList.push_back(Clone);
-    NbFunctionsSpecialized++;
-
-    // If the function has been completely specialized, the original function
-    // is no longer needed. Mark it unreachable.
-    if (F->getNumUses() == 0 || all_of(F->users(), [F](User *U) {
-          if (auto *CS = dyn_cast<CallBase>(U))
-            return CS->getFunction() == F;
-          return false;
-        })) {
-      Solver.markFunctionUnreachable(F);
-      FullySpecialized.insert(F);
+    for (auto &Entry : Specializations) {
+      SpecializationInfo &S = Entry.second;
+
+      ValueToValueMapTy Mappings;
+      Function *Clone = cloneCandidateFunction(F, Mappings);
+
+      // Rewrite calls to the function so that they call the clone instead.
+      rewriteCallSites(Clone, S.Args, Mappings);
+
+      // Initialize the lattice state of the arguments of the function clone,
+      // marking the arguments on which we specialized the function constant
+      // with the given values.
+      Solver.markArgInFuncSpecialization(Clone, S.Args);
+
+      // Mark all the specialized functions
+      WorkList.push_back(Clone);
+      NbFunctionsSpecialized++;
+
+      // If the function has been completely specialized, the original function
+      // is no longer needed. Mark it unreachable.
+      if (F->getNumUses() == 0 || all_of(F->users(), [F](User *U) {
+            if (auto *CS = dyn_cast<CallBase>(U))
+              return CS->getFunction() == F;
+            return false;
+          })) {
+        Solver.markFunctionUnreachable(F);
+        FullySpecialized.insert(F);
+      }
     }
   }
 
@@ -725,23 +736,24 @@
 
       if (isa<Constant>(V) && (Solver.getLatticeValueFor(V).isConstant() ||
                                EnableSpecializationForLiteralConstant))
-        Constants.push_back(cast<Constant>(V));
+        Constants.push_back(std::make_pair(&CS, cast<Constant>(V)));
     }
   }
 
   /// Rewrite calls to function \p F to call function \p Clone instead.
   ///
   /// This function modifies calls to function \p F as long as the actual
-  /// argument matches the one in \p Arg. Note that for recursive calls we
-  /// need to compare against the cloned formal argument.
+  /// arguments match those in \p Args. Note that for recursive calls we
+  /// need to compare against the cloned formal arguments.
   ///
   /// Callsites that have been marked with the MinSize function attribute won't
   /// be specialized and rewritten.
-  void rewriteCallSites(Function *Clone, const ArgInfo &Arg,
+  void rewriteCallSites(Function *Clone, const SmallVectorImpl<ArgInfo> &Args,
                         ValueToValueMapTy &Mappings) {
-    Function *F = Arg.Formal->getParent();
-    unsigned ArgNo = Arg.Formal->getArgNo();
-    SmallVector<CallBase *, 4> CallSitesToRewrite;
+    assert(!Args.empty() && "Specialization without arguments");
+    Function *F = Args[0].Formal->getParent();
+
+    SmallVector<CallBase *, 8> CallSitesToRewrite;
     for (auto *U : F->users()) {
       if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
         continue;
@@ -761,9 +773,13 @@
                         << *CS << "\n");
       if (/* recursive call */
           (CS->getFunction() == Clone &&
-           CS->getArgOperand(ArgNo) == Mappings[Arg.Formal]) ||
+           all_of(Args, [CS, &Mappings](const ArgInfo &Arg) {
+               unsigned ArgNo = Arg.Formal->getArgNo();
+               return CS->getArgOperand(ArgNo) == Mappings[Arg.Formal]; })) ||
           /* normal call */
-          CS->getArgOperand(ArgNo) == Arg.Actual) {
+          all_of(Args, [CS](const ArgInfo &Arg) {
+              unsigned ArgNo = Arg.Formal->getArgNo();
+              return CS->getArgOperand(ArgNo) == Arg.Actual; })) {
         CS->setCalledFunction(Clone);
         Solver.markOverdefined(CS);
       }
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -450,7 +450,8 @@
     return TrackingIncomingArguments;
   }
 
-  void markArgInFuncSpecialization(Function *F, const ArgInfo &Arg);
+  void markArgInFuncSpecialization(Function *F,
+                                   const SmallVectorImpl<ArgInfo> &Args);
 
   void markFunctionUnreachable(Function *F) {
     for (auto &BB : *F)
@@ -524,21 +525,29 @@
   return nullptr;
 }
 
-void SCCPInstVisitor::markArgInFuncSpecialization(Function *F,
-                                                  const ArgInfo &Arg) {
-  assert(F->arg_size() == Arg.Formal->getParent()->arg_size() &&
+void SCCPInstVisitor::markArgInFuncSpecialization(
+         Function *F, const SmallVectorImpl<ArgInfo> &Args) {
+  assert(!Args.empty() && "Specialization without arguments");
+  assert(F->arg_size() == Args[0].Formal->getParent()->arg_size() &&
          "Functions should have the same number of arguments");
 
+  // Walk the clone's and the original function's arguments in lockstep. Iter
+  // only moves forward, so the entries of Args are matched in formal argument
+  // order.
+  auto Iter = Args.begin();
   Argument *NewArg = F->arg_begin();
-  Argument *OldArg = Arg.Formal->getParent()->arg_begin();
+  Argument *OldArg = Args[0].Formal->getParent()->arg_begin();
   for (auto End = F->arg_end(); NewArg != End; ++NewArg, ++OldArg) {
 
     LLVM_DEBUG(dbgs() << "SCCP: Marking argument "
                       << NewArg->getNameOrAsOperand() << "\n");
 
-    if (OldArg == Arg.Formal) {
+    // Once every entry of Args has been consumed Iter equals Args.end() and
+    // must not be dereferenced; the remaining arguments use the branches below.
+    if (Iter != Args.end() && OldArg == Iter->Formal) {
       // Mark the argument constants in the new function.
-      markConstant(NewArg, Arg.Actual);
+      markConstant(NewArg, Iter->Actual);
+      ++Iter;
     } else if (ValueState.count(OldArg)) {
       // For the remaining arguments in the new function, copy the lattice state
       // over from the old function.
@@ -1717,8 +1726,9 @@
   return Visitor->getArgumentTrackedFunctions();
 }
 
-void SCCPSolver::markArgInFuncSpecialization(Function *F, const ArgInfo &Arg) {
-  Visitor->markArgInFuncSpecialization(F, Arg);
+void SCCPSolver::markArgInFuncSpecialization(
+         Function *F, const SmallVectorImpl<ArgInfo> &Args) {
+  Visitor->markArgInFuncSpecialization(F, Args);
 }
 
 void SCCPSolver::markFunctionUnreachable(Function *F) {
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll
@@ -1,4 +1,4 @@
-; RUN: opt -function-specialization -function-specialization-for-literal-constant=true -func-specialization-size-threshold=10 -S < %s | FileCheck %s
+; RUN: opt -function-specialization -func-specialization-min-gain=50 -function-specialization-for-literal-constant=true -func-specialization-size-threshold=10 -S < %s | FileCheck %s
 
 ; Check that the literal constant parameter could be specialized.
 ; CHECK: @foo.1(
@@ -41,4 +41,4 @@
   %retval.2 = call i32 @foo(i1 0)
   %retval = add nsw i32 %retval.1, %retval.2
   ret i32 %retval
-}
\ No newline at end of file
+}
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll
@@ -1,8 +1,12 @@
-; RUN: opt -function-specialization -func-specialization-avg-iters-cost=3 -func-specialization-size-threshold=10 -S < %s | FileCheck %s
+; RUN: opt -function-specialization -func-specialization-min-gain=20000 -func-specialization-avg-iters-cost=10 -func-specialization-size-threshold=10 -S < %s | FileCheck %s --check-prefix=HIGH_ITER_COST
+; RUN: opt -function-specialization -func-specialization-min-gain=20000 -func-specialization-avg-iters-cost=3 -func-specialization-size-threshold=10 -S < %s | FileCheck %s --check-prefix=LOW_ITER_COST
 
 ; Check that the loop depth results in a larger specialization bonus.
-; CHECK: @foo.1(
-; CHECK: @foo.2(
+; HIGH_ITER_COST: @foo.1(
+; HIGH_ITER_COST: @foo.2(
+
+; LOW_ITER_COST-NOT: @foo.1(
+; LOW_ITER_COST-NOT: @foo.2(
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
@@ -60,4 +64,4 @@
 return:
   %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ]
   ret i32 %retval.0
-}
\ No newline at end of file
+}
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll
@@ -1,4 +1,4 @@
-; RUN: opt -function-specialization -func-specialization-size-threshold=3 -S < %s | FileCheck %s
+; RUN: opt -function-specialization -func-specialization-min-gain=500 -func-specialization-size-threshold=3 -S < %s | FileCheck %s
 
 ; Checks for callsites that have been annotated with MinSize. We only expect
 ; specialisation for the call that does not have the attribute:
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll
@@ -1,4 +1,4 @@
-; RUN: opt -function-specialization -func-specialization-size-threshold=3 -S < %s | FileCheck %s
+; RUN: opt -function-specialization -func-specialization-min-gain=500 -func-specialization-size-threshold=3 -S < %s | FileCheck %s
 
 define i64 @main(i64 %x, i1 %flag) {
 ;
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
@@ -46,7 +46,7 @@
 ; CHECK-NEXT:   entry:
 ; CHECK-NEXT:     %0 = load i32, i32* @A, align 4
 ; CHECK-NEXT:     %add = add nsw i32 %x, %0
-; CHECK-NEXT:     %1 = load i32, i32* %c, align 4
+; CHECK-NEXT:     %1 = load i32, i32* @C, align 4
 ; CHECK-NEXT:     %add1 = add nsw i32 %add, %1
 ; CHECK-NEXT:     ret i32 %add1
 ; CHECK-NEXT:   }
@@ -55,7 +55,7 @@
 ; CHECK-NEXT:   entry:
 ; CHECK-NEXT:     %0 = load i32, i32* @B, align 4
 ; CHECK-NEXT:     %add = add nsw i32 %x, %0
-; CHECK-NEXT:     %1 = load i32, i32* %c, align 4
+; CHECK-NEXT:     %1 = load i32, i32* @D, align 4
 ; CHECK-NEXT:     %add1 = add nsw i32 %add, %1
 ; CHECK-NEXT:     ret i32 %add1
 ; CHECK-NEXT:   }
diff --git a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll
--- a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll
@@ -1,4 +1,4 @@
-; RUN: opt -function-specialization -func-specialization-size-threshold=3 -S < %s | FileCheck %s
+; RUN: opt -function-specialization -func-specialization-min-gain=400 -func-specialization-size-threshold=3 -S < %s | FileCheck %s
 
 define i64 @main(i64 %x, i1 %flag) {
 entry:
diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
--- a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
@@ -0,0 +1,172 @@
+; RUN: opt -function-specialization -func-specialization-min-gain=1000 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=NONE
+; RUN: opt -function-specialization -func-specialization-min-gain=900 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefixes=ONE
+; RUN: opt -function-specialization -func-specialization-min-gain=700 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=TWO
+; RUN: opt -function-specialization -func-specialization-min-gain=600 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=THREE
+
+define i64 @main(i64 %x, i64 %y, i1 %flag) {
+; NONE-LABEL: @main(
+; NONE-NEXT:  entry:
+; NONE-NEXT:    br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
+; NONE:       plus:
+; NONE-NEXT:    [[TMP0:%.*]] = call i64 @compute(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul)
+; NONE-NEXT:    br label [[MERGE:%.*]]
+; NONE:       minus:
+; NONE-NEXT:    [[TMP1:%.*]] = call i64 @compute(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @minus, i64 (i64, i64)* @power)
+; NONE-NEXT:    br label [[MERGE]]
+; NONE:       merge:
+; NONE-NEXT:    [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ]
+; NONE-NEXT:    [[TMP3:%.*]] = call i64 @compute(i64 [[TMP2]], i64 42, i64 (i64, i64)* @plus, i64 (i64, i64)* @minus)
+; NONE-NEXT:    ret i64 [[TMP3]]
+;
+; ONE-LABEL: @main(
+; ONE-NEXT:  entry:
+; ONE-NEXT:    br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
+; ONE:       plus:
+; ONE-NEXT:    [[TMP0:%.*]] = call i64 @compute(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul)
+; ONE-NEXT:    br label [[MERGE:%.*]]
+; ONE:       minus:
+; ONE-NEXT:    [[TMP1:%.*]] = call i64 @compute(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @minus, i64 (i64, i64)* @power)
+; ONE-NEXT:    br label [[MERGE]]
+; ONE:       merge:
+; ONE-NEXT:    [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ]
+; ONE-NEXT:    [[TMP3:%.*]] = call i64 @compute.1(i64 [[TMP2]], i64 42, i64 (i64, i64)* @plus, i64 (i64, i64)* @minus)
+; ONE-NEXT:    ret i64 [[TMP3]]
+;
+; TWO-LABEL: @main(
+; TWO-NEXT:  entry:
+; TWO-NEXT:    br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
+; TWO:       plus:
+; TWO-NEXT:    [[TMP0:%.*]] = call i64 @compute(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul)
+; TWO-NEXT:    br label [[MERGE:%.*]]
+; TWO:       minus:
+; TWO-NEXT:    [[TMP1:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @minus, i64 (i64, i64)* @power)
+; TWO-NEXT:    br label [[MERGE]]
+; TWO:       merge:
+; TWO-NEXT:    [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ]
+; TWO-NEXT:    [[TMP3:%.*]] = call i64 @compute.2(i64 [[TMP2]], i64 42, i64 (i64, i64)* @plus, i64 (i64, i64)* @minus)
+; TWO-NEXT:    ret i64 [[TMP3]]
+;
+; THREE-LABEL: @main(
+; THREE-NEXT:  entry:
+; THREE-NEXT:    br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
+; THREE:       plus:
+; THREE-NEXT:    [[TMP0:%.*]] = call i64 @compute.1(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul)
+; THREE-NEXT:    br label [[MERGE:%.*]]
+; THREE:       minus:
+; THREE-NEXT:    [[TMP1:%.*]] = call i64 @compute.2(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @minus, i64 (i64, i64)* @power)
+; THREE-NEXT:    br label [[MERGE]]
+; THREE:       merge:
+; THREE-NEXT:    [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ]
+; THREE-NEXT:    [[TMP3:%.*]] = call i64 @compute.3(i64 [[TMP2]], i64 42, i64 (i64, i64)* @plus, i64 (i64, i64)* @minus)
+; THREE-NEXT:    ret i64 [[TMP3]]
+;
+entry:
+  br i1 %flag, label %plus, label %minus
+
+plus:
+  %tmp0 = call i64 @compute(i64 %x, i64 %y, i64 (i64, i64)* @power, i64 (i64, i64)* @mul)
+  br label %merge
+
+minus:
+  %tmp1 = call i64 @compute(i64 %x, i64 %y, i64 (i64, i64)* @minus, i64 (i64, i64)* @power)
+  br label %merge
+
+merge:
+  %tmp2 = phi i64 [ %tmp0, %plus ], [ %tmp1, %minus]
+  %tmp3 = call i64 @compute(i64 %tmp2, i64 42, i64 (i64, i64)* @plus, i64 (i64, i64)* @minus)
+  ret i64 %tmp3
+}
+
+; THREE-NOT: define internal i64 @compute
+;
+; THREE-LABEL: define internal i64 @compute.1(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) {
+; THREE-NEXT:  entry:
+; THREE-NEXT:    [[TMP0:%.+]] = call i64 @power(i64 %x, i64 %y)
+; THREE-NEXT:    [[TMP1:%.+]] = call i64 @mul(i64 %x, i64 %y)
+; THREE-NEXT:    [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]]
+; THREE-NEXT:    [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x
+; THREE-NEXT:    [[TMP4:%.+]] = sub i64 [[TMP3]], %y
+; THREE-NEXT:    [[TMP5:%.+]] = mul i64 [[TMP4]], 2
+; THREE-NEXT:    ret i64 [[TMP5]]
+; THREE-NEXT:  }
+;
+; THREE-LABEL: define internal i64 @compute.2(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) {
+; THREE-NEXT:  entry:
+; THREE-NEXT:    [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 %y)
+; THREE-NEXT:    [[TMP1:%.+]] = call i64 @power(i64 %x, i64 %y)
+; THREE-NEXT:    [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]]
+; THREE-NEXT:    [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x
+; THREE-NEXT:    [[TMP4:%.+]] = sub i64 [[TMP3]], %y
+; THREE-NEXT:    [[TMP5:%.+]] = mul i64 [[TMP4]], 2
+; THREE-NEXT:    ret i64 [[TMP5]]
+; THREE-NEXT:  }
+;
+; THREE-LABEL: define internal i64 @compute.3(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) {
+; THREE-NEXT:  entry:
+; THREE-NEXT:    [[TMP0:%.+]] = call i64 @plus(i64 %x, i64 %y)
+; THREE-NEXT:    [[TMP1:%.+]] = call i64 @minus(i64 %x, i64 %y)
+; THREE-NEXT:    [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]]
+; THREE-NEXT:    [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x
+; THREE-NEXT:    [[TMP4:%.+]] = sub i64 [[TMP3]], %y
+; THREE-NEXT:    [[TMP5:%.+]] = mul i64 [[TMP4]], 2
+; THREE-NEXT:    ret i64 [[TMP5]]
+; THREE-NEXT:  }
+;
+define internal i64 @compute(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) {
+entry:
+  %tmp0 = call i64 %binop1(i64 %x, i64 %y)
+  %tmp1 = call i64 %binop2(i64 %x, i64 %y)
+  %add = add i64 %tmp0, %tmp1
+  %div = sdiv i64 %add, %x
+  %sub = sub i64 %div, %y
+  %mul = mul i64 %sub, 2
+  ret i64 %mul
+}
+
+define internal i64 @plus(i64 %x, i64 %y) {
+entry:
+  %tmp0 = add i64 %x, %y
+  ret i64 %tmp0
+}
+
+define internal i64 @minus(i64 %x, i64 %y) {
+entry:
+  %tmp0 = sub i64 %x, %y
+  ret i64 %tmp0
+}
+
+define internal i64 @mul(i64 %x, i64 %n) {
+entry:
+  %cmp6 = icmp sgt i64 %n, 1
+  br i1 %cmp6, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  %x.addr.0.lcssa = phi i64 [ %x, %entry ], [ %add, %for.body ]
+  ret i64 %x.addr.0.lcssa
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+  %x.addr.07 = phi i64 [ %add, %for.body ], [ %x, %entry ]
+  %add = shl nsw i64 %x.addr.07, 1
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define internal i64 @power(i64 %x, i64 %n) {
+entry:
+  %cmp6 = icmp sgt i64 %n, 1
+  br i1 %cmp6, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  %x.addr.0.lcssa = phi i64 [ %x, %entry ], [ %mul, %for.body ]
+  ret i64 %x.addr.0.lcssa
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+  %x.addr.07 = phi i64 [ %mul, %for.body ], [ %x, %entry ]
+  %mul = mul nsw i64 %x.addr.07, %x.addr.07
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}