diff --git a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h --- a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h +++ b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h @@ -34,6 +34,14 @@ PostDominatorTree *PDT; }; +/// Helper struct shared between Function Specialization and SCCP Solver. +struct ArgInfo { + Argument *Formal; // The Formal argument being analysed. + Constant *Actual; // A corresponding actual constant argument. + + ArgInfo(Argument *F, Constant *A) : Formal(F), Actual(A) {}; +}; + class SCCPInstVisitor; //===----------------------------------------------------------------------===// @@ -134,11 +142,13 @@ /// Return a reference to the set of argument tracked functions. SmallPtrSetImpl &getArgumentTrackedFunctions(); - /// Mark argument \p A constant with value \p C in a new function - /// specialization. The argument's parent function is a specialization of the - /// original function \p F. All other arguments of the specialization inherit - /// the lattice state of their corresponding values in the original function. - void markArgInFuncSpecialization(Function *F, Argument *A, Constant *C); + /// Mark the constant arguments of a new function specialization \p F. A list + /// of {formal,actual} pairs corresponding to these constants resides in + /// \p Args. The formal arguments are associated with the original function + /// definition. + /// All other arguments of the specialization inherit the lattice state of + /// their corresponding values in the original function. + void markArgInFuncSpecialization(Function *F, SmallVectorImpl &Args); /// Mark all of the blocks in function \p F non-executable. Clients can used /// this method to erase a function from the module (e.g., if it has been diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -83,6 +83,12 @@ "specialization"), cl::init(3)); +static cl::opt MinGainThreshold( + "func-specialization-min-gain", cl::Hidden, + cl::desc("The minimum gain for a specialization to be considered " + "profitable"), + cl::init(10000)); + static cl::opt SmallFunctionThreshold( "func-specialization-size-threshold", cl::Hidden, cl::desc("Don't specialize functions that have less than this theshold " @@ -98,8 +104,23 @@ "func-specialization-on-address", cl::init(false), cl::Hidden, cl::desc("Enable function specialization on the address of global values")); -// TODO: This needs checking to see the impact on compile-times, which is why -// this is off by default for now. +// Disabled by default as it can significantly increase compilation times. +// Metrics based on instruction count as per +// https://llvm-compile-time-tracker.com +// https://github.com/nikic/llvm-compile-time-tracker +// +// testname | % diff +// -----------------+------------------- +// ClamAV | -0.003008478350503 +// 7zip | -0.00387846255468 +// tramp3d-v4 | +0.032250428769666 +// kimwitu++ | +0.012783554583518 +// sqlite3 | +0.064026452410774 +// mafft | -0.010917789416896 +// lencod | -0.010513312365949 +// SPASS | +4.0348661146709 +// Bullet | +0.000114373245283 +// consumer-typeset | +0.007390973545237 static cl::opt EnableSpecializationForLiteralConstant( "function-specialization-for-literal-constant", cl::init(false), cl::Hidden, cl::desc("Enable specialization of functions that take a literal constant " @@ -108,24 +129,15 @@ namespace { // Bookkeeping struct to pass data from the analysis and profitability phase // to the actual transform helper functions. -struct ArgInfo { - Function *Fn; // The function to perform specialisation on. - Argument *Formal; // The Formal argument being analysed. - Constant *Actual; // A corresponding actual constant argument. - InstructionCost Gain; // Profitability: Gain = Bonus - Cost. - - // Flag if this will be a partial specialization, in which case we will need - // to keep the original function around in addition to the added - // specializations. - bool Partial = false; - - ArgInfo(Function *F, Argument *A, Constant *C, InstructionCost G) - : Fn(F), Formal(A), Actual(C), Gain(G){}; +struct Specialization { + SmallVector Args; // Stores the {formal,actual} argument pairs. + InstructionCost Gain = 0; // Profitability: Gain = Bonus - Cost. }; } // Anonymous namespace using FuncList = SmallVectorImpl; -using ConstList = SmallVectorImpl; +using ConstList = SmallVector>; +using SpecializationInfo = SmallMapVector; // Helper to check if \p LV is either a constant or a constant // range with a single element. This should cover exactly the same cases as the @@ -306,20 +318,17 @@ LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for " << F->getName() << " is " << Cost << "\n"); - auto ConstArgs = calculateGains(F, Cost); - if (ConstArgs.empty()) { + SpecializationInfo Info = calculateGains(F, Cost); + if (Info.empty()) { LLVM_DEBUG(dbgs() << "FnSpecialization: no possible constants found\n"); continue; } - for (auto &CA : ConstArgs) { - specializeFunction(CA, WorkList); - Changed = true; - } + specializeFunction(F, Info, WorkList); + Changed = true; } updateSpecializedFuncs(Candidates, WorkList); - NumFuncSpecialized += NbFunctionsSpecialized; return Changed; } @@ -373,15 +382,10 @@ } private: - // The number of functions specialised, used for collecting statistics and - // also in the cost model. - unsigned NbFunctionsSpecialized = 0; - /// Clone the function \p F and remove the ssa_copy intrinsics added by /// the SCCPSolver in the cloned version. - Function *cloneCandidateFunction(Function *F) { - ValueToValueMapTy EmptyMap; - Function *Clone = CloneFunction(F, EmptyMap); + Function *cloneCandidateFunction(Function *F, ValueToValueMapTy &Mappings) { + Function *Clone = CloneFunction(F, Mappings); removeSSACopy(*Clone); return Clone; } @@ -392,8 +396,8 @@ /// profitable to specialize. Specialization is performed on the first /// interesting argument. Specializations based on additional arguments will /// be evaluated on following iterations of the main IPSCCP solve loop. - SmallVector calculateGains(Function *F, InstructionCost Cost) { - SmallVector Worklist; + SpecializationInfo calculateGains(Function *F, InstructionCost Cost) { + SpecializationInfo Info; // Determine if we should specialize the function based on the values the // argument can take on. If specialization is not profitable, we continue // on to the next argument. @@ -405,7 +409,7 @@ // be set to false by isArgumentInteresting (that function only adds // values to the Constants list that are deemed profitable). bool IsPartial = true; - SmallVector ActualArgs; + ConstList ActualArgs; if (!isArgumentInteresting(&FormalArg, ActualArgs, IsPartial)) { LLVM_DEBUG(dbgs() << "FnSpecialization: Argument " << FormalArg.getNameOrAsOperand() @@ -413,55 +417,58 @@ continue; } - for (auto *ActualArg : ActualArgs) { - InstructionCost Gain = - ForceFunctionSpecialization - ? 1 - : getSpecializationBonus(&FormalArg, ActualArg) - Cost; + for (auto &Entry : ActualArgs) { + CallBase *Call = Entry.first; + Constant *ActualArg = Entry.second; - if (Gain <= 0) - continue; - Worklist.push_back({F, &FormalArg, ActualArg, Gain}); + Specialization &S = Info[Call]; + if (!ForceFunctionSpecialization) + S.Gain += getSpecializationBonus(&FormalArg, ActualArg); + S.Args.push_back({&FormalArg, ActualArg}); } + } - if (Worklist.empty()) - continue; + if (Info.empty()) + return Info; - // Sort the candidates in descending order. - llvm::stable_sort(Worklist, [](const ArgInfo &L, const ArgInfo &R) { - return L.Gain > R.Gain; - }); - - // Truncate the worklist to 'MaxClonesThreshold' candidates if - // necessary. - if (Worklist.size() > MaxClonesThreshold) { - LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed " - << "the maximum number of clones threshold.\n" - << "FnSpecialization: Truncating worklist to " - << MaxClonesThreshold << " candidates.\n"); - Worklist.erase(Worklist.begin() + MaxClonesThreshold, - Worklist.end()); - } + if (ForceFunctionSpecialization) { + // The cost model is disregarded so keep up to \p MaxClonesThreshold + // per function specialization. + while (Info.size() > MaxClonesThreshold) + Info.erase(Info.begin()); + } else { + // Account the Cost per specialization. + SmallVector KeysToRemove; - if (IsPartial || Worklist.size() < ActualArgs.size()) - for (auto &ActualArg : Worklist) - ActualArg.Partial = true; + for (auto &Entry : Info) { + CallBase *Call = Entry.first; + Specialization &S = Entry.second; + + S.Gain -= Cost; + if (S.Gain < MinGainThreshold) + KeysToRemove.push_back(Call); + } + // Remove unprofitable specializations. + for (CallBase *Call : KeysToRemove) + Info.erase(Call); + } - LLVM_DEBUG( + LLVM_DEBUG( + if (!Info.empty()) dbgs() << "FnSpecialization: Specializations for function " << F->getName() << "\n"; - for (auto &C : Worklist) { - dbgs() << "FnSpecialization: FormalArg = " - << C.Formal->getNameOrAsOperand() << ", ActualArg = " - << C.Actual->getNameOrAsOperand() << ", Gain = " - << C.Gain << "\n"; - } - ); - - // FIXME: Only one argument per function. - break; - } - return Worklist; + for (auto &Entry : Info) { + Specialization &S = Entry.second; + + dbgs() << "FnSpecialization: Gain = " << S.Gain << "\n"; + for (ArgInfo &Arg : S.Args) + dbgs() << "FnSpecialization: - FormalArg = " + << Arg.Formal->getNameOrAsOperand() << ", " << "ActualArg = " + << Arg.Actual->getNameOrAsOperand() << "\n"; + } + ); + + return Info; } bool isCandidateFunction(Function *F) { @@ -488,32 +495,32 @@ return true; } - void specializeFunction(ArgInfo &AI, FuncList &WorkList) { - Function *Clone = cloneCandidateFunction(AI.Fn); - Argument *ClonedArg = Clone->getArg(AI.Formal->getArgNo()); - - // Rewrite calls to the function so that they call the clone instead. - rewriteCallSites(AI.Fn, Clone, *ClonedArg, AI.Actual); - - // Initialize the lattice state of the arguments of the function clone, - // marking the argument on which we specialized the function constant - // with the given value. - Solver.markArgInFuncSpecialization(AI.Fn, ClonedArg, AI.Actual); - - // Mark all the specialized functions - WorkList.push_back(Clone); - NbFunctionsSpecialized++; - + void specializeFunction(Function *F, SpecializationInfo &Info, + FuncList &WorkList) { + for (auto &Entry : Info) { + Specialization &S = Entry.second; + + ValueToValueMapTy Mappings; + Function *Clone = cloneCandidateFunction(F, Mappings); + rewriteCallSites(Clone, S.Args, Mappings); + // Initialize the lattice state of the arguments of the function clone, + // marking the argument on which we specialized the function constant + // with the given value. + Solver.markArgInFuncSpecialization(Clone, S.Args); + // Mark all the specialized functions + WorkList.push_back(Clone); + ++NumFuncSpecialized; + } // If the function has been completely specialized, the original function // is no longer needed. Mark it unreachable. - if (AI.Fn->getNumUses() == 0 || - all_of(AI.Fn->users(), [&AI](User *U) { + if (F->getNumUses() == 0 || + all_of(F->users(), [F](User *U) { if (auto *CS = dyn_cast_or_null(U)) - return CS->getFunction() == AI.Fn; + return CS->getFunction() == F; return false; })) { - Solver.markFunctionUnreachable(AI.Fn); - FullySpecialized.push_back(AI.Fn); + Solver.markFunctionUnreachable(F); + FullySpecialized.push_back(F); } } @@ -539,9 +546,8 @@ } // Otherwise, set the specialization cost to be the cost of all the - // instructions in the function and penalty for specializing more functions. - unsigned Penalty = NbFunctionsSpecialized + 1; - return Metrics.NumInsts * InlineConstants::InstrCost * Penalty; + // instructions in the function. + return Metrics.NumInsts * InlineConstants::InstrCost; } InstructionCost getUserBonus(User *U, llvm::TargetTransformInfo &TTI, @@ -689,6 +695,10 @@ // TODO 2: this currently does not support constants, i.e. integer ranges. // IsPartial = !getPossibleConstants(A, Constants); + + if (Constants.empty()) + return false; + LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument " << A->getNameOrAsOperand() << "\n"); return true; @@ -745,7 +755,7 @@ if (isa(V) && (Solver.getLatticeValueFor(V).isConstant() || EnableSpecializationForLiteralConstant)) - Constants.push_back(cast(V)); + Constants.push_back(std::make_pair(&CS, cast(V))); else AllConstant = false; } @@ -757,16 +767,18 @@ /// Rewrite calls to function \p F to call function \p Clone instead. /// - /// This function modifies calls to function \p F whose argument at index \p - /// ArgNo is equal to constant \p C. The calls are rewritten to call function - /// \p Clone instead. + /// This function modifies calls to function \p F as long as the actual + /// arguments match those in \p Args. Note that for recursive calls we + /// need to compare against the cloned formal arguments. /// /// Callsites that have been marked with the MinSize function attribute won't /// be specialized and rewritten. - void rewriteCallSites(Function *F, Function *Clone, Argument &Arg, - Constant *C) { - unsigned ArgNo = Arg.getArgNo(); - SmallVector CallSitesToRewrite; + void rewriteCallSites(Function *Clone, SmallVectorImpl &Args, + ValueToValueMapTy &Mappings) { + assert(!Args.empty() && "Specialization without arguments"); + Function *F = Args[0].Formal->getParent(); + + SmallVector CallSitesToRewrite; for (auto *U : F->users()) { if (!isa(U) && !isa(U)) continue; @@ -784,8 +796,15 @@ LLVM_DEBUG(dbgs() << "FnSpecialization: " << CS->getFunction()->getName() << " ->" << *CS << "\n"); - if ((CS->getFunction() == Clone && CS->getArgOperand(ArgNo) == &Arg) || - CS->getArgOperand(ArgNo) == C) { + if (/* recursive call */ + (CS->getFunction() == Clone && + all_of(Args, [CS, &Mappings](ArgInfo &Arg) { + unsigned ArgNo = Arg.Formal->getArgNo(); + return CS->getArgOperand(ArgNo) == Mappings[Arg.Formal]; })) || + /* normal call */ + all_of(Args, [CS](ArgInfo &Arg) { + unsigned ArgNo = Arg.Formal->getArgNo(); + return CS->getArgOperand(ArgNo) == Arg.Actual; })) { CS->setCalledFunction(Clone); Solver.markOverdefined(CS); } diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -452,7 +452,7 @@ return TrackingIncomingArguments; } - void markArgInFuncSpecialization(Function *F, Argument *A, Constant *C); + void markArgInFuncSpecialization(Function *F, SmallVectorImpl &Args); void markFunctionUnreachable(Function *F) { for (auto &BB : *F) @@ -526,24 +526,29 @@ return nullptr; } -void SCCPInstVisitor::markArgInFuncSpecialization(Function *F, Argument *A, - Constant *C) { - assert(F->arg_size() == A->getParent()->arg_size() && +void +SCCPInstVisitor::markArgInFuncSpecialization(Function *F, + SmallVectorImpl &Args) { + assert(!Args.empty() && "Specialization without arguments"); + assert(F->arg_size() == Args[0].Formal->getParent()->arg_size() && "Functions should have the same number of arguments"); - // Mark the argument constant in the new function. - markConstant(A, C); - - // For the remaining arguments in the new function, copy the lattice state - // over from the old function. - for (Argument *OldArg = F->arg_begin(), *NewArg = A->getParent()->arg_begin(), - *End = F->arg_end(); - OldArg != End; ++OldArg, ++NewArg) { + auto Iter = Args.begin(); + Argument *NewArg = F->arg_begin(); + Argument *OldArg = Args[0].Formal->getParent()->arg_begin(); + for (auto End = F->arg_end(); NewArg != End; ++NewArg, ++OldArg) { LLVM_DEBUG(dbgs() << "SCCP: Marking argument " << NewArg->getNameOrAsOperand() << "\n"); - if (NewArg != A && ValueState.count(OldArg)) { + if (OldArg == (*Iter).Formal) { + // Mark the argument constants in the new function. + markConstant(NewArg, (*Iter).Actual); + ++Iter; + } else if (ValueState.count(OldArg)) { + // For the remaining arguments in the new function, copy the lattice state + // over from the old function. + // // Note: This previously looked like this: // ValueState[NewArg] = ValueState[OldArg]; // This is incorrect because the DenseMap class may resize the underlying @@ -1718,9 +1723,9 @@ return Visitor->getArgumentTrackedFunctions(); } -void SCCPSolver::markArgInFuncSpecialization(Function *F, Argument *A, - Constant *C) { - Visitor->markArgInFuncSpecialization(F, A, C); +void SCCPSolver::markArgInFuncSpecialization(Function *F, + SmallVectorImpl &Args) { + Visitor->markArgInFuncSpecialization(F, Args); } void SCCPSolver::markFunctionUnreachable(Function *F) { diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-integers.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -function-specialization-for-literal-constant=true -func-specialization-size-threshold=10 -S < %s | FileCheck %s +; RUN: opt -function-specialization -func-specialization-min-gain=50 -function-specialization-for-literal-constant=true -func-specialization-size-threshold=10 -S < %s | FileCheck %s ; Check that the literal constant parameter could be specialized. ; CHECK: @foo.1( @@ -41,4 +41,4 @@ %retval.2 = call i32 @foo(i1 0) %retval = add nsw i32 %retval.1, %retval.2 ret i32 %retval -} \ No newline at end of file +} diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll @@ -1,8 +1,12 @@ -; RUN: opt -function-specialization -func-specialization-avg-iters-cost=3 -func-specialization-size-threshold=10 -S < %s | FileCheck %s +; RUN: opt -function-specialization -func-specialization-min-gain=20000 -func-specialization-avg-iters-cost=10 -func-specialization-size-threshold=10 -S < %s | FileCheck %s --check-prefix=HIGH_ITER_COST +; RUN: opt -function-specialization -func-specialization-min-gain=20000 -func-specialization-avg-iters-cost=3 -func-specialization-size-threshold=10 -S < %s | FileCheck %s --check-prefix=LOW_ITER_COST ; Check that the loop depth results in a larger specialization bonus. -; CHECK: @foo.1( -; CHECK: @foo.2( +; HIGH_ITER_COST: @foo.1( +; HIGH_ITER_COST: @foo.2( + +; LOW_ITER_COST-NOT: @foo.1( +; LOW_ITER_COST-NOT: @foo.2( target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -60,4 +64,4 @@ return: %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ] ret i32 %retval.0 -} \ No newline at end of file +} diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -func-specialization-size-threshold=3 -S < %s | FileCheck %s +; RUN: opt -function-specialization -func-specialization-min-gain=500 -func-specialization-size-threshold=3 -S < %s | FileCheck %s ; Checks for callsites that have been annotated with MinSize. We only expect ; specialisation for the call that does not have the attribute: diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -func-specialization-size-threshold=3 -S < %s | FileCheck %s +; RUN: opt -function-specialization -func-specialization-min-gain=500 -func-specialization-size-threshold=3 -S < %s | FileCheck %s define i64 @main(i64 %x, i1 %flag) { ; diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll @@ -46,7 +46,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: %0 = load i32, i32* @A, align 4 ; CHECK-NEXT: %add = add nsw i32 %x, %0 -; CHECK-NEXT: %1 = load i32, i32* %c, align 4 +; CHECK-NEXT: %1 = load i32, i32* @C, align 4 ; CHECK-NEXT: %add1 = add nsw i32 %add, %1 ; CHECK-NEXT: ret i32 %add1 ; CHECK-NEXT: } @@ -55,7 +55,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: %0 = load i32, i32* @B, align 4 ; CHECK-NEXT: %add = add nsw i32 %x, %0 -; CHECK-NEXT: %1 = load i32, i32* %c, align 4 +; CHECK-NEXT: %1 = load i32, i32* @D, align 4 ; CHECK-NEXT: %add1 = add nsw i32 %add, %1 ; CHECK-NEXT: ret i32 %add1 ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll --- a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll +++ b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll @@ -1,4 +1,4 @@ -; RUN: opt -function-specialization -func-specialization-size-threshold=3 -S < %s | FileCheck %s +; RUN: opt -function-specialization -func-specialization-min-gain=400 -func-specialization-size-threshold=3 -S < %s | FileCheck %s define i64 @main(i64 %x, i1 %flag) { entry: diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll @@ -0,0 +1,172 @@ +; RUN: opt -function-specialization -func-specialization-min-gain=1000 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=NONE +; RUN: opt -function-specialization -func-specialization-min-gain=900 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefixes=ONE +; RUN: opt -function-specialization -func-specialization-min-gain=700 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=TWO +; RUN: opt -function-specialization -func-specialization-min-gain=600 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=THREE + +define i64 @main(i64 %x, i64 %y, i1 %flag) { +; NONE-LABEL: @main( +; NONE-NEXT: entry: +; NONE-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]] +; NONE: plus: +; NONE-NEXT: [[TMP0:%.*]] = call i64 @compute(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul) +; NONE-NEXT: br label [[MERGE:%.*]] +; NONE: minus: +; NONE-NEXT: [[TMP1:%.*]] = call i64 @compute(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @minus, i64 (i64, i64)* @power) +; NONE-NEXT: br label [[MERGE]] +; NONE: merge: +; NONE-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ] +; NONE-NEXT: [[TMP3:%.*]] = call i64 @compute(i64 [[TMP2]], i64 42, i64 (i64, i64)* @plus, i64 (i64, i64)* @minus) +; NONE-NEXT: ret i64 [[TMP3]] +; +; ONE-LABEL: @main( +; ONE-NEXT: entry: +; ONE-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]] +; ONE: plus: +; ONE-NEXT: [[TMP0:%.*]] = call i64 @compute(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul) +; ONE-NEXT: br label [[MERGE:%.*]] +; ONE: minus: +; ONE-NEXT: [[TMP1:%.*]] = call i64 @compute(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @minus, i64 (i64, i64)* @power) +; ONE-NEXT: br label [[MERGE]] +; ONE: merge: +; ONE-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ] +; ONE-NEXT: [[TMP3:%.*]] = call i64 @compute.1(i64 [[TMP2]], i64 42, i64 (i64, i64)* @plus, i64 (i64, i64)* @minus) +; ONE-NEXT: ret i64 [[TMP3]] +; +; TWO-LABEL: @main( +; TWO-NEXT: entry: +; TWO-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]] +; TWO: plus: +; TWO-NEXT: [[TMP0:%.*]] = call i64 @compute(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul) +; TWO-NEXT: br label [[MERGE:%.*]] +; TWO: minus: +; TWO-NEXT: [[TMP1:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @minus, i64 (i64, i64)* @power) +; TWO-NEXT: br label [[MERGE]] +; TWO: merge: +; TWO-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ] +; TWO-NEXT: [[TMP3:%.*]] = call i64 @compute.2(i64 [[TMP2]], i64 42, i64 (i64, i64)* @plus, i64 (i64, i64)* @minus) +; TWO-NEXT: ret i64 [[TMP3]] +; +; THREE-LABEL: @main( +; THREE-NEXT: entry: +; THREE-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]] +; THREE: plus: +; THREE-NEXT: [[TMP0:%.*]] = call i64 @compute.1(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul) +; THREE-NEXT: br label [[MERGE:%.*]] +; THREE: minus: +; THREE-NEXT: [[TMP1:%.*]] = call i64 @compute.2(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @minus, i64 (i64, i64)* @power) +; THREE-NEXT: br label [[MERGE]] +; THREE: merge: +; THREE-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ] +; THREE-NEXT: [[TMP3:%.*]] = call i64 @compute.3(i64 [[TMP2]], i64 42, i64 (i64, i64)* @plus, i64 (i64, i64)* @minus) +; THREE-NEXT: ret i64 [[TMP3]] +; +entry: + br i1 %flag, label %plus, label %minus + +plus: + %tmp0 = call i64 @compute(i64 %x, i64 %y, i64 (i64, i64)* @power, i64 (i64, i64)* @mul) + br label %merge + +minus: + %tmp1 = call i64 @compute(i64 %x, i64 %y, i64 (i64, i64)* @minus, i64 (i64, i64)* @power) + br label %merge + +merge: + %tmp2 = phi i64 [ %tmp0, %plus ], [ %tmp1, %minus] + %tmp3 = call i64 @compute(i64 %tmp2, i64 42, i64 (i64, i64)* @plus, i64 (i64, i64)* @minus) + ret i64 %tmp3 +} + +; THREE-NOT: define internal i64 @compute +; +; THREE-LABEL: define internal i64 @compute.1(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) { +; THREE-NEXT: entry: +; THREE-NEXT: [[TMP0:%.+]] = call i64 @power(i64 %x, i64 %y) +; THREE-NEXT: [[TMP1:%.+]] = call i64 @mul(i64 %x, i64 %y) +; THREE-NEXT: [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]] +; THREE-NEXT: [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x +; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], %y +; THREE-NEXT: [[TMP5:%.+]] = mul i64 [[TMP4]], 2 +; THREE-NEXT: ret i64 [[TMP5]] +; THREE-NEXT: } +; +; THREE-LABEL: define internal i64 @compute.2(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) { +; THREE-NEXT: entry: +; THREE-NEXT: [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 %y) +; THREE-NEXT: [[TMP1:%.+]] = call i64 @power(i64 %x, i64 %y) +; THREE-NEXT: [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]] +; THREE-NEXT: [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x +; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], %y +; THREE-NEXT: [[TMP5:%.+]] = mul i64 [[TMP4]], 2 +; THREE-NEXT: ret i64 [[TMP5]] +; THREE-NEXT: } +; +; THREE-LABEL: define internal i64 @compute.3(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) { +; THREE-NEXT: entry: +; THREE-NEXT: [[TMP0:%.+]] = call i64 @plus(i64 %x, i64 %y) +; THREE-NEXT: [[TMP1:%.+]] = call i64 @minus(i64 %x, i64 %y) +; THREE-NEXT: [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]] +; THREE-NEXT: [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x +; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], %y +; THREE-NEXT: [[TMP5:%.+]] = mul i64 [[TMP4]], 2 +; THREE-NEXT: ret i64 [[TMP5]] +; THREE-NEXT: } +; +define internal i64 @compute(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) { +entry: + %tmp0 = call i64 %binop1(i64 %x, i64 %y) + %tmp1 = call i64 %binop2(i64 %x, i64 %y) + %add = add i64 %tmp0, %tmp1 + %div = sdiv i64 %add, %x + %sub = sub i64 %div, %y + %mul = mul i64 %sub, 2 + ret i64 %mul +} + +define internal i64 @plus(i64 %x, i64 %y) { +entry: + %tmp0 = add i64 %x, %y + ret i64 %tmp0 +} + +define internal i64 @minus(i64 %x, i64 %y) { +entry: + %tmp0 = sub i64 %x, %y + ret i64 %tmp0 +} + +define internal i64 @mul(i64 %x, i64 %n) { +entry: + %cmp6 = icmp sgt i64 %n, 1 + br i1 %cmp6, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.body, %entry + %x.addr.0.lcssa = phi i64 [ %x, %entry ], [ %add, %for.body ] + ret i64 %x.addr.0.lcssa + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ] + %x.addr.07 = phi i64 [ %add, %for.body ], [ %x, %entry ] + %add = shl nsw i64 %x.addr.07, 1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define internal i64 @power(i64 %x, i64 %n) { +entry: + %cmp6 = icmp sgt i64 %n, 1 + br i1 %cmp6, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.body, %entry + %x.addr.0.lcssa = phi i64 [ %x, %entry ], [ %mul, %for.body ] + ret i64 %x.addr.0.lcssa + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ] + %x.addr.07 = phi i64 [ %mul, %for.body ], [ %x, %entry ] + %mul = mul nsw i64 %x.addr.07, %x.addr.07 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +}