diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -171,6 +171,7 @@ SmallPtrSet Specializations; SmallPtrSet FullySpecialized; DenseMap FunctionMetrics; + DenseMap NumSpecs; public: FunctionSpecializer( diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -489,6 +489,9 @@ // Find possible specializations for each function. SpecMap SM; SmallVector AllSpecs; + + promoteConstantStackValues(); + unsigned NumCandidates = 0; for (Function &F : M) { if (!isCandidateFunction(&F)) @@ -612,7 +615,6 @@ // Rerun the solver to notify the users of the modified callsites. Solver.solveWhileResolvedUndefs(); - promoteConstantStackValues(); return true; } @@ -733,10 +735,13 @@ } else { // Calculate the specialisation gain. Cost Score = 0; + Cost Penalty = 1; InstCostVisitor Visitor = getInstCostVisitorFor(F); - for (ArgInfo &A : S.Args) + for (ArgInfo &A : S.Args) { Score += getSpecializationBonus(A.Formal, A.Actual, Visitor); - Score /= SpecCost; + Penalty += NumSpecs[A.Formal]; + } + Score /= SpecCost * Penalty; // Discard unprofitable specialisations. if (!ForceSpecialization && Score < MinScore) @@ -804,6 +809,11 @@ // Mark all the specialized functions Specializations.insert(Clone); + + // Update the cost model. 
+ for (const ArgInfo &A : S.Args) + ++NumSpecs[A.Formal]; + ++NumSpecsCreated; return Clone; diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp --- a/llvm/lib/Transforms/IPO/SCCP.cpp +++ b/llvm/lib/Transforms/IPO/SCCP.cpp @@ -42,7 +42,7 @@ "Number of instructions replaced with (simpler) instruction"); static cl::opt FuncSpecMaxIters( - "funcspec-max-iters", cl::init(1), cl::Hidden, cl::desc( + "funcspec-max-iters", cl::init(10), cl::Hidden, cl::desc( "The maximum number of iterations function specialization is run")); static void findReturnsToZap(Function &F, diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll @@ -26,31 +26,40 @@ ret void } -; ITERS2: @funcspec.arg.4 = internal constant i32 2 -; ITERS2: @funcspec.arg.5 = internal constant i32 4 +; ITERS2: @funcspec.arg = internal constant i32 1 +; ITERS2: @funcspec.arg.2 = internal constant i32 5 -; ITERS3: @funcspec.arg.7 = internal constant i32 3 -; ITERS3: @funcspec.arg.8 = internal constant i32 3 +; ITERS3: @funcspec.arg = internal constant i32 1 +; ITERS3: @funcspec.arg.2 = internal constant i32 5 +; ITERS3: @funcspec.arg.4 = internal constant i32 2 +; ITERS3: @funcspec.arg.5 = internal constant i32 4 + +; ITERS4: @funcspec.arg = internal constant i32 1 +; ITERS4: @funcspec.arg.2 = internal constant i32 5 +; ITERS4: @funcspec.arg.4 = internal constant i32 2 +; ITERS4: @funcspec.arg.5 = internal constant i32 4 +; ITERS4: @funcspec.arg.7 = internal constant i32 3 +; ITERS4: @funcspec.arg.8 = internal constant i32 3 define i32 @main() { ; ITERS2-LABEL: @main( -; ITERS2-NEXT: call void @print_val(i32 0, i32 6) -; ITERS2-NEXT: call void @print_val(i32 1, i32 5) -; ITERS2-NEXT: call void 
@recursiveFunc(ptr nonnull @funcspec.arg.4, i32 1, ptr nonnull @funcspec.arg.5) -; ITERS2-NEXT: ret i32 0 +; ITERS2: call void @print_val(i32 0, i32 6) +; ITERS2: call void @print_val(i32 1, i32 5) +; ITERS2: call void @recursiveFunc( +; ITERS2: ret i32 0 ; ; ITERS3-LABEL: @main( -; ITERS3-NEXT: call void @print_val(i32 0, i32 6) -; ITERS3-NEXT: call void @print_val(i32 1, i32 5) -; ITERS3-NEXT: call void @print_val(i32 2, i32 4) -; ITERS3-NEXT: call void @recursiveFunc(ptr nonnull @funcspec.arg.7, i32 1, ptr nonnull @funcspec.arg.8) -; ITERS3-NEXT: ret i32 0 +; ITERS3: call void @print_val(i32 0, i32 6) +; ITERS3: call void @print_val(i32 1, i32 5) +; ITERS3: call void @print_val(i32 2, i32 4) +; ITERS3: call void @recursiveFunc( +; ITERS3: ret i32 0 ; ; ITERS4-LABEL: @main( -; ITERS4-NEXT: call void @print_val(i32 0, i32 6) -; ITERS4-NEXT: call void @print_val(i32 1, i32 5) -; ITERS4-NEXT: call void @print_val(i32 2, i32 4) -; ITERS4-NEXT: ret i32 0 +; ITERS4: call void @print_val(i32 0, i32 6) +; ITERS4: call void @print_val(i32 1, i32 5) +; ITERS4: call void @print_val(i32 2, i32 4) +; ITERS4: ret i32 0 ; call void @recursiveFunc(ptr nonnull @low, i32 1, ptr nonnull @high) ret i32 0 diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll @@ -1,8 +1,8 @@ ; RUN: opt -passes="ipsccp" -force-specialization \ -; RUN: -funcspec-max-clones=2 -S < %s | FileCheck %s +; RUN: -funcspec-max-iters=1 -funcspec-max-clones=2 -S < %s | FileCheck %s ; RUN: opt -passes="ipsccp" -force-specialization \ -; RUN: -funcspec-max-clones=1 -S < %s | FileCheck %s --check-prefix=CONST1 +; RUN: -funcspec-max-iters=1 -funcspec-max-clones=1 -S < %s | FileCheck %s --check-prefix=CONST1 target datalayout = 
"e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/Transforms/FunctionSpecialization/max-iters.ll b/llvm/test/Transforms/FunctionSpecialization/max-iters.ll --- a/llvm/test/Transforms/FunctionSpecialization/max-iters.ll +++ b/llvm/test/Transforms/FunctionSpecialization/max-iters.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes="ipsccp,deadargelim" -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS1 +; RUN: opt -passes="ipsccp,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS1 ; RUN: opt -passes="ipsccp,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS1 ; RUN: opt -passes="ipsccp,deadargelim" -funcspec-max-iters=2 -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS2 ; RUN: opt -passes="ipsccp,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED diff --git a/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll b/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll @@ -0,0 +1,63 @@ +; REQUIRES: asserts +; RUN: opt -passes="ipsccp,inline,instcombine,simplifycfg" -S \ +; RUN: -funcspec-min-entry-freq=8 -funcspec-min-function-size=17 \ +; RUN: -funcspec-min-score=20 -funcspec-max-iters=100 \ +; RUN: -debug-only=function-specialization < %s 2>&1 | FileCheck %s + +; Make sure the number of specializations created is not +; linear in the number of iterations (funcspec-max-iters). 
+ +; CHECK: FnSpecialization: Function recursiveFunc , score 241 +; CHECK: FnSpecialization: Function recursiveFunc , score 120 +; CHECK: FnSpecialization: Function recursiveFunc , score 80 +; CHECK: FnSpecialization: Function recursiveFunc , score 60 +; CHECK: FnSpecialization: Function recursiveFunc , score 48 +; CHECK: FnSpecialization: Function recursiveFunc , score 40 +; CHECK: FnSpecialization: Function recursiveFunc , score 34 +; CHECK: FnSpecialization: Function recursiveFunc , score 30 +; CHECK: FnSpecialization: Function recursiveFunc , score 26 +; CHECK: FnSpecialization: Function recursiveFunc , score 24 +; CHECK: FnSpecialization: Function recursiveFunc , score 21 +; CHECK: FnSpecialization: Function recursiveFunc , score 20 +; CHECK: FnSpecialization: Created 12 specializations in module + +@Global = internal constant i32 1, align 4 + +define internal void @recursiveFunc(ptr readonly %arg) { + %temp = alloca i32, align 4 + %arg.load = load i32, ptr %arg, align 4 + %arg.cmp = icmp slt i32 %arg.load, 10000 + br i1 %arg.cmp, label %loop1, label %ret.block + +loop1: + br label %loop2 + +loop2: + br label %block6 + +block6: + call void @print_val(i32 %arg.load) + %arg.add = add nsw i32 %arg.load, 1 + store i32 %arg.add, ptr %temp, align 4 + call void @recursiveFunc(ptr %temp) + br label %loop2.end + +loop2.end: + %exit_cond3 = call i1 @exit_cond() + br i1 %exit_cond3, label %loop2, label %loop1.end + +loop1.end: + %exit_cond4 = call i1 @exit_cond() + br i1 %exit_cond4, label %loop1, label %ret.block + +ret.block: + ret void +} + +define i32 @main() { + call void @recursiveFunc(ptr @Global) + ret i32 0 +} + +declare dso_local void @print_val(i32) +declare dso_local i1 @exit_cond() diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll --- a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll +++ 
b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes="ipsccp" -funcspec-max-clones=0 -force-specialization -S < %s | FileCheck %s --check-prefix=NONE -; RUN: opt -passes="ipsccp" -funcspec-max-clones=1 -force-specialization -S < %s | FileCheck %s --check-prefix=ONE -; RUN: opt -passes="ipsccp" -funcspec-max-clones=2 -force-specialization -S < %s | FileCheck %s --check-prefix=TWO -; RUN: opt -passes="ipsccp" -funcspec-max-clones=3 -force-specialization -S < %s | FileCheck %s --check-prefix=THREE +; RUN: opt -passes="ipsccp" -funcspec-max-iters=1 -funcspec-max-clones=0 -force-specialization -S < %s | FileCheck %s --check-prefix=NONE +; RUN: opt -passes="ipsccp" -funcspec-max-iters=1 -funcspec-max-clones=1 -force-specialization -S < %s | FileCheck %s --check-prefix=ONE +; RUN: opt -passes="ipsccp" -funcspec-max-iters=1 -funcspec-max-clones=2 -force-specialization -S < %s | FileCheck %s --check-prefix=TWO +; RUN: opt -passes="ipsccp" -funcspec-max-iters=1 -funcspec-max-clones=3 -force-specialization -S < %s | FileCheck %s --check-prefix=THREE ; Make sure that we iterate correctly after sorting the specializations: ;