diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -225,6 +225,7 @@ SmallPtrSet Specializations; SmallPtrSet FullySpecialized; DenseMap FunctionMetrics; + DenseMap FunctionGrowth; public: FunctionSpecializer( diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -93,6 +93,10 @@ "Don't specialize functions that have less than this number of " "instructions")); +static cl::opt MaxCodeSizeGrowth( + "funcspec-max-codesize-growth", cl::init(3), cl::Hidden, cl::desc( + "Maximum codesize growth allowed per function")); + static cl::opt MinCodeSizeSavings( "funcspec-min-codesize-savings", cl::init(20), cl::Hidden, cl::desc( "Reject specializations whose codesize savings are less than this" @@ -841,7 +845,10 @@ << B.CodeSize << ", Latency = " << B.Latency << ", Inlining = " << Score << "}\n"); - auto IsProfitable = [&FuncSize](Bonus &B, unsigned Score) -> bool { + FunctionGrowth[F] += FuncSize - B.CodeSize; + + auto IsProfitable = [](Bonus &B, unsigned Score, unsigned FuncSize, + unsigned FuncGrowth) -> bool { // No check required. if (ForceSpecialization) return true; @@ -854,11 +861,14 @@ // Minimum latency savings. if (B.Latency < MinLatencySavings * FuncSize / 100) return false; + // Maximum codesize growth. + if (FuncGrowth / FuncSize > MaxCodeSizeGrowth) + return false; return true; }; // Discard unprofitable specialisations. - if (!IsProfitable(B, Score)) + if (!IsProfitable(B, Score, FuncSize, FunctionGrowth[F])) continue; // Create a new specialisation entry. 
diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp --- a/llvm/lib/Transforms/IPO/SCCP.cpp +++ b/llvm/lib/Transforms/IPO/SCCP.cpp @@ -43,7 +43,7 @@ "Number of instructions replaced with (simpler) instruction"); static cl::opt FuncSpecMaxIters( - "funcspec-max-iters", cl::init(1), cl::Hidden, cl::desc( + "funcspec-max-iters", cl::init(10), cl::Hidden, cl::desc( "The maximum number of iterations function specialization is run")); static void findReturnsToZap(Function &F, diff --git a/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll b/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/recursive-penalty.ll @@ -0,0 +1,64 @@ +; REQUIRES: asserts +; RUN: opt -passes="ipsccp,inline,instcombine,simplifycfg" -S \ +; RUN: -funcspec-min-function-size=23 -funcspec-max-iters=100 \ +; RUN: -debug-only=function-specialization < %s 2>&1 | FileCheck %s + +; Make sure the number of specializations created is not +; linear in the number of iterations (funcspec-max-iters).
+ +; CHECK: FnSpecialization: Created 4 specializations in module + +@Global = internal constant i32 1, align 4 + +define internal void @recursiveFunc(ptr readonly %arg) { + %temp = alloca i32, align 4 + %arg.load = load i32, ptr %arg, align 4 + %arg.cmp = icmp slt i32 %arg.load, 10000 + br i1 %arg.cmp, label %loop1, label %ret.block + +loop1: + br label %loop2 + +loop2: + br label %loop3 + +loop3: + br label %loop4 + +loop4: + br label %block6 + +block6: + call void @print_val(i32 %arg.load) + %arg.add = add nsw i32 %arg.load, 1 + store i32 %arg.add, ptr %temp, align 4 + call void @recursiveFunc(ptr %temp) + br label %loop4.end + +loop4.end: + %exit_cond1 = call i1 @exit_cond() + br i1 %exit_cond1, label %loop4, label %loop3.end + +loop3.end: + %exit_cond2 = call i1 @exit_cond() + br i1 %exit_cond2, label %loop3, label %loop2.end + +loop2.end: + %exit_cond3 = call i1 @exit_cond() + br i1 %exit_cond3, label %loop2, label %loop1.end + +loop1.end: + %exit_cond4 = call i1 @exit_cond() + br i1 %exit_cond4, label %loop1, label %ret.block + +ret.block: + ret void +} + +define i32 @main() { + call void @recursiveFunc(ptr @Global) + ret i32 0 +} + +declare dso_local void @print_val(i32) +declare dso_local i1 @exit_cond()