diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -184,6 +184,7 @@
 
   ~FunctionSpecializer();
 
+  void init();
   bool run();
 
   InstCostVisitor getInstCostVisitorFor(Function *F) {
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -463,6 +463,15 @@
   cleanUpSSA();
 }
 
+/// One-time setup: promotes constant stack values. Call once, before run().
+void FunctionSpecializer::init() {
+  // Promote constant stack values a single time, ahead of the caller's loop
+  // over run(). Besides generally exposing more specialization opportunities,
+  // this is critical for Fortran programs, where function arguments are passed
+  // by reference.
+  promoteConstantStackValues();
+}
+
 /// Attempt to specialize functions in the module to enable constant
 /// propagation across function boundaries.
 ///
diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp
--- a/llvm/lib/Transforms/IPO/SCCP.cpp
+++ b/llvm/lib/Transforms/IPO/SCCP.cpp
@@ -160,6 +160,7 @@
   Solver.solveWhileResolvedUndefsIn(M);
 
   if (IsFuncSpecEnabled) {
+    Specializer.init();
     unsigned Iters = 0;
     while (Iters++ < FuncSpecMaxIters && Specializer.run());
   }
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive5.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive5.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive5.ll
@@ -0,0 +1,48 @@
+; RUN: opt -passes="ipsccp,inline,instcombine" -force-specialization -funcspec-max-iters=1 -S < %s | FileCheck %s --check-prefix=ITERS1
+; RUN: opt -passes="ipsccp,inline,instcombine" -force-specialization -funcspec-max-iters=4 -S < %s | FileCheck %s --check-prefix=ITERS4
+
+define internal void @recursiveFunc(ptr nocapture readonly %lo, i32 %step, ptr nocapture readonly %hi) {
+  %lo.temp = alloca i32, align 4
+  %hi.temp = alloca i32, align 4
+  %lo.load = load i32, ptr %lo, align 4
+  %hi.load = load i32, ptr %hi, align 4
+  %cmp = icmp ne i32 %lo.load, %hi.load
+  br i1 %cmp, label %block6, label %ret.block
+
+block6:
+  call void @print_val(i32 %lo.load, i32 %hi.load)
+  %add = add nsw i32 %lo.load, %step
+  %sub = sub nsw i32 %hi.load, %step
+  store i32 %add, ptr %lo.temp, align 4
+  store i32 %sub, ptr %hi.temp, align 4
+  call void @recursiveFunc(ptr nonnull %lo.temp, i32 %step, ptr nonnull %hi.temp)
+  br label %ret.block
+
+ret.block:
+  ret void
+}
+
+; ITERS1: @funcspec.arg.3 = internal constant i32 1
+; ITERS1: @funcspec.arg.4 = internal constant i32 5
+
+define i32 @main() {
+; ITERS1-LABEL: @main(
+; ITERS1-NEXT:    call void @print_val(i32 0, i32 6)
+; ITERS1-NEXT:    call void @recursiveFunc(ptr nonnull @funcspec.arg.3, i32 1, ptr nonnull @funcspec.arg.4)
+; ITERS1-NEXT:    ret i32 0
+;
+; ITERS4-LABEL: @main(
+; ITERS4-NEXT:    call void @print_val(i32 0, i32 6)
+; ITERS4-NEXT:    call void @print_val(i32 1, i32 5)
+; ITERS4-NEXT:    call void @print_val(i32 2, i32 4)
+; ITERS4-NEXT:    ret i32 0
+;
+  %low.arg = alloca i32, align 4
+  %high.arg = alloca i32, align 4
+  store i32 0, ptr %low.arg, align 4
+  store i32 6, ptr %high.arg, align 4
+  call void @recursiveFunc(ptr nonnull %low.arg, i32 1, ptr nonnull %high.arg)
+  ret i32 0
+}
+
+declare dso_local void @print_val(i32, i32)