diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -805,6 +805,7 @@
       // trigger infinite inlining, much like is prevented within the inliner
       // itself by the InlineHistory above, but spread across CGSCC iterations
       // and thus hidden from the full inline history.
-      if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
+      auto CalleeSCC = CG.lookupSCC(*CG.lookup(Callee));
+      if (CalleeSCC == C &&
           UR.InlinedInternalEdges.count({&N, C})) {
         LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
@@ -814,6 +815,29 @@
         continue;
       }
 
+      // Do not inline a function involved in a mutually recursive cycle.
+      // Because there is no global inline history, such inlining may cause
+      // exponential size growth as the inlining extends up the call graph.
+      // Consider the following example:
+      //   void A() { B(); B(); }
+      //   void B() { A(); }
+      //   void C() { B(); }
+      //   void D() { C(); }
+      // When processing C, inlining B results in
+      //   void C() { B(); B(); }
+      // When processing D, inlining C and then the two B call sites results in
+      //   void D() { B(); B(); B(); B(); }
+      // and each further caller up the chain doubles the count again. Note
+      // that only mutual recursion is checked here; self-recursion is handled
+      // by the inline cost analyzer.
+      if (CalleeSCC->size() > 1) {
+        LLVM_DEBUG(dbgs() << "Skipping inlining a callee involved in a "
+                             "mutually recursive SCC: "
+                          << F.getName() << " -> " << Callee.getName() << "\n");
+        setInlineRemark(*CB, "recursive");
+        continue;
+      }
+
       auto Advice = Advisor.getAdvice(*CB, OnlyMandatory);
       // Check whether we want to inline this callsite.
       if (!Advice->isInliningRecommended()) {
diff --git a/llvm/test/Transforms/Inline/recursion.ll b/llvm/test/Transforms/Inline/recursion.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Inline/recursion.ll
@@ -0,0 +1,101 @@
+; RUN: opt -O3 -inline-threshold=50 -S < %s | FileCheck %s
+;; Check that fib is not over-inlined into A, B, and C.
+
+define dso_local i32 @fib0(i32 %n) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %n.addr = alloca i32, align 4
+  store i32 %n, i32* %n.addr, align 4
+  %0 = load i32, i32* %n.addr, align 4
+  %cmp = icmp sle i32 %0, 1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %1 = load i32, i32* %n.addr, align 4
+  store i32 %1, i32* %retval, align 4
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %2 = load i32, i32* %n.addr, align 4
+  %sub = sub nsw i32 %2, 1
+  %call = call i32 @fib(i32 %sub)
+  %3 = load i32, i32* %n.addr, align 4
+  %sub1 = sub nsw i32 %3, 2
+  %call2 = call i32 @fib(i32 %sub1)
+  %add = add nsw i32 %call, %call2
+  store i32 %add, i32* %retval, align 4
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %4 = load i32, i32* %retval, align 4
+  ret i32 %4
+}
+
+define dso_local i32 @fib(i32 %n) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %n.addr = alloca i32, align 4
+  store i32 %n, i32* %n.addr, align 4
+  %0 = load i32, i32* %n.addr, align 4
+  %cmp = icmp sle i32 %0, 1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %1 = load i32, i32* %n.addr, align 4
+  store i32 %1, i32* %retval, align 4
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %2 = load i32, i32* %n.addr, align 4
+  %sub = sub nsw i32 %2, 1
+  %call = call i32 @fib0(i32 %sub)
+  %3 = load i32, i32* %n.addr, align 4
+  %sub1 = sub nsw i32 %3, 2
+  %call2 = call i32 @fib0(i32 %sub1)
+  %add = add nsw i32 %call, %call2
+  store i32 %add, i32* %retval, align 4
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %4 = load i32, i32* %retval, align 4
+  ret i32 %4
+}
+
+; CHECK-LABEL: @A(
+; CHECK: call i32 @fib(i32 100)
+; CHECK: call i32 @fib(i32 200)
+define dso_local i32 @A(i32 %n) #1 {
+entry:
+  %n.addr = alloca i32, align 4
+  store i32 %n, i32* %n.addr, align 4
+  %call = call i32 @fib(i32 100)
+  %call1 = call i32 @fib(i32 200)
+  %add = add nsw i32 %call, %call1
+  ret i32 %add
+}
+
+; CHECK-LABEL: @B(
+; CHECK: call i32 @fib(i32 100)
+; CHECK: call i32 @fib(i32 200)
+define dso_local i32 @B(i32 %n) #1 {
+entry:
+  %n.addr = alloca i32, align 4
+  store i32 %n, i32* %n.addr, align 4
+  %call = call i32 @A(i32 8)
+  ret i32 %call
+}
+
+; CHECK-LABEL: @C(
+; CHECK: call i32 @fib(i32 100)
+; CHECK: call i32 @fib(i32 200)
+define dso_local i32 @C(i32 %n) #0 {
+entry:
+  %n.addr = alloca i32, align 4
+  store i32 %n, i32* %n.addr, align 4
+  %0 = load i32, i32* %n.addr, align 4
+  %call = call i32 @B(i32 %0)
+  ret i32 %call
+}
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "min-legal-vector-width"="0" "no-jump-tables"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { alwaysinline nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "min-legal-vector-width"="0" "no-jump-tables"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
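
For reference, a minimal standalone C analogue of the new recursion.ll test (a sketch only; the file name mutual_fib.c and the clang invocation are illustrative assumptions, not part of the patch). fib0 and fib call each other, forming a two-node SCC in the call graph, which is exactly the CalleeSCC->size() > 1 case the patch now skips:

/* mutual_fib.c -- hypothetical file; try: clang -O3 -S -emit-llvm mutual_fib.c
 * fib0 and fib form a mutually recursive SCC, so with this patch the inliner
 * should leave the fib calls in A alone rather than repeatedly expanding the
 * fib <-> fib0 cycle into every transitive caller. */
int fib(int n);

int fib0(int n) {
  if (n <= 1)
    return n;
  /* fib0 -> fib: one edge of the two-node cycle */
  return fib(n - 1) + fib(n - 2);
}

int fib(int n) {
  if (n <= 1)
    return n;
  /* fib -> fib0: the edge that closes the cycle */
  return fib0(n - 1) + fib0(n - 2);
}

int A(void) { return fib(100) + fib(200); }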