diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -49,6 +49,9 @@
 /// Do not inline functions which allocate this many bytes on the stack
 /// when the caller is recursive.
 const unsigned TotalAllocaSizeRecursiveCaller = 1024;
+/// Do not inline dynamic allocas that have been constant propagated to be
+/// static allocas above this amount in bytes.
+const uint64_t MaxSimplifiedDynamicAllocaToMove = 65536;
 } // namespace InlineConstants
 
 /// Represents the cost of inlining a function.
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -838,6 +838,18 @@
   if (I.isArrayAllocation()) {
     Constant *Size = SimplifiedValues.lookup(I.getArraySize());
     if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
+      // Sometimes a dynamic alloca could be converted into a static alloca
+      // after this constant prop, and become a huge static alloca on an
+      // unconditional CFG path. Avoid inlining if this is going to happen above
+      // a threshold.
+      // NOTE(review): this compares the array element count, not the byte
+      // size (count * type alloc size), against a threshold documented in
+      // bytes -- confirm which is intended.
+      if (AllocSize->getLimitedValue() >
+          InlineConstants::MaxSimplifiedDynamicAllocaToMove) {
+        HasDynamicAlloca = true;
+        return false;
+      }
       Type *Ty = I.getAllocatedType();
       AllocatedSize = SaturatingMultiplyAdd(
           AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty).getFixedSize(),
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
diff --git a/llvm/test/Transforms/Inline/dynamic-alloca-simplified-large.ll b/llvm/test/Transforms/Inline/dynamic-alloca-simplified-large.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Inline/dynamic-alloca-simplified-large.ll
@@ -0,0 +1,72 @@
+; RUN: opt -inline < %s -S -o - | FileCheck %s
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.15.0"
+
+define void @caller1(i8 *%p1, i1 %b) {
+; CHECK-LABEL: @caller1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i1 [[B:%.*]], true
+; CHECK-NEXT:    br i1 [[COND]], label [[EXIT:%.*]], label [[SPLIT:%.*]]
+; CHECK:       split:
+; CHECK-NEXT:    call void @callee(i8* [[P1:%.*]], i32 0, i32 -1)
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cond = icmp eq i1 %b, true
+  br i1 %cond, label %exit, label %split
+
+split:
+  ; This path may be generated from CS splitting and never taken at runtime.
+  call void @callee(i8* %p1, i32 0, i32 -1)
+  br label %exit
+
+exit:
+  ret void
+}
+
+define void @callee(i8* %p1, i32 %l1, i32 %l2) {
+entry:
+  %ext = zext i32 %l2 to i64
+  %vla = alloca float, i64 %ext, align 16
+  call void @extern_call(float* nonnull %vla) #3
+  ret void
+}
+
+
+define void @caller2_below_threshold(i8 *%p1, i1 %b) {
+; CHECK-LABEL: @caller2_below_threshold(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VLA_I:%.*]] = alloca float, i64 15000, align 16
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i1 [[B:%.*]], true
+; CHECK-NEXT:    br i1 [[COND]], label [[EXIT:%.*]], label [[SPLIT:%.*]]
+; CHECK:       split:
+; CHECK-NEXT:    [[SAVEDSTACK:%.*]] = call i8* @llvm.stacksave()
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[VLA_I]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 60000, i8* [[TMP0]])
+; CHECK-NEXT:    call void @extern_call(float* nonnull [[VLA_I]]) #1
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[VLA_I]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 60000, i8* [[TMP1]])
+; CHECK-NEXT:    call void @llvm.stackrestore(i8* [[SAVEDSTACK]])
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cond = icmp eq i1 %b, true
+  br i1 %cond, label %exit, label %split
+
+split:
+  call void @callee(i8* %p1, i32 0, i32 15000)
+  br label %exit
+
+exit:
+  ret void
+}
+
+declare void @extern_call(float*)
+
+attributes #1 = { argmemonly nounwind willreturn writeonly }
+attributes #3 = { nounwind }
+