diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -49,6 +49,9 @@
 /// Do not inline functions which allocate this many bytes on the stack
 /// when the caller is recursive.
 const unsigned TotalAllocaSizeRecursiveCaller = 1024;
+/// Do not move callee static allocas to the caller entry block if the alloc
+/// size is above this amount in bytes.
+const unsigned MaxStaticAllocaSizeToMove = 65536;
 } // namespace InlineConstants
 
 /// Represents the cost of inlining a function.
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -1914,7 +1915,11 @@
   // block for the callee, move them to the entry block of the caller. First
   // calculate which instruction they should be inserted before. We insert the
   // instructions at the end of the current alloca list.
+  // We don't move allocas if they would be allocating very large blocks of
+  // memory, since they might be on a never-executed path. This could happen due
+  // to things like call site splitting & SCCP.
   {
+    auto &DL = Caller->getParent()->getDataLayout();
     BasicBlock::iterator InsertPoint = Caller->begin()->begin();
     for (BasicBlock::iterator I = FirstNewBlock->begin(),
          E = FirstNewBlock->end(); I != E; ) {
@@ -1928,6 +1933,17 @@
         continue;
       }
 
+      // Returns true when the alloca's size is known and exceeds
+      // InlineConstants::MaxStaticAllocaSizeToMove bytes.
+      auto AllocIsOversize = [&DL](const AllocaInst *Alloca) {
+        auto Size = Alloca->getAllocationSizeInBits(DL);
+        if (!Size)
+          return false;
+        return Size.getValue() / 8 > InlineConstants::MaxStaticAllocaSizeToMove;
+      };
+      if (AllocIsOversize(AI))
+        continue;
+
       if (!allocaWouldBeStaticInEntry(AI))
         continue;
 
@@ -1938,6 +1954,7 @@
       // all at once.
       while (isa<AllocaInst>(I) &&
              !cast<AllocaInst>(I)->use_empty() &&
+             !AllocIsOversize(cast<AllocaInst>(I)) &&
              allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) {
         IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
         ++I;
diff --git a/llvm/test/Transforms/Inline/alloca-too-large-to-hoist.ll b/llvm/test/Transforms/Inline/alloca-too-large-to-hoist.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Inline/alloca-too-large-to-hoist.ll
@@ -0,0 +1,70 @@
+; RUN: opt -inline < %s -S -o - | FileCheck %s
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.15.0"
+
+define void @caller1(i8 *%p1, i1 %b) {
+; CHECK-LABEL: @caller1
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i1 [[B:%.*]], true
+; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[SPLIT:%.*]]
+; CHECK: split:
+; CHECK-NEXT: [[SAVEDSTACK:%.*]] = call i8* @llvm.stacksave()
+; CHECK-NEXT: [[VLA_I:%.*]] = alloca float, i64 4294967295, align 16
+; CHECK-NEXT: call void @extern_call(float* nonnull [[VLA_I]]) #0
+entry:
+  %cond = icmp eq i1 %b, true
+  br i1 %cond, label %exit, label %split
+
+split:
+  ; This path may be generated from CS splitting and never taken at runtime.
+  call void @callee(i8* %p1, i32 0, i32 -1)
+  br label %exit
+
+exit:
+  ret void
+}
+
+define void @callee(i8* %p1, i32 %l1, i32 %l2) {
+entry:
+  %ext = zext i32 %l2 to i64
+  %vla = alloca float, i64 %ext, align 16
+  call void @extern_call(float* nonnull %vla) #3
+  ret void
+}
+
+
+define void @caller2_below_threshold(i8 *%p1, i1 %b) {
+; CHECK-LABEL: @caller2_below_threshold
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[VLA_I:%.*]] = alloca float, i64 15000, align 16
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i1 [[B:%.*]], true
+; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[SPLIT:%.*]]
+; CHECK: split:
+; CHECK-NEXT: [[SAVEDSTACK:%.*]] = call i8* @llvm.stacksave()
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[VLA_I]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 60000, i8* [[TMP0]])
+; CHECK-NEXT: call void @extern_call(float* nonnull [[VLA_I]]) #0
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[VLA_I]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 60000, i8* [[TMP1]])
+; CHECK-NEXT: call void @llvm.stackrestore(i8* [[SAVEDSTACK]])
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+  %cond = icmp eq i1 %b, true
+  br i1 %cond, label %exit, label %split
+
+split:
+  call void @callee(i8* %p1, i32 0, i32 15000)
+  br label %exit
+
+exit:
+  ret void
+}
+
+declare void @extern_call(float*)
+
+attributes #1 = { argmemonly nounwind willreturn writeonly }
+attributes #3 = { nounwind }
+