diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -25,7 +25,6 @@
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
@@ -42,6 +41,7 @@
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
@@ -65,6 +65,7 @@
 #include "llvm/Transforms/Utils/CtorUtils.h"
 #include "llvm/Transforms/Utils/Evaluator.h"
 #include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <cassert>
 #include <cstdint>
 #include <utility>
@@ -2098,14 +2099,75 @@
   return Attrs;
 }
 
-static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
-  F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
+/// Strip the 'nest' attribute from \p F and from each of its users, which are
+/// treated as call sites. Block address users are skipped.
+static void RemoveNestAttribute(Function *F) {
+  F->setAttributes(
+      StripAttr(F->getContext(), F->getAttributes(), Attribute::Nest));
+  for (User *U : F->users()) {
+    if (isa<BlockAddress>(U))
+      continue;
+    CallSite CS(cast<Instruction>(U));
+    CS.setAttributes(
+        StripAttr(F->getContext(), CS.getAttributes(), Attribute::Nest));
+  }
+}
+
+/// Strip the 'inalloca' attribute from \p F and from its call sites. If the
+/// inalloca argument of a call is a constant-sized alloca outside the entry
+/// block, additionally hoist that alloca into the entry block and bracket
+/// its former live range, from the old alloca position to just after the
+/// call, with lifetime markers.
+static void RemoveInAlloca(Function *F) {
   for (User *U : F->users()) {
     if (isa<BlockAddress>(U))
       continue;
     CallSite CS(cast<Instruction>(U));
-    CS.setAttributes(StripAttr(F->getContext(), CS.getAttributes(), A));
+    assert(CS.arg_size() > 0);
+    // The inalloca, if present, is on the last argument.
+    unsigned ArgNo = CS.arg_size() - 1;
+    if (!CS.paramHasAttr(ArgNo, Attribute::InAlloca))
+      continue;
+    CS.setAttributes(
+        StripAttr(F->getContext(), CS.getAttributes(), Attribute::InAlloca));
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(CS.getArgument(ArgNo))) {
+      AI->setUsedWithInAlloca(false);
+      // Don't hoist allocas with dynamic size or already in the entry block.
+      ConstantInt *Num = dyn_cast<ConstantInt>(AI->getArraySize());
+      if (!Num)
+        continue;
+      BasicBlock &Entry = AI->getFunction()->getEntryBlock();
+      if (AI->getParent() == &Entry)
+        continue;
+      // Restrict the lifetime of the hoisted alloca to start where the
+      // original alloca was and end after the call that uses the alloca.
+      ConstantInt *Size =
+          ConstantInt::get(Type::getInt64Ty(AI->getContext()),
+                           Num->getZExtValue() *
+                               F->getParent()->getDataLayout().getTypeAllocSize(
+                                   AI->getAllocatedType()));
+      IRBuilder<> IR(AI);
+      IR.CreateLifetimeStart(AI, Size);
+      if (CS->isTerminator()) {
+        for (unsigned i = 0; i < CS->getNumSuccessors(); ++i) {
+          BasicBlock *BB = CS->getSuccessor(i);
+          // A catchswitch block cannot hold any other non-PHI instruction;
+          // getFirstInsertionPt() returns end() for it. Skip the marker
+          // there, which only keeps the alloca live longer on that path.
+          BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
+          if (InsertPt == BB->end())
+            continue;
+          IRBuilder<>(BB, InsertPt).CreateLifetimeEnd(AI, Size);
+        }
+      } else {
+        IR.SetInsertPoint(CS->getParent(), ++CS->getIterator());
+        IR.CreateLifetimeEnd(AI, Size);
+      }
+      AI->moveBefore(Entry.getTerminator());
+    }
   }
+  F->setAttributes(
+      StripAttr(F->getContext(), F->getAttributes(), Attribute::InAlloca));
 }
 
 /// Return true if this is a calling convention that we'd like to change. The
@@ -2283,11 +2345,9 @@
     // If we have an inalloca parameter that we can safely remove the
     // inalloca attribute from, do so. This unlocks optimizations that
     // wouldn't be safe in the presence of inalloca.
-    // FIXME: We should also hoist alloca affected by this to the entry
-    // block if possible.
     if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
         !F->hasAddressTaken()) {
-      RemoveAttribute(F, Attribute::InAlloca);
+      RemoveInAlloca(F);
       Changed = true;
     }
 
@@ -2323,7 +2383,7 @@
         !F->hasAddressTaken()) {
       // The function is not used by a trampoline intrinsic, so it is safe
       // to remove the 'nest' attribute.
-      RemoveAttribute(F, Attribute::Nest);
+      RemoveNestAttribute(F);
       ++NumNestRemoved;
       Changed = true;
     }
diff --git a/llvm/test/Transforms/GlobalOpt/inalloca.ll b/llvm/test/Transforms/GlobalOpt/inalloca.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/inalloca.ll
@@ -0,0 +1,99 @@
+; Tests that globalopt can turn inallocas into static allocas.
+
+; RUN: opt -S -globalopt %s | FileCheck %s
+; RUN: opt -S -passes=globalopt %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i386-pc-windows-msvc19.11.0"
+
+%struct.a = type { i1 }
+
+; CHECK: define internal fastcc i1 @f(%struct.a* %a)
+define internal i1 @f(%struct.a* inalloca %a) {
+  %1 = getelementptr inbounds %struct.a, %struct.a* %a, i32 0, i32 0
+  %2 = load i1, i1* %1
+  ret i1 %2
+}
+
+; CHECK: @move
+define i32 @move() {
+; CHECK: %a = alloca %struct.a
+  br label %again
+; CHECK: again:
+again:
+; CHECK-NOT: alloca
+; CHECK: %1 = bitcast %struct.a* %a to i8*
+; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %1)
+  %a = alloca inalloca %struct.a
+; CHECK: %t = call fastcc i1 @f(%struct.a* %a)
+  %t = call i1 @f(%struct.a* inalloca %a)
+; CHECK: %2 = bitcast %struct.a* %a to i8*
+; CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* %2)
+  br i1 %t, label %again, label %done
+done:
+  ret i32 0
+}
+
+; Check that no lifetime end is inserted into a catchswitch block, which has
+; no legal insertion point.
+; CHECK: @catchswitch
+define i32 @catchswitch() personality i32 (...)* @__CxxFrameHandler3 {
+; CHECK: %a = alloca %struct.a
+  br label %again
+; CHECK: again:
+again:
+  %a = alloca inalloca %struct.a
+; CHECK: %t = invoke fastcc i1 @f(%struct.a* %a)
+  %t = invoke i1 @f(%struct.a* inalloca %a)
+          to label %cont unwind label %dispatch
+cont:
+  br i1 %t, label %again, label %done
+; CHECK: dispatch:
+; CHECK-NEXT: catchswitch within none
+dispatch:
+  %cs = catchswitch within none [label %handler] unwind to caller
+handler:
+  %cp = catchpad within %cs [i8* null, i32 64, i8* null]
+  catchret from %cp to label %done
+done:
+  ret i32 0
+}
+
+; Check that allocas already in the entry block stay before their uses.
+; CHECK: @dontmove
+define i32 @dontmove() {
+; CHECK: %a = alloca %struct.a
+  %a = alloca inalloca %struct.a
+; CHECK: %t = call fastcc i1 @f(%struct.a* %a)
+  %t = call i1 @f(%struct.a* inalloca %a)
+  ret i32 0
+}
+
+; Check that lifetime ends are inserted in both successors of an invoke.
+; CHECK: @successors
+define i32 @successors() nounwind personality i32 (...)* @__CxxFrameHandler3 {
+; CHECK: %a = alloca %struct.a
+  br label %again
+; CHECK: again:
+again:
+; CHECK-NOT: alloca
+; CHECK: %1 = bitcast %struct.a* %a to i8*
+; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %1)
+  %a = alloca inalloca %struct.a
+; CHECK: %t = invoke fastcc i1 @f(%struct.a* %a)
+  %t = invoke i1 @f(%struct.a* inalloca %a)
+          to label %cont unwind label %unwind
+cont:
+; CHECK: %2 = bitcast %struct.a* %a to i8*
+; CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* %2)
+  br i1 %t, label %again, label %done
+unwind:
+  landingpad { i8*, i32 } catch i8** null ; unnamed result, numbered %3 in the output, so the bitcast below is %4
+; CHECK: %4 = bitcast %struct.a* %a to i8*
+; CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* %4)
+  ret i32 -1
+done:
+  ret i32 0
+}
+
+declare i32 @__CxxFrameHandler3(...)