diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -25,7 +25,6 @@
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
@@ -42,6 +41,7 @@
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
@@ -65,6 +65,7 @@
 #include "llvm/Transforms/Utils/CtorUtils.h"
 #include "llvm/Transforms/Utils/Evaluator.h"
 #include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <cassert>
 #include <cstdint>
 #include <utility>
@@ -2098,14 +2099,91 @@
   return Attrs;
 }
 
-static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
-  F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
+/// Strip the 'nest' attribute from \p F itself and from every call site that
+/// uses it. Block address users are skipped since they are not call sites.
+static void RemoveNestAttribute(Function *F) {
+  F->setAttributes(
+      StripAttr(F->getContext(), F->getAttributes(), Attribute::Nest));
+  for (User *U : F->users()) {
+    if (isa<BlockAddress>(U))
+      continue;
+    CallSite CS(cast<Instruction>(U));
+    CS.setAttributes(
+        StripAttr(F->getContext(), CS.getAttributes(), Attribute::Nest));
+  }
+}
+
+/// Insert an llvm.lifetime.end marker for \p P with size \p Size on every
+/// path leaving \p I. For a non-terminator the marker goes directly after
+/// \p I. For a terminator it goes before the first non-PHI instruction of
+/// each successor, unless that instruction is an EH pad: then the marker goes
+/// right after the pad, or, if the pad is the last instruction of its block,
+/// the insertion recurses into the pad's own successors.
+static void InsertLifetimeEndAfter(Instruction *I, Value *P,
+                                   ConstantInt *Size) {
+  if (I->isTerminator()) {
+    for (unsigned i = 0; i < I->getNumSuccessors(); ++i) {
+      BasicBlock *BB = I->getSuccessor(i);
+      Instruction *NonPhi = BB->getFirstNonPHI();
+      if (NonPhi->isEHPad()) {
+        BasicBlock::iterator IP = ++NonPhi->getIterator();
+        if (IP == BB->end())
+          InsertLifetimeEndAfter(NonPhi, P, Size);
+        else
+          IRBuilder<>(BB, IP).CreateLifetimeEnd(P, Size);
+      } else {
+        IRBuilder<>(NonPhi).CreateLifetimeEnd(P, Size);
+      }
+    }
+  } else {
+    IRBuilder<>(I->getParent(), ++I->getIterator())
+        .CreateLifetimeEnd(P, Size);
+  }
+}
+
+/// Strip 'inalloca' from \p F and from each call site whose last argument
+/// carries it. When that argument is an alloca of constant size sitting in
+/// the same non-entry block as the call, the alloca is hoisted to the entry
+/// block and lifetime markers are added around its original live range.
+static void RemoveInAlloca(Function *F) {
   for (User *U : F->users()) {
     if (isa<BlockAddress>(U))
       continue;
     CallSite CS(cast<Instruction>(U));
-    CS.setAttributes(StripAttr(F->getContext(), CS.getAttributes(), A));
+    assert(CS.arg_size() > 0 && "inalloca callee called without arguments");
+    // The inalloca, if present, is on the last argument.
+    unsigned ArgNo = CS.arg_size() - 1;
+    if (!CS.paramHasAttr(ArgNo, Attribute::InAlloca))
+      continue;
+    CS.setAttributes(
+        StripAttr(F->getContext(), CS.getAttributes(), Attribute::InAlloca));
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(CS.getArgument(ArgNo))) {
+      AI->setUsedWithInAlloca(false);
+      // Hoist allocas we just removed inalloca from, unless:
+      // 1. They are already in the entry block.
+      // 2. They have dynamic size.
+      // 3. There is control flow between the alloca and the call that uses it.
+      ConstantInt *Num = dyn_cast<ConstantInt>(AI->getArraySize());
+      if (!Num)
+        continue;
+      if (AI->getParent() != CS->getParent())
+        continue;
+      BasicBlock &Entry = AI->getFunction()->getEntryBlock();
+      if (AI->getParent() == &Entry)
+        continue;
+      IRBuilder<> IR(AI);
+      // Restrict the lifetime of the hoisted alloca to start where the
+      // original alloca was and end after the call that uses the alloca.
+      const DataLayout &DL = F->getParent()->getDataLayout();
+      ConstantInt *Size = IR.getInt64(
+          Num->getZExtValue() * DL.getTypeAllocSize(AI->getAllocatedType()));
+      IR.CreateLifetimeStart(AI, Size);
+      InsertLifetimeEndAfter(CS.getInstruction(), AI, Size);
+      AI->moveBefore(Entry.getTerminator());
+    }
   }
+  F->setAttributes(
+      StripAttr(F->getContext(), F->getAttributes(), Attribute::InAlloca));
 }
 
 /// Return true if this is a calling convention that we'd like to change. The
@@ -2283,11 +2361,9 @@
     // If we have an inalloca parameter that we can safely remove the
     // inalloca attribute from, do so. This unlocks optimizations that
     // wouldn't be safe in the presence of inalloca.
-    // FIXME: We should also hoist alloca affected by this to the entry
-    // block if possible.
     if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
         !F->hasAddressTaken()) {
-      RemoveAttribute(F, Attribute::InAlloca);
+      RemoveInAlloca(F);
       Changed = true;
     }
 
@@ -2323,7 +2399,7 @@
         !F->hasAddressTaken()) {
       // The function is not used by a trampoline intrinsic, so it is safe
       // to remove the 'nest' attribute.
-      RemoveAttribute(F, Attribute::Nest);
+      RemoveNestAttribute(F);
       ++NumNestRemoved;
       Changed = true;
     }
diff --git a/llvm/test/Transforms/GlobalOpt/inalloca.ll b/llvm/test/Transforms/GlobalOpt/inalloca.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/inalloca.ll
@@ -0,0 +1,105 @@
+; Tests that globalopt can turn inallocas into static allocas.
+
+; RUN: opt -S -globalopt %s | FileCheck %s
+; RUN: opt -S -passes=globalopt %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i386-pc-windows-msvc19.11.0"
+
+%struct.a = type { i1 }
+
+; CHECK: define internal fastcc i1 @f(%struct.a* %a)
+define internal i1 @f(%struct.a* inalloca %a) {
+  %1 = getelementptr inbounds %struct.a, %struct.a* %a, i32 0, i32 0
+  %2 = load i1, i1* %1
+  ret i1 %2
+}
+
+; CHECK: @move
+define i32 @move() {
+; CHECK: %a = alloca %struct.a
+  br label %again
+; CHECK: again:
+again:
+; CHECK-NOT: alloca
+; CHECK: %1 = bitcast %struct.a* %a to i8*
+; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %1)
+  %a = alloca inalloca %struct.a
+; CHECK: %t = call fastcc i1 @f(%struct.a* %a)
+  %t = call i1 @f(%struct.a* inalloca %a)
+; CHECK: %2 = bitcast %struct.a* %a to i8*
+; CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* %2)
+  br i1 %t, label %again, label %done
+done:
+  ret i32 0
+}
+
+; Check that allocas already in the entry block stay before their uses.
+; CHECK: @dontmove
+define i32 @dontmove() {
+; CHECK: %a = alloca %struct.a
+  %a = alloca inalloca %struct.a
+; CHECK: %t = call fastcc i1 @f(%struct.a* %a)
+  %t = call i1 @f(%struct.a* inalloca %a)
+  ret i32 0
+}
+
+; Check that we insert lifetime ends for all successors of an invoke.
+; CHECK: @successors
+define i32 @successors() nounwind personality i32 (...)* @__CxxFrameHandler3 {
+; CHECK: %a = alloca %struct.a
+  br label %again
+; CHECK: again:
+again:
+; CHECK-NOT: alloca
+; CHECK: %1 = bitcast %struct.a* %a to i8*
+; CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* %1)
+; CHECK: %t = invoke fastcc i1 @f(%struct.a* %a)
+  %a = alloca inalloca %struct.a
+  %t = invoke i1 @f(%struct.a* inalloca %a)
+          to label %cont unwind label %unwind
+cont:
+; CHECK: %2 = bitcast %struct.a* %a to i8*
+; CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* %2)
+  br i1 %t, label %again, label %done
+unwind:
+  %cs = catchswitch within none [label %unwind.body] unwind to caller
+unwind.body:
+  %cp = catchpad within %cs [i8* null, i32 64, i8* null]
+; CHECK: %3 = bitcast %struct.a* %a to i8*
+; CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* %3)
+  catchret from %cp to label %retneg
+retneg:
+  ret i32 -1
+done:
+  ret i32 0
+}
+
+; Hoisting allocas out of basic blocks other than the one containing the call
+; that uses them requires more sophisticated analysis. For now, just leave
+; them alone.
+define void @control_flow(i1 %x) nounwind personality i32 (...)* @__CxxFrameHandler3 {
+  br i1 %x, label %done, label %doit
+
+doit:
+  %a = alloca inalloca %struct.a
+  %t = invoke i1 @g() to label %cont unwind label %unwind
+
+cont:
+  %ptr = getelementptr %struct.a, %struct.a* %a, i32 0, i32 0
+  store i1 %t, i1* %ptr
+  call i1 @f(%struct.a* inalloca %a)
+  br label %done
+
+unwind:
+  %cs = catchswitch within none [label %unwind.body] unwind to caller
+unwind.body:
+  %cp = catchpad within %cs [i8* null, i32 64, i8* null]
+  catchret from %cp to label %done
+
+done:
+  ret void
+}
+
+declare i32 @__CxxFrameHandler3(...)
+declare i1 @g()