diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp --- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -25,7 +25,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -45,6 +44,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/Support/Casting.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include #include @@ -62,41 +62,60 @@ STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); bool llvm::isAllocaPromotable(const AllocaInst *AI) { + SmallVector Roots; // FIXME: If the memory unit is of pointer or integer type, we can permit // assignments to subsections of the memory unit. unsigned AS = AI->getType()->getAddressSpace(); - // Only allow direct and non-volatile loads and stores... - for (const User *U : AI->users()) { - if (const LoadInst *LI = dyn_cast(U)) { - // Note that atomic loads can be transformed; atomic semantics do - // not have any meaning for a local alloca. - if (LI->isVolatile()) + Roots.push_back(AI); + while (Roots.size()) { + const Instruction *I = Roots.pop_back_val(); + // Only allow direct and non-volatile loads and stores... + for (const Use &UU : I->uses()) { + const User *U = UU.getUser(); + if (const LoadInst *LI = dyn_cast(U)) { + // Note that atomic loads can be transformed; atomic semantics do + // not have any meaning for a local alloca. + if (LI->isVolatile()) + return false; + } else if (const StoreInst *SI = dyn_cast(U)) { + if (SI->getOperand(0) == AI) + return false; // Don't allow a store OF the AI, only INTO the AI. + // Note that atomic stores can be transformed; atomic semantics do + // not have any meaning for a local alloca. + if (SI->isVolatile()) + return false; + } else if (const IntrinsicInst *II = dyn_cast(U)) { + if (!II->isLifetimeStartOrEnd()) + return false; + } else if (const BitCastInst *BCI = dyn_cast(U)) { + Roots.push_back(BCI); + } else if (const GetElementPtrInst *GEPI = + dyn_cast(U)) { + if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) + return false; + if (!GEPI->hasAllZeroIndices()) + return false; + if (!onlyUsedByLifetimeMarkers(GEPI)) + return false; + // TODO: Add GEPI as a root and recurse on it. Would need to add support + // for GetElementPtr in PromoteMem2Reg. + } else if (const CallBase *CB = dyn_cast(U)) { + if (!CB->isArgOperand(&UU)) + return false; + unsigned ArgNo = CB->getArgOperandNo(&UU); + // Argument must not be captured for subsequent use + if (U->getType()->isPointerTy() && + !CB->paramHasAttr(ArgNo, Attribute::NoCapture)) + return false; + // Argument that is not accessed can be promoted + if (!(CB->hasFnAttr(Attribute::InaccessibleMemOnly) || + CB->hasFnAttr(Attribute::ReadNone) || + CB->paramHasAttr(ArgNo, Attribute::ReadNone))) + return false; + } else { return false; - } else if (const StoreInst *SI = dyn_cast(U)) { - if (SI->getOperand(0) == AI) - return false; // Don't allow a store OF the AI, only INTO the AI. - // Note that atomic stores can be transformed; atomic semantics do - // not have any meaning for a local alloca. - if (SI->isVolatile()) - return false; - } else if (const IntrinsicInst *II = dyn_cast(U)) { - if (!II->isLifetimeStartOrEnd()) - return false; - } else if (const BitCastInst *BCI = dyn_cast(U)) { - if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) - return false; - if (!onlyUsedByLifetimeMarkers(BCI)) - return false; - } else if (const GetElementPtrInst *GEPI = dyn_cast(U)) { - if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) - return false; - if (!GEPI->hasAllZeroIndices()) - return false; - if (!onlyUsedByLifetimeMarkers(GEPI)) - return false; - } else { - return false; + } } } @@ -186,7 +205,6 @@ DenseMap InstNumbers; public: - /// This code only looks at accesses to allocas. static bool isInterestingInstruction(const Instruction *I) { return (isa(I) && isa(I->getOperand(0))) || @@ -312,26 +330,43 @@ AC->registerAssumption(CI); } -static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { +static void removeNonLoadStoreUsers(AllocaInst *AI) { + SmallVector Roots; + SmallSet Visited; + // Knowing that this alloca is promotable, we know that it's safe to kill all // instructions except for load and store. + Roots.push_back(AI); + while (Roots.size()) { + Instruction *I = Roots.pop_back_val(); + Visited.insert(I); + for (auto UI = I->use_begin(), UE = I->use_end(); UI != UE;) { + Instruction *U = cast(UI->getUser()); + unsigned ArgNo = + isa(U) ? cast(U)->getArgOperandNo(&*UI) : -1; + ++UI; + if (isa(U) || isa(U)) + continue; + else if (isa(U) || isa(U)) + Roots.push_back(U); + else if (CallBase *CB = dyn_cast(U)) { + if (isa(U)) + U->eraseFromParent(); + else { + // Replace this operand with an UndefValue because it is readnone. + CB->setArgOperand(ArgNo, UndefValue::get(I->getType())); + } + } else + U->eraseFromParent(); + } + } - for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) { - Instruction *I = cast(*UI); - ++UI; - if (isa(I) || isa(I)) + // Remove all non-leaf uses except the AllocaInst itself + for (Instruction *I : Visited) { + if (I == AI) continue; - if (!I->getType()->isVoidTy()) { - // The only users of this bitcast/GEP instruction are lifetime intrinsics. - // Follow the use/def chain to erase them now instead of leaving it for - // dead code elimination later. - for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) { - Instruction *Inst = cast(*UUI); - ++UUI; - Inst->eraseFromParent(); - } - } + I->dropAllReferences(); I->eraseFromParent(); } } @@ -544,7 +579,7 @@ assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); - removeLifetimeIntrinsicUsers(AI); + removeNonLoadStoreUsers(AI); if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. diff --git a/llvm/test/Transforms/Mem2Reg/fnattr.ll b/llvm/test/Transforms/Mem2Reg/fnattr.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Mem2Reg/fnattr.ll @@ -0,0 +1,41 @@ +; RUN: opt -S < %s -mem2reg | FileCheck %s + +declare void @foo1a(i8* readnone nocapture, i8) local_unnamed_addr +declare void @foo1b(i8* nocapture, i8) local_unnamed_addr readnone +declare void @foo1c(i8* nocapture, i8) local_unnamed_addr inaccessiblememonly + +; Doesn't read from pointer argument +define i32 @a() norecurse { +; CHECK-LABEL: @a +; CHECK-NOT: alloca +; CHECK-NOT: %g1 +; CHECK: undef +; CHECK: ret i32 42 +; CHECK: } + %g1 = alloca i32 + store i32 42, i32 *%g1 + %p = bitcast i32* %g1 to i8* + call void @foo1a(i8* %p, i8 0) + call void @foo1b(i8* %p, i8 0) + call void @foo1c(i8* %p, i8 0) + %a = load i32, i32* %g1 + ret i32 %a +} + +declare void @foo2a(i8*, i8) local_unnamed_addr readnone +declare void @foo2b(i8* nocapture, i8) local_unnamed_addr + +; May-capture and may-read not supported +define i32 @b() norecurse { +; CHECK-LABEL: @b +; CHECK: %g2 = alloca +; CHECK: load +; CHECK: } + %g2 = alloca i32 + store i32 42, i32 *%g2 + %p = bitcast i32* %g2 to i8* + call void @foo2a(i8* %p, i8 0) + call void @foo2b(i8* %p, i8 0) + %b = load i32, i32* %g2 + ret i32 %b +}