diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp --- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -25,7 +25,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -44,7 +43,9 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" +#include "llvm/IR/ValueMap.h" #include "llvm/Support/Casting.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include #include @@ -62,42 +63,71 @@ STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); bool llvm::isAllocaPromotable(const AllocaInst *AI) { + SmallVector Roots; // FIXME: If the memory unit is of pointer or integer type, we can permit // assignments to subsections of the memory unit. unsigned AS = AI->getType()->getAddressSpace(); - // Only allow direct and non-volatile loads and stores... - for (const User *U : AI->users()) { - if (const LoadInst *LI = dyn_cast(U)) { - // Note that atomic loads can be transformed; atomic semantics do - // not have any meaning for a local alloca. - if (LI->isVolatile()) + Roots.push_back(AI); + while (Roots.size()) { + const Instruction *I = Roots.pop_back_val(); + bool NoOptCallsOnly = !!I->getNumUses(); + // Only allow direct and non-volatile loads and stores... + for (const Use &UU : I->uses()) { + const User *U = UU.getUser(); + if (const LoadInst *LI = dyn_cast(U)) { + // Note that atomic loads can be transformed; atomic semantics do + // not have any meaning for a local alloca. + if (LI->isVolatile()) + return false; + } else if (const StoreInst *SI = dyn_cast(U)) { + if (SI->getOperand(0) == AI) + return false; // Don't allow a store OF the AI, only INTO the AI. + // Note that atomic stores can be transformed; atomic semantics do + // not have any meaning for a local alloca. + if (SI->isVolatile()) + return false; + } else if (const IntrinsicInst *II = dyn_cast(U)) { + if (!II->isLifetimeStartOrEnd()) + return false; + } else if (const BitCastInst *BCI = dyn_cast(U)) { + Roots.push_back(BCI); + } else if (const GetElementPtrInst *GEPI = + dyn_cast(U)) { + if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) + return false; + if (!GEPI->hasAllZeroIndices()) + return false; + if (!onlyUsedByLifetimeMarkers(GEPI)) + return false; + // TODO: Add GEPI as a root and recurse on it. Would need to add support + // for GetElementPtr in PromoteMem2Reg. + } else if (const CallBase *CB = dyn_cast(U)) { + if (!CB->isArgOperand(&UU)) + return false; + unsigned ArgNo = CB->getArgOperandNo(&UU); + // Argument must not be captured for subsequent use + if (U->getType()->isPointerTy() && + !CB->paramHasAttr(ArgNo, Attribute::NoCapture)) + return false; + // Argument that is not accessed can be promoted + if (!(CB->hasFnAttr(Attribute::InaccessibleMemOnly) || + CB->hasFnAttr(Attribute::ReadNone) || + CB->paramHasAttr(ArgNo, Attribute::ReadNone))) + return false; + // Argument is the original alloca, and so far is only used as operand + // in non-accessed and non-capturing calls + if (I == AI) + continue; + } else return false; - } else if (const StoreInst *SI = dyn_cast(U)) { - if (SI->getOperand(0) == AI) - return false; // Don't allow a store OF the AI, only INTO the AI. - // Note that atomic stores can be transformed; atomic semantics do - // not have any meaning for a local alloca. - if (SI->isVolatile()) - return false; - } else if (const IntrinsicInst *II = dyn_cast(U)) { - if (!II->isLifetimeStartOrEnd()) - return false; - } else if (const BitCastInst *BCI = dyn_cast(U)) { - if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) - return false; - if (!onlyUsedByLifetimeMarkers(BCI)) - return false; - } else if (const GetElementPtrInst *GEPI = dyn_cast(U)) { - if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) - return false; - if (!GEPI->hasAllZeroIndices()) - return false; - if (!onlyUsedByLifetimeMarkers(GEPI)) - return false; - } else { - return false; + NoOptCallsOnly = false; } + + // Not promotable because it was created as a fresh alloca only used in + // non-accessed and non-capturing calls + if (NoOptCallsOnly) + return false; } return true; @@ -186,7 +216,6 @@ DenseMap InstNumbers; public: - /// This code only looks at accesses to allocas. static bool isInterestingInstruction(const Instruction *I) { return (isa(I) && isa(I->getOperand(0))) || @@ -312,27 +341,46 @@ AC->registerAssumption(CI); } -static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { +static void removeNonLoadStoreUsers(AllocaInst *AI, const DataLayout &DL) { + SmallVector Roots; + ValueMap VisitedMap; + // Knowing that this alloca is promotable, we know that it's safe to kill all // instructions except for load and store. - - for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) { - Instruction *I = cast(*UI); - ++UI; - if (isa(I) || isa(I)) - continue; - - if (!I->getType()->isVoidTy()) { - // The only users of this bitcast/GEP instruction are lifetime intrinsics. - // Follow the use/def chain to erase them now instead of leaving it for - // dead code elimination later. - for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) { - Instruction *Inst = cast(*UUI); - ++UUI; - Inst->eraseFromParent(); - } + Roots.push_back(AI); + while (Roots.size()) { + Instruction *V = Roots.pop_back_val(); + auto it = VisitedMap.insert({V, nullptr}).first; + for (auto UI = V->user_begin(), UE = V->user_end(); UI != UE;) { + Instruction *I = cast(*UI); + ++UI; + if (isa(I) || isa(I)) + continue; + else if (isa(I) || isa(I)) + Roots.push_back(I); + else if (isa(I) && !isa(I)) { + // Introduce a fresh alloca for this readnone operand, because it + // still needs some stack variable, but doesn't have to be this one. + if (!it->second) + it->second = new AllocaInst(V->getType()->getPointerElementType(), + DL.getAllocaAddrSpace(), nullptr, "", AI); + } else + I->eraseFromParent(); } - I->eraseFromParent(); + } + + // Replace non-leaf uses with fresh alloca, or remove it unless it is the + // original alloca + for (auto P : VisitedMap) { + if (P.first == AI) + continue; + if (!P.second) + P.first->dropAllReferences(); + else { + // Avoid updating the VisitedMap itself + P.first->replaceUsesWithIf(P.second, [](Use &U) { return true; }); + } + P.first->eraseFromParent(); } } @@ -544,7 +592,7 @@ assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); - removeLifetimeIntrinsicUsers(AI); + removeNonLoadStoreUsers(AI, SQ.DL); if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. diff --git a/llvm/test/Transforms/Mem2Reg/fnattr.ll b/llvm/test/Transforms/Mem2Reg/fnattr.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Mem2Reg/fnattr.ll @@ -0,0 +1,40 @@ +; RUN: opt -S < %s -mem2reg | FileCheck %s + +declare void @foo1a(i8* readnone nocapture, i8) local_unnamed_addr +declare void @foo1b(i8* nocapture, i8) local_unnamed_addr readnone +declare void @foo1c(i8* nocapture, i8) local_unnamed_addr inaccessiblememonly + +; Doesn't read from pointer argument +define i32 @a() norecurse { +; CHECK-LABEL: @a +; CHECK: alloca +; CHECK-NOT: %g1 +; CHECK: ret i32 42 +; CHECK: } + %g1 = alloca i32 + store i32 42, i32 *%g1 + %p = bitcast i32* %g1 to i8* + call void @foo1a(i8* %p, i8 0) + call void @foo1b(i8* %p, i8 0) + call void @foo1c(i8* %p, i8 0) + %a = load i32, i32* %g1 + ret i32 %a +} + +declare void @foo2a(i8*, i8) local_unnamed_addr readnone +declare void @foo2b(i8* nocapture, i8) local_unnamed_addr + +; May-capture and may-read not supported +define i32 @b() norecurse { +; CHECK-LABEL: @b +; CHECK: %g2 = alloca +; CHECK: load +; CHECK: } + %g2 = alloca i32 + store i32 42, i32 *%g2 + %p = bitcast i32* %g2 to i8* + call void @foo2a(i8* %p, i8 0) + call void @foo2b(i8* %p, i8 0) + %b = load i32, i32* %g2 + ret i32 %b +}