diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -68,6 +68,7 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include <cassert>
 #include <cstdint>
+#include <tuple>
 #include <utility>
 #include <vector>
@@ -1806,32 +1807,45 @@
   // We don't do an exhaustive search for memory operations - simply look
   // through bitcasts as they're quite common and benign.
   const DataLayout &DL = GV->getParent()->getDataLayout();
-  SmallVector<LoadInst *, 4> Loads;
-  SmallVector<StoreInst *, 4> Stores;
-  for (auto *U : GV->users()) {
-    if (Operator::getOpcode(U) == Instruction::BitCast) {
-      for (auto *UU : U->users()) {
-        if (auto *LI = dyn_cast<LoadInst>(UU))
-          Loads.push_back(LI);
-        else if (auto *SI = dyn_cast<StoreInst>(UU))
-          Stores.push_back(SI);
-        else
+  SmallVector<std::pair<const Instruction *, Type *>, 4> Loads;
+  SmallVector<const StoreInst *, 4> Stores;
+  SmallVector<const Value *, 4> Roots;
+
+  Roots.push_back(GV);
+  while (Roots.size()) {
+    const Value *R = Roots.pop_back_val();
+    for (auto &U : R->uses()) {
+      User *UU = U.getUser();
+      if (Operator::getOpcode(UU) == Instruction::BitCast)
+        Roots.push_back(UU);
+      else if (auto *LI = dyn_cast<LoadInst>(UU))
+        Loads.push_back(std::make_pair<>(LI, LI->getType()));
+      else if (auto *SI = dyn_cast<StoreInst>(UU))
+        Stores.push_back(SI);
+      else if (auto *CB = dyn_cast<CallBase>(UU)) {
+        // TODO: Handle isDroppable() case
+        if (!CB->isArgOperand(&U))
           return false;
-      }
-      continue;
-    }
-
-    Instruction *I = dyn_cast<Instruction>(U);
-    if (!I)
-      return false;
-    assert(I->getParent()->getParent() == F);
-
-    if (auto *LI = dyn_cast<LoadInst>(I))
-      Loads.push_back(LI);
-    else if (auto *SI = dyn_cast<StoreInst>(I))
-      Stores.push_back(SI);
-    else
-      return false;
+        unsigned ArgNo = CB->getArgOperandNo(&U);
+        // Argument must not be captured for subsequent use
+        if (!CB->paramHasAttr(ArgNo, Attribute::NoCapture))
+          return false;
+        // Depending on attributes, either treat calls as load at the
+        // call site, or ignore them if they are not going to be dereferenced.
+        if (CB->hasFnAttr(Attribute::InaccessibleMemOnly) ||
+            CB->hasFnAttr(Attribute::ReadNone) ||
+            CB->paramHasAttr(ArgNo, Attribute::ReadNone))
+          continue;
+        else if (CB->hasFnAttr(Attribute::ReadOnly) ||
+                 CB->paramHasAttr(ArgNo, Attribute::ReadOnly)) {
+          // Assume that in the worst case, the entire type is accessed
+          Loads.push_back({CB, GV->getType()->getPointerElementType()});
+        } else {
+          return false;
+        }
+      } else
+        return false;
+    }
   }
 
   // We have identified all uses of GV into loads and stores. Now check if all
@@ -1853,8 +1867,10 @@
   if (Loads.size() * Stores.size() > Threshold)
     return false;
 
-  for (auto *L : Loads) {
-    auto *LTy = L->getType();
+  for (auto &LP : Loads) {
+    Type *LTy;
+    const Instruction *L;
+    std::tie(L, LTy) = LP;
     if (none_of(Stores, [&](const StoreInst *S) {
           auto *STy = S->getValueOperand()->getType();
           // The load is only dominated by the store if DomTree says so
diff --git a/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
--- a/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -164,10 +164,27 @@
       if (MSI->isVolatile())
         return true;
       GS.StoredType = GlobalStatus::Stored;
-    } else if (auto C = ImmutableCallSite(I)) {
-      if (!C.isCallee(&U))
+    } else if (const CallBase *CB = dyn_cast<CallBase>(I)) {
+      if (CB->isCallee(&U)) {
+        GS.IsLoaded = true;
+      } else if (CB->isArgOperand(&U)) {
+        unsigned ArgNo = CB->getArgOperandNo(&U);
+        // Argument must not be captured for subsequent use
+        if (!CB->paramHasAttr(ArgNo, Attribute::NoCapture))
+          return true;
+        // Depending on attributes, treat the operand as a pure call or load
+        // at the call site.
+        if (CB->hasFnAttr(Attribute::InaccessibleMemOnly) ||
+            CB->hasFnAttr(Attribute::ReadNone) ||
+            CB->paramHasAttr(ArgNo, Attribute::ReadNone))
+          continue;
+        else if (CB->hasFnAttr(Attribute::ReadOnly) ||
+                 CB->paramHasAttr(ArgNo, Attribute::ReadOnly))
+          GS.IsLoaded = true;
+        else
+          return true;
+      } else
         return true;
-      GS.IsLoaded = true;
     } else {
       return true; // Any other non-load instruction might take address!
     }
diff --git a/llvm/test/Transforms/GlobalOpt/localize-fnattr.ll b/llvm/test/Transforms/GlobalOpt/localize-fnattr.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/localize-fnattr.ll
@@ -0,0 +1,80 @@
+; RUN: opt -S < %s -globalopt | FileCheck %s
+
+declare void @foo1a(i8* readnone nocapture, i8) local_unnamed_addr
+declare void @foo1b(i8* nocapture, i8) local_unnamed_addr readnone
+declare void @foo1c(i8* nocapture, i8) local_unnamed_addr inaccessiblememonly
+
+@G1 = internal global i32 0
+
+; Doesn't read from pointer argument
+define i32 @a() norecurse {
+; CHECK-LABEL: @a
+; CHECK: alloca
+; CHECK-NOT: @G1
+; CHECK: }
+  store i32 42, i32 *@G1
+  %p = bitcast i32* @G1 to i8*
+  call void @foo1a(i8* %p, i8 0)
+  call void @foo1b(i8* %p, i8 0)
+  call void @foo1c(i8* %p, i8 0)
+  %a = load i32, i32* @G1
+  ret i32 %a
+}
+
+declare void @foo2a(i8* readonly nocapture, i8) local_unnamed_addr
+declare void @foo2b(i8* nocapture, i8) local_unnamed_addr readonly
+
+@G2 = internal global i32 0
+
+; Reads from pointer argument, 8-bit call/load is less than 32-bit store
+define i32 @b() norecurse {
+; CHECK-LABEL: @b
+; CHECK: alloca
+; CHECK-NOT: @G2
+; CHECK: }
+  store i32 42, i32 *@G2
+  %p = bitcast i32* @G2 to i8*
+  call void @foo2a(i8* %p, i8 0)
+  call void @foo2b(i8* %p, i8 0)
+  ret i32 0
+}
+
+declare void @foo3a(i32* writeonly nocapture, i8) local_unnamed_addr
+declare void @foo3b(i32* nocapture, i8) local_unnamed_addr writeonly
+declare void @foo3c(i32* writeonly, i8) local_unnamed_addr writeonly
+
+@G3 = internal global i32 0
+
+; May-write to pointer argument, not supported
+define i32 @c() norecurse {
+; CHECK-LABEL: @c
+; CHECK-NOT: alloca
+; CHECK: @G3
+; CHECK: }
+  call void @foo3a(i32* @G3, i8 0)
+  call void @foo3b(i32* @G3, i8 0)
+  call void @foo3c(i32* @G3, i8 0)
+  %c = load i32, i32* @G3
+  ret i32 %c
+}
+
+declare void @foo4a(i8* readnone nocapture, i8) local_unnamed_addr
+declare void @foo4b(i8* readnone, i8) local_unnamed_addr
+declare void @llvm.assume(i1 %cond)
+
+@G4 = internal global i32 0
+
+; Operand bundle and may-capture not supported
+define i32 @d() norecurse {
+; CHECK-LABEL: @d
+; CHECK-NOT: alloca
+; CHECK: @G4
+; CHECK: }
+  store i32 42, i32 *@G4
+  call void @llvm.assume(i1 true) ["align"(i32 *@G4, i64 128)]
+  %p = bitcast i32* @G4 to i8*
+  call void @foo4a(i8* %p, i8 0)
+  call void @foo4b(i8* %p, i8 0)
+  %d = load i32, i32* @G4
+  ret i32 %d
+}