diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -68,6 +68,7 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include <cassert>
 #include <cstdint>
+#include <tuple>
 #include <utility>
 #include <vector>
 
@@ -1806,32 +1807,47 @@
   // We don't do an exhaustive search for memory operations - simply look
   // through bitcasts as they're quite common and benign.
   const DataLayout &DL = GV->getParent()->getDataLayout();
-  SmallVector<LoadInst *, 4> Loads;
-  SmallVector<StoreInst *, 4> Stores;
-  for (auto *U : GV->users()) {
-    if (Operator::getOpcode(U) == Instruction::BitCast) {
-      for (auto *UU : U->users()) {
-        if (auto *LI = dyn_cast<LoadInst>(UU))
-          Loads.push_back(LI);
-        else if (auto *SI = dyn_cast<StoreInst>(UU))
-          Stores.push_back(SI);
-        else
+  SmallVector<std::pair<const Instruction *, Type *>, 4> Loads;
+  SmallVector<std::pair<const Instruction *, Type *>, 4> Stores;
+  SmallVector<Value *, 4> Roots;
+
+  Roots.push_back(GV);
+  while (Roots.size()) {
+    Value *R = Roots.pop_back_val();
+    for (auto &U : R->uses()) {
+      User *UU = U.getUser();
+      if (Operator::getOpcode(UU) == Instruction::BitCast)
+        Roots.push_back(UU);
+      else if (auto *LI = dyn_cast<LoadInst>(UU))
+        Loads.push_back(std::make_pair<>(LI, LI->getType()));
+      else if (auto *SI = dyn_cast<StoreInst>(UU))
+        Stores.push_back(
+            std::make_pair<>(SI, SI->getValueOperand()->getType()));
+      else if (auto *CB = dyn_cast<CallBase>(UU)) {
+        // TODO: Handle isDroppable() case
+        if (!CB->isArgOperand(&U))
           return false;
-      }
-      continue;
+        unsigned ArgNo = CB->getArgOperandNo(&U);
+        // Argument must not be captured for subsequent use
+        if (!CB->paramHasAttr(ArgNo, Attribute::NoCapture))
+          return false;
+        // Depending on attributes, either treat calls as loads/stores at the
+        // call site, or ignore them if they are not going to be dereferenced.
+        if (CB->hasFnAttr(Attribute::InaccessibleMemOnly) ||
+            CB->hasFnAttr(Attribute::ReadNone) ||
+            CB->paramHasAttr(ArgNo, Attribute::ReadNone))
+          continue;
+        else if (CB->hasFnAttr(Attribute::ReadOnly) ||
+                 CB->paramHasAttr(ArgNo, Attribute::ReadOnly)) {
+          // Assume that in the worst case, the entire type is accessed
+          Loads.push_back({CB, U->getType()->getPointerElementType()});
+        } else {
+          // TODO: writeonly is not yet supported
+          return false;
+        }
+      } else
+        return false;
     }
-
-    Instruction *I = dyn_cast<Instruction>(U);
-    if (!I)
-      return false;
-    assert(I->getParent()->getParent() == F);
-
-    if (auto *LI = dyn_cast<LoadInst>(I))
-      Loads.push_back(LI);
-    else if (auto *SI = dyn_cast<StoreInst>(I))
-      Stores.push_back(SI);
-    else
-      return false;
   }
 
   // We have identified all uses of GV into loads and stores. Now check if all
@@ -1853,10 +1869,12 @@
   if (Loads.size() * Stores.size() > Threshold)
     return false;
 
-  for (auto *L : Loads) {
-    auto *LTy = L->getType();
-    if (none_of(Stores, [&](const StoreInst *S) {
-          auto *STy = S->getValueOperand()->getType();
+  for (auto &LP : Loads) {
+    Type *LTy, *STy;
+    const Instruction *L, *S;
+    std::tie(L, LTy) = LP;
+    if (none_of(Stores, [&](auto &SP) {
+          std::tie(S, STy) = SP;
           // The load is only dominated by the store if DomTree says so
           // and the number of bits loaded in L is less than or equal to
           // the number of bits stored in S.
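Note: a minimal IR sketch of the localization this hunk enables; @g, @ext, and @f are hypothetical names, not part of the patch. Because the argument is nocapture readnone, the callee can neither retain nor dereference the pointer, so the call no longer prevents the pass from shrinking the global to an alloca:

declare void @ext(i8* nocapture readnone)

@g = internal global i32 0

define i32 @f() norecurse {
  ; The store dominates the load, and the only other use of @g is a benign
  ; call argument, so -globalopt may now rewrite @g as a function-local alloca.
  store i32 42, i32* @g
  %p = bitcast i32* @g to i8*
  call void @ext(i8* %p)
  %v = load i32, i32* @g
  ret i32 %v
}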
diff --git a/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
--- a/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -164,10 +164,30 @@
         if (MSI->isVolatile())
           return true;
         GS.StoredType = GlobalStatus::Stored;
-      } else if (auto C = ImmutableCallSite(I)) {
-        if (!C.isCallee(&U))
+      } else if (const CallBase *CB = dyn_cast<CallBase>(I)) {
+        if (CB->isCallee(&U)) {
+          GS.IsLoaded = true;
+        } else if (CB->isArgOperand(&U)) {
+          unsigned ArgNo = CB->getArgOperandNo(&U);
+          // Argument must not be captured for subsequent use
+          if (!CB->paramHasAttr(ArgNo, Attribute::NoCapture))
+            return true;
+          // Depending on attributes, treat the operand as a pure call, load,
+          // or store at the call site.
+          if (CB->hasFnAttr(Attribute::InaccessibleMemOnly) ||
+              CB->hasFnAttr(Attribute::ReadNone) ||
+              CB->paramHasAttr(ArgNo, Attribute::ReadNone))
+            continue;
+          else if (CB->hasFnAttr(Attribute::ReadOnly) ||
+                   CB->paramHasAttr(ArgNo, Attribute::ReadOnly))
+            GS.IsLoaded = true;
+          else if (CB->hasFnAttr(Attribute::WriteOnly) ||
+                   CB->paramHasAttr(ArgNo, Attribute::WriteOnly))
+            GS.StoredType = GlobalStatus::Stored;
+          else
+            return true;
+        } else
           return true;
-        GS.IsLoaded = true;
       } else {
         return true; // Any other non-load instruction might take address!
       }
diff --git a/llvm/test/Transforms/GlobalOpt/invariant.group.ll b/llvm/test/Transforms/GlobalOpt/invariant.group.ll
--- a/llvm/test/Transforms/GlobalOpt/invariant.group.ll
+++ b/llvm/test/Transforms/GlobalOpt/invariant.group.ll
@@ -4,7 +4,7 @@
 ; FIXME: @tmp and @tmp2 can be safely set to 42
 ; CHECK: @tmp = local_unnamed_addr global i32 0
 ; CHECK: @tmp2 = local_unnamed_addr global i32 0
-; CHECK: @tmp3 = global i32 0
+; CHECK: @tmp3 = local_unnamed_addr global i32 0
 
 @tmp = global i32 0
 @tmp2 = global i32 0
diff --git a/llvm/test/Transforms/GlobalOpt/localize-fnattr.ll b/llvm/test/Transforms/GlobalOpt/localize-fnattr.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/localize-fnattr.ll
@@ -0,0 +1,78 @@
+; RUN: opt -S < %s -globalopt | FileCheck %s
+
+declare void @foo1a(i8* readnone nocapture, i8) local_unnamed_addr
+declare void @foo1b(i8* nocapture, i8) local_unnamed_addr readnone
+declare void @foo1c(i8* nocapture, i8) local_unnamed_addr inaccessiblememonly
+
+@G1 = internal global i32 0
+
+; Doesn't read from pointer argument
+define i32 @a() norecurse {
+; CHECK-LABEL: @a
+; CHECK: alloca
+; CHECK-NOT: @G1
+; CHECK: }
+  store i32 42, i32 *@G1
+  %p = bitcast i32* @G1 to i8*
+  call void @foo1a(i8* %p, i8 0)
+  call void @foo1b(i8* %p, i8 0)
+  call void @foo1c(i8* %p, i8 0)
+  %a = load i32, i32* @G1
+  ret i32 %a
+}
+
+declare void @foo2a(i8* readonly nocapture, i8) local_unnamed_addr
+declare void @foo2b(i8* nocapture, i8) local_unnamed_addr readonly
+
+@G2 = internal global i32 0
+
+; Reads from pointer argument, 8-bit call/load is less than 32-bit store
+define i32 @b() norecurse {
+; CHECK-LABEL: @b
+; CHECK: alloca
+; CHECK-NOT: @G2
+; CHECK: }
+  store i32 42, i32 *@G2
+  %p = bitcast i32* @G2 to i8*
+  call void @foo2a(i8* %p, i8 0)
+  call void @foo2b(i8* %p, i8 0)
+  ret i32 0
+}
+
+declare void @foo3a(i32* writeonly nocapture, i8) local_unnamed_addr
+declare void @foo3b(i32* nocapture, i8) local_unnamed_addr writeonly
+
+@G3 = internal global i32 0
+
+; Writes to pointer argument, not supported
+define i32 @c() norecurse {
+; CHECK-LABEL: @c
+; CHECK-NOT: alloca
+; CHECK: @G3
+; CHECK: }
+  call void @foo3a(i32* @G3, i8 0)
+  call void @foo3b(i32* @G3, i8 0)
+  %a = load i32, i32* @G3
+  ret i32 %a
+}
+
+declare void @foo4a(i8* readnone nocapture, i8) local_unnamed_addr
+declare void @foo4b(i8* readnone, i8) local_unnamed_addr
+declare void @llvm.assume(i1 %cond)
+
+@G4 = internal global i32 0
+
+; Operand bundle and may-capture not supported
+define i32 @d() norecurse {
+; CHECK-LABEL: @d
+; CHECK-NOT: alloca
+; CHECK: @G4
+; CHECK: }
+  store i32 42, i32 *@G4
+  call void @llvm.assume(i1 true) ["align"(@G4, i64 128)]
+  %p = bitcast i32* @G4 to i8*
+  call void @foo4a(i8* %p, i8 0)
+  call void @foo4b(i8* %p, i8 0)
+  %a = load i32, i32* @G4
+  ret i32 %a
+}
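Note: the GlobalStatus.cpp hunk mirrors this classification on the analysis side. A sketch under the same assumptions (@g5, @peek, and @caller are hypothetical): a call that only passes the global as a nocapture readonly argument is now recorded via GS.IsLoaded instead of being rejected as an unknown address-taking use, which plausibly is what lets @tmp3 in invariant.group.ll above be marked local_unnamed_addr.

declare void @peek(i32* nocapture readonly)

@g5 = global i32 0

define void @caller() {
  ; analyzeGlobal() now classifies this call as a load of @g5 rather than
  ; treating it as a use that might take the global's address.
  call void @peek(i32* @g5)
  ret void
}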