diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -68,6 +68,7 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include <cassert>
 #include <cstdint>
+#include <tuple>
 #include <utility>
 #include <vector>
 
@@ -1806,32 +1807,45 @@
   // We don't do an exhaustive search for memory operations - simply look
   // through bitcasts as they're quite common and benign.
   const DataLayout &DL = GV->getParent()->getDataLayout();
-  SmallVector<LoadInst *, 4> Loads;
-  SmallVector<StoreInst *, 4> Stores;
-  for (auto *U : GV->users()) {
-    if (Operator::getOpcode(U) == Instruction::BitCast) {
-      for (auto *UU : U->users()) {
-        if (auto *LI = dyn_cast<LoadInst>(UU))
-          Loads.push_back(LI);
-        else if (auto *SI = dyn_cast<StoreInst>(UU))
-          Stores.push_back(SI);
-        else
+  SmallVector<std::pair<const Instruction *, Type *>, 4> Loads;
+  SmallVector<StoreInst *, 4> Stores;
+  SmallVector<const Value *, 4> Roots;
+
+  Roots.push_back(GV);
+  while (Roots.size()) {
+    const Value *R = Roots.pop_back_val();
+    for (auto &U : R->uses()) {
+      User *UU = U.getUser();
+      if (Operator::getOpcode(UU) == Instruction::BitCast)
+        Roots.push_back(UU);
+      else if (auto *LI = dyn_cast<LoadInst>(UU))
+        Loads.push_back(std::make_pair<>(LI, LI->getType()));
+      else if (auto *SI = dyn_cast<StoreInst>(UU))
+        Stores.push_back(SI);
+      else if (auto *CB = dyn_cast<CallBase>(UU)) {
+        // TODO: Handle isDroppable() case
+        if (!CB->isArgOperand(&U))
           return false;
-      }
-      continue;
-    }
+        unsigned ArgNo = CB->getArgOperandNo(&U);
+        // Argument must not be captured for subsequent use
+        if (!CB->paramHasAttr(ArgNo, Attribute::NoCapture))
+          return false;
+        // Depending on attributes, either treat calls as load at the
+        // call site, or ignore them if they are not going to be dereferenced.
+        if (CB->hasFnAttr(Attribute::InaccessibleMemOnly) ||
+            CB->hasFnAttr(Attribute::ReadNone) ||
+            CB->paramHasAttr(ArgNo, Attribute::ReadNone))
+          continue;
+        else if (CB->hasFnAttr(Attribute::ReadOnly) ||
+                 CB->paramHasAttr(ArgNo, Attribute::ReadOnly)) {
+          // Assume that in the worst case, the entire type is accessed
+          Loads.push_back({CB, GV->getType()->getPointerElementType()});
+        } else {
+          return false;
+        }
+      } else
+        return false;
+    }
-
-    Instruction *I = dyn_cast<Instruction>(U);
-    if (!I)
-      return false;
-    assert(I->getParent()->getParent() == F);
-
-    if (auto *LI = dyn_cast<LoadInst>(I))
-      Loads.push_back(LI);
-    else if (auto *SI = dyn_cast<StoreInst>(I))
-      Stores.push_back(SI);
-    else
-      return false;
   }
 
   // We have identified all uses of GV into loads and stores. Now check if all
@@ -1853,8 +1867,10 @@
   if (Loads.size() * Stores.size() > Threshold)
     return false;
 
-  for (auto *L : Loads) {
-    auto *LTy = L->getType();
+  for (auto &LP : Loads) {
+    Type *LTy;
+    const Instruction *L;
+    std::tie(L, LTy) = LP;
     if (none_of(Stores, [&](const StoreInst *S) {
           auto *STy = S->getValueOperand()->getType();
           // The load is only dominated by the store if DomTree says so
diff --git a/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
--- a/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -164,10 +164,27 @@
         if (MSI->isVolatile())
           return true;
         GS.StoredType = GlobalStatus::Stored;
-      } else if (auto C = ImmutableCallSite(I)) {
-        if (!C.isCallee(&U))
+      } else if (const CallBase *CB = dyn_cast<CallBase>(I)) {
+        if (CB->isCallee(&U)) {
+          GS.IsLoaded = true;
+        } else if (CB->isArgOperand(&U)) {
+          unsigned ArgNo = CB->getArgOperandNo(&U);
+          // Argument must not be captured for subsequent use
+          if (!CB->paramHasAttr(ArgNo, Attribute::NoCapture))
+            return true;
+          // Depending on attributes, treat the operand as a pure call or load
+          // at the call site.
+          if (CB->hasFnAttr(Attribute::InaccessibleMemOnly) ||
+              CB->hasFnAttr(Attribute::ReadNone) ||
+              CB->paramHasAttr(ArgNo, Attribute::ReadNone))
+            continue;
+          else if (CB->hasFnAttr(Attribute::ReadOnly) ||
+                   CB->paramHasAttr(ArgNo, Attribute::ReadOnly))
+            GS.IsLoaded = true;
+          else
+            return true;
+        } else
           return true;
-        GS.IsLoaded = true;
       } else {
         return true; // Any other non-load instruction might take address!
       }
diff --git a/llvm/test/Transforms/GlobalOpt/localize-fnattr.ll b/llvm/test/Transforms/GlobalOpt/localize-fnattr.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/localize-fnattr.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S < %s -globalopt | FileCheck %s
+
+declare void @foo1a(i8* readnone nocapture, i8) local_unnamed_addr
+declare void @foo1b(i8* nocapture, i8) local_unnamed_addr readnone
+declare void @foo1c(i8* nocapture, i8) local_unnamed_addr inaccessiblememonly
+
+@G1 = internal global i32 0
+
+; Doesn't read from pointer argument
+define i32 @a() norecurse {
+; CHECK-LABEL: define {{[^@]+}}@a() local_unnamed_addr
+; CHECK-NEXT:    [[G1:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 0, i32* [[G1]]
+; CHECK-NEXT:    store i32 42, i32* [[G1]]
+; CHECK-NEXT:    [[P:%.*]] = bitcast i32* [[G1]] to i8*
+; CHECK-NEXT:    call void @foo1a(i8* [[P]], i8 0)
+; CHECK-NEXT:    call void @foo1b(i8* [[P]], i8 0)
+; CHECK-NEXT:    call void @foo1c(i8* [[P]], i8 0)
+; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[G1]]
+; CHECK-NEXT:    ret i32 [[A]]
+;
+  store i32 42, i32 *@G1
+  %p = bitcast i32* @G1 to i8*
+  call void @foo1a(i8* %p, i8 0)
+  call void @foo1b(i8* %p, i8 0)
+  call void @foo1c(i8* %p, i8 0)
+  %a = load i32, i32* @G1
+  ret i32 %a
+}
+
+declare void @foo2a(i8* readonly nocapture, i8) local_unnamed_addr
+declare void @foo2b(i8* nocapture, i8) local_unnamed_addr readonly
+
+@G2 = internal global i32 0
+
+; Reads from pointer argument, 8-bit call/load is less than 32-bit store
+define i32 @b() norecurse {
+; CHECK-LABEL: define {{[^@]+}}@b() local_unnamed_addr
+; CHECK-NEXT:    [[G2:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 0, i32* [[G2]]
+; CHECK-NEXT:    store i32 42, i32* [[G2]]
+; CHECK-NEXT:    [[P:%.*]] = bitcast i32* [[G2]] to i8*
+; CHECK-NEXT:    call void @foo2a(i8* [[P]], i8 0)
+; CHECK-NEXT:    call void @foo2b(i8* [[P]], i8 0)
+; CHECK-NEXT:    ret i32 0
+;
+  store i32 42, i32 *@G2
+  %p = bitcast i32* @G2 to i8*
+  call void @foo2a(i8* %p, i8 0)
+  call void @foo2b(i8* %p, i8 0)
+  ret i32 0
+}
+
+declare void @foo3a(i32* writeonly nocapture, i8) local_unnamed_addr
+declare void @foo3b(i32* nocapture, i8) local_unnamed_addr writeonly
+declare void @foo3c(i32* writeonly, i8) local_unnamed_addr writeonly
+
+@G3 = internal global i32 0
+
+; May-write to pointer argument, not supported
+define i32 @c() norecurse {
+; CHECK-LABEL: define {{[^@]+}}@c() local_unnamed_addr
+; CHECK-NEXT:    call void @foo3a(i32* @G3, i8 0)
+; CHECK-NEXT:    call void @foo3b(i32* @G3, i8 0)
+; CHECK-NEXT:    call void @foo3c(i32* @G3, i8 0)
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* @G3
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  call void @foo3a(i32* @G3, i8 0)
+  call void @foo3b(i32* @G3, i8 0)
+  call void @foo3c(i32* @G3, i8 0)
+  %c = load i32, i32* @G3
+  ret i32 %c
+}
+
+declare void @foo4a(i8* readnone nocapture, i8) local_unnamed_addr
+declare void @foo4b(i8* readnone, i8) local_unnamed_addr
+declare void @llvm.assume(i1 %cond)
+
+@G4 = internal global i32 0
+
+; Operand bundle and may-capture not supported
+define i32 @d() norecurse {
+; CHECK-LABEL: define {{[^@]+}}@d() local_unnamed_addr
+; CHECK-NEXT:    store i32 42, i32* @G4
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(i32* @G4, i64 128) ]
+; CHECK-NEXT:    [[P:%.*]] = bitcast i32* @G4 to i8*
+; CHECK-NEXT:    call void @foo4a(i8* [[P]], i8 0)
+; CHECK-NEXT:    call void @foo4b(i8* [[P]], i8 0)
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* @G4
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  store i32 42, i32 *@G4
+  call void @llvm.assume(i1 true) ["align"(i32 *@G4, i64 128)]
+  %p = bitcast i32* @G4 to i8*
+  call void @foo4a(i8* %p, i8 0)
+  call void @foo4b(i8* %p, i8 0)
+  %d = load i32, i32* @G4
+  ret i32 %d
+}