diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -217,6 +217,24 @@ bool isAssumedReadNone(Attributor &A, const IRPosition &IRP, const AbstractAttribute &QueryingAA, bool &IsKnown); +/// Return true if \p ToI is potentially reachable from \p FromI. The two +/// instructions do not need to be in the same function. \p GoBackwardsCB +/// can be provided to convey domain knowledge about the "lifespan" the user is +/// interested in. By default, the callers of \p FromI are checked as well to +/// determine if \p ToI can be reached. If the query is not interested in +/// callers beyond a certain point, e.g., a GPU kernel entry or the function +/// containing an alloca, the \p GoBackwardsCB should return false. +bool isPotentiallyReachable( + Attributor &A, const Instruction &FromI, const Instruction &ToI, + const AbstractAttribute &QueryingAA, + std::function GoBackwardsCB = nullptr); + +/// Same as above but it is sufficient to reach any instruction in \p ToFn. +bool isPotentiallyReachable( + Attributor &A, const Instruction &FromI, const Function &ToFn, + const AbstractAttribute &QueryingAA, + std::function GoBackwardsCB); + } // namespace AA /// The value passed to the line option that defines the maximal initialization @@ -4636,11 +4654,12 @@ /// If the function represented by this possition can reach \p Fn. virtual bool canReach(Attributor &A, const Function &Fn) const = 0; - /// Can \p CB reach \p Fn + /// Can \p CB reach \p Fn. virtual bool canReach(Attributor &A, CallBase &CB, const Function &Fn) const = 0; - /// Can \p Inst reach \p Fn + /// Can \p Inst reach \p Fn. + /// See also AA::isPotentiallyReachable. virtual bool instructionCanReach(Attributor &A, const Instruction &Inst, const Function &Fn, bool UseBackwards = true) const = 0; diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -436,6 +436,121 @@ /* RequireReadNone */ true, IsKnown); } +static bool +isPotentiallyReachable(Attributor &A, const Instruction &FromI, + const Instruction *ToI, const Function &ToFn, + const AbstractAttribute &QueryingAA, + std::function GoBackwardsCB) { + LLVM_DEBUG(dbgs() << "[AA] isPotentiallyReachable @" << ToFn.getName() + << " from " << FromI << " [GBCB: " << bool(GoBackwardsCB) + << "]\n"); + + SmallPtrSet Visited; + SmallVector Worklist; + Worklist.push_back(&FromI); + + while (!Worklist.empty()) { + const Instruction *CurFromI = Worklist.pop_back_val(); + if (!Visited.insert(CurFromI).second) + continue; + + const Function *FromFn = CurFromI->getFunction(); + if (FromFn == &ToFn) { + if (!ToI) + return true; + LLVM_DEBUG(dbgs() << "[AA] check " << *ToI << " from " << *CurFromI + << " intraprocedurally\n"); + const auto &ReachabilityAA = A.getAAFor( + QueryingAA, IRPosition::function(ToFn), DepClassTy::OPTIONAL); + bool Result = ReachabilityAA.isAssumedReachable(A, *CurFromI, *ToI); + LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " " + << (Result ? "can potentially " : "cannot ") << "reach " + << *ToI << " [Intra]\n"); + if (Result) + return true; + continue; + } + + // TODO: If we can go arbitrarily backwards we will eventually reach an + // entry point that can reach ToI. Only once this takes a set of blocks + // through which we cannot go, or once we track internal functions not + // accessible from the outside, it makes sense to perform backwards analysis + // in the absence of a GoBackwardsCB. + if (!GoBackwardsCB) { + LLVM_DEBUG(dbgs() << "[AA] check @" << ToFn.getName() << " from " + << *CurFromI << " is not checked backwards, abort\n"); + return true; + } + + // Check if the current instruction is already known to reach the ToFn. + const auto &FnReachabilityAA = A.getAAFor( + QueryingAA, IRPosition::function(*FromFn), DepClassTy::OPTIONAL); + bool Result = FnReachabilityAA.instructionCanReach( + A, *CurFromI, ToFn, /* UseBackwards */ false); + LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " in @" << FromFn->getName() + << " " << (Result ? "can potentially " : "cannot ") + << "reach @" << ToFn.getName() << " [FromFn]\n"); + if (Result) + return true; + + // If we do not go backwards from the FromFn we are done here and so far we + // could not find a way to reach ToFn/ToI. + if (!GoBackwardsCB(*FromFn)) + continue; + + LLVM_DEBUG(dbgs() << "Stepping backwards to the call sites of @" + << FromFn->getName() << "\n"); + + auto CheckCallSite = [&](AbstractCallSite ACS) { + CallBase *CB = ACS.getInstruction(); + if (!CB) + return false; + + if (isa(CB)) + return false; + + Instruction *Inst = CB->getNextNonDebugInstruction(); + Worklist.push_back(Inst); + return true; + }; + + bool AllCallSitesKnown; + Result = !A.checkForAllCallSites(CheckCallSite, *FromFn, + /* RequireAllCallSites */ true, + &QueryingAA, AllCallSitesKnown); + if (Result) { + LLVM_DEBUG(dbgs() << "[AA] stepping back to call sites from " << *CurFromI + << " in @" << FromFn->getName() + << " failed, give up\n"); + return true; + } + + LLVM_DEBUG(dbgs() << "[AA] stepped back to call sites from " << *CurFromI + << " in @" << FromFn->getName() + << " worklist size is: " << Worklist.size() << "\n"); + } + return false; +} + +bool AA::isPotentiallyReachable( + Attributor &A, const Instruction &FromI, const Instruction &ToI, + const AbstractAttribute &QueryingAA, + std::function GoBackwardsCB) { + LLVM_DEBUG(dbgs() << "[AA] isPotentiallyReachable " << ToI << " from " + << FromI << " [GBCB: " << bool(GoBackwardsCB) << "]\n"); + const Function *ToFn = ToI.getFunction(); + return ::isPotentiallyReachable(A, FromI, &ToI, *ToFn, QueryingAA, + GoBackwardsCB); +} + +bool AA::isPotentiallyReachable( + Attributor &A, const Instruction &FromI, const Function &ToFn, + const AbstractAttribute &QueryingAA, + std::function GoBackwardsCB) { + return ::isPotentiallyReachable(A, FromI, /* ToI */ nullptr, ToFn, QueryingAA, + GoBackwardsCB); +} + /// Return true if \p New is equal or worse than \p Old. static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) { if (!Old.isIntAttribute()) @@ -1464,9 +1579,11 @@ InvalidAA->Deps.pop_back(); AbstractAttribute *DepAA = cast(Dep.getPointer()); if (Dep.getInt() == unsigned(DepClassTy::OPTIONAL)) { + LLVM_DEBUG(dbgs() << " - recompute: " << *DepAA); Worklist.insert(DepAA); continue; } + LLVM_DEBUG(dbgs() << " - invalidate: " << *DepAA); DepAA->getState().indicatePessimisticFixpoint(); assert(DepAA->getState().isAtFixpoint() && "Expected fixpoint state!"); if (!DepAA->getState().isValidState()) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1130,19 +1130,63 @@ QueryingAA, IRPosition::function(*LI.getFunction()), DepClassTy::OPTIONAL); - // Helper to determine if the instruction may reach the load. - auto IsReachableFrom = [&](const Instruction &I) { - const auto &ReachabilityAA = A.getAAFor( - QueryingAA, IRPosition::function(*I.getFunction()), - DepClassTy::OPTIONAL); - return ReachabilityAA.isAssumedReachable(A, I, LI); - }; - - const bool CanUseCFGResoning = - NoRecurseAA.isKnownNoRecurse() && CanIgnoreThreading(LI); + const bool CanUseCFGResoning = CanIgnoreThreading(LI); InformationCache &InfoCache = A.getInfoCache(); const DominatorTree *DT = - InfoCache.getAnalysisResultForFunction(Scope); + NoRecurseAA.isKnownNoRecurse() + ? InfoCache.getAnalysisResultForFunction( + Scope) + : nullptr; + + enum GPUAddressSpace : unsigned { + Generic = 0, + Global = 1, + Shared = 3, + Constant = 4, + Local = 5, + }; + + // Helper to check if a value has "kernel lifetime", that is it will not + // outlive a GPU kernel. This is true for shared, constant, and local + // globals on AMD and NVIDIA GPUs. + auto HasKernelLifetime = [&](Value *V, Module &M) { + Triple T(M.getTargetTriple()); + if (!(T.isAMDGPU() || T.isNVPTX())) + return false; + switch (V->getType()->getPointerAddressSpace()) { + case GPUAddressSpace::Shared: + case GPUAddressSpace::Constant: + case GPUAddressSpace::Local: + return true; + default: + return false; + }; + }; + + // The IsLiveInCalleeCB will be used by the AA::isPotentiallyReachable query + // to determine if we should look at reachability from the callee. For + // certain pointers we know the lifetime and we do not have to step into the + // callee to determine reachability as the pointer would be dead in the + // callee. See the conditional initialization below. + std::function IsLiveInCalleeCB; + + if (auto *AI = dyn_cast(&getAssociatedValue())) { + // If the alloca containing function is not recursive the alloca + // must be dead in the callee. + const Function *AIFn = AI->getFunction(); + const auto &NoRecurseAA = A.getAAFor( + *this, IRPosition::function(*AIFn), DepClassTy::OPTIONAL); + if (NoRecurseAA.isAssumedNoRecurse()) { + IsLiveInCalleeCB = [AIFn](const Function &Fn) { return AIFn != &Fn; }; + } + } else if (auto *GV = dyn_cast(&getAssociatedValue())) { + // If the global has kernel lifetime we can stop if we reach a kernel + // as it is "dead" in the (unknown) callees. + if (HasKernelLifetime(GV, *GV->getParent())) + IsLiveInCalleeCB = [](const Function &Fn) { + return !Fn.hasFnAttribute("kernel"); + }; + } auto AccessCB = [&](const Access &Acc, bool Exact) { if (!Acc.isWrite()) @@ -1151,7 +1195,8 @@ // For now we only filter accesses based on CFG reasoning which does not // work yet if we have threading effects, or the access is complicated. if (CanUseCFGResoning) { - if (!IsReachableFrom(*Acc.getLocalInst())) + if (!AA::isPotentiallyReachable(A, *Acc.getLocalInst(), LI, QueryingAA, + IsLiveInCalleeCB)) return true; if (DT && Exact && (Acc.getLocalInst()->getFunction() == LI.getFunction()) && @@ -9674,7 +9719,7 @@ if (!Reachability.isAssumedReachable(A, Inst, CBInst)) return true; - const auto &CB = cast(CBInst); + auto &CB = cast(CBInst); const AACallEdges &AAEdges = A.getAAFor( *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED); @@ -9684,47 +9729,8 @@ bool UsedAssumedInformation = false; return A.checkForAllCallLikeInstructions(CheckCallBase, *this, - UsedAssumedInformation); - } - - ChangeStatus checkReachableBackwards(Attributor &A, QuerySet &Set) { - ChangeStatus Change = ChangeStatus::UNCHANGED; - - // For all remaining instruction queries, check - // callers. A call inside that function might satisfy the query. - auto CheckCallSite = [&](AbstractCallSite CallSite) { - CallBase *CB = CallSite.getInstruction(); - if (!CB) - return false; - - if (isa(CB)) - return false; - - Instruction *Inst = CB->getNextNonDebugInstruction(); - const AAFunctionReachability &AA = A.getAAFor( - *this, IRPosition::function(*Inst->getFunction()), - DepClassTy::REQUIRED); - for (const Function *Fn : make_early_inc_range(Set.Unreachable)) { - if (AA.instructionCanReach(A, *Inst, *Fn, /* UseBackwards */ false)) { - Set.markReachable(*Fn); - Change = ChangeStatus::CHANGED; - } - } - return true; - }; - - bool NoUnknownCall = true; - if (A.checkForAllCallSites(CheckCallSite, *this, true, NoUnknownCall)) - return Change; - - // If we don't know all callsites we have to assume that we can reach fn. - for (auto &QSet : InstQueriesBackwards) { - if (!QSet.second.CanReachUnknownCallee) - Change = ChangeStatus::CHANGED; - QSet.second.CanReachUnknownCallee = true; - } - - return Change; + UsedAssumedInformation, + /* CheckBBLivenessOnly */ true); } public: @@ -9776,12 +9782,15 @@ if (!isValidState()) return true; - const auto &Reachability = &A.getAAFor( + if (UseBackwards) + return AA::isPotentiallyReachable(A, Inst, Fn, *this, nullptr); + + const auto &Reachability = A.getAAFor( *this, IRPosition::function(*getAssociatedFunction()), DepClassTy::REQUIRED); SmallVector CallEdges; - bool AllKnown = getReachableCallEdges(A, *Reachability, Inst, CallEdges); + bool AllKnown = getReachableCallEdges(A, Reachability, Inst, CallEdges); // Attributor returns attributes as const, so this function has to be // const for users of this attribute to use it without having to do // a const_cast. @@ -9791,25 +9800,7 @@ if (!AllKnown) InstQSet.CanReachUnknownCallee = true; - bool ForwardsResult = InstQSet.isReachable(A, *NonConstThis, CallEdges, Fn); - if (ForwardsResult) - return true; - // We are done. - if (!UseBackwards) - return false; - - QuerySet &InstBackwardsQSet = NonConstThis->InstQueriesBackwards[&Inst]; - - Optional BackwardsCached = InstBackwardsQSet.isCachedReachable(Fn); - if (BackwardsCached.hasValue()) - return BackwardsCached.getValue(); - - // Assume unreachable, to prevent problems. - InstBackwardsQSet.Unreachable.insert(&Fn); - - // Check backwards reachability. - NonConstThis->checkReachableBackwards(A, InstBackwardsQSet); - return InstBackwardsQSet.isCachedReachable(Fn).getValue(); + return InstQSet.isReachable(A, *NonConstThis, CallEdges, Fn); } /// See AbstractAttribute::updateImpl(...). @@ -9847,10 +9838,6 @@ Change |= InstPair.second.update(A, *this, CallEdges); } - // Update backwards queries. - for (auto &QueryPair : InstQueriesBackwards) - Change |= checkReachableBackwards(A, QueryPair.second); - return Change; } @@ -9879,9 +9866,6 @@ /// This is for instruction queries than scan "forward". DenseMap InstQueries; - - /// This is for instruction queries than scan "backward". - DenseMap InstQueriesBackwards; }; /// ---------------------- Assumption Propagation ------------------------------ diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; Test that we only promote arguments when the caller/callee have compatible diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll @@ -76,7 +76,7 @@ ; CHECK-LABEL: define {{[^@]+}}@callercaller ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 -; CHECK-NEXT: ret i32 3 +; CHECK-NEXT: ret i32 undef ; %B = alloca i32 store i32 2, i32* %B diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll @@ -35,7 +35,7 @@ ; CHECK-LABEL: define {{[^@]+}}@callercaller ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 -; CHECK-NEXT: ret i32 3 +; CHECK-NEXT: ret i32 undef ; %B = alloca i32 store i32 2, i32* %B diff --git a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll --- a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll +++ b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll @@ -296,7 +296,7 @@ ; IS__CGSCC____-NEXT: store i32 [[TMP1]], i32* [[W0]], align 4 ; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32* @internal_ret0_nw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] ; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[W0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] -; IS__CGSCC____-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) #[[ATTR5:[0-9]+]] +; IS__CGSCC____-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) #[[ATTR4]] ; IS__CGSCC____-NEXT: [[CALL4:%.*]] = call i32* @external_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] ; IS__CGSCC____-NEXT: br label [[RETURN]] ; IS__CGSCC____: return: @@ -339,8 +339,8 @@ ; IS__CGSCC____-LABEL: define {{[^@]+}}@external_source_ret2_nrw ; IS__CGSCC____-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[R0:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR2:[0-9]+]] { ; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly [[R0]], i32* nofree writeonly "no-capture-maybe-returned" [[W0]]) #[[ATTR5]] -; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree [[R0]], i32* nofree [[W0]]) #[[ATTR4]] +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly [[R0]], i32* nofree writeonly "no-capture-maybe-returned" [[W0]]) #[[ATTR4]] +; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree [[R0]], i32* nofree [[W0]]) #[[ATTR5:[0-9]+]] ; IS__CGSCC____-NEXT: ret i32* [[CALL1]] ; entry: @@ -362,6 +362,6 @@ ; IS__CGSCC____: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn } ; IS__CGSCC____: attributes #[[ATTR2]] = { argmemonly nofree norecurse nosync nounwind } ; IS__CGSCC____: attributes #[[ATTR3]] = { nofree nosync nounwind } -; IS__CGSCC____: attributes #[[ATTR4]] = { nounwind } -; IS__CGSCC____: attributes #[[ATTR5]] = { nounwind willreturn } +; IS__CGSCC____: attributes #[[ATTR4]] = { nounwind willreturn } +; IS__CGSCC____: attributes #[[ATTR5]] = { nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll --- a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM @@ -18,12 +18,19 @@ ; CHECK: @[[UNREACHABLENONKERNEL:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 0, align 4 ;. define dso_local void @kernel(i32 %C) norecurse "kernel" { -; CHECK: Function Attrs: norecurse nosync nounwind -; CHECK-LABEL: define {{[^@]+}}@kernel -; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: call void @level1Kernel(i32 [[C]]) #[[ATTR3:[0-9]+]] -; CHECK-NEXT: ret void +; IS__TUNIT____: Function Attrs: norecurse nosync nounwind +; IS__TUNIT____-LABEL: define {{[^@]+}}@kernel +; IS__TUNIT____-SAME: (i32 [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: call void @level1Kernel(i32 [[C]]) #[[ATTR3:[0-9]+]] +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: norecurse nosync nounwind +; IS__CGSCC____-LABEL: define {{[^@]+}}@kernel +; IS__CGSCC____-SAME: (i32 [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: call void @level1Kernel(i32 [[C]]) #[[ATTR4:[0-9]+]] +; IS__CGSCC____-NEXT: ret void ; entry: call void @level1Kernel(i32 %C) @@ -31,22 +38,39 @@ } define internal void @level1Kernel(i32 %C) { -; CHECK: Function Attrs: norecurse nosync nounwind -; CHECK-LABEL: define {{[^@]+}}@level1Kernel -; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: call void @level2Kernelall_early() #[[ATTR4:[0-9]+]] -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0 -; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then: -; CHECK-NEXT: call void @level2Kernela() #[[ATTR3]] -; CHECK-NEXT: br label [[IF_END:%.*]] -; CHECK: if.else: -; CHECK-NEXT: call void @level2Kernelb() #[[ATTR3]] -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: -; CHECK-NEXT: call void @level2Kernelall_late() #[[ATTR5:[0-9]+]] -; CHECK-NEXT: ret void +; IS__TUNIT____: Function Attrs: norecurse nosync nounwind +; IS__TUNIT____-LABEL: define {{[^@]+}}@level1Kernel +; IS__TUNIT____-SAME: (i32 [[C:%.*]]) #[[ATTR1:[0-9]+]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: call void @level2Kernelall_early() #[[ATTR4:[0-9]+]] +; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0 +; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; IS__TUNIT____: if.then: +; IS__TUNIT____-NEXT: call void @level2Kernela() #[[ATTR3]] +; IS__TUNIT____-NEXT: br label [[IF_END:%.*]] +; IS__TUNIT____: if.else: +; IS__TUNIT____-NEXT: call void @level2Kernelb() #[[ATTR3]] +; IS__TUNIT____-NEXT: br label [[IF_END]] +; IS__TUNIT____: if.end: +; IS__TUNIT____-NEXT: call void @level2Kernelall_late() #[[ATTR5:[0-9]+]] +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: norecurse nosync nounwind +; IS__CGSCC____-LABEL: define {{[^@]+}}@level1Kernel +; IS__CGSCC____-SAME: (i32 [[C:%.*]]) #[[ATTR1:[0-9]+]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: call void @level2Kernelall_early() #[[ATTR5:[0-9]+]] +; IS__CGSCC____-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0 +; IS__CGSCC____-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; IS__CGSCC____: if.then: +; IS__CGSCC____-NEXT: call void @level2Kernela() #[[ATTR4]] +; IS__CGSCC____-NEXT: br label [[IF_END:%.*]] +; IS__CGSCC____: if.else: +; IS__CGSCC____-NEXT: call void @level2Kernelb() #[[ATTR4]] +; IS__CGSCC____-NEXT: br label [[IF_END]] +; IS__CGSCC____: if.end: +; IS__CGSCC____-NEXT: call void @level2Kernelall_late() #[[ATTR6:[0-9]+]] +; IS__CGSCC____-NEXT: ret void ; entry: call void @level2Kernelall_early() @@ -88,8 +112,7 @@ ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 -; IS__TUNIT____-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 -; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) #[[ATTR6:[0-9]+]] +; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 noundef 42) #[[ATTR6:[0-9]+]] ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: norecurse nosync nounwind @@ -99,7 +122,7 @@ ; IS__CGSCC____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; IS__CGSCC____-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 ; IS__CGSCC____-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 -; IS__CGSCC____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) #[[ATTR3]] +; IS__CGSCC____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 noundef 42) #[[ATTR4]] ; IS__CGSCC____-NEXT: ret void ; entry: @@ -117,8 +140,7 @@ ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 -; IS__TUNIT____-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 -; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) #[[ATTR6]] +; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 noundef 42) #[[ATTR6]] ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: norecurse nosync nounwind @@ -128,7 +150,7 @@ ; IS__CGSCC____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; IS__CGSCC____-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 ; IS__CGSCC____-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 -; IS__CGSCC____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) #[[ATTR3]] +; IS__CGSCC____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 noundef 42) #[[ATTR4]] ; IS__CGSCC____-NEXT: ret void ; entry: @@ -140,12 +162,17 @@ } define internal void @level2Kernelall_late() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly -; CHECK-LABEL: define {{[^@]+}}@level2Kernelall_late -; CHECK-SAME: () #[[ATTR2]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 -; CHECK-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@level2Kernelall_late +; IS__TUNIT____-SAME: () #[[ATTR2]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@level2Kernelall_late +; IS__CGSCC____-SAME: () #[[ATTR3:[0-9]+]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: ret void ; entry: store i32 1, i32 *addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 @@ -156,12 +183,19 @@ @UnreachableNonKernel = internal addrspace(3) global i32 0, align 4 define dso_local void @non_kernel(i32 %C) norecurse { -; CHECK: Function Attrs: norecurse nosync nounwind -; CHECK-LABEL: define {{[^@]+}}@non_kernel -; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: call void @level1(i32 [[C]]) #[[ATTR3]] -; CHECK-NEXT: ret void +; IS__TUNIT____: Function Attrs: norecurse nosync nounwind +; IS__TUNIT____-LABEL: define {{[^@]+}}@non_kernel +; IS__TUNIT____-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: call void @level1(i32 [[C]]) #[[ATTR3]] +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: norecurse nosync nounwind +; IS__CGSCC____-LABEL: define {{[^@]+}}@non_kernel +; IS__CGSCC____-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: call void @level1(i32 [[C]]) #[[ATTR4]] +; IS__CGSCC____-NEXT: ret void ; entry: call void @level1(i32 %C) @@ -169,60 +203,58 @@ } define internal void @level1(i32 %C) { -; IS________OPM: Function Attrs: norecurse nosync nounwind -; IS________OPM-LABEL: define {{[^@]+}}@level1 -; IS________OPM-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { -; IS________OPM-NEXT: entry: -; IS________OPM-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 -; IS________OPM-NEXT: call void @level2all_early(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR4]] -; IS________OPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0 -; IS________OPM-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; IS________OPM: if.then: -; IS________OPM-NEXT: call void @level2a(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR3]] -; IS________OPM-NEXT: br label [[IF_END:%.*]] -; IS________OPM: if.else: -; IS________OPM-NEXT: call void @level2b(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR3]] -; IS________OPM-NEXT: br label [[IF_END]] -; IS________OPM: if.end: -; IS________OPM-NEXT: call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR5]] -; IS________OPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: norecurse nosync nounwind +; IS__TUNIT____-LABEL: define {{[^@]+}}@level1 +; IS__TUNIT____-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 +; IS__TUNIT____-NEXT: call void @level2all_early(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR4]] +; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0 +; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; IS__TUNIT____: if.then: +; IS__TUNIT____-NEXT: call void @level2a() #[[ATTR3]] +; IS__TUNIT____-NEXT: br label [[IF_END:%.*]] +; IS__TUNIT____: if.else: +; IS__TUNIT____-NEXT: call void @level2b() #[[ATTR3]] +; IS__TUNIT____-NEXT: br label [[IF_END]] +; IS__TUNIT____: if.end: +; IS__TUNIT____-NEXT: call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR5]] +; IS__TUNIT____-NEXT: ret void ; -; IS__TUNIT_NPM: Function Attrs: norecurse nosync nounwind -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@level1 -; IS__TUNIT_NPM-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { -; IS__TUNIT_NPM-NEXT: entry: -; IS__TUNIT_NPM-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 -; IS__TUNIT_NPM-NEXT: call void @level2all_early(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR4]] -; IS__TUNIT_NPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0 -; IS__TUNIT_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; IS__TUNIT_NPM: if.then: -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[LOCAL]], align 4 -; IS__TUNIT_NPM-NEXT: call void @level2a(i32 [[TMP0]]) #[[ATTR3]] -; IS__TUNIT_NPM-NEXT: br label [[IF_END:%.*]] -; IS__TUNIT_NPM: if.else: -; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOCAL]], align 4 -; IS__TUNIT_NPM-NEXT: call void @level2b(i32 [[TMP1]]) #[[ATTR3]] -; IS__TUNIT_NPM-NEXT: br label [[IF_END]] -; IS__TUNIT_NPM: if.end: -; IS__TUNIT_NPM-NEXT: call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR5]] -; IS__TUNIT_NPM-NEXT: ret void +; IS__CGSCC_OPM: Function Attrs: norecurse nosync nounwind +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@level1 +; IS__CGSCC_OPM-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { +; IS__CGSCC_OPM-NEXT: entry: +; IS__CGSCC_OPM-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 +; IS__CGSCC_OPM-NEXT: call void @level2all_early(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR5]] +; IS__CGSCC_OPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0 +; IS__CGSCC_OPM-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; IS__CGSCC_OPM: if.then: +; IS__CGSCC_OPM-NEXT: call void @level2a(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR4]] +; IS__CGSCC_OPM-NEXT: br label [[IF_END:%.*]] +; IS__CGSCC_OPM: if.else: +; IS__CGSCC_OPM-NEXT: call void @level2b(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR4]] +; IS__CGSCC_OPM-NEXT: br label [[IF_END]] +; IS__CGSCC_OPM: if.end: +; IS__CGSCC_OPM-NEXT: call void @level2all_late(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR7:[0-9]+]] +; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM: Function Attrs: norecurse nosync nounwind ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@level1 ; IS__CGSCC_NPM-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { ; IS__CGSCC_NPM-NEXT: entry: ; IS__CGSCC_NPM-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 -; IS__CGSCC_NPM-NEXT: call void @level2all_early(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR4]] +; IS__CGSCC_NPM-NEXT: call void @level2all_early(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR5]] ; IS__CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0 ; IS__CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; IS__CGSCC_NPM: if.then: -; IS__CGSCC_NPM-NEXT: call void @level2a(i32 undef) #[[ATTR3]] +; IS__CGSCC_NPM-NEXT: call void @level2a(i32 undef) #[[ATTR4]] ; IS__CGSCC_NPM-NEXT: br label [[IF_END:%.*]] ; IS__CGSCC_NPM: if.else: -; IS__CGSCC_NPM-NEXT: call void @level2b(i32 undef) #[[ATTR6:[0-9]+]] +; IS__CGSCC_NPM-NEXT: call void @level2b(i32 undef) #[[ATTR7:[0-9]+]] ; IS__CGSCC_NPM-NEXT: br label [[IF_END]] ; IS__CGSCC_NPM: if.end: -; IS__CGSCC_NPM-NEXT: call void @level2all_late(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR7:[0-9]+]] +; IS__CGSCC_NPM-NEXT: call void @level2all_late(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR8:[0-9]+]] ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -245,20 +277,19 @@ } define internal void @level2all_early(i32* %addr) { -; NOT_CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@level2all_early -; NOT_CGSCC_NPM-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { -; NOT_CGSCC_NPM-NEXT: entry: -; NOT_CGSCC_NPM-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 -; NOT_CGSCC_NPM-NEXT: store i32 17, i32* [[ADDR]], align 4 -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@level2all_early +; IS__TUNIT____-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 +; IS__TUNIT____-NEXT: ret void ; -; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@level2all_early -; IS__CGSCC_NPM-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { -; IS__CGSCC_NPM-NEXT: entry: -; IS__CGSCC_NPM-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 -; IS__CGSCC_NPM-NEXT: ret void +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; IS__CGSCC____-LABEL: define {{[^@]+}}@level2all_early +; IS__CGSCC____-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 +; IS__CGSCC____-NEXT: ret void ; entry: store i32 1, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 @@ -267,36 +298,23 @@ } define internal void @level2a(i32* %addr) { -; IS__TUNIT_OPM: Function Attrs: norecurse nosync nounwind -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@level2a -; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR1]] { -; IS__TUNIT_OPM-NEXT: entry: -; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 -; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[ADDR]], align 4 -; IS__TUNIT_OPM-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) #[[ATTR6]] -; IS__TUNIT_OPM-NEXT: ret void -; -; IS__TUNIT_NPM: Function Attrs: norecurse nosync nounwind -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@level2a -; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1]] { -; IS__TUNIT_NPM-NEXT: entry: -; IS__TUNIT_NPM-NEXT: [[ADDR_PRIV:%.*]] = alloca i32, align 4 -; IS__TUNIT_NPM-NEXT: store i32 [[TMP0]], i32* [[ADDR_PRIV]], align 4 -; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 -; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4 -; IS__TUNIT_NPM-NEXT: call void @use(i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]]) #[[ATTR6]] -; IS__TUNIT_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: norecurse nosync nounwind +; IS__TUNIT____-LABEL: define {{[^@]+}}@level2a +; IS__TUNIT____-SAME: () #[[ATTR1]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 +; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 17) #[[ATTR6]] +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC_OPM: Function Attrs: norecurse nosync nounwind ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@level2a -; IS__CGSCC_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR1]] { +; IS__CGSCC_OPM-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR1]] { ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[ADDR]], align 4 -; IS__CGSCC_OPM-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) #[[ATTR3]] +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* undef, align 4 +; IS__CGSCC_OPM-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 17) #[[ATTR4]] ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM: Function Attrs: norecurse nosync nounwind @@ -307,7 +325,7 @@ ; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4 -; IS__CGSCC_NPM-NEXT: call void @use(i32 [[TMP1]], i32 [[TMP2]], i32 17) #[[ATTR3]] +; IS__CGSCC_NPM-NEXT: call void @use(i32 [[TMP1]], i32 [[TMP2]], i32 17) #[[ATTR4]] ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -319,36 +337,23 @@ } define internal void @level2b(i32* %addr) { -; IS__TUNIT_OPM: Function Attrs: norecurse nosync nounwind -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@level2b -; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR1]] { -; IS__TUNIT_OPM-NEXT: entry: -; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 -; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[ADDR]], align 4 -; IS__TUNIT_OPM-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) #[[ATTR6]] -; IS__TUNIT_OPM-NEXT: ret void -; -; IS__TUNIT_NPM: Function Attrs: norecurse nosync nounwind -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@level2b -; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1]] { -; IS__TUNIT_NPM-NEXT: entry: -; IS__TUNIT_NPM-NEXT: [[ADDR_PRIV:%.*]] = alloca i32, align 4 -; IS__TUNIT_NPM-NEXT: store i32 [[TMP0]], i32* [[ADDR_PRIV]], align 4 -; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 -; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4 -; IS__TUNIT_NPM-NEXT: call void @use(i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]]) #[[ATTR6]] -; IS__TUNIT_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: norecurse nosync nounwind +; IS__TUNIT____-LABEL: define {{[^@]+}}@level2b +; IS__TUNIT____-SAME: () #[[ATTR1]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 +; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 17) #[[ATTR6]] +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC_OPM: Function Attrs: norecurse nosync nounwind ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@level2b -; IS__CGSCC_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR1]] { +; IS__CGSCC_OPM-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR1]] { ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[ADDR]], align 4 -; IS__CGSCC_OPM-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) #[[ATTR3]] +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* undef, align 4 +; IS__CGSCC_OPM-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 17) #[[ATTR4]] ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM: Function Attrs: norecurse nosync nounwind @@ -359,7 +364,7 @@ ; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4 -; IS__CGSCC_NPM-NEXT: call void @use(i32 [[TMP1]], i32 [[TMP2]], i32 17) #[[ATTR3]] +; IS__CGSCC_NPM-NEXT: call void @use(i32 [[TMP1]], i32 [[TMP2]], i32 17) #[[ATTR4]] ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -371,20 +376,19 @@ } define internal void @level2all_late(i32* %addr) { -; NOT_CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@level2all_late -; NOT_CGSCC_NPM-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { -; NOT_CGSCC_NPM-NEXT: entry: -; NOT_CGSCC_NPM-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; NOT_CGSCC_NPM-NEXT: store i32 5, i32* [[ADDR]], align 4 -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@level2all_late +; IS__TUNIT____-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 +; IS__TUNIT____-NEXT: ret void ; -; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@level2all_late -; IS__CGSCC_NPM-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { -; IS__CGSCC_NPM-NEXT: entry: -; IS__CGSCC_NPM-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__CGSCC_NPM-NEXT: ret void +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; IS__CGSCC____-LABEL: define {{[^@]+}}@level2all_late +; IS__CGSCC____-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 +; IS__CGSCC____-NEXT: ret void ; entry: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 @@ -406,16 +410,19 @@ ; IS__CGSCC_OPM: attributes #[[ATTR0]] = { norecurse nosync nounwind "kernel" } ; IS__CGSCC_OPM: attributes #[[ATTR1]] = { norecurse nosync nounwind } ; IS__CGSCC_OPM: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn writeonly } -; IS__CGSCC_OPM: attributes #[[ATTR3]] = { nounwind } -; IS__CGSCC_OPM: attributes #[[ATTR4]] = { nounwind willreturn writeonly } -; IS__CGSCC_OPM: attributes #[[ATTR5]] = { nounwind writeonly } +; IS__CGSCC_OPM: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind readnone willreturn } +; IS__CGSCC_OPM: attributes #[[ATTR4]] = { nounwind } +; IS__CGSCC_OPM: attributes #[[ATTR5]] = { nounwind willreturn writeonly } +; IS__CGSCC_OPM: attributes #[[ATTR6]] = { nounwind readnone } +; IS__CGSCC_OPM: attributes #[[ATTR7]] = { nounwind writeonly } ;. ; IS__CGSCC_NPM: attributes #[[ATTR0]] = { norecurse nosync nounwind "kernel" } ; IS__CGSCC_NPM: attributes #[[ATTR1]] = { norecurse nosync nounwind } ; IS__CGSCC_NPM: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn writeonly } -; IS__CGSCC_NPM: attributes #[[ATTR3]] = { nounwind } -; IS__CGSCC_NPM: attributes #[[ATTR4]] = { nounwind willreturn writeonly } -; IS__CGSCC_NPM: attributes #[[ATTR5]] = { nounwind writeonly } -; IS__CGSCC_NPM: attributes #[[ATTR6]] = { nosync nounwind } -; IS__CGSCC_NPM: attributes #[[ATTR7]] = { nosync nounwind writeonly } +; IS__CGSCC_NPM: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind readnone willreturn } +; IS__CGSCC_NPM: attributes #[[ATTR4]] = { nounwind } +; IS__CGSCC_NPM: attributes #[[ATTR5]] = { nounwind willreturn writeonly } +; IS__CGSCC_NPM: attributes #[[ATTR6]] = { nounwind readnone } +; IS__CGSCC_NPM: attributes #[[ATTR7]] = { nosync nounwind } +; IS__CGSCC_NPM: attributes #[[ATTR8]] = { nosync nounwind writeonly } ;.