diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -110,6 +110,7 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PostDominators.h" @@ -376,6 +377,12 @@ bool isAssumedThreadLocalObject(Attributor &A, Value &Obj, const AbstractAttribute &QueryingAA); +/// Return true if \p I is potentially affected by a barrier. +bool isPotentiallyAffectedByBarrier(Attributor &A, const Instruction &I, + const AbstractAttribute &QueryingAA); +bool isPotentiallyAffectedByBarrier(Attributor &A, ArrayRef<const Value *> Ptrs, + const AbstractAttribute &QueryingAA, + const Instruction *CtxI); } // namespace AA template <> @@ -1921,7 +1928,8 @@ bool isAssumedDead(const Instruction &I, const AbstractAttribute *QueryingAA, const AAIsDead *LivenessAA, bool &UsedAssumedInformation, bool CheckBBLivenessOnly = false, - DepClassTy DepClass = DepClassTy::OPTIONAL); + DepClassTy DepClass = DepClassTy::OPTIONAL, + bool CheckForDeadStore = false); /// Return true if \p U is assumed dead. /// @@ -3324,6 +3332,10 @@ /// Helper function specific for intrinsics which are potentially volatile. static bool isNoSyncIntrinsic(const Instruction *I); + /// Helper function to determine if \p CB is an aligned (GPU) barrier. + /// Aligned barriers have to be executed by all threads. + static bool isAlignedBarrier(const CallBase &CB); + /// Create an abstract attribute view for the position \p IRP. static AANoSync &createForPosition(const IRPosition &IRP, Attributor &A); @@ -3618,9 +3630,6 @@ /// Returns true if the underlying value is known dead. virtual bool isKnownDead() const = 0; - /// Returns true if \p BB is assumed dead. - virtual bool isAssumedDead(const BasicBlock *BB) const = 0; - /// Returns true if \p BB is known dead. virtual bool isKnownDead(const BasicBlock *BB) const = 0; @@ -3659,6 +3668,9 @@ return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F); } + /// Returns true if \p BB is assumed dead. + virtual bool isAssumedDead(const BasicBlock *BB) const = 0; + /// Return if the edge from \p From BB to \p To BB is assumed dead. /// This is specifically useful in AAReachability. virtual bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const { @@ -4988,6 +5000,32 @@ using Base = StateWrapper<BooleanState, AbstractAttribute>; AAExecutionDomain(const IRPosition &IRP, Attributor &A) : Base(IRP) {} + /// Summary about the execution domain of a block or instruction. + struct ExecutionDomainTy { + using BarriersSetTy = SmallPtrSet<CallBase *, 2>; + using AssumesSetTy = SmallPtrSet<AssumeInst *, 4>; + + void addAssumeInst(Attributor &A, AssumeInst &AI) { + EncounteredAssumes.insert(&AI); + } + + void addAlignedBarrier(Attributor &A, CallBase &CB) { + AlignedBarriers.insert(&CB); + } + + void clearAssumeInstAndAlignedBarriers() { + EncounteredAssumes.clear(); + AlignedBarriers.clear(); + } + + bool IsExecutedByInitialThreadOnly = true; + bool IsReachedFromAlignedBarrierOnly = true; + bool IsReachingAlignedBarrierOnly = true; + bool EncounteredNonLocalSideEffect = false; + BarriersSetTy AlignedBarriers; + AssumesSetTy EncounteredAssumes; + }; + /// Create an abstract attribute view for the position \p IRP.
static AAExecutionDomain &createForPosition(const IRPosition &IRP, Attributor &A); @@ -4999,11 +5037,17 @@ const char *getIdAddr() const override { return &ID; } /// Check if an instruction is executed only by the initial thread. - virtual bool isExecutedByInitialThreadOnly(const Instruction &) const = 0; + bool isExecutedByInitialThreadOnly(const Instruction &I) const { + return isExecutedByInitialThreadOnly(*I.getParent()); + } /// Check if a basic block is executed only by the initial thread. virtual bool isExecutedByInitialThreadOnly(const BasicBlock &) const = 0; + virtual ExecutionDomainTy getExecutionDomain(const BasicBlock &) const = 0; + virtual ExecutionDomainTy getExecutionDomain(const CallBase &) const = 0; + virtual ExecutionDomainTy getFunctionExecutionDomain() const = 0; + /// This function should return true if the type of the \p AA is /// AAExecutionDomain. static bool classof(const AbstractAttribute *AA) { diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -784,6 +784,61 @@ return false; } +bool AA::isPotentiallyAffectedByBarrier(Attributor &A, const Instruction &I, + const AbstractAttribute &QueryingAA) { + if (!I.mayHaveSideEffects() && !I.mayReadFromMemory()) + return false; + + SmallSetVector<const Value *, 8> Ptrs; + + auto AddLocationPtr = [&](std::optional<MemoryLocation> Loc) { + if (!Loc || !Loc->Ptr) { + LLVM_DEBUG( + dbgs() << "[AA] Access to unknown location; -> requires barriers\n"); + return false; + } + Ptrs.insert(Loc->Ptr); + return true; + }; + + if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&I)) { + if (!AddLocationPtr(MemoryLocation::getForDest(MI))) + return true; + if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(&I)) + if (!AddLocationPtr(MemoryLocation::getForSource(MTI))) + return true; + } else if (!AddLocationPtr(MemoryLocation::getOrNone(&I))) + return true; + + return isPotentiallyAffectedByBarrier(A, Ptrs.getArrayRef(), QueryingAA, &I); +} + +bool AA::isPotentiallyAffectedByBarrier(Attributor &A, + ArrayRef<const Value *> Ptrs, + const AbstractAttribute &QueryingAA, + const Instruction *CtxI) { + for (const Value *Ptr : Ptrs) { + if (!Ptr) { + LLVM_DEBUG(dbgs() << "[AA] nullptr; -> requires barriers\n"); + return true; + } + + auto Pred = [&](Value &Obj) { + if (AA::isAssumedThreadLocalObject(A, Obj, QueryingAA)) + return true; + LLVM_DEBUG(dbgs() << "[AA] Access to '" << Obj << "' via '" << *Ptr + << "'; -> requires barrier\n"); + return false; + }; + + const auto &UnderlyingObjsAA = A.getAAFor<AAUnderlyingObjects>( + QueryingAA, IRPosition::value(*Ptr), DepClassTy::OPTIONAL); + if (!UnderlyingObjsAA.forallUnderlyingObjects(Pred)) + return true; + } + return false; +} + /// Return true if \p New is equal or worse than \p Old. static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) { if (!Old.isIntAttribute()) return true; @@ -1349,7 +1404,8 @@ const AbstractAttribute *QueryingAA, const AAIsDead *FnLivenessAA, bool &UsedAssumedInformation, - bool CheckBBLivenessOnly, DepClassTy DepClass) { + bool CheckBBLivenessOnly, DepClassTy DepClass, + bool CheckForDeadStore) { const IRPosition::CallBaseContext *CBCtx = QueryingAA ?
QueryingAA->getCallBaseContext() : nullptr; @@ -1394,6 +1450,14 @@ return true; } + if (CheckForDeadStore && isa<StoreInst>(I) && IsDeadAA.isRemovableStore()) { + if (QueryingAA) + recordDependence(IsDeadAA, *QueryingAA, DepClass); + if (!IsDeadAA.isKnownDead()) + UsedAssumedInformation = true; + return true; + } + return false; } diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -50,6 +50,8 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/NoFolder.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" @@ -2224,6 +2226,20 @@ /// ------------------------ NoSync Function Attribute ------------------------- +bool AANoSync::isAlignedBarrier(const CallBase &CB) { + switch (CB.getIntrinsicID()) { + case Intrinsic::nvvm_barrier0: + case Intrinsic::nvvm_barrier0_and: + case Intrinsic::nvvm_barrier0_or: + case Intrinsic::nvvm_barrier0_popc: + return true; + // TODO: Check for amdgcn_s_barrier executed in a uniform/aligned way. + default: + break; + } + return hasAssumption(CB, KnownAssumptionString("ompx_aligned_barrier")); +} + bool AANoSync::isNonRelaxedAtomic(const Instruction *I) { if (!I->isAtomic()) return false; diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/EnumeratedArray.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/CallGraph.h" @@ -32,6 +33,7 @@ #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/Assumptions.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/GlobalValue.h" @@ -51,6 +53,7 @@ #include #include +#include using namespace llvm; using namespace omp; @@ -827,8 +830,6 @@ if (remarksEnabled()) analysisGlobalization(); - - Changed |= eliminateBarriers(); } else { if (PrintICVValues) printICVs(); @@ -851,8 +852,6 @@ Changed = true; } } - - Changed |= eliminateBarriers(); } return Changed; } @@ -1418,223 +1417,6 @@ return Changed; } - /// Eliminates redundant, aligned barriers in OpenMP offloaded kernels. - /// TODO: Make this an AA and expand it to work across blocks and functions.
- bool eliminateBarriers() { - bool Changed = false; - - if (DisableOpenMPOptBarrierElimination) - return /*Changed=*/false; - - if (OMPInfoCache.Kernels.empty()) - return /*Changed=*/false; - - enum ImplicitBarrierType { IBT_ENTRY, IBT_EXIT }; - - class BarrierInfo { - Instruction *I; - enum ImplicitBarrierType Type; - - public: - BarrierInfo(enum ImplicitBarrierType Type) : I(nullptr), Type(Type) {} - BarrierInfo(Instruction &I) : I(&I) {} - - bool isImplicit() { return !I; } - - bool isImplicitEntry() { return isImplicit() && Type == IBT_ENTRY; } - - bool isImplicitExit() { return isImplicit() && Type == IBT_EXIT; } - - Instruction *getInstruction() { return I; } - }; - - for (Function *Kernel : OMPInfoCache.Kernels) { - for (BasicBlock &BB : *Kernel) { - SmallVector<BarrierInfo, 8> BarriersInBlock; - SmallPtrSet<Instruction *, 4> BarriersToBeDeleted; - - // Add the kernel entry implicit barrier. - if (&Kernel->getEntryBlock() == &BB) - BarriersInBlock.push_back(IBT_ENTRY); - - // Find implicit and explicit aligned barriers in the same basic block. - for (Instruction &I : BB) { - if (isa<ReturnInst>(I)) { - // Add the implicit barrier when exiting the kernel. - BarriersInBlock.push_back(IBT_EXIT); - continue; - } - CallBase *CB = dyn_cast<CallBase>(&I); - if (!CB) - continue; - - auto IsAlignBarrierCB = [&](CallBase &CB) { - switch (CB.getIntrinsicID()) { - case Intrinsic::nvvm_barrier0: - case Intrinsic::nvvm_barrier0_and: - case Intrinsic::nvvm_barrier0_or: - case Intrinsic::nvvm_barrier0_popc: - return true; - default: - break; - } - return hasAssumption(CB, - KnownAssumptionString("ompx_aligned_barrier")); - }; - - if (IsAlignBarrierCB(*CB)) { - // Add an explicit aligned barrier. - BarriersInBlock.push_back(I); - } - } - - if (BarriersInBlock.size() <= 1) - continue; - - // A barrier in a barrier pair is removeable if all instructions - // between the barriers in the pair are side-effect free modulo the - // barrier operation. - auto IsBarrierRemoveable = [&Kernel]( - BarrierInfo *StartBI, BarrierInfo *EndBI, - SmallVector<CallInst *> &Assumptions) { - assert( - !StartBI->isImplicitExit() && - "Expected start barrier to be other than a kernel exit barrier"); - assert( - !EndBI->isImplicitEntry() && - "Expected end barrier to be other than a kernel entry barrier"); - // If StarBI instructions is null then this the implicit - // kernel entry barrier, so iterate from the first instruction in the - // entry block. - Instruction *I = (StartBI->isImplicitEntry()) - ? &Kernel->getEntryBlock().front() - : StartBI->getInstruction()->getNextNode(); - assert(I && "Expected non-null start instruction"); - Instruction *E = (EndBI->isImplicitExit()) - ? I->getParent()->getTerminator() - : EndBI->getInstruction(); - assert(E && "Expected non-null end instruction"); - - for (; I != E; I = I->getNextNode()) { - if (!I->mayHaveSideEffects() && !I->mayReadFromMemory()) - continue; - - auto IsPotentiallyAffectedByBarrier = - [](std::optional<MemoryLocation> Loc) { - const Value *Obj = (Loc && Loc->Ptr) - ?
getUnderlyingObject(Loc->Ptr) - : nullptr; - if (!Obj) { - LLVM_DEBUG( - dbgs() - << "Access to unknown location requires barriers\n"); - return true; - } - if (isa<UndefValue>(Obj)) - return false; - if (isa<AllocaInst>(Obj)) - return false; - if (auto *GV = dyn_cast<GlobalVariable>(Obj)) { - if (GV->isConstant()) - return false; - if (GV->isThreadLocal()) - return false; - if (GV->getAddressSpace() == (int)AddressSpace::Local) - return false; - if (GV->getAddressSpace() == (int)AddressSpace::Constant) - return false; - } - LLVM_DEBUG(dbgs() << "Access to '" << *Obj - << "' requires barriers\n"); - return true; - }; - - if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { - std::optional<MemoryLocation> Loc = - MemoryLocation::getForDest(MI); - if (IsPotentiallyAffectedByBarrier(Loc)) - return false; - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) { - std::optional<MemoryLocation> Loc = - MemoryLocation::getForSource(MTI); - if (IsPotentiallyAffectedByBarrier(Loc)) - return false; - } - continue; - } - - if (auto *AI = dyn_cast<AssumeInst>(I)) { - Assumptions.push_back(AI); - continue; - } - - if (auto *LI = dyn_cast<LoadInst>(I)) - if (LI->hasMetadata(LLVMContext::MD_invariant_load)) - continue; - - std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(I); - if (IsPotentiallyAffectedByBarrier(Loc)) - return false; - } - - return true; - }; - - // Iterate barrier pairs and remove an explicit barrier if analysis - // deems it removeable. - for (auto *It = BarriersInBlock.begin(), - *End = BarriersInBlock.end() - 1; - It != End; ++It) { - - BarrierInfo *StartBI = It; - BarrierInfo *EndBI = (It + 1); - - // Cannot remove when both are implicit barriers, continue. - if (StartBI->isImplicit() && EndBI->isImplicit()) - continue; - - SmallVector<CallInst *> Assumptions; - if (!IsBarrierRemoveable(StartBI, EndBI, Assumptions)) - continue; - - assert(!(StartBI->isImplicit() && EndBI->isImplicit()) && - "Expected at least one explicit barrier to remove."); - - for (auto *Assumption : Assumptions) - Assumption->eraseFromParent(); - - // Remove an explicit barrier, check first, then second.
- if (!StartBI->isImplicit()) { - LLVM_DEBUG(dbgs() << "Remove start barrier " - << *StartBI->getInstruction() << "\n"); - BarriersToBeDeleted.insert(StartBI->getInstruction()); - } else { - LLVM_DEBUG(dbgs() << "Remove end barrier " - << *EndBI->getInstruction() << "\n"); - BarriersToBeDeleted.insert(EndBI->getInstruction()); - } - } - - if (BarriersToBeDeleted.empty()) - continue; - - Changed = true; - for (Instruction *I : BarriersToBeDeleted) { - ++NumBarriersEliminated; - auto Remark = [&](OptimizationRemark OR) { - return OR << "Redundant barrier eliminated."; - }; - - if (EnableVerboseRemarks) - emitRemark(I, "OMP190", Remark); - I->eraseFromParent(); - } - } - } - - return Changed; - } - void analysisGlobalization() { auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; @@ -2748,77 +2530,154 @@ AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A) : AAExecutionDomain(IRP, A) {} + ~AAExecutionDomainFunction() { + delete RPOT; + } + + void initialize(Attributor &A) override { + if (getAnchorScope()->isDeclaration()) { + indicatePessimisticFixpoint(); + return; + } + RPOT = new ReversePostOrderTraversal<Function *>(getAnchorScope()); + } + const std::string getAsStr() const override { - return "[AAExecutionDomain] " + std::to_string(SingleThreadedBBs.size()) + - "/" + std::to_string(NumBBs) + " BBs thread 0 only."; + unsigned TotalBlocks = 0, InitialThreadBlocks = 0; + for (auto &It : BEDMap) { + TotalBlocks++; + InitialThreadBlocks += It.getSecond().IsExecutedByInitialThreadOnly; + } + return "[AAExecutionDomain] " + std::to_string(InitialThreadBlocks) + "/" + + std::to_string(TotalBlocks) + " executed by initial thread only"; } /// See AbstractAttribute::trackStatistics(). void trackStatistics() const override {} - void initialize(Attributor &A) override { - Function *F = getAnchorScope(); - for (const auto &BB : *F) - SingleThreadedBBs.insert(&BB); - NumBBs = SingleThreadedBBs.size(); - } - ChangeStatus manifest(Attributor &A) override { LLVM_DEBUG({ - for (const BasicBlock *BB : SingleThreadedBBs) + for (const BasicBlock &BB : *getAnchorScope()) { + if (!isExecutedByInitialThreadOnly(BB)) + continue; dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " " - << BB->getName() << " is executed by a single thread.\n"; + << BB.getName() << " is executed by a single thread.\n"; + } }); - return ChangeStatus::UNCHANGED; - } - ChangeStatus updateImpl(Attributor &A) override; + ChangeStatus Changed = ChangeStatus::UNCHANGED; - /// Check if an instruction is executed by a single thread. - bool isExecutedByInitialThreadOnly(const Instruction &I) const override { - return isExecutedByInitialThreadOnly(*I.getParent()); - } + if (DisableOpenMPOptBarrierElimination) + return Changed; - bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override { - return isValidState() && SingleThreadedBBs.contains(&BB); + SmallPtrSet<CallBase *, 8> DeletedBarriers; + auto HandleAlignedBarrier = [&](CallBase *CB) { + const ExecutionDomainTy &ED = CEDMap[CB]; + if (!ED.IsReachedFromAlignedBarrierOnly || + ED.EncounteredNonLocalSideEffect) + return; + + // We can remove this barrier, if it is one, or all aligned barriers + // reaching the kernel end. In the latter case we can transitively work + // our way back until we find a barrier that guards a side-effect if we + // are dealing with the kernel end here.
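+      // Note: a null \p CB encodes the implicit aligned barrier at the kernel
+      // end; see the HandleAlignedBarrier(nullptr) call further down.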
+ if (CB) { + DeletedBarriers.insert(CB); + A.deleteAfterManifest(*CB); + ++NumBarriersEliminated; + Changed = ChangeStatus::CHANGED; + } else if (!ED.AlignedBarriers.empty()) { + NumBarriersEliminated += ED.AlignedBarriers.size(); + Changed = ChangeStatus::CHANGED; + SmallVector<CallBase *> Worklist(ED.AlignedBarriers.begin(), + ED.AlignedBarriers.end()); + SmallSetVector<CallBase *, 16> Visited; + while (!Worklist.empty()) { + CallBase *LastCB = Worklist.pop_back_val(); + if (!Visited.insert(LastCB)) + continue; + if (!DeletedBarriers.count(LastCB)) { + A.deleteAfterManifest(*LastCB); + continue; + } + // The final aligned barrier (LastCB) reaching the kernel end was + // removed already. This means we can go one step further and remove + // the barriers encountered last before (LastCB). + const ExecutionDomainTy &LastED = CEDMap[LastCB]; + Worklist.append(LastED.AlignedBarriers.begin(), + LastED.AlignedBarriers.end()); + } + } + + // If we actually eliminated a barrier we need to eliminate the associated + // llvm.assume calls as well to avoid creating UB. + if (!ED.EncounteredAssumes.empty() && (CB || !ED.AlignedBarriers.empty())) + for (auto *AssumeCB : ED.EncounteredAssumes) + A.deleteAfterManifest(*AssumeCB); + }; + + for (auto *CB : AlignedBarriers) + HandleAlignedBarrier(CB); + + auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); + // Handle the "kernel end barrier" for kernels too. + if (OMPInfoCache.Kernels.count(getAnchorScope())) + HandleAlignedBarrier(nullptr); + + return Changed; } - /// Set of basic blocks that are executed by a single thread. - SmallSetVector<const BasicBlock *, 16> SingleThreadedBBs; + /// Merge barrier and assumption information from \p PredED into the successor + /// \p ED. + void + mergeInPredecessorBarriersAndAssumptions(Attributor &A, ExecutionDomainTy &ED, + const ExecutionDomainTy &PredED); - /// Total number of basic blocks in this function. - long unsigned NumBBs = 0; -}; + /// Merge all information from \p PredED into the successor \p ED. If + /// \p InitialEdgeOnly is set, only the initial edge will enter the block + /// represented by \p ED from this predecessor. + void mergeInPredecessor(Attributor &A, ExecutionDomainTy &ED, + const ExecutionDomainTy &PredED, + bool InitialEdgeOnly = false); -ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { - Function *F = getAnchorScope(); - ReversePostOrderTraversal<Function *> RPOT(F); - auto NumSingleThreadedBBs = SingleThreadedBBs.size(); + /// Accumulate information for the entry block in \p EntryBBED. + void handleEntryBB(Attributor &A, ExecutionDomainTy &EntryBBED); - bool AllCallSitesKnown; - auto PredForCallSite = [&](AbstractCallSite ACS) { - const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>( - *this, IRPosition::function(*ACS.getInstruction()->getFunction()), - DepClassTy::REQUIRED); - return ACS.isDirectCall() && - ExecutionDomainAA.isExecutedByInitialThreadOnly( - *ACS.getInstruction()); - }; + /// See AbstractAttribute::updateImpl.
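+  /// The update performs a forward pass over the blocks in RPO and a backward
+  /// pass that invalidates IsReachingAlignedBarrierOnly past unaligned syncs.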
+ ChangeStatus updateImpl(Attributor &A) override; - if (!A.checkForAllCallSites(PredForCallSite, *this, - /* RequiresAllCallSites */ true, - AllCallSitesKnown)) - SingleThreadedBBs.remove(&F->getEntryBlock()); + /// Query interface, see AAExecutionDomain + ///{ + bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override { + if (!isValidState()) + return false; + return BEDMap.lookup(&BB).IsExecutedByInitialThreadOnly; + } - auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); - auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; + ExecutionDomainTy getExecutionDomain(const BasicBlock &BB) const override { + assert(isValidState() && + "No request should be made against an invalid state!"); + return BEDMap.lookup(&BB); + } + ExecutionDomainTy getExecutionDomain(const CallBase &CB) const override { + assert(isValidState() && + "No request should be made against an invalid state!"); + return CEDMap.lookup(&CB); + } + ExecutionDomainTy getFunctionExecutionDomain() const override { + assert(isValidState() && + "No request should be made against an invalid state!"); + return BEDMap.lookup(nullptr); + } + ///} // Check if the edge into the successor block contains a condition that only // lets the main thread execute it. - auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) { + static bool isInitialThreadOnlyEdge(Attributor &A, BranchInst *Edge, + BasicBlock &SuccessorBB) { if (!Edge || !Edge->isConditional()) return false; - if (Edge->getSuccessor(0) != SuccessorBB) + if (Edge->getSuccessor(0) != &SuccessorBB) return false; auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition()); @@ -2832,6 +2691,8 @@ // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!) if (C->isAllOnesValue()) { auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0)); + auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); + auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr; if (!CB) return false; @@ -2855,30 +2716,322 @@ return false; }; - // Merge all the predecessor states into the current basic block. A basic - // block is executed by a single thread if all of its predecessors are. - auto MergePredecessorStates = [&](BasicBlock *BB) { - if (pred_empty(BB)) - return SingleThreadedBBs.contains(BB); - - bool IsInitialThread = true; - for (BasicBlock *PredBB : predecessors(BB)) { - if (!IsInitialThreadOnly(dyn_cast<BranchInst>(PredBB->getTerminator()), - BB)) - IsInitialThread &= SingleThreadedBBs.contains(PredBB); + /// Mapping containing information per block.
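+  /// The nullptr key of BEDMap holds the function-wide execution domain,
+  /// i.e., the state merged over all function-exit blocks.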
+ DenseMap<const BasicBlock *, ExecutionDomainTy> BEDMap; + DenseMap<const CallBase *, ExecutionDomainTy> CEDMap; + SmallSetVector<CallBase *, 16> AlignedBarriers; + + ReversePostOrderTraversal<Function *> *RPOT = nullptr; +}; + +void AAExecutionDomainFunction::mergeInPredecessorBarriersAndAssumptions( + Attributor &A, ExecutionDomainTy &ED, const ExecutionDomainTy &PredED) { + for (auto *EA : PredED.EncounteredAssumes) + ED.addAssumeInst(A, *EA); + + for (auto *AB : PredED.AlignedBarriers) + ED.addAlignedBarrier(A, *AB); +} + +void AAExecutionDomainFunction::mergeInPredecessor( + Attributor &A, ExecutionDomainTy &ED, const ExecutionDomainTy &PredED, + bool InitialEdgeOnly) { + ED.IsExecutedByInitialThreadOnly = + InitialEdgeOnly || (PredED.IsExecutedByInitialThreadOnly && + ED.IsExecutedByInitialThreadOnly); + + ED.IsReachedFromAlignedBarrierOnly = ED.IsReachedFromAlignedBarrierOnly && + PredED.IsReachedFromAlignedBarrierOnly; + ED.EncounteredNonLocalSideEffect = + ED.EncounteredNonLocalSideEffect | PredED.EncounteredNonLocalSideEffect; + if (ED.IsReachedFromAlignedBarrierOnly) + mergeInPredecessorBarriersAndAssumptions(A, ED, PredED); + else + ED.clearAssumeInstAndAlignedBarriers(); +} + +void AAExecutionDomainFunction::handleEntryBB(Attributor &A, + ExecutionDomainTy &EntryBBED) { + SmallVector<ExecutionDomainTy> PredExecDomains; + auto PredForCallSite = [&](AbstractCallSite ACS) { + const auto &EDAA = A.getAAFor<AAExecutionDomain>( + *this, IRPosition::function(*ACS.getInstruction()->getFunction()), + DepClassTy::OPTIONAL); + if (!EDAA.getState().isValidState()) + return false; + PredExecDomains.emplace_back( + EDAA.getExecutionDomain(*cast<CallBase>(ACS.getInstruction()))); + return true; + }; + + bool AllCallSitesKnown; + if (A.checkForAllCallSites(PredForCallSite, *this, + /* RequiresAllCallSites */ true, + AllCallSitesKnown)) { + for (const auto &PredED : PredExecDomains) + mergeInPredecessor(A, EntryBBED, PredED); + + } else { + // We could not find all predecessors, so this is either a kernel or a + // function with external linkage (or with some other weird uses). + auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); + if (OMPInfoCache.Kernels.count(getAnchorScope())) { + EntryBBED.IsExecutedByInitialThreadOnly = false; + EntryBBED.IsReachedFromAlignedBarrierOnly = true; + EntryBBED.EncounteredNonLocalSideEffect = false; + } else { + EntryBBED.IsExecutedByInitialThreadOnly = false; + EntryBBED.IsReachedFromAlignedBarrierOnly = false; + EntryBBED.EncounteredNonLocalSideEffect = true; } + } + + auto &FnED = BEDMap[nullptr]; + FnED.IsReachingAlignedBarrierOnly &= + EntryBBED.IsReachedFromAlignedBarrierOnly; +} + +ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { + + bool Changed = false; - return IsInitialThread; + // Helper to deal with an aligned barrier encountered during the forward + // traversal. \p CB is the aligned barrier, \p ED is the execution domain when + // it was encountered. + auto HandleAlignedBarrier = [&](CallBase *CB, ExecutionDomainTy &ED) { + if (CB) + Changed |= AlignedBarriers.insert(CB); + // First, update the barrier ED kept in the separate CEDMap. + auto &CallED = CEDMap[CB]; + mergeInPredecessor(A, CallED, ED); + // Next adjust the ED we use for the traversal. + ED.EncounteredNonLocalSideEffect = false; + ED.IsReachedFromAlignedBarrierOnly = true; + // Aligned barrier collection has to come last.
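+    // Clearing first ensures the barrier we just hit ends up as the sole
+    // entry of the set, so later barriers can chain deletions back to it.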
+ ED.clearAssumeInstAndAlignedBarriers(); + if (CB) + ED.addAlignedBarrier(A, *CB); }; - for (auto *BB : RPOT) { - if (!MergePredecessorStates(BB)) - SingleThreadedBBs.remove(BB); + auto &LivenessAA = + A.getAAFor<AAIsDead>(*this, getIRPosition(), DepClassTy::OPTIONAL); + + // Set \p R to \p V and report true if that changed \p R. + auto SetAndRecord = [&](bool &R, bool V) { + bool Eq = (R == V); + R = V; + return !Eq; + }; + + auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); + + Function *F = getAnchorScope(); + BasicBlock &EntryBB = F->getEntryBlock(); + + SmallVector<Instruction *> SyncInstWorklist; + for (auto &RIt : *RPOT) { + BasicBlock &BB = *RIt; + + ExecutionDomainTy ED; + // Propagate "incoming edges" into information about this block. + if (&BB == &EntryBB) { + handleEntryBB(A, ED); + } else { + // For live non-entry blocks we only propagate information via live edges. + if (LivenessAA.isAssumedDead(&BB)) + continue; + + for (auto *PredBB : predecessors(&BB)) { + if (LivenessAA.isEdgeDead(PredBB, &BB)) + continue; + bool InitialEdgeOnly = isInitialThreadOnlyEdge( + A, dyn_cast<BranchInst>(PredBB->getTerminator()), BB); + mergeInPredecessor(A, ED, BEDMap[PredBB], InitialEdgeOnly); + } + } + + // Now we traverse the block, accumulate effects in ED and attach + // information to calls. + for (Instruction &I : BB) { + bool UsedAssumedInformation; + if (A.isAssumedDead(I, *this, &LivenessAA, UsedAssumedInformation, + /* CheckBBLivenessOnly */ false, DepClassTy::OPTIONAL, + /* CheckForDeadStore */ true)) + continue; + + // Assumes and "assume-like" intrinsics (dbg, lifetime, ...) are handled + // first; the former are collected, the latter are ignored. + if (auto *II = dyn_cast<IntrinsicInst>(&I)) { + if (auto *AI = dyn_cast_or_null<AssumeInst>(II)) { + ED.addAssumeInst(A, *AI); + continue; + } + // TODO: Should we also collect and delete lifetime markers? + if (II->isAssumeLikeIntrinsic()) + continue; + } + + auto *CB = dyn_cast<CallBase>(&I); + bool IsNoSync = AA::isNoSyncInst(A, I, *this); + bool IsAlignedBarrier = + !IsNoSync && CB && AANoSync::isAlignedBarrier(*CB); + + // Next we check for calls. Aligned barriers are handled + // explicitly, everything else is kept for the backward traversal and will + // also affect our state. + if (CB) { + if (IsAlignedBarrier) { + HandleAlignedBarrier(CB, ED); + continue; + } + + // Check the pointer(s) of a memory intrinsic explicitly. + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&I)) { + if (!ED.EncounteredNonLocalSideEffect && + AA::isPotentiallyAffectedByBarrier(A, I, *this)) + ED.EncounteredNonLocalSideEffect = true; + if (!IsNoSync) { + ED.IsReachedFromAlignedBarrierOnly = false; + SyncInstWorklist.push_back(&I); + } + continue; + } + + // Record how we entered the call, then accumulate the effect of the + // call in ED for potential use by the callee. + auto &CallED = CEDMap[CB]; + mergeInPredecessor(A, CallED, ED); + + // If we have a sync-definition we can check if it starts/ends in an + // aligned barrier. If we are unsure we assume any sync breaks + // alignment.
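+        // Declarations provide no execution domain, so they fall through to
+        // the conservative handling below.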
+ Function *Callee = CB->getCalledFunction(); + if (!IsNoSync && Callee && !Callee->isDeclaration()) { + const auto &EDAA = A.getAAFor<AAExecutionDomain>( + *this, IRPosition::function(*Callee), DepClassTy::OPTIONAL); + if (EDAA.getState().isValidState()) { + const auto &CalleeED = EDAA.getFunctionExecutionDomain(); + ED.IsReachedFromAlignedBarrierOnly = + CalleeED.IsReachedFromAlignedBarrierOnly; + if (IsNoSync || !CalleeED.IsReachedFromAlignedBarrierOnly) + ED.EncounteredNonLocalSideEffect |= + CalleeED.EncounteredNonLocalSideEffect; + else + ED.EncounteredNonLocalSideEffect = + CalleeED.EncounteredNonLocalSideEffect; + if (!CalleeED.IsReachingAlignedBarrierOnly) + SyncInstWorklist.push_back(&I); + if (CalleeED.IsReachedFromAlignedBarrierOnly) + mergeInPredecessorBarriersAndAssumptions(A, ED, CalleeED); + continue; + } + } + ED.IsReachedFromAlignedBarrierOnly = + IsNoSync && ED.IsReachedFromAlignedBarrierOnly; + ED.EncounteredNonLocalSideEffect |= true; + if (!IsNoSync) + SyncInstWorklist.push_back(&I); + } + + if (!I.mayHaveSideEffects() && !I.mayReadFromMemory()) + continue; + + // If we have a callee we try to use fine-grained information to + // determine local side-effects. + if (CB) { + const auto &MemAA = A.getAAFor<AAMemoryLocation>( + *this, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL); + + auto AccessPred = [&](const Instruction *I, const Value *Ptr, + AAMemoryLocation::AccessKind, + AAMemoryLocation::MemoryLocationsKind) { + return !AA::isPotentiallyAffectedByBarrier(A, {Ptr}, *this, I); + }; + if (MemAA.getState().isValidState() && + MemAA.checkForAllAccessesToMemoryKind( + AccessPred, AAMemoryLocation::ALL_LOCATIONS)) + continue; + } + + if (!I.mayHaveSideEffects() && OMPInfoCache.isOnlyUsedByAssume(I)) + continue; + + if (auto *LI = dyn_cast<LoadInst>(&I)) + if (LI->hasMetadata(LLVMContext::MD_invariant_load)) + continue; + + if (!ED.EncounteredNonLocalSideEffect && + AA::isPotentiallyAffectedByBarrier(A, I, *this)) + ED.EncounteredNonLocalSideEffect = true; + } + + if (!isa<UnreachableInst>(BB.getTerminator()) && + !BB.getTerminator()->getNumSuccessors()) { + + auto &FnED = BEDMap[nullptr]; + mergeInPredecessor(A, FnED, ED); + + if (OMPInfoCache.Kernels.count(F)) + HandleAlignedBarrier(nullptr, ED); + } + + ExecutionDomainTy &StoredED = BEDMap[&BB]; + ED.IsReachingAlignedBarrierOnly = StoredED.IsReachingAlignedBarrierOnly; + + // Check if we computed anything different as part of the forward + // traversal. We do not take assumptions and aligned barriers into account + // as they do not influence the state we iterate. Backward traversal values + // are handled later on. + if (ED.IsExecutedByInitialThreadOnly != + StoredED.IsExecutedByInitialThreadOnly || + ED.IsReachedFromAlignedBarrierOnly != + StoredED.IsReachedFromAlignedBarrierOnly || + ED.EncounteredNonLocalSideEffect != + StoredED.EncounteredNonLocalSideEffect) + Changed = true; + + // Update the state with the new value. + StoredED = std::move(ED); + } + + // Propagate (non-aligned) sync instruction effects backwards until the + // entry is hit or an aligned barrier.
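+  // Every call site passed on the backward walk loses
+  // IsReachingAlignedBarrierOnly; block boundaries continue the walk into
+  // all live predecessors.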
+ SmallSetVector<BasicBlock *, 16> Visited; + while (!SyncInstWorklist.empty()) { + Instruction *SyncInst = SyncInstWorklist.pop_back_val(); + Instruction *CurInst = SyncInst; + bool HitAlignedBarrier = false; + while ((CurInst = CurInst->getPrevNode())) { + auto *CB = dyn_cast<CallBase>(CurInst); + if (!CB) + continue; + auto &CallED = CEDMap[CB]; + if (SetAndRecord(CallED.IsReachingAlignedBarrierOnly, false)) + Changed = true; + HitAlignedBarrier = AlignedBarriers.count(CB); + if (HitAlignedBarrier) + break; + } + if (HitAlignedBarrier) + continue; + BasicBlock *SyncBB = SyncInst->getParent(); + for (auto *PredBB : predecessors(SyncBB)) { + if (LivenessAA.isEdgeDead(PredBB, SyncBB)) + continue; + if (!Visited.insert(PredBB)) + continue; + SyncInstWorklist.push_back(PredBB->getTerminator()); + auto &PredED = BEDMap[PredBB]; + if (SetAndRecord(PredED.IsReachingAlignedBarrierOnly, false)) + Changed = true; + } + if (SyncBB != &EntryBB) + continue; + auto &FnED = BEDMap[nullptr]; + if (SetAndRecord(FnED.IsReachingAlignedBarrierOnly, false)) + Changed = true; } - return (NumSingleThreadedBBs == SingleThreadedBBs.size()) - ? ChangeStatus::UNCHANGED - : ChangeStatus::CHANGED; + return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; } /// Try to replace memory allocation calls called by a single thread with a @@ -2963,9 +3116,11 @@ Attributor::SimplifictionCallbackTy SCB = [](const IRPosition &, const AbstractAttribute *, bool &) -> std::optional<Value *> { return nullptr; }; + + Function *F = getAnchorScope(); for (User *U : RFI.Declaration->users()) if (CallBase *CB = dyn_cast<CallBase>(U)) { - if (CB->getCaller() != getAnchorScope()) + if (CB->getFunction() != F) continue; MallocCalls.insert(CB); A.registerSimplificationCallback(IRPosition::callsite_returned(*CB), @@ -3079,6 +3234,8 @@ if (CallBase *CB = dyn_cast<CallBase>(U)) { if (CB->getCaller() != F) continue; + if (!MallocCalls.count(CB)) + continue; if (!isa<ConstantInt>(CB->getArgOperand(0))) { MallocCalls.remove(CB); continue; diff --git a/llvm/test/Transforms/Attributor/reduced/openmp_opt_dont_follow_gep_without_value.ll b/llvm/test/Transforms/Attributor/reduced/openmp_opt_dont_follow_gep_without_value.ll --- a/llvm/test/Transforms/Attributor/reduced/openmp_opt_dont_follow_gep_without_value.ll +++ b/llvm/test/Transforms/Attributor/reduced/openmp_opt_dont_follow_gep_without_value.ll @@ -25,7 +25,6 @@ ; CHECK: Function Attrs: norecurse nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@f ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[TMP1:%.*]] = alloca i64, align 8 ; CHECK-NEXT: ret void ; ; diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll --- a/llvm/test/Transforms/OpenMP/barrier_removal.ll +++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt < %s -S -passes=openmp-opt-cgscc | FileCheck %s +; RUN: opt < %s -S -passes=openmp-opt | FileCheck %s --check-prefixes=CHECK,MODULE +; RUN: opt < %s -S -passes=openmp-opt-cgscc | FileCheck %s --check-prefixes=CHECK,CGSCC +target triple = "amdgcn-amd-amdhsa" declare void @useI32(i32) declare void @unknown() @@ -102,7 +104,6 @@ ; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @GC2 to ptr), align 4 ; CHECK-NEXT: [[ARGC:%.*]] = addrspacecast ptr addrspace(4) [[ARG]] to ptr ; CHECK-NEXT: [[C:%.*]] = load i32, ptr [[ARGC]], align 4 -; CHECK-NEXT: call void @aligned_barrier() ; CHECK-NEXT: [[D:%.*]] = add i32 42, [[B]] ;
CHECK-NEXT: [[E:%.*]] = add i32 [[D]], [[C]] ; CHECK-NEXT: call void @useI32(i32 [[E]]) @@ -164,7 +165,6 @@ ; CHECK-NEXT: [[A:%.*]] = load i32, ptr @PG1, align 4 ; CHECK-NEXT: store i32 [[A]], ptr [[LOC]], align 4 ; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(5) @PG2 to ptr), align 4 -; CHECK-NEXT: call void @aligned_barrier() ; CHECK-NEXT: [[ARGC:%.*]] = addrspacecast ptr addrspace(5) [[ARG]] to ptr ; CHECK-NEXT: store i32 [[B]], ptr [[ARGC]], align 4 ; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[LOC]], align 4 @@ -228,31 +228,651 @@ ret void } -!llvm.module.flags = !{!12,!13} -!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11} +define void @multiple_blocks_kernel_1(i1 %c0, i1 %c1) { +; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_kernel_1 +; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]] +; CHECK: t0: +; CHECK-NEXT: br label [[T0B:%.*]] +; CHECK: t0b: +; CHECK-NEXT: br label [[M:%.*]] +; CHECK: f0: +; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]] +; CHECK: t1: +; CHECK-NEXT: br label [[M]] +; CHECK: f1: +; CHECK-NEXT: br label [[M]] +; CHECK: m: +; CHECK-NEXT: ret void +; + call void @llvm.nvvm.barrier0() + call void @aligned_barrier() + br i1 %c0, label %t0, label %f0 +t0: + call void @aligned_barrier() + br label %t0b +t0b: + call void @aligned_barrier() + br label %m +f0: + call void @aligned_barrier() + call void @llvm.nvvm.barrier0() + br i1 %c1, label %t1, label %f1 +t1: + call void @aligned_barrier() + br label %m +f1: + call void @aligned_barrier() + br label %m +m: + call void @aligned_barrier() + ret void +} + +define void @multiple_blocks_kernel_2(i1 %c0, i1 %c1, i32* %p) { +; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_kernel_2 +; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: store i32 4, ptr [[P]], align 4 +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]] +; CHECK: t0: +; CHECK-NEXT: br label [[T0B:%.*]] +; CHECK: t0b: +; CHECK-NEXT: br label [[M:%.*]] +; CHECK: f0: +; CHECK-NEXT: store i32 4, ptr [[P]], align 4 +; CHECK-NEXT: call void @llvm.nvvm.barrier0() +; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]] +; CHECK: t1: +; CHECK-NEXT: br label [[M]] +; CHECK: f1: +; CHECK-NEXT: br label [[M]] +; CHECK: m: +; CHECK-NEXT: store i32 4, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + call void @llvm.nvvm.barrier0() + store i32 4, i32* %p + call void @aligned_barrier() + br i1 %c0, label %t0, label %f0 +t0: + call void @aligned_barrier() + br label %t0b +t0b: + call void @aligned_barrier() + br label %m +f0: + call void @aligned_barrier() + store i32 4, i32* %p + call void @llvm.nvvm.barrier0() + br i1 %c1, label %t1, label %f1 +t1: + call void @aligned_barrier() + br label %m +f1: + call void @aligned_barrier() + br label %m +m: + store i32 4, i32* %p + call void @aligned_barrier() + ret void +} + +define void @multiple_blocks_non_kernel_1(i1 %c0, i1 %c1) { +; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_1 +; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: call void @llvm.nvvm.barrier0() +; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]] +; CHECK: t0: +; CHECK-NEXT: br label [[T0B:%.*]] +; CHECK: t0b: +; CHECK-NEXT: br label [[M:%.*]] +; CHECK: f0: +; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]] +; CHECK: t1: +; CHECK-NEXT: br label [[M]] +; CHECK: f1: +; CHECK-NEXT: br label [[M]] +; CHECK: m: +; CHECK-NEXT: ret void 
+; + call void @llvm.nvvm.barrier0() + call void @aligned_barrier() + br i1 %c0, label %t0, label %f0 +t0: + call void @aligned_barrier() + br label %t0b +t0b: + call void @aligned_barrier() + br label %m +f0: + call void @aligned_barrier() + call void @llvm.nvvm.barrier0() + br i1 %c1, label %t1, label %f1 +t1: + call void @aligned_barrier() + br label %m +f1: + call void @aligned_barrier() + br label %m +m: + call void @aligned_barrier() + ret void +} + +define void @multiple_blocks_non_kernel_2(i1 %c0, i1 %c1) { +; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_2 +; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]] +; CHECK: t0: +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: br label [[T0B:%.*]] +; CHECK: t0b: +; CHECK-NEXT: br label [[M:%.*]] +; CHECK: f0: +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]] +; CHECK: t1: +; CHECK-NEXT: br label [[M]] +; CHECK: f1: +; CHECK-NEXT: br label [[M]] +; CHECK: m: +; CHECK-NEXT: ret void +; + br i1 %c0, label %t0, label %f0 +t0: + call void @aligned_barrier() + br label %t0b +t0b: + call void @aligned_barrier() + br label %m +f0: + call void @aligned_barrier() + call void @llvm.nvvm.barrier0() + br i1 %c1, label %t1, label %f1 +t1: + call void @aligned_barrier() + br label %m +f1: + call void @aligned_barrier() + br label %m +m: + call void @aligned_barrier() + ret void +} + +define void @multiple_blocks_non_kernel_3(i1 %c0, i1 %c1) { +; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_3 +; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]] +; CHECK: t0: +; CHECK-NEXT: br label [[T0B:%.*]] +; CHECK: t0b: +; CHECK-NEXT: br label [[M:%.*]] +; CHECK: f0: +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]] +; CHECK: t1: +; CHECK-NEXT: br label [[M]] +; CHECK: f1: +; CHECK-NEXT: br label [[M]] +; CHECK: m: +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: ret void +; + br i1 %c0, label %t0, label %f0 +t0: + br label %t0b +t0b: + br label %m +f0: + call void @aligned_barrier() + call void @llvm.nvvm.barrier0() + br i1 %c1, label %t1, label %f1 +t1: + call void @aligned_barrier() + br label %m +f1: + call void @aligned_barrier() + br label %m +m: + call void @aligned_barrier() + ret void +} + +define void @multiple_blocks_non_kernel_effects_1(i1 %c0, i1 %c1, i32* %p) { +; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_effects_1 +; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: store i32 0, ptr [[P]], align 4 +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]] +; CHECK: t0: +; CHECK-NEXT: store i32 1, ptr [[P]], align 4 +; CHECK-NEXT: br label [[T0B:%.*]] +; CHECK: t0b: +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: br label [[M:%.*]] +; CHECK: f0: +; CHECK-NEXT: store i32 2, ptr [[P]], align 4 +; CHECK-NEXT: br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]] +; CHECK: t1: +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: br label [[M]] +; CHECK: f1: +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: br label [[M]] +; CHECK: m: +; CHECK-NEXT: store i32 3, ptr [[P]], align 4 +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: ret void +; + call void @aligned_barrier() + store i32 0, i32* %p + call void @aligned_barrier() 
+ br i1 %c0, label %t0, label %f0 +t0: + call void @aligned_barrier() + store i32 1, i32* %p + br label %t0b +t0b: + call void @aligned_barrier() + br label %m +f0: + call void @aligned_barrier() + call void @llvm.nvvm.barrier0() + store i32 2, i32* %p + br i1 %c1, label %t1, label %f1 +t1: + call void @aligned_barrier() + br label %m +f1: + call void @aligned_barrier() + br label %m +m: + call void @aligned_barrier() + store i32 3, i32* %p + call void @aligned_barrier() + ret void +} + +define internal void @write_then_barrier0(i32* %p) { +; MODULE-LABEL: define {{[^@]+}}@write_then_barrier0 +; MODULE-SAME: (ptr [[P:%.*]]) { +; MODULE-NEXT: store i32 0, ptr [[P]], align 4 +; MODULE-NEXT: ret void +; +; CGSCC-LABEL: define {{[^@]+}}@write_then_barrier0 +; CGSCC-SAME: (ptr [[P:%.*]]) { +; CGSCC-NEXT: store i32 0, ptr [[P]], align 4 +; CGSCC-NEXT: call void @aligned_barrier() +; CGSCC-NEXT: ret void +; + store i32 0, i32* %p + call void @aligned_barrier() + ret void +} +define internal void @barrier_then_write0(i32* %p) { +; MODULE-LABEL: define {{[^@]+}}@barrier_then_write0 +; MODULE-SAME: (ptr [[P:%.*]]) { +; MODULE-NEXT: store i32 0, ptr [[P]], align 4 +; MODULE-NEXT: ret void +; +; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write0 +; CGSCC-SAME: (ptr [[P:%.*]]) { +; CGSCC-NEXT: call void @aligned_barrier() +; CGSCC-NEXT: store i32 0, ptr [[P]], align 4 +; CGSCC-NEXT: ret void +; + call void @aligned_barrier() + store i32 0, i32* %p + ret void +} +define internal void @barrier_then_write_then_barrier0(i32* %p) { +; MODULE-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier0 +; MODULE-SAME: (ptr [[P:%.*]]) { +; MODULE-NEXT: store i32 0, ptr [[P]], align 4 +; MODULE-NEXT: call void @aligned_barrier() +; MODULE-NEXT: ret void +; +; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier0 +; CGSCC-SAME: (ptr [[P:%.*]]) { +; CGSCC-NEXT: call void @aligned_barrier() +; CGSCC-NEXT: store i32 0, ptr [[P]], align 4 +; CGSCC-NEXT: call void @aligned_barrier() +; CGSCC-NEXT: ret void +; + call void @aligned_barrier() + store i32 0, i32* %p + call void @aligned_barrier() + ret void +} +define void @multiple_blocks_functions_kernel_effects_0(i1 %c0, i1 %c1, i32* %p) { +; MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_kernel_effects_0 +; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) { +; MODULE-NEXT: call void @barrier_then_write_then_barrier0(ptr [[P]]) +; MODULE-NEXT: br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]] +; MODULE: t03: +; MODULE-NEXT: call void @barrier_then_write0(ptr [[P]]) +; MODULE-NEXT: br label [[T0B3:%.*]] +; MODULE: t0b3: +; MODULE-NEXT: br label [[M3:%.*]] +; MODULE: f03: +; MODULE-NEXT: call void @barrier_then_write0(ptr [[P]]) +; MODULE-NEXT: br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]] +; MODULE: t13: +; MODULE-NEXT: br label [[M3]] +; MODULE: f13: +; MODULE-NEXT: br label [[M3]] +; MODULE: m3: +; MODULE-NEXT: call void @write_then_barrier0(ptr [[P]]) +; MODULE-NEXT: ret void +; +; CGSCC-LABEL: define {{[^@]+}}@multiple_blocks_functions_kernel_effects_0 +; CGSCC-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) { +; CGSCC-NEXT: call void @barrier_then_write_then_barrier0(ptr [[P]]) +; CGSCC-NEXT: call void @aligned_barrier() +; CGSCC-NEXT: br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]] +; CGSCC: t03: +; CGSCC-NEXT: call void @barrier_then_write0(ptr [[P]]) +; CGSCC-NEXT: br label [[T0B3:%.*]] +; CGSCC: t0b3: +; CGSCC-NEXT: call void @aligned_barrier() +; CGSCC-NEXT: br label [[M3:%.*]] +; CGSCC: f03: +; CGSCC-NEXT: call void 
@barrier_then_write0(ptr [[P]]) +; CGSCC-NEXT: br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]] +; CGSCC: t13: +; CGSCC-NEXT: call void @aligned_barrier() +; CGSCC-NEXT: br label [[M3]] +; CGSCC: f13: +; CGSCC-NEXT: call void @aligned_barrier() +; CGSCC-NEXT: br label [[M3]] +; CGSCC: m3: +; CGSCC-NEXT: call void @write_then_barrier0(ptr [[P]]) +; CGSCC-NEXT: ret void +; + call void @barrier_then_write_then_barrier0(i32* %p) + call void @aligned_barrier() + br i1 %c0, label %t03, label %f03 +t03: + call void @barrier_then_write0(i32* %p) + br label %t0b3 +t0b3: + call void @aligned_barrier() + br label %m3 +f03: + call void @aligned_barrier() + call void @barrier_then_write0(i32* %p) + br i1 %c1, label %t13, label %f13 +t13: + call void @aligned_barrier() + br label %m3 +f13: + call void @aligned_barrier() + br label %m3 +m3: + call void @aligned_barrier() + call void @write_then_barrier0(i32* %p) + ret void +} +define internal void @write_then_barrier1(i32* %p) { +; CHECK-LABEL: define {{[^@]+}}@write_then_barrier1 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: store i32 0, ptr [[P]], align 4 +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: ret void +; + store i32 0, i32* %p + call void @aligned_barrier() + ret void +} +define internal void @barrier_then_write1(i32* %p) { +; MODULE-LABEL: define {{[^@]+}}@barrier_then_write1 +; MODULE-SAME: (ptr [[P:%.*]]) { +; MODULE-NEXT: store i32 0, ptr [[P]], align 4 +; MODULE-NEXT: ret void +; +; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write1 +; CGSCC-SAME: (ptr [[P:%.*]]) { +; CGSCC-NEXT: call void @aligned_barrier() +; CGSCC-NEXT: store i32 0, ptr [[P]], align 4 +; CGSCC-NEXT: ret void +; + call void @aligned_barrier() + store i32 0, i32* %p + ret void +} +define internal void @barrier_then_write_then_barrier1(i32* %p) { +; CHECK-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier1 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: store i32 0, ptr [[P]], align 4 +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: ret void +; + call void @aligned_barrier() + store i32 0, i32* %p + call void @aligned_barrier() + ret void +} +define void @multiple_blocks_functions_non_kernel_effects_1(i1 %c0, i1 %c1, i32* %p) { +; MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_1 +; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) { +; MODULE-NEXT: call void @barrier_then_write_then_barrier1(ptr [[P]]) +; MODULE-NEXT: br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]] +; MODULE: t03: +; MODULE-NEXT: call void @barrier_then_write1(ptr [[P]]) +; MODULE-NEXT: br label [[T0B3:%.*]] +; MODULE: t0b3: +; MODULE-NEXT: call void @aligned_barrier() +; MODULE-NEXT: br label [[M3:%.*]] +; MODULE: f03: +; MODULE-NEXT: call void @barrier_then_write1(ptr [[P]]) +; MODULE-NEXT: br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]] +; MODULE: t13: +; MODULE-NEXT: call void @aligned_barrier() +; MODULE-NEXT: br label [[M3]] +; MODULE: f13: +; MODULE-NEXT: call void @aligned_barrier() +; MODULE-NEXT: br label [[M3]] +; MODULE: m3: +; MODULE-NEXT: call void @write_then_barrier1(ptr [[P]]) +; MODULE-NEXT: ret void +; +; CGSCC-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_1 +; CGSCC-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) { +; CGSCC-NEXT: call void @barrier_then_write_then_barrier1(ptr [[P]]) +; CGSCC-NEXT: call void @aligned_barrier() +; CGSCC-NEXT: br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]] +; CGSCC: t03: +; CGSCC-NEXT: call void 
@barrier_then_write1(ptr [[P]]) +; CGSCC-NEXT: br label [[T0B3:%.*]] +; CGSCC: t0b3: +; CGSCC-NEXT: call void @aligned_barrier() +; CGSCC-NEXT: br label [[M3:%.*]] +; CGSCC: f03: +; CGSCC-NEXT: call void @barrier_then_write1(ptr [[P]]) +; CGSCC-NEXT: br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]] +; CGSCC: t13: +; CGSCC-NEXT: call void @aligned_barrier() +; CGSCC-NEXT: br label [[M3]] +; CGSCC: f13: +; CGSCC-NEXT: call void @aligned_barrier() +; CGSCC-NEXT: br label [[M3]] +; CGSCC: m3: +; CGSCC-NEXT: call void @write_then_barrier1(ptr [[P]]) +; CGSCC-NEXT: ret void +; + call void @barrier_then_write_then_barrier1(i32* %p) + call void @aligned_barrier() + br i1 %c0, label %t03, label %f03 +t03: + call void @barrier_then_write1(i32* %p) + br label %t0b3 +t0b3: + call void @aligned_barrier() + br label %m3 +f03: + call void @aligned_barrier() + call void @barrier_then_write1(i32* %p) + br i1 %c1, label %t13, label %f13 +t13: + call void @aligned_barrier() + br label %m3 +f13: + call void @aligned_barrier() + br label %m3 +m3: + call void @aligned_barrier() + call void @write_then_barrier1(i32* %p) + ret void +} + +define internal void @write_then_barrier2(i32* %p) { +; CHECK-LABEL: define {{[^@]+}}@write_then_barrier2 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: store i32 0, ptr [[P]], align 4 +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: ret void +; + store i32 0, i32* %p + call void @aligned_barrier() + ret void +} +define internal void @barrier_then_write2(i32* %p) { +; CHECK-LABEL: define {{[^@]+}}@barrier_then_write2 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: store i32 0, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + call void @aligned_barrier() + store i32 0, i32* %p + ret void +} +define internal void @barrier_then_write_then_barrier2(i32* %p) { +; CHECK-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier2 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: store i32 0, ptr [[P]], align 4 +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: ret void +; + call void @aligned_barrier() + store i32 0, i32* %p + call void @aligned_barrier() + ret void +} +define void @multiple_blocks_functions_non_kernel_effects_2(i1 %c0, i1 %c1, i32* %p) { +; MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_2 +; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) { +; MODULE-NEXT: call void @barrier_then_write_then_barrier2(ptr [[P]]) +; MODULE-NEXT: store i32 0, ptr [[P]], align 4 +; MODULE-NEXT: br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]] +; MODULE: t03: +; MODULE-NEXT: call void @barrier_then_write2(ptr [[P]]) +; MODULE-NEXT: br label [[T0B3:%.*]] +; MODULE: t0b3: +; MODULE-NEXT: call void @aligned_barrier() +; MODULE-NEXT: br label [[M3:%.*]] +; MODULE: f03: +; MODULE-NEXT: call void @aligned_barrier() +; MODULE-NEXT: call void @barrier_then_write2(ptr [[P]]) +; MODULE-NEXT: br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]] +; MODULE: t13: +; MODULE-NEXT: call void @aligned_barrier() +; MODULE-NEXT: br label [[M3]] +; MODULE: f13: +; MODULE-NEXT: call void @aligned_barrier() +; MODULE-NEXT: br label [[M3]] +; MODULE: m3: +; MODULE-NEXT: call void @write_then_barrier2(ptr [[P]]) +; MODULE-NEXT: store i32 0, ptr [[P]], align 4 +; MODULE-NEXT: ret void +; +; CGSCC-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_2 +; CGSCC-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) { +; CGSCC-NEXT: call void 
@barrier_then_write_then_barrier2(ptr [[P]]) +; CGSCC-NEXT: call void @aligned_barrier() +; CGSCC-NEXT: store i32 0, ptr [[P]], align 4 +; CGSCC-NEXT: br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]] +; CGSCC: t03: +; CGSCC-NEXT: call void @barrier_then_write2(ptr [[P]]) +; CGSCC-NEXT: br label [[T0B3:%.*]] +; CGSCC: t0b3: +; CGSCC-NEXT: call void @aligned_barrier() +; CGSCC-NEXT: br label [[M3:%.*]] +; CGSCC: f03: +; CGSCC-NEXT: call void @aligned_barrier() +; CGSCC-NEXT: call void @barrier_then_write2(ptr [[P]]) +; CGSCC-NEXT: br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]] +; CGSCC: t13: +; CGSCC-NEXT: call void @aligned_barrier() +; CGSCC-NEXT: br label [[M3]] +; CGSCC: f13: +; CGSCC-NEXT: call void @aligned_barrier() +; CGSCC-NEXT: br label [[M3]] +; CGSCC: m3: +; CGSCC-NEXT: call void @write_then_barrier2(ptr [[P]]) +; CGSCC-NEXT: store i32 0, ptr [[P]], align 4 +; CGSCC-NEXT: ret void +; + call void @barrier_then_write_then_barrier2(i32* %p) + call void @aligned_barrier() + store i32 0, i32* %p + br i1 %c0, label %t03, label %f03 +t03: + call void @barrier_then_write2(i32* %p) + br label %t0b3 +t0b3: + call void @aligned_barrier() + br label %m3 +f03: + call void @aligned_barrier() + call void @barrier_then_write2(i32* %p) + br i1 %c1, label %t13, label %f13 +t13: + call void @aligned_barrier() + br label %m3 +f13: + call void @aligned_barrier() + br label %m3 +m3: + call void @aligned_barrier() + call void @write_then_barrier2(i32* %p) + store i32 0, i32* %p + ret void +} + +!llvm.module.flags = !{!16,!15} +!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11,!12,!13,!14} -!0 = !{ptr @pos_empty_1, !"kernel", i32 1} -!1 = !{ptr @pos_empty_2, !"kernel", i32 1} -!2 = !{ptr @pos_empty_3, !"kernel", i32 1} -!3 = !{ptr @pos_empty_4, !"kernel", i32 1} -!4 = !{ptr @pos_empty_5, !"kernel", i32 1} -!5 = !{ptr @pos_empty_6, !"kernel", i32 1} -!6 = !{ptr @neg_empty_7, !"kernel", i32 1} -!7 = !{ptr @pos_constant_loads, !"kernel", i32 1} -!8 = !{ptr @neg_loads, !"kernel", i32 1} -!9 = !{ptr @pos_priv_mem, !"kernel", i32 1} -!10 = !{ptr @neg_mem, !"kernel", i32 1} -!11 = !{ptr @pos_multiple, !"kernel", i32 1} -!12 = !{i32 7, !"openmp", i32 50} -!13 = !{i32 7, !"openmp-device", i32 50} +!0 = !{void ()* @pos_empty_1, !"kernel", i32 1} +!1 = !{void ()* @pos_empty_2, !"kernel", i32 1} +!2 = !{void ()* @pos_empty_3, !"kernel", i32 1} +!3 = !{void ()* @pos_empty_4, !"kernel", i32 1} +!4 = !{void ()* @pos_empty_5, !"kernel", i32 1} +!5 = !{void ()* @pos_empty_6, !"kernel", i32 1} +!6 = !{void ()* @neg_empty_7, !"kernel", i32 1} +!7 = !{void ()* @pos_constant_loads, !"kernel", i32 1} +!8 = !{void ()* @neg_loads, !"kernel", i32 1} +!9 = !{void ()* @pos_priv_mem, !"kernel", i32 1} +!10 = !{void ()* @neg_mem, !"kernel", i32 1} +!11 = !{void ()* @pos_multiple, !"kernel", i32 1} +!12 = !{void (i1,i1)* @multiple_blocks_kernel_1, !"kernel", i32 1} +!13 = !{void (i1,i1,i32*)* @multiple_blocks_kernel_2, !"kernel", i32 1} +!14 = !{void (i1,i1,i32*)* @multiple_blocks_functions_kernel_effects_0, !"kernel", i32 1} +!15 = !{i32 7, !"openmp", i32 50} +!16 = !{i32 7, !"openmp-device", i32 50} ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { "llvm.assume"="ompx_aligned_barrier" } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nounwind } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ;. 
-; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META2:![0-9]+]] = !{ptr @pos_empty_1, !"kernel", i32 1}
; CHECK: [[META3:![0-9]+]] = !{ptr @pos_empty_2, !"kernel", i32 1}
; CHECK: [[META4:![0-9]+]] = !{ptr @pos_empty_3, !"kernel", i32 1}
@@ -265,4 +885,7 @@
; CHECK: [[META11:![0-9]+]] = !{ptr @pos_priv_mem, !"kernel", i32 1}
; CHECK: [[META12:![0-9]+]] = !{ptr @neg_mem, !"kernel", i32 1}
; CHECK: [[META13:![0-9]+]] = !{ptr @pos_multiple, !"kernel", i32 1}
+; CHECK: [[META14:![0-9]+]] = !{ptr @multiple_blocks_kernel_1, !"kernel", i32 1}
+; CHECK: [[META15:![0-9]+]] = !{ptr @multiple_blocks_kernel_2, !"kernel", i32 1}
+; CHECK: [[META16:![0-9]+]] = !{ptr @multiple_blocks_functions_kernel_effects_0, !"kernel", i32 1}
;.
diff --git a/llvm/test/Transforms/OpenMP/deduplication_target.ll b/llvm/test/Transforms/OpenMP/deduplication_target.ll
--- a/llvm/test/Transforms/OpenMP/deduplication_target.ll
+++ b/llvm/test/Transforms/OpenMP/deduplication_target.ll
@@ -18,7 +18,6 @@
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_50_a3e09bf8_foo_l2
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
diff --git a/llvm/test/Transforms/OpenMP/heap-to-shared-missing-declarations.ll b/llvm/test/Transforms/OpenMP/heap-to-shared-missing-declarations.ll
--- a/llvm/test/Transforms/OpenMP/heap-to-shared-missing-declarations.ll
+++ b/llvm/test/Transforms/OpenMP/heap-to-shared-missing-declarations.ll
@@ -7,7 +7,7 @@
; CHECK-LABEL: define {{[^@]+}}@outlined0
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: call void @func() #[[ATTR0]]
+; CHECK-NEXT: call void @func() #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() #[[ATTR0]]
; CHECK-NEXT: ret void
;
@@ -18,9 +18,9 @@
}
define internal void @func() {
-; CHECK: Function Attrs: nounwind
+; CHECK: Function Attrs: nosync nounwind
; CHECK-LABEL: define {{[^@]+}}@func
-; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = load ptr, ptr null, align 4294967296
; CHECK-NEXT: store i64 0, ptr [[I]], align 8
@@ -33,16 +33,16 @@
}
define internal void @outlined1() {
-; CHECK: Function Attrs: nounwind
+; CHECK: Function Attrs: nosync nounwind
; CHECK-LABEL: define {{[^@]+}}@outlined1
-; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = icmp sle i32 1, 0
; CHECK-NEXT: br i1 [[I]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: bb1:
-; CHECK-NEXT: call void @func() #[[ATTR0]]
+; CHECK-NEXT: call void @func() #[[ATTR1]]
; CHECK-NEXT: br label [[COMMON_RET:%.*]]
; CHECK: bb2:
; CHECK-NEXT: call void @__kmpc_free_shared(ptr null, i64 0) #[[ATTR0]]
@@ -67,7 +67,7 @@
define void @user() {
; CHECK-LABEL: define {{[^@]+}}@user() {
; CHECK-NEXT: call void @outlined0() #[[ATTR0]]
-; CHECK-NEXT: call void @outlined1() #[[ATTR0]]
+; CHECK-NEXT: call void @outlined1() #[[ATTR1]]
; CHECK-NEXT: ret void
;
call void @outlined0()
@@ -84,7 +84,7 @@
!1 = !{i32 7, !"openmp-device", i32 50}
;.
; CHECK: attributes #[[ATTR0]] = { nounwind }
-; CHECK: attributes #[[ATTR1:[0-9]+]] = { nosync nounwind }
+; CHECK: attributes #[[ATTR1]] = { nosync nounwind }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
diff --git a/llvm/test/Transforms/OpenMP/internals_cgscc.ll b/llvm/test/Transforms/OpenMP/internals_cgscc.ll
--- a/llvm/test/Transforms/OpenMP/internals_cgscc.ll
+++ b/llvm/test/Transforms/OpenMP/internals_cgscc.ll
@@ -27,7 +27,6 @@
define internal void @bar() {
; CHECK-LABEL: @bar(
-; CHECK-NEXT: call void @foo()
; CHECK-NEXT: ret void
;
call void @foo()
diff --git a/llvm/test/Transforms/OpenMP/reduced_pointer_info_assertion.ll b/llvm/test/Transforms/OpenMP/reduced_pointer_info_assertion.ll
--- a/llvm/test/Transforms/OpenMP/reduced_pointer_info_assertion.ll
+++ b/llvm/test/Transforms/OpenMP/reduced_pointer_info_assertion.ll
@@ -12,13 +12,7 @@
ret i32 0
}
-
define fastcc void @rec(ptr %0, i64 %1) {
-; CHECK-LABEL: define {{[^@]+}}@rec(
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP0:%.*]], i64 [[TMP1:%.*]]
-; CHECK-NEXT: store i32 0, ptr [[TMP3]], align 4
-; CHECK-NEXT: call fastcc void @rec(ptr [[TMP0]], i64 0)
-; CHECK-NEXT: ret void
%3 = getelementptr i32, ptr %0, i64 %1
store i32 0, ptr %3, align 4
call fastcc void @rec(ptr %0, i64 0)
@@ -44,9 +38,9 @@
;
;
; CGSCC-LABEL: define {{[^@]+}}@rec
-; CGSCC-SAME: (ptr [[TMP0:%.*]], i64 [[TMP1:%.*]]) {
+; CGSCC-SAME: (ptr nocapture writeonly [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
; CGSCC-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[TMP1]]
; CGSCC-NEXT: store i32 0, ptr [[TMP3]], align 4
-; CGSCC-NEXT: call fastcc void @rec(ptr [[TMP0]], i64 0)
+; CGSCC-NEXT: call fastcc void @rec(ptr nocapture writeonly [[TMP0]], i64 0) #[[ATTR1:[0-9]+]]
; CGSCC-NEXT: ret void
;
diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -19,8 +19,10 @@
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
;.
; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr
+; CHECK: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
;.
; CHECK-DISABLED: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr
+; CHECK-DISABLED: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
;.
define weak i32 @__kmpc_target_init(ptr, i8, i1) {
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
@@ -70,17 +72,17 @@
; CHECK-LABEL: define {{[^@]+}}@foo
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1
+; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@foo
; CHECK-DISABLED-SAME: () #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1
+; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
; CHECK-DISABLED-NEXT: ret void
;
entry:
- %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !12
+ %0 = call align 4 ptr @__kmpc_alloc_shared(i64 4), !dbg !12
call void @use(ptr %0)
call void @__kmpc_free_shared(ptr %0, i64 4)
ret void
@@ -88,52 +90,42 @@
define internal void @bar() {
; CHECK-LABEL: define {{[^@]+}}@bar
-; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]]
-; CHECK-NEXT: call void @share(ptr nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]]
-; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR0]]
+; CHECK-NEXT: [[TMP0:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR4:[0-9]+]], !dbg [[DBG8:![0-9]+]]
+; CHECK-NEXT: call void @share(ptr nofree [[TMP0]]) #[[ATTR0]], !dbg [[DBG8]]
+; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR4]]
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@bar
-; CHECK-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-DISABLED-SAME: () #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call ptr @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]]
-; CHECK-DISABLED-NEXT: call void @share(ptr nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]]
-; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR0]]
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR4:[0-9]+]], !dbg [[DBG8:![0-9]+]]
+; CHECK-DISABLED-NEXT: call void @share(ptr nofree [[TMP0]]) #[[ATTR0]], !dbg [[DBG8]]
+; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR4]]
; CHECK-DISABLED-NEXT: ret void
;
entry:
- %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !13
+ %0 = call align 4 ptr @__kmpc_alloc_shared(i64 4), !dbg !13
call void @share(ptr %0), !dbg !13
call void @__kmpc_free_shared(ptr %0, i64 4)
ret void
}
define internal void @use(ptr %x) {
-; CHECK-LABEL: define {{[^@]+}}@use
-; CHECK-SAME: (ptr [[X:%.*]]) #[[ATTR2:[0-9]+]] {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: ret void
-;
-; CHECK-DISABLED-LABEL: define {{[^@]+}}@use
-; CHECK-DISABLED-SAME: (ptr [[X:%.*]]) #[[ATTR2:[0-9]+]] {
-; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: ret void
-;
entry:
ret void
}
define internal void @share(ptr %x) {
; CHECK-LABEL: define {{[^@]+}}@share
-; CHECK-SAME: (ptr nofree [[X:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-SAME: (ptr nofree [[X:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: store ptr [[X]], ptr @S, align 8
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@share
-; CHECK-DISABLED-SAME: (ptr nofree [[X:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-DISABLED-SAME: (ptr nofree [[X:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: store ptr [[X]], ptr @S, align 8
; CHECK-DISABLED-NEXT: ret void
@@ -146,19 +138,17 @@
define void @unused() {
; CHECK-LABEL: define {{[^@]+}}@unused() {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1
-; CHECK-NEXT: call void @use(ptr undef)
+; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@unused() {
; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call ptr @__kmpc_alloc_shared(i64 4), !dbg [[DBG11:![0-9]+]]
-; CHECK-DISABLED-NEXT: call void @use(ptr [[TMP0]])
-; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4)
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR4]], !dbg [[DBG11:![0-9]+]]
+; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR4]]
; CHECK-DISABLED-NEXT: ret void
;
entry:
- %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !14
+ %0 = call align 4 ptr @__kmpc_alloc_shared(i64 4), !dbg !14
call void @use(ptr %0)
call void @__kmpc_free_shared(ptr %0, i64 4)
ret void
@@ -166,9 +156,9 @@
define internal void @convert_and_move_alloca() {
; CHECK-LABEL: define {{[^@]+}}@convert_and_move_alloca
-; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1
+; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
; CHECK-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4
; CHECK-NEXT: br label [[INITLOOP:%.*]]
; CHECK: initloop:
@@ -186,9 +176,9 @@
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@convert_and_move_alloca
-; CHECK-DISABLED-SAME: () #[[ATTR1]] {
+; CHECK-DISABLED-SAME: () #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1
+; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
; CHECK-DISABLED-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: br label [[INITLOOP:%.*]]
; CHECK-DISABLED: initloop:
@@ -217,7 +207,7 @@
br label %loopbody
loopbody:
- %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !16
+ %0 = call align 4 ptr @__kmpc_alloc_shared(i64 4), !dbg !16
call void @use(ptr %0)
call void @__kmpc_free_shared(ptr %0, i64 4)
%iv = load i32, ptr %iv_ptr
@@ -263,19 +253,17 @@
!15 = !DILocation(line: 8, column: 2, scope: !9)
!16 = !DILocation(line: 10, column: 2, scope: !9)
;.
-; CHECK: attributes #[[ATTR0]] = { nounwind }
-; CHECK: attributes #[[ATTR1]] = { nosync nounwind }
-; CHECK: attributes #[[ATTR2]] = { nounwind memory(none) }
-; CHECK: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind memory(write) }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allocsize(0) }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { "llvm.assume"="omp_no_openmp" }
+; CHECK: attributes #[[ATTR0]] = { nosync nounwind }
+; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(write) }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nosync nounwind allocsize(0) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" }
+; CHECK: attributes #[[ATTR4]] = { nounwind }
;.
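Reviewer note (not part of the patch): the remove_globalization checks above also document the heap-to-stack side of the change. A non-escaping __kmpc_alloc_shared/__kmpc_free_shared pair becomes a plain alloca, and the new `align 4` on the allocation call is what lets the [[DOTH2S]] alloca carry `align 4` instead of `align 1`. A sketch of the input shape, with a hypothetical function name (the runtime calls and the expected rewrite are from the test):

define internal void @h2s_sketch() {
  ; before: 4 bytes of device shared memory, freed on every path
  %0 = call align 4 ptr @__kmpc_alloc_shared(i64 4)
  call void @use(ptr %0)
  call void @__kmpc_free_shared(ptr %0, i64 4)
  ret void
}
declare ptr @__kmpc_alloc_shared(i64)
declare void @__kmpc_free_shared(ptr, i64)
declare void @use(ptr)
; after (the shape the CHECK lines verify):
;   %.h2s = alloca i8, i64 4, align 4
; with the paired __kmpc_free_shared call deleted.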
-; CHECK-DISABLED: attributes #[[ATTR0]] = { nounwind }
-; CHECK-DISABLED: attributes #[[ATTR1]] = { nosync nounwind }
-; CHECK-DISABLED: attributes #[[ATTR2]] = { nounwind memory(none) }
-; CHECK-DISABLED: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind memory(write) }
-; CHECK-DISABLED: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allocsize(0) }
-; CHECK-DISABLED: attributes #[[ATTR5:[0-9]+]] = { "llvm.assume"="omp_no_openmp" }
+; CHECK-DISABLED: attributes #[[ATTR0]] = { nosync nounwind }
+; CHECK-DISABLED: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(write) }
+; CHECK-DISABLED: attributes #[[ATTR2:[0-9]+]] = { nosync nounwind allocsize(0) }
+; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" }
+; CHECK-DISABLED: attributes #[[ATTR4]] = { nounwind }
;.
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "remove_globalization.c", directory: "/tmp/remove_globalization.c")
diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll
--- a/llvm/test/Transforms/OpenMP/replace_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll
@@ -132,6 +132,9 @@
; CHECK: @[[BAZ_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
; CHECK: @[[OFFSET:[a-zA-Z0-9_$"\\.-]+]] = global i32 undef
; CHECK: @[[STACK:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [1024 x i8] undef
+; CHECK: @[[FOO_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; CHECK: @[[BAR_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; CHECK: @[[BAZ_SPMD_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [16 x i8] undef, align 4
; CHECK: @[[Y_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
;.
@@ -141,7 +144,7 @@
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; CHECK-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]]
; CHECK-NEXT: call void @unknown_no_openmp()
-; CHECK-NEXT: call void @use.internalized(ptr nofree [[X]]) #[[ATTR6]]
+; CHECK-NEXT: call void @use.internalized(ptr nofree [[X]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR6]]
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-NEXT: ret void
@@ -154,14 +157,14 @@
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], -1
; CHECK-NEXT: br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]]
; CHECK: master1:
-; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @x_shared to ptr)) #[[ATTR6]]
+; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @x_shared to ptr)) #[[ATTR3]]
; CHECK-NEXT: br label [[NEXT:%.*]]
; CHECK: next:
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: [[B0:%.*]] = icmp eq i32 [[C]], -1
; CHECK-NEXT: br i1 [[B0]], label [[MASTER2:%.*]], label [[EXIT]]
; CHECK: master2:
-; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @y_shared to ptr)) #[[ATTR6]]
+; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @y_shared to ptr)) #[[ATTR3]]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
@@ -176,7 +179,7 @@
; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]]
; CHECK: master3:
; CHECK-NEXT: [[Z:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG10:![0-9]+]]
-; CHECK-NEXT: call void @use.internalized(ptr nofree [[Z]]) #[[ATTR6]]
+; CHECK-NEXT: call void @use.internalized(ptr nofree [[Z]]) #[[ATTR3]]
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[Z]], i64 24) #[[ATTR6]]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
@@ -184,7 +187,7 @@
; CHECK-NEXT: ret void
;
;
-; CHECK: Function Attrs: nofree norecurse nounwind memory(write)
+; CHECK: Function Attrs: nofree norecurse nosync nounwind memory(write)
; CHECK-LABEL: define {{[^@]+}}@use.internalized
; CHECK-SAME: (ptr nofree [[X:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
@@ -199,7 +202,7 @@
; CHECK-NEXT: ret void
;
;
-; CHECK: Function Attrs: nosync nounwind allocsize(0) memory(read)
+; CHECK: Function Attrs: norecurse nosync nounwind allocsize(0) memory(read)
; CHECK-LABEL: define {{[^@]+}}@__kmpc_alloc_shared
; CHECK-SAME: (i64 [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr @offset, align 4
@@ -213,9 +216,9 @@
;
;.
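Reviewer note (not part of the patch): the master1/master2 checks above exercise the other replacement strategy. When the allocation is only live inside code guarded to run on the main thread, the runtime call is replaced by a reserved addrspace(3) global plus an addrspacecast at the use, rather than an alloca. A sketch with a hypothetical global name, modeled on @x_shared from the test:

@x_shared_sketch = internal addrspace(3) global [16 x i8] undef, align 4
; a master-guarded
;   %x = call align 4 ptr @__kmpc_alloc_shared(i64 16)
; becomes a use of the reserved shared-memory global instead:
;   call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @x_shared_sketch to ptr))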
; CHECK: attributes #[[ATTR0]] = { "kernel" }
-; CHECK: attributes #[[ATTR1]] = { nofree norecurse nounwind memory(write) }
-; CHECK: attributes #[[ATTR2]] = { nosync nounwind allocsize(0) memory(read) }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind }
+; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(write) }
+; CHECK: attributes #[[ATTR2]] = { norecurse nosync nounwind allocsize(0) memory(read) }
+; CHECK: attributes #[[ATTR3]] = { nosync nounwind }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR5:[0-9]+]] = { "llvm.assume"="omp_no_openmp" }
; CHECK: attributes #[[ATTR6]] = { nounwind }
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll
--- a/llvm/test/Transforms/OpenMP/spmdization.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization.ll
@@ -2450,9 +2450,6 @@
; AMDGPU: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; AMDGPU: [[LOOP28]] = distinct !{!28, !23, !24}
; AMDGPU: [[LOOP29]] = distinct !{!29, !23, !24}
-; AMDGPU: [[META30:![0-9]+]] = !{!31, !27, i64 0}
-; AMDGPU: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0}
-; AMDGPU: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
;.
; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
@@ -2484,9 +2481,6 @@
; NVPTX: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; NVPTX: [[LOOP28]] = distinct !{!28, !23, !24}
; NVPTX: [[LOOP29]] = distinct !{!29, !23, !24}
-; NVPTX: [[META30:![0-9]+]] = !{!31, !27, i64 0}
-; NVPTX: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0}
-; NVPTX: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
;.
; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
@@ -2518,9 +2512,6 @@
; AMDGPU-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; AMDGPU-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24}
; AMDGPU-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24}
-; AMDGPU-DISABLED: [[META30:![0-9]+]] = !{!31, !27, i64 0}
-; AMDGPU-DISABLED: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0}
-; AMDGPU-DISABLED: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
;.
; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
@@ -2552,7 +2543,4 @@
; NVPTX-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; NVPTX-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24}
; NVPTX-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24}
-; NVPTX-DISABLED: [[META30:![0-9]+]] = !{!31, !27, i64 0}
-; NVPTX-DISABLED: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0}
-; NVPTX-DISABLED: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
;.
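Reviewer note (not part of the patch): most of the attribute churn in these test updates follows from one new fact. With barriers modeled, functions such as @use.internalized above can now be proven nosync, callers inherit the stronger sets, and the autogenerated #[[ATTRn]] groups renumber. A compact illustration of the deduction, using hypothetical names (@nosync_sketch, @S_sketch):

@S_sketch = external global ptr
define internal void @nosync_sketch(ptr %p) {
  ; a plain store performs no cross-thread synchronization, so the
  ; Attributor can derive something like the ATTR1 set checked above:
  ; nofree norecurse nosync nounwind memory(write)
  store ptr %p, ptr @S_sketch, align 8
  ret void
}
; A function whose body and callees are all nosync becomes nosync itself,
; unless it contains a barrier or a call the analysis cannot see through.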
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
@@ -163,7 +163,7 @@
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
-; CHECK-NEXT: call void @generic_helper() #[[ATTR5]]
+; CHECK-NEXT: call void @generic_helper() #[[ATTR6:[0-9]+]]
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
@@ -176,7 +176,7 @@
; CHECK-DISABLE-SPMDIZATION-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK-DISABLE-SPMDIZATION: user_code.entry:
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @generic_helper() #[[ATTR5]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @generic_helper() #[[ATTR6:[0-9]+]]
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
; CHECK-DISABLE-SPMDIZATION: worker.exit:
@@ -202,7 +202,7 @@
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; CHECK-NEXT: call void @leaf() #[[ATTR5]]
+; CHECK-NEXT: call void @leaf() #[[ATTR6]]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; CHECK-NEXT: ret void
@@ -211,7 +211,7 @@
; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR5]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR6]]
; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR2:[0-9]+]]
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
@@ -231,7 +231,7 @@
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]]
+; CHECK-NEXT: call void @unknown() #[[ATTR7:[0-9]+]]
; CHECK-NEXT: ret void
;
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_outlined__
@@ -239,7 +239,7 @@
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @unknown() #[[ATTR6:[0-9]+]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @unknown() #[[ATTR7:[0-9]+]]
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
@@ -333,13 +333,13 @@
; CHECK-LABEL: define {{[^@]+}}@generic_helper
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: call void @leaf() #[[ATTR5]]
+; CHECK-NEXT: call void @leaf() #[[ATTR6]]
; CHECK-NEXT: ret void
;
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@generic_helper
; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR4]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR5]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR6]]
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
@@ -376,17 +376,19 @@
; CHECK: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK: attributes #[[ATTR2]] = { nounwind }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { alwaysinline }
-; CHECK: attributes #[[ATTR4]] = { convergent noinline nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
+; CHECK: attributes #[[ATTR4]] = { convergent noinline nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK: attributes #[[ATTR5]] = { convergent nounwind }
-; CHECK: attributes #[[ATTR6]] = { convergent }
+; CHECK: attributes #[[ATTR6]] = { convergent nosync nounwind }
+; CHECK: attributes #[[ATTR7]] = { convergent }
;.
; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR2]] = { nounwind }
; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR3:[0-9]+]] = { alwaysinline }
-; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR4]] = { convergent noinline nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
+; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR4]] = { convergent noinline nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR5]] = { convergent nounwind }
-; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR6]] = { convergent }
+; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR6]] = { convergent nosync nounwind }
+; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR7]] = { convergent }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}
; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1}
diff --git a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
--- a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
@@ -260,8 +260,8 @@
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; CHECK-NEXT: call void @leaf() #[[ATTR3:[0-9]+]]
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; CHECK-NEXT: call void @leaf() #[[ATTR7:[0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; CHECK-NEXT: ret void
;
@@ -269,8 +269,8 @@
; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR3:[0-9]+]]
-; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR7:[0-9]+]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3:[0-9]+]]
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
@@ -289,7 +289,7 @@
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; CHECK-NEXT: call void @leaf() #[[ATTR7:[0-9]+]]
+; CHECK-NEXT: call void @leaf() #[[ATTR7]]
; CHECK-NEXT: ret void
;
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_outlined__
@@ -297,7 +297,7 @@
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR7:[0-9]+]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR7]]
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
@@ -319,7 +319,7 @@
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR7]]
; CHECK-NEXT: ret void
;
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
@@ -330,7 +330,7 @@
; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLE-SPMDIZATION-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR7]]
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
@@ -381,14 +381,14 @@
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @unknown()
-; CHECK-NEXT: call void @leaf() #[[ATTR3]]
+; CHECK-NEXT: call void @leaf() #[[ATTR7]]
; CHECK-NEXT: ret void
;
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@generic_helper
; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR1]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @unknown()
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR3]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR7]]
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
--- a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
+++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
@@ -19,53 +19,30 @@
; CGSCC: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
;.
define void @kernel() "kernel" {
-; TUNIT: Function Attrs: norecurse
-; TUNIT-LABEL: define {{[^@]+}}@kernel
-; TUNIT-SAME: () #[[ATTR0:[0-9]+]] {
-; TUNIT-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
-; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
-; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; TUNIT: if.then:
-; TUNIT-NEXT: store i32 1, ptr addrspace(3) @G, align 4
-; TUNIT-NEXT: br label [[IF_MERGE:%.*]]
-; TUNIT: if.else:
-; TUNIT-NEXT: call void @barrier() #[[ATTR5:[0-9]+]]
-; TUNIT-NEXT: [[L:%.*]] = load i32, ptr addrspace(3) @G, align 4
-; TUNIT-NEXT: call void @use1(i32 [[L]]) #[[ATTR5]]
-; TUNIT-NEXT: br label [[IF_MERGE]]
-; TUNIT: if.merge:
-; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
-; TUNIT: if.then2:
-; TUNIT-NEXT: store i32 2, ptr addrspace(3) @G, align 4
-; TUNIT-NEXT: call void @barrier() #[[ATTR5]]
-; TUNIT-NEXT: br label [[IF_END]]
-; TUNIT: if.end:
-; TUNIT-NEXT: call void @__kmpc_target_deinit(ptr undef, i8 1)
-; TUNIT-NEXT: ret void
;
-; CGSCC: Function Attrs: norecurse
-; CGSCC-LABEL: define {{[^@]+}}@kernel
-; CGSCC-SAME: () #[[ATTR0:[0-9]+]] {
-; CGSCC-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
-; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
-; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; CGSCC: if.then:
-; CGSCC-NEXT: store i32 1, ptr addrspace(3) @G, align 4
-; CGSCC-NEXT: br label [[IF_MERGE:%.*]]
-; CGSCC: if.else:
-; CGSCC-NEXT: call void @barrier()
-; CGSCC-NEXT: [[L:%.*]] = load i32, ptr addrspace(3) @G, align 4
-; CGSCC-NEXT: call void @use1(i32 [[L]])
-; CGSCC-NEXT: br label [[IF_MERGE]]
-; CGSCC: if.merge:
-; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
-; CGSCC: if.then2:
-; CGSCC-NEXT: store i32 2, ptr addrspace(3) @G, align 4
-; CGSCC-NEXT: call void @barrier()
-; CGSCC-NEXT: br label [[IF_END]]
-; CGSCC: if.end:
-; CGSCC-NEXT: call void @__kmpc_target_deinit(ptr undef, i8 1)
-; CGSCC-NEXT: ret void
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define {{[^@]+}}@kernel
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: store i32 1, ptr addrspace(3) @G, align 4
+; CHECK-NEXT: br label [[IF_MERGE:%.*]]
+; CHECK: if.else:
+; CHECK-NEXT: call void @barrier() #[[ATTR5:[0-9]+]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr addrspace(3) @G, align 4
+; CHECK-NEXT: call void @use1(i32 [[L]]) #[[ATTR5]]
+; CHECK-NEXT: br label [[IF_MERGE]]
+; CHECK: if.merge:
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
+; CHECK: if.then2:
+; CHECK-NEXT: store i32 2, ptr addrspace(3) @G, align 4
+; CHECK-NEXT: call void @barrier() #[[ATTR5]]
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr undef, i8 1)
+; CHECK-NEXT: ret void
;
%call = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
%cmp = icmp eq i32 %call, -1
@@ -112,20 +89,17 @@
!2 = !{ptr @kernel, !"kernel", i32 1}
;.
-; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" }
-; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind }
-; TUNIT: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind }
-; TUNIT: attributes #[[ATTR3:[0-9]+]] = { nocallback }
-; TUNIT: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
-; TUNIT: attributes #[[ATTR5]] = { nounwind }
-;.
-; CGSCC: attributes #[[ATTR0]] = { norecurse "kernel" }
-; CGSCC: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind }
-; CGSCC: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind }
-; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nocallback }
-; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+; CHECK: attributes #[[ATTR0]] = { norecurse "kernel" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+; CHECK: attributes #[[ATTR5]] = { nounwind }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; CHECK: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CGSCC: {{.*}}
; TUNIT: {{.*}}
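Reviewer note (not part of the patch, and not a test): the final file is the negative case for cross-thread value simplification. @G lives in addrspace(3) shared memory, the stores happen on the initial thread, and the load sits behind @barrier() on the other threads, so the load must not be folded to either stored constant; the checks confirm only callsite annotations change. A reduced sketch of the pattern, with hypothetical names mirroring the test:

@G_sketch = internal addrspace(3) global i32 undef, align 4
define void @kernel_sketch(i1 %main) "kernel" {
  br i1 %main, label %then, label %else
then:                                                  ; initial thread writes
  store i32 1, ptr addrspace(3) @G_sketch, align 4
  br label %merge
else:                                                  ; other threads wait, then read
  call void @barrier()                                 ; assumed to be a synchronizing barrier, as in the test
  %l = load i32, ptr addrspace(3) @G_sketch, align 4   ; must stay a load
  call void @use1(i32 %l)
  br label %merge
merge:
  ret void
}
declare void @barrier()
declare void @use1(i32)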