diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h --- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DivergenceAnalysis.h @@ -25,6 +25,7 @@ class Module; class Value; class Instruction; +class LegacyDivergenceAnalysis; class Loop; class raw_ostream; class TargetTransformInfo; @@ -141,38 +142,34 @@ }; class DivergenceInfo { - Function &F; - - // If the function contains an irreducible region the divergence - // analysis can run indefinitely. We set ContainsIrreducible and no - // analysis is actually performed on the function. All values in - // this function are conservatively reported as divergent instead. - bool ContainsIrreducible; - std::unique_ptr<SyncDependenceAnalysis> SDA; - std::unique_ptr<DivergenceAnalysisImpl> DA; - public: + enum InfoKind { AlwaysUniform, AlwaysDivergent, Computed }; + DivergenceInfo(Function &F, const DominatorTree &DT, const PostDominatorTree &PDT, const LoopInfo &LI, const TargetTransformInfo &TTI, bool KnownReducible); + DivergenceInfo(Function &F, LegacyDivergenceAnalysis *L); + + DivergenceInfo(Function &F, const TargetTransformInfo &TTI); + + bool isComputed() const { return Kind == Computed; } + bool isAlwaysUniform() const { return Kind == AlwaysUniform; } + bool isAlwaysDivergent() const { return Kind == AlwaysDivergent; } + + static bool assumeAlwaysDivergent(); + /// Whether any divergence was detected. - bool hasDivergence() const { - return ContainsIrreducible || DA->hasDetectedDivergence(); - } + bool hasDivergence() const; /// The GPU kernel this analysis result is for const Function &getFunction() const { return F; } /// Whether \p V is divergent at its definition. - bool isDivergent(const Value &V) const { - return ContainsIrreducible || DA->isDivergent(V); - } + bool isDivergent(const Value &V) const; /// Whether \p U is divergent. Uses of a uniform value can be divergent. 
- bool isDivergentUse(const Use &U) const { - return ContainsIrreducible || DA->isDivergentUse(U); - } + bool isDivergentUse(const Use &U) const; /// Whether \p V is uniform/non-divergent. bool isUniform(const Value &V) const { return !isDivergent(V); } @@ -180,6 +177,13 @@ /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be /// divergent. bool isUniformUse(const Use &U) const { return !isDivergentUse(U); } + +private: + Function &F; + InfoKind Kind = Computed; + const LegacyDivergenceAnalysis *LegacyDA = nullptr; + std::unique_ptr<SyncDependenceAnalysis> SDA; + std::unique_ptr<DivergenceAnalysisImpl> DA; }; /// \brief Divergence analysis frontend for GPU kernels. diff --git a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h --- a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h +++ b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h @@ -41,6 +41,9 @@ // Print all divergent branches in the function. void print(raw_ostream &OS, const Module *) const override; + /// Whether any divergence was detected. + bool hasDivergence() const; + // Returns true if V is divergent at its definition. bool isDivergent(const Value *V) const; @@ -65,6 +68,10 @@ // (optional) handle to new DivergenceAnalysis std::unique_ptr<DivergenceInfo> gpuDA; + // Wrapper to present a uniform interface at points that are + // independent of new/old pass manager. + std::unique_ptr<DivergenceInfo> DIProxy; + // Stores all divergent values. 
DenseSet<const Value *> DivergentValues; diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp --- a/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -74,6 +74,7 @@ #include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/PostDominators.h" @@ -90,6 +91,12 @@ #define DEBUG_TYPE "divergence" +static cl::opt<bool> + AssumeDivergent("assume-always-divergent", cl::init(false), cl::Hidden, + cl::ValueDisallowed, + cl::desc("Assume that all control flow is divergent; " + "mainly useful for testing")); + DivergenceAnalysisImpl::DivergenceAnalysisImpl( const Function &F, const Loop *RegionLoop, const DominatorTree &DT, const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm) @@ -344,20 +351,70 @@ return isDivergent(V) || isTemporalDivergent(*I.getParent(), V); } +bool DivergenceInfo::hasDivergence() const { + if (Kind == AlwaysUniform) + return false; + if (Kind == AlwaysDivergent) + return true; + if (LegacyDA) + return LegacyDA->hasDivergence(); + assert(DA); + return DA->hasDetectedDivergence(); +} + +/// Whether \p V is divergent at its definition. +bool DivergenceInfo::isDivergent(const Value &V) const { + if (Kind == AlwaysUniform) + return false; + if (Kind == AlwaysDivergent) + return true; + if (LegacyDA) + return LegacyDA->isDivergent(&V); + assert(DA); + return DA->isDivergent(V); +} + +/// Whether \p U is divergent. Uses of a uniform value can be divergent. 
+bool DivergenceInfo::isDivergentUse(const Use &U) const { + if (Kind == AlwaysUniform) + return false; + if (Kind == AlwaysDivergent) + return true; + if (LegacyDA) + return LegacyDA->isDivergentUse(&U); + assert(DA); + return DA->isDivergentUse(U); +} + +bool DivergenceInfo::assumeAlwaysDivergent() { return AssumeDivergent; } + DivergenceInfo::DivergenceInfo(Function &F, const DominatorTree &DT, const PostDominatorTree &PDT, const LoopInfo &LI, const TargetTransformInfo &TTI, bool KnownReducible) - : F(F), ContainsIrreducible(false) { + : F(F) { + if (AssumeDivergent) { + Kind = AlwaysDivergent; + return; + } + if (!KnownReducible) { using RPOTraversal = ReversePostOrderTraversal<const Function *>; RPOTraversal FuncRPOT(&F); - if (containsIrreducibleCFG<const BasicBlock *, const RPOTraversal, const LoopInfo>(FuncRPOT, LI)) { - ContainsIrreducible = true; - return; - } + KnownReducible = + !containsIrreducibleCFG<const BasicBlock *, const RPOTraversal, const LoopInfo>(FuncRPOT, LI); } + + // If the function contains an irreducible region the divergence + // analysis can run indefinitely. We set AlwaysDivergent and no + // analysis is actually performed on the function. All values in + // this function are conservatively reported as divergent instead. + if (!KnownReducible) { + Kind = AlwaysDivergent; + return; + } + SDA = std::make_unique<SyncDependenceAnalysis>(DT, PDT, LI); DA = std::make_unique<DivergenceAnalysisImpl>(F, nullptr, DT, LI, *SDA, /* LCSSA */ false); @@ -377,14 +434,40 @@ DA->compute(); } +DivergenceInfo::DivergenceInfo(Function &F, const TargetTransformInfo &TTI) + : F(F) { + Kind = AlwaysUniform; + if (AssumeDivergent || TTI.hasBranchDivergence()) { + Kind = AlwaysDivergent; + } +} + +DivergenceInfo::DivergenceInfo(Function &F, LegacyDivergenceAnalysis *L) + : F(F), LegacyDA(L) { + assert(Kind == Computed); +} + AnalysisKey DivergenceAnalysis::Key; DivergenceAnalysis::Result DivergenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { + auto &TTI = AM.getResult<TargetIRAnalysis>(F); + + { + // Trivially return an empty analysis if the target does not have + // divergence. 
+ DivergenceInfo DI{F, TTI}; + if (AssumeDivergent || DI.isAlwaysUniform()) + return DI; + + // DI is now set to AlwaysDivergent at this point, but we are + // about to compute divergence for real. + assert(DI.isAlwaysDivergent()); + } + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); auto &LI = AM.getResult<LoopAnalysis>(F); - auto &TTI = AM.getResult<TargetIRAnalysis>(F); return DivergenceInfo(F, DT, PDT, LI, TTI, /* KnownReducible = */ false); } diff --git a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp --- a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp @@ -302,6 +302,7 @@ AU.addRequiredTransitive<DominatorTreeWrapperPass>(); AU.addRequiredTransitive<PostDominatorTreeWrapperPass>(); AU.addRequiredTransitive<LoopInfoWrapperPass>(); + AU.addRequiredTransitive<TargetTransformInfoWrapperPass>(); AU.setPreservesAll(); } @@ -319,16 +320,20 @@ } bool LegacyDivergenceAnalysis::runOnFunction(Function &F) { - auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); - if (TTIWP == nullptr) - return false; - - TargetTransformInfo &TTI = TTIWP->getTTI(F); // Fast path: if the target does not have branch divergence, we do not mark // any branch as divergent. - if (!TTI.hasBranchDivergence()) + auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + DIProxy = std::make_unique<DivergenceInfo>(F, TTI); + if (DivergenceInfo::assumeAlwaysDivergent() || DIProxy->isAlwaysUniform()) return false; + // DIProxy is now set to AlwaysDivergent at this point, but we are + // about to compute divergence for real. So we delete the proxy; it + // will be recreated later, but only if the new divergence analysis + // is not in use. 
+ assert(DIProxy->isAlwaysDivergent()); + DIProxy.reset(); + DivergentValues.clear(); DivergentUses.clear(); gpuDA = nullptr; @@ -341,12 +346,12 @@ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); gpuDA = std::make_unique<DivergenceInfo>(F, DT, PDT, LI, TTI, /* KnownReducible = */ true); - } else { // run LLVM's existing DivergenceAnalysis DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses); DP.populateWithSourcesOfDivergence(); DP.propagate(); + DIProxy = std::make_unique<DivergenceInfo>(F, this); } LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName() @@ -356,10 +361,25 @@ return false; } +bool LegacyDivergenceAnalysis::hasDivergence() const { + if (gpuDA) { + return gpuDA->hasDivergence(); + } + if (DIProxy->isAlwaysDivergent()) + return true; + if (DIProxy->isAlwaysUniform()) + return false; + return !DivergentValues.empty(); +} + bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const { if (gpuDA) { return gpuDA->isDivergent(*V); } + if (DIProxy->isAlwaysDivergent()) + return true; + if (DIProxy->isAlwaysUniform()) + return false; return DivergentValues.count(V); } @@ -367,29 +387,25 @@ if (gpuDA) { return gpuDA->isDivergentUse(*U); } + if (DIProxy->isAlwaysDivergent()) + return true; + if (DIProxy->isAlwaysUniform()) + return false; return DivergentValues.count(U->get()) || DivergentUses.count(U); } void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const { - if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty()) + if (!hasDivergence()) return; const Function *F = nullptr; - if (!DivergentValues.empty()) { - const Value *FirstDivergentValue = *DivergentValues.begin(); - if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) { - F = Arg->getParent(); - } else if (const Instruction *I = - dyn_cast<Instruction>(FirstDivergentValue)) { - F = I->getParent()->getParent(); - } else { - llvm_unreachable("Only arguments and instructions can be divergent"); - } - } else if (gpuDA) { + if (DIProxy) + F = &DIProxy->getFunction(); + else { + 
assert(gpuDA); F = &gpuDA->getFunction(); } - if (!F) - return; + assert(F); // Dumps all divergent values in F, arguments and then instructions. for (auto &Arg : F->args()) { diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -32,6 +32,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" @@ -214,7 +215,9 @@ explicit LoopUnswitch(bool Os = false, bool HasBranchDivergence = false) : LoopPass(ID), OptimizeForSize(Os), - HasBranchDivergence(HasBranchDivergence) { + HasBranchDivergence(DivergenceInfo::assumeAlwaysDivergent() + ? true + : HasBranchDivergence) { initializeLoopUnswitchPass(*PassRegistry::getPassRegistry()); } diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopAnalysisManager.h" @@ -3015,8 +3016,12 @@ // transform, we should allow unswitching for non-trivial uniform // branches even on targets that have divergence. // https://bugs.llvm.org/show_bug.cgi?id=48819 + // + // For now, we use a trivial DivergenceInfo object to check whether + // divergence exists. 
+ const DivergenceInfo DI{*L.getHeader()->getParent(), TTI}; bool ContinueWithNonTrivial = - EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence()); + EnableNonTrivialUnswitch || (NonTrivial && !DI.hasDivergence()); if (!ContinueWithNonTrivial) return false; diff --git a/llvm/lib/Transforms/Scalar/Sink.cpp b/llvm/lib/Transforms/Scalar/Sink.cpp --- a/llvm/lib/Transforms/Scalar/Sink.cpp +++ b/llvm/lib/Transforms/Scalar/Sink.cpp @@ -14,7 +14,10 @@ #include "llvm/Transforms/Scalar/Sink.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DataLayout.h" @@ -33,6 +36,7 @@ STATISTIC(NumSinkIter, "Number of sinking iterations"); static bool isSafeToMove(Instruction *Inst, AliasAnalysis &AA, + const DivergenceInfo &DI, SmallPtrSetImpl<Instruction *> &Stores) { if (Inst->mayWriteToMemory()) { @@ -52,10 +56,11 @@ return false; if (auto *Call = dyn_cast<CallBase>(Inst)) { - // Convergent operations cannot be made control-dependent on additional - // values. - if (Call->isConvergent()) - return false; + // Convergent operations cannot be sunk across divergent branches. + if (Call->isConvergent()) { + if (DI.hasDivergence()) + return false; + } for (Instruction *S : Stores) if (isModSet(AA.getModRefInfo(S, Call))) @@ -106,7 +111,8 @@ /// instruction out of its current block into a successor. static bool SinkInstruction(Instruction *Inst, SmallPtrSetImpl<Instruction *> &Stores, - DominatorTree &DT, LoopInfo &LI, AAResults &AA) { + DominatorTree &DT, LoopInfo &LI, AAResults &AA, + const DivergenceInfo &DI) { // Don't sink static alloca instructions. CodeGen assumes allocas outside the // entry block are dynamically sized stack objects. @@ -115,7 +121,7 @@ return false; // Check if it's safe to move the instruction. 
- if (!isSafeToMove(Inst, AA, Stores)) + if (!isSafeToMove(Inst, AA, DI, Stores)) return false; // FIXME: This should include support for sinking instructions within the @@ -177,7 +183,7 @@ } static bool ProcessBlock(BasicBlock &BB, DominatorTree &DT, LoopInfo &LI, - AAResults &AA) { + AAResults &AA, const DivergenceInfo &DI) { // Can't sink anything out of a block that has less than two successors. if (BB.getTerminator()->getNumSuccessors() <= 1) return false; @@ -205,7 +211,7 @@ if (Inst->isDebugOrPseudoInst()) continue; - if (SinkInstruction(Inst, Stores, DT, LI, AA)) { + if (SinkInstruction(Inst, Stores, DT, LI, AA, DI)) { ++NumSunk; MadeChange = true; } @@ -217,7 +223,8 @@ } static bool iterativelySinkInstructions(Function &F, DominatorTree &DT, - LoopInfo &LI, AAResults &AA) { + LoopInfo &LI, AAResults &AA, + const DivergenceInfo &DI) { bool MadeChange, EverMadeChange = false; do { @@ -225,7 +232,7 @@ LLVM_DEBUG(dbgs() << "Sinking iteration " << NumSinkIter << "\n"); // Process all basic blocks. for (BasicBlock &I : F) - MadeChange |= ProcessBlock(I, DT, LI, AA); + MadeChange |= ProcessBlock(I, DT, LI, AA, DI); EverMadeChange |= MadeChange; NumSinkIter++; } while (MadeChange); @@ -237,8 +244,13 @@ auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &LI = AM.getResult<LoopAnalysis>(F); auto &AA = AM.getResult<AAManager>(F); + auto &TTI = AM.getResult<TargetIRAnalysis>(F); - if (!iterativelySinkInstructions(F, DT, LI, AA)) + // Sinking only checks whether divergence potentially exists in the + // function. For this, we rely on a trivial DivergenceInfo object. + const DivergenceInfo DI{F, TTI}; + + if (!iterativelySinkInstructions(F, DT, LI, AA, DI)) return PreservedAnalyses::all(); PreservedAnalyses PA; @@ -258,8 +270,13 @@ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); + auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + + // Sinking only checks whether divergence potentially exists in the + // function. For this, we rely on a trivial DivergenceInfo object. 
+ const DivergenceInfo DI{F, TTI}; - return iterativelySinkInstructions(F, DT, LI, AA); + return iterativelySinkInstructions(F, DT, LI, AA, DI); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -270,6 +287,7 @@ AU.addRequired<LoopInfoWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<LoopInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); } }; } // end anonymous namespace diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll @@ -8,8 +8,6 @@ ; behaviour. Instead, it only checks for the values that are known to ; be divergent according to the legacy analysis. -; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s - ; This test contains an unstructured loop. ; +-------------- entry ----------------+ ; | | diff --git a/llvm/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll b/llvm/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll deleted file mode 100644 --- a/llvm/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll +++ /dev/null @@ -1,45 +0,0 @@ -; RUN: opt -mtriple=amdgcn-- -O3 -S %s | FileCheck %s - -; Check that loop unswitch does not happen if condition is divergent. 
- -; CHECK-LABEL: {{^}}define amdgpu_kernel void @divergent_unswitch -; CHECK: entry: -; CHECK: icmp -; CHECK: [[IF_COND:%[a-z0-9]+]] = icmp {{.*}} 567890 -; CHECK: br label -; CHECK: br i1 [[IF_COND]] - -define amdgpu_kernel void @divergent_unswitch(i32 * nocapture %out, i32 %n) { -entry: - %cmp9 = icmp sgt i32 %n, 0 - br i1 %cmp9, label %for.body.lr.ph, label %for.cond.cleanup - -for.body.lr.ph: ; preds = %entry - %call = tail call i32 @llvm.amdgcn.workitem.id.x() #0 - %cmp2 = icmp eq i32 %call, 567890 - br label %for.body - -for.cond.cleanup.loopexit: ; preds = %for.inc - br label %for.cond.cleanup - -for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry - ret void - -for.body: ; preds = %for.inc, %for.body.lr.ph - %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] - br i1 %cmp2, label %if.then, label %for.inc - -if.then: ; preds = %for.body - %arrayidx = getelementptr inbounds i32, i32 * %out, i32 %i.010 - store i32 %i.010, i32 * %arrayidx, align 4 - br label %for.inc - -for.inc: ; preds = %for.body, %if.then - %inc = add nuw nsw i32 %i.010, 1 - %exitcond = icmp eq i32 %inc, %n - br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body -} - -declare i32 @llvm.amdgcn.workitem.id.x() #0 - -attributes #0 = { nounwind readnone } diff --git a/llvm/test/Transforms/LoopUnswitch/divergent.ll b/llvm/test/Transforms/LoopUnswitch/divergent.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopUnswitch/divergent.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -O3 -S -enable-new-pm=0 | FileCheck %s --check-prefixes=UNI +; RUN: opt < %s -O3 -S -enable-new-pm=0 -assume-always-divergent | FileCheck %s --check-prefixes=DIV + +; RUN: opt < %s -O3 -S -enable-new-pm=1 | FileCheck %s --check-prefixes=UNI +; RUN: opt < %s -O3 -S -enable-new-pm=1 -assume-always-divergent | FileCheck %s --check-prefixes=DIV + +; Check that loop unswitch does not happen if condition is divergent. 
+ +define void @divergent_unswitch(i32 * nocapture %out, i32 %n) { +; UNI-LABEL: @divergent_unswitch( +; UNI-NEXT: entry: +; UNI-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; UNI-NEXT: br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; UNI: for.body.lr.ph: +; UNI-NEXT: [[CALL:%.*]] = tail call i32 @extern_func() #[[ATTR2:[0-9]+]] +; UNI-NEXT: [[CMP2:%.*]] = icmp eq i32 [[CALL]], 567890 +; UNI-NEXT: br i1 [[CMP2]], label [[FOR_BODY_US:%.*]], label [[FOR_COND_CLEANUP]] +; UNI: for.body.us: +; UNI-NEXT: [[I_010_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[FOR_BODY_US]] ], [ 0, [[FOR_BODY_LR_PH]] ] +; UNI-NEXT: [[TMP0:%.*]] = zext i32 [[I_010_US]] to i64 +; UNI-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 [[TMP0]] +; UNI-NEXT: store i32 [[I_010_US]], i32* [[ARRAYIDX_US]], align 4 +; UNI-NEXT: [[INC_US]] = add nuw nsw i32 [[I_010_US]], 1 +; UNI-NEXT: [[EXITCOND_US:%.*]] = icmp eq i32 [[INC_US]], [[N]] +; UNI-NEXT: br i1 [[EXITCOND_US]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_US]] +; UNI: for.cond.cleanup: +; UNI-NEXT: ret void +; +; DIV-LABEL: @divergent_unswitch( +; DIV-NEXT: entry: +; DIV-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; DIV-NEXT: br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; DIV: for.body.lr.ph: +; DIV-NEXT: [[CALL:%.*]] = tail call i32 @extern_func() #[[ATTR2:[0-9]+]] +; DIV-NEXT: [[CMP2:%.*]] = icmp eq i32 [[CALL]], 567890 +; DIV-NEXT: br label [[FOR_BODY:%.*]] +; DIV: for.body: +; DIV-NEXT: [[I_010:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; DIV-NEXT: br i1 [[CMP2]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; DIV: if.then: +; DIV-NEXT: [[TMP0:%.*]] = zext i32 [[I_010]] to i64 +; DIV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 [[TMP0]] +; DIV-NEXT: store i32 [[I_010]], i32* [[ARRAYIDX]], align 4 +; DIV-NEXT: br label [[FOR_INC]] +; DIV: for.inc: +; DIV-NEXT: [[INC]] = add nuw nsw i32 
[[I_010]], 1 +; DIV-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; DIV-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] +; DIV: for.cond.cleanup: +; DIV-NEXT: ret void +; +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body.lr.ph, label %for.cond.cleanup + +for.body.lr.ph: ; preds = %entry + %call = tail call i32 @extern_func() #0 + %cmp2 = icmp eq i32 %call, 567890 + br label %for.body + +for.body: ; preds = %for.inc, %for.body.lr.ph + %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + br i1 %cmp2, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %arrayidx = getelementptr inbounds i32, i32 * %out, i32 %i.010 + store i32 %i.010, i32 * %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %inc = add nuw nsw i32 %i.010, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.inc + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +} + +declare i32 @extern_func() #0 + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/Transforms/Sink/convergent.ll b/llvm/test/Transforms/Sink/convergent.ll --- a/llvm/test/Transforms/Sink/convergent.ll +++ b/llvm/test/Transforms/Sink/convergent.ll @@ -1,12 +1,25 @@ -; RUN: opt -sink -S < %s | FileCheck %s +; RUN: opt -enable-new-pm=1 -sink -S < %s | FileCheck %s -check-prefixes=CHECK,UNI +; RUN: opt -enable-new-pm=0 -sink -S < %s | FileCheck %s -check-prefixes=CHECK,UNI +; RUN: opt -enable-new-pm=0 -use-gpu-divergence-analysis -sink -S < %s | FileCheck %s -check-prefixes=CHECK,UNI -; Verify that IR sinking does not move convergent operations to -; blocks that are not control equivalent. 
+; RUN: opt -enable-new-pm=1 -sink -assume-always-divergent -S < %s | FileCheck %s -check-prefixes=CHECK,DIV +; RUN: opt -enable-new-pm=0 -sink -assume-always-divergent -S < %s | FileCheck %s -check-prefixes=CHECK,DIV +; RUN: opt -enable-new-pm=0 -sink -use-gpu-divergence-analysis -assume-always-divergent -S < %s | FileCheck %s -check-prefixes=CHECK,DIV -; CHECK: define i32 @foo -; CHECK: entry -; CHECK-NEXT: call i32 @bar -; CHECK-NEXT: br i1 %arg +; Verify that sinking does not move convergent operations if the +; control flow is divergent. + +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; DIV-NEXT: [[C:%.*]] = call i32 @bar() +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[THEN:%.*]], label [[END:%.*]] +; CHECK-EMPTY: +; CHECK-NEXT: then: +; UNI-NEXT: [[C:%.*]] = call i32 @bar() +; CHECK-NEXT: ret i32 [[C]] +; CHECK-EMPTY: +; CHECK: end: +; CHECK-NEXT: ret i32 0 define i32 @foo(i1 %arg) { entry: