diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h
--- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DivergenceAnalysis.h
@@ -25,6 +25,7 @@
 class Module;
 class Value;
 class Instruction;
+class LegacyDivergenceAnalysis;
 class Loop;
 class raw_ostream;
 class TargetTransformInfo;
@@ -141,38 +142,30 @@
 };
 
 class DivergenceInfo {
-  Function &F;
-
-  // If the function contains an irreducible region the divergence
-  // analysis can run indefinitely. We set ContainsIrreducible and no
-  // analysis is actually performed on the function. All values in
-  // this function are conservatively reported as divergent instead.
-  bool ContainsIrreducible;
-  std::unique_ptr<SyncDependenceAnalysis> SDA;
-  std::unique_ptr<DivergenceAnalysisImpl> DA;
-
 public:
+  enum InfoKind { AlwaysUniform, AlwaysDivergent, Computed };
+
   DivergenceInfo(Function &F, const DominatorTree &DT,
                  const PostDominatorTree &PDT, const LoopInfo &LI,
                  const TargetTransformInfo &TTI, bool KnownReducible);
 
+  // LegacyDA does not respect AssumeDivergent, so do nothing here.
+  DivergenceInfo(Function &F, LegacyDivergenceAnalysis *L)
+      : F(F), LegacyDA(L) {}
+
+  DivergenceInfo(Function &F, InfoKind Kind);
+
   /// Whether any divergence was detected.
-  bool hasDivergence() const {
-    return ContainsIrreducible || DA->hasDetectedDivergence();
-  }
+  bool hasDivergence() const;
 
   /// The GPU kernel this analysis result is for
   const Function &getFunction() const { return F; }
 
   /// Whether \p V is divergent at its definition.
-  bool isDivergent(const Value &V) const {
-    return ContainsIrreducible || DA->isDivergent(V);
-  }
+  bool isDivergent(const Value &V) const;
 
   /// Whether \p U is divergent. Uses of a uniform value can be divergent.
-  bool isDivergentUse(const Use &U) const {
-    return ContainsIrreducible || DA->isDivergentUse(U);
-  }
+  bool isDivergentUse(const Use &U) const;
 
   /// Whether \p V is uniform/non-divergent.
   bool isUniform(const Value &V) const { return !isDivergent(V); }
@@ -180,6 +173,13 @@
   /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be
   /// divergent.
   bool isUniformUse(const Use &U) const { return !isDivergentUse(U); }
+
+private:
+  Function &F;
+  InfoKind Kind = Computed;
+  const LegacyDivergenceAnalysis *LegacyDA = nullptr;
+  std::unique_ptr<SyncDependenceAnalysis> SDA;
+  std::unique_ptr<DivergenceAnalysisImpl> DA;
 };
 
 /// \brief Divergence analysis frontend for GPU kernels.
diff --git a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
--- a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
@@ -41,6 +41,9 @@
   // Print all divergent branches in the function.
   void print(raw_ostream &OS, const Module *) const override;
 
+  /// Whether any divergence was detected.
+  bool hasDivergence() const;
+
   // Returns true if V is divergent at its definition.
   bool isDivergent(const Value *V) const;
 
@@ -57,6 +60,8 @@
   // Keep the analysis results uptodate by removing an erased value.
   void removeValue(const Value *V) { DivergentValues.erase(V); }
 
+  DivergenceInfo &getDI() { return gpuDA ? *gpuDA : *DIWrapper; }
+
 private:
   // Whether analysis should be performed by GPUDivergenceAnalysis.
   bool shouldUseGPUDivergenceAnalysis(const Function &F,
@@ -65,6 +70,10 @@
   // (optional) handle to new DivergenceAnalysis
   std::unique_ptr<DivergenceInfo> gpuDA;
 
+  // Wrapper to present a uniform interface at points that are
+  // independent of new/old pass manager.
+  std::unique_ptr<DivergenceInfo> DIWrapper;
+
   // Stores all divergent values.
   DenseSet<const Value *> DivergentValues;
diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp
--- a/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -74,6 +74,7 @@
 #include "llvm/Analysis/DivergenceAnalysis.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/PostDominators.h"
@@ -90,6 +91,12 @@
 
 #define DEBUG_TYPE "divergence"
 
+static cl::opt<bool>
+    AssumeDivergent("gpuda-assume-always-divergent", cl::init(false),
+                    cl::Hidden, cl::ValueDisallowed,
+                    cl::desc("Assume that all control flow is divergent; "
+                             "mainly useful for testing"));
+
 DivergenceAnalysisImpl::DivergenceAnalysisImpl(
     const Function &F, const Loop *RegionLoop, const DominatorTree &DT,
     const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm)
@@ -344,20 +351,64 @@
   return isDivergent(V) || isTemporalDivergent(*I.getParent(), V);
 }
 
+bool DivergenceInfo::hasDivergence() const {
+  if (Kind == AlwaysUniform)
+    return false;
+  if (Kind == AlwaysDivergent)
+    return true;
+  if (LegacyDA)
+    return LegacyDA->hasDivergence();
+  assert(DA);
+  return DA->hasDetectedDivergence();
+}
+
+/// Whether \p V is divergent at its definition.
+bool DivergenceInfo::isDivergent(const Value &V) const {
+  if (Kind == AlwaysUniform)
+    return false;
+  if (Kind == AlwaysDivergent)
+    return true;
+  if (LegacyDA)
+    return LegacyDA->isDivergent(&V);
+  assert(DA);
+  return DA->isDivergent(V);
+}
+
+/// Whether \p U is divergent. Uses of a uniform value can be divergent.
+bool DivergenceInfo::isDivergentUse(const Use &U) const {
+  if (Kind == AlwaysUniform)
+    return false;
+  if (Kind == AlwaysDivergent)
+    return true;
+  if (LegacyDA)
+    return LegacyDA->isDivergentUse(&U);
+  assert(DA);
+  return DA->isDivergentUse(U);
+}
+
 DivergenceInfo::DivergenceInfo(Function &F, const DominatorTree &DT,
                                const PostDominatorTree &PDT, const LoopInfo &LI,
                                const TargetTransformInfo &TTI,
                                bool KnownReducible)
-    : F(F), ContainsIrreducible(false) {
+    : F(F) {
+  if (AssumeDivergent) {
+    Kind = AlwaysDivergent;
+    return;
+  }
+
   if (!KnownReducible) {
     using RPOTraversal = ReversePostOrderTraversal<const Function *>;
     RPOTraversal FuncRPOT(&F);
-    if (containsIrreducibleCFG(FuncRPOT, LI)) {
-      ContainsIrreducible = true;
-      return;
-    }
+    KnownReducible =
+        !containsIrreducibleCFG<const BasicBlock *>(FuncRPOT, LI);
+  }
+
+  if (!KnownReducible) {
+    Kind = AlwaysDivergent;
+    return;
   }
+
   SDA = std::make_unique<SyncDependenceAnalysis>(DT, PDT, LI);
   DA = std::make_unique<DivergenceAnalysisImpl>(F, nullptr, DT, LI, *SDA,
                                                 /* LCSSA */ false);
@@ -377,14 +428,23 @@
   DA->compute();
 }
 
+DivergenceInfo::DivergenceInfo(Function &F, InfoKind Kind)
+    : F(F), Kind(AssumeDivergent ? AlwaysDivergent : Kind) {}
+
 AnalysisKey DivergenceAnalysis::Key;
 
 DivergenceAnalysis::Result
 DivergenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+
+  // Trivially return an empty analysis if the target does not have
+  // divergence.
+  if (!TTI.hasBranchDivergence())
+    return DivergenceInfo(F, DivergenceInfo::AlwaysUniform);
+
   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
   auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
   auto &LI = AM.getResult<LoopAnalysis>(F);
-  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
 
   return DivergenceInfo(F, DT, PDT, LI, TTI, /* KnownReducible = */ false);
 }
diff --git a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
--- a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
+++ b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
@@ -302,6 +302,7 @@
   AU.addRequiredTransitive<DominatorTreeWrapperPass>();
   AU.addRequiredTransitive<PostDominatorTreeWrapperPass>();
   AU.addRequiredTransitive<LoopInfoWrapperPass>();
+  AU.addRequiredTransitive<TargetTransformInfoWrapperPass>();
   AU.setPreservesAll();
 }
 
@@ -319,15 +320,19 @@
 }
 
 bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
-  auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
-  if (TTIWP == nullptr)
-    return false;
+  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  DIWrapper = std::make_unique<DivergenceInfo>(F, this);
 
-  TargetTransformInfo &TTI = TTIWP->getTTI(F);
   // Fast path: if the target does not have branch divergence, we do not mark
   // any branch as divergent.
-  if (!TTI.hasBranchDivergence())
+  if (!TTI.hasBranchDivergence()) {
+    // Create a DivergenceInfo object to capture whether the user
+    // enabled AssumeDivergent.
+    if (UseGPUDA)
+      gpuDA =
+          std::make_unique<DivergenceInfo>(F, DivergenceInfo::AlwaysUniform);
     return false;
+  }
 
   DivergentValues.clear();
   DivergentUses.clear();
@@ -341,7 +346,6 @@
     auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     gpuDA = std::make_unique<DivergenceInfo>(F, DT, PDT, LI, TTI,
                                              /* KnownReducible = */ true);
-
   } else {
     // run LLVM's existing DivergenceAnalysis
     DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses);
@@ -356,6 +360,13 @@
   return false;
 }
 
+bool LegacyDivergenceAnalysis::hasDivergence() const {
+  if (gpuDA) {
+    return gpuDA->hasDivergence();
+  }
+  return !DivergentValues.empty();
+}
+
 bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const {
   if (gpuDA) {
     return gpuDA->isDivergent(*V);
@@ -371,7 +382,7 @@
 }
 
 void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
-  if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty())
+  if (!hasDivergence())
     return;
 
   const Function *F = nullptr;
diff --git a/llvm/lib/Transforms/Scalar/Sink.cpp b/llvm/lib/Transforms/Scalar/Sink.cpp
--- a/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -14,6 +14,8 @@
 #include "llvm/Transforms/Scalar/Sink.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
@@ -33,6 +35,7 @@
 STATISTIC(NumSinkIter, "Number of sinking iterations");
 
 static bool isSafeToMove(Instruction *Inst, AliasAnalysis &AA,
+                         DivergenceInfo &DI,
                          SmallPtrSetImpl<Instruction *> &Stores) {
 
   if (Inst->mayWriteToMemory()) {
@@ -52,10 +55,11 @@
     return false;
 
   if (auto *Call = dyn_cast<CallBase>(Inst)) {
-    // Convergent operations cannot be made control-dependent on additional
-    // values.
-    if (Call->isConvergent())
-      return false;
+    // Convergent operations cannot be sunk across divergent branches.
+    if (Call->isConvergent()) {
+      if (DI.hasDivergence())
+        return false;
+    }
 
     for (Instruction *S : Stores)
       if (isModSet(AA.getModRefInfo(S, Call)))
@@ -106,7 +110,8 @@
 /// instruction out of its current block into a successor.
 static bool SinkInstruction(Instruction *Inst,
                             SmallPtrSetImpl<Instruction *> &Stores,
-                            DominatorTree &DT, LoopInfo &LI, AAResults &AA) {
+                            DominatorTree &DT, LoopInfo &LI, AAResults &AA,
+                            DivergenceInfo &DI) {
 
   // Don't sink static alloca instructions. CodeGen assumes allocas outside the
   // entry block are dynamically sized stack objects.
@@ -115,7 +120,7 @@
     return false;
 
   // Check if it's safe to move the instruction.
-  if (!isSafeToMove(Inst, AA, Stores))
+  if (!isSafeToMove(Inst, AA, DI, Stores))
     return false;
 
   // FIXME: This should include support for sinking instructions within the
@@ -177,7 +182,7 @@
 }
 
 static bool ProcessBlock(BasicBlock &BB, DominatorTree &DT, LoopInfo &LI,
-                         AAResults &AA) {
+                         AAResults &AA, DivergenceInfo &DI) {
   // Can't sink anything out of a block that has less than two successors.
   if (BB.getTerminator()->getNumSuccessors() <= 1)
     return false;
@@ -205,7 +210,7 @@
     if (Inst->isDebugOrPseudoInst())
       continue;
 
-    if (SinkInstruction(Inst, Stores, DT, LI, AA)) {
+    if (SinkInstruction(Inst, Stores, DT, LI, AA, DI)) {
       ++NumSunk;
       MadeChange = true;
     }
@@ -217,7 +222,8 @@
 }
 
 static bool iterativelySinkInstructions(Function &F, DominatorTree &DT,
-                                        LoopInfo &LI, AAResults &AA) {
+                                        LoopInfo &LI, AAResults &AA,
+                                        DivergenceInfo &DI) {
   bool MadeChange, EverMadeChange = false;
 
   do {
@@ -225,7 +231,7 @@
     LLVM_DEBUG(dbgs() << "Sinking iteration " << NumSinkIter << "\n");
 
     // Process all basic blocks.
     for (BasicBlock &I : F)
-      MadeChange |= ProcessBlock(I, DT, LI, AA);
+      MadeChange |= ProcessBlock(I, DT, LI, AA, DI);
     EverMadeChange |= MadeChange;
     NumSinkIter++;
   } while (MadeChange);
@@ -237,8 +243,9 @@
   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
   auto &LI = AM.getResult<LoopAnalysis>(F);
   auto &AA = AM.getResult<AAManager>(F);
+  auto &DI = AM.getResult<DivergenceAnalysis>(F);
 
-  if (!iterativelySinkInstructions(F, DT, LI, AA))
+  if (!iterativelySinkInstructions(F, DT, LI, AA, DI))
     return PreservedAnalyses::all();
 
   PreservedAnalyses PA;
@@ -258,8 +265,9 @@
     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+    auto &DI = getAnalysis<LegacyDivergenceAnalysis>().getDI();
 
-    return iterativelySinkInstructions(F, DT, LI, AA);
+    return iterativelySinkInstructions(F, DT, LI, AA, DI);
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -268,6 +276,7 @@
     AU.addRequired<AAResultsWrapperPass>();
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<LoopInfoWrapperPass>();
+    AU.addRequired<LegacyDivergenceAnalysis>();
     AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
   }
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll
@@ -8,8 +8,6 @@
 ; behaviour. Instead, it only checks for the values that are known to
 ; be divergent according to the legacy analysis.
 
-; RUN: opt -mtriple amdgcn-- -passes='print<divergence>' -disable-output %s 2>&1 | FileCheck %s
-
 ; This test contains an unstructured loop.
 ;        +-------------- entry ----------------+
 ;        |                                     |
diff --git a/llvm/test/Transforms/Sink/convergent.ll b/llvm/test/Transforms/Sink/convergent.ll
--- a/llvm/test/Transforms/Sink/convergent.ll
+++ b/llvm/test/Transforms/Sink/convergent.ll
@@ -1,12 +1,24 @@
-; RUN: opt -sink -S < %s | FileCheck %s
+; RUN: opt -enable-new-pm=1 -sink -S < %s | FileCheck %s -check-prefixes=CHECK,UNI
+; RUN: opt -enable-new-pm=0 -sink -S < %s | FileCheck %s -check-prefixes=CHECK,UNI
+; RUN: opt -enable-new-pm=0 -use-gpu-divergence-analysis -sink -S < %s | FileCheck %s -check-prefixes=CHECK,UNI
 
-; Verify that IR sinking does not move convergent operations to
-; blocks that are not control equivalent.
+; RUN: opt -enable-new-pm=1 -sink -gpuda-assume-always-divergent -S < %s | FileCheck %s -check-prefixes=CHECK,DIV
+; RUN: opt -enable-new-pm=0 -sink -use-gpu-divergence-analysis -gpuda-assume-always-divergent -S < %s | FileCheck %s -check-prefixes=CHECK,DIV
 
-; CHECK: define i32 @foo
-; CHECK: entry
-; CHECK-NEXT: call i32 @bar
-; CHECK-NEXT: br i1 %arg
+; Verify that sinking does not move convergent operations if the
+; control flow is divergent.
+
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; DIV-NEXT:      [[C:%.*]] = call i32 @bar()
+; CHECK-NEXT:    br i1 [[ARG:%.*]], label [[THEN:%.*]], label [[END:%.*]]
+; CHECK-EMPTY:
+; CHECK-NEXT:  then:
+; UNI-NEXT:      [[C:%.*]] = call i32 @bar()
+; CHECK-NEXT:    ret i32 [[C]]
+; CHECK-EMPTY:
+; CHECK:       end:
+; CHECK-NEXT:    ret i32 0
 
 define i32 @foo(i1 %arg) {
 entry: