diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h --- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DivergenceAnalysis.h @@ -25,6 +25,7 @@ class Module; class Value; class Instruction; +class LegacyDivergenceAnalysis; class Loop; class raw_ostream; class TargetTransformInfo; @@ -147,7 +148,9 @@ // analysis can run indefinitely. We set ContainsIrreducible and no // analysis is actually performed on the function. All values in // this function are conservatively reported as divergent instead. - bool ContainsIrreducible; + bool ContainsIrreducible = false; + + const LegacyDivergenceAnalysis *LegacyDA = nullptr; std::unique_ptr SDA; std::unique_ptr DA; @@ -156,23 +159,22 @@ const PostDominatorTree &PDT, const LoopInfo &LI, const TargetTransformInfo &TTI, bool KnownReducible); + DivergenceInfo(Function &F, LegacyDivergenceAnalysis *L) + : F(F), LegacyDA(L) {} + + DivergenceInfo(Function &F) : F(F) {} + /// Whether any divergence was detected. - bool hasDivergence() const { - return ContainsIrreducible || DA->hasDetectedDivergence(); - } + bool hasDivergence() const; /// The GPU kernel this analysis result is for const Function &getFunction() const { return F; } /// Whether \p V is divergent at its definition. - bool isDivergent(const Value &V) const { - return ContainsIrreducible || DA->isDivergent(V); - } + bool isDivergent(const Value &V) const; /// Whether \p U is divergent. Uses of a uniform value can be divergent. - bool isDivergentUse(const Use &U) const { - return ContainsIrreducible || DA->isDivergentUse(U); - } + bool isDivergentUse(const Use &U) const; /// Whether \p V is uniform/non-divergent. bool isUniform(const Value &V) const { return !isDivergent(V); } diff --git a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h --- a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h +++ b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h @@ -41,6 +41,9 @@ // Print all divergent branches in the function. void print(raw_ostream &OS, const Module *) const override; + /// Whether any divergence was detected. + bool hasDivergence() const; + // Returns true if V is divergent at its definition. bool isDivergent(const Value *V) const; @@ -57,6 +60,8 @@ // Keep the analysis results uptodate by removing an erased value. void removeValue(const Value *V) { DivergentValues.erase(V); } + DivergenceInfo &getDI() { return gpuDA ? *gpuDA : *DIWrapper; } + private: // Whether analysis should be performed by GPUDivergenceAnalysis. bool shouldUseGPUDivergenceAnalysis(const Function &F, @@ -65,6 +70,9 @@ // (optional) handle to new DivergenceAnalysis std::unique_ptr gpuDA; + // wrapper to implement new interface + std::unique_ptr DIWrapper; + // Stores all divergent values. DenseSet DivergentValues; diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp --- a/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -74,6 +74,7 @@ #include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/PostDominators.h" @@ -344,11 +345,43 @@ return isDivergent(V) || isTemporalDivergent(*I.getParent(), V); } +bool DivergenceInfo::hasDivergence() const { + if (LegacyDA) + return LegacyDA->hasDivergence(); + if (ContainsIrreducible) + return true; + if (!DA) + return false; + return DA->hasDetectedDivergence(); +} + +/// Whether \p V is divergent at its definition. +bool DivergenceInfo::isDivergent(const Value &V) const { + if (LegacyDA) + return LegacyDA->isDivergent(&V); + if (ContainsIrreducible) + return true; + if (!DA) + return false; + return DA->isDivergent(V); +} + +/// Whether \p U is divergent. Uses of a uniform value can be divergent. +bool DivergenceInfo::isDivergentUse(const Use &U) const { + if (LegacyDA) + return LegacyDA->isDivergentUse(&U); + if (ContainsIrreducible) + return true; + if (!DA) + return false; + return DA->isDivergentUse(U); +} + DivergenceInfo::DivergenceInfo(Function &F, const DominatorTree &DT, const PostDominatorTree &PDT, const LoopInfo &LI, const TargetTransformInfo &TTI, bool KnownReducible) - : F(F), ContainsIrreducible(false) { + : F(F) { if (!KnownReducible) { using RPOTraversal = ReversePostOrderTraversal; RPOTraversal FuncRPOT(&F); @@ -381,10 +414,16 @@ DivergenceAnalysis::Result DivergenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { + auto &TTI = AM.getResult(F); + + // Trivially return an empty analysis if the target does not have + // divergence. + if (!TTI.hasBranchDivergence()) + return DivergenceInfo(F); + auto &DT = AM.getResult(F); auto &PDT = AM.getResult(F); auto &LI = AM.getResult(F); - auto &TTI = AM.getResult(F); return DivergenceInfo(F, DT, PDT, LI, TTI, /* KnownReducible = */ false); } diff --git a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp --- a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp @@ -302,6 +302,7 @@ AU.addRequiredTransitive(); AU.addRequiredTransitive(); AU.addRequiredTransitive(); + AU.addRequiredTransitive(); AU.setPreservesAll(); } @@ -319,15 +320,14 @@ } bool LegacyDivergenceAnalysis::runOnFunction(Function &F) { - auto *TTIWP = getAnalysisIfAvailable(); - if (TTIWP == nullptr) - return false; + auto &TTI = getAnalysis().getTTI(F); - TargetTransformInfo &TTI = TTIWP->getTTI(F); // Fast path: if the target does not have branch divergence, we do not mark // any branch as divergent. - if (!TTI.hasBranchDivergence()) + if (!TTI.hasBranchDivergence()) { + DIWrapper = std::make_unique(F, this); return false; + } DivergentValues.clear(); DivergentUses.clear(); @@ -347,6 +347,7 @@ DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses); DP.populateWithSourcesOfDivergence(); DP.propagate(); + DIWrapper = std::make_unique(F, this); } LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName() @@ -356,6 +357,13 @@ return false; } +bool LegacyDivergenceAnalysis::hasDivergence() const { + if (gpuDA) { + return gpuDA->hasDivergence(); + } + return !DivergentValues.empty(); +} + bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const { if (gpuDA) { return gpuDA->isDivergent(*V); @@ -371,7 +379,7 @@ } void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const { - if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty()) + if (!hasDivergence()) return; const Function *F = nullptr; diff --git a/llvm/lib/Transforms/Scalar/Sink.cpp b/llvm/lib/Transforms/Scalar/Sink.cpp --- a/llvm/lib/Transforms/Scalar/Sink.cpp +++ b/llvm/lib/Transforms/Scalar/Sink.cpp @@ -14,6 +14,8 @@ #include "llvm/Transforms/Scalar/Sink.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" @@ -33,6 +35,7 @@ STATISTIC(NumSinkIter, "Number of sinking iterations"); static bool isSafeToMove(Instruction *Inst, AliasAnalysis &AA, + DivergenceInfo &DI, SmallPtrSetImpl &Stores) { if (Inst->mayWriteToMemory()) { @@ -52,10 +55,11 @@ return false; if (auto *Call = dyn_cast(Inst)) { - // Convergent operations cannot be made control-dependent on additional - // values. - if (Call->isConvergent()) - return false; + // Convergent operations cannot be sunk across divergent branches. + if (Call->isConvergent()) { + if (DI.hasDivergence()) + return false; + } for (Instruction *S : Stores) if (isModSet(AA.getModRefInfo(S, Call))) @@ -106,7 +110,8 @@ /// instruction out of its current block into a successor. static bool SinkInstruction(Instruction *Inst, SmallPtrSetImpl &Stores, - DominatorTree &DT, LoopInfo &LI, AAResults &AA) { + DominatorTree &DT, LoopInfo &LI, AAResults &AA, + DivergenceInfo &DI) { // Don't sink static alloca instructions. CodeGen assumes allocas outside the // entry block are dynamically sized stack objects. @@ -115,7 +120,7 @@ return false; // Check if it's safe to move the instruction. - if (!isSafeToMove(Inst, AA, Stores)) + if (!isSafeToMove(Inst, AA, DI, Stores)) return false; // FIXME: This should include support for sinking instructions within the @@ -177,7 +182,7 @@ } static bool ProcessBlock(BasicBlock &BB, DominatorTree &DT, LoopInfo &LI, - AAResults &AA) { + AAResults &AA, DivergenceInfo &DI) { // Can't sink anything out of a block that has less than two successors. if (BB.getTerminator()->getNumSuccessors() <= 1) return false; @@ -205,7 +210,7 @@ if (Inst->isDebugOrPseudoInst()) continue; - if (SinkInstruction(Inst, Stores, DT, LI, AA)) { + if (SinkInstruction(Inst, Stores, DT, LI, AA, DI)) { ++NumSunk; MadeChange = true; } @@ -217,7 +222,8 @@ } static bool iterativelySinkInstructions(Function &F, DominatorTree &DT, - LoopInfo &LI, AAResults &AA) { + LoopInfo &LI, AAResults &AA, + DivergenceInfo &DI) { bool MadeChange, EverMadeChange = false; do { @@ -225,7 +231,7 @@ LLVM_DEBUG(dbgs() << "Sinking iteration " << NumSinkIter << "\n"); // Process all basic blocks. for (BasicBlock &I : F) - MadeChange |= ProcessBlock(I, DT, LI, AA); + MadeChange |= ProcessBlock(I, DT, LI, AA, DI); EverMadeChange |= MadeChange; NumSinkIter++; } while (MadeChange); @@ -237,8 +243,9 @@ auto &DT = AM.getResult(F); auto &LI = AM.getResult(F); auto &AA = AM.getResult(F); + auto &DI = AM.getResult(F); - if (!iterativelySinkInstructions(F, DT, LI, AA)) + if (!iterativelySinkInstructions(F, DT, LI, AA, DI)) return PreservedAnalyses::all(); PreservedAnalyses PA; @@ -258,8 +265,9 @@ auto &DT = getAnalysis().getDomTree(); auto &LI = getAnalysis().getLoopInfo(); auto &AA = getAnalysis().getAAResults(); + auto &DI = getAnalysis().getDI(); - return iterativelySinkInstructions(F, DT, LI, AA); + return iterativelySinkInstructions(F, DT, LI, AA, DI); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -268,6 +276,7 @@ AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); } diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll @@ -8,8 +8,6 @@ ; behaviour. Instead, it only checks for the values that are known to ; be divergent according to the legacy analysis. -; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s - ; This test contains an unstructured loop. ; +-------------- entry ----------------+ ; | | diff --git a/llvm/test/Transforms/Sink/convergent-divergent.ll b/llvm/test/Transforms/Sink/convergent-divergent.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Sink/convergent-divergent.ll @@ -0,0 +1,31 @@ +; REQUIRES: amdgpu-registered-target +; RUN: opt -sink -mtriple=amdgcn -S < %s | FileCheck %s +; RUN: opt -enable-new-pm=0 -sink -mtriple=amdgcn -S < %s | FileCheck %s + +; Verify that IR sinking does not move convergent operations to +; blocks that are not control equivalent. + +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = call i32 @bar() +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[THEN:%.*]], label [[END:%.*]] +; CHECK-EMPTY: +; CHECK-NEXT: then: +; CHECK-NEXT: ret i32 [[C]] +; CHECK-EMPTY: +; CHECK: end: +; CHECK-NEXT: ret i32 0 + +define i32 @foo(i1 %arg) { +entry: + %c = call i32 @bar() nounwind readonly convergent + br i1 %arg, label %then, label %end + +then: + ret i32 %c + +end: + ret i32 0 +} + +declare i32 @bar() nounwind readonly convergent diff --git a/llvm/test/Transforms/Sink/convergent-uniform.ll b/llvm/test/Transforms/Sink/convergent-uniform.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Sink/convergent-uniform.ll @@ -0,0 +1,30 @@ +; RUN: opt -sink -S < %s | FileCheck %s +; RUN: opt -enable-new-pm=0 -sink -S < %s | FileCheck %s -check-prefixes=CHECK + +; The default target does not have branch divergence. Verify that IR +; sinking moves convergent operations too. + +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[THEN:%.*]], label [[END:%.*]] +; CHECK-EMPTY: +; CHECK-NEXT: then: +; CHECK-NEXT: [[C:%.*]] = call i32 @bar() +; CHECK-NEXT: ret i32 [[C]] +; CHECK-EMPTY: +; CHECK: end: +; CHECK-NEXT: ret i32 0 + +define i32 @foo(i1 %arg) { +entry: + %c = call i32 @bar() nounwind readonly convergent + br i1 %arg, label %then, label %end + +then: + ret i32 %c + +end: + ret i32 0 +} + +declare i32 @bar() nounwind readonly convergent diff --git a/llvm/test/Transforms/Sink/convergent.ll b/llvm/test/Transforms/Sink/convergent.ll deleted file mode 100644 --- a/llvm/test/Transforms/Sink/convergent.ll +++ /dev/null @@ -1,23 +0,0 @@ -; RUN: opt -sink -S < %s | FileCheck %s - -; Verify that IR sinking does not move convergent operations to -; blocks that are not control equivalent. - -; CHECK: define i32 @foo -; CHECK: entry -; CHECK-NEXT: call i32 @bar -; CHECK-NEXT: br i1 %arg - -define i32 @foo(i1 %arg) { -entry: - %c = call i32 @bar() nounwind readonly convergent - br i1 %arg, label %then, label %end - -then: - ret i32 %c - -end: - ret i32 0 -} - -declare i32 @bar() nounwind readonly convergent