diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h --- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DivergenceAnalysis.h @@ -34,7 +34,7 @@ /// This analysis propagates divergence in a data-parallel context from sources /// of divergence to all users. It requires reducible CFGs. All assignments /// should be in SSA form. -class DivergenceAnalysis { +class DivergenceAnalysisImpl { public: /// \brief This instance will analyze the whole function \p F or the loop \p /// RegionLoop. @@ -43,9 +43,9 @@ /// Otherwise the whole function is analyzed. /// \param IsLCSSAForm whether the analysis may assume that the IR in the /// region in in LCSSA form. - DivergenceAnalysis(const Function &F, const Loop *RegionLoop, - const DominatorTree &DT, const LoopInfo &LI, - SyncDependenceAnalysis &SDA, bool IsLCSSAForm); + DivergenceAnalysisImpl(const Function &F, const Loop *RegionLoop, + const DominatorTree &DT, const LoopInfo &LI, + SyncDependenceAnalysis &SDA, bool IsLCSSAForm); /// \brief The loop that defines the analyzed region (if any). const Loop *getRegionLoop() const { return RegionLoop; } @@ -82,8 +82,6 @@ /// divergent. bool isDivergentUse(const Use &U) const; - void print(raw_ostream &OS, const Module *) const; - private: /// \brief Mark \p Term as divergent and push all Instructions that become /// divergent as a result on the worklist. @@ -152,28 +150,39 @@ std::vector Worklist; }; -/// \brief Divergence analysis frontend for GPU kernels. -class GPUDivergenceAnalysis { - SyncDependenceAnalysis SDA; - DivergenceAnalysis DA; +class DivergenceInfo { + Function &F; + + // If the function contains an irreducible region the divergence + // analysis can run indefinitely. We set ContainsIrreducible and no + // analysis is actually performed on the function. All values in + // this function are conservatively reported as divergent instead. + bool ContainsIrreducible; + std::unique_ptr SDA; + std::unique_ptr DA; public: - /// Runs the divergence analysis on @F, a GPU kernel - GPUDivergenceAnalysis(Function &F, const DominatorTree &DT, - const PostDominatorTree &PDT, const LoopInfo &LI, - const TargetTransformInfo &TTI); + DivergenceInfo(Function &F, const DominatorTree &DT, + const PostDominatorTree &PDT, const LoopInfo &LI, + const TargetTransformInfo &TTI, bool KnownReducible); /// Whether any divergence was detected. - bool hasDivergence() const { return DA.hasDetectedDivergence(); } + bool hasDivergence() const { + return ContainsIrreducible || DA->hasDetectedDivergence(); + } /// The GPU kernel this analysis result is for - const Function &getFunction() const { return DA.getFunction(); } + const Function &getFunction() const { return F; } /// Whether \p V is divergent at its definition. - bool isDivergent(const Value &V) const; + bool isDivergent(const Value &V) const { + return ContainsIrreducible || DA->isDivergent(V); + } /// Whether \p U is divergent. Uses of a uniform value can be divergent. - bool isDivergentUse(const Use &U) const; + bool isDivergentUse(const Use &U) const { + return ContainsIrreducible || DA->isDivergentUse(U); + } /// Whether \p V is uniform/non-divergent. bool isUniform(const Value &V) const { return !isDivergent(V); } @@ -181,11 +190,32 @@ /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be /// divergent. bool isUniformUse(const Use &U) const { return !isDivergentUse(U); } +}; - /// Print all divergent values in the kernel. - void print(raw_ostream &OS, const Module *) const; +/// \brief Divergence analysis frontend for GPU kernels. +class DivergenceAnalysis : public AnalysisInfoMixin { + friend AnalysisInfoMixin; + + static AnalysisKey Key; + +public: + using Result = DivergenceInfo; + + /// Runs the divergence analysis on @F, a GPU kernel + Result run(Function &F, FunctionAnalysisManager &AM); }; +/// Printer pass to dump divergence analysis results. +struct DivergenceAnalysisPrinterPass + : public PassInfoMixin { + DivergenceAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + +private: + raw_ostream &OS; +}; // class DivergenceAnalysisPrinterPass + } // namespace llvm #endif // LLVM_ANALYSIS_DIVERGENCEANALYSIS_H diff --git a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h --- a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h +++ b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h @@ -20,8 +20,8 @@ #include namespace llvm { +class DivergenceInfo; class Function; -class GPUDivergenceAnalysis; class Module; class raw_ostream; class TargetTransformInfo; @@ -63,7 +63,7 @@ const TargetTransformInfo &TTI) const; // (optional) handle to new DivergenceAnalysis - std::unique_ptr gpuDA; + std::unique_ptr gpuDA; // Stores all divergent values. DenseSet DivergentValues; diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp --- a/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -31,10 +31,10 @@ // Ralf Karrenberg and Sebastian Hack // CC '12 // -// This DivergenceAnalysis implementation is generic in the sense that it does +// This implementation is generic in the sense that it does // not itself identify original sources of divergence. // Instead specialized adapter classes, (LoopDivergenceAnalysis) for loops and -// (GPUDivergenceAnalysis) for GPU programs, identify the sources of divergence +// (DivergenceAnalysis) for functions, identify the sources of divergence // (e.g., special variables that hold the thread ID or the iteration variable). // // The generic implementation propagates divergence to variables that are data @@ -61,7 +61,7 @@ // The sync dependence detection (which branch induces divergence in which join // points) is implemented in the SyncDependenceAnalysis. // -// The current DivergenceAnalysis implementation has the following limitations: +// The current implementation has the following limitations: // 1. intra-procedural. It conservatively considers the arguments of a // non-kernel-entry function and the return value of a function call as // divergent. @@ -73,6 +73,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/PostDominators.h" @@ -87,16 +88,15 @@ using namespace llvm; -#define DEBUG_TYPE "divergence-analysis" +#define DEBUG_TYPE "divergence" -// class DivergenceAnalysis -DivergenceAnalysis::DivergenceAnalysis( +DivergenceAnalysisImpl::DivergenceAnalysisImpl( const Function &F, const Loop *RegionLoop, const DominatorTree &DT, const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm) : F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA), IsLCSSAForm(IsLCSSAForm) {} -bool DivergenceAnalysis::markDivergent(const Value &DivVal) { +bool DivergenceAnalysisImpl::markDivergent(const Value &DivVal) { if (isAlwaysUniform(DivVal)) return false; assert(isa(DivVal) || isa(DivVal)); @@ -104,12 +104,12 @@ return DivergentValues.insert(&DivVal).second; } -void DivergenceAnalysis::addUniformOverride(const Value &UniVal) { +void DivergenceAnalysisImpl::addUniformOverride(const Value &UniVal) { UniformOverrides.insert(&UniVal); } -bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock, - const Value &Val) const { +bool DivergenceAnalysisImpl::isTemporalDivergent( + const BasicBlock &ObservingBlock, const Value &Val) const { const auto *Inst = dyn_cast(&Val); if (!Inst) return false; @@ -125,15 +125,15 @@ return false; } -bool DivergenceAnalysis::inRegion(const Instruction &I) const { +bool DivergenceAnalysisImpl::inRegion(const Instruction &I) const { return I.getParent() && inRegion(*I.getParent()); } -bool DivergenceAnalysis::inRegion(const BasicBlock &BB) const { +bool DivergenceAnalysisImpl::inRegion(const BasicBlock &BB) const { return (!RegionLoop && BB.getParent() == &F) || RegionLoop->contains(&BB); } -void DivergenceAnalysis::pushUsers(const Value &V) { +void DivergenceAnalysisImpl::pushUsers(const Value &V) { const auto *I = dyn_cast(&V); if (I && I->isTerminator()) { @@ -166,8 +166,8 @@ return I; } -void DivergenceAnalysis::analyzeTemporalDivergence(const Instruction &I, - const Loop &OuterDivLoop) { +void DivergenceAnalysisImpl::analyzeTemporalDivergence( + const Instruction &I, const Loop &OuterDivLoop) { if (isAlwaysUniform(I)) return; if (isDivergent(I)) @@ -188,8 +188,8 @@ // marks all users of loop-carried values of the loop headed by LoopHeader as // divergent -void DivergenceAnalysis::analyzeLoopExitDivergence(const BasicBlock &DivExit, - const Loop &OuterDivLoop) { +void DivergenceAnalysisImpl::analyzeLoopExitDivergence( + const BasicBlock &DivExit, const Loop &OuterDivLoop) { // All users are in immediate exit blocks if (IsLCSSAForm) { for (const auto &Phi : DivExit.phis()) { @@ -242,8 +242,8 @@ } while (!TaintStack.empty()); } -void DivergenceAnalysis::propagateLoopExitDivergence(const BasicBlock &DivExit, - const Loop &InnerDivLoop) { +void DivergenceAnalysisImpl::propagateLoopExitDivergence( + const BasicBlock &DivExit, const Loop &InnerDivLoop) { LLVM_DEBUG(dbgs() << "\tpropLoopExitDiv " << DivExit.getName() << "\n"); // Find outer-most loop that does not contain \p DivExit @@ -265,7 +265,7 @@ // this is a divergent join point - mark all phi nodes as divergent and push // them onto the stack. -void DivergenceAnalysis::taintAndPushPhiNodes(const BasicBlock &JoinBlock) { +void DivergenceAnalysisImpl::taintAndPushPhiNodes(const BasicBlock &JoinBlock) { LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << JoinBlock.getName() << "\n"); @@ -287,7 +287,7 @@ } } -void DivergenceAnalysis::analyzeControlDivergence(const Instruction &Term) { +void DivergenceAnalysisImpl::analyzeControlDivergence(const Instruction &Term) { LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Term.getParent()->getName() << "\n"); @@ -310,7 +310,7 @@ } } -void DivergenceAnalysis::compute() { +void DivergenceAnalysisImpl::compute() { // Initialize worklist. auto DivValuesCopy = DivergentValues; for (const auto *DivVal : DivValuesCopy) { @@ -330,63 +330,82 @@ } } -bool DivergenceAnalysis::isAlwaysUniform(const Value &V) const { +bool DivergenceAnalysisImpl::isAlwaysUniform(const Value &V) const { return UniformOverrides.contains(&V); } -bool DivergenceAnalysis::isDivergent(const Value &V) const { +bool DivergenceAnalysisImpl::isDivergent(const Value &V) const { return DivergentValues.contains(&V); } -bool DivergenceAnalysis::isDivergentUse(const Use &U) const { +bool DivergenceAnalysisImpl::isDivergentUse(const Use &U) const { Value &V = *U.get(); Instruction &I = *cast(U.getUser()); return isDivergent(V) || isTemporalDivergent(*I.getParent(), V); } -void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const { - if (DivergentValues.empty()) - return; - // iterate instructions using instructions() to ensure a deterministic order. - for (auto &I : instructions(F)) { - if (isDivergent(I)) - OS << "DIVERGENT:" << I << '\n'; +DivergenceInfo::DivergenceInfo(Function &F, const DominatorTree &DT, + const PostDominatorTree &PDT, const LoopInfo &LI, + const TargetTransformInfo &TTI, + bool KnownReducible) + : F(F), ContainsIrreducible(false) { + if (!KnownReducible) { + using RPOTraversal = ReversePostOrderTraversal; + RPOTraversal FuncRPOT(&F); + if (containsIrreducibleCFG(FuncRPOT, LI)) { + ContainsIrreducible = true; + return; + } } -} - -// class GPUDivergenceAnalysis -GPUDivergenceAnalysis::GPUDivergenceAnalysis(Function &F, - const DominatorTree &DT, - const PostDominatorTree &PDT, - const LoopInfo &LI, - const TargetTransformInfo &TTI) - : SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA, /* LCSSA */ false) { + SDA = std::make_unique(DT, PDT, LI); + DA = std::make_unique(F, nullptr, DT, LI, *SDA, + /* LCSSA */ false); for (auto &I : instructions(F)) { if (TTI.isSourceOfDivergence(&I)) { - DA.markDivergent(I); + DA->markDivergent(I); } else if (TTI.isAlwaysUniform(&I)) { - DA.addUniformOverride(I); + DA->addUniformOverride(I); } } for (auto &Arg : F.args()) { if (TTI.isSourceOfDivergence(&Arg)) { - DA.markDivergent(Arg); + DA->markDivergent(Arg); } } - DA.compute(); + DA->compute(); } -bool GPUDivergenceAnalysis::isDivergent(const Value &val) const { - return DA.isDivergent(val); -} +AnalysisKey DivergenceAnalysis::Key; -bool GPUDivergenceAnalysis::isDivergentUse(const Use &use) const { - return DA.isDivergentUse(use); +DivergenceAnalysis::Result +DivergenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { + auto &DT = AM.getResult(F); + auto &PDT = AM.getResult(F); + auto &LI = AM.getResult(F); + auto &TTI = AM.getResult(F); + + return DivergenceInfo(F, DT, PDT, LI, TTI, /* KnownReducible = */ false); } -void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const { - OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n"; - DA.print(OS, mod); - OS << "}\n"; +PreservedAnalyses +DivergenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { + auto &DI = FAM.getResult(F); + OS << "'Divergence Analysis' for function '" << F.getName() << "':\n"; + if (DI.hasDivergence()) { + for (auto &Arg : F.args()) { + OS << (DI.isDivergent(Arg) ? "DIVERGENT: " : " "); + OS << Arg << "\n"; + } + for (auto BI = F.begin(), BE = F.end(); BI != BE; ++BI) { + auto &BB = *BI; + OS << "\n " << BB.getName() << ":\n"; + for (auto &I : BB.instructionsWithoutDebug()) { + OS << (DI.isDivergent(I) ? "DIVERGENT: " : " "); + OS << I << "\n"; + } + } + } + return PreservedAnalyses::all(); } diff --git a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp --- a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp @@ -339,7 +339,8 @@ if (shouldUseGPUDivergenceAnalysis(F, TTI)) { // run the new GPU divergence analysis auto &LI = getAnalysis().getLoopInfo(); - gpuDA = std::make_unique(F, DT, PDT, LI, TTI); + gpuDA = std::make_unique(F, DT, PDT, LI, TTI, + /* KnownReducible = */ true); } else { // run LLVM's existing DivergenceAnalysis diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -32,6 +32,7 @@ #include "llvm/Analysis/Delinearization.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Analysis/FunctionPropertiesAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -172,6 +172,7 @@ TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis()) FUNCTION_ANALYSIS("verify", VerifierAnalysis()) FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) +FUNCTION_ANALYSIS("divergence", DivergenceAnalysis()) #ifndef FUNCTION_ALIAS_ANALYSIS #define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ @@ -273,6 +274,7 @@ FUNCTION_PASS("print", BlockFrequencyPrinterPass(dbgs())) FUNCTION_PASS("print", BranchProbabilityPrinterPass(dbgs())) FUNCTION_PASS("print", DependenceAnalysisPrinterPass(dbgs())) +FUNCTION_PASS("print", DivergenceAnalysisPrinterPass(dbgs())) FUNCTION_PASS("print", DominatorTreePrinterPass(dbgs())) FUNCTION_PASS("print", PostDominatorTreePrinterPass(dbgs())) FUNCTION_PASS("print", DelinearizationPrinterPass(dbgs())) diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK-LABEL: for function 'readfirstlane': define amdgpu_kernel void @readfirstlane() { @@ -39,7 +40,7 @@ ret i32 %sgpr } -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'asm_mixed_sgpr_vgpr': +; CHECK-LABEL: Divergence Analysis' for function 'asm_mixed_sgpr_vgpr': ; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1, $2", "=s,=v,v"(i32 %divergent) ; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 0 ; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 1 diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst define i32 @test1(i32* %ptr, i32 %val) #0 { diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll @@ -1,10 +1,11 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s declare i32 @gf2(i32) declare i32 @gf1(i32) define void @tw1(i32 addrspace(4)* noalias nocapture readonly %A, i32 addrspace(4)* noalias nocapture %B) local_unnamed_addr #2 { -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'tw1': +; CHECK: Divergence Analysis' for function 'tw1': ; CHECK: DIVERGENT: i32 addrspace(4)* %A ; CHECK: DIVERGENT: i32 addrspace(4)* %B entry: diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll @@ -1,8 +1,9 @@ -; RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-mesa-mesa3d -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print' -disable-output %s 2>&1 | FileCheck %s ; Tests control flow intrinsics that should be treated as uniform -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if_break': +; CHECK: Divergence Analysis' for function 'test_if_break': ; CHECK: DIVERGENT: %cond = icmp eq i32 %arg0, 0 ; CHECK-NOT: DIVERGENT ; CHECK: ret void @@ -14,7 +15,7 @@ ret void } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if': +; CHECK: Divergence Analysis' for function 'test_if': ; CHECK: DIVERGENT: %cond = icmp eq i32 %arg0, 0 ; CHECK-NEXT: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond) ; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0 @@ -33,7 +34,7 @@ } ; The result should still be treated as divergent, even with a uniform source. -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if_uniform': +; CHECK: Divergence Analysis' for function 'test_if_uniform': ; CHECK-NOT: DIVERGENT ; CHECK: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond) ; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0 @@ -51,7 +52,7 @@ ret void } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_loop_uniform': +; CHECK: Divergence Analysis' for function 'test_loop_uniform': ; CHECK: DIVERGENT: %loop = call i1 @llvm.amdgcn.loop.i64(i64 %mask) define amdgpu_ps void @test_loop_uniform(i64 inreg %mask) { entry: @@ -61,7 +62,7 @@ ret void } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_else': +; CHECK: Divergence Analysis' for function 'test_else': ; CHECK: DIVERGENT: %else = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask) ; CHECK: DIVERGENT: %else.bool = extractvalue { i1, i64 } %else, 0 ; CHECK: {{^[ \t]+}}%else.mask = extractvalue { i1, i64 } %else, 1 @@ -77,7 +78,7 @@ } ; This case is probably always broken -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_else_divergent_mask': +; CHECK: Divergence Analysis' for function 'test_else_divergent_mask': ; CHECK: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask) ; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0 ; CHECK-NOT: DIVERGENT diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll @@ -1,7 +1,7 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s define amdgpu_kernel void @hidden_diverge(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_diverge' +; CHECK-LABEL: 'Divergence Analysis' for function 'hidden_diverge' entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() %cond.var = icmp slt i32 %tid, 0 @@ -22,7 +22,7 @@ } define amdgpu_kernel void @hidden_loop_ipd(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_loop_ipd' +; CHECK-LABEL: 'Divergence Analysis' for function 'hidden_loop_ipd' entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() %cond.var = icmp slt i32 %tid, 0 diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll @@ -1,9 +1,10 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; divergent loop (H
, B) ; the divergent join point in %exit is obscured by uniform control joining in %X define amdgpu_kernel void @hidden_loop_diverge(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_loop_diverge': +; CHECK-LABEL: Divergence Analysis' for function 'hidden_loop_diverge' ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. @@ -45,7 +46,7 @@ ; divergent loop (H
, B) ; the phi nodes in X and Y don't actually receive divergent values define amdgpu_kernel void @unobserved_loop_diverge(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unobserved_loop_diverge': +; CHECK-LABEL: Divergence Analysis' for function 'unobserved_loop_diverge': ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. @@ -86,7 +87,7 @@ ; the inner loop has no exit to top level. ; the outer loop becomes divergent as its exiting branch in C is control-dependent on the inner loop's divergent loop exit in D. define amdgpu_kernel void @hidden_nestedloop_diverge(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_nestedloop_diverge': +; CHECK-LABEL: Divergence Analysis' for function 'hidden_nestedloop_diverge': ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. @@ -137,7 +138,7 @@ ; the outer loop has no immediately divergent exiting edge. ; the inner exiting edge is exiting to top-level through the outer loop causing both to become divergent. define amdgpu_kernel void @hidden_doublebreak_diverge(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_doublebreak_diverge': +; CHECK-LABEL: Divergence Analysis' for function 'hidden_doublebreak_diverge': ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. @@ -179,7 +180,7 @@ ; divergent loop (G
, L) contained inside a uniform loop (H
, B, G, L , D) define amdgpu_kernel void @hidden_containedloop_diverge(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_containedloop_diverge': +; CHECK-LABEL: Divergence Analysis' for function 'hidden_containedloop_diverge': ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll @@ -1,50 +1,52 @@ -; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=tahiti -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s -; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx908 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=tahiti -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx908 -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=tahiti -passes='print' -disable-output %s 2>&1 | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=gfx908 -passes='print' -disable-output %s 2>&1 | FileCheck %s ; Make sure nothing crashes on targets with or without AGPRs -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_sgpr_virtreg_output': +; CHECK: Divergence Analysis' for function 'inline_asm_1_sgpr_virtreg_output': ; CHECK-NOT: DIVERGENT define i32 @inline_asm_1_sgpr_virtreg_output() { %sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"() ret i32 %sgpr } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_sgpr_physreg_output': +; CHECK: Divergence Analysis' for function 'inline_asm_1_sgpr_physreg_output': ; CHECK-NOT: DIVERGENT define i32 @inline_asm_1_sgpr_physreg_output() { %sgpr = call i32 asm "s_mov_b32 s0, 0", "={s0}"() ret i32 %sgpr } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_vgpr_virtreg_output': +; CHECK: Divergence Analysis' for function 'inline_asm_1_vgpr_virtreg_output': ; CHECK: DIVERGENT: %vgpr = call i32 asm "v_mov_b32 $0, 0", "=v"() define i32 @inline_asm_1_vgpr_virtreg_output() { %vgpr = call i32 asm "v_mov_b32 $0, 0", "=v"() ret i32 %vgpr } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_vgpr_physreg_output': +; CHECK: Divergence Analysis' for function 'inline_asm_1_vgpr_physreg_output': ; CHECK: DIVERGENT: %vgpr = call i32 asm "v_mov_b32 v0, 0", "={v0}"() define i32 @inline_asm_1_vgpr_physreg_output() { %vgpr = call i32 asm "v_mov_b32 v0, 0", "={v0}"() ret i32 %vgpr } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_agpr_virtreg_output': +; CHECK: Divergence Analysis' for function 'inline_asm_1_agpr_virtreg_output': ; CHECK: DIVERGENT: %vgpr = call i32 asm "; def $0", "=a"() define i32 @inline_asm_1_agpr_virtreg_output() { %vgpr = call i32 asm "; def $0", "=a"() ret i32 %vgpr } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_1_agpr_physreg_output': +; CHECK: Divergence Analysis' for function 'inline_asm_1_agpr_physreg_output': ; CHECK: DIVERGENT: %vgpr = call i32 asm "; def a0", "={a0}"() define i32 @inline_asm_1_agpr_physreg_output() { %vgpr = call i32 asm "; def a0", "={a0}"() ret i32 %vgpr } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_2_sgpr_virtreg_output': +; CHECK: Divergence Analysis' for function 'inline_asm_2_sgpr_virtreg_output': ; CHECK-NOT: DIVERGENT define void @inline_asm_2_sgpr_virtreg_output() { %asm = call { i32, i32 } asm "; def $0, $1", "=s,=s"() @@ -56,7 +58,7 @@ } ; One output is SGPR, one is VGPR. Infer divergent for the aggregate, but uniform on the SGPR extract -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_sgpr_vgpr_virtreg_output': +; CHECK: Divergence Analysis' for function 'inline_asm_sgpr_vgpr_virtreg_output': ; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1", "=s,=v"() ; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 0 ; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 1 @@ -69,7 +71,7 @@ ret void } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output': +; CHECK: Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output': ; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1", "=v,=s"() ; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 0 ; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 1 @@ -83,7 +85,7 @@ } ; Have an extra output constraint -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'multi_sgpr_inline_asm_output_input_constraint': +; CHECK: Divergence Analysis' for function 'multi_sgpr_inline_asm_output_input_constraint': ; CHECK-NOT: DIVERGENT define void @multi_sgpr_inline_asm_output_input_constraint() { %asm = call { i32, i32 } asm "; def $0, $1", "=s,=s,s"(i32 1234) @@ -94,7 +96,7 @@ ret void } -; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output_input_constraint': +; CHECK: Divergence Analysis' for function 'inline_asm_vgpr_sgpr_virtreg_output_input_constraint': ; CHECK: DIVERGENT: %asm = call { i32, i32 } asm "; def $0, $1", "=v,=s,v"(i32 1234) ; CHECK-NEXT: DIVERGENT: %vgpr = extractvalue { i32, i32 } %asm, 0 ; CHECK-NEXT: {{^[ \t]+}}%sgpr = extractvalue { i32, i32 } %asm, 1 diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: for function 'interp_p1_f16' ; CHECK: DIVERGENT: %p1 = call float @llvm.amdgcn.interp.p1.f16 diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0 define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 { diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible.ll @@ -1,4 +1,14 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -passes='print' -disable-output 2>&1 | FileCheck %s + +; NOTE: The new pass manager does not fall back on legacy divergence +; analysis even when the function contains an irreducible loop. The +; (new) divergence analysis conservatively reports all values as +; divergent. This test does not check for this conservative +; behaviour. Instead, it only checks for the values that are known to +; be divergent according to the legacy analysis. + +; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; This test contains an unstructured loop. ; +-------------- entry ----------------+ @@ -14,7 +24,7 @@ ; if (i3 == 5) // divergent ; because sync dependent on (tid / i3). define i32 @unstructured_loop(i1 %entry_cond) { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unstructured_loop' +; CHECK-LABEL: Divergence Analysis' for function 'unstructured_loop' entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2 diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: bb3: ; CHECK: DIVERGENT: %Guard.bb4 = phi i1 [ true, %bb1 ], [ false, %bb2 ] diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll @@ -1,6 +1,7 @@ -; RUN: opt %s -mtriple amdgcn-- -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_ps': +; CHECK-LABEL: Divergence Analysis' for function 'test_amdgpu_ps': ; CHECK: DIVERGENT: [4 x <16 x i8>] addrspace(4)* %arg0 ; CHECK-NOT: DIVERGENT ; CHECK: DIVERGENT: <2 x i32> %arg3 @@ -12,7 +13,7 @@ ret void } -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_kernel': +; CHECK-LABEL: Divergence Analysis' for function 'test_amdgpu_kernel': ; CHECK-NOT: %arg0 ; CHECK-NOT: %arg1 ; CHECK-NOT: %arg2 @@ -24,7 +25,7 @@ ret void } -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_c': +; CHECK-LABEL: Divergence Analysis' for function 'test_c': ; CHECK: DIVERGENT: ; CHECK: DIVERGENT: ; CHECK: DIVERGENT: diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll @@ -1,4 +1,5 @@ -;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +;RUN: opt -mtriple=amdgcn-mesa-mesa3d -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print' -disable-output %s 2>&1 | FileCheck %s ;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32( define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll @@ -1,4 +1,5 @@ -;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +;RUN: opt -mtriple=amdgcn-mesa-mesa3d -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print' -disable-output %s 2>&1 | FileCheck %s ;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32( define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll @@ -1,4 +1,5 @@ -; RUN: opt %s -mtriple amdgcn-- -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2 ; CHECK: DIVERGENT: %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4 diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK-LABEL: 'test1': ; CHECK-NEXT: DIVERGENT: i32 %bound diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: bb6: ; CHECK: DIVERGENT: %.126.i355.i = phi i1 [ false, %bb5 ], [ true, %bb4 ] diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll @@ -1,8 +1,9 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; temporal-divergent use of value carried by divergent loop define amdgpu_kernel void @temporal_diverge(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge': +; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge': ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. @@ -26,7 +27,7 @@ ; temporal-divergent use of value carried by divergent loop inside a top-level loop define amdgpu_kernel void @temporal_diverge_inloop(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge_inloop': +; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge_inloop': ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. @@ -58,7 +59,7 @@ ; temporal-uniform use of a valud, definition and users are carried by a surrounding divergent loop define amdgpu_kernel void @temporal_uniform_indivloop(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_uniform_indivloop': +; CHECK-LABEL: Divergence Analysis' for function 'temporal_uniform_indivloop': ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. @@ -90,7 +91,7 @@ ; temporal-divergent use of value carried by divergent loop, user is inside sibling loop define amdgpu_kernel void @temporal_diverge_loopuser(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge_loopuser': +; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge_loopuser': ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. @@ -120,7 +121,7 @@ ; temporal-divergent use of value carried by divergent loop, user is inside sibling loop, defs and use are carried by a uniform loop define amdgpu_kernel void @temporal_diverge_loopuser_nested(i32 %n, i32 %a, i32 %b) #0 { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'temporal_diverge_loopuser_nested': +; CHECK-LABEL: Divergence Analysis' for function 'temporal_diverge_loopuser_nested': ; CHECK-NOT: DIVERGENT: %uni. ; CHECK-NOT: DIVERGENT: br i1 %uni. diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: bb2: ; CHECK-NOT: DIVERGENT: %Guard.bb2 = phi i1 [ true, %bb1 ], [ false, %bb0 ] diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll @@ -1,4 +1,5 @@ -; RUN: opt %s -mtriple amdgcn-- -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: DIVERGENT: %tmp = cmpxchg volatile define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 { diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0 diff --git a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll b/llvm/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll --- a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll @@ -1,10 +1,11 @@ -; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -passes='print' -disable-output 2>&1 | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" define i32 @daorder(i32 %n) { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'daorder' +; CHECK-LABEL: Divergence Analysis' for function 'daorder' entry: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() %cond = icmp slt i32 %tid, 0 diff --git a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll b/llvm/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll --- a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll @@ -1,11 +1,12 @@ -; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -passes='print' -disable-output 2>&1 | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" ; return (n < 0 ? a + threadIdx.x : b + threadIdx.x) define i32 @no_diverge(i32 %n, i32 %a, i32 %b) { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'no_diverge' +; CHECK-LABEL: Divergence Analysis' for function 'no_diverge' entry: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() %cond = icmp slt i32 %n, 0 @@ -27,7 +28,7 @@ ; c = b; ; return c; // c is divergent: sync dependent define i32 @sync(i32 %a, i32 %b) { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'sync' +; CHECK-LABEL: Divergence Analysis' for function 'sync' bb1: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y() %cond = icmp slt i32 %tid, 5 @@ -48,7 +49,7 @@ ; // c here is divergent because it is sync dependent on threadIdx.x >= 5 ; return c; define i32 @mixed(i32 %n, i32 %a, i32 %b) { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'mixed' +; CHECK-LABEL: Divergence Analysis' for function 'mixed' bb1: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z() %cond = icmp slt i32 %tid, 5 @@ -73,7 +74,7 @@ ; We conservatively treats all parameters of a __device__ function as divergent. define i32 @device(i32 %n, i32 %a, i32 %b) { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'device' +; CHECK-LABEL: Divergence Analysis' for function 'device' ; CHECK: DIVERGENT: i32 %n ; CHECK: DIVERGENT: i32 %a ; CHECK: DIVERGENT: i32 %b @@ -98,7 +99,7 @@ ; ; The i defined in the loop is used outside. define i32 @loop() { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'loop' +; CHECK-LABEL: Divergence Analysis' for function 'loop' entry: %laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid() br label %loop @@ -120,7 +121,7 @@ ; Same as @loop, but the loop is in the LCSSA form. define i32 @lcssa() { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'lcssa' +; CHECK-LABEL: Divergence Analysis' for function 'lcssa' entry: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() br label %loop diff --git a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll b/llvm/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll --- a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll @@ -1,10 +1,11 @@ -; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -passes='print' -disable-output 2>&1 | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" define i32 @hidden_diverge(i32 %n, i32 %a, i32 %b) { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_diverge' +; CHECK-LABEL: Divergence Analysis' for function 'hidden_diverge' entry: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() %cond.var = icmp slt i32 %tid, 0 diff --git a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll b/llvm/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll --- a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll @@ -1,4 +1,12 @@ -; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -enable-new-pm=0 -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s +; RUN: opt %s -passes='print' -disable-output 2>&1 | FileCheck %s + +; NOTE: The new pass manager does not fall back on legacy divergence +; analysis even when the function contains an irreducible loop. The +; (new) divergence analysis conservatively reports all values as +; divergent. This test does not check for this conservative +; behaviour. Instead, it only checks for the values that are known to +; be divergent according to the legacy analysis. target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" @@ -17,7 +25,7 @@ ; if (i3 == 5) // divergent ; because sync dependent on (tid / i3). define i32 @unstructured_loop(i1 %entry_cond) { -; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'unstructured_loop' +; CHECK-LABEL: Divergence Analysis' for function 'unstructured_loop' entry: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2 diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s ; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst define i32 @test1(i32* %ptr, i32 %val) #0 { diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -amdgpu-use-legacy-divergence-analysis -divergence %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -enable-new-pm=0 -analyze -amdgpu-use-legacy-divergence-analysis -divergence %s | FileCheck %s ; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0 define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 { diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence | FileCheck %s ; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_ps': ; CHECK: DIVERGENT: [4 x <16 x i8>] addrspace(4)* %arg0 diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll @@ -1,4 +1,4 @@ -;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s +;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s ;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32( define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll @@ -1,4 +1,4 @@ -;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s +;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s ;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32( define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s ; Test that we consider loads from flat and private addrspaces to be divergent. diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence | FileCheck %s ; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2 ; CHECK: DIVERGENT: %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4 diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s ; CHECK-LABEL: 'test1': ; CHECK-NEXT: DIVERGENT: i32 %bound diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence | FileCheck %s ; CHECK: DIVERGENT: %tmp = cmpxchg volatile define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 { diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -amdgpu-use-legacy-divergence-analysis -enable-new-pm=0 -analyze -divergence %s | FileCheck %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0 diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -analyze -divergence | FileCheck %s +; RUN: opt %s -enable-new-pm=0 -analyze -divergence | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -496,7 +496,7 @@ "safe-stack", "cost-model", "codegenprepare", "interleaved-load-combine", "unreachableblockelim", "verify-safepoint-ir", - "divergence", "atomic-expand", + "atomic-expand", "hardware-loops", "type-promotion", "mve-tail-predication", "interleaved-access", "global-merge", "pre-isel-intrinsic-lowering", diff --git a/llvm/unittests/Analysis/DivergenceAnalysisTest.cpp b/llvm/unittests/Analysis/DivergenceAnalysisTest.cpp --- a/llvm/unittests/Analysis/DivergenceAnalysisTest.cpp +++ b/llvm/unittests/Analysis/DivergenceAnalysisTest.cpp @@ -38,7 +38,7 @@ return nullptr; } -// We use this fixture to ensure that we clean up DivergenceAnalysis before +// We use this fixture to ensure that we clean up DivergenceAnalysisImpl before // deleting the PassManager. class DivergenceAnalysisTest : public testing::Test { protected: @@ -54,21 +54,21 @@ DivergenceAnalysisTest() : M("", Context), TLII(), TLI(TLII) {} - DivergenceAnalysis buildDA(Function &F, bool IsLCSSA) { + DivergenceAnalysisImpl buildDA(Function &F, bool IsLCSSA) { DT.reset(new DominatorTree(F)); PDT.reset(new PostDominatorTree(F)); LI.reset(new LoopInfo(*DT)); SDA.reset(new SyncDependenceAnalysis(*DT, *PDT, *LI)); - return DivergenceAnalysis(F, nullptr, *DT, *LI, *SDA, IsLCSSA); + return DivergenceAnalysisImpl(F, nullptr, *DT, *LI, *SDA, IsLCSSA); } void runWithDA( Module &M, StringRef FuncName, bool IsLCSSA, - function_ref + function_ref Test) { auto *F = M.getFunction(FuncName); ASSERT_NE(F, nullptr) << "Could not find " << FuncName; - DivergenceAnalysis DA = buildDA(*F, IsLCSSA); + DivergenceAnalysisImpl DA = buildDA(*F, IsLCSSA); Test(*F, *LI, DA); } }; @@ -82,7 +82,7 @@ BasicBlock *BB = BasicBlock::Create(Context, "entry", F); ReturnInst::Create(Context, nullptr, BB); - DivergenceAnalysis DA = buildDA(*F, false); + DivergenceAnalysisImpl DA = buildDA(*F, false); // Whole function region EXPECT_EQ(DA.getRegionLoop(), nullptr); @@ -135,7 +135,7 @@ Err, C); Function *F = M->getFunction("f_1"); - DivergenceAnalysis DA = buildDA(*F, false); + DivergenceAnalysisImpl DA = buildDA(*F, false); EXPECT_FALSE(DA.hasDetectedDivergence()); auto ItArg = F->arg_begin(); @@ -189,7 +189,7 @@ Err, C); Function *F = M->getFunction("f_lcssa"); - DivergenceAnalysis DA = buildDA(*F, true); + DivergenceAnalysisImpl DA = buildDA(*F, true); EXPECT_FALSE(DA.hasDetectedDivergence()); auto ItArg = F->arg_begin();