diff --git a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h --- a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h +++ b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h @@ -54,7 +54,8 @@ private: // Whether analysis should be performed by GPUDivergenceAnalysis. - bool shouldUseGPUDivergenceAnalysis(const Function &F) const; + bool shouldUseGPUDivergenceAnalysis(const Function &F, + const TargetTransformInfo &TTI) const; // (optional) handle to new DivergenceAnalysis std::unique_ptr<GPUDivergenceAnalysis> gpuDA; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -342,6 +342,10 @@ /// branches. bool hasBranchDivergence() const; + /// Return true if the target prefers to use GPU divergence analysis to + /// replace the legacy version. + bool useGPUDivergenceAnalysis() const; + /// Returns whether V is a source of divergence. 
/// /// This function provides the target-dependent information for @@ -1198,6 +1202,7 @@ virtual int getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0; virtual bool hasBranchDivergence() = 0; + virtual bool useGPUDivergenceAnalysis() = 0; virtual bool isSourceOfDivergence(const Value *V) = 0; virtual bool isAlwaysUniform(const Value *V) = 0; virtual unsigned getFlatAddressSpace() = 0; @@ -1452,6 +1457,7 @@ return Impl.getUserCost(U, Operands); } bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } + bool useGPUDivergenceAnalysis() override { return Impl.useGPUDivergenceAnalysis(); } bool isSourceOfDivergence(const Value *V) override { return Impl.isSourceOfDivergence(V); } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -152,6 +152,8 @@ bool hasBranchDivergence() { return false; } + bool useGPUDivergenceAnalysis() { return false; } + bool isSourceOfDivergence(const Value *V) { return false; } bool isAlwaysUniform(const Value *V) { return false; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -207,6 +207,8 @@ bool hasBranchDivergence() { return false; } + bool useGPUDivergenceAnalysis() { return false; } + bool isSourceOfDivergence(const Value *V) { return false; } bool isAlwaysUniform(const Value *V) { return false; } diff --git a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp --- a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp @@ -301,14 +301,13 @@ void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<PostDominatorTreeWrapperPass>(); - if (UseGPUDA) - AU.addRequired<LoopInfoWrapperPass>(); + 
AU.addRequired<LoopInfoWrapperPass>(); AU.setPreservesAll(); } bool LegacyDivergenceAnalysis::shouldUseGPUDivergenceAnalysis( - const Function &F) const { - if (!UseGPUDA) + const Function &F, const TargetTransformInfo &TTI) const { + if (!(UseGPUDA || TTI.useGPUDivergenceAnalysis())) return false; // GPUDivergenceAnalysis requires a reducible CFG. @@ -337,7 +336,7 @@ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); - if (shouldUseGPUDivergenceAnalysis(F)) { + if (shouldUseGPUDivergenceAnalysis(F, TTI)) { // run the new GPU divergence analysis auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); gpuDA = std::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI); diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -212,6 +212,10 @@ return TTIImpl->hasBranchDivergence(); } +bool TargetTransformInfo::useGPUDivergenceAnalysis() const { + return TTIImpl->useGPUDivergenceAnalysis(); +} + bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const { return TTIImpl->isSourceOfDivergence(V); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -136,6 +136,7 @@ HasFP32Denormals(ST->hasFP32Denormals(F)) { } bool hasBranchDivergence() { return true; } + bool useGPUDivergenceAnalysis() const; void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -69,6 +69,11 @@ cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"), cl::init(150), cl::Hidden); +static cl::opt<bool> 
UseLegacyDA( + "amdgpu-use-legacy-divergence-analysis", + cl::desc("Enable legacy divergence analysis for AMDGPU"), + cl::init(false), cl::Hidden); + static bool dependsOnLocalPhi(const Loop *L, const Value *Cond, unsigned Depth = 0) { const Instruction *I = dyn_cast<Instruction>(Cond); @@ -601,6 +606,11 @@ } } +/// \returns true if the new GPU divergence analysis is enabled. +bool GCNTTIImpl::useGPUDivergenceAnalysis() const { + return !UseLegacyDA; +} + /// \returns true if the result of the value could potentially be /// different across workitems in a wavefront. bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const { diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s ; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst define i32 @test1(i32* %ptr, i32 %val) #0 { diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -analyze -amdgpu-use-legacy-divergence-analysis -divergence %s | FileCheck %s ; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0 define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 { diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll --- 
a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s ; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_ps': ; CHECK: DIVERGENT: diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll @@ -1,4 +1,4 @@ -;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence %s | FileCheck %s +;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s ;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32( define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll @@ -1,4 +1,4 @@ -;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence %s | FileCheck %s +;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s ;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32( define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll 
b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s ; Test that we consider loads from flat and private addrspaces to be divergent. diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s ; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2 ; CHECK: DIVERGENT: %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4 diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s ; CHECK-LABEL: 'test1': ; CHECK-NEXT: DIVERGENT: i32 %bound diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll +++ 
b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s +; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s ; CHECK: DIVERGENT: %tmp = cmpxchg volatile define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 { diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence %s | FileCheck %s +; RUN: opt -mtriple amdgcn-unknown-amdhsa -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0