diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -16,8 +16,8 @@ #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDGPUMemoryUtils.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/UniformityAnalysis.h" #include "llvm/IR/InstVisitor.h" #include "llvm/InitializePasses.h" @@ -29,7 +29,7 @@ class AMDGPUAnnotateUniformValues : public FunctionPass, public InstVisitor { - LegacyDivergenceAnalysis *DA; + UniformityInfo *UA; MemorySSA *MSSA; AliasAnalysis *AA; bool isEntryFunc; @@ -55,7 +55,7 @@ return "AMDGPU Annotate Uniform Values"; } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.setPreservesAll(); @@ -69,7 +69,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE, "Add AMDGPU uniform metadata", false, false) -INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE, @@ -78,13 +78,13 @@ char AMDGPUAnnotateUniformValues::ID = 0; void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) { - if (DA->isUniform(&I)) + if (!UA->hasDivergentTerminator(*I.getParent())) setUniformMetadata(&I); } void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { Value *Ptr = I.getPointerOperand(); - if (!DA->isUniform(Ptr)) + if (!UA->isUniform(Ptr)) return; Instruction *PtrI = dyn_cast(Ptr); if (PtrI) @@ -108,7 +108,7 @@ if (skipFunction(F)) return false; - DA = &getAnalysis(); + UA = &getAnalysis().getUniformityInfo(); MSSA = &getAnalysis().getMSSA(); AA = &getAnalysis().getAAResults(); isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv()); diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll --- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll +++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll @@ -20,46 +20,48 @@ ; ISA: ; %bb.0: ; %start ; ISA-NEXT: v_readfirstlane_b32 s0, v0 ; ISA-NEXT: s_mov_b32 m0, s0 -; ISA-NEXT: s_mov_b32 s8, 0 +; ISA-NEXT: s_mov_b32 s10, 0 ; ISA-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x -; ISA-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 -; ISA-NEXT: s_mov_b64 s[0:1], 0 +; ISA-NEXT: v_cmp_nlt_f32_e64 s[0:1], 0, v0 +; ISA-NEXT: s_mov_b64 s[2:3], 0 +; ISA-NEXT: ; implicit-def: $sgpr6_sgpr7 ; ISA-NEXT: ; implicit-def: $sgpr4_sgpr5 -; ISA-NEXT: ; implicit-def: $sgpr2_sgpr3 ; ISA-NEXT: s_branch .LBB0_3 ; ISA-NEXT: .LBB0_1: ; %Flow1 ; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1 -; ISA-NEXT: s_or_b64 exec, exec, s[6:7] -; ISA-NEXT: s_mov_b64 s[6:7], 0 +; ISA-NEXT: s_or_b64 exec, exec, s[8:9] +; ISA-NEXT: s_mov_b64 s[8:9], 0 ; ISA-NEXT: .LBB0_2: ; %Flow ; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1 -; ISA-NEXT: s_and_b64 s[10:11], exec, s[4:5] -; ISA-NEXT: s_or_b64 s[0:1], s[10:11], s[0:1] -; ISA-NEXT: s_andn2_b64 s[2:3], s[2:3], exec -; ISA-NEXT: s_and_b64 s[6:7], s[6:7], exec -; ISA-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] -; ISA-NEXT: s_andn2_b64 exec, exec, s[0:1] +; ISA-NEXT: s_and_b64 s[12:13], exec, s[6:7] +; ISA-NEXT: s_or_b64 s[2:3], s[12:13], s[2:3] +; ISA-NEXT: s_andn2_b64 s[4:5], s[4:5], exec +; ISA-NEXT: s_and_b64 s[8:9], s[8:9], exec +; ISA-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] +; ISA-NEXT: s_andn2_b64 exec, exec, s[2:3] ; ISA-NEXT: s_cbranch_execz .LBB0_6 ; ISA-NEXT: .LBB0_3: ; %loop ; ISA-NEXT: ; =>This Inner Loop Header: Depth=1 -; ISA-NEXT: s_or_b64 s[4:5], s[4:5], exec -; ISA-NEXT: s_cmp_lt_u32 s8, 32 -; ISA-NEXT: s_mov_b64 s[6:7], -1 -; ISA-NEXT: s_cbranch_scc0 .LBB0_2 +; ISA-NEXT: s_cmp_lt_u32 s10, 32 +; ISA-NEXT: s_cselect_b64 s[12:13], -1, 0 +; ISA-NEXT: s_mov_b64 s[8:9], -1 +; ISA-NEXT: s_and_b64 vcc, exec, s[12:13] +; ISA-NEXT: s_or_b64 s[6:7], s[6:7], exec +; ISA-NEXT: s_cbranch_vccz .LBB0_2 ; ISA-NEXT: ; %bb.4: ; %endif1 ; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1 -; ISA-NEXT: s_mov_b64 s[4:5], -1 -; ISA-NEXT: s_and_saveexec_b64 s[6:7], vcc +; ISA-NEXT: s_mov_b64 s[6:7], -1 +; ISA-NEXT: s_and_saveexec_b64 s[8:9], s[0:1] ; ISA-NEXT: s_cbranch_execz .LBB0_1 ; ISA-NEXT: ; %bb.5: ; %endif2 ; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1 -; ISA-NEXT: s_add_i32 s8, s8, 1 -; ISA-NEXT: s_xor_b64 s[4:5], exec, -1 +; ISA-NEXT: s_add_i32 s10, s10, 1 +; ISA-NEXT: s_xor_b64 s[6:7], exec, -1 ; ISA-NEXT: s_branch .LBB0_1 ; ISA-NEXT: .LBB0_6: ; %Flow2 -; ISA-NEXT: s_or_b64 exec, exec, s[0:1] +; ISA-NEXT: s_or_b64 exec, exec, s[2:3] ; ISA-NEXT: v_mov_b32_e32 v1, 0 -; ISA-NEXT: s_and_saveexec_b64 s[0:1], s[2:3] +; ISA-NEXT: s_and_saveexec_b64 s[0:1], s[4:5] ; ISA-NEXT: ; %bb.7: ; %if1 ; ISA-NEXT: v_sqrt_f32_e32 v1, v0 ; ISA-NEXT: ; %bb.8: ; %endloop diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -78,13 +78,15 @@ ; GCN-O0-NEXT: Detect single entry single exit regions ; GCN-O0-NEXT: Region Pass Manager ; GCN-O0-NEXT: Structurize control flow -; GCN-O0-NEXT: Post-Dominator Tree Construction -; GCN-O0-NEXT: Natural Loop Information -; GCN-O0-NEXT: Legacy Divergence Analysis +; GCN-O0-NEXT: Cycle Info Analysis +; GCN-O0-NEXT: Uniform Info Analysis ; GCN-O0-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O0-NEXT: Function Alias Analysis Results ; GCN-O0-NEXT: Memory SSA ; GCN-O0-NEXT: AMDGPU Annotate Uniform Values +; GCN-O0-NEXT: Natural Loop Information +; GCN-O0-NEXT: Post-Dominator Tree Construction +; GCN-O0-NEXT: Legacy Divergence Analysis ; GCN-O0-NEXT: SI annotate control flow ; GCN-O0-NEXT: Post-Dominator Tree Construction ; GCN-O0-NEXT: Legacy Divergence Analysis @@ -270,13 +272,15 @@ ; GCN-O1-NEXT: Detect single entry single exit regions ; GCN-O1-NEXT: Region Pass Manager ; GCN-O1-NEXT: Structurize control flow -; GCN-O1-NEXT: Post-Dominator Tree Construction -; GCN-O1-NEXT: Natural Loop Information -; GCN-O1-NEXT: Legacy Divergence Analysis +; GCN-O1-NEXT: Cycle Info Analysis +; GCN-O1-NEXT: Uniform Info Analysis ; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O1-NEXT: Function Alias Analysis Results ; GCN-O1-NEXT: Memory SSA ; GCN-O1-NEXT: AMDGPU Annotate Uniform Values +; GCN-O1-NEXT: Natural Loop Information +; GCN-O1-NEXT: Post-Dominator Tree Construction +; GCN-O1-NEXT: Legacy Divergence Analysis ; GCN-O1-NEXT: SI annotate control flow ; GCN-O1-NEXT: Post-Dominator Tree Construction ; GCN-O1-NEXT: Legacy Divergence Analysis @@ -566,13 +570,15 @@ ; GCN-O1-OPTS-NEXT: Detect single entry single exit regions ; GCN-O1-OPTS-NEXT: Region Pass Manager ; GCN-O1-OPTS-NEXT: Structurize control flow -; GCN-O1-OPTS-NEXT: Post-Dominator Tree Construction -; GCN-O1-OPTS-NEXT: Natural Loop Information -; GCN-O1-OPTS-NEXT: Legacy Divergence Analysis +; GCN-O1-OPTS-NEXT: Cycle Info Analysis +; GCN-O1-OPTS-NEXT: Uniform Info Analysis ; GCN-O1-OPTS-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O1-OPTS-NEXT: Function Alias Analysis Results ; GCN-O1-OPTS-NEXT: Memory SSA ; GCN-O1-OPTS-NEXT: AMDGPU Annotate Uniform Values +; GCN-O1-OPTS-NEXT: Natural Loop Information +; GCN-O1-OPTS-NEXT: Post-Dominator Tree Construction +; GCN-O1-OPTS-NEXT: Legacy Divergence Analysis ; GCN-O1-OPTS-NEXT: SI annotate control flow ; GCN-O1-OPTS-NEXT: Post-Dominator Tree Construction ; GCN-O1-OPTS-NEXT: Legacy Divergence Analysis @@ -870,13 +876,15 @@ ; GCN-O2-NEXT: Detect single entry single exit regions ; GCN-O2-NEXT: Region Pass Manager ; GCN-O2-NEXT: Structurize control flow -; GCN-O2-NEXT: Post-Dominator Tree Construction -; GCN-O2-NEXT: Natural Loop Information -; GCN-O2-NEXT: Legacy Divergence Analysis +; GCN-O2-NEXT: Cycle Info Analysis +; GCN-O2-NEXT: Uniform Info Analysis ; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Memory SSA ; GCN-O2-NEXT: AMDGPU Annotate Uniform Values +; GCN-O2-NEXT: Natural Loop Information +; GCN-O2-NEXT: Post-Dominator Tree Construction +; GCN-O2-NEXT: Legacy Divergence Analysis ; GCN-O2-NEXT: SI annotate control flow ; GCN-O2-NEXT: Post-Dominator Tree Construction ; GCN-O2-NEXT: Legacy Divergence Analysis @@ -1187,13 +1195,15 @@ ; GCN-O3-NEXT: Detect single entry single exit regions ; GCN-O3-NEXT: Region Pass Manager ; GCN-O3-NEXT: Structurize control flow -; GCN-O3-NEXT: Post-Dominator Tree Construction -; GCN-O3-NEXT: Natural Loop Information -; GCN-O3-NEXT: Legacy Divergence Analysis +; GCN-O3-NEXT: Cycle Info Analysis +; GCN-O3-NEXT: Uniform Info Analysis ; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: Memory SSA ; GCN-O3-NEXT: AMDGPU Annotate Uniform Values +; GCN-O3-NEXT: Natural Loop Information +; GCN-O3-NEXT: Post-Dominator Tree Construction +; GCN-O3-NEXT: Legacy Divergence Analysis ; GCN-O3-NEXT: SI annotate control flow ; GCN-O3-NEXT: Post-Dominator Tree Construction ; GCN-O3-NEXT: Legacy Divergence Analysis