Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -316,6 +316,7 @@ /// createStackProtectorPass - This pass adds stack protectors to functions. /// FunctionPass *createStackProtectorPass(); + extern char &StackProtectorID; /// createMachineVerifierPass - This pass verifies cenerated machine code /// instructions for correctness. Index: lib/CodeGen/StackProtector.cpp =================================================================== --- lib/CodeGen/StackProtector.cpp +++ lib/CodeGen/StackProtector.cpp @@ -68,6 +68,7 @@ "Insert stack protectors", false, true) FunctionPass *llvm::createStackProtectorPass() { return new StackProtector(); } +char &llvm::StackProtectorID = StackProtector::ID; void StackProtector::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -46,6 +46,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Transforms/Utils.h" #include #include #include @@ -86,6 +87,10 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); + // LCSSA generates a phi node in an exit block so that the divergence of + // each use can be passed to isel in cases where the value is uniform + // inside the loop but non-uniform for uses outside the loop. 
+ AU.addRequiredID(LCSSAID); AU.addRequired(); SelectionDAGISel::getAnalysisUsage(AU); } @@ -292,6 +297,7 @@ "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo) INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis) +INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) Index: lib/Transforms/Utils/LCSSA.cpp =================================================================== --- lib/Transforms/Utils/LCSSA.cpp +++ lib/Transforms/Utils/LCSSA.cpp @@ -48,6 +48,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/CodeGen/Passes.h" using namespace llvm; #define DEBUG_TYPE "lcssa" @@ -446,6 +447,7 @@ AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + AU.addPreservedID(StackProtectorID); // This is needed to perform LCSSA verification inside LPPassManager AU.addRequired(); Index: test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll =================================================================== --- test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll +++ test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll @@ -17,14 +17,15 @@ ; CHECK-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x ; CHECK-NEXT: v_cmp_nlt_f32_e64 s[0:1], 0, v0 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3 +; CHECK-NEXT: ; implicit-def: $sgpr8_sgpr9 ; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7 +; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3 ; CHECK-NEXT: BB0_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 32, v1 ; CHECK-NEXT: s_and_b64 vcc, exec, vcc ; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec -; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], exec +; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], exec ; CHECK-NEXT: 
s_cbranch_vccz BB0_5 ; CHECK-NEXT: ; %bb.2: ; %endif1 ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 @@ -39,26 +40,26 @@ ; CHECK-NEXT: BB0_4: ; %Flow1 ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] -; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec -; CHECK-NEXT: s_branch BB0_6 -; CHECK-NEXT: BB0_5: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: ; implicit-def: $vgpr1 -; CHECK-NEXT: BB0_6: ; %Flow +; CHECK-NEXT: s_mov_b64 s[8:9], 0 +; CHECK-NEXT: BB0_5: ; %Flow ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7] -; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5] -; CHECK-NEXT: s_mov_b64 s[4:5], s[8:9] -; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7] +; CHECK-NEXT: s_or_b64 s[10:11], s[10:11], s[4:5] +; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec +; CHECK-NEXT: s_and_b64 s[4:5], s[8:9], exec +; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] +; CHECK-NEXT: s_mov_b64 s[4:5], s[10:11] +; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11] ; CHECK-NEXT: s_cbranch_execnz BB0_1 -; CHECK-NEXT: ; %bb.7: ; %Flow2 -; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: ; %bb.6: ; %Flow2 +; CHECK-NEXT: s_or_b64 exec, exec, s[10:11] ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; this is the divergent branch with the condition not marked as divergent ; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[2:3] -; CHECK-NEXT: ; mask branch BB0_9 -; CHECK-NEXT: BB0_8: ; %if1 +; CHECK-NEXT: ; mask branch BB0_8 +; CHECK-NEXT: BB0_7: ; %if1 ; CHECK-NEXT: v_sqrt_f32_e32 v1, v0 -; CHECK-NEXT: BB0_9: ; %endloop +; CHECK-NEXT: BB0_8: ; %endloop ; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] ; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm ; CHECK-NEXT: s_endpgm