Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -811,6 +811,7 @@ if (!LateCFGStructurize) { addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions } + addPass(createLCSSAPass()); addPass(createSinkingPass()); addPass(createAMDGPUAnnotateUniformValues()); if (!LateCFGStructurize) { Index: test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll =================================================================== --- test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll +++ test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll @@ -17,14 +17,15 @@ ; CHECK-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x ; CHECK-NEXT: v_cmp_nlt_f32_e64 s[0:1], 0, v0 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3 +; CHECK-NEXT: ; implicit-def: $sgpr8_sgpr9 ; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7 +; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3 ; CHECK-NEXT: BB0_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 32, v1 ; CHECK-NEXT: s_and_b64 vcc, exec, vcc ; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec -; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], exec +; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], exec ; CHECK-NEXT: s_cbranch_vccz BB0_5 ; CHECK-NEXT: ; %bb.2: ; %endif1 ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 @@ -39,26 +40,26 @@ ; CHECK-NEXT: BB0_4: ; %Flow1 ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] -; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec -; CHECK-NEXT: s_branch BB0_6 -; CHECK-NEXT: BB0_5: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: ; implicit-def: $vgpr1 -; CHECK-NEXT: BB0_6: ; %Flow +; CHECK-NEXT: s_mov_b64 s[8:9], 0 +; CHECK-NEXT: BB0_5: ; %Flow ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7] -; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5] -; CHECK-NEXT: s_mov_b64 s[4:5], s[8:9] -; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7] +; CHECK-NEXT: s_or_b64 s[10:11], s[10:11], s[4:5] +; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec +; CHECK-NEXT: s_and_b64 s[4:5], s[8:9], exec +; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] +; CHECK-NEXT: s_mov_b64 s[4:5], s[10:11] +; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11] ; CHECK-NEXT: s_cbranch_execnz BB0_1 -; CHECK-NEXT: ; %bb.7: ; %Flow2 -; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: ; %bb.6: ; %Flow2 +; CHECK-NEXT: s_or_b64 exec, exec, s[10:11] ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; this is the divergent branch with the condition not marked as divergent ; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[2:3] -; CHECK-NEXT: ; mask branch BB0_9 -; CHECK-NEXT: BB0_8: ; %if1 +; CHECK-NEXT: ; mask branch BB0_8 +; CHECK-NEXT: BB0_7: ; %if1 ; CHECK-NEXT: v_sqrt_f32_e32 v1, v0 -; CHECK-NEXT: BB0_9: ; %endloop +; CHECK-NEXT: BB0_8: ; %endloop ; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] ; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm ; CHECK-NEXT: s_endpgm Index: test/CodeGen/AMDGPU/multilevel-break.ll =================================================================== --- test/CodeGen/AMDGPU/multilevel-break.ll +++ test/CodeGen/AMDGPU/multilevel-break.ll @@ -47,13 +47,16 @@ ; GCN: s_or_b64 exec, exec, [[SAVE_EXEC]] ; GCN: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_INNER]] ; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]] +; GCN: s_andn2_b64 [[BREAK_OUTER2:s\[[0-9]+:[0-9]+\]]], [[BREAK_OUTER2]], exec +; GCN: s_and_b64 [[LEFT_INNER]], [[BREAK_OUTER]], exec +; GCN: s_or_b64 [[BREAK_OUTER2]], [[BREAK_OUTER2]], [[LEFT_INNER]] ; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]] ; GCN: s_andn2_b64 exec, exec, [[TMP0]] ; GCN: s_cbranch_execnz [[INNER_LOOP]] ; GCN: ; %Flow2 ; GCN: s_or_b64 exec, exec, [[TMP0]] -; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER]] +; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER2]] ; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]] ; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]] ; GCN: s_andn2_b64 exec, exec, [[TMP1]]