Index: llvm/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstructions.td +++ llvm/lib/Target/AMDGPU/SIInstructions.td @@ -418,6 +418,7 @@ let Size = 12; let hasSideEffects = 1; let IsNeverUniform = 1; + let isConvergent = 1; } def SI_ELSE : CFPseudoInstSI < @@ -426,6 +427,7 @@ let Size = 12; let hasSideEffects = 1; let IsNeverUniform = 1; + let isConvergent = 1; } def SI_WATERFALL_LOOP : CFPseudoInstSI < @@ -434,6 +436,7 @@ let Size = 8; let isBranch = 1; let Defs = []; + let isConvergent = 1; } def SI_LOOP : CFPseudoInstSI < @@ -443,6 +446,7 @@ let isBranch = 1; let hasSideEffects = 1; let IsNeverUniform = 1; + let isConvergent = 1; } } // End isTerminator = 1 @@ -453,6 +457,7 @@ let isAsCheapAsAMove = 1; let isReMaterializable = 1; let hasSideEffects = 1; + let isConvergent = 1; let isNotDuplicable = 1; // Not a hard requirement, see long comments above for details. let mayLoad = 1; // FIXME: Should not need memory flags let mayStore = 1; @@ -464,6 +469,7 @@ let isNotDuplicable = 1; // Not a hard requirement, see long comments above for details. let isAsCheapAsAMove = 1; let isReMaterializable = 1; + let isConvergent = 1; } // Branch to the early termination block of the shader if SCC is 0. Index: llvm/test/CodeGen/AMDGPU/atomic-optimizer-strict-wqm.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/atomic-optimizer-strict-wqm.ll +++ llvm/test/CodeGen/AMDGPU/atomic-optimizer-strict-wqm.ll @@ -18,12 +18,12 @@ ; GFX10-NEXT: ; in Loop: Header=BB0_2 Depth=1 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s3 -; GFX10-NEXT: s_and_b32 s0, exec_lo, vcc_lo -; GFX10-NEXT: s_or_b32 s2, s0, s2 ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_execz .LBB0_5 ; GFX10-NEXT: .LBB0_2: ; %bb4 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX10-NEXT: s_and_b32 s0, exec_lo, vcc_lo +; GFX10-NEXT: s_or_b32 s2, s0, s2 ; GFX10-NEXT: s_and_saveexec_b32 s3, s1 ; GFX10-NEXT: s_cbranch_execz .LBB0_1 ; GFX10-NEXT: ; %bb.3: ; in Loop: Header=BB0_2 Depth=1 Index: llvm/test/CodeGen/AMDGPU/collapse-endcf.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -1010,8 +1010,8 @@ ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0 ; GCN-NEXT: s_mov_b32 s8, 0 ; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GCN-NEXT: s_mov_b64 s[12:13], 0 ; GCN-NEXT: s_mov_b64 s[6:7], 0 +; GCN-NEXT: s_mov_b64 s[10:11], 0 ; GCN-NEXT: s_branch .LBB5_3 ; GCN-NEXT: .LBB5_1: ; %Flow ; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1 @@ -1019,20 +1019,21 @@ ; GCN-NEXT: .LBB5_2: ; %bb10 ; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1 ; GCN-NEXT: s_or_b64 exec, exec, s[14:15] -; GCN-NEXT: s_and_b64 s[6:7], exec, s[4:5] -; GCN-NEXT: s_or_b64 s[12:13], s[6:7], s[12:13] -; GCN-NEXT: s_mov_b64 s[6:7], 0 +; GCN-NEXT: s_mov_b64 s[6:7], s[12:13] +; GCN-NEXT: s_mov_b64 s[10:11], 0 ; GCN-NEXT: s_andn2_b64 exec, exec, s[12:13] ; GCN-NEXT: s_cbranch_execz .LBB5_7 ; GCN-NEXT: .LBB5_3: ; %bb1 ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: s_and_b64 s[10:11], exec, vcc -; GCN-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7] -; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7] +; GCN-NEXT: s_and_b64 s[12:13], exec, s[4:5] +; GCN-NEXT: s_or_b64 s[12:13], s[12:13], s[6:7] +; GCN-NEXT: s_and_b64 s[14:15], exec, vcc +; GCN-NEXT: s_or_b64 s[10:11], s[14:15], s[10:11] +; GCN-NEXT: s_andn2_b64 exec, exec, s[10:11] ; GCN-NEXT: s_cbranch_execnz .LBB5_3 ; GCN-NEXT: ; %bb.4: ; %bb2 ; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1 -; GCN-NEXT: s_or_b64 exec, exec, s[6:7] +; GCN-NEXT: s_or_b64 exec, exec, s[10:11] ; GCN-NEXT: s_mov_b32 s9, s8 ; GCN-NEXT: s_mov_b32 s10, s8 ; GCN-NEXT: s_mov_b32 s11, s8 Index: llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll +++ llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll @@ -10,26 +10,25 @@ ; GCN-LABEL: needs_and: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s10, 1 +; GCN-NEXT: s_mov_b32 s8, 1 ; GCN-NEXT: s_mov_b64 s[6:7], 0 ; GCN-NEXT: s_branch .LBB0_2 ; GCN-NEXT: .LBB0_1: ; %endif ; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1 -; GCN-NEXT: s_or_b64 exec, exec, s[8:9] -; GCN-NEXT: s_and_b64 s[4:5], exec, vcc -; GCN-NEXT: s_or_b64 s[6:7], s[4:5], s[6:7] -; GCN-NEXT: s_add_i32 s10, s10, 1 +; GCN-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-NEXT: s_add_i32 s8, s8, 1 ; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7] ; GCN-NEXT: s_cbranch_execz .LBB0_4 ; GCN-NEXT: .LBB0_2: ; %loop ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_cmp_gt_u32_e64 s[4:5], s10, v0 -; GCN-NEXT: v_cmp_le_u32_e32 vcc, s10, v0 -; GCN-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] +; GCN-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v0 +; GCN-NEXT: v_cmp_gt_u32_e32 vcc, s8, v0 +; GCN-NEXT: s_or_b64 s[6:7], s[4:5], s[6:7] +; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-NEXT: s_cbranch_execz .LBB0_1 ; GCN-NEXT: ; %bb.3: ; %then ; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1 -; GCN-NEXT: s_nop 1 +; GCN-NEXT: s_nop 0 ; GCN-NEXT: buffer_store_dword v0, off, s[4:7], s4 ; GCN-NEXT: s_branch .LBB0_1 ; GCN-NEXT: .LBB0_4: ; %loopexit @@ -108,13 +107,13 @@ ; GCN-NEXT: .LBB2_1: ; %endif ; GCN-NEXT: ; in Loop: Header=BB2_2 Depth=1 ; GCN-NEXT: s_or_b64 exec, exec, s[8:9] -; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5] -; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] ; GCN-NEXT: s_add_i32 s10, s10, 1 ; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7] ; GCN-NEXT: s_cbranch_execz .LBB2_4 ; GCN-NEXT: .LBB2_2: ; %loop ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5] +; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] ; GCN-NEXT: v_cmp_gt_u32_e32 vcc, s10, v0 ; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GCN-NEXT: s_cbranch_execz .LBB2_1 Index: llvm/test/CodeGen/AMDGPU/multilevel-break.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/multilevel-break.ll +++ llvm/test/CodeGen/AMDGPU/multilevel-break.ll @@ -49,37 +49,38 @@ ; GCN-NEXT: s_branch .LBB0_2 ; GCN-NEXT: .LBB0_1: ; %loop.exit.guard ; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1 -; GCN-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN-NEXT: s_and_b64 s[2:3], exec, s[2:3] -; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] -; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GCN-NEXT: s_or_b64 exec, exec, s[2:3] +; GCN-NEXT: s_mov_b64 s[0:1], s[8:9] +; GCN-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-NEXT: s_cbranch_execz .LBB0_6 ; GCN-NEXT: .LBB0_2: ; %LOOP.outer ; GCN-NEXT: ; =>This Loop Header: Depth=1 ; GCN-NEXT: ; Child Loop BB0_4 Depth 2 ; GCN-NEXT: ; implicit-def: $sgpr6_sgpr7 -; GCN-NEXT: ; implicit-def: $sgpr2_sgpr3 -; GCN-NEXT: s_mov_b64 s[4:5], 0 +; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5 +; GCN-NEXT: s_mov_b64 s[2:3], 0 ; GCN-NEXT: s_branch .LBB0_4 ; GCN-NEXT: .LBB0_3: ; %Flow ; GCN-NEXT: ; in Loop: Header=BB0_4 Depth=2 ; GCN-NEXT: s_or_b64 exec, exec, s[8:9] ; GCN-NEXT: s_and_b64 s[8:9], exec, s[6:7] -; GCN-NEXT: s_or_b64 s[4:5], s[8:9], s[4:5] -; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GCN-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3] +; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5] +; GCN-NEXT: s_or_b64 s[8:9], s[8:9], s[0:1] +; GCN-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GCN-NEXT: s_cbranch_execz .LBB0_1 ; GCN-NEXT: .LBB0_4: ; %LOOP ; GCN-NEXT: ; Parent Loop BB0_2 Depth=1 ; GCN-NEXT: ; => This Inner Loop Header: Depth=2 ; GCN-NEXT: v_cmp_lt_i32_e32 vcc, v0, v4 -; GCN-NEXT: s_or_b64 s[2:3], s[2:3], exec +; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec ; GCN-NEXT: s_or_b64 s[6:7], s[6:7], exec ; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GCN-NEXT: s_cbranch_execz .LBB0_3 ; GCN-NEXT: ; %bb.5: ; %ENDIF ; GCN-NEXT: ; in Loop: Header=BB0_4 Depth=2 ; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GCN-NEXT: s_andn2_b64 s[2:3], s[2:3], exec +; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, v5, v0 ; GCN-NEXT: s_andn2_b64 s[6:7], s[6:7], exec ; GCN-NEXT: s_and_b64 s[10:11], vcc, exec Index: llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll +++ llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll @@ -13,12 +13,12 @@ ; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1 ; GCN-NEXT: s_waitcnt_depctr 0xffe3 ; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s8 -; GCN-NEXT: s_and_b32 s8, exec_lo, s6 -; GCN-NEXT: s_or_b32 s7, s8, s7 ; GCN-NEXT: s_andn2_b32 exec_lo, exec_lo, s7 ; GCN-NEXT: s_cbranch_execz .LBB0_5 ; GCN-NEXT: .LBB0_2: ; %bb ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: s_and_b32 s8, exec_lo, s6 +; GCN-NEXT: s_or_b32 s7, s8, s7 ; GCN-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GCN-NEXT: s_cbranch_execz .LBB0_1 ; GCN-NEXT: ; %bb.3: ; %bb1