diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3343,6 +3343,14 @@
       if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition))))
         return I;
 
+  // Fourth: Modify an existing instruction
+  if (Condition->hasOneUse()) {
+    if (auto *CI = dyn_cast<CmpInst>(Condition)) {
+      CI->setPredicate(CI->getInversePredicate());
+      return Condition;
+    }
+  }
+
   // Last option: Create a new instruction
   auto *Inverted =
       BinaryOperator::CreateNot(Condition, Condition->getName() + ".inv");
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -139,8 +139,10 @@
 ; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0
 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT: s_cmp_lg_u32 s4, 0
-; CHECK-NEXT: s_cbranch_scc1 .LBB4_4
-; CHECK-NEXT: ; %bb.1: ; %bb2
+; CHECK-NEXT: s_cbranch_scc0 .LBB4_2
+; CHECK-NEXT: .LBB4_1: ; %bb12
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; CHECK-NEXT: .LBB4_2: ; %bb2
 ; CHECK-NEXT: s_getpc_b64 s[4:5]
 ; CHECK-NEXT: s_add_u32 s4, s4, const.ptr@gotpcrel32@lo+4
 ; CHECK-NEXT: s_addc_u32 s5, s5, const.ptr@gotpcrel32@hi+12
@@ -153,15 +155,13 @@
 ; CHECK-NEXT: s_mov_b32 s4, -1
 ; CHECK-NEXT: s_waitcnt vmcnt(0)
 ; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0
-; CHECK-NEXT: s_cbranch_vccnz .LBB4_3
-; CHECK-NEXT: ; %bb.2: ; %bb7
+; CHECK-NEXT: s_cbranch_vccnz .LBB4_4
+; CHECK-NEXT: ; %bb.3: ; %bb7
 ; CHECK-NEXT: s_mov_b32 s4, 0
-; CHECK-NEXT: .LBB4_3: ; %bb8
+; CHECK-NEXT: .LBB4_4: ; %bb8
 ; CHECK-NEXT: s_cmp_lg_u32 s4, 0
-; CHECK-NEXT: s_cbranch_scc0 .LBB4_5
-; CHECK-NEXT: .LBB4_4: ; %bb12
-; CHECK-NEXT: s_setpc_b64 s[30:31]
-; CHECK-NEXT: .LBB4_5: ; %bb11
+; CHECK-NEXT: s_cbranch_scc1 .LBB4_1
+; CHECK-NEXT: ; %bb.5: ; %bb11
 ; CHECK-NEXT: v_mov_b32_e32 v0, 4.0
 ; CHECK-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen
 ; CHECK-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -838,7 +838,7 @@
 ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
 ; CGP-NEXT: ; implicit-def: $vgpr4
 ; CGP-NEXT: ; implicit-def: $vgpr10
-; CGP-NEXT: .LBB2_2: ; %Flow2
+; CGP-NEXT: .LBB2_2: ; %Flow1
 ; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
 ; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
 ; CGP-NEXT: s_cbranch_execz .LBB2_4
@@ -3118,7 +3118,7 @@
 ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
 ; CGP-NEXT: ; implicit-def: $vgpr8
-; CGP-NEXT: .LBB8_2: ; %Flow2
+; CGP-NEXT: .LBB8_2: ; %Flow1
 ; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[8:9]
 ; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6
 ; CGP-NEXT: s_xor_b64 exec, exec, s[8:9]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -824,7 +824,7 @@
 ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v2, v4, vcc
 ; CGP-NEXT: ; implicit-def: $vgpr4
 ; CGP-NEXT: ; implicit-def: $vgpr10
-; CGP-NEXT: .LBB2_2: ; %Flow2
+; CGP-NEXT: .LBB2_2: ; %Flow1
 ; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
 ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
 ; CGP-NEXT: s_cbranch_execz .LBB2_4
@@ -3072,7 +3072,7 @@
 ; CGP-NEXT:
v_subb_u32_e32 v1, vcc, v2, v4, vcc ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: ; implicit-def: $vgpr8 -; CGP-NEXT: .LBB8_2: ; %Flow2 +; CGP-NEXT: .LBB8_2: ; %Flow1 ; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[8:9] ; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6 ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -759,7 +759,7 @@ ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr10 -; CGP-NEXT: .LBB2_2: ; %Flow2 +; CGP-NEXT: .LBB2_2: ; %Flow1 ; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] ; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] ; CGP-NEXT: s_cbranch_execz .LBB2_4 @@ -1641,7 +1641,7 @@ ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: ; implicit-def: $vgpr8 -; CGP-NEXT: .LBB8_2: ; %Flow2 +; CGP-NEXT: .LBB8_2: ; %Flow1 ; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[8:9] ; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6 ; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -750,7 +750,7 @@ ; CGP-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr10 -; CGP-NEXT: .LBB2_2: ; %Flow2 +; CGP-NEXT: .LBB2_2: ; %Flow1 ; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB2_4 @@ -2181,7 +2181,7 @@ ; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: ; implicit-def: $vgpr8 -; CGP-NEXT: .LBB8_2: ; %Flow2 +; CGP-NEXT: .LBB8_2: ; %Flow1 ; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[8:9] ; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6 ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll --- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll @@ -227,30 +227,31 @@ ; GCN-LABEL: {{^}}uniform_unconditional_min_long_forward_branch: ; GCN: s_cmp_eq_u32 -; GCN: s_cbranch_scc{{[0-1]}} [[BB2:.LBB[0-9]+_[0-9]+]] +; GCN: s_cbranch_scc{{[0-1]}} [[BB1:.LBB[0-9]+_[0-9]+]] ; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %bb0 ; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}} ; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}} -; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], ([[BB3:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])&4294967295 -; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], ([[BB3:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])>>32 +; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], ([[BB4:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])&4294967295 +; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], ([[BB4]]-[[POST_GETPC]])>>32 ; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC0_LO]]:[[PC0_HI]]{{\]}} -; GCN: [[BB2]]: ; %bb3 -; GCN: v_nop_e64 -; GCN: v_nop_e64 -; GCN: v_nop_e64 -; GCN: v_nop_e64 -; GCN: ;;#ASMEND - -; GCN: [[BB3]]: +; GCN: [[BB1]]: ; GCN: v_mov_b32_e32 [[BB2_K:v[0-9]+]], 17 ; GCN: buffer_store_dword [[BB2_K]] ; GCN: v_mov_b32_e32 [[BB4_K:v[0-9]+]], 63 ; GCN: buffer_store_dword [[BB4_K]] ; GCN: s_endpgm -; GCN-NEXT: .Lfunc_end{{[0-9]+}}: + +; GCN: [[BB4]]: ; %bb3 +; GCN: v_nop_e64 +; GCN: v_nop_e64 +; GCN: v_nop_e64 +; GCN: v_nop_e64 +; GCN: 
;;#ASMEND + +; GCN: .Lfunc_end{{[0-9]+}}: define amdgpu_kernel void @uniform_unconditional_min_long_forward_branch(i32 addrspace(1)* %arg, i32 %arg1) { bb0: %tmp = icmp ne i32 %arg1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/ctpop16.ll b/llvm/test/CodeGen/AMDGPU/ctpop16.ll --- a/llvm/test/CodeGen/AMDGPU/ctpop16.ll +++ b/llvm/test/CodeGen/AMDGPU/ctpop16.ll @@ -1502,7 +1502,7 @@ ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_lshr_b32 s5, s4, 16 ; SI-NEXT: s_cmp_lg_u32 s5, 0 -; SI-NEXT: s_cbranch_scc0 .LBB14_2 +; SI-NEXT: s_cbranch_scc0 .LBB14_4 ; SI-NEXT: ; %bb.1: ; %else ; SI-NEXT: s_mov_b32 s11, 0xf000 ; SI-NEXT: s_mov_b32 s10, -1 @@ -1510,22 +1510,22 @@ ; SI-NEXT: s_mov_b32 s9, s3 ; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 offset:2 ; SI-NEXT: s_mov_b64 s[2:3], 0 -; SI-NEXT: s_cbranch_execz .LBB14_3 -; SI-NEXT: s_branch .LBB14_4 -; SI-NEXT: .LBB14_2: -; SI-NEXT: s_mov_b64 s[2:3], -1 -; SI-NEXT: v_mov_b32_e32 v0, 0 -; SI-NEXT: .LBB14_3: ; %if +; SI-NEXT: s_cbranch_execnz .LBB14_3 +; SI-NEXT: .LBB14_2: ; %if ; SI-NEXT: s_and_b32 s2, s4, 0xffff ; SI-NEXT: s_bcnt1_i32_b32 s2, s2 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s2 -; SI-NEXT: .LBB14_4: ; %endif +; SI-NEXT: .LBB14_3: ; %endif ; SI-NEXT: s_mov_b32 s3, 0xf000 ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm +; SI-NEXT: .LBB14_4: +; SI-NEXT: s_mov_b64 s[2:3], -1 +; SI-NEXT: v_mov_b32_e32 v0, 0 +; SI-NEXT: s_branch .LBB14_2 ; ; VI-LABEL: ctpop_i16_in_br: ; VI: ; %bb.0: ; %entry @@ -1535,7 +1535,7 @@ ; VI-NEXT: s_lshr_b32 s5, s4, 16 ; VI-NEXT: v_cmp_ne_u16_e64 s[6:7], s5, 0 ; VI-NEXT: s_and_b64 vcc, exec, s[6:7] -; VI-NEXT: s_cbranch_vccz .LBB14_2 +; VI-NEXT: s_cbranch_vccz .LBB14_4 ; VI-NEXT: ; %bb.1: ; %else ; VI-NEXT: s_mov_b32 s11, 0xf000 ; VI-NEXT: s_mov_b32 s10, -1 @@ -1543,22 +1543,22 @@ ; VI-NEXT: s_mov_b32 s9, s3 ; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 offset:2 ; VI-NEXT: s_mov_b64 s[2:3], 0 -; VI-NEXT: s_cbranch_execz .LBB14_3 -; VI-NEXT: s_branch .LBB14_4 -; VI-NEXT: .LBB14_2: -; VI-NEXT: s_mov_b64 s[2:3], -1 -; VI-NEXT: ; implicit-def: $vgpr0 -; VI-NEXT: .LBB14_3: ; %if +; VI-NEXT: s_cbranch_execnz .LBB14_3 +; VI-NEXT: .LBB14_2: ; %if ; VI-NEXT: s_and_b32 s2, s4, 0xffff ; VI-NEXT: s_bcnt1_i32_b32 s2, s2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s2 -; VI-NEXT: .LBB14_4: ; %endif +; VI-NEXT: .LBB14_3: ; %endif ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm +; VI-NEXT: .LBB14_4: +; VI-NEXT: s_mov_b64 s[2:3], -1 +; VI-NEXT: ; implicit-def: $vgpr0 +; VI-NEXT: s_branch .LBB14_2 ; ; EG-LABEL: ctpop_i16_in_br: ; EG: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -1534,19 +1534,17 @@ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_cmp_lg_u32 s6, 0 -; SI-NEXT: s_cbranch_scc0 .LBB30_2 +; SI-NEXT: s_cbranch_scc0 .LBB30_4 ; SI-NEXT: ; %bb.1: ; %else ; SI-NEXT: s_load_dword s7, s[2:3], 0x1 ; SI-NEXT: s_mov_b64 s[4:5], 0 ; SI-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b64 vcc, vcc -; SI-NEXT: s_cbranch_vccz .LBB30_3 -; SI-NEXT: s_branch .LBB30_4 -; SI-NEXT: .LBB30_2: -; SI-NEXT: .LBB30_3: ; %if +; SI-NEXT: s_cbranch_vccnz .LBB30_3 +; SI-NEXT: 
.LBB30_2: ; %if ; SI-NEXT: s_load_dword s7, s[2:3], 0x0 -; SI-NEXT: .LBB30_4: ; %endif +; SI-NEXT: .LBB30_3: ; %endif ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s6 ; SI-NEXT: s_mov_b32 s3, 0x100f000 @@ -1554,6 +1552,8 @@ ; SI-NEXT: v_mov_b32_e32 v1, s7 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm +; SI-NEXT: .LBB30_4: +; SI-NEXT: s_branch .LBB30_2 ; ; VI-LABEL: insert_split_bb: ; VI: ; %bb.0: ; %entry @@ -1561,16 +1561,14 @@ ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_cmp_lg_u32 s6, 0 -; VI-NEXT: s_cbranch_scc0 .LBB30_2 +; VI-NEXT: s_cbranch_scc0 .LBB30_4 ; VI-NEXT: ; %bb.1: ; %else ; VI-NEXT: s_load_dword s7, s[2:3], 0x4 -; VI-NEXT: s_cbranch_execz .LBB30_3 -; VI-NEXT: s_branch .LBB30_4 -; VI-NEXT: .LBB30_2: -; VI-NEXT: .LBB30_3: ; %if +; VI-NEXT: s_cbranch_execnz .LBB30_3 +; VI-NEXT: .LBB30_2: ; %if ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_load_dword s7, s[2:3], 0x0 -; VI-NEXT: .LBB30_4: ; %endif +; VI-NEXT: .LBB30_3: ; %endif ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 @@ -1578,6 +1576,8 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_endpgm +; VI-NEXT: .LBB30_4: +; VI-NEXT: s_branch .LBB30_2 entry: %0 = insertelement <2 x i32> undef, i32 %a, i32 0 %1 = icmp eq i32 %a, 0 diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll --- a/llvm/test/CodeGen/AMDGPU/loop_break.ll +++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll @@ -17,11 +17,10 @@ ; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]] ; OPT: bb4: ; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 -; OPT-NEXT: [[CMP1:%.*]] = icmp slt i32 [[MY_TMP]], [[LOAD]] -; OPT-NEXT: [[TMP0:%.*]] = xor i1 [[CMP1]], true +; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]] ; OPT-NEXT: br label [[FLOW]] ; OPT: Flow: -; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB4]] ], [ true, [[BB1]] ] +; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ] ; OPT-NEXT: [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]]) ; OPT-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]]) ; OPT-NEXT: br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]] diff --git a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll --- a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll +++ b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll @@ -9,14 +9,14 @@ ; StructurizeCFG. 
; IR-LABEL: @multi_divergent_region_exit_ret_ret( -; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot.inv) +; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot) ; IR: %1 = extractvalue { i1, i64 } %0, 0 ; IR: %2 = extractvalue { i1, i64 } %0, 1 ; IR: br i1 %1, label %LeafBlock1, label %Flow ; IR: Flow: ; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] -; IR: %4 = phi i1 [ %SwitchLeaf2.inv, %LeafBlock1 ], [ false, %entry ] +; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ] ; IR: %5 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %2) ; IR: %6 = extractvalue { i1, i64 } %5, 0 ; IR: %7 = extractvalue { i1, i64 } %5, 1 @@ -42,7 +42,7 @@ ; IR: Flow1: ; IR: %12 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %3, %Flow ] -; IR: %13 = phi i1 [ %SwitchLeaf.inv, %LeafBlock ], [ %4, %Flow ] +; IR: %13 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ] ; IR: call void @llvm.amdgcn.end.cf.i64(i64 %7) ; IR: %14 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %13) ; IR: %15 = extractvalue { i1, i64 } %14, 0 @@ -75,16 +75,13 @@ ; GCN-NEXT: s_or_saveexec_b64 ; GCN-NEXT: s_xor_b64 -; FIXME: Why is this compare essentially repeated? ; GCN: ; %LeafBlock -; GCN-DAG: v_cmp_eq_u32_e32 vcc, 1, -; GCN-DAG: v_cmp_ne_u32_e64 [[TMP1:s\[[0-9]+:[0-9]+\]]], 1, +; GCN-DAG: v_cmp_ne_u32_e32 vcc, 1, ; GCN-DAG: s_andn2_b64 [[EXIT0]], [[EXIT0]], exec ; GCN-DAG: s_andn2_b64 [[EXIT1]], [[EXIT1]], exec ; GCN-DAG: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], vcc, exec -; GCN-DAG: s_and_b64 [[TMP1]], [[TMP1]], exec ; GCN-DAG: s_or_b64 [[EXIT0]], [[EXIT0]], [[TMP0]] -; GCN-DAG: s_or_b64 [[EXIT1]], [[EXIT1]], [[TMP1]] +; GCN-DAG: s_or_b64 [[EXIT1]], [[EXIT1]], [[TMP0]] ; GCN: ; %Flow4 ; GCN-NEXT: s_or_b64 exec, exec, @@ -141,7 +138,7 @@ } ; IR-LABEL: @multi_divergent_region_exit_unreachable_unreachable( -; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot.inv) +; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot) ; IR: %5 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %2) @@ -196,24 +193,22 @@ } ; IR-LABEL: @multi_exit_region_divergent_ret_uniform_ret( -; IR: %divergent.cond0 = icmp slt i32 %tmp16, 2 +; IR: %divergent.cond0 = icmp sge i32 %tmp16, 2 ; IR: llvm.amdgcn.if ; IR: br i1 ; IR: {{^}}Flow: ; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] -; IR: %4 = phi i1 [ %uniform.cond0.inv, %LeafBlock1 ], [ false, %entry ] +; IR: %4 = phi i1 [ %uniform.cond0, %LeafBlock1 ], [ false, %entry ] ; IR: %5 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %2) ; IR: br i1 %6, label %LeafBlock, label %Flow1 ; IR: {{^}}LeafBlock: -; IR: %divergent.cond1 = icmp eq i32 %tmp16, 1 -; IR: %divergent.cond1.inv = xor i1 %divergent.cond1, true +; IR: %divergent.cond1 = icmp ne i32 %tmp16, 1 ; IR: br label %Flow1 ; IR: LeafBlock1: -; IR: %uniform.cond0 = icmp eq i32 %arg3, 2 -; IR: %uniform.cond0.inv = xor i1 %uniform.cond0, true +; IR: %uniform.cond0 = icmp ne i32 %arg3, 2 ; IR: br label %Flow ; IR: Flow2: @@ -228,7 +223,7 @@ ; IR: {{^}}Flow1: ; IR: %12 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %3, %Flow ] -; IR: %13 = phi i1 [ %divergent.cond1.inv, %LeafBlock ], [ %4, %Flow ] +; IR: %13 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %4, %Flow ] ; IR: call void @llvm.amdgcn.end.cf.i64(i64 %7) ; IR: %14 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %13) ; IR: %15 = extractvalue { i1, i64 } %14, 0 @@ -279,12 +274,12 @@ } ; IR-LABEL: @multi_exit_region_uniform_ret_divergent_ret( -; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot.inv) +; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot) ; IR: br i1 %1, 
label %LeafBlock1, label %Flow ; IR: Flow: ; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] -; IR: %4 = phi i1 [ %SwitchLeaf2.inv, %LeafBlock1 ], [ false, %entry ] +; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ] ; IR: %5 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %2) ; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ] @@ -401,11 +396,11 @@ } ; IR-LABEL: @multi_divergent_region_exit_ret_unreachable( -; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot.inv) +; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot) ; IR: Flow: ; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] -; IR: %4 = phi i1 [ %SwitchLeaf2.inv, %LeafBlock1 ], [ false, %entry ] +; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ] ; IR: %5 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %2) ; IR: Flow2: @@ -420,7 +415,7 @@ ; IR: Flow1: ; IR: %12 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %3, %Flow ] -; IR: %13 = phi i1 [ %SwitchLeaf.inv, %LeafBlock ], [ %4, %Flow ] +; IR: %13 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ] ; IR: call void @llvm.amdgcn.end.cf.i64(i64 %7) ; IR: %14 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %13) ; IR: %15 = extractvalue { i1, i64 } %14, 0 @@ -640,7 +635,7 @@ ; IR: br i1 %6, label %uniform.if, label %Flow2 ; IR: Flow: ; preds = %uniform.then, %uniform.if -; IR: %7 = phi i1 [ %uniform.cond2.inv, %uniform.then ], [ %uniform.cond1.inv, %uniform.if ] +; IR: %7 = phi i1 [ %uniform.cond2, %uniform.then ], [ %uniform.cond1, %uniform.if ] ; IR: br i1 %7, label %uniform.endif, label %uniform.ret0 ; IR: UnifiedReturnBlock: ; preds = %Flow3, %Flow2 diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll --- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll +++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll @@ -25,9 +25,8 @@ ; OPT: Flow: ; OPT-NEXT: [[TMP4]] = phi i32 [ [[TMP47]], [[ENDIF]] ], [ [[TMP0]], [[LOOP]] ] ; OPT-NEXT: [[TMP5:%.*]] = phi i1 [ [[TMP51:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ] -; OPT-NEXT: [[TMP6:%.*]] = phi i1 [ [[TMP51_INV:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ] ; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP3]]) -; OPT-NEXT: [[TMP7]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP6]], i64 [[PHI_BROKEN]]) +; OPT-NEXT: [[TMP7]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP5]], i64 [[PHI_BROKEN]]) ; OPT-NEXT: [[TMP8:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP7]]) ; OPT-NEXT: [[TMP9]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP5]], i64 [[PHI_BROKEN2]]) ; OPT-NEXT: br i1 [[TMP8]], label [[FLOW1]], label [[LOOP]] @@ -39,8 +38,7 @@ ; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP9]]) ; OPT-NEXT: ret void ; OPT: ENDIF: -; OPT-NEXT: [[TMP51]] = icmp eq i32 [[TMP47]], [[CONT:%.*]] -; OPT-NEXT: [[TMP51_INV]] = xor i1 [[TMP51]], true +; OPT-NEXT: [[TMP51]] = icmp ne i32 [[TMP47]], [[CONT:%.*]] ; OPT-NEXT: br label [[FLOW]] ; ; GCN-LABEL: multi_else_break: @@ -123,14 +121,13 @@ ; OPT-NEXT: [[LOAD0:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 ; OPT-NEXT: br label [[NODEBLOCK:%.*]] ; OPT: NodeBlock: -; OPT-NEXT: [[PIVOT:%.*]] = icmp slt i32 [[LOAD0]], 1 -; OPT-NEXT: [[PIVOT_INV:%.*]] = xor i1 [[PIVOT]], true -; OPT-NEXT: br i1 [[PIVOT_INV]], label [[LEAFBLOCK1:%.*]], label [[FLOW:%.*]] +; OPT-NEXT: [[PIVOT:%.*]] = icmp sge i32 [[LOAD0]], 1 +; OPT-NEXT: br i1 [[PIVOT]], label [[LEAFBLOCK1:%.*]], label [[FLOW:%.*]] ; OPT: LeafBlock1: ; OPT-NEXT: [[SWITCHLEAF2:%.*]] = icmp eq i32 [[LOAD0]], 1 ; OPT-NEXT: br i1 [[SWITCHLEAF2]], label [[CASE1:%.*]], label 
[[FLOW3:%.*]] ; OPT: Flow3: -; OPT-NEXT: [[TMP0:%.*]] = phi i1 [ [[CMP2_INV:%.*]], [[CASE1]] ], [ true, [[LEAFBLOCK1]] ] +; OPT-NEXT: [[TMP0:%.*]] = phi i1 [ [[CMP2:%.*]], [[CASE1]] ], [ true, [[LEAFBLOCK1]] ] ; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ false, [[CASE1]] ], [ true, [[LEAFBLOCK1]] ] ; OPT-NEXT: br label [[FLOW]] ; OPT: LeafBlock: @@ -144,8 +141,7 @@ ; OPT-NEXT: br i1 [[TMP5]], label [[FLOW6:%.*]], label [[BB1]] ; OPT: case0: ; OPT-NEXT: [[LOAD1:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 -; OPT-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP]], [[LOAD1]] -; OPT-NEXT: [[CMP1_INV:%.*]] = xor i1 [[CMP1]], true +; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[TMP]], [[LOAD1]] ; OPT-NEXT: br label [[FLOW5]] ; OPT: Flow: ; OPT-NEXT: [[TMP6]] = phi i1 [ [[TMP0]], [[FLOW3]] ], [ true, [[NODEBLOCK]] ] @@ -154,11 +150,10 @@ ; OPT-NEXT: br i1 [[TMP8]], label [[LEAFBLOCK:%.*]], label [[FLOW4]] ; OPT: case1: ; OPT-NEXT: [[LOAD2:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 -; OPT-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP]], [[LOAD2]] -; OPT-NEXT: [[CMP2_INV]] = xor i1 [[CMP2]], true +; OPT-NEXT: [[CMP2:%.*]] = icmp sge i32 [[TMP]], [[LOAD2]] ; OPT-NEXT: br label [[FLOW3]] ; OPT: Flow5: -; OPT-NEXT: [[TMP9]] = phi i1 [ [[CMP1_INV]], [[CASE0]] ], [ [[TMP6]], [[LEAFBLOCK]] ] +; OPT-NEXT: [[TMP9]] = phi i1 [ [[CMP1]], [[CASE0]] ], [ [[TMP6]], [[LEAFBLOCK]] ] ; OPT-NEXT: [[TMP10]] = phi i1 [ false, [[CASE0]] ], [ true, [[LEAFBLOCK]] ] ; OPT-NEXT: br label [[FLOW4]] ; OPT: Flow6: @@ -196,8 +191,8 @@ ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 1, v1 ; GCN-NEXT: s_mov_b64 s[6:7], -1 +; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 1, v1 ; GCN-NEXT: s_and_b64 vcc, exec, vcc ; GCN-NEXT: ; implicit-def: $sgpr8_sgpr9 ; GCN-NEXT: s_mov_b64 s[10:11], -1 diff --git a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll --- a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll @@ -236,8 +236,8 @@ ; IR: Flow1: ; IR-NEXT: [[TMP11]] = phi <4 x i32> [ [[MY_TMP9:%.*]], [[BB21:%.*]] ], [ undef, [[BB14]] ] ; IR-NEXT: [[TMP12]] = phi i32 [ [[MY_TMP10:%.*]], [[BB21]] ], [ undef, [[BB14]] ] -; IR-NEXT: [[TMP13:%.*]] = phi i1 [ [[TMP18:%.*]], [[BB21]] ], [ true, [[BB14]] ] -; IR-NEXT: [[TMP14]] = phi i1 [ [[TMP18]], [[BB21]] ], [ false, [[BB14]] ] +; IR-NEXT: [[TMP13:%.*]] = phi i1 [ [[MY_TMP12:%.*]], [[BB21]] ], [ true, [[BB14]] ] +; IR-NEXT: [[TMP14]] = phi i1 [ [[MY_TMP12]], [[BB21]] ], [ false, [[BB14]] ] ; IR-NEXT: [[TMP15:%.*]] = phi i1 [ false, [[BB21]] ], [ true, [[BB14]] ] ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP10]]) ; IR-NEXT: [[TMP16]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP13]], i64 [[PHI_BROKEN]]) @@ -262,8 +262,7 @@ ; IR-NEXT: [[MY_TMP9]] = load <4 x i32>, <4 x i32> addrspace(1)* [[MY_TMP8]], align 16 ; IR-NEXT: [[MY_TMP10]] = extractelement <4 x i32> [[MY_TMP9]], i64 0 ; IR-NEXT: [[MY_TMP11:%.*]] = load volatile i32, i32 addrspace(1)* undef -; IR-NEXT: [[MY_TMP12:%.*]] = icmp slt i32 [[MY_TMP11]], 9 -; IR-NEXT: [[TMP18]] = xor i1 [[MY_TMP12]], true +; IR-NEXT: [[MY_TMP12]] = icmp slt i32 [[MY_TMP11]], 9 ; IR-NEXT: br label [[FLOW1]] ; IR: Flow2: ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP16]]) diff --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll --- 
a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll +++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll @@ -36,19 +36,17 @@ ; GCN-LABEL: {{^}}negated_cond_dominated_blocks: ; GCN: s_cmp_lg_u32 -; GCN: s_cselect_b64 [[CC1:[^,]+]], -1, 0 +; GCN: s_cselect_b64 [[CC1:[^,]+]], -1, 0 ; GCN: s_branch [[BB1:.LBB[0-9]+_[0-9]+]] ; GCN: [[BB0:.LBB[0-9]+_[0-9]+]] ; GCN-NOT: v_cndmask_b32 ; GCN-NOT: v_cmp ; GCN: [[BB1]]: -; GCN: s_mov_b64 [[CC2:[^,]+]], -1 ; GCN: s_mov_b64 vcc, [[CC1]] ; GCN: s_cbranch_vccz [[BB2:.LBB[0-9]+_[0-9]+]] -; GCN: s_mov_b64 [[CC2]], 0 +; GCN: s_mov_b64 vcc, exec +; GCN: s_cbranch_execnz [[BB0]] ; GCN: [[BB2]]: -; GCN: s_andn2_b64 vcc, exec, [[CC2]] -; GCN: s_cbranch_vccnz [[BB0]] define amdgpu_kernel void @negated_cond_dominated_blocks(i32 addrspace(1)* %arg1) { bb: br label %bb2 diff --git a/llvm/test/CodeGen/AMDGPU/ret_jump.ll b/llvm/test/CodeGen/AMDGPU/ret_jump.ll --- a/llvm/test/CodeGen/AMDGPU/ret_jump.ll +++ b/llvm/test/CodeGen/AMDGPU/ret_jump.ll @@ -54,7 +54,7 @@ } ; GCN-LABEL: {{^}}uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable: -; GCN: s_cbranch_vccz +; GCN: s_cbranch_scc0 ; GCN: ; %bb.{{[0-9]+}}: ; %Flow ; GCN: s_cbranch_execnz [[RETURN:.LBB[0-9]+_[0-9]+]] diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll --- a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll @@ -16,22 +16,22 @@ ; SI-NEXT: s_load_dword s0, s[0:1], 0xf ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_cmp_lg_u32 s8, 0 -; SI-NEXT: s_cbranch_scc0 .LBB0_2 +; SI-NEXT: s_cbranch_scc0 .LBB0_4 ; SI-NEXT: ; %bb.1: ; %else ; SI-NEXT: s_add_i32 s2, s11, s0 -; SI-NEXT: s_cbranch_execz .LBB0_3 -; SI-NEXT: s_branch .LBB0_4 -; SI-NEXT: .LBB0_2: -; SI-NEXT: ; implicit-def: $sgpr2 -; SI-NEXT: .LBB0_3: ; %if +; SI-NEXT: s_cbranch_execnz .LBB0_3 +; SI-NEXT: .LBB0_2: ; %if ; SI-NEXT: s_sub_i32 s2, s9, s10 -; SI-NEXT: .LBB0_4: ; %endif +; SI-NEXT: .LBB0_3: ; %endif ; SI-NEXT: s_add_i32 s0, s2, s8 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s0 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm +; SI-NEXT: .LBB0_4: +; SI-NEXT: ; implicit-def: $sgpr2 +; SI-NEXT: s_branch .LBB0_2 entry: %0 = icmp eq i32 %a, 0 @@ -59,28 +59,28 @@ ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_cmp_lg_u32 s6, 0 -; SI-NEXT: s_cbranch_scc0 .LBB1_2 +; SI-NEXT: s_cbranch_scc0 .LBB1_4 ; SI-NEXT: ; %bb.1: ; %else ; SI-NEXT: s_load_dword s2, s[0:1], 0x2e ; SI-NEXT: s_load_dword s3, s[0:1], 0x37 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_add_i32 s7, s2, s3 -; SI-NEXT: s_cbranch_execz .LBB1_3 -; SI-NEXT: s_branch .LBB1_4 -; SI-NEXT: .LBB1_2: -; SI-NEXT: ; implicit-def: $sgpr7 -; SI-NEXT: .LBB1_3: ; %if +; SI-NEXT: s_cbranch_execnz .LBB1_3 +; SI-NEXT: .LBB1_2: ; %if ; SI-NEXT: s_load_dword s2, s[0:1], 0x1c ; SI-NEXT: s_load_dword s0, s[0:1], 0x25 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_add_i32 s7, s2, s0 -; SI-NEXT: .LBB1_4: ; %endif +; SI-NEXT: .LBB1_3: ; %endif ; SI-NEXT: s_add_i32 s0, s7, s6 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s0 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm +; SI-NEXT: .LBB1_4: +; SI-NEXT: ; implicit-def: $sgpr7 +; SI-NEXT: s_branch .LBB1_2 entry: %cmp0 = icmp eq i32 %a, 0 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll --- a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll +++ 
b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll @@ -213,7 +213,7 @@ ; CHECK-LABEL: {{^}}sample_v3: ; CHECK: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 5 ; CHECK: v_mov_b32_e32 v[[SAMPLE_HI:[0-9]+]], 7 -; CHECK: s_branch +; CHECK: s_cbranch ; CHECK: BB{{[0-9]+_[0-9]+}}: ; CHECK-DAG: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 11 @@ -315,13 +315,15 @@ ; CHECK-LABEL:{{^}}sample_rsrc ; CHECK: s_cmp_eq_u32 -; CHECK: s_cbranch_scc0 [[END:.LBB[0-9]+_[0-9]+]] +; CHECK: s_cbranch_scc1 [[END:.LBB[0-9]+_[0-9]+]] -; CHECK: v_add_{{[iu]}}32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}} +; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} +; CHECK: s_endpgm ; [[END]]: +; CHECK: v_add_{{[iu]}}32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}} ; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}} -; CHECK: s_endpgm +; CHECK: s_branch define amdgpu_ps void @sample_rsrc([6 x <4 x i32>] addrspace(4)* inreg %arg, [17 x <4 x i32>] addrspace(4)* inreg %arg1, [16 x <4 x i32>] addrspace(4)* inreg %arg2, [32 x <8 x i32>] addrspace(4)* inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 { bb: %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg1, i32 0, i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -172,8 +172,8 @@ ; GFX1064: s_andn2_b64 exec, exec, s[{{[0-9:]+}}] ; GCN: s_cbranch_execz ; GCN: .LBB{{.*}}: -; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, vcc_lo -; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc{{$}} +; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, s{{[0-9]+}} +; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}] ; GCN: s_cbranch_execz ; GCN: ; %bb.{{[0-9]+}}: ; GCN: .LBB{{.*}}: diff --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll b/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll --- a/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll +++ b/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll @@ -22,26 +22,25 @@ ; CHECK-LABEL: @loop_subregion_misordered( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP:%.*]] = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16 -; CHECK-NEXT: [[LOAD1:%.*]] = load volatile <2 x float>, <2 x float> addrspace(1)* undef +; CHECK-NEXT: [[LOAD1:%.*]] = load volatile <2 x float>, <2 x float> addrspace(1)* undef, align 8 ; CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG0:%.*]], i32 [[TID]] ; CHECK-NEXT: [[I_INITIAL:%.*]] = load volatile i32, i32 addrspace(1)* [[GEP]], align 4 ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: LOOP.HEADER: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP4:%.*]], [[FLOW3:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[FLOW3:%.*]] ] ; CHECK-NEXT: call void asm sideeffect "s_nop 0x100b ; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP13]], align 16 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> 
[[TMP14]], i64 0 ; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 65535 -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TMP17]], true -; CHECK-NEXT: br i1 [[TMP0]], label [[BB62:%.*]], label [[FLOW:%.*]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 1 +; CHECK-NEXT: br i1 [[TMP17]], label [[BB62:%.*]], label [[FLOW:%.*]] ; CHECK: Flow1: -; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[INC_I:%.*]], [[INCREMENT_I:%.*]] ], [ undef, [[BB62]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ false, [[INCREMENT_I]] ], [ true, [[BB62]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ true, [[INCREMENT_I]] ], [ false, [[BB62]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[INC_I:%.*]], [[INCREMENT_I:%.*]] ], [ undef, [[BB62]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, [[INCREMENT_I]] ], [ true, [[BB62]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ true, [[INCREMENT_I]] ], [ false, [[BB62]] ] ; CHECK-NEXT: br label [[FLOW]] ; CHECK: bb18: ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP]], i64 0 @@ -50,9 +49,9 @@ ; CHECK-NEXT: [[TMP25:%.*]] = mul nuw nsw i32 [[TMP24]], 52 ; CHECK-NEXT: br label [[INNER_LOOP:%.*]] ; CHECK: Flow2: -; CHECK-NEXT: [[TMP4]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP8:%.*]], [[FLOW]] ] -; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP10:%.*]], [[FLOW]] ] -; CHECK-NEXT: br i1 [[TMP5]], label [[END_ELSE_BLOCK:%.*]], label [[FLOW3]] +; CHECK-NEXT: [[TMP3]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP6:%.*]], [[FLOW]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP8:%.*]], [[FLOW]] ] +; CHECK-NEXT: br i1 [[TMP4]], label [[END_ELSE_BLOCK:%.*]], label [[FLOW3]] ; CHECK: INNER_LOOP: ; CHECK-NEXT: [[INNER_LOOP_J:%.*]] = phi i32 [ [[INNER_LOOP_J_INC:%.*]], [[INNER_LOOP]] ], [ [[TMP25]], [[BB18:%.*]] ] ; CHECK-NEXT: call void asm sideeffect " @@ -61,33 +60,32 @@ ; CHECK-NEXT: br i1 [[INNER_LOOP_CMP]], label [[INNER_LOOP_BREAK]], label [[INNER_LOOP]] ; CHECK: INNER_LOOP_BREAK: ; CHECK-NEXT: [[TMP59]] = extractelement <4 x i32> [[TMP14]], i64 2 -; CHECK-NEXT: call void asm sideeffect "s_nop 23 ", "~{memory}"() #0 +; CHECK-NEXT: call void asm sideeffect "s_nop 23 ", "~{memory}"() #[[ATTR0:[0-9]+]] ; CHECK-NEXT: br label [[FLOW2:%.*]] ; CHECK: bb62: -; CHECK-NEXT: [[LOAD13:%.*]] = icmp ult i32 [[TMP16]], 271 -; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[LOAD13]], true -; CHECK-NEXT: br i1 [[TMP6]], label [[INCREMENT_I]], label [[FLOW1:%.*]] +; CHECK-NEXT: [[LOAD13:%.*]] = icmp uge i32 [[TMP16]], 271 +; CHECK-NEXT: br i1 [[LOAD13]], label [[INCREMENT_I]], label [[FLOW1:%.*]] ; CHECK: Flow3: -; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK:%.*]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW2]] ] -; CHECK-NEXT: br i1 [[TMP7]], label [[FLOW4:%.*]], label [[LOOP_HEADER]] +; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK:%.*]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW2]] ] +; CHECK-NEXT: br i1 [[TMP5]], label [[FLOW4:%.*]], label [[LOOP_HEADER]] ; CHECK: Flow4: -; CHECK-NEXT: br i1 [[TMP9:%.*]], label [[BB64:%.*]], label [[RETURN:%.*]] +; CHECK-NEXT: br i1 [[TMP7:%.*]], label [[BB64:%.*]], label [[RETURN:%.*]] ; CHECK: bb64: -; CHECK-NEXT: call void asm sideeffect "s_nop 42", "~{memory}"() #0 +; CHECK-NEXT: call void asm sideeffect "s_nop 42", "~{memory}"() #[[ATTR0]] ; CHECK-NEXT: br label [[RETURN]] ; CHECK: Flow: -; CHECK-NEXT: [[TMP8]] = phi i32 [ [[TMP1]], [[FLOW1]] ], [ undef, [[LOOP_HEADER]] ] -; CHECK-NEXT: [[TMP9]] = phi i1 [ [[TMP2]], [[FLOW1]] 
], [ false, [[LOOP_HEADER]] ] -; CHECK-NEXT: [[TMP10]] = phi i1 [ [[TMP3]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ] -; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ false, [[FLOW1]] ], [ true, [[LOOP_HEADER]] ] -; CHECK-NEXT: br i1 [[TMP11]], label [[BB18]], label [[FLOW2]] +; CHECK-NEXT: [[TMP6]] = phi i32 [ [[TMP0]], [[FLOW1]] ], [ undef, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP7]] = phi i1 [ [[TMP1]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP2]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ false, [[FLOW1]] ], [ true, [[LOOP_HEADER]] ] +; CHECK-NEXT: br i1 [[TMP9]], label [[BB18]], label [[FLOW2]] ; CHECK: INCREMENT_I: ; CHECK-NEXT: [[INC_I]] = add i32 [[I]], 1 ; CHECK-NEXT: call void asm sideeffect "s_nop 0x1336 ; CHECK-NEXT: br label [[FLOW1]] ; CHECK: END_ELSE_BLOCK: ; CHECK-NEXT: call void asm sideeffect "s_nop 0x1337 -; CHECK-NEXT: [[CMP_END_ELSE_BLOCK]] = icmp eq i32 [[TMP4]], -1 +; CHECK-NEXT: [[CMP_END_ELSE_BLOCK]] = icmp eq i32 [[TMP3]], -1 ; CHECK-NEXT: br label [[FLOW3]] ; CHECK: RETURN: ; CHECK-NEXT: call void asm sideeffect "s_nop 0x99 diff --git a/llvm/test/Transforms/StructurizeCFG/bug36015.ll b/llvm/test/Transforms/StructurizeCFG/bug36015.ll --- a/llvm/test/Transforms/StructurizeCFG/bug36015.ll +++ b/llvm/test/Transforms/StructurizeCFG/bug36015.ll @@ -18,7 +18,7 @@ br i1 %cond.inner, label %if, label %else ; CHECK: if: -; CHECK: %cond.if.inv = xor i1 %cond.if, true +; CHECK: %cond.if = icmp sge i32 %ctr.if, %count ; CHECK: br label %Flow if: %ctr.if = add i32 %ctr.loop.inner, 1 @@ -27,7 +27,7 @@ br i1 %cond.if, label %loop.inner, label %exit ; CHECK: Flow: -; CHECK: %1 = phi i1 [ %cond.if.inv, %if ], [ true, %loop.inner ] +; CHECK: %1 = phi i1 [ %cond.if, %if ], [ true, %loop.inner ] ; CHECK: %2 = phi i1 [ false, %if ], [ true, %loop.inner ] ; CHECK: br i1 %1, label %Flow1, label %loop.inner @@ -43,7 +43,7 @@ br i1 %cond.else, label %loop.outer, label %exit ; CHECK: Flow2: -; CHECK: %4 = phi i1 [ %cond.else.inv, %else ], [ true, %Flow1 ] +; CHECK: %4 = phi i1 [ %cond.else, %else ], [ true, %Flow1 ] ; CHECK: br i1 %4, label %exit, label %loop.outer exit: diff --git a/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll b/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll --- a/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll +++ b/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll @@ -6,8 +6,7 @@ define void @invert_constantexpr_condition(i32 %arg, i32 %arg1) #0 { ; CHECK-LABEL: @invert_constantexpr_condition( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = icmp eq i32 [[ARG:%.*]], 0 -; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TMP]], true +; CHECK-NEXT: [[TMP:%.*]] = icmp ne i32 [[ARG:%.*]], 0 ; CHECK-NEXT: br i1 icmp eq (i32 ptrtoint (i32* @g to i32), i32 0), label [[BB2:%.*]], label [[FLOW:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br label [[FLOW]] @@ -16,8 +15,8 @@ ; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], icmp eq (i32 ptrtoint (i32* @g to i32), i32 0) ; CHECK-NEXT: br label [[BB8:%.*]] ; CHECK: Flow: -; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB2]] ], [ icmp ne (i32 ptrtoint (i32* @g to i32), i32 0), [[BB:%.*]] ] -; CHECK-NEXT: br i1 [[TMP1]], label [[BB6]], label [[BB3:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ [[TMP]], [[BB2]] ], [ icmp ne (i32 ptrtoint (i32* @g to i32), i32 0), [[BB:%.*]] ] +; CHECK-NEXT: br i1 [[TMP0]], label [[BB6]], label [[BB3:%.*]] ; CHECK: bb6: ; CHECK-NEXT: [[TMP7]] = icmp slt i32 [[ARG]], [[ARG1:%.*]] ; CHECK-NEXT: br label [[BB3]] diff --git 
a/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll b/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll --- a/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll +++ b/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll @@ -8,22 +8,22 @@ br label %bb3 ; CHECK: bb3: -; CHECK: %tmp4.inv = xor i1 %tmp4, true -; CHECK: br i1 %tmp4.inv, label %bb5, label %Flow +; CHECK: %tmp4 = fcmp oge float %arg1, 3.500000e+00 +; CHECK: br i1 %tmp4, label %bb5, label %Flow bb3: ; preds = %bb7, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb7 ] %tmp4 = fcmp ult float %arg1, 3.500000e+00 br i1 %tmp4, label %bb7, label %bb5 ; CHECK: bb5: -; CHECK: %tmp6.inv = xor i1 %tmp6, true +; CHECK: %tmp6 = fcmp uge float 0.000000e+00, %arg2 ; CHECK: br label %Flow bb5: ; preds = %bb3 %tmp6 = fcmp olt float 0.000000e+00, %arg2 br i1 %tmp6, label %bb10, label %bb7 ; CHECK: Flow: -; CHECK: %0 = phi i1 [ %tmp6.inv, %bb5 ], [ %tmp4, %bb3 ] +; CHECK: %0 = phi i1 [ %tmp6, %bb5 ], [ %tmp4, %bb3 ] ; CHECK: br i1 %0, label %bb7, label %Flow1 ; CHECK: bb7: @@ -34,7 +34,7 @@ br i1 %tmp9, label %bb3, label %bb10 ; CHECK: Flow1: -; CHECK: %3 = phi i1 [ %tmp9.inv, %bb7 ], [ true, %Flow ] +; CHECK: %3 = phi i1 [ %tmp9, %bb7 ], [ true, %Flow ] ; CHECK: br i1 %3, label %bb10, label %bb3 ; CHECK: bb10: diff --git a/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll b/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll --- a/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll +++ b/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll @@ -59,7 +59,7 @@ ; CHECK: br i1 %{{[0-9]}}, label %for.body.1, label %Flow2 ; CHECK: for.body.1: -; CHECK: br i1 %cmp1.5.inv, label %for.body.6, label %Flow3 +; CHECK: br i1 %cmp1.5, label %for.body.6, label %Flow3 for.body.1: ; preds = %if.then, %lor.lhs.false %best_val.233 = phi float [ %tmp5, %if.then ], [ %best_val.027, %lor.lhs.false ] %best_count.231 = phi i32 [ %sub4, %if.then ], [ %best_count.025, %lor.lhs.false ]