diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3343,6 +3343,14 @@ if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition)))) return I; + // Fourth: Modify an existing instruction + if (Condition->hasOneUse()) { + if (auto *CI = dyn_cast<CmpInst>(Condition)) { + CI->setPredicate(CI->getInversePredicate()); + return Condition; + } + } + // Last option: Create a new instruction auto *Inverted = BinaryOperator::CreateNot(Condition, Condition->getName() + ".inv"); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll @@ -139,8 +139,10 @@ ; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_cmp_lg_u32 s4, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB4_4 -; CHECK-NEXT: ; %bb.1: ; %bb2 +; CHECK-NEXT: s_cbranch_scc0 .LBB4_2 +; CHECK-NEXT: .LBB4_1: ; %bb12 +; CHECK-NEXT: s_setpc_b64 s[30:31] +; CHECK-NEXT: .LBB4_2: ; %bb2 ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, const.ptr@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, const.ptr@gotpcrel32@hi+12 @@ -153,15 +155,13 @@ ; CHECK-NEXT: s_mov_b32 s4, -1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0 -; CHECK-NEXT: s_cbranch_vccnz .LBB4_3 -; CHECK-NEXT: ; %bb.2: ; %bb7 +; CHECK-NEXT: s_cbranch_vccnz .LBB4_4 +; CHECK-NEXT: ; %bb.3: ; %bb7 ; CHECK-NEXT: s_mov_b32 s4, 0 -; CHECK-NEXT: .LBB4_3: ; %bb8 +; CHECK-NEXT: .LBB4_4: ; %bb8 ; CHECK-NEXT: s_cmp_lg_u32 s4, 0 -; CHECK-NEXT: s_cbranch_scc0 .LBB4_5 -; CHECK-NEXT: .LBB4_4: ; %bb12 -; CHECK-NEXT: s_setpc_b64 s[30:31] -; CHECK-NEXT: .LBB4_5: ; %bb11 +; CHECK-NEXT: s_cbranch_scc1 .LBB4_1 +; CHECK-NEXT: ; %bb.5: ; %bb11 ; CHECK-NEXT: v_mov_b32_e32 v0, 4.0 
; CHECK-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen ; CHECK-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -838,7 +838,7 @@ ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr10 -; CGP-NEXT: .LBB2_2: ; %Flow2 +; CGP-NEXT: .LBB2_2: ; %Flow1 ; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] ; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] ; CGP-NEXT: s_cbranch_execz .LBB2_4 @@ -3118,7 +3118,7 @@ ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: ; implicit-def: $vgpr8 -; CGP-NEXT: .LBB8_2: ; %Flow2 +; CGP-NEXT: .LBB8_2: ; %Flow1 ; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[8:9] ; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6 ; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -824,7 +824,7 @@ ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v2, v4, vcc ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr10 -; CGP-NEXT: .LBB2_2: ; %Flow2 +; CGP-NEXT: .LBB2_2: ; %Flow1 ; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB2_4 @@ -3072,7 +3072,7 @@ ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v2, v4, vcc ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: ; implicit-def: $vgpr8 -; CGP-NEXT: .LBB8_2: ; %Flow2 +; CGP-NEXT: .LBB8_2: ; %Flow1 ; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[8:9] ; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6 ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -759,7 +759,7 @@ ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr10 -; CGP-NEXT: .LBB2_2: ; %Flow2 +; CGP-NEXT: .LBB2_2: ; %Flow1 ; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] ; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] ; CGP-NEXT: s_cbranch_execz .LBB2_4 @@ -1641,7 +1641,7 @@ ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: ; implicit-def: $vgpr8 -; CGP-NEXT: .LBB8_2: ; %Flow2 +; CGP-NEXT: .LBB8_2: ; %Flow1 ; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[8:9] ; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6 ; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -750,7 +750,7 @@ ; CGP-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc ; CGP-NEXT: ; implicit-def: $vgpr4 ; CGP-NEXT: ; implicit-def: $vgpr10 -; CGP-NEXT: .LBB2_2: ; %Flow2 +; CGP-NEXT: .LBB2_2: ; %Flow1 ; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] ; CGP-NEXT: s_cbranch_execz .LBB2_4 @@ -2181,7 +2181,7 @@ ; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CGP-NEXT: ; implicit-def: $vgpr8 -; CGP-NEXT: .LBB8_2: ; %Flow2 +; CGP-NEXT: .LBB8_2: ; %Flow1 ; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[8:9] ; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6 ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/ctpop16.ll b/llvm/test/CodeGen/AMDGPU/ctpop16.ll --- a/llvm/test/CodeGen/AMDGPU/ctpop16.ll +++ b/llvm/test/CodeGen/AMDGPU/ctpop16.ll @@ -1502,7 +1502,7 @@ ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_lshr_b32 s5, s4, 16 ; SI-NEXT: s_cmp_lg_u32 s5, 0 -; SI-NEXT: s_cbranch_scc0 .LBB14_2 +; SI-NEXT: s_cbranch_scc0 .LBB14_4 ; 
SI-NEXT: ; %bb.1: ; %else ; SI-NEXT: s_mov_b32 s11, 0xf000 ; SI-NEXT: s_mov_b32 s10, -1 @@ -1510,22 +1510,22 @@ ; SI-NEXT: s_mov_b32 s9, s3 ; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 offset:2 ; SI-NEXT: s_mov_b64 s[2:3], 0 -; SI-NEXT: s_cbranch_execz .LBB14_3 -; SI-NEXT: s_branch .LBB14_4 -; SI-NEXT: .LBB14_2: -; SI-NEXT: s_mov_b64 s[2:3], -1 -; SI-NEXT: v_mov_b32_e32 v0, 0 -; SI-NEXT: .LBB14_3: ; %if +; SI-NEXT: s_cbranch_execnz .LBB14_3 +; SI-NEXT: .LBB14_2: ; %if ; SI-NEXT: s_and_b32 s2, s4, 0xffff ; SI-NEXT: s_bcnt1_i32_b32 s2, s2 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s2 -; SI-NEXT: .LBB14_4: ; %endif +; SI-NEXT: .LBB14_3: ; %endif ; SI-NEXT: s_mov_b32 s3, 0xf000 ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm +; SI-NEXT: .LBB14_4: +; SI-NEXT: s_mov_b64 s[2:3], -1 +; SI-NEXT: v_mov_b32_e32 v0, 0 +; SI-NEXT: s_branch .LBB14_2 ; ; VI-LABEL: ctpop_i16_in_br: ; VI: ; %bb.0: ; %entry @@ -1535,7 +1535,7 @@ ; VI-NEXT: s_lshr_b32 s5, s4, 16 ; VI-NEXT: v_cmp_ne_u16_e64 s[6:7], s5, 0 ; VI-NEXT: s_and_b64 vcc, exec, s[6:7] -; VI-NEXT: s_cbranch_vccz .LBB14_2 +; VI-NEXT: s_cbranch_vccz .LBB14_4 ; VI-NEXT: ; %bb.1: ; %else ; VI-NEXT: s_mov_b32 s11, 0xf000 ; VI-NEXT: s_mov_b32 s10, -1 @@ -1543,22 +1543,22 @@ ; VI-NEXT: s_mov_b32 s9, s3 ; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 offset:2 ; VI-NEXT: s_mov_b64 s[2:3], 0 -; VI-NEXT: s_cbranch_execz .LBB14_3 -; VI-NEXT: s_branch .LBB14_4 -; VI-NEXT: .LBB14_2: -; VI-NEXT: s_mov_b64 s[2:3], -1 -; VI-NEXT: ; implicit-def: $vgpr0 -; VI-NEXT: .LBB14_3: ; %if +; VI-NEXT: s_cbranch_execnz .LBB14_3 +; VI-NEXT: .LBB14_2: ; %if ; VI-NEXT: s_and_b32 s2, s4, 0xffff ; VI-NEXT: s_bcnt1_i32_b32 s2, s2 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s2 -; VI-NEXT: .LBB14_4: ; %endif +; VI-NEXT: .LBB14_3: ; %endif ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: 
buffer_store_short v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm +; VI-NEXT: .LBB14_4: +; VI-NEXT: s_mov_b64 s[2:3], -1 +; VI-NEXT: ; implicit-def: $vgpr0 +; VI-NEXT: s_branch .LBB14_2 ; ; EG-LABEL: ctpop_i16_in_br: ; EG: ; %bb.0: ; %entry diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -1534,19 +1534,17 @@ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_cmp_lg_u32 s6, 0 -; SI-NEXT: s_cbranch_scc0 .LBB30_2 +; SI-NEXT: s_cbranch_scc0 .LBB30_4 ; SI-NEXT: ; %bb.1: ; %else ; SI-NEXT: s_load_dword s7, s[2:3], 0x1 ; SI-NEXT: s_mov_b64 s[4:5], 0 ; SI-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b64 vcc, vcc -; SI-NEXT: s_cbranch_vccz .LBB30_3 -; SI-NEXT: s_branch .LBB30_4 -; SI-NEXT: .LBB30_2: -; SI-NEXT: .LBB30_3: ; %if +; SI-NEXT: s_cbranch_vccnz .LBB30_3 +; SI-NEXT: .LBB30_2: ; %if ; SI-NEXT: s_load_dword s7, s[2:3], 0x0 -; SI-NEXT: .LBB30_4: ; %endif +; SI-NEXT: .LBB30_3: ; %endif ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, s6 ; SI-NEXT: s_mov_b32 s3, 0x100f000 @@ -1554,6 +1552,8 @@ ; SI-NEXT: v_mov_b32_e32 v1, s7 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm +; SI-NEXT: .LBB30_4: +; SI-NEXT: s_branch .LBB30_2 ; ; VI-LABEL: insert_split_bb: ; VI: ; %bb.0: ; %entry @@ -1561,16 +1561,14 @@ ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_cmp_lg_u32 s6, 0 -; VI-NEXT: s_cbranch_scc0 .LBB30_2 +; VI-NEXT: s_cbranch_scc0 .LBB30_4 ; VI-NEXT: ; %bb.1: ; %else ; VI-NEXT: s_load_dword s7, s[2:3], 0x4 -; VI-NEXT: s_cbranch_execz .LBB30_3 -; VI-NEXT: s_branch .LBB30_4 -; VI-NEXT: .LBB30_2: -; VI-NEXT: .LBB30_3: ; %if +; VI-NEXT: s_cbranch_execnz .LBB30_3 +; VI-NEXT: .LBB30_2: ; %if ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_load_dword s7, s[2:3], 0x0 -; 
VI-NEXT: .LBB30_4: ; %endif +; VI-NEXT: .LBB30_3: ; %endif ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s6 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 @@ -1578,6 +1576,8 @@ ; VI-NEXT: v_mov_b32_e32 v1, s7 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_endpgm +; VI-NEXT: .LBB30_4: +; VI-NEXT: s_branch .LBB30_2 entry: %0 = insertelement <2 x i32> undef, i32 %a, i32 0 %1 = icmp eq i32 %a, 0 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll --- a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll @@ -16,22 +16,22 @@ ; SI-NEXT: s_load_dword s0, s[0:1], 0xf ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_cmp_lg_u32 s8, 0 -; SI-NEXT: s_cbranch_scc0 .LBB0_2 +; SI-NEXT: s_cbranch_scc0 .LBB0_4 ; SI-NEXT: ; %bb.1: ; %else ; SI-NEXT: s_add_i32 s2, s11, s0 -; SI-NEXT: s_cbranch_execz .LBB0_3 -; SI-NEXT: s_branch .LBB0_4 -; SI-NEXT: .LBB0_2: -; SI-NEXT: ; implicit-def: $sgpr2 -; SI-NEXT: .LBB0_3: ; %if +; SI-NEXT: s_cbranch_execnz .LBB0_3 +; SI-NEXT: .LBB0_2: ; %if ; SI-NEXT: s_sub_i32 s2, s9, s10 -; SI-NEXT: .LBB0_4: ; %endif +; SI-NEXT: .LBB0_3: ; %endif ; SI-NEXT: s_add_i32 s0, s2, s8 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s0 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm +; SI-NEXT: .LBB0_4: +; SI-NEXT: ; implicit-def: $sgpr2 +; SI-NEXT: s_branch .LBB0_2 entry: %0 = icmp eq i32 %a, 0 @@ -59,28 +59,28 @@ ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_cmp_lg_u32 s6, 0 -; SI-NEXT: s_cbranch_scc0 .LBB1_2 +; SI-NEXT: s_cbranch_scc0 .LBB1_4 ; SI-NEXT: ; %bb.1: ; %else ; SI-NEXT: s_load_dword s2, s[0:1], 0x2e ; SI-NEXT: s_load_dword s3, s[0:1], 0x37 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_add_i32 s7, s2, s3 -; SI-NEXT: s_cbranch_execz .LBB1_3 -; SI-NEXT: s_branch .LBB1_4 -; SI-NEXT: .LBB1_2: -; SI-NEXT: ; implicit-def: $sgpr7 -; 
SI-NEXT: .LBB1_3: ; %if +; SI-NEXT: s_cbranch_execnz .LBB1_3 +; SI-NEXT: .LBB1_2: ; %if ; SI-NEXT: s_load_dword s2, s[0:1], 0x1c ; SI-NEXT: s_load_dword s0, s[0:1], 0x25 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_add_i32 s7, s2, s0 -; SI-NEXT: .LBB1_4: ; %endif +; SI-NEXT: .LBB1_3: ; %endif ; SI-NEXT: s_add_i32 s0, s7, s6 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, s0 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm +; SI-NEXT: .LBB1_4: +; SI-NEXT: ; implicit-def: $sgpr7 +; SI-NEXT: s_branch .LBB1_2 entry: %cmp0 = icmp eq i32 %a, 0 diff --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll b/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll --- a/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll +++ b/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll @@ -22,26 +22,25 @@ ; CHECK-LABEL: @loop_subregion_misordered( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP:%.*]] = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16 -; CHECK-NEXT: [[LOAD1:%.*]] = load volatile <2 x float>, <2 x float> addrspace(1)* undef +; CHECK-NEXT: [[LOAD1:%.*]] = load volatile <2 x float>, <2 x float> addrspace(1)* undef, align 8 ; CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG0:%.*]], i32 [[TID]] ; CHECK-NEXT: [[I_INITIAL:%.*]] = load volatile i32, i32 addrspace(1)* [[GEP]], align 4 ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: LOOP.HEADER: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP4:%.*]], [[FLOW3:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[FLOW3:%.*]] ] ; CHECK-NEXT: call void asm sideeffect "s_nop 0x100b ; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* 
null, i64 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP13]], align 16 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP14]], i64 0 ; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 65535 -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TMP17]], true -; CHECK-NEXT: br i1 [[TMP0]], label [[BB62:%.*]], label [[FLOW:%.*]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 1 +; CHECK-NEXT: br i1 [[TMP17]], label [[BB62:%.*]], label [[FLOW:%.*]] ; CHECK: Flow1: -; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[INC_I:%.*]], [[INCREMENT_I:%.*]] ], [ undef, [[BB62]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ false, [[INCREMENT_I]] ], [ true, [[BB62]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ true, [[INCREMENT_I]] ], [ false, [[BB62]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[INC_I:%.*]], [[INCREMENT_I:%.*]] ], [ undef, [[BB62]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, [[INCREMENT_I]] ], [ true, [[BB62]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ true, [[INCREMENT_I]] ], [ false, [[BB62]] ] ; CHECK-NEXT: br label [[FLOW]] ; CHECK: bb18: ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP]], i64 0 @@ -50,9 +49,9 @@ ; CHECK-NEXT: [[TMP25:%.*]] = mul nuw nsw i32 [[TMP24]], 52 ; CHECK-NEXT: br label [[INNER_LOOP:%.*]] ; CHECK: Flow2: -; CHECK-NEXT: [[TMP4]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP8:%.*]], [[FLOW]] ] -; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP10:%.*]], [[FLOW]] ] -; CHECK-NEXT: br i1 [[TMP5]], label [[END_ELSE_BLOCK:%.*]], label [[FLOW3]] +; CHECK-NEXT: [[TMP3]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP6:%.*]], [[FLOW]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP8:%.*]], [[FLOW]] ] +; CHECK-NEXT: br i1 [[TMP4]], label [[END_ELSE_BLOCK:%.*]], label [[FLOW3]] ; CHECK: INNER_LOOP: ; CHECK-NEXT: [[INNER_LOOP_J:%.*]] = phi i32 [ [[INNER_LOOP_J_INC:%.*]], 
[[INNER_LOOP]] ], [ [[TMP25]], [[BB18:%.*]] ] ; CHECK-NEXT: call void asm sideeffect " @@ -61,33 +60,32 @@ ; CHECK-NEXT: br i1 [[INNER_LOOP_CMP]], label [[INNER_LOOP_BREAK]], label [[INNER_LOOP]] ; CHECK: INNER_LOOP_BREAK: ; CHECK-NEXT: [[TMP59]] = extractelement <4 x i32> [[TMP14]], i64 2 -; CHECK-NEXT: call void asm sideeffect "s_nop 23 ", "~{memory}"() #0 +; CHECK-NEXT: call void asm sideeffect "s_nop 23 ", "~{memory}"() #[[ATTR0:[0-9]+]] ; CHECK-NEXT: br label [[FLOW2:%.*]] ; CHECK: bb62: -; CHECK-NEXT: [[LOAD13:%.*]] = icmp ult i32 [[TMP16]], 271 -; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[LOAD13]], true -; CHECK-NEXT: br i1 [[TMP6]], label [[INCREMENT_I]], label [[FLOW1:%.*]] +; CHECK-NEXT: [[LOAD13:%.*]] = icmp uge i32 [[TMP16]], 271 +; CHECK-NEXT: br i1 [[LOAD13]], label [[INCREMENT_I]], label [[FLOW1:%.*]] ; CHECK: Flow3: -; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK:%.*]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW2]] ] -; CHECK-NEXT: br i1 [[TMP7]], label [[FLOW4:%.*]], label [[LOOP_HEADER]] +; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK:%.*]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW2]] ] +; CHECK-NEXT: br i1 [[TMP5]], label [[FLOW4:%.*]], label [[LOOP_HEADER]] ; CHECK: Flow4: -; CHECK-NEXT: br i1 [[TMP9:%.*]], label [[BB64:%.*]], label [[RETURN:%.*]] +; CHECK-NEXT: br i1 [[TMP7:%.*]], label [[BB64:%.*]], label [[RETURN:%.*]] ; CHECK: bb64: -; CHECK-NEXT: call void asm sideeffect "s_nop 42", "~{memory}"() #0 +; CHECK-NEXT: call void asm sideeffect "s_nop 42", "~{memory}"() #[[ATTR0]] ; CHECK-NEXT: br label [[RETURN]] ; CHECK: Flow: -; CHECK-NEXT: [[TMP8]] = phi i32 [ [[TMP1]], [[FLOW1]] ], [ undef, [[LOOP_HEADER]] ] -; CHECK-NEXT: [[TMP9]] = phi i1 [ [[TMP2]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ] -; CHECK-NEXT: [[TMP10]] = phi i1 [ [[TMP3]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ] -; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ false, [[FLOW1]] ], [ true, [[LOOP_HEADER]] ] -; CHECK-NEXT: br i1 [[TMP11]], label [[BB18]], label [[FLOW2]] +; 
CHECK-NEXT: [[TMP6]] = phi i32 [ [[TMP0]], [[FLOW1]] ], [ undef, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP7]] = phi i1 [ [[TMP1]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP2]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ false, [[FLOW1]] ], [ true, [[LOOP_HEADER]] ] +; CHECK-NEXT: br i1 [[TMP9]], label [[BB18]], label [[FLOW2]] ; CHECK: INCREMENT_I: ; CHECK-NEXT: [[INC_I]] = add i32 [[I]], 1 ; CHECK-NEXT: call void asm sideeffect "s_nop 0x1336 ; CHECK-NEXT: br label [[FLOW1]] ; CHECK: END_ELSE_BLOCK: ; CHECK-NEXT: call void asm sideeffect "s_nop 0x1337 -; CHECK-NEXT: [[CMP_END_ELSE_BLOCK]] = icmp eq i32 [[TMP4]], -1 +; CHECK-NEXT: [[CMP_END_ELSE_BLOCK]] = icmp eq i32 [[TMP3]], -1 ; CHECK-NEXT: br label [[FLOW3]] ; CHECK: RETURN: ; CHECK-NEXT: call void asm sideeffect "s_nop 0x99 diff --git a/llvm/test/Transforms/StructurizeCFG/bug36015.ll b/llvm/test/Transforms/StructurizeCFG/bug36015.ll --- a/llvm/test/Transforms/StructurizeCFG/bug36015.ll +++ b/llvm/test/Transforms/StructurizeCFG/bug36015.ll @@ -18,7 +18,7 @@ br i1 %cond.inner, label %if, label %else ; CHECK: if: -; CHECK: %cond.if.inv = xor i1 %cond.if, true +; CHECK: %cond.if = icmp sge i32 %ctr.if, %count ; CHECK: br label %Flow if: %ctr.if = add i32 %ctr.loop.inner, 1 @@ -27,7 +27,7 @@ br i1 %cond.if, label %loop.inner, label %exit ; CHECK: Flow: -; CHECK: %1 = phi i1 [ %cond.if.inv, %if ], [ true, %loop.inner ] +; CHECK: %1 = phi i1 [ %cond.if, %if ], [ true, %loop.inner ] ; CHECK: %2 = phi i1 [ false, %if ], [ true, %loop.inner ] ; CHECK: br i1 %1, label %Flow1, label %loop.inner @@ -43,7 +43,7 @@ br i1 %cond.else, label %loop.outer, label %exit ; CHECK: Flow2: -; CHECK: %4 = phi i1 [ %cond.else.inv, %else ], [ true, %Flow1 ] +; CHECK: %4 = phi i1 [ %cond.else, %else ], [ true, %Flow1 ] ; CHECK: br i1 %4, label %exit, label %loop.outer exit: diff --git a/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll 
b/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll --- a/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll +++ b/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll @@ -6,8 +6,7 @@ define void @invert_constantexpr_condition(i32 %arg, i32 %arg1) #0 { ; CHECK-LABEL: @invert_constantexpr_condition( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = icmp eq i32 [[ARG:%.*]], 0 -; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TMP]], true +; CHECK-NEXT: [[TMP:%.*]] = icmp ne i32 [[ARG:%.*]], 0 ; CHECK-NEXT: br i1 icmp eq (i32 ptrtoint (i32* @g to i32), i32 0), label [[BB2:%.*]], label [[FLOW:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br label [[FLOW]] @@ -16,8 +15,8 @@ ; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], icmp eq (i32 ptrtoint (i32* @g to i32), i32 0) ; CHECK-NEXT: br label [[BB8:%.*]] ; CHECK: Flow: -; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB2]] ], [ icmp ne (i32 ptrtoint (i32* @g to i32), i32 0), [[BB:%.*]] ] -; CHECK-NEXT: br i1 [[TMP1]], label [[BB6]], label [[BB3:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ [[TMP]], [[BB2]] ], [ icmp ne (i32 ptrtoint (i32* @g to i32), i32 0), [[BB:%.*]] ] +; CHECK-NEXT: br i1 [[TMP0]], label [[BB6]], label [[BB3:%.*]] ; CHECK: bb6: ; CHECK-NEXT: [[TMP7]] = icmp slt i32 [[ARG]], [[ARG1:%.*]] ; CHECK-NEXT: br label [[BB3]] diff --git a/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll b/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll --- a/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll +++ b/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll @@ -8,22 +8,22 @@ br label %bb3 ; CHECK: bb3: -; CHECK: %tmp4.inv = xor i1 %tmp4, true -; CHECK: br i1 %tmp4.inv, label %bb5, label %Flow +; CHECK: %tmp4 = fcmp oge float %arg1, 3.500000e+00 +; CHECK: br i1 %tmp4, label %bb5, label %Flow bb3: ; preds = %bb7, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb7 ] %tmp4 = fcmp ult float %arg1, 3.500000e+00 br i1 %tmp4, label %bb7, label %bb5 ; CHECK: bb5: -; CHECK: 
%tmp6.inv = xor i1 %tmp6, true +; CHECK: %tmp6 = fcmp uge float 0.000000e+00, %arg2 ; CHECK: br label %Flow bb5: ; preds = %bb3 %tmp6 = fcmp olt float 0.000000e+00, %arg2 br i1 %tmp6, label %bb10, label %bb7 ; CHECK: Flow: -; CHECK: %0 = phi i1 [ %tmp6.inv, %bb5 ], [ %tmp4, %bb3 ] +; CHECK: %0 = phi i1 [ %tmp6, %bb5 ], [ %tmp4, %bb3 ] ; CHECK: br i1 %0, label %bb7, label %Flow1 ; CHECK: bb7: @@ -34,7 +34,7 @@ br i1 %tmp9, label %bb3, label %bb10 ; CHECK: Flow1: -; CHECK: %3 = phi i1 [ %tmp9.inv, %bb7 ], [ true, %Flow ] +; CHECK: %3 = phi i1 [ %tmp9, %bb7 ], [ true, %Flow ] ; CHECK: br i1 %3, label %bb10, label %bb3 ; CHECK: bb10: diff --git a/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll b/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll --- a/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll +++ b/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll @@ -59,7 +59,7 @@ ; CHECK: br i1 %{{[0-9]}}, label %for.body.1, label %Flow2 ; CHECK: for.body.1: -; CHECK: br i1 %cmp1.5.inv, label %for.body.6, label %Flow3 +; CHECK: br i1 %cmp1.5, label %for.body.6, label %Flow3 for.body.1: ; preds = %if.then, %lor.lhs.false %best_val.233 = phi float [ %tmp5, %if.then ], [ %best_val.027, %lor.lhs.false ] %best_count.231 = phi i32 [ %sub4, %if.then ], [ %best_count.025, %lor.lhs.false ]