Index: llvm/lib/CodeGen/BranchFolding.cpp
===================================================================
--- llvm/lib/CodeGen/BranchFolding.cpp
+++ llvm/lib/CodeGen/BranchFolding.cpp
@@ -1231,6 +1231,19 @@
   return I->isBranch();
 }
 
+/// Returns true if \p V1 and \p V2 have the same size and every operand in
+/// \p V1 is identical (per MachineOperand::isIdenticalTo) to the operand at
+/// the same position in \p V2.
+static bool AreOperandsIdentical(const SmallVectorImpl<MachineOperand> &V1,
+                                 const SmallVectorImpl<MachineOperand> &V2) {
+  if (V1.size() != V2.size())
+    return false;
+  return std::equal(V1.begin(), V1.end(), V2.begin(),
+                    [](const MachineOperand &Op1, const MachineOperand &Op2) {
+                      return Op1.isIdenticalTo(Op2);
+                    });
+}
+
 /// IsBetterFallthrough - Return true if it would be clearly better to
 /// fall-through to MBB1 than to fall through into MBB2.  This has to return
 /// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
@@ -1656,6 +1669,41 @@
     }
   }
 
+  // BB1: Conditional jump outside
+  //      Fallthrough or explicit branch to BB2
+  // BB2: The same conditional branch leading to somewhere else outside
+  //      Fallthrough or explicit branch to BB3
+  // can be transformed to
+  // BB1: Conditional jump outside
+  //      Fallthrough or explicit branch to BB2
+  // BB2: Fallthrough or explicit branch to BB3
+  if (!CurUnAnalyzable && !PriorUnAnalyzable && MBB->pred_size() == 1) {
+    // If our predecessor ends in a conditional branch to some other block and
+    // either falls through or jumps unconditionally to us...
+    if (PriorTBB && PriorTBB != MBB && (!PriorFBB || PriorFBB == MBB)) {
+      // ...and our block contains nothing but the same conditional branch,
+      // that branch can never be taken, so we can eliminate it - unless MBB
+      // has its address taken or is an EH pad.
+      if (CurTBB && IsBranchOnlyBlock(MBB) &&
+          AreOperandsIdentical(CurCond, PriorCond) && !MBB->hasAddressTaken() &&
+          !MBB->isEHPad()) {
+        assert(PrevBB.isSuccessor(MBB) && "Not a predecessor?");
+        // Remember the location of the branch we are about to remove, for the
+        // unconditional jump we may have to re-insert below.
+        DebugLoc BranchDL = getBranchDebugLoc(*MBB);
+        TII->removeBranch(*MBB);
+        if (IsEmptyBlock(MBB))
+          MBB->erase(MBB->begin(), MBB->end());
+        // If we had an unconditional jump as well, just restore it.
+        if (CurFBB)
+          TII->insertUnconditionalBranch(*MBB, CurFBB, BranchDL);
+        MadeChange = true;
+        ++NumBranchOpts;
+        goto ReoptimizeBlock;
+      }
+    }
+  }
+
   // If the prior block doesn't fall through into this block, and if this
   // block doesn't fall through into some other block, see if we can find a
   // place to move this block where a fall-through will happen.
Index: llvm/test/CodeGen/AMDGPU/swdev373493.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/swdev373493.ll
+++ llvm/test/CodeGen/AMDGPU/swdev373493.ll
@@ -7,52 +7,7 @@
 ; CHECK-LABEL: bar:
 ; CHECK:       ; %bb.0: ; %bb
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_mov_b32_e32 v15, v12
-; CHECK-NEXT:    v_mov_b32_e32 v14, v11
-; CHECK-NEXT:    v_mov_b32_e32 v13, v10
-; CHECK-NEXT:    v_mov_b32_e32 v12, v9
-; CHECK-NEXT:    v_mov_b32_e32 v11, v8
-; CHECK-NEXT:    v_mov_b32_e32 v10, v7
-; CHECK-NEXT:    v_mov_b32_e32 v9, v6
-; CHECK-NEXT:    v_mov_b32_e32 v8, v5
-; CHECK-NEXT:    v_mov_b32_e32 v7, v4
-; CHECK-NEXT:    v_mov_b32_e32 v6, v3
 ; CHECK-NEXT:    s_cmp_lt_i32 s4, 3
-; CHECK-NEXT:    s_cbranch_scc0 .LBB0_3
-; CHECK-NEXT:  ; %bb.1: ; %LeafBlock
-; CHECK-NEXT:    s_cbranch_scc1 .LBB0_5
-; CHECK-NEXT:  ; %bb.2: ; %bb7
-; CHECK-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
-; CHECK-NEXT:    s_getpc_b64 s[16:17]
-; CHECK-NEXT:    s_add_u32 s16, s16, global@rel32@lo+1948
-; CHECK-NEXT:    s_addc_u32 s17, s17, global@rel32@hi+1956
-; CHECK-NEXT:    v_mov_b32_e32 v5, 0
-; CHECK-NEXT:    v_mov_b32_e32 v0, s16
-; CHECK-NEXT:    v_mov_b32_e32 v1, s17
-; CHECK-NEXT:    s_getpc_b64 s[18:19]
-; CHECK-NEXT:    s_add_u32 s18, s18, eggs@rel32@lo+4
-; CHECK-NEXT:    s_addc_u32 s19, s19, eggs@rel32@hi+12
-; CHECK-NEXT:    s_setpc_b64 s[18:19]
-; CHECK-NEXT:  .LBB0_3: ; %LeafBlock1
-; CHECK-NEXT:    s_cbranch_scc0 .LBB0_5
-; CHECK-NEXT:  ; %bb.4: ; %bb8
-; CHECK-NEXT:    v_mov_b32_e32 v0, v1
-; CHECK-NEXT:    v_mov_b32_e32 v1, v2
-; CHECK-NEXT:    v_mov_b32_e32 v2, v6
-; CHECK-NEXT:    v_mov_b32_e32 v3, v7
-; CHECK-NEXT:    v_mov_b32_e32 v4, v8
-; CHECK-NEXT:    v_mov_b32_e32 v5, v9
-; CHECK-NEXT:    v_mov_b32_e32 v6, v10
-; CHECK-NEXT:    v_mov_b32_e32 v7, v11
-; CHECK-NEXT:    v_mov_b32_e32 v8, v12
-; CHECK-NEXT:    v_mov_b32_e32 v9, v13
-; CHECK-NEXT:    v_mov_b32_e32 v10, v14
-; CHECK-NEXT:    v_mov_b32_e32 v11, v15
-; CHECK-NEXT:    s_getpc_b64 s[16:17]
-; CHECK-NEXT:    s_add_u32 s16, s16, quux@rel32@lo+4
-; CHECK-NEXT:    s_addc_u32 s17, s17, quux@rel32@hi+12
-; CHECK-NEXT:    s_setpc_b64 s[16:17]
-; CHECK-NEXT:  .LBB0_5: ; %bb9
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
 bb:
   switch i32 undef, label %bb9 [
Index: llvm/test/CodeGen/Hexagon/newvaluejump-kill.ll
===================================================================
--- llvm/test/CodeGen/Hexagon/newvaluejump-kill.ll
+++ llvm/test/CodeGen/Hexagon/newvaluejump-kill.ll
@@ -8,7 +8,7 @@
 
 @g0 = external hidden unnamed_addr global [182 x i16], align 8
 
-define void @fred(i16 signext %a0, i16 signext %a1) #0 {
+define void @fred(i16 signext %a0, i16 signext %a1, i1 %c) #0 {
 b1:
   %v1 = sext i16 %a0 to i32
   %v2 = getelementptr inbounds [182 x i16], ptr @g0, i32 0, i32 %v1
@@ -38,7 +38,7 @@
 b16:                                              ; preds = %b14
   %v17 = getelementptr [182 x i16], ptr @g0, i32 0, i32 %v3
   %v18 = icmp ugt ptr %v17, %v2
-  %v19 = or i1 %v18, undef
+  %v19 = or i1 %v18, %c
   br i1 %v19, label %b20, label %b21
 
 b20:                                              ; preds = %b16
Index: llvm/test/CodeGen/PowerPC/aantidep-inline-asm-use.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/aantidep-inline-asm-use.ll
+++ llvm/test/CodeGen/PowerPC/aantidep-inline-asm-use.ll
@@ -4,7 +4,7 @@
 target triple = "powerpc64le-grtev4-linux-gnu"
 
 ; Function Attrs: nounwind
-define void @_ZN10SubProcess19ScrubbedForkAndExecEiPiS0_PNS_7ResultsE() #0 align 2 {
+define void @_ZN10SubProcess19ScrubbedForkAndExecEiPiS0_PNS_7ResultsE(i1 %c) #0 align 2 {
 ; CHECK: lis 3, 1234
 ; CHECK-NOT: li 3
 ; CHECK-NOT: ori 3
@@ -12,7 +12,7 @@
 ; CHECK-NOT: addis 3
 ; CHECK-NOT: lis 3
 ; CHECK: sc
-  br i1 undef, label %1, label %2
+  br i1 %c, label %1, label %2
 ;