Index: lib/CodeGen/BranchFolding.cpp =================================================================== --- lib/CodeGen/BranchFolding.cpp +++ lib/CodeGen/BranchFolding.cpp @@ -597,7 +597,8 @@ MachineBasicBlock::iterator &I1, MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB, - DenseMap &FuncletMembership) { + DenseMap &FuncletMembership, + bool AfterPlacement) { // It is never profitable to tail-merge blocks from two different funclets. if (!FuncletMembership.empty()) { auto Funclet1 = FuncletMembership.find(MBB1); @@ -617,7 +618,9 @@ // It's almost always profitable to merge any number of non-terminator // instructions with the block that falls through into the common successor. - if (MBB1 == PredBB || MBB2 == PredBB) { + // This is true only for a single successor. For multiple successors, we are + // trading a conditional branch for an unconditional one. + if ((MBB1 == PredBB || MBB2 == PredBB) && MBB1->succ_size() == 1) { MachineBasicBlock::iterator I; unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I); if (CommonTailLen > NumTerms) @@ -635,9 +638,12 @@ // If both blocks have an unconditional branch temporarily stripped out, // count that as an additional common instruction for the following - // heuristics. + // heuristics. This heuristic is only accurate for single-successor blocks, so + // to make sure that merging and duplicating during layout don't crash, when + // merging after placement we apply it only if MBB1 has a single successor. 
unsigned EffectiveTailLen = CommonTailLen; if (SuccBB && MBB1 != PredBB && MBB2 != PredBB && + (MBB1->succ_size() == 1 || !AfterPlacement) && !MBB1->back().isBarrier() && !MBB2->back().isBarrier()) ++EffectiveTailLen; @@ -682,7 +688,8 @@ minCommonTailLength, CommonTailLen, TrialBBI1, TrialBBI2, SuccBB, PredBB, - FuncletMembership)) { + FuncletMembership, + AfterBlockPlacement)) { if (CommonTailLen > maxCommonTailLength) { SameTails.clear(); maxCommonTailLength = CommonTailLen; Index: test/CodeGen/ARM/ifcvt4.ll =================================================================== --- test/CodeGen/ARM/ifcvt4.ll +++ test/CodeGen/ARM/ifcvt4.ll @@ -1,8 +1,8 @@ ; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s ; CHECK-LABEL: t: -; CHECK: subgt -; CHECK: suble +; CHECK-DAG: subgt +; CHECK-DAG: suble define i32 @t(i32 %a, i32 %b) { entry: %tmp1434 = icmp eq i32 %a, %b ; [#uses=1] Index: test/CodeGen/Hexagon/rdf-copy.ll =================================================================== --- test/CodeGen/Hexagon/rdf-copy.ll +++ test/CodeGen/Hexagon/rdf-copy.ll @@ -17,7 +17,7 @@ ; CHECK: [[DST:r[0-9]+]] = [[SRC:r[0-9]+]] ; CHECK-DAG: memw([[SRC]] ; CHECK-NOT: memw([[DST]] -; CHECK-LABEL: LBB0_2 +; CHECK: %if.end target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32" target triple = "hexagon" Index: test/CodeGen/X86/2008-02-18-TailMergingBug.ll =================================================================== --- test/CodeGen/X86/2008-02-18-TailMergingBug.ll +++ test/CodeGen/X86/2008-02-18-TailMergingBug.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc < %s -march=x86 -mcpu=yonah -stats 2>&1 | grep "Number of block tails merged" | grep 16 +; RUN: llc < %s -march=x86 -mcpu=yonah -stats 2>&1 | grep "Number of block tails merged" | grep 14 ; PR1909 @.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00" ; <[48 x i8]*> [#uses=1]