Index: lib/CodeGen/BranchFolding.cpp =================================================================== --- lib/CodeGen/BranchFolding.cpp +++ lib/CodeGen/BranchFolding.cpp @@ -600,6 +600,22 @@ if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin()) return true; + // If both blocks are identical and end in a branch, merge them unless they + // both have a fallthrough predecessor and successor. + // We can only do this after block placement because it depends on whether + // there are fallthroughs, and we don't know until after layout. + if (AfterPlacement && I1 == MBB1->begin() && I2 == MBB2->begin()) { + auto BothFallThrough = [](MachineBasicBlock *MBB) { + if (MBB->succ_size() != 0 && !MBB->canFallThrough()) + return false; + MachineFunction::iterator I(MBB); + MachineFunction *MF = MBB->getParent(); + return (MBB != &*MF->begin()) && std::prev(I)->canFallThrough(); + }; + if (!BothFallThrough(MBB1) || !BothFallThrough(MBB2)) + return true; + } + // If both blocks have an unconditional branch temporarily stripped out, // count that as an additional common instruction for the following // heuristics. This heuristic is only accurate for single-succ blocks, so to Index: test/CodeGen/X86/tail-merge-identical.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/tail-merge-identical.ll @@ -0,0 +1,41 @@ +; RUN: llc -o - -verify-machineinstrs %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@data = external global [3 x i32], align 4 +@store = external global i32, align 4 + +; %else1 and %then2 end up lowering to identical blocks. These blocks should be +; merged during tail-merging. 
+; CHECK-LABEL: merge_identical_blocks +; CHECK: movl $data+4 +; CHECK-NOT: movl $data+4 +; CHECK: retq +define void @merge_identical_blocks(i1 %a, i1 %b) { +entry: + br label %if1 + +if1: ; preds = %entry + br i1 %a, label %else1, label %if2 + +else1: ; preds = %if1 + %ptr.else1 = getelementptr inbounds [3 x i32], [3 x i32]* @data, i64 0, i32 1 + br label %phi_join + +if2: ; preds = %if1 + br i1 %b, label %then2, label %else2 + +then2: ; preds = %if2 + %ptr.then2 = getelementptr inbounds [3 x i32], [3 x i32]* @data, i64 0, i32 1 + br label %phi_join + +else2: ; preds = %if2 + %ptr.else2 = getelementptr inbounds [3 x i32], [3 x i32]* @data, i64 0, i32 2 + br label %phi_join + +phi_join: ; preds = %else1, %then2, %else2 + %val.ptr = phi i32* [ %ptr.else1, %else1 ], [ %ptr.then2, %then2 ], [ %ptr.else2, %else2 ] + %val = load i32, i32* %val.ptr, align 4 + store i32 %val, i32* @store, align 4 + ret void +}