Let each subtarget decide whether branch coalescing runs by default.

  * TargetSubtargetInfo gains a virtual enableBranchCoalescing() hook whose
    base implementation returns false.
  * PPCSubtarget overrides the hook and returns true when both isSVR4ABI()
    and isPPC64() hold.
  * BranchCoalescing::isBranchCoalescingEnabled() now takes the
    MachineFunction: cl::BOU_TRUE / cl::BOU_FALSE values of
    EnableBranchCoalescing are honoured as-is, and cl::BOU_UNSET defers to the
    subtarget hook.
  * TargetPassConfig adds the pass only when the optimization level is not
    CodeGenOpt::None.
  * branch_coalesce.ll drops the explicit -enable-branch-coalesce=true from
    its RUN lines and adds CHECK-NOCOALESCE runs with
    -enable-branch-coalesce=false; select-i1-vs-i1.ll drops the FIXME and the
    CHECK lines for the second same-predicate branch.

Index: include/llvm/Target/TargetSubtargetInfo.h
===================================================================
--- include/llvm/Target/TargetSubtargetInfo.h
+++ include/llvm/Target/TargetSubtargetInfo.h
@@ -136,6 +136,9 @@
     return 0;
   }
 
+  /// \brief True if the subtarget supports branch coalescing
+  virtual bool enableBranchCoalescing() const;
+
   /// \brief True if the subtarget should run MachineScheduler after aggressive
   /// coalescing.
   ///
Index: lib/CodeGen/BranchCoalescing.cpp
===================================================================
--- lib/CodeGen/BranchCoalescing.cpp
+++ lib/CodeGen/BranchCoalescing.cpp
@@ -157,9 +157,7 @@
   bool validateCandidates(CoalescingCandidateInfo &SourceRegion,
                           CoalescingCandidateInfo &TargetRegion) const;
 
-  static bool isBranchCoalescingEnabled() {
-    return EnableBranchCoalescing == cl::BOU_TRUE;
-  }
+  static bool isBranchCoalescingEnabled(const MachineFunction &MF);
 
 public:
   static char ID;
@@ -688,7 +686,7 @@
 bool BranchCoalescing::runOnMachineFunction(MachineFunction &MF) {
 
   if (skipFunction(*MF.getFunction()) || MF.empty() ||
-      !isBranchCoalescingEnabled())
+      !isBranchCoalescingEnabled(MF))
     return false;
 
   bool didSomething = false;
@@ -756,3 +754,15 @@
   DEBUG(dbgs() << "Finished Branch Coalescing\n");
   return didSomething;
 }
+
+bool BranchCoalescing::isBranchCoalescingEnabled(const MachineFunction &MF) {
+  switch (EnableBranchCoalescing) {
+  case cl::BOU_UNSET:
+    return MF.getSubtarget().enableBranchCoalescing();
+  case cl::BOU_TRUE:
+    return true;
+  case cl::BOU_FALSE:
+    return false;
+  }
+  llvm_unreachable("Invalid branch coalescing state");
+}
Index: lib/CodeGen/TargetPassConfig.cpp
===================================================================
--- lib/CodeGen/TargetPassConfig.cpp
+++ lib/CodeGen/TargetPassConfig.cpp
@@ -719,7 +719,8 @@
   addPass(&MachineCSEID, false);
 
   // Coalesce basic blocks with the same branch condition
-  addPass(&BranchCoalescingID);
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(&BranchCoalescingID);
 
   addPass(&MachineSinkingID);
 
Index: lib/CodeGen/TargetSubtargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetSubtargetInfo.cpp
+++ lib/CodeGen/TargetSubtargetInfo.cpp
@@ -35,6 +35,10 @@
   return true;
 }
 
+bool TargetSubtargetInfo::enableBranchCoalescing() const {
+  return false;
+}
+
 bool TargetSubtargetInfo::enableMachineScheduler() const {
   return false;
 }
Index: lib/Target/PowerPC/PPCSubtarget.h
===================================================================
--- lib/Target/PowerPC/PPCSubtarget.h
+++ lib/Target/PowerPC/PPCSubtarget.h
@@ -302,6 +302,9 @@
   /// but may expand the ISEL instruction later.
   bool enableEarlyIfConversion() const override { return true; }
 
+  // Branch coalesce support
+  bool enableBranchCoalescing() const override;
+
   // Scheduling customization.
   bool enableMachineScheduler() const override;
   // This overrides the PostRAScheduler bit in the SchedModel for each CPU.
Index: lib/Target/PowerPC/PPCSubtarget.cpp
===================================================================
--- lib/Target/PowerPC/PPCSubtarget.cpp
+++ lib/Target/PowerPC/PPCSubtarget.cpp
@@ -179,6 +179,10 @@
   }
 }
 
+bool PPCSubtarget::enableBranchCoalescing() const {
+  return (isSVR4ABI() && isPPC64());
+}
+
 bool PPCSubtarget::enableMachineScheduler() const {
   // Enable MI scheduling for the embedded cores.
   // FIXME: Enable this for all cores (some additional modeling
Index: test/CodeGen/PowerPC/branch_coalesce.ll
===================================================================
--- test/CodeGen/PowerPC/branch_coalesce.ll
+++ test/CodeGen/PowerPC/branch_coalesce.ll
@@ -1,26 +1,19 @@
-; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -enable-branch-coalesce=true < %s | FileCheck %s
-; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -enable-branch-coalesce=true < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -enable-branch-coalesce=false -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOCOALESCE %s
+; RUN: llc -enable-branch-coalesce=false -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOCOALESCE %s
 
 ; Function Attrs: nounwind
 define double @testBranchCoal(double %a, double %b, double %c, i32 %x) {
-entry:
-  %test = icmp eq i32 %x, 0
-  %tmp1 = select i1 %test, double %a, double 2.000000e-03
-  %tmp2 = select i1 %test, double %b, double 0.000000e+00
-  %tmp3 = select i1 %test, double %c, double 5.000000e-03
 
-  %res1 = fadd double %tmp1, %tmp2
-  %result = fadd double %res1, %tmp3
-  ret double %result
-
-; CHECK-LABEL: @testBranchCoal
+; CHECK-LABEL: @testBranchCoal
 ; CHECK: cmplwi [[CMPR:[0-7]+]], 6, 0
 ; CHECK: beq [[CMPR]], .LBB[[LAB1:[0-9_]+]]
 ; CHECK-DAG: addis [[LD1REG:[0-9]+]], 2, .LCPI0_0@toc@ha
 ; CHECK-DAG: addis [[LD2REG:[0-9]+]], 2, .LCPI0_1@toc@ha
 ; CHECK-DAG: xxlxor 2, 2, 2
-; CHECK-NOT: beq
-; CHECK-DAG: addi [[LD1BASE:[0-9]+]], [[LD1REG]]
+; CHECK-NOT: beq
+; CHECK-DAG: addi [[LD1BASE:[0-9]+]], [[LD1REG]]
 ; CHECK-DAG: addi [[LD2BASE:[0-9]+]], [[LD2REG]]
 ; CHECK-DAG: lxsdx 1, 0, [[LD1BASE]]
 ; CHECK-DAG: lxsdx 3, 0, [[LD2BASE]]
@@ -28,4 +21,40 @@
 ; CHECK: xsadddp 0, 1, 2
 ; CHECK: xsadddp 1, 0, 3
 ; CHECK: blr
+
+; CHECK-NOCOALESCE-LABEL: testBranchCoal:
+; CHECK-NOCOALESCE: # BB#0: # %entry
+; CHECK-NOCOALESCE-NEXT: cmplwi 0, 6, 0
+; CHECK-NOCOALESCE-NEXT: bne 0, .LBB0_5
+; CHECK-NOCOALESCE-NEXT: # BB#1: # %entry
+; CHECK-NOCOALESCE-NEXT: bne 0, .LBB0_6
+; CHECK-NOCOALESCE-NEXT: .LBB0_2: # %entry
+; CHECK-NOCOALESCE-NEXT: beq 0, .LBB0_4
+; CHECK-NOCOALESCE-NEXT: .LBB0_3: # %entry
+; CHECK-NOCOALESCE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
+; CHECK-NOCOALESCE-NEXT: addi 3, 3, .LCPI0_1@toc@l
+; CHECK-NOCOALESCE-NEXT: lxsdx 3, 0, 3
+; CHECK-NOCOALESCE-NEXT: .LBB0_4: # %entry
+; CHECK-NOCOALESCE-NEXT: xsadddp 0, 1, 2
+; CHECK-NOCOALESCE-NEXT: xsadddp 1, 0, 3
+; CHECK-NOCOALESCE-NEXT: blr
+; CHECK-NOCOALESCE-NEXT: .LBB0_5: # %entry
+; CHECK-NOCOALESCE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-NOCOALESCE-NEXT: addi 3, 3, .LCPI0_0@toc@l
+; CHECK-NOCOALESCE-NEXT: lxsdx 1, 0, 3
+; CHECK-NOCOALESCE-NEXT: beq 0, .LBB0_2
+; CHECK-NOCOALESCE-NEXT: .LBB0_6: # %entry
+; CHECK-NOCOALESCE-NEXT: xxlxor 2, 2, 2
+; CHECK-NOCOALESCE-NEXT: bne 0, .LBB0_3
+; CHECK-NOCOALESCE-NEXT: b .LBB0_4
+  entry:
+
+  %test = icmp eq i32 %x, 0
+  %tmp1 = select i1 %test, double %a, double 2.000000e-03
+  %tmp2 = select i1 %test, double %b, double 0.000000e+00
+  %tmp3 = select i1 %test, double %c, double 5.000000e-03
+
+  %res1 = fadd double %tmp1, %tmp2
+  %result = fadd double %res1, %tmp3
+  ret double %result
 }
Index: test/CodeGen/PowerPC/select-i1-vs-i1.ll
===================================================================
--- test/CodeGen/PowerPC/select-i1-vs-i1.ll
+++ test/CodeGen/PowerPC/select-i1-vs-i1.ll
@@ -1026,10 +1026,6 @@
   %cond = select i1 %cmp3, ppc_fp128 %a1, ppc_fp128 %a2
   ret ppc_fp128 %cond
 
-; FIXME: Because of the way that the late SELECT_* pseudo-instruction expansion
-; works, we end up with two blocks with the same predicate. These could be
-; combined.
-
 ; CHECK-LABEL: @testppc_fp128eq
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 6, 8
 ; CHECK-DAG: fcmpu {{[0-9]+}}, 5, 7
@@ -1040,10 +1036,8 @@
 ; CHECK: crxor [[REG3:[0-9]+]], [[REG2]], [[REG1]]
 ; CHECK: bc 12, [[REG3]], .LBB[[BB1:[0-9_]+]]
 ; CHECK: fmr 11, 9
-; CHECK: .LBB[[BB1]]:
-; CHECK: bc 12, [[REG3]], .LBB[[BB2:[0-9_]+]]
 ; CHECK: fmr 12, 10
-; CHECK: .LBB[[BB2]]:
+; CHECK: .LBB[[BB1]]:
 ; CHECK-DAG: fmr 1, 11
 ; CHECK-DAG: fmr 2, 12
 ; CHECK: blr