Index: lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
===================================================================
--- lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -55,7 +55,12 @@
     PRED_BIT_SET = 1024,
     PRED_BIT_UNSET = 1025
   };
-  
+
+  // Bit mask for branch taken/not-taken hint
+  const unsigned PRED_NOT_TAKEN_HINT = 0x2;
+  const unsigned PRED_TAKEN_HINT = 0x3;
+  const unsigned PRED_HINT_MASK = 0x3;
+
   /// Invert the specified predicate. != -> ==, < -> >=.
   Predicate InvertPredicate(Predicate Opcode);
 
Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -52,6 +52,13 @@
                          "bit permutations"),
                 cl::Hidden);
 
+static cl::opt<bool> EnableBranchHint(
+  "ppc-enable-branch-hint",
+  cl::desc("Enable branch with hint codegen on ppc for cases which are easy to "
+           "predict at static time, e.g. c++ throw statement is very likely not "
+           "to be taken, or never returned function, or programer's hint"),
+  cl::Hidden);
+
 namespace llvm {
   void initializePPCDAGToDAGISelPass(PassRegistry&);
 }
@@ -2353,6 +2360,53 @@
   return Result;
 }
 
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+
+/// Fold a static branch-prediction hint into the branch predicate \p PCC when
+/// branch probability info shows that one successor of the current block is
+/// at least Threshold (16) times more likely than the other. \p N is the
+/// branch node being selected; its operand #4 is the destination block.
+/// Leaves \p PCC untouched if there is no BPI or no two-way terminator.
+static void tryToUseBranchHint(unsigned &PCC,
+                               FunctionLoweringInfo *FuncInfo, SDNode *N) {
+  if (!FuncInfo->BPI) return;
+
+  const uint32_t Threshold = 16;
+  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
+  const TerminatorInst *BBTerm = BB->getTerminator();
+
+  if (BBTerm->getNumSuccessors() != 2) return;
+
+  const BasicBlock *TBB = BBTerm->getSuccessor(0);
+  const BasicBlock *FBB = BBTerm->getSuccessor(1);
+
+  uint32_t TWeight = FuncInfo->BPI->getEdgeWeight(BB, TBB);
+  uint32_t FWeight = FuncInfo->BPI->getEdgeWeight(BB, FBB);
+
+  // Why 16? The branch ratio for '__builtin_expect' is 64:4 = 16. For a c++
+  // throw statement or a call to a never-returning function, the BasicBlock
+  // is terminated by an unreachable instruction and the ratio is 1048575:1.
+  // Divide the larger weight so that a zero smaller weight cannot trap.
+  if (std::max(TWeight, FWeight) / Threshold < std::min(TWeight, FWeight))
+    return;
+
+  DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::"
+               << BB->getName() << "'\n"
+               << " -> " << TBB->getName() << ": " << TWeight << "\n"
+               << " -> " << FBB->getName() << ": " << FWeight << "\n");
+
+  // Op #4 is the Dest MBB
+  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(N->getOperand(4));
+
+  // If the Dest BasicBlock is the False-BasicBlock (FBB), swap the weights,
+  // because we want 'TWeight' to stand for the weight of the Dest BasicBlock
+  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
+    std::swap(TWeight, FWeight);
+
+  if (TWeight > FWeight) PCC |= PPC::PRED_TAKEN_HINT;
+  else PCC |= PPC::PRED_NOT_TAKEN_HINT;
+}
 
 // Select - Convert the specified operand from a target-independent to a
 // target-specific node if it hasn't already been changed.
@@ -2871,6 +2925,9 @@
                                     BitComp, N->getOperand(4), N->getOperand(0));
     }
 
+    if (EnableBranchHint)
+      tryToUseBranchHint(PCC, FuncInfo, N);
+
     SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
     SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
                         N->getOperand(4), N->getOperand(0) };
Index: lib/Target/PowerPC/PPCInstrInfo.cpp
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.cpp
+++ lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -709,6 +709,11 @@
       !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
     return false;
 
+  // isel is only for branches without a hint (0x2: not-taken, 0x3: taken).
+  // A masked value of 0x1 is not a hint: PRED_BIT_UNSET (1025) has bit 0 set.
+  if ((Cond[0].getImm() & PPC::PRED_HINT_MASK) >= PPC::PRED_NOT_TAKEN_HINT)
+    return false;
+
   // FIXME: These numbers are for the A2, how well they work for other cores is
   // an open question. On the A2, the isel instruction has a 2-cycle latency
   // but single-cycle throughput. These numbers are used in combination with
Index: test/CodeGen/PowerPC/branch-hint.ll
===================================================================
--- test/CodeGen/PowerPC/branch-hint.ll
+++ test/CodeGen/PowerPC/branch-hint.ll
@@ -0,0 +1,132 @@
+; RUN: llc < %s -O1 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -O1 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -ppc-enable-branch-hint | FileCheck %s -check-prefix=CHECK-HINT
+define void @branch_hint_1(i32 %src) {
+entry:
+  %cmp = icmp eq i32 %src, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @foo() #0
+  unreachable
+
+if.end:
+  ret void
+
+; CHECK-LABEL: branch_hint_1:
+; CHECK: beq
+
+; CHECK-HINT-LABEL: branch_hint_1:
+; CHECK-HINT: beq-
+}
+
+define void @branch_hint_2(i32 %src) {
+entry:
+  %cmp = icmp eq i32 %src, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  ret void
+
+if.end:
+  tail call void @foo() #0
+  unreachable
+
+; CHECK-LABEL: branch_hint_2:
+; CHECK: bne
+
+; CHECK-HINT-LABEL: branch_hint_2:
+; CHECK-HINT: bne-
+}
+
+declare void @foo()
+attributes #0 = { noreturn }
+
+define void @branch_hint_3(i32 %src) {
+entry:
+  %cmp = icmp eq i32 %src, 0
+  br i1 %cmp, label %if.then, label %if.end, !prof !0
+
+if.then:
+  ret void
+
+if.end:
+  call void @goo()
+  ret void
+
+; CHECK-LABEL: branch_hint_3:
+; CHECK: bne
+
+; CHECK-HINT-LABEL: branch_hint_3:
+; CHECK-HINT: bne-
+}
+
+!0 = !{!"branch_weights", i32 64, i32 4}
+
+define void @branch_hint_4(i32 %src) {
+entry:
+  %cmp = icmp eq i32 %src, 0
+  br i1 %cmp, label %if.then, label %if.end, !prof !1
+
+if.then:
+  ret void
+
+if.end:
+  call void @goo()
+  ret void
+
+; CHECK-HINT-LABEL: branch_hint_4:
+; CHECK-HINT: bne
+}
+
+!1 = !{!"branch_weights", i32 64, i32 8}
+
+define void @branch_hint_5(i32 %src) {
+entry:
+  %cmp = icmp eq i32 %src, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  ret void
+
+if.end:
+  call void @goo()
+  ret void
+
+; CHECK-HINT-LABEL: branch_hint_5:
+; CHECK-HINT: beq
+}
+
+declare void @goo()
+
+define void @branch_hint_6(i32 %src1, i32 %src2, i32 %src3) {
+entry:
+  %cmp = icmp eq i32 %src1, 0
+  br i1 %cmp, label %if.end.6, label %if.end, !prof !3
+
+if.end:
+  %cmp1 = icmp eq i32 %src2, 0
+  br i1 %cmp1, label %if.end.3, label %if.then.2
+
+if.then.2:
+  tail call void @foo() #0
+  unreachable
+
+if.end.3:
+  %cmp4 = icmp eq i32 %src3, 1
+  br i1 %cmp4, label %if.then.5, label %if.end.6
+
+if.then.5:
+  tail call void @foo() #0
+  unreachable
+
+if.end.6:
+  ret void
+
+; CHECK-HINT-LABEL: branch_hint_6:
+; CHECK-HINT: bne-
+; CHECK-HINT: bne-
+; CHECK-HINT: bne+
+}
+
+!3 = !{!"branch_weights", i32 64, i32 4}
+