Index: llvm/trunk/include/llvm/Support/BranchProbability.h =================================================================== --- llvm/trunk/include/llvm/Support/BranchProbability.h +++ llvm/trunk/include/llvm/Support/BranchProbability.h @@ -118,6 +118,13 @@ return *this; } + BranchProbability &operator/=(BranchProbability RHS) { + assert(N != UnknownN && RHS.N != UnknownN && + "Unknown probability cannot participate in arithmetics."); + N = (static_cast(N) * D + RHS.N / 2) / RHS.N; + return *this; + } + BranchProbability &operator/=(uint32_t RHS) { assert(N != UnknownN && "Unknown probability cannot participate in arithmetics."); @@ -150,6 +157,12 @@ return Prob; } + BranchProbability operator/(BranchProbability RHS) const { + BranchProbability Prob(*this); + Prob /= RHS; + return Prob; + } + BranchProbability operator/(uint32_t RHS) const { BranchProbability Prob(*this); Prob /= RHS; Index: llvm/trunk/lib/Target/PowerPC/PPCReduceCRLogicals.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCReduceCRLogicals.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCReduceCRLogicals.cpp @@ -166,9 +166,33 @@ : *ThisMBB->succ_begin(); MachineBasicBlock *NewBRTarget = BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget; - BranchProbability ProbToNewTarget = - !BSI.MBPI ? BranchProbability::getUnknown() - : BSI.MBPI->getEdgeProbability(ThisMBB, NewBRTarget); + + // It's impossible to know the precise branch probability after the split. + // But it still needs to be reasonable, the whole probability to original + // targets should not be changed. + // After split NewBRTarget will get two incoming edges. Assume P0 is the + // original branch probability to NewBRTarget, P1 and P2 are new branch + // probabilies to NewBRTarget after split. If the two edge frequencies are + // same, then + // F * P1 = F * P0 / 2 ==> P1 = P0 / 2 + // F * (1 - P1) * P2 = F * P1 ==> P2 = P1 / (1 - P1) + BranchProbability ProbToNewTarget, ProbFallThrough; // Prob for new Br. + BranchProbability ProbOrigTarget, ProbOrigFallThrough; // Prob for orig Br. + ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown(); + ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown(); + if (BSI.MBPI) { + if (BSI.BranchToFallThrough) { + ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough) / 2; + ProbFallThrough = ProbToNewTarget.getCompl(); + ProbOrigFallThrough = ProbToNewTarget / ProbToNewTarget.getCompl(); + ProbOrigTarget = ProbOrigFallThrough.getCompl(); + } else { + ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget) / 2; + ProbFallThrough = ProbToNewTarget.getCompl(); + ProbOrigTarget = ProbToNewTarget / ProbToNewTarget.getCompl(); + ProbOrigFallThrough = ProbOrigTarget.getCompl(); + } + } // Create a new basic block. MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore; @@ -180,11 +204,16 @@ // Move everything after SplitBefore into the new block. NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end()); NewMBB->transferSuccessors(ThisMBB); + if (!ProbOrigTarget.isUnknown()) { + auto MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigTarget); + NewMBB->setSuccProbability(MBBI, ProbOrigTarget); + MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigFallThrough); + NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough); + } - // Add the two successors to ThisMBB. The probabilities come from the - // existing blocks if available. + // Add the two successors to ThisMBB. ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget); - ThisMBB->addSuccessor(NewMBB, ProbToNewTarget.getCompl()); + ThisMBB->addSuccessor(NewMBB, ProbFallThrough); // Add the branches to ThisMBB. BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(), Index: llvm/trunk/test/CodeGen/PowerPC/reduce_cr.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/reduce_cr.ll +++ llvm/trunk/test/CodeGen/PowerPC/reduce_cr.ll @@ -0,0 +1,88 @@ +; RUN: llc -O2 -ppc-reduce-cr-logicals -print-machine-bfi -o - %s 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-grtev4-linux-gnu" + +; First block frequency info +;CHECK: block-frequency-info: loop_test +;CHECK-NEXT: - BB0[entry]: float = 1.0, int = 12 +;CHECK-NEXT: - BB1[for.check]: float = 2.6667, int = 34 +;CHECK-NEXT: - BB2[test1]: float = 1.6667, int = 21 +;CHECK-NEXT: - BB3[optional1]: float = 0.625, int = 8 + +;CHECK: block-frequency-info: loop_test +;CHECK: block-frequency-info: loop_test +;CHECK: block-frequency-info: loop_test + +; Last block frequency info +;CHECK: block-frequency-info: loop_test +;CHECK-NEXT: - BB0[entry]: float = 1.0, int = 12 +;CHECK-NEXT: - BB1[for.check]: float = 2.6667, int = 34 +;CHECK-NEXT: - BB2[for.check]: float = 2.1667, int = 27 +;CHECK-NEXT: - BB3[test1]: float = 1.6667, int = 21 +;CHECK-NEXT: - BB4[optional1]: float = 0.625, int = 8 + + +define void @loop_test(i32* %tags, i32 %count) { +entry: + br label %for.check +for.check: + %count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch] + %done.count = icmp ugt i32 %count.loop, 0 + %tag_ptr = getelementptr inbounds i32, i32* %tags, i32 %count + %tag = load i32, i32* %tag_ptr + %done.tag = icmp eq i32 %tag, 0 + %done = and i1 %done.count, %done.tag + br i1 %done, label %test1, label %exit, !prof !1 +test1: + %tagbit1 = and i32 %tag, 1 + %tagbit1eq0 = icmp eq i32 %tagbit1, 0 + br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1 +optional1: + call void @a() + call void @a() + call void @a() + call void @a() + br label %test2 +test2: + %tagbit2 = and i32 %tag, 2 + %tagbit2eq0 = icmp eq i32 %tagbit2, 0 + br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1 +optional2: + call void @b() + call void @b() + call void @b() + call void @b() + br label %test3 +test3: + %tagbit3 = and i32 %tag, 4 + %tagbit3eq0 = icmp eq i32 %tagbit3, 0 + br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1 +optional3: + call void @c() + call void @c() + call void @c() + call void @c() + br label %test4 +test4: + %tagbit4 = and i32 %tag, 8 + %tagbit4eq0 = icmp eq i32 %tagbit4, 0 + br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1 +optional4: + call void @d() + call void @d() + call void @d() + call void @d() + br label %for.latch +for.latch: + %count.sub = sub i32 %count.loop, 1 + br label %for.check +exit: + ret void +} + +declare void @a() +declare void @b() +declare void @c() +declare void @d() + +!1 = !{!"branch_weights", i32 5, i32 3} Index: llvm/trunk/test/CodeGen/PowerPC/select-i1-vs-i1.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/select-i1-vs-i1.ll +++ llvm/trunk/test/CodeGen/PowerPC/select-i1-vs-i1.ll @@ -1,4 +1,4 @@ -; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs -tail-dup-placement=false < %s | FileCheck %s ; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs \ ; RUN: -ppc-gen-isel=false < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s target datalayout = "E-m:e-i64:64-n32:64"