Index: include/llvm/Support/BranchProbability.h =================================================================== --- include/llvm/Support/BranchProbability.h +++ include/llvm/Support/BranchProbability.h @@ -17,6 +17,7 @@ #include #include #include +#include #include namespace llvm { @@ -156,6 +157,13 @@ return Prob; } + BranchProbability sqrt() const { + double P = (double)N / (double)D; + P = std::sqrt(P); + BranchProbability Prob((uint32_t)(P * D)); + return Prob; + } + bool operator==(BranchProbability RHS) const { return N == RHS.N; } bool operator!=(BranchProbability RHS) const { return !(*this == RHS); } Index: lib/Target/PowerPC/PPCReduceCRLogicals.cpp =================================================================== --- lib/Target/PowerPC/PPCReduceCRLogicals.cpp +++ lib/Target/PowerPC/PPCReduceCRLogicals.cpp @@ -166,9 +166,27 @@ : *ThisMBB->succ_begin(); MachineBasicBlock *NewBRTarget = BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget; - BranchProbability ProbToNewTarget = - !BSI.MBPI ? BranchProbability::getUnknown() - : BSI.MBPI->getEdgeProbability(ThisMBB, NewBRTarget); + + // It's impossible to know the precise branch probability after the split. + // But it still need to be reasonable, the whole probability to original + // targets should not be changed. Here we assume the two impacted branches + // have same probability. + BranchProbability ProbToNewTarget, ProbFallThrough; // Prob for new Br. + BranchProbability ProbOrigTarget, ProbOrigFallThrough; // Prob for orig Br. + ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown(); + ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown(); + if (BSI.MBPI) { + if (BSI.BranchToFallThrough) { + ProbFallThrough = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget).sqrt(); + ProbOrigTarget = ProbFallThrough; + ProbOrigFallThrough = ProbOrigTarget.getCompl(); + } else { + ProbFallThrough = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough).sqrt(); + ProbOrigFallThrough = ProbFallThrough; + ProbOrigTarget = ProbOrigFallThrough.getCompl(); + } + ProbToNewTarget = ProbFallThrough.getCompl(); + } // Create a new basic block. MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore; @@ -180,11 +198,16 @@ // Move everything after SplitBefore into the new block. NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end()); NewMBB->transferSuccessors(ThisMBB); + if (!ProbOrigTarget.isUnknown()) { + auto MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigTarget); + NewMBB->setSuccProbability(MBBI, ProbOrigTarget); + MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigFallThrough); + NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough); + } - // Add the two successors to ThisMBB. The probabilities come from the - // existing blocks if available. + // Add the two successors to ThisMBB. ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget); - ThisMBB->addSuccessor(NewMBB, ProbToNewTarget.getCompl()); + ThisMBB->addSuccessor(NewMBB, ProbFallThrough); // Add the branches to ThisMBB. BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(), Index: test/CodeGen/PowerPC/reduce_cr.ll =================================================================== --- test/CodeGen/PowerPC/reduce_cr.ll +++ test/CodeGen/PowerPC/reduce_cr.ll @@ -0,0 +1,88 @@ +; RUN: llc -O2 -ppc-reduce-cr-logicals -print-machine-bfi -o - %s 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-grtev4-linux-gnu" + +; First block frequency info +;CHECK: block-frequency-info: loop_test +;CHECK-NEXT: - BB0[entry]: float = 1.0, int = 12 +;CHECK-NEXT: - BB1[for.check]: float = 2.6667, int = 34 +;CHECK-NEXT: - BB2[test1]: float = 1.6667, int = 21 +;CHECK-NEXT: - BB3[optional1]: float = 0.625, int = 8 + +;CHECK: block-frequency-info: loop_test +;CHECK: block-frequency-info: loop_test +;CHECK: block-frequency-info: loop_test + +; Last block frequency info +;CHECK: block-frequency-info: loop_test +;CHECK-NEXT: - BB0[entry]: float = 1.0, int = 12 +;CHECK-NEXT: - BB1[for.check]: float = 2.6667, int = 34 +;CHECK-NEXT: - BB2[for.check]: float = 2.1082, int = 26 +;CHECK-NEXT: - BB3[test1]: float = 1.6667, int = 21 +;CHECK-NEXT: - BB4[optional1]: float = 0.625, int = 8 + + +define void @loop_test(i32* %tags, i32 %count) { +entry: + br label %for.check +for.check: + %count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch] + %done.count = icmp ugt i32 %count.loop, 0 + %tag_ptr = getelementptr inbounds i32, i32* %tags, i32 %count + %tag = load i32, i32* %tag_ptr + %done.tag = icmp eq i32 %tag, 0 + %done = and i1 %done.count, %done.tag + br i1 %done, label %test1, label %exit, !prof !1 +test1: + %tagbit1 = and i32 %tag, 1 + %tagbit1eq0 = icmp eq i32 %tagbit1, 0 + br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1 +optional1: + call void @a() + call void @a() + call void @a() + call void @a() + br label %test2 +test2: + %tagbit2 = and i32 %tag, 2 + %tagbit2eq0 = icmp eq i32 %tagbit2, 0 + br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1 +optional2: + call void @b() + call void @b() + call void @b() + call void @b() + br label %test3 +test3: + %tagbit3 = and i32 %tag, 4 + %tagbit3eq0 = icmp eq i32 %tagbit3, 0 + br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1 +optional3: + call void @c() + call void @c() + call void @c() + call void @c() + br label %test4 +test4: + %tagbit4 = and i32 %tag, 8 + %tagbit4eq0 = icmp eq i32 %tagbit4, 0 + br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1 +optional4: + call void @d() + call void @d() + call void @d() + call void @d() + br label %for.latch +for.latch: + %count.sub = sub i32 %count.loop, 1 + br label %for.check +exit: + ret void +} + +declare void @a() +declare void @b() +declare void @c() +declare void @d() + +!1 = !{!"branch_weights", i32 5, i32 3} Index: test/CodeGen/PowerPC/select-i1-vs-i1.ll =================================================================== --- test/CodeGen/PowerPC/select-i1-vs-i1.ll +++ test/CodeGen/PowerPC/select-i1-vs-i1.ll @@ -1,4 +1,4 @@ -; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs -tail-dup-placement=false < %s | FileCheck %s ; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs \ ; RUN: -ppc-gen-isel=false < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s target datalayout = "E-m:e-i64:64-n32:64"