Index: lib/Target/AArch64/AArch64ConditionalCompares.cpp =================================================================== --- lib/Target/AArch64/AArch64ConditionalCompares.cpp +++ lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -139,6 +140,7 @@ const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; + const MachineBranchProbabilityInfo *MBPI; public: /// The first block containing a conditional branch, dominating everything @@ -186,8 +188,10 @@ public: /// runOnMachineFunction - Initialize per-function data structures. - void runOnMachineFunction(MachineFunction &MF) { + void runOnMachineFunction(MachineFunction &MF, + const MachineBranchProbabilityInfo *MBPI) { this->MF = &MF; + this->MBPI = MBPI; TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); @@ -564,8 +568,39 @@ // All CmpBB instructions are moved into Head, and CmpBB is deleted. // Update the CFG first. updateTailPHIs(); - Head->removeSuccessor(CmpBB, true); - CmpBB->removeSuccessor(Tail, true); + + // Save successor probabilities before removing CmpBB and Tail from their + // parents. + BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB); + BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail); + + Head->removeSuccessor(CmpBB); + CmpBB->removeSuccessor(Tail); + + // If Head and CmpBB had successor probabilities, update the probabilities to + // reflect the ccmp-conversion. + if (Head->hasSuccessorProbabilities() && CmpBB->hasSuccessorProbabilities()) { + + // Head is allowed two successors. We've removed CmpBB, so the remaining + // successor is Tail. 
We need to increase the successor probability for Tail + // to account for the CmpBB path we removed. + // + // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB). + BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail); + Head->setSuccProbability(Head->succ_begin(), + Head2Tail + Head2CmpBB * CmpBB2Tail); + + // We will transfer successors of CmpBB to Head in a moment without + // normalizing the successor probabilities. Set the successor probabilities + // before doing so. + // + // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB). + for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) { + BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I); + CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I); + } + } + Head->transferSuccessorsAndUpdatePHIs(CmpBB); DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc(); TII->removeBranch(*Head); @@ -717,6 +752,7 @@ namespace { class AArch64ConditionalCompares : public MachineFunctionPass { + const MachineBranchProbabilityInfo *MBPI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MCSchedModel SchedModel; @@ -753,6 +789,7 @@ INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp", "AArch64 CCMP Pass", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp", @@ -763,6 +800,7 @@ } void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -892,12 +930,13 @@ MRI = &MF.getRegInfo(); DomTree = &getAnalysis(); Loops = getAnalysisIfAvailable(); + MBPI = &getAnalysis(); Traces = &getAnalysis(); MinInstr = nullptr; MinSize = MF.getFunction()->optForMinSize(); bool Changed = false; - CmpConv.runOnMachineFunction(MF); + CmpConv.runOnMachineFunction(MF, MBPI); // Visit blocks in dominator tree pre-order. 
The pre-order enables multiple // cmp-conversions from the same head block. Index: test/CodeGen/AArch64/arm64-ccmp.ll =================================================================== --- test/CodeGen/AArch64/arm64-ccmp.ll +++ test/CodeGen/AArch64/arm64-ccmp.ll @@ -108,9 +108,9 @@ ; CHECK: cmp w0, #1 ; CHECK: sdiv [[DIVRES:w[0-9]+]], w1, w0 ; CHECK: ccmp [[DIVRES]], #16, #0, ge -; CHECK: b.gt [[BLOCK:LBB[0-9_]+]] -; CHECK: bl _foo +; CHECK: b.le [[BLOCK:LBB[0-9_]+]] ; CHECK: [[BLOCK]]: +; CHECK: bl _foo ; CHECK: orr w0, wzr, #0x7 define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp { entry: @@ -135,7 +135,7 @@ ; CHECK: cmp ; CHECK-NOT: b. ; CHECK: fccmp {{.*}}, #8, ge -; CHECK: b.lt +; CHECK: b.ge define i32 @single_fcmp(i32 %a, float %b) nounwind ssp { entry: %cmp = icmp sgt i32 %a, 0 Index: test/CodeGen/AArch64/ccmp-successor-probs.mir =================================================================== --- /dev/null +++ test/CodeGen/AArch64/ccmp-successor-probs.mir @@ -0,0 +1,46 @@ +# RUN: llc -o - %s -mtriple=aarch64--linux-gnu -mcpu=falkor -run-pass=aarch64-ccmp | FileCheck %s +--- +# This test checks that successor probabilities are properly updated after a +# ccmp-conversion. 
+# +# CHECK-LABEL: name: aarch64-ccmp-successor-probs +# CHECK: bb.0: +# CHECK-NEXT: successors: %bb.2(0x04000000), %bb.3(0x7c000000) +# CHECK: CCMPXr %5, %4, 0, 10, implicit-def %nzcv, implicit %nzcv +# +name: aarch64-ccmp-successor-probs +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: gpr64 } + - { id: 3, class: gpr64 } + - { id: 4, class: gpr64 } + - { id: 5, class: gpr64 } + - { id: 6, class: gpr64 } + - { id: 7, class: gpr64 } +body : | + bb.0: + successors: %bb.1(0x7e000000), %bb.2(0x02000000) + + %0 = LDRXui killed %x0, 69 + %1 = COPY %xzr + %2 = SUBSXrr %1, %0, implicit-def dead %nzcv + %3 = SUBSXri %x1, 1, 0, implicit-def dead %nzcv + %4 = COPY %0 + %5 = COPY %3 + %6 = SUBSXrr %x1, killed %2, implicit-def %nzcv + Bcc 11, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + successors: %bb.2(0x02082082), %bb.3(0x7df7df7e) + + %7 = SUBSXrr %5, %4, implicit-def %nzcv + Bcc 12, %bb.2, implicit %nzcv + B %bb.3 + + bb.2: + successors: %bb.3(0x80000000) + + bb.3: +...