diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2354,6 +2354,24 @@ return HaveRewritablePHIs; } +// Check if the branch is non-unpredictable, and has a predictable behaviour. +static bool IsBranchPredictable(BranchInst *BI, + const TargetTransformInfo &TTI) { + if (BI->getMetadata(LLVMContext::MD_unpredictable)) + return false; + + uint64_t TWeight, FWeight; + if (!BI->extractProfMetadata(TWeight, FWeight) || (TWeight + FWeight) == 0) + return false; + + BranchProbability BITrueProb = + BranchProbability::getBranchProbability(TWeight, TWeight + FWeight); + BranchProbability BIFalseProb = BITrueProb.getCompl(); + + BranchProbability Likely = TTI.getPredictableBranchThreshold(); + return BITrueProb >= Likely || BIFalseProb >= Likely; +} + /// Speculate a conditional basic block flattening the CFG. /// /// Note that this is a very risky transform currently. Speculating @@ -2412,19 +2430,9 @@ } assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block"); - // If the branch is non-unpredictable, and is predicted to *not* branch to - // the `then` block, then avoid speculating it. - if (!BI->getMetadata(LLVMContext::MD_unpredictable)) { - uint64_t TWeight, FWeight; - if (BI->extractProfMetadata(TWeight, FWeight) && (TWeight + FWeight) != 0) { - uint64_t EndWeight = Invert ? TWeight : FWeight; - BranchProbability BIEndProb = - BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight); - BranchProbability Likely = TTI.getPredictableBranchThreshold(); - if (BIEndProb >= Likely) - return false; - } - } + // Avoid speculating predictable branches. + if (IsBranchPredictable(BI, TTI)) + return false; // Keep a count of how many times instructions are used within ThenBB when // they are candidates for sinking into ThenBB. Specifically: @@ -2795,30 +2803,9 @@ assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) && "Will have either one or two blocks to speculate."); - // If the branch is non-unpredictable, see if we either predictably jump to - // the merge bb (if we have only a single 'then' block), or if we predictably - // jump to one specific 'then' block (if we have two of them). - // It isn't beneficial to speculatively execute the code - // from the block that we know is predictably not entered. - if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) { - uint64_t TWeight, FWeight; - if (DomBI->extractProfMetadata(TWeight, FWeight) && - (TWeight + FWeight) != 0) { - BranchProbability BITrueProb = - BranchProbability::getBranchProbability(TWeight, TWeight + FWeight); - BranchProbability Likely = TTI.getPredictableBranchThreshold(); - BranchProbability BIFalseProb = BITrueProb.getCompl(); - if (IfBlocks.size() == 1) { - BranchProbability BIBBProb = - DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb; - if (BIBBProb >= Likely) - return false; - } else { - if (BITrueProb >= Likely || BIFalseProb >= Likely) - return false; - } - } - } + // Avoid speculating predictable branches. + if (IsBranchPredictable(DomBI, TTI)) + return false; // Don't try to fold an unreachable block. For example, the phi node itself // can't be the candidate if-condition for a select that we want to form. diff --git a/llvm/test/Transforms/PGOProfile/chr.ll b/llvm/test/Transforms/PGOProfile/chr.ll --- a/llvm/test/Transforms/PGOProfile/chr.ll +++ b/llvm/test/Transforms/PGOProfile/chr.ll @@ -471,14 +471,15 @@ ; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP10]], i32 [[SUM1_NONCHR]], i32 [[TMP11]], !prof [[PROF16]] ; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[TMP0]], 4 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 +; CHECK-NEXT: br i1 [[TMP13]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof [[PROF16]] +; CHECK: bb1.nonchr: ; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP0]], 8 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0 -; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP15]], i32 44, i32 88 +; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP15]], i32 44, i32 88, !prof [[PROF16]] ; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]] -; CHECK-NEXT: [[SUM5_NONCHR:%.*]] = select i1 [[TMP13]], i32 [[SUM2_NONCHR]], i32 [[SUM4_NONCHR]], !prof [[PROF16]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM5_NONCHR]], [[BB0_NONCHR]] ] +; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ] ; CHECK-NEXT: ret i32 [[SUM6]] ; entry: @@ -571,14 +572,15 @@ ; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP13]], i32 [[SUM1_NONCHR]], i32 [[TMP14]], !prof [[PROF16]] ; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[SUM0]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[TMP16]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof [[PROF16]] +; CHECK: bb1.nonchr: ; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP0]], 8 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP17]], 0 -; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP18]], i32 44, i32 88 +; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP18]], i32 44, i32 88, !prof [[PROF16]] ; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]] -; CHECK-NEXT: [[SUM5_NONCHR:%.*]] = select i1 [[TMP16]], i32 [[SUM2_NONCHR]], i32 [[SUM4_NONCHR]], !prof [[PROF16]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP7]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM5_NONCHR]], [[BB0_NONCHR]] ] +; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP7]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ] ; CHECK-NEXT: ret i32 [[SUM6]] ; entry: @@ -668,14 +670,15 @@ ; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[V4_NONCHR]], i32 [[SUM0]], i32 [[V8_NONCHR]], !prof [[PROF16]] ; CHECK-NEXT: [[V9_NONCHR:%.*]] = and i32 [[J0]], 4 ; CHECK-NEXT: [[V10_NONCHR:%.*]] = icmp eq i32 [[V9_NONCHR]], 0 +; CHECK-NEXT: br i1 [[V10_NONCHR]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof [[PROF16]] +; CHECK: bb1.nonchr: ; CHECK-NEXT: [[V11_NONCHR:%.*]] = and i32 [[I0]], 8 ; CHECK-NEXT: [[V12_NONCHR:%.*]] = icmp eq i32 [[V11_NONCHR]], 0 -; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[V12_NONCHR]], i32 44, i32 88 +; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[V12_NONCHR]], i32 44, i32 88, !prof [[PROF16]] ; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]] -; CHECK-NEXT: [[SUM5_NONCHR:%.*]] = select i1 [[V10_NONCHR]], i32 [[SUM2_NONCHR]], i32 [[SUM4_NONCHR]], !prof [[PROF16]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[V13]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM5_NONCHR]], [[BB0_NONCHR]] ] +; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[V13]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ] ; CHECK-NEXT: ret i32 [[SUM6]] ; entry: @@ -1753,14 +1756,15 @@ ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SUM0]], 85 ; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP7]], i32 [[SUM0]], i32 [[TMP8]], !prof [[PROF16]] +; CHECK-NEXT: br i1 [[TMP7]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof [[PROF16]] +; CHECK: bb1.nonchr: ; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP0]], 8 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP10]], i32 44, i32 88 +; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP10]], i32 44, i32 88, !prof [[PROF16]] ; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]] -; CHECK-NEXT: [[SUM5_NONCHR:%.*]] = select i1 [[TMP7]], i32 [[SUM2_NONCHR]], i32 [[SUM4_NONCHR]], !prof [[PROF16]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM5_NONCHR]], [[BB0_NONCHR]] ] +; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ] ; CHECK-NEXT: ret i32 [[SUM6]] ; entry: diff --git a/llvm/test/Transforms/SimplifyCFG/fold-two-entry-phi-node-with-one-block-profmd.ll b/llvm/test/Transforms/SimplifyCFG/fold-two-entry-phi-node-with-one-block-profmd.ll --- a/llvm/test/Transforms/SimplifyCFG/fold-two-entry-phi-node-with-one-block-profmd.ll +++ b/llvm/test/Transforms/SimplifyCFG/fold-two-entry-phi-node-with-one-block-profmd.ll @@ -34,8 +34,12 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @sideeffect0() ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[END:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: cond.true: ; CHECK-NEXT: [[V0:%.*]] = add i32 [[C:%.*]], [[D:%.*]] -; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 [[V0]], i32 0, !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[V0]], [[COND_TRUE]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: call void @sideeffect1() ; CHECK-NEXT: ret i32 [[RES]] ; diff --git a/llvm/test/Transforms/SimplifyCFG/speculatively-execute-block-profmd.ll b/llvm/test/Transforms/SimplifyCFG/speculatively-execute-block-profmd.ll --- a/llvm/test/Transforms/SimplifyCFG/speculatively-execute-block-profmd.ll +++ b/llvm/test/Transforms/SimplifyCFG/speculatively-execute-block-profmd.ll @@ -48,11 +48,12 @@ ; CHECK: dispatch: ; CHECK-NEXT: call void @sideeffect1() ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[END]], !prof [[PROF0:![0-9]+]] +; CHECK: cond.true: ; CHECK-NEXT: [[VAL:%.*]] = add i32 [[A]], [[B]] -; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP]], i32 [[VAL]], i32 0, !prof [[PROF0:![0-9]+]] ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[RES:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[DISPATCH]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ 0, [[DISPATCH]] ], [ [[VAL]], [[COND_TRUE]] ] ; CHECK-NEXT: call void @sideeffect2() ; CHECK-NEXT: ret i32 [[RES]] ;