Index: llvm/include/llvm/Analysis/BranchProbabilityInfo.h =================================================================== --- llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -24,6 +24,7 @@ #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include #include #include @@ -41,6 +42,9 @@ class TargetLibraryInfo; class Value; +extern cl::opt LikelyBranchWeight; +extern cl::opt UnlikelyBranchWeight; + /// Analysis providing branch probability information. /// /// This is a function analysis which provides information on the relative Index: llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h =================================================================== --- llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h +++ llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h @@ -17,7 +17,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" -#include "llvm/Support/CommandLine.h" namespace llvm { @@ -32,8 +31,6 @@ PreservedAnalyses run(Function &F, FunctionAnalysisManager &); }; -extern cl::opt LikelyBranchWeight; -extern cl::opt UnlikelyBranchWeight; } #endif Index: llvm/lib/Analysis/BranchProbabilityInfo.cpp =================================================================== --- llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -57,6 +57,21 @@ cl::desc("The option to specify the name of the function " "whose branch probability info is printed.")); +// These default values are chosen to represent an extremely skewed outcome for +// a condition, but they leave some room for interpretation by later passes. +// +// If the documentation for __builtin_expect() was made explicit that it should +// only be used in extreme cases, we could make this ratio higher. As it stands, +// programmers may be using __builtin_expect() / llvm.expect to annotate that a +// branch is likely or unlikely to be taken. + +cl::opt llvm::LikelyBranchWeight( + "likely-branch-weight", cl::Hidden, cl::init(2000), + cl::desc("Weight of the branch likely to be taken (default = 2000)")); +cl::opt llvm::UnlikelyBranchWeight( + "unlikely-branch-weight", cl::Hidden, cl::init(1), + cl::desc("Weight of the branch unlikely to be taken (default = 1)")); + INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) Index: llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp +++ llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -34,25 +35,6 @@ STATISTIC(ExpectIntrinsicsHandled, "Number of 'expect' intrinsic instructions handled"); -// These default values are chosen to represent an extremely skewed outcome for -// a condition, but they leave some room for interpretation by later passes. -// -// If the documentation for __builtin_expect() was made explicit that it should -// only be used in extreme cases, we could make this ratio higher. As it stands, -// programmers may be using __builtin_expect() / llvm.expect to annotate that a -// branch is likely or unlikely to be taken. -// -// There is a known dependency on this ratio in CodeGenPrepare when transforming -// 'select' instructions. It may be worthwhile to hoist these values to some -// shared space, so they can be used directly by other passes. - -cl::opt llvm::LikelyBranchWeight( - "likely-branch-weight", cl::Hidden, cl::init(2000), - cl::desc("Weight of the branch likely to be taken (default = 2000)")); -cl::opt llvm::UnlikelyBranchWeight( - "unlikely-branch-weight", cl::Hidden, cl::init(1), - cl::desc("Weight of the branch unlikely to be taken (default = 1)")); - static std::tuple getBranchWeight(Intrinsic::ID IntrinsicID, CallInst *CI, int BranchCount) { if (IntrinsicID == Intrinsic::expect) { Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/GuardUtils.h" @@ -2844,20 +2845,40 @@ // and deduce a glue that we need to use to join branch's conditions // to arrive at the common destination. static Optional> -CheckIfCondBranchesShareCommonDestination(BranchInst *BI, BranchInst *PBI) { +shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI) { assert(BI && PBI && BI->isConditional() && PBI->isConditional() && "Both blocks must end with a conditional branches."); assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) && "PredBB must be a predecessor of BB."); - if (PBI->getSuccessor(0) == BI->getSuccessor(0)) - return {{Instruction::Or, false}}; - else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) - return {{Instruction::And, false}}; - else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) - return {{Instruction::And, true}}; - else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) - return {{Instruction::Or, true}}; + // We have the potential to fold the conditions together, but if the + // predecessor branch is predictable, we may not want to merge them. + uint64_t TWeight, FWeight; + BranchProbability PBITrueProb, Likely; + if (PBI->extractProfMetadata(TWeight, FWeight) && (TWeight + FWeight) != 0) { + PBITrueProb = + BranchProbability::getBranchProbability(TWeight, TWeight + FWeight); + Likely = BranchProbability::getBranchProbability( + LikelyBranchWeight, LikelyBranchWeight + UnlikelyBranchWeight); + } + + if (PBI->getSuccessor(0) == BI->getSuccessor(0)) { + // Speculate the 2nd condition unless the 1st is probably true. + if (PBITrueProb.isUnknown() || PBITrueProb < Likely) + return {{Instruction::Or, false}}; + } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) { + // Speculate the 2nd condition unless the 1st is probably false. + if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely) + return {{Instruction::And, false}}; + } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) { + // Speculate the 2nd condition unless the 1st is probably true. + if (PBITrueProb.isUnknown() || PBITrueProb < Likely) + return {{Instruction::And, true}}; + } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) { + // Speculate the 2nd condition unless the 1st is probably false. + if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely) + return {{Instruction::Or, true}}; + } return None; } @@ -2871,7 +2892,7 @@ Instruction::BinaryOps Opc; bool InvertPredCond; std::tie(Opc, InvertPredCond) = - *CheckIfCondBranchesShareCommonDestination(BI, PBI); + *shouldFoldCondBranchesToCommonDestination(BI, PBI); LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); @@ -3059,8 +3080,8 @@ // Determine if the two branches share a common destination. Instruction::BinaryOps Opc; bool InvertPredCond; - if (auto Recepie = CheckIfCondBranchesShareCommonDestination(BI, PBI)) - std::tie(Opc, InvertPredCond) = *Recepie; + if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI)) + std::tie(Opc, InvertPredCond) = *Recipe; else continue; Index: llvm/test/Transforms/PGOProfile/chr.ll =================================================================== --- llvm/test/Transforms/PGOProfile/chr.ll +++ llvm/test/Transforms/PGOProfile/chr.ll @@ -1277,11 +1277,12 @@ ; CHECK-LABEL: @test_chr_14( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4 -; CHECK-NEXT: [[V1:%.*]] = icmp ne i32 [[Z:%.*]], 1 +; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[Z:%.*]], 1 +; CHECK-NEXT: br i1 [[V1]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: entry.split.nonchr: ; CHECK-NEXT: [[V0:%.*]] = icmp eq i32 [[Z]], 0 ; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED:%.*]] -; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[V1]], [[V3_NONCHR]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[BB0_NONCHR:%.*]], label [[BB1:%.*]], !prof !19 +; CHECK-NEXT: br i1 [[V3_NONCHR]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof !16 ; CHECK: bb0.nonchr: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB1]] @@ -1912,7 +1913,7 @@ ; CHECK-NEXT: switch i64 [[I]], label [[BB2:%.*]] [ ; CHECK-NEXT: i64 2, label [[BB3_NONCHR2:%.*]] ; CHECK-NEXT: i64 86, label [[BB2_NONCHR1:%.*]] -; CHECK-NEXT: ], !prof !20 +; CHECK-NEXT: ], !prof !19 ; CHECK: bb2: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: call void @foo() @@ -2489,14 +2490,14 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !21 +; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !20 ; CHECK: bb0: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB1]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 -; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !21 +; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !20 ; CHECK: bb2: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB3]] Index: llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll =================================================================== --- llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll +++ llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll @@ -636,16 +636,17 @@ ret i32 %outval } -; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata. +; Merging the icmps with logic-op defeats the purpose of the metadata. ; We can't tell which condition is expensive if they are combined. define void @or_icmps_harmful(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @or_icmps_harmful( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: br i1 [[EXPECTED_TRUE]], label [[EXIT:%.*]], label [[RARE:%.*]], !prof !19 +; CHECK: rare: ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_TRUE]], [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !19 +; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[EXIT]], label [[FALSE:%.*]] ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -668,16 +669,17 @@ ret void } -; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata. +; Merging the icmps with logic-op defeats the purpose of the metadata. ; We can't tell which condition is expensive if they are combined. define void @or_icmps_harmful_inverted(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @or_icmps_harmful_inverted( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sle i32 [[X:%.*]], -1 +; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: br i1 [[EXPECTED_FALSE]], label [[RARE:%.*]], label [[EXIT:%.*]], !prof !20 +; CHECK: rare: ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_FALSE]], [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !19 +; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[EXIT]], label [[FALSE:%.*]] ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -701,6 +703,7 @@ } ; The probability threshold is set by a builtin_expect setting. +; In this example, we are just short of strongly expected, so speculate. define void @or_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @or_icmps_not_that_harmful( @@ -708,7 +711,7 @@ ; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_TRUE]], [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !20 +; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !21 ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -731,13 +734,16 @@ ret void } +; The probability threshold is set by a builtin_expect setting. +; In this example, we are just short of strongly expected, so speculate. + define void @or_icmps_not_that_harmful_inverted(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @or_icmps_not_that_harmful_inverted( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_TRUE]], [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !21 +; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22 ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -760,13 +766,15 @@ ret void } +; The 1st cmp is probably true, so speculating the 2nd is probably a win. + define void @or_icmps_useful(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @or_icmps_useful( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_TRUE]], [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22 +; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !23 ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -789,13 +797,15 @@ ret void } +; The 1st cmp is probably false, so speculating the 2nd is probably a win. + define void @or_icmps_useful_inverted(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @or_icmps_useful_inverted( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_FALSE]], [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22 +; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !23 ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -849,16 +859,17 @@ ret void } -; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata. +; Merging the icmps with logic-op defeats the purpose of the metadata. ; We can't tell which condition is expensive if they are combined. define void @and_icmps_harmful(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @and_icmps_harmful( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: br i1 [[EXPECTED_FALSE]], label [[RARE:%.*]], label [[EXIT:%.*]], !prof !20 +; CHECK: rare: ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[EXPECTED_FALSE]], [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof !23 +; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[FALSE:%.*]], label [[EXIT]] ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -881,16 +892,17 @@ ret void } -; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata. +; Merging the icmps with logic-op defeats the purpose of the metadata. ; We can't tell which condition is expensive if they are combined. define void @and_icmps_harmful_inverted(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @and_icmps_harmful_inverted( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1 +; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: br i1 [[EXPECTED_TRUE]], label [[EXIT:%.*]], label [[RARE:%.*]], !prof !19 +; CHECK: rare: ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[EXPECTED_TRUE]], [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof !23 +; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[FALSE:%.*]], label [[EXIT]] ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -913,6 +925,9 @@ ret void } +; The probability threshold is set by a builtin_expect setting. +; In this example, we are just short of strongly expected, so speculate. + define void @and_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @and_icmps_not_that_harmful( ; CHECK-NEXT: entry: @@ -942,6 +957,9 @@ ret void } +; The probability threshold is set by a builtin_expect setting. +; In this example, we are just short of strongly expected, so speculate. + define void @and_icmps_not_that_harmful_inverted(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @and_icmps_not_that_harmful_inverted( ; CHECK-NEXT: entry: @@ -971,6 +989,8 @@ ret void } +; The 1st cmp is probably true, so speculating the 2nd is probably a win. + define void @and_icmps_useful(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @and_icmps_useful( ; CHECK-NEXT: entry: @@ -1000,6 +1020,8 @@ ret void } +; The 1st cmp is probably false, so speculating the 2nd is probably a win. + define void @and_icmps_useful_inverted(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @and_icmps_useful_inverted( ; CHECK-NEXT: entry: