diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -63,6 +63,7 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -2840,30 +2841,52 @@ } } -// Determine if the two branches share a common destination, -// and deduce a glue that we need to use to join branch's conditions -// to arrive at the common destination. +/// Determine if the two branches share a common destination and deduce a glue +/// that joins branch's conditions to arrive at the common destination if that +/// would be profitable. static Optional> -CheckIfCondBranchesShareCommonDestination(BranchInst *BI, BranchInst *PBI) { +shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, + const TargetTransformInfo *TTI) { assert(BI && PBI && BI->isConditional() && PBI->isConditional() && "Both blocks must end with a conditional branches."); assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) && "PredBB must be a predecessor of BB."); - if (PBI->getSuccessor(0) == BI->getSuccessor(0)) - return {{Instruction::Or, false}}; - else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) - return {{Instruction::And, false}}; - else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) - return {{Instruction::And, true}}; - else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) - return {{Instruction::Or, true}}; + // We have the potential to fold the conditions together, but if the + // predecessor branch is predictable, we may not want to merge them. + uint64_t PTWeight, PFWeight; + BranchProbability PBITrueProb, Likely; + if (PBI->extractProfMetadata(PTWeight, PFWeight) && + (PTWeight + PFWeight) != 0) { + PBITrueProb = + BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight); + Likely = TTI->getPredictableBranchThreshold(); + } + + if (PBI->getSuccessor(0) == BI->getSuccessor(0)) { + // Speculate the 2nd condition unless the 1st is probably true. + if (PBITrueProb.isUnknown() || PBITrueProb < Likely) + return {{Instruction::Or, false}}; + } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) { + // Speculate the 2nd condition unless the 1st is probably false. + if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely) + return {{Instruction::And, false}}; + } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) { + // Speculate the 2nd condition unless the 1st is probably true. + if (PBITrueProb.isUnknown() || PBITrueProb < Likely) + return {{Instruction::And, true}}; + } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) { + // Speculate the 2nd condition unless the 1st is probably false. + if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely) + return {{Instruction::Or, true}}; + } return None; } -static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, +static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, - MemorySSAUpdater *MSSAU) { + MemorySSAUpdater *MSSAU, + const TargetTransformInfo *TTI) { BasicBlock *BB = BI->getParent(); BasicBlock *PredBlock = PBI->getParent(); @@ -2871,7 +2894,7 @@ Instruction::BinaryOps Opc; bool InvertPredCond; std::tie(Opc, InvertPredCond) = - *CheckIfCondBranchesShareCommonDestination(BI, PBI); + *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI); LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); @@ -3059,8 +3082,8 @@ // Determine if the two branches share a common destination. Instruction::BinaryOps Opc; bool InvertPredCond; - if (auto Recepie = CheckIfCondBranchesShareCommonDestination(BI, PBI)) - std::tie(Opc, InvertPredCond) = *Recepie; + if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI)) + std::tie(Opc, InvertPredCond) = *Recipe; else continue; @@ -3077,7 +3100,7 @@ continue; } - return PerformBranchToCommonDestFolding(BI, PBI, DTU, MSSAU); + return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI); } return Changed; } diff --git a/llvm/test/Transforms/PGOProfile/chr.ll b/llvm/test/Transforms/PGOProfile/chr.ll --- a/llvm/test/Transforms/PGOProfile/chr.ll +++ b/llvm/test/Transforms/PGOProfile/chr.ll @@ -1277,11 +1277,12 @@ ; CHECK-LABEL: @test_chr_14( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4 -; CHECK-NEXT: [[V1:%.*]] = icmp ne i32 [[Z:%.*]], 1 +; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[Z:%.*]], 1 +; CHECK-NEXT: br i1 [[V1]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: entry.split.nonchr: ; CHECK-NEXT: [[V0:%.*]] = icmp eq i32 [[Z]], 0 ; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED:%.*]] -; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[V1]], [[V3_NONCHR]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[BB0_NONCHR:%.*]], label [[BB1:%.*]], !prof !19 +; CHECK-NEXT: br i1 [[V3_NONCHR]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof !16 ; CHECK: bb0.nonchr: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB1]] @@ -1912,7 +1913,7 @@ ; CHECK-NEXT: switch i64 [[I]], label [[BB2:%.*]] [ ; CHECK-NEXT: i64 2, label [[BB3_NONCHR2:%.*]] ; CHECK-NEXT: i64 86, label [[BB2_NONCHR1:%.*]] -; CHECK-NEXT: ], !prof !20 +; CHECK-NEXT: ], !prof !19 ; CHECK: bb2: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: call void @foo() @@ -2489,14 +2490,14 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !21 +; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !20 ; CHECK: bb0: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB1]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 -; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !21 +; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !20 ; CHECK: bb2: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[BB3]] @@ -2550,4 +2551,3 @@ ; CHECK: !16 = !{!"branch_weights", i32 0, i32 1} ; CHECK: !17 = !{!"branch_weights", i32 1, i32 1} ; CHECK: !18 = !{!"branch_weights", i32 1, i32 0} -; CHECK: !19 = !{!"branch_weights", i32 0, i32 1000} diff --git a/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll --- a/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll +++ b/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll @@ -636,16 +636,17 @@ ret i32 %outval } -; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata. +; Merging the icmps with logic-op defeats the purpose of the metadata. ; We can't tell which condition is expensive if they are combined. define void @or_icmps_harmful(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @or_icmps_harmful( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: br i1 [[EXPECTED_TRUE]], label [[EXIT:%.*]], label [[RARE:%.*]], !prof !19 +; CHECK: rare: ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_TRUE]], [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !19 +; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[EXIT]], label [[FALSE:%.*]] ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -668,16 +669,17 @@ ret void } -; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata. +; Merging the icmps with logic-op defeats the purpose of the metadata. ; We can't tell which condition is expensive if they are combined. define void @or_icmps_harmful_inverted(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @or_icmps_harmful_inverted( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sle i32 [[X:%.*]], -1 +; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: br i1 [[EXPECTED_FALSE]], label [[RARE:%.*]], label [[EXIT:%.*]], !prof !20 +; CHECK: rare: ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_FALSE]], [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !19 +; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[EXIT]], label [[FALSE:%.*]] ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -700,7 +702,8 @@ ret void } -; The probability threshold is set by a builtin_expect setting. +; The probability threshold is determined by a TTI setting. +; In this example, we are just short of strongly expected, so speculate. define void @or_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @or_icmps_not_that_harmful( @@ -708,7 +711,7 @@ ; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_TRUE]], [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !20 +; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !21 ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -731,13 +734,16 @@ ret void } +; The probability threshold is determined by a TTI setting. +; In this example, we are just short of strongly expected, so speculate. + define void @or_icmps_not_that_harmful_inverted(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @or_icmps_not_that_harmful_inverted( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_TRUE]], [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !21 +; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22 ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -760,13 +766,15 @@ ret void } +; The 1st cmp is probably true, so speculating the 2nd is probably a win. + define void @or_icmps_useful(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @or_icmps_useful( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_TRUE]], [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22 +; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !23 ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -789,13 +797,15 @@ ret void } +; The 1st cmp is probably false, so speculating the 2nd is probably a win. + define void @or_icmps_useful_inverted(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @or_icmps_useful_inverted( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1 ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_FALSE]], [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22 +; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !23 ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -849,16 +859,17 @@ ret void } -; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata. +; Merging the icmps with logic-op defeats the purpose of the metadata. ; We can't tell which condition is expensive if they are combined. define void @and_icmps_harmful(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @and_icmps_harmful( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: br i1 [[EXPECTED_FALSE]], label [[RARE:%.*]], label [[EXIT:%.*]], !prof !20 +; CHECK: rare: ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[EXPECTED_FALSE]], [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof !23 +; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[FALSE:%.*]], label [[EXIT]] ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -881,16 +892,17 @@ ret void } -; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata. +; Merging the icmps with logic-op defeats the purpose of the metadata. ; We can't tell which condition is expensive if they are combined. define void @and_icmps_harmful_inverted(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @and_icmps_harmful_inverted( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1 +; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1 +; CHECK-NEXT: br i1 [[EXPECTED_TRUE]], label [[EXIT:%.*]], label [[RARE:%.*]], !prof !19 +; CHECK: rare: ; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[EXPECTED_TRUE]], [[EXPENSIVE]] -; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof !23 +; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[FALSE:%.*]], label [[EXIT]] ; CHECK: false: ; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1 ; CHECK-NEXT: br label [[EXIT]] @@ -913,6 +925,9 @@ ret void } +; The probability threshold is determined by a TTI setting. +; In this example, we are just short of strongly expected, so speculate. + define void @and_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @and_icmps_not_that_harmful( ; CHECK-NEXT: entry: @@ -942,6 +957,9 @@ ret void } +; The probability threshold is determined by a TTI setting. +; In this example, we are just short of strongly expected, so speculate. + define void @and_icmps_not_that_harmful_inverted(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @and_icmps_not_that_harmful_inverted( ; CHECK-NEXT: entry: @@ -971,6 +989,8 @@ ret void } +; The 1st cmp is probably true, so speculating the 2nd is probably a win. + define void @and_icmps_useful(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @and_icmps_useful( ; CHECK-NEXT: entry: @@ -1000,6 +1020,8 @@ ret void } +; The 1st cmp is probably false, so speculating the 2nd is probably a win. + define void @and_icmps_useful_inverted(i32 %x, i32 %y, i8* %p) { ; CHECK-LABEL: @and_icmps_useful_inverted( ; CHECK-NEXT: entry: