diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -143,6 +143,11 @@ "disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion.")); +static cl::opt + SinkCheapSelectOperand("enable-cgp-sink-cheap-select-operand", cl::Hidden, + cl::init(false), + cl::desc("Sink select's cheap operand")); + static cl::opt AddrSinkUsingGEPs( "addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs.")); @@ -6564,19 +6569,27 @@ /// Check if V (an operand of a select instruction) is an expensive instruction /// that is only used once. -static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) { +static bool sinkSelectOperand(const TargetTransformInfo *TTI, + ArrayRef ASI, Value *V) { auto *I = dyn_cast(V); // If it's safe to speculatively execute, then it should not have side // effects; therefore, it's safe to sink and possibly *not* execute. - return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) && - TTI->getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency) >= - TargetTransformInfo::TCC_Expensive; + return I && + all_of(I->users(), + [&](User *user) { + return std::find(ASI.begin(), ASI.end(), + dyn_cast(user)) != ASI.end(); + }) && + isSafeToSpeculativelyExecute(I) && + (SinkCheapSelectOperand || + TTI->getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency) >= + TargetTransformInfo::TCC_Expensive); } /// Returns true if a SelectInst should be turned into an explicit branch. static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, - SelectInst *SI) { + ArrayRef ASI) { // If even a predictable select is cheap, then a branch can't be cheaper. if (!TLI->isPredictableSelectExpensive()) return false; @@ -6586,30 +6599,41 @@ // If metadata tells us that the select condition is obviously predictable, // then we want to replace the select with a branch. - uint64_t TrueWeight, FalseWeight; - if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { - uint64_t Max = std::max(TrueWeight, FalseWeight); - uint64_t Sum = TrueWeight + FalseWeight; - if (Sum != 0) { - auto Probability = BranchProbability::getBranchProbability(Max, Sum); - if (Probability > TTI->getPredictableBranchThreshold()) - return true; - } - } + if (any_of(ASI, [&](SelectInst *SI) { + uint64_t TrueWeight, FalseWeight; + if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { + uint64_t Max = std::max(TrueWeight, FalseWeight); + uint64_t Sum = TrueWeight + FalseWeight; + if (Sum != 0) { + auto Probability = + BranchProbability::getBranchProbability(Max, Sum); + if (Probability > TTI->getPredictableBranchThreshold()) + return true; + } + } + return false; + })) + return true; - CmpInst *Cmp = dyn_cast(SI->getCondition()); + CmpInst *Cmp = dyn_cast(ASI.back()->getCondition()); // If a branch is predictable, an out-of-order CPU can avoid blocking on its // comparison condition. If the compare has more than one use, there's // probably another cmov or setcc around, so it's not worth emitting a branch. - if (!Cmp || !Cmp->hasOneUse()) + if (!Cmp || !all_of(Cmp->uses(), [&](const Use &use) { + SelectInst *SI = dyn_cast(use.getUser()); + return SI && std::find(ASI.begin(), ASI.end(), SI) != ASI.end(); + })) { return false; + } // If either operand of the select is expensive and only needed on one side // of the select, we should form a branch. - if (sinkSelectOperand(TTI, SI->getTrueValue()) || - sinkSelectOperand(TTI, SI->getFalseValue())) - return true; + for (SelectInst *SI : ASI) { + if (sinkSelectOperand(TTI, ASI, SI->getTrueValue()) || + sinkSelectOperand(TTI, ASI, SI->getFalseValue())) + return true; + } return false; } @@ -6741,7 +6765,7 @@ SelectKind = TargetLowering::ScalarValSelect; if (TLI->isSelectSupported(SelectKind) && - (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize || + (!isFormingBranchFromSelectProfitable(TTI, TLI, ASI) || OptSize || llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()))) return false; @@ -6795,7 +6819,7 @@ // Sink expensive instructions into the conditional blocks to avoid executing // them speculatively. for (SelectInst *SI : ASI) { - if (sinkSelectOperand(TTI, SI->getTrueValue())) { + if (sinkSelectOperand(TTI, ASI, SI->getTrueValue())) { if (TrueBlock == nullptr) { TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", EndBlock->getParent(), EndBlock); @@ -6805,7 +6829,7 @@ auto *TrueInst = cast(SI->getTrueValue()); TrueInst->moveBefore(TrueBranch); } - if (sinkSelectOperand(TTI, SI->getFalseValue())) { + if (sinkSelectOperand(TTI, ASI, SI->getFalseValue())) { if (FalseBlock == nullptr) { FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", EndBlock->getParent(), EndBlock); diff --git a/llvm/test/CodeGen/PowerPC/cgp-select.ll b/llvm/test/CodeGen/PowerPC/cgp-select.ll --- a/llvm/test/CodeGen/PowerPC/cgp-select.ll +++ b/llvm/test/CodeGen/PowerPC/cgp-select.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O3 -mcpu=pwr9 -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s +; RUN: llc -O3 -mcpu=pwr9 -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \ +; RUN: -enable-cgp-sink-cheap-select-operand < %s | FileCheck %s define dso_local void @wibble(float* nocapture readonly %arg, i32 signext %arg1, i32* nocapture %arg2, float* nocapture %arg3) { ; CHECK-LABEL: wibble: @@ -17,18 +18,19 @@ ; CHECK-NEXT: li 4, 8 ; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB0_2: # %bb11 +; CHECK-NEXT: .LBB0_2: # %select.end ; CHECK-NEXT: # -; CHECK-NEXT: iselgt 7, 4, 7 ; CHECK-NEXT: addi 4, 4, 1 ; CHECK-NEXT: bdz .LBB0_5 ; CHECK-NEXT: .LBB0_3: # %bb11 ; CHECK-NEXT: # ; CHECK-NEXT: lfsu 1, 4(3) ; CHECK-NEXT: fcmpu 0, 1, 0 -; CHECK-NEXT: ble 0, .LBB0_2 -; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: bc 4, 1, .LBB0_2 +; CHECK-NEXT: # %bb.4: # %select.true.sink +; CHECK-NEXT: # ; CHECK-NEXT: xsaddsp 0, 1, 1 +; CHECK-NEXT: mr 7, 4 ; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_5: # %bb8 ; CHECK-NEXT: stw 7, 0(5)