diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -143,6 +143,12 @@
   "disable-cgp-select2branch", cl::Hidden, cl::init(false),
   cl::desc("Disable select to branch conversion."));
 
+static cl::opt<float> SinkSelectOperandRatioAgainstMisprediction(
+    "cgp-sink-select-operand-ratio-against-misprediction", cl::Hidden,
+    cl::init(0),
+    cl::desc("If (select operand cycles) * ratio > (branch misprection "
+             "penalty), sink the operand."));
+
 static cl::opt<bool> AddrSinkUsingGEPs(
   "addr-sink-using-gep", cl::Hidden, cl::init(true),
   cl::desc("Address sinking in CGP using GEPs."));
@@ -6563,20 +6569,59 @@
 }
 
 /// Check if V (an operand of a select instruction) is an expensive instruction
-/// that is only used once.
-static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
+/// that is used only by the selects in ASI, consistently on the same side.
+static bool sinkSelectOperand(const TargetTransformInfo *TTI,
+                              const TargetSubtargetInfo *SubtargetInfo,
+                              ArrayRef<SelectInst *> ASI, Value *V) {
   auto *I = dyn_cast<Instruction>(V);
+  if (!I || I->user_empty())
+    return false;
+  auto IsUseConsistent = [&]() {
+    bool HasTrueOperandUse = false, HasFalseOperandUse = false;
+    for (SelectInst *SI : ASI) {
+      if (V == SI->getTrueValue())
+        HasTrueOperandUse = true;
+      else if (V == SI->getFalseValue())
+        HasFalseOperandUse = true;
+    }
+    return !(HasTrueOperandUse && HasFalseOperandUse);
+  };
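+  // For example, given "a = select c, x, y" and "b = select c, x, z" in ASI,
+  // x is consistently the true operand and can be sunk into a single
+  // conditional block; if x also appeared as a false operand, it would be
+  // needed on both paths and sinking would not pay off.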
+  const MCSchedModel &SM = SubtargetInfo->getSchedModel();
+  // FIXME: InstructionCost is not measured in cycles, but MispredictPenalty
+  // is, so we cannot compare the result of getUserCost against
+  // MispredictPenalty directly.
+  static const unsigned CheapInstCycles = 1;
   // If it's safe to speculatively execute, then it should not have side
   // effects; therefore, it's safe to sink and possibly *not* execute.
-  return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
-         TTI->getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency) >=
-         TargetTransformInfo::TCC_Expensive;
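+  // With CheapInstCycles == 1, the fallback condition below reduces to
+  // SinkSelectOperandRatioAgainstMisprediction > MispredictPenalty: e.g. a
+  // ratio of 20 sinks even cheap operands on any subtarget whose penalty is
+  // below 20 cycles, while the default ratio of 0 disables the fallback.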
+  return all_of(I->users(),
+                [&](User *user) {
+                  // Check if all users of V in ASI consistently use V as true
+                  // or false operand.
+                  SelectInst *U = dyn_cast<SelectInst>(user);
+                  return U &&
+                         std::find(ASI.begin(), ASI.end(), U) != ASI.end() &&
+                         IsUseConsistent();
+                }) &&
+         isSafeToSpeculativelyExecute(I) &&
+         (TTI->getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency) >=
+              TargetTransformInfo::TCC_Expensive ||
+          (CheapInstCycles * SinkSelectOperandRatioAgainstMisprediction >
+           SM.MispredictPenalty));
 }
 
 /// Returns true if a SelectInst should be turned into an explicit branch.
 static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
                                                 const TargetLowering *TLI,
-                                                SelectInst *SI) {
+                                                const TargetSubtargetInfo *SubtargetInfo,
+                                                ArrayRef<SelectInst *> ASI) {
   // If even a predictable select is cheap, then a branch can't be cheaper.
   if (!TLI->isPredictableSelectExpensive())
     return false;
@@ -6586,6 +6623,8 @@
 
   // If metadata tells us that the select condition is obviously predictable,
   // then we want to replace the select with a branch.
+  assert(!ASI.empty() && "ASI should have at least one SelectInst.");
+  SelectInst *SI = ASI.back();
   uint64_t TrueWeight, FalseWeight;
   if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
     uint64_t Max = std::max(TrueWeight, FalseWeight);
@@ -6600,16 +6639,25 @@
   CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
 
   // If a branch is predictable, an out-of-order CPU can avoid blocking on its
-  // comparison condition. If the compare has more than one use, there's
-  // probably another cmov or setcc around, so it's not worth emitting a branch.
-  if (!Cmp || !Cmp->hasOneUse())
+  // comparison condition. If all of the compare's uses are selects in the
+  // same group, we still try to form a branch, since selects are considered
+  // expensive at this point.
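+  // E.g., one compare feeding three selects in the same group (see @foo in
+  // llvm/test/CodeGen/PowerPC/cgp-select.ll) no longer blocks branch
+  // formation the way the previous hasOneUse() check did.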
+  if (!Cmp || !all_of(Cmp->uses(), [&](const Use &U) {
+        SelectInst *Sel = dyn_cast<SelectInst>(U.getUser());
+        return Sel && llvm::is_contained(ASI, Sel);
+      }))
     return false;
 
   // If either operand of the select is expensive and only needed on one side
   // of the select, we should form a branch.
-  if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
-      sinkSelectOperand(TTI, SI->getFalseValue()))
-    return true;
+  for (SelectInst *Sel : ASI) {
+    if (sinkSelectOperand(TTI, SubtargetInfo, ASI, Sel->getTrueValue()) ||
+        sinkSelectOperand(TTI, SubtargetInfo, ASI, Sel->getFalseValue()))
+      return true;
+  }
 
   return false;
 }
@@ -6741,8 +6786,8 @@
     SelectKind = TargetLowering::ScalarValSelect;
 
   if (TLI->isSelectSupported(SelectKind) &&
-      (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize ||
-       llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
+      (!isFormingBranchFromSelectProfitable(TTI, TLI, SubtargetInfo, ASI) ||
+       OptSize || llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
     return false;
 
   // The DominatorTree needs to be rebuilt by any consumers after this
@@ -6795,7 +6840,7 @@
   // Sink expensive instructions into the conditional blocks to avoid executing
   // them speculatively.
   for (SelectInst *SI : ASI) {
-    if (sinkSelectOperand(TTI, SI->getTrueValue())) {
+    if (sinkSelectOperand(TTI, SubtargetInfo, ASI, SI->getTrueValue())) {
       if (TrueBlock == nullptr) {
         TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
                                        EndBlock->getParent(), EndBlock);
@@ -6805,7 +6850,7 @@
       auto *TrueInst = cast<Instruction>(SI->getTrueValue());
       TrueInst->moveBefore(TrueBranch);
     }
-    if (sinkSelectOperand(TTI, SI->getFalseValue())) {
+    if (sinkSelectOperand(TTI, SubtargetInfo, ASI, SI->getFalseValue())) {
       if (FalseBlock == nullptr) {
         FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
                                         EndBlock->getParent(), EndBlock);
diff --git a/llvm/test/CodeGen/PowerPC/cgp-select.ll b/llvm/test/CodeGen/PowerPC/cgp-select.ll
--- a/llvm/test/CodeGen/PowerPC/cgp-select.ll
+++ b/llvm/test/CodeGen/PowerPC/cgp-select.ll
@@ -1,5 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -O3 -mcpu=pwr9 -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; RUN: llc -O3 -mcpu=pwr9 -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -cgp-sink-select-operand-ratio-against-misprediction=20 < %s | FileCheck --check-prefix=CHECK-SINK %s
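+; The second invocation raises the sink ratio to 20, which exceeds pwr9's
+; branch misprediction penalty, so even cheap select operands are sunk.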
 
 define dso_local void @wibble(float* nocapture readonly %arg, i32 signext %arg1, i32* nocapture %arg2, float* nocapture %arg3) {
 ; CHECK-LABEL: wibble:
@@ -34,6 +36,40 @@
 ; CHECK-NEXT:    stw 7, 0(5)
 ; CHECK-NEXT:    stfs 0, 0(6)
 ; CHECK-NEXT:    blr
+;
+; CHECK-SINK-LABEL: wibble:
+; CHECK-SINK:       # %bb.0: # %bb
+; CHECK-SINK-NEXT:    lfs 0, 0(3)
+; CHECK-SINK-NEXT:    li 7, 7
+; CHECK-SINK-NEXT:    cmpwi 4, 2
+; CHECK-SINK-NEXT:    xsaddsp 0, 0, 0
+; CHECK-SINK-NEXT:    blt 0, .LBB0_5
+; CHECK-SINK-NEXT:  # %bb.1: # %bb6
+; CHECK-SINK-NEXT:    clrldi 4, 4, 32
+; CHECK-SINK-NEXT:    li 7, 7
+; CHECK-SINK-NEXT:    addi 4, 4, -1
+; CHECK-SINK-NEXT:    mtctr 4
+; CHECK-SINK-NEXT:    li 4, 8
+; CHECK-SINK-NEXT:    b .LBB0_3
+; CHECK-SINK-NEXT:    .p2align 5
+; CHECK-SINK-NEXT:  .LBB0_2: # %select.end
+; CHECK-SINK-NEXT:    #
+; CHECK-SINK-NEXT:    addi 4, 4, 1
+; CHECK-SINK-NEXT:    bdz .LBB0_5
+; CHECK-SINK-NEXT:  .LBB0_3: # %bb11
+; CHECK-SINK-NEXT:    #
+; CHECK-SINK-NEXT:    lfsu 1, 4(3)
+; CHECK-SINK-NEXT:    fcmpu 0, 1, 0
+; CHECK-SINK-NEXT:    bc 4, 1, .LBB0_2
+; CHECK-SINK-NEXT:  # %bb.4: # %select.true.sink
+; CHECK-SINK-NEXT:    #
+; CHECK-SINK-NEXT:    xsaddsp 0, 1, 1
+; CHECK-SINK-NEXT:    mr 7, 4
+; CHECK-SINK-NEXT:    b .LBB0_2
+; CHECK-SINK-NEXT:  .LBB0_5: # %bb8
+; CHECK-SINK-NEXT:    stw 7, 0(5)
+; CHECK-SINK-NEXT:    stfs 0, 0(6)
+; CHECK-SINK-NEXT:    blr
 bb:
   %tmp = load float, float* %arg, align 4
   %tmp4 = fmul float %tmp, 2.000000e+00
@@ -67,3 +103,117 @@
   %tmp24 = icmp eq i64 %tmp23, %tmp7
   br i1 %tmp24, label %bb8, label %bb11
 }
+
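+; @foo has three selects sharing one compare: %tmp18 feeds only the true side
+; (%tmp19), so it can be sunk, while %tmp21 is used as both a true (%tmp22)
+; and a false (%tmp24) operand and therefore stays speculated.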
+define dso_local void @foo(float* nocapture readonly %arg, i32 signext %arg1, i32* nocapture %arg2, float* nocapture %arg3) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    lfs 0, 0(3)
+; CHECK-NEXT:    li 9, 7
+; CHECK-NEXT:    cmpwi 4, 2
+; CHECK-NEXT:    xsaddsp 0, 0, 0
+; CHECK-NEXT:    blt 0, .LBB1_5
+; CHECK-NEXT:  # %bb.1: # %bb6
+; CHECK-NEXT:    clrldi 4, 4, 32
+; CHECK-NEXT:    li 8, 7
+; CHECK-NEXT:    li 7, 1
+; CHECK-NEXT:    b .LBB1_3
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB1_2: # %bb11
+; CHECK-NEXT:    #
+; CHECK-NEXT:    addi 10, 7, 7
+; CHECK-NEXT:    addi 7, 7, 1
+; CHECK-NEXT:    iselgt 9, 10, 8
+; CHECK-NEXT:    iselgt 8, 8, 10
+; CHECK-NEXT:    clrldi 8, 8, 32
+; CHECK-NEXT:    sub 8, 4, 8
+; CHECK-NEXT:    cmpld 8, 7
+; CHECK-NEXT:    mr 8, 9
+; CHECK-NEXT:    beq 0, .LBB1_5
+; CHECK-NEXT:  .LBB1_3: # %bb11
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lfsu 1, 4(3)
+; CHECK-NEXT:    fcmpu 0, 1, 0
+; CHECK-NEXT:    ble 0, .LBB1_2
+; CHECK-NEXT:  # %bb.4:
+; CHECK-NEXT:    xsaddsp 0, 1, 1
+; CHECK-NEXT:    b .LBB1_2
+; CHECK-NEXT:  .LBB1_5: # %bb8
+; CHECK-NEXT:    stw 9, 0(5)
+; CHECK-NEXT:    stfs 0, 0(6)
+; CHECK-NEXT:    blr
+;
+; CHECK-SINK-LABEL: foo:
+; CHECK-SINK:       # %bb.0: # %bb
+; CHECK-SINK-NEXT:    lfs 0, 0(3)
+; CHECK-SINK-NEXT:    li 9, 7
+; CHECK-SINK-NEXT:    cmpwi 4, 2
+; CHECK-SINK-NEXT:    xsaddsp 0, 0, 0
+; CHECK-SINK-NEXT:    blt 0, .LBB1_5
+; CHECK-SINK-NEXT:  # %bb.1: # %bb6
+; CHECK-SINK-NEXT:    clrldi 4, 4, 32
+; CHECK-SINK-NEXT:    li 8, 7
+; CHECK-SINK-NEXT:    li 7, 1
+; CHECK-SINK-NEXT:    b .LBB1_3
+; CHECK-SINK-NEXT:    .p2align 4
+; CHECK-SINK-NEXT:  .LBB1_2: # %bb11
+; CHECK-SINK-NEXT:    #
+; CHECK-SINK-NEXT:    addi 10, 7, 7
+; CHECK-SINK-NEXT:    addi 7, 7, 1
+; CHECK-SINK-NEXT:    iselgt 9, 10, 8
+; CHECK-SINK-NEXT:    iselgt 8, 8, 10
+; CHECK-SINK-NEXT:    clrldi 8, 8, 32
+; CHECK-SINK-NEXT:    sub 8, 4, 8
+; CHECK-SINK-NEXT:    cmpld 8, 7
+; CHECK-SINK-NEXT:    mr 8, 9
+; CHECK-SINK-NEXT:    beq 0, .LBB1_5
+; CHECK-SINK-NEXT:  .LBB1_3: # %bb11
+; CHECK-SINK-NEXT:    #
+; CHECK-SINK-NEXT:    lfsu 1, 4(3)
+; CHECK-SINK-NEXT:    fcmpu 0, 1, 0
+; CHECK-SINK-NEXT:    ble 0, .LBB1_2
+; CHECK-SINK-NEXT:  # %bb.4:
+; CHECK-SINK-NEXT:    xsaddsp 0, 1, 1
+; CHECK-SINK-NEXT:    b .LBB1_2
+; CHECK-SINK-NEXT:  .LBB1_5: # %bb8
+; CHECK-SINK-NEXT:    stw 9, 0(5)
+; CHECK-SINK-NEXT:    stfs 0, 0(6)
+; CHECK-SINK-NEXT:    blr
+bb:
+  %tmp = load float, float* %arg, align 4
+  %tmp4 = fmul float %tmp, 2.000000e+00
+  %tmp5 = icmp sgt i32 %arg1, 1
+  br i1 %tmp5, label %bb6, label %bb8
+
+bb6:                                              ; preds = %bb
+  %tmp7 = zext i32 %arg1 to i64
+  br label %bb11
+
+bb8:                                              ; preds = %bb11, %bb
+  %tmp9 = phi float [ %tmp4, %bb ], [ %tmp19, %bb11 ]
+  %tmp10 = phi i32 [ 7, %bb ], [ %tmp22, %bb11 ]
+  store i32 %tmp10, i32* %arg2, align 4
+  store float %tmp9, float* %arg3, align 4
+  ret void
+
+bb11:                                             ; preds = %bb11, %bb6
+  %tmp12 = phi i64 [ 1, %bb6 ], [ %tmp23, %bb11 ]
+  %tmp13 = phi i32 [ 7, %bb6 ], [ %tmp22, %bb11 ]
+  %tmp14 = phi float [ %tmp4, %bb6 ], [ %tmp19, %bb11 ]
+  %tmp15 = getelementptr inbounds float, float* %arg, i64 %tmp12
+  %tmp16 = load float, float* %tmp15, align 4
+  %tmp17 = fcmp ogt float %tmp16, %tmp14
+  %tmp18 = fmul float %tmp16, 2.000000e+00
+  %tmp19 = select i1 %tmp17, float %tmp18, float %tmp14
+  %tmp20 = trunc i64 %tmp12 to i32
+  %tmp21 = add i32 %tmp20, 7
+  %tmp22 = select i1 %tmp17, i32 %tmp21, i32 %tmp13
+  %tmp23 = add nuw nsw i64 %tmp12, 1
+  %tmp24 = select i1 %tmp17, i32 %tmp13, i32 %tmp21
+  %tmp24.ext = zext i32 %tmp24 to i64
+  %tmp25 = add nuw nsw i64 %tmp23, %tmp24.ext
+  %tmp26 = icmp eq i64 %tmp25, %tmp7
+  br i1 %tmp26, label %bb8, label %bb11
+}