Index: lib/Transforms/InstCombine/InstCombineSelect.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -116,8 +116,7 @@
   }
 }
 
-/// Here we have (select c, TI, FI), and we know that TI and FI
-/// have the same opcode and only one use each.  Try to simplify this.
+/// Try to fold (select c, TI, FI), where TI and FI have the same opcode.
 Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
                                           Instruction *FI) {
   // If this is a cast from the same type, merge.
@@ -129,10 +128,17 @@
     // The select condition may be a vector. We may only change the operand
     // type if the vector width remains the same (and matches the condition).
     Type *CondTy = SI.getCondition()->getType();
-    if (CondTy->isVectorTy() &&
-        (!FIOpndTy->isVectorTy() ||
-         CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements()))
+    if (CondTy->isVectorTy()) {
+      if (!FIOpndTy->isVectorTy())
+        return nullptr;
+      if (CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements())
+        return nullptr;
+    } else if (!TI->hasOneUse() || !FI->hasOneUse()) {
+      // TODO: The one-use restrictions for a scalar select could be eased if
+      // the fold of a select in visitLoadInst() was enhanced to match a pattern
+      // that includes a cast.
       return nullptr;
+    }
 
     // Fold this by inserting a select from the input values.
     Value *NewSI = Builder->CreateSelect(SI.getCondition(), TI->getOperand(0),
@@ -142,7 +148,7 @@
   }
 
   // Only handle binary operators here.
-  if (!isa<BinaryOperator>(TI))
+  if (!isa<BinaryOperator>(TI) || !TI->hasOneUse() || !FI->hasOneUse())
     return nullptr;
 
   // Figure out if the operations have any operands in common.
@@ -1056,14 +1062,12 @@
   if (Instruction *Add = foldAddSubSelect(SI, *Builder))
     return Add;
 
+  // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
   auto *TI = dyn_cast<Instruction>(TrueVal);
   auto *FI = dyn_cast<Instruction>(FalseVal);
-  if (TI && FI && TI->hasOneUse() && FI->hasOneUse()) {
-    // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
-    if (TI->getOpcode() == FI->getOpcode())
-      if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
-        return IV;
-  }
+  if (TI && FI && TI->getOpcode() == FI->getOpcode())
+    if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
+      return IV;
 
   // See if we can fold the select into one of our operands.
   if (SI.getType()->isIntOrIntVectorTy() || SI.getType()->isFPOrFPVectorTy()) {
Index: test/Transforms/InstCombine/select.ll
===================================================================
--- test/Transforms/InstCombine/select.ll
+++ test/Transforms/InstCombine/select.ll
@@ -723,6 +723,30 @@
   ret i48 %tmp2
 }
 
+; Allow select promotion even if there are multiple uses of the cast operands.
+; Hoisting the selects allows later pattern matching to see that these are min/max ops.
+
+define void @min_max_bitcast(<4 x float> %a, <4 x float> %b, <4 x i32>* %ptr1, <4 x i32>* %ptr2) {
+; CHECK-LABEL: @min_max_bitcast(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <4 x float> %a, %b
+; CHECK-NEXT:    [[SEL1_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> %a, <4 x float> %b
+; CHECK-NEXT:    [[SEL2_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32>* %ptr1 to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[SEL1_V]], <4 x float>* [[TMP1]], align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32>* %ptr2 to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[SEL2_V]], <4 x float>* [[TMP2]], align 16
+; CHECK-NEXT:    ret void
+;
+  %cmp = fcmp olt <4 x float> %a, %b
+  %bc1 = bitcast <4 x float> %a to <4 x i32>
+  %bc2 = bitcast <4 x float> %b to <4 x i32>
+  %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2
+  %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1
+  store <4 x i32> %sel1, <4 x i32>* %ptr1
+  store <4 x i32> %sel2, <4 x i32>* %ptr2
+  ret void
+}
+
 ; PR8575
 
 define i32 @test52(i32 %n, i32 %m) nounwind {