Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -4475,6 +4475,14 @@
   if (!Cmp || !Cmp->hasOneUse())
     return false;
 
+  // If both operands of the select are expected to fold away in lowering,
+  // a mispredicted branch may cost more than keeping the select.
+  auto *TI = dyn_cast<Instruction>(SI->getTrueValue());
+  auto *FI = dyn_cast<Instruction>(SI->getFalseValue());
+  if (TI && FI && TTI->getUserCost(TI) == TargetTransformInfo::TCC_Free &&
+      TTI->getUserCost(FI) == TargetTransformInfo::TCC_Free)
+    return false;
+
   Value *CmpOp0 = Cmp->getOperand(0);
   Value *CmpOp1 = Cmp->getOperand(1);
 
Index: test/CodeGen/AArch64/arm64-select.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/arm64-select.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=arm64 -mcpu=cortex-a57 < %s | FileCheck %s
+; Verify that the select below, whose operands are both cheap, is not transformed.
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+%class.A = type { i32, i32, i32, i32 }
+
+define i32 @test(%class.A* nocapture readonly %cla, float* nocapture readonly %b, i32 %c) #0 {
+entry:
+; CHECK-LABEL: test:
+; CHECK: csel
+  %call = tail call fast float @_Z6getvalv()
+  %0 = load float, float* %b, align 4, !tbaa !0
+  %cmp = fcmp fast olt float %call, %0 ; select condition: call result compared with the value loaded from %b
+  %a1 = getelementptr inbounds %class.A, %class.A* %cla, i64 0, i32 1 ; address of field 1 of *%cla
+  %a2 = getelementptr inbounds %class.A, %class.A* %cla, i64 0, i32 2 ; address of field 2 of *%cla
+  %cond.in = select i1 %cmp, i32* %a1, i32* %a2 ; both select operands are GEPs; the CHECK above expects a csel
+  %cond = load i32, i32* %cond.in, align 4, !tbaa !0
+  ret i32 %cond
+}
+
+declare float @_Z6getvalv() #0
+
+!0 = !{!1, !1, i64 0}
+!1 = distinct !{!"int", !1, i64 0}
\ No newline at end of file