Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -4475,6 +4475,14 @@
   if (!Cmp || !Cmp->hasOneUse())
     return false;
 
+  // If both operands of the select are expected to fold away in lowering,
+  // a mispredicted branch is likely to cost more than keeping the select.
+  auto *TI = dyn_cast<Instruction>(SI->getTrueValue());
+  auto *FI = dyn_cast<Instruction>(SI->getFalseValue());
+  if (TI && FI && TTI->getUserCost(TI) == TargetTransformInfo::TCC_Free &&
+      TTI->getUserCost(FI) == TargetTransformInfo::TCC_Free)
+    return false;
+
   Value *CmpOp0 = Cmp->getOperand(0);
   Value *CmpOp1 = Cmp->getOperand(1);
 
Index: test/CodeGen/AArch64/arm64-select.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/arm64-select.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=arm64 -mcpu=cortex-a57 < %s | FileCheck %s
+; The select below has cheap operands, so it must not be turned into a branch.
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+%class.A = type { i32, i32, i32, i32 }
+
+define i32 @test(%class.A* nocapture readonly %cla, float* nocapture readonly %b, i32 %c) #0 {
+entry:
+; CHECK-LABEL: test:
+; CHECK: csel
+  %call = tail call fast float @_Z6getvalv()
+  %0 = load float, float* %b, align 4, !tbaa !0
+  %cmp = fcmp fast olt float %call, %0
+  %a1 = getelementptr inbounds %class.A, %class.A* %cla, i64 0, i32 1
+  %a2 = getelementptr inbounds %class.A, %class.A* %cla, i64 0, i32 2
+  %cond.in = select i1 %cmp, i32* %a1, i32* %a2
+  %cond = load i32, i32* %cond.in, align 4, !tbaa !0
+  ret i32 %cond
+}
+
+declare float @_Z6getvalv() #0
+
+!0 = !{!1, !1, i64 0}
+!1 = distinct !{!"int", !1, i64 0}
\ No newline at end of file