Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -4475,17 +4475,6 @@
   if (!Cmp || !Cmp->hasOneUse())
     return false;
 
-  Value *CmpOp0 = Cmp->getOperand(0);
-  Value *CmpOp1 = Cmp->getOperand(1);
-
-  // Emit "cmov on compare with a memory operand" as a branch to avoid stalls
-  // on a load from memory. But if the load is used more than once, do not
-  // change the select to a branch because the load is probably needed
-  // regardless of whether the branch is taken or not.
-  if ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
-      (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()))
-    return true;
-
   // If either operand of the select is expensive and only needed on one side
   // of the select, we should form a branch.
   if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
Index: test/CodeGen/AArch64/a57-csel.ll
===================================================================
--- test/CodeGen/AArch64/a57-csel.ll
+++ test/CodeGen/AArch64/a57-csel.ll
@@ -1,8 +1,9 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -mcpu=cortex-a57 -aarch64-enable-early-ifcvt=false | FileCheck %s
 
-; Check that the select is expanded into a branch sequence.
+; Check that the select isn't expanded into a branch sequence
+; when the icmp's first operand %x0 is from a load.
 define i64 @f(i64 %a, i64 %b, i64* %c, i64 %d, i64 %e) {
-  ; CHECK: cbz
+  ; CHECK: csel
   %x0 = load i64, i64* %c
   %x1 = icmp eq i64 %x0, 0
   %x2 = select i1 %x1, i64 %a, i64 %b
Index: test/CodeGen/AArch64/arm64-select.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/arm64-select.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=arm64 -mcpu=cortex-a57 < %s | FileCheck %s
+; We don't transform the case below, which has cheap operands of select.
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+%class.A = type { i32, i32, i32, i32 }
+
+define i32 @test(%class.A* nocapture readonly %cla, float* nocapture readonly %b, i32 %c) #0 {
+entry:
+; CHECK-LABEL: test:
+; CHECK: csel
+  %call = tail call fast float @_Z6getvalv()
+  %0 = load float, float* %b, align 4, !tbaa !0
+  %cmp = fcmp fast olt float %call, %0
+  %a1 = getelementptr inbounds %class.A, %class.A* %cla, i64 0, i32 1
+  %a2 = getelementptr inbounds %class.A, %class.A* %cla, i64 0, i32 2
+  %cond.in = select i1 %cmp, i32* %a1, i32* %a2
+  %cond = load i32, i32* %cond.in, align 4, !tbaa !0
+  ret i32 %cond
+}
+
+declare float @_Z6getvalv() #0
+
+!0 = !{!1, !1, i64 0}
+!1 = distinct !{!"int", !1, i64 0}
\ No newline at end of file
Index: test/CodeGen/X86/cmov-into-branch.ll
===================================================================
--- test/CodeGen/X86/cmov-into-branch.ll
+++ test/CodeGen/X86/cmov-into-branch.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s
 
-; cmp with single-use load, should not form cmov.
+; cmp with single-use load, should not form branch.
 define i32 @test1(double %a, double* nocapture %b, i32 %x, i32 %y) {
   %load = load double, double* %b, align 8
   %cmp = fcmp olt double %load, %a
@@ -8,9 +8,7 @@
   ret i32 %cond
 ; CHECK-LABEL: test1:
 ; CHECK: ucomisd
-; CHECK-NOT: cmov
-; CHECK: j
-; CHECK-NOT: cmov
+; CHECK: cmovbel
 }
 
 ; Sanity check: no load.
@@ -23,19 +21,6 @@
 ; CHECK: cmov
 }
 
-; Multiple uses of %a, should not form cmov.
-define i32 @test3(i32 %a, i32* nocapture %b, i32 %x) {
-  %load = load i32, i32* %b, align 4
-  %cmp = icmp ult i32 %load, %a
-  %cond = select i1 %cmp, i32 %a, i32 %x
-  ret i32 %cond
-; CHECK-LABEL: test3:
-; CHECK: cmpl
-; CHECK-NOT: cmov
-; CHECK: j
-; CHECK-NOT: cmov
-}
-
 ; Multiple uses of the load.
 define i32 @test4(i32 %a, i32* nocapture %b, i32 %x, i32 %y) {
   %load = load i32, i32* %b, align 4
Index: test/Transforms/CodeGenPrepare/X86/select.ll
===================================================================
--- test/Transforms/CodeGenPrepare/X86/select.ll
+++ test/Transforms/CodeGenPrepare/X86/select.ll
@@ -2,8 +2,7 @@
 
 target triple = "x86_64-unknown-unknown"
 
-; Nothing to sink here, but this gets converted to a branch to
-; avoid stalling an out-of-order CPU on a predictable branch.
+; Nothing to sink and convert here.
 
 define i32 @no_sink(double %a, double* %b, i32 %x, i32 %y) {
 entry:
@@ -15,11 +14,7 @@
 ; CHECK-LABEL: @no_sink(
 ; CHECK: %load = load double, double* %b, align 8
 ; CHECK: %cmp = fcmp olt double %load, %a
-; CHECK: br i1 %cmp, label %select.end, label %select.false
-; CHECK: select.false:
-; CHECK: br label %select.end
-; CHECK: select.end:
-; CHECK: %sel = phi i32 [ %x, %entry ], [ %y, %select.false ]
+; CHECK: %sel = select i1 %cmp, i32 %x, i32 %y
 ; CHECK: ret i32 %sel
 }
 