Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6294,6 +6294,37 @@
   DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
 }
 
+/// \brief Decide whether an int-to-FP conversion should use a direct move.
+///
+/// A floating-point load is preferred over an integer load followed by a
+/// direct move when the loaded value has no integer use.
+///
+/// \param Op Conversion node; operand 0 is the value being converted.
+/// \returns false only if operand 0 comes from a load whose loaded value is
+/// used solely by SINT_TO_FP / UINT_TO_FP nodes; true otherwise.
+static bool directMoveIsProfitable(const SDValue &Op) {
+  SDNode *Src = Op.getOperand(0).getNode();
+
+  // Anything that is not a load is always reported as profitable.
+  if (Src->getOpcode() != ISD::LOAD)
+    return true;
+
+  for (auto It = Src->use_begin(), End = Src->use_end(); It != End; ++It) {
+    // Result 0 is the loaded value; users of other results are skipped.
+    if (It.getUse().get().getResNo() != 0)
+      continue;
+
+    // Any user other than an int-to-FP conversion keeps the direct move.
+    const unsigned UserOpc = It->getOpcode();
+    const bool IsIntToFP =
+        UserOpc == ISD::SINT_TO_FP || UserOpc == ISD::UINT_TO_FP;
+    if (!IsIntToFP)
+      return true;
+  }
+
+  return false;
+}
+
 /// \brief Custom lowers integer to floating point conversions to use
 /// the direct move instructions available in ISA 2.07 to avoid the
 /// need for load/store combinations.
@@ -6362,7 +6393,8 @@
 
   // If we have direct moves, we can do all the conversion, skip the store/load
   // however, without FPCVT we can't do most conversions.
-  if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT())
+  if (Subtarget.hasDirectMove() && Subtarget.isPPC64() &&
+      Subtarget.hasFPCVT() && directMoveIsProfitable(Op))
     return LowerINT_TO_FPDirectMove(Op, DAG, dl);
 
   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
Index: test/CodeGen/PowerPC/direct-move-profit.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/direct-move-profit.ll
@@ -0,0 +1,92 @@
+; RUN: llc -O2 -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+
+; Exercises the direct-move profitability check used when lowering
+; integer-to-floating-point conversions of loaded values.
+
+; test1: the loaded i32 is used only by sitofp, so expect lxsiwax feeding
+; xscvsxdsp and no direct move (mtvsrwa/mtfprwa).
+; Function Attrs: norecurse nounwind
+define void @test1(float* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readnone %c, i32 signext %n) #0 {
+
+; CHECK-LABEL: test1:
+
+entry:
+  %idxprom = sext i32 %n to i64
+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !1
+  %conv = sitofp i32 %0 to float
+  %mul = fmul float %conv, 0x4002916880000000
+  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %idxprom
+  store float %mul, float* %arrayidx2, align 4, !tbaa !5
+  ret void
+
+; CHECK-NOT: mtvsrwa
+; CHECK-NOT: mtfprwa
+; CHECK: lxsiwax [[REG:[0-9]+]], {{.*}}
+; CHECK-NOT: mtvsrwa
+; CHECK-NOT: mtfprwa
+; CHECK: xscvsxdsp {{.*}}, [[REG]]
+; CHECK-NOT: mtvsrwa
+; CHECK-NOT: mtfprwa
+; CHECK: blr
+
+}
+
+; test2: same as test1, but the pointer is used directly and the result is
+; returned; again no direct move is expected.
+; Function Attrs: norecurse nounwind readonly
+define float @test2(i32* nocapture readonly %b) #0 {
+
+; CHECK-LABEL: test2:
+
+entry:
+  %0 = load i32, i32* %b, align 4, !tbaa !1
+  %conv = sitofp i32 %0 to float
+  %mul = fmul float %conv, 0x40030A3D80000000
+  ret float %mul
+
+; CHECK-NOT: mtvsrwa
+; CHECK-NOT: mtfprwa
+; CHECK: lxsiwax [[REG:[0-9]+]], {{.*}}
+; CHECK-NOT: mtvsrwa
+; CHECK-NOT: mtfprwa
+; CHECK: xscvsxdsp {{.*}}, [[REG]]
+; CHECK-NOT: mtvsrwa
+; CHECK-NOT: mtfprwa
+; CHECK: blr
+
+}
+
+; test3: the loaded i32 also feeds an integer add, so a direct move
+; (mtvsrwa) is expected.
+; Function Attrs: norecurse nounwind
+define void @test3(float* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture %c, i32 signext %n) #0 {
+
+; CHECK-LABEL: test3:
+
+entry:
+  %idxprom = sext i32 %n to i64
+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !1
+  %conv = sitofp i32 %0 to float
+  %mul = fmul float %conv, 0x4002916880000000
+  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %idxprom
+  store float %mul, float* %arrayidx2, align 4, !tbaa !5
+  %arrayidx6 = getelementptr inbounds i32, i32* %c, i64 %idxprom
+  %1 = load i32, i32* %arrayidx6, align 4, !tbaa !1
+  %add = add nsw i32 %1, %0
+  store i32 %add, i32* %arrayidx6, align 4, !tbaa !1
+  ret void
+
+; CHECK: mtvsrwa
+; CHECK: blr
+
+}
+
+!0 = !{!"clang version 3.9.0 (http://llvm.org/git/clang.git b88a395e7ba26c0fb96cd99a2a004d76f4f41d0c) (http://llvm.org/git/llvm.git 1ac3fbac0f5b037c17c0b0f9d271c32c4d7ca1b5)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"float", !3, i64 0}