diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -5006,6 +5006,29 @@ } break; + case ISD::ConstantFP: + if (Subtarget->isISA3_0() && Subtarget->isPPC64() && + !Subtarget->hasPrefixInstrs() && + (N->getValueType(0) == MVT::f64 || N->getValueType(0) == MVT::f32)) { + ConstantFPSDNode *CFP = cast(N); + bool isExact = false; + APSInt Val(5, false); + ConstantFP *LLVMC = const_cast(CFP->getConstantFPValue()); + if (LLVMC->getValueAPF().convertToInteger(Val, APFloat::rmTowardZero, + &isExact) == APFloat::opOK && + isExact == true && Val.getExtValue() != 0) { + SDNode *Op1 = CurDAG->getMachineNode(PPC::VSPLTISW, dl, MVT::v4i32, + getI32Imm(Val.getExtValue(), dl)); + SDNode *Op2 = CurDAG->getMachineNode(PPC::XVCVSXWDP, dl, MVT::v2f64, + SDValue(Op1, 0)); + CurDAG->SelectNodeTo(N, PPC::COPY_TO_REGCLASS, N->getValueType(0), + SDValue(Op2, 0), + getI32Imm(PPC::VSFRCRegClassID, dl)); + return; + } + } + break; + case ISD::INTRINSIC_VOID: { auto IntrinsicID = N->getConstantOperandVal(1); if (IntrinsicID == Intrinsic::ppc_tdw || IntrinsicID == Intrinsic::ppc_tw) { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -17641,6 +17641,20 @@ // we can materialize all immediatess via XXSPLTI32DX and XXSPLTIDP. return true; } + if (Subtarget.isISA3_0() && Subtarget.isPPC64() && + !Subtarget.hasPrefixInstrs()) { + // Currently target P9 only + bool isExact = false; + APSInt Value(5, false); + if (Imm.convertToInteger(Value, APFloat::rmTowardZero, &isExact) == + APFloat::opOK && + isExact == true && Value.getExtValue() != 0) { + // we can materialize FP in range [-16.0, 15.0] excluding zeros that + // can be exactly converted to integer via VSPLTISW and XVCVSXWDP. + // 0.0 will select XXLXOR, -0.0 is not exact. + return true; + } + } LLVM_FALLTHROUGH; case MVT::ppcf128: return Imm.isPosZero(); diff --git a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll --- a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll +++ b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll @@ -1252,10 +1252,12 @@ ; CHECK-NEXT: mtfprwz f0, r3 ; CHECK-NEXT: xscvhpdp f0, f0 ; CHECK-NEXT: fcmpu cr0, f0, f1 -; CHECK-NEXT: beqlr cr0 +; CHECK-NEXT: beq cr0, .LBB20_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: addis r3, r2, .LCPI20_0@toc@ha -; CHECK-NEXT: lfs f1, .LCPI20_0@toc@l(r3) +; CHECK-NEXT: vspltisw v2, 1 +; CHECK-NEXT: xvcvsxwdp vs1, vs34 +; CHECK-NEXT: .LBB20_2: +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; CHECK-NEXT: blr ; ; SOFT-LABEL: PR40273: diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll --- a/llvm/test/CodeGen/PowerPC/recipest.ll +++ b/llvm/test/CodeGen/PowerPC/recipest.ll @@ -52,13 +52,13 @@ ; CHECK-P9-LABEL: foo_fmf: ; CHECK-P9: # %bb.0: ; CHECK-P9-NEXT: xsrsqrtedp 0, 2 +; CHECK-P9-NEXT: vspltisw 2, -3 ; CHECK-P9-NEXT: addis 3, 2, .LCPI0_0@toc@ha -; CHECK-P9-NEXT: lfs 4, .LCPI0_0@toc@l(3) -; CHECK-P9-NEXT: addis 3, 2, .LCPI0_1@toc@ha ; CHECK-P9-NEXT: xsmuldp 3, 2, 0 +; CHECK-P9-NEXT: xvcvsxwdp 4, 34 ; CHECK-P9-NEXT: fmr 5, 4 ; CHECK-P9-NEXT: xsmaddadp 5, 3, 0 -; CHECK-P9-NEXT: lfs 3, .LCPI0_1@toc@l(3) +; CHECK-P9-NEXT: lfs 3, .LCPI0_0@toc@l(3) ; CHECK-P9-NEXT: xsmuldp 0, 0, 3 ; CHECK-P9-NEXT: xsmuldp 0, 0, 5 ; CHECK-P9-NEXT: xsmuldp 2, 2, 0 @@ -150,12 +150,12 @@ ; CHECK-P9-LABEL: foof_fmf: ; CHECK-P9: # %bb.0: ; CHECK-P9-NEXT: xsrsqrtesp 0, 2 +; CHECK-P9-NEXT: vspltisw 2, -3 ; CHECK-P9-NEXT: addis 3, 2, .LCPI3_0@toc@ha -; CHECK-P9-NEXT: lfs 3, .LCPI3_0@toc@l(3) -; CHECK-P9-NEXT: addis 3, 2, .LCPI3_1@toc@ha ; CHECK-P9-NEXT: xsmulsp 2, 2, 0 +; CHECK-P9-NEXT: xvcvsxwdp 3, 34 ; CHECK-P9-NEXT: xsmaddasp 3, 2, 0 -; CHECK-P9-NEXT: lfs 2, .LCPI3_1@toc@l(3) +; CHECK-P9-NEXT: lfs 2, .LCPI3_0@toc@l(3) ; CHECK-P9-NEXT: xsmulsp 0, 0, 2 ; CHECK-P9-NEXT: xsmulsp 0, 0, 3 ; CHECK-P9-NEXT: xsmuldp 1, 1, 0 @@ -233,13 +233,13 @@ ; CHECK-P9-LABEL: food_fmf: ; CHECK-P9: # %bb.0: ; CHECK-P9-NEXT: xsrsqrtedp 0, 2 +; CHECK-P9-NEXT: vspltisw 2, -3 ; CHECK-P9-NEXT: addis 3, 2, .LCPI5_0@toc@ha -; CHECK-P9-NEXT: lfs 4, .LCPI5_0@toc@l(3) -; CHECK-P9-NEXT: addis 3, 2, .LCPI5_1@toc@ha ; CHECK-P9-NEXT: xsmuldp 3, 2, 0 +; CHECK-P9-NEXT: xvcvsxwdp 4, 34 ; CHECK-P9-NEXT: fmr 5, 4 ; CHECK-P9-NEXT: xsmaddadp 5, 3, 0 -; CHECK-P9-NEXT: lfs 3, .LCPI5_1@toc@l(3) +; CHECK-P9-NEXT: lfs 3, .LCPI5_0@toc@l(3) ; CHECK-P9-NEXT: xsmuldp 0, 0, 3 ; CHECK-P9-NEXT: xsmuldp 0, 0, 5 ; CHECK-P9-NEXT: xsmuldp 2, 2, 0 @@ -314,12 +314,12 @@ ; CHECK-P9-LABEL: goo_fmf: ; CHECK-P9: # %bb.0: ; CHECK-P9-NEXT: xsrsqrtesp 0, 2 +; CHECK-P9-NEXT: vspltisw 2, -3 ; CHECK-P9-NEXT: addis 3, 2, .LCPI7_0@toc@ha -; CHECK-P9-NEXT: lfs 3, .LCPI7_0@toc@l(3) -; CHECK-P9-NEXT: addis 3, 2, .LCPI7_1@toc@ha ; CHECK-P9-NEXT: xsmulsp 2, 2, 0 +; CHECK-P9-NEXT: xvcvsxwdp 3, 34 ; CHECK-P9-NEXT: xsmaddasp 3, 2, 0 -; CHECK-P9-NEXT: lfs 2, .LCPI7_1@toc@l(3) +; CHECK-P9-NEXT: lfs 2, .LCPI7_0@toc@l(3) ; CHECK-P9-NEXT: xsmulsp 0, 0, 2 ; CHECK-P9-NEXT: xsmulsp 0, 0, 3 ; CHECK-P9-NEXT: xsmulsp 1, 1, 0 @@ -415,12 +415,12 @@ ; CHECK-P9-LABEL: rsqrt_fmul_fmf: ; CHECK-P9: # %bb.0: ; CHECK-P9-NEXT: xsrsqrtesp 0, 1 +; CHECK-P9-NEXT: vspltisw 2, -3 ; CHECK-P9-NEXT: addis 3, 2, .LCPI10_0@toc@ha -; CHECK-P9-NEXT: lfs 4, .LCPI10_0@toc@l(3) -; CHECK-P9-NEXT: addis 3, 2, .LCPI10_1@toc@ha ; CHECK-P9-NEXT: xsmulsp 1, 1, 0 +; CHECK-P9-NEXT: xvcvsxwdp 4, 34 ; CHECK-P9-NEXT: xsmaddasp 4, 1, 0 -; CHECK-P9-NEXT: lfs 1, .LCPI10_1@toc@l(3) +; CHECK-P9-NEXT: lfs 1, .LCPI10_0@toc@l(3) ; CHECK-P9-NEXT: xsmulsp 0, 0, 1 ; CHECK-P9-NEXT: xsresp 1, 2 ; CHECK-P9-NEXT: xsmulsp 0, 0, 4 @@ -592,9 +592,9 @@ ; ; CHECK-P9-LABEL: foo2_fmf: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: addis 3, 2, .LCPI14_0@toc@ha +; CHECK-P9-NEXT: vspltisw 2, -1 ; CHECK-P9-NEXT: xsredp 3, 2 -; CHECK-P9-NEXT: lfs 0, .LCPI14_0@toc@l(3) +; CHECK-P9-NEXT: xvcvsxwdp 0, 34 ; CHECK-P9-NEXT: xsmaddadp 0, 2, 3 ; CHECK-P9-NEXT: xsnmsubadp 3, 3, 0 ; CHECK-P9-NEXT: xsmuldp 0, 1, 3 @@ -800,13 +800,13 @@ ; CHECK-P9-NEXT: bc 12, 2, .LBB20_2 ; CHECK-P9-NEXT: # %bb.1: ; CHECK-P9-NEXT: xsrsqrtedp 0, 1 +; CHECK-P9-NEXT: vspltisw 2, -3 ; CHECK-P9-NEXT: addis 3, 2, .LCPI20_0@toc@ha -; CHECK-P9-NEXT: lfs 3, .LCPI20_0@toc@l(3) -; CHECK-P9-NEXT: addis 3, 2, .LCPI20_1@toc@ha ; CHECK-P9-NEXT: xsmuldp 2, 1, 0 +; CHECK-P9-NEXT: xvcvsxwdp 3, 34 ; CHECK-P9-NEXT: fmr 4, 3 ; CHECK-P9-NEXT: xsmaddadp 4, 2, 0 -; CHECK-P9-NEXT: lfs 2, .LCPI20_1@toc@l(3) +; CHECK-P9-NEXT: lfs 2, .LCPI20_0@toc@l(3) ; CHECK-P9-NEXT: xsmuldp 0, 0, 2 ; CHECK-P9-NEXT: xsmuldp 0, 0, 4 ; CHECK-P9-NEXT: xsmuldp 1, 1, 0 @@ -877,20 +877,20 @@ ; ; CHECK-P9-LABEL: foo3_fmf_crbits_off: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: addis 3, 2, .LCPI21_2@toc@ha +; CHECK-P9-NEXT: addis 3, 2, .LCPI21_1@toc@ha ; CHECK-P9-NEXT: xsabsdp 0, 1 -; CHECK-P9-NEXT: lfd 2, .LCPI21_2@toc@l(3) +; CHECK-P9-NEXT: lfd 2, .LCPI21_1@toc@l(3) ; CHECK-P9-NEXT: xscmpudp 0, 0, 2 ; CHECK-P9-NEXT: blt 0, .LBB21_2 ; CHECK-P9-NEXT: # %bb.1: ; CHECK-P9-NEXT: xsrsqrtedp 0, 1 +; CHECK-P9-NEXT: vspltisw 2, -3 ; CHECK-P9-NEXT: addis 3, 2, .LCPI21_0@toc@ha -; CHECK-P9-NEXT: lfs 3, .LCPI21_0@toc@l(3) -; CHECK-P9-NEXT: addis 3, 2, .LCPI21_1@toc@ha ; CHECK-P9-NEXT: xsmuldp 2, 1, 0 +; CHECK-P9-NEXT: xvcvsxwdp 3, 34 ; CHECK-P9-NEXT: fmr 4, 3 ; CHECK-P9-NEXT: xsmaddadp 4, 2, 0 -; CHECK-P9-NEXT: lfs 2, .LCPI21_1@toc@l(3) +; CHECK-P9-NEXT: lfs 2, .LCPI21_0@toc@l(3) ; CHECK-P9-NEXT: xsmuldp 0, 0, 2 ; CHECK-P9-NEXT: xsmuldp 0, 0, 4 ; CHECK-P9-NEXT: xsmuldp 1, 1, 0 @@ -972,20 +972,20 @@ ; ; CHECK-P9-LABEL: goo3_fmf: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: addis 3, 2, .LCPI23_2@toc@ha +; CHECK-P9-NEXT: addis 3, 2, .LCPI23_1@toc@ha ; CHECK-P9-NEXT: xsabsdp 0, 1 -; CHECK-P9-NEXT: lfs 2, .LCPI23_2@toc@l(3) +; CHECK-P9-NEXT: lfs 2, .LCPI23_1@toc@l(3) ; CHECK-P9-NEXT: fcmpu 0, 0, 2 ; CHECK-P9-NEXT: xxlxor 0, 0, 0 ; CHECK-P9-NEXT: blt 0, .LBB23_2 ; CHECK-P9-NEXT: # %bb.1: ; CHECK-P9-NEXT: xsrsqrtesp 0, 1 +; CHECK-P9-NEXT: vspltisw 2, -3 ; CHECK-P9-NEXT: addis 3, 2, .LCPI23_0@toc@ha -; CHECK-P9-NEXT: lfs 2, .LCPI23_0@toc@l(3) -; CHECK-P9-NEXT: addis 3, 2, .LCPI23_1@toc@ha ; CHECK-P9-NEXT: xsmulsp 1, 1, 0 +; CHECK-P9-NEXT: xvcvsxwdp 2, 34 ; CHECK-P9-NEXT: xsmaddasp 2, 1, 0 -; CHECK-P9-NEXT: lfs 0, .LCPI23_1@toc@l(3) +; CHECK-P9-NEXT: lfs 0, .LCPI23_0@toc@l(3) ; CHECK-P9-NEXT: xsmulsp 0, 1, 0 ; CHECK-P9-NEXT: xsmulsp 0, 0, 2 ; CHECK-P9-NEXT: .LBB23_2: diff --git a/llvm/test/CodeGen/PowerPC/scalar_cmp.ll b/llvm/test/CodeGen/PowerPC/scalar_cmp.ll --- a/llvm/test/CodeGen/PowerPC/scalar_cmp.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_cmp.ll @@ -903,9 +903,9 @@ ; ; FAST-P9-LABEL: onecmp1: ; FAST-P9: # %bb.0: # %entry -; FAST-P9-NEXT: addis r3, r2, .LCPI24_0@toc@ha -; FAST-P9-NEXT: lfs f0, .LCPI24_0@toc@l(r3) -; FAST-P9-NEXT: xssubdp f0, f1, f0 +; FAST-P9-NEXT: vspltisw v2, -1 +; FAST-P9-NEXT: xvcvsxwdp vs0, vs34 +; FAST-P9-NEXT: xsadddp f0, f1, f0 ; FAST-P9-NEXT: fsel f1, f0, f2, f3 ; FAST-P9-NEXT: blr ; @@ -924,14 +924,16 @@ ; ; NO-FAST-P9-LABEL: onecmp1: ; NO-FAST-P9: # %bb.0: # %entry -; NO-FAST-P9-NEXT: addis r3, r2, .LCPI24_0@toc@ha -; NO-FAST-P9-NEXT: lfs f0, .LCPI24_0@toc@l(r3) +; NO-FAST-P9-NEXT: vspltisw v2, 1 +; NO-FAST-P9-NEXT: xvcvsxwdp vs0, vs34 ; NO-FAST-P9-NEXT: fcmpu cr0, f1, f0 -; NO-FAST-P9-NEXT: cror 4*cr5+lt, lt, un -; NO-FAST-P9-NEXT: bc 12, 4*cr5+lt, .LBB24_2 +; NO-FAST-P9-NEXT: bc 12, lt, .LBB24_3 ; NO-FAST-P9-NEXT: # %bb.1: # %entry +; NO-FAST-P9-NEXT: fcmpu cr0, f1, f1 +; NO-FAST-P9-NEXT: bc 12, un, .LBB24_3 +; NO-FAST-P9-NEXT: # %bb.2: # %entry ; NO-FAST-P9-NEXT: fmr f3, f2 -; NO-FAST-P9-NEXT: .LBB24_2: # %entry +; NO-FAST-P9-NEXT: .LBB24_3: # %entry ; NO-FAST-P9-NEXT: fmr f1, f3 ; NO-FAST-P9-NEXT: blr entry: @@ -951,8 +953,8 @@ ; ; FAST-P9-LABEL: onecmp2: ; FAST-P9: # %bb.0: # %entry -; FAST-P9-NEXT: addis r3, r2, .LCPI25_0@toc@ha -; FAST-P9-NEXT: lfs f0, .LCPI25_0@toc@l(r3) +; FAST-P9-NEXT: vspltisw v2, 1 +; FAST-P9-NEXT: xvcvsxwdp vs0, vs34 ; FAST-P9-NEXT: xssubdp f0, f0, f1 ; FAST-P9-NEXT: fsel f1, f0, f3, f2 ; FAST-P9-NEXT: blr @@ -970,8 +972,8 @@ ; ; NO-FAST-P9-LABEL: onecmp2: ; NO-FAST-P9: # %bb.0: # %entry -; NO-FAST-P9-NEXT: addis r3, r2, .LCPI25_0@toc@ha -; NO-FAST-P9-NEXT: lfs f0, .LCPI25_0@toc@l(r3) +; NO-FAST-P9-NEXT: vspltisw v2, 1 +; NO-FAST-P9-NEXT: xvcvsxwdp vs0, vs34 ; NO-FAST-P9-NEXT: xscmpudp cr0, f1, f0 ; NO-FAST-P9-NEXT: bgt cr0, .LBB25_2 ; NO-FAST-P9-NEXT: # %bb.1: # %entry @@ -998,9 +1000,9 @@ ; ; FAST-P9-LABEL: onecmp3: ; FAST-P9: # %bb.0: # %entry -; FAST-P9-NEXT: addis r3, r2, .LCPI26_0@toc@ha -; FAST-P9-NEXT: lfs f0, .LCPI26_0@toc@l(r3) -; FAST-P9-NEXT: xssubdp f0, f1, f0 +; FAST-P9-NEXT: vspltisw v2, -1 +; FAST-P9-NEXT: xvcvsxwdp vs0, vs34 +; FAST-P9-NEXT: xsadddp f0, f1, f0 ; FAST-P9-NEXT: fsel f1, f0, f2, f3 ; FAST-P9-NEXT: xsnegdp f0, f0 ; FAST-P9-NEXT: fsel f1, f0, f1, f3 @@ -1019,8 +1021,8 @@ ; ; NO-FAST-P9-LABEL: onecmp3: ; NO-FAST-P9: # %bb.0: # %entry -; NO-FAST-P9-NEXT: addis r3, r2, .LCPI26_0@toc@ha -; NO-FAST-P9-NEXT: lfs f0, .LCPI26_0@toc@l(r3) +; NO-FAST-P9-NEXT: vspltisw v2, 1 +; NO-FAST-P9-NEXT: xvcvsxwdp vs0, vs34 ; NO-FAST-P9-NEXT: xscmpudp cr0, f1, f0 ; NO-FAST-P9-NEXT: beq cr0, .LBB26_2 ; NO-FAST-P9-NEXT: # %bb.1: # %entry diff --git a/llvm/test/CodeGen/PowerPC/toc-float.ll b/llvm/test/CodeGen/PowerPC/toc-float.ll --- a/llvm/test/CodeGen/PowerPC/toc-float.ll +++ b/llvm/test/CodeGen/PowerPC/toc-float.ll @@ -7,8 +7,9 @@ define double @doubleConstant1() { ; CHECK-P9-LABEL: doubleConstant1: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: addis 3, 2, .LCPI0_0@toc@ha -; CHECK-P9-NEXT: lfs 1, .LCPI0_0@toc@l(3) +; CHECK-P9-NEXT: vspltisw 2, 14 +; CHECK-P9-NEXT: xvcvsxwdp 1, 34 +; CHECK-P9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-LABEL: doubleConstant1: diff --git a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll --- a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll +++ b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll @@ -181,18 +181,18 @@ ; CHECK-LE-NEXT: addis 3, 2, .LCPI9_0@toc@ha ; CHECK-LE-NEXT: addi 3, 3, .LCPI9_0@toc@l ; CHECK-LE-NEXT: lxv 36, 0(3) -; CHECK-LE-NEXT: addis 3, 2, .LCPI9_1@toc@ha -; CHECK-LE-NEXT: lfs 0, .LCPI9_1@toc@l(3) ; CHECK-LE-NEXT: vperm 2, 3, 2, 4 +; CHECK-LE-NEXT: vspltisw 3, 1 +; CHECK-LE-NEXT: xvcvsxwdp 0, 35 ; CHECK-LE-NEXT: xsadddp 1, 34, 0 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test10: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: addis 3, 2, .LCPI9_0@toc@ha ; CHECK-BE-NEXT: vmrghw 3, 3, 3 -; CHECK-BE-NEXT: lfs 0, .LCPI9_0@toc@l(3) ; CHECK-BE-NEXT: vmrglw 2, 3, 2 +; CHECK-BE-NEXT: vspltisw 3, 1 +; CHECK-BE-NEXT: xvcvsxwdp 0, 35 ; CHECK-BE-NEXT: xsadddp 1, 34, 0 ; CHECK-BE-NEXT: blr entry: