diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -453,6 +453,9 @@ STRICT_FCFIDS, STRICT_FCFIDUS, + /// Constrained floating point add in round-to-zero mode. + STRICT_FADDRTZ, + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -262,6 +262,8 @@ // PPC (the libcall is not available). setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom); // We do not currently implement these libm ops for PowerPC. setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); @@ -1505,6 +1507,8 @@ return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR"; case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; case PPCISD::FNMSUB: return "PPCISD::FNMSUB"; + case PPCISD::STRICT_FADDRTZ: + return "PPCISD::STRICT_FADDRTZ"; case PPCISD::STRICT_FCTIDZ: return "PPCISD::STRICT_FCTIDZ"; case PPCISD::STRICT_FCTIWZ: @@ -8164,38 +8168,86 @@ bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || Op.getOpcode() == ISD::STRICT_FP_TO_SINT; SDValue Src = Op.getOperand(IsStrict ? 1 : 0); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Op.getValueType(); + // FP to INT conversions are legal for f128. - if (Src.getValueType() == MVT::f128) + if (SrcVT == MVT::f128) return Op; // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on // PPC (the libcall is not available). - if (Src.getValueType() == MVT::ppcf128 && !IsStrict) { - if (Op.getValueType() == MVT::i32) { + if (SrcVT == MVT::ppcf128) { + if (DstVT == MVT::i32) { + // TODO: Conservatively pass only nofpexcept flag here. Need to check and + // set other fast-math flags to FP operations in both strict and + // non-strict cases. (FP_TO_SINT, FSUB) + SDNodeFlags Flags; + Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); + if (IsSigned) { SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src, DAG.getIntPtrConstant(0, dl)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src, DAG.getIntPtrConstant(1, dl)); - // Add the two halves of the long double in round-to-zero mode. - SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); - - // Now use a smaller FP_TO_SINT. - return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); + // Add the two halves of the long double in round-to-zero mode, and use + // a smaller FP_TO_SINT. + if (IsStrict) { + SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl, + DAG.getVTList(MVT::f64, MVT::Other), + {Op.getOperand(0), Lo, Hi}, Flags); + return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, + DAG.getVTList(MVT::i32, MVT::Other), + {Res.getValue(1), Res}, Flags); + } else { + SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); + return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); + } } else { const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31)); - SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); - // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X - // FIXME: generated code sucks. - // TODO: Are there fast-math-flags to propagate to this FSUB? - SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Tmp); - True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True); - True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, - DAG.getConstant(0x80000000, dl, MVT::i32)); - SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src); - return DAG.getSelectCC(dl, Src, Tmp, True, False, ISD::SETGE); + SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); + SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT); + if (IsStrict) { + // Sel = Src < 0x80000000 + // FltOfs = select Sel, 0.0, 0x80000000 + // IntOfs = select Sel, 0, 0x80000000 + // Result = fp_to_sint(Src - FltOfs) ^ IntOfs + SDValue Chain = Op.getOperand(0); + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); + EVT DstSetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT); + SDValue Sel = + DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, Chain, true); + Chain = Sel.getValue(1); + + SDValue FltOfs = DAG.getSelect( + dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst); + Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT); + + SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, + DAG.getVTList(SrcVT, MVT::Other), + {Chain, Src, FltOfs}, Flags); + Chain = Val.getValue(1); + SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, + DAG.getVTList(DstVT, MVT::Other), + {Chain, Val}, Flags); + Chain = SInt.getValue(1); + SDValue IntOfs = DAG.getSelect( + dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask); + SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs); + return DAG.getMergeValues({Result, Chain}, dl); + } else { + // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X + // FIXME: generated code sucks. + SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst); + True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True); + True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask); + SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src); + return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE); + } } } @@ -12170,7 +12222,11 @@ .addReg(PPC::RM, RegState::ImplicitDefine); // Perform addition. - BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); + auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest) + .addReg(Src1) + .addReg(Src2); + if (MI.getFlag(MachineInstr::NoFPExcept)) + MIB.setMIFlag(MachineInstr::NoFPExcept); // Restore FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -182,7 +182,12 @@ // Perform FADD in round-to-zero mode. def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>; +def PPCstrict_faddrtz: SDNode<"PPCISD::STRICT_FADDRTZ", SDTFPBinOp, + [SDNPHasChain]>; +def PPCany_faddrtz: PatFrags<(ops node:$lhs, node:$rhs), + [(PPCfaddrtz node:$lhs, node:$rhs), + (PPCstrict_faddrtz node:$lhs, node:$rhs)]>; def PPCfsel : SDNode<"PPCISD::FSEL", // Type constraint for fsel. @@ -2960,9 +2965,9 @@ let Predicates = [HasFPU] in { // Custom inserter instruction to perform FADD in round-to-zero mode. -let Uses = [RM] in { +let Uses = [RM], mayRaiseFPException = 1 in { def FADDrtz: PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "", - [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>; + [(set f64:$FRT, (PPCany_faddrtz f64:$FRA, f64:$FRB))]>; } // The above pseudo gets expanded to make use of the following instructions diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll @@ -403,47 +403,39 @@ define signext i32 @ppcq_to_i32(ppc_fp128 %m) #0 { ; P8-LABEL: ppcq_to_i32: ; P8: # %bb.0: # %entry -; P8-NEXT: mflr r0 -; P8-NEXT: std r0, 16(r1) -; P8-NEXT: stdu r1, -112(r1) -; P8-NEXT: .cfi_def_cfa_offset 112 -; P8-NEXT: .cfi_offset lr, 16 -; P8-NEXT: bl __gcc_qtou -; P8-NEXT: nop +; P8-NEXT: mffs f0 +; P8-NEXT: mtfsb1 31 +; P8-NEXT: mtfsb0 30 +; P8-NEXT: fadd f1, f2, f1 +; P8-NEXT: mtfsf 1, f0 +; P8-NEXT: xscvdpsxws f0, f1 +; P8-NEXT: mffprwz r3, f0 ; P8-NEXT: extsw r3, r3 -; P8-NEXT: addi r1, r1, 112 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 ; P8-NEXT: blr ; ; P9-LABEL: ppcq_to_i32: ; P9: # %bb.0: # %entry -; P9-NEXT: mflr r0 -; P9-NEXT: std r0, 16(r1) -; P9-NEXT: stdu r1, -32(r1) -; P9-NEXT: .cfi_def_cfa_offset 32 -; P9-NEXT: .cfi_offset lr, 16 -; P9-NEXT: bl __gcc_qtou -; P9-NEXT: nop +; P9-NEXT: mffs f0 +; P9-NEXT: mtfsb1 31 +; P9-NEXT: mtfsb0 30 +; P9-NEXT: fadd f1, f2, f1 +; P9-NEXT: mtfsf 1, f0 +; P9-NEXT: xscvdpsxws f0, f1 +; P9-NEXT: mffprwz r3, f0 ; P9-NEXT: extsw r3, r3 -; P9-NEXT: addi r1, r1, 32 -; P9-NEXT: ld r0, 16(r1) -; P9-NEXT: mtlr r0 ; P9-NEXT: blr ; ; NOVSX-LABEL: ppcq_to_i32: ; NOVSX: # %bb.0: # %entry -; NOVSX-NEXT: mflr r0 -; NOVSX-NEXT: std r0, 16(r1) -; NOVSX-NEXT: stdu r1, -32(r1) -; NOVSX-NEXT: .cfi_def_cfa_offset 32 -; NOVSX-NEXT: .cfi_offset lr, 16 -; NOVSX-NEXT: bl __gcc_qtou -; NOVSX-NEXT: nop -; NOVSX-NEXT: extsw r3, r3 -; NOVSX-NEXT: addi r1, r1, 32 -; NOVSX-NEXT: ld r0, 16(r1) -; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: mffs f0 +; NOVSX-NEXT: mtfsb1 31 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: mtfsb0 30 +; NOVSX-NEXT: fadd f1, f2, f1 +; NOVSX-NEXT: mtfsf 1, f0 +; NOVSX-NEXT: fctiwz f0, f1 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwa r3, -4(r1) ; NOVSX-NEXT: blr entry: %conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 @@ -549,12 +541,40 @@ ; P8: # %bb.0: # %entry ; P8-NEXT: mflr r0 ; P8-NEXT: std r0, 16(r1) -; P8-NEXT: stdu r1, -112(r1) -; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: stdu r1, -128(r1) +; P8-NEXT: .cfi_def_cfa_offset 128 ; P8-NEXT: .cfi_offset lr, 16 -; P8-NEXT: bl __fixunstfsi +; P8-NEXT: .cfi_offset r30, -16 +; P8-NEXT: addis r3, r2, .LCPI11_0@toc@ha +; P8-NEXT: xxlxor f3, f3, f3 +; P8-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; P8-NEXT: lfs f0, .LCPI11_0@toc@l(r3) +; P8-NEXT: fcmpo cr0, f2, f3 +; P8-NEXT: lis r3, -32768 +; P8-NEXT: xxlxor f3, f3, f3 +; P8-NEXT: fcmpo cr1, f1, f0 +; P8-NEXT: crand 4*cr5+lt, 4*cr1+eq, lt +; P8-NEXT: crandc 4*cr5+gt, 4*cr1+lt, 4*cr1+eq +; P8-NEXT: cror 4*cr5+lt, 4*cr5+gt, 4*cr5+lt +; P8-NEXT: isel r30, 0, r3, 4*cr5+lt +; P8-NEXT: bc 12, 4*cr5+lt, .LBB11_2 +; P8-NEXT: # %bb.1: # %entry +; P8-NEXT: fmr f3, f0 +; P8-NEXT: .LBB11_2: # %entry +; P8-NEXT: xxlxor f4, f4, f4 +; P8-NEXT: bl __gcc_qsub ; P8-NEXT: nop -; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: mffs f0 +; P8-NEXT: mtfsb1 31 +; P8-NEXT: mtfsb0 30 +; P8-NEXT: fadd f1, f2, f1 +; P8-NEXT: mtfsf 1, f0 +; P8-NEXT: xscvdpsxws f0, f1 +; P8-NEXT: mffprwz r3, f0 +; P8-NEXT: xor r3, r3, r30 +; P8-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; P8-NEXT: clrldi r3, r3, 32 +; P8-NEXT: addi r1, r1, 128 ; P8-NEXT: ld r0, 16(r1) ; P8-NEXT: mtlr r0 ; P8-NEXT: blr @@ -562,28 +582,88 @@ ; P9-LABEL: ppcq_to_u32: ; P9: # %bb.0: # %entry ; P9-NEXT: mflr r0 -; P9-NEXT: std r0, 16(r1) -; P9-NEXT: stdu r1, -32(r1) -; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_def_cfa_offset 48 ; P9-NEXT: .cfi_offset lr, 16 -; P9-NEXT: bl __fixunstfsi +; P9-NEXT: .cfi_offset r30, -16 +; P9-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -48(r1) +; P9-NEXT: addis r3, r2, .LCPI11_0@toc@ha +; P9-NEXT: xxlxor f3, f3, f3 +; P9-NEXT: lfs f0, .LCPI11_0@toc@l(r3) +; P9-NEXT: fcmpo cr1, f2, f3 +; P9-NEXT: lis r3, -32768 +; P9-NEXT: fcmpo cr0, f1, f0 +; P9-NEXT: xxlxor f3, f3, f3 +; P9-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt +; P9-NEXT: crandc 4*cr5+gt, lt, eq +; P9-NEXT: cror 4*cr5+lt, 4*cr5+gt, 4*cr5+lt +; P9-NEXT: isel r30, 0, r3, 4*cr5+lt +; P9-NEXT: bc 12, 4*cr5+lt, .LBB11_2 +; P9-NEXT: # %bb.1: # %entry +; P9-NEXT: fmr f3, f0 +; P9-NEXT: .LBB11_2: # %entry +; P9-NEXT: xxlxor f4, f4, f4 +; P9-NEXT: bl __gcc_qsub ; P9-NEXT: nop -; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: mffs f0 +; P9-NEXT: mtfsb1 31 +; P9-NEXT: mtfsb0 30 +; P9-NEXT: fadd f1, f2, f1 +; P9-NEXT: mtfsf 1, f0 +; P9-NEXT: xscvdpsxws f0, f1 +; P9-NEXT: mffprwz r3, f0 +; P9-NEXT: xor r3, r3, r30 +; P9-NEXT: clrldi r3, r3, 32 +; P9-NEXT: addi r1, r1, 48 ; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; P9-NEXT: mtlr r0 ; P9-NEXT: blr ; ; NOVSX-LABEL: ppcq_to_u32: ; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mfocrf r12, 32 ; NOVSX-NEXT: mflr r0 ; NOVSX-NEXT: std r0, 16(r1) -; NOVSX-NEXT: stdu r1, -32(r1) -; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: stw r12, 8(r1) +; NOVSX-NEXT: stdu r1, -48(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 48 ; NOVSX-NEXT: .cfi_offset lr, 16 -; NOVSX-NEXT: bl __fixunstfsi +; NOVSX-NEXT: .cfi_offset cr2, 8 +; NOVSX-NEXT: addis r3, r2, .LCPI11_0@toc@ha +; NOVSX-NEXT: addis r4, r2, .LCPI11_1@toc@ha +; NOVSX-NEXT: lfs f0, .LCPI11_0@toc@l(r3) +; NOVSX-NEXT: lfs f4, .LCPI11_1@toc@l(r4) +; NOVSX-NEXT: fcmpo cr0, f1, f0 +; NOVSX-NEXT: fcmpo cr1, f2, f4 +; NOVSX-NEXT: fmr f3, f4 +; NOVSX-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt +; NOVSX-NEXT: crandc 4*cr5+gt, lt, eq +; NOVSX-NEXT: cror 4*cr2+lt, 4*cr5+gt, 4*cr5+lt +; NOVSX-NEXT: bc 12, 4*cr2+lt, .LBB11_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr f3, f0 +; NOVSX-NEXT: .LBB11_2: # %entry +; NOVSX-NEXT: bl __gcc_qsub ; NOVSX-NEXT: nop -; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: mffs f0 +; NOVSX-NEXT: mtfsb1 31 +; NOVSX-NEXT: addi r3, r1, 44 +; NOVSX-NEXT: mtfsb0 30 +; NOVSX-NEXT: fadd f1, f2, f1 +; NOVSX-NEXT: mtfsf 1, f0 +; NOVSX-NEXT: fctiwz f0, f1 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lis r3, -32768 +; NOVSX-NEXT: lwz r4, 44(r1) +; NOVSX-NEXT: isel r3, 0, r3, 4*cr2+lt +; NOVSX-NEXT: xor r3, r4, r3 +; NOVSX-NEXT: clrldi r3, r3, 32 +; NOVSX-NEXT: addi r1, r1, 48 ; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: lwz r12, 8(r1) +; NOVSX-NEXT: mtocrf 32, r12 ; NOVSX-NEXT: mtlr r0 ; NOVSX-NEXT: blr entry: @@ -747,12 +827,17 @@ ret fp128 %conv } -define void @fptoint_nofpexcept(fp128 %m, i32* %addr1, i64* %addr2) { +define void @fptoint_nofpexcept(ppc_fp128 %p, fp128 %m, i32* %addr1, i64* %addr2) { ; MIR-LABEL: name: fptoint_nofpexcept ; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPSWZ ; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPUWZ ; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPSDZ ; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPUDZ +; +; MIR: renamable $f{{[0-9]+}} = nofpexcept FADD +; MIR: renamable $f{{[0-9]+}} = XSCVDPSXWS +; MIR: renamable $f{{[0-9]+}} = nofpexcept FADD +; MIR: renamable $f{{[0-9]+}} = XSCVDPSXWS entry: %conv1 = tail call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %m, metadata !"fpexcept.ignore") #0 store volatile i32 %conv1, i32* %addr1, align 4 @@ -762,6 +847,11 @@ store volatile i64 %conv3, i64* %addr2, align 8 %conv4 = tail call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %m, metadata !"fpexcept.ignore") #0 store volatile i64 %conv4, i64* %addr2, align 8 + + %conv5 = tail call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128 %p, metadata !"fpexcept.ignore") #0 + store volatile i32 %conv5, i32* %addr1, align 4 + %conv6 = tail call i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(ppc_fp128 %p, metadata !"fpexcept.ignore") #0 + store volatile i32 %conv6, i32* %addr1, align 4 ret void } diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll @@ -1202,38 +1202,36 @@ define i32 @test_fptosi_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 { ; PC64LE-LABEL: test_fptosi_ppc_i32_ppc_fp128: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -32(1) -; PC64LE-NEXT: bl __gcc_qtou -; PC64LE-NEXT: nop -; PC64LE-NEXT: addi 1, 1, 32 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: mffs 0 +; PC64LE-NEXT: mtfsb1 31 +; PC64LE-NEXT: mtfsb0 30 +; PC64LE-NEXT: fadd 1, 2, 1 +; PC64LE-NEXT: mtfsf 1, 0 +; PC64LE-NEXT: xscvdpsxws 0, 1 +; PC64LE-NEXT: mffprwz 3, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: test_fptosi_ppc_i32_ppc_fp128: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -32(1) -; PC64LE9-NEXT: bl __gcc_qtou -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addi 1, 1, 32 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: mffs 0 +; PC64LE9-NEXT: mtfsb1 31 +; PC64LE9-NEXT: mtfsb0 30 +; PC64LE9-NEXT: fadd 1, 2, 1 +; PC64LE9-NEXT: mtfsf 1, 0 +; PC64LE9-NEXT: xscvdpsxws 0, 1 +; PC64LE9-NEXT: mffprwz 3, 0 ; PC64LE9-NEXT: blr ; ; PC64-LABEL: test_fptosi_ppc_i32_ppc_fp128: ; PC64: # %bb.0: # %entry -; PC64-NEXT: mflr 0 -; PC64-NEXT: std 0, 16(1) -; PC64-NEXT: stdu 1, -112(1) -; PC64-NEXT: bl __gcc_qtou -; PC64-NEXT: nop -; PC64-NEXT: addi 1, 1, 112 -; PC64-NEXT: ld 0, 16(1) -; PC64-NEXT: mtlr 0 +; PC64-NEXT: mffs 0 +; PC64-NEXT: mtfsb1 31 +; PC64-NEXT: mtfsb0 30 +; PC64-NEXT: fadd 1, 2, 1 +; PC64-NEXT: mtfsf 1, 0 +; PC64-NEXT: fctiwz 0, 1 +; PC64-NEXT: stfd 0, -8(1) +; PC64-NEXT: lwz 3, -4(1) ; PC64-NEXT: blr entry: %fpext = call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128( @@ -1289,24 +1287,76 @@ ; PC64LE-LABEL: test_fptoui_ppc_i32_ppc_fp128: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 +; PC64LE-NEXT: std 30, -16(1) # 8-byte Folded Spill ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -32(1) -; PC64LE-NEXT: bl __fixunstfsi +; PC64LE-NEXT: stdu 1, -48(1) +; PC64LE-NEXT: addis 3, 2, .LCPI31_0@toc@ha +; PC64LE-NEXT: xxlxor 3, 3, 3 +; PC64LE-NEXT: lfs 0, .LCPI31_0@toc@l(3) +; PC64LE-NEXT: fcmpo 0, 2, 3 +; PC64LE-NEXT: lis 3, -32768 +; PC64LE-NEXT: xxlxor 3, 3, 3 +; PC64LE-NEXT: fcmpo 1, 1, 0 +; PC64LE-NEXT: crand 20, 6, 0 +; PC64LE-NEXT: crandc 21, 4, 6 +; PC64LE-NEXT: cror 20, 21, 20 +; PC64LE-NEXT: isel 30, 0, 3, 20 +; PC64LE-NEXT: bc 12, 20, .LBB31_2 +; PC64LE-NEXT: # %bb.1: # %entry +; PC64LE-NEXT: fmr 3, 0 +; PC64LE-NEXT: .LBB31_2: # %entry +; PC64LE-NEXT: xxlxor 4, 4, 4 +; PC64LE-NEXT: bl __gcc_qsub ; PC64LE-NEXT: nop -; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: mffs 0 +; PC64LE-NEXT: mtfsb1 31 +; PC64LE-NEXT: mtfsb0 30 +; PC64LE-NEXT: fadd 1, 2, 1 +; PC64LE-NEXT: mtfsf 1, 0 +; PC64LE-NEXT: xscvdpsxws 0, 1 +; PC64LE-NEXT: mffprwz 3, 0 +; PC64LE-NEXT: xor 3, 3, 30 +; PC64LE-NEXT: addi 1, 1, 48 ; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: test_fptoui_ppc_i32_ppc_fp128: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: std 30, -16(1) # 8-byte Folded Spill ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -32(1) -; PC64LE9-NEXT: bl __fixunstfsi +; PC64LE9-NEXT: stdu 1, -48(1) +; PC64LE9-NEXT: addis 3, 2, .LCPI31_0@toc@ha +; PC64LE9-NEXT: xxlxor 3, 3, 3 +; PC64LE9-NEXT: lfs 0, .LCPI31_0@toc@l(3) +; PC64LE9-NEXT: fcmpo 1, 2, 3 +; PC64LE9-NEXT: lis 3, -32768 +; PC64LE9-NEXT: fcmpo 0, 1, 0 +; PC64LE9-NEXT: xxlxor 3, 3, 3 +; PC64LE9-NEXT: crand 20, 2, 4 +; PC64LE9-NEXT: crandc 21, 0, 2 +; PC64LE9-NEXT: cror 20, 21, 20 +; PC64LE9-NEXT: isel 30, 0, 3, 20 +; PC64LE9-NEXT: bc 12, 20, .LBB31_2 +; PC64LE9-NEXT: # %bb.1: # %entry +; PC64LE9-NEXT: fmr 3, 0 +; PC64LE9-NEXT: .LBB31_2: # %entry +; PC64LE9-NEXT: xxlxor 4, 4, 4 +; PC64LE9-NEXT: bl __gcc_qsub ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: mffs 0 +; PC64LE9-NEXT: mtfsb1 31 +; PC64LE9-NEXT: mtfsb0 30 +; PC64LE9-NEXT: fadd 1, 2, 1 +; PC64LE9-NEXT: mtfsf 1, 0 +; PC64LE9-NEXT: xscvdpsxws 0, 1 +; PC64LE9-NEXT: mffprwz 3, 0 +; PC64LE9-NEXT: xor 3, 3, 30 +; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr ; @@ -1314,12 +1364,45 @@ ; PC64: # %bb.0: # %entry ; PC64-NEXT: mflr 0 ; PC64-NEXT: std 0, 16(1) -; PC64-NEXT: stdu 1, -112(1) -; PC64-NEXT: bl __fixunstfsi +; PC64-NEXT: mfcr 12 +; PC64-NEXT: stw 12, 8(1) +; PC64-NEXT: stdu 1, -128(1) +; PC64-NEXT: addis 3, 2, .LCPI31_0@toc@ha +; PC64-NEXT: lfs 0, .LCPI31_0@toc@l(3) +; PC64-NEXT: addis 3, 2, .LCPI31_1@toc@ha +; PC64-NEXT: lfs 4, .LCPI31_1@toc@l(3) +; PC64-NEXT: fcmpo 0, 1, 0 +; PC64-NEXT: crandc 21, 0, 2 +; PC64-NEXT: fcmpo 1, 2, 4 +; PC64-NEXT: crand 20, 2, 4 +; PC64-NEXT: cror 8, 21, 20 +; PC64-NEXT: fmr 3, 4 +; PC64-NEXT: bc 12, 8, .LBB31_2 +; PC64-NEXT: # %bb.1: # %entry +; PC64-NEXT: fmr 3, 0 +; PC64-NEXT: .LBB31_2: # %entry +; PC64-NEXT: bl __gcc_qsub ; PC64-NEXT: nop -; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: mffs 0 +; PC64-NEXT: mtfsb1 31 +; PC64-NEXT: lis 4, -32768 +; PC64-NEXT: bc 12, 8, .LBB31_3 +; PC64-NEXT: b .LBB31_4 +; PC64-NEXT: .LBB31_3: # %entry +; PC64-NEXT: li 4, 0 +; PC64-NEXT: .LBB31_4: # %entry +; PC64-NEXT: mtfsb0 30 +; PC64-NEXT: fadd 1, 2, 1 +; PC64-NEXT: mtfsf 1, 0 +; PC64-NEXT: fctiwz 0, 1 +; PC64-NEXT: stfd 0, 120(1) +; PC64-NEXT: lwz 3, 124(1) +; PC64-NEXT: xor 3, 3, 4 +; PC64-NEXT: addi 1, 1, 128 ; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: lwz 12, 8(1) ; PC64-NEXT: mtlr 0 +; PC64-NEXT: mtcrf 32, 12 # cr2 ; PC64-NEXT: blr entry: %fpext = call i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(