diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -262,6 +262,8 @@ // PPC (the libcall is not available). setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom); // We do not currently implement these libm ops for PowerPC. setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); @@ -8175,7 +8177,7 @@ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on // PPC (the libcall is not available). - if (Src.getValueType() == MVT::ppcf128 && !IsStrict) { + if (Src.getValueType() == MVT::ppcf128) { if (Op.getValueType() == MVT::i32) { if (IsSigned) { SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src, @@ -8187,7 +8189,11 @@ SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); // Now use a smaller FP_TO_SINT. - return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); + if (IsStrict) + return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {MVT::i32, MVT::Other}, + {Op.getOperand(0), Res}); + else + return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); } else { const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31)); @@ -8195,12 +8201,35 @@ // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. // TODO: Are there fast-math-flags to propagate to this FSUB? - SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Tmp); - True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True); - True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, - DAG.getConstant(0x80000000, dl, MVT::i32)); - SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src); - return DAG.getSelectCC(dl, Src, Tmp, True, False, ISD::SETGE); + if (IsStrict) { + SDValue Chain = Op.getOperand(0); + EVT CondVT = getSetCCResultType( + DAG.getDataLayout(), *DAG.getContext(), Src.getValueType()); + SDValue SetCC = DAG.getSetCC(dl, CondVT, Src, Tmp, ISD::SETGE, Chain); + Chain = SetCC.getValue(1); + SDValue True = + DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::ppcf128, MVT::Other}, + {Chain, Src, Tmp}); + Chain = True.getValue(1); + True = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {MVT::i32, MVT::Other}, + {Chain, True}); + Chain = True.getValue(1); + True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, + DAG.getConstant(0x80000000, dl, MVT::i32)); + SDValue False = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, + {MVT::i32, MVT::Other}, {Chain, Src}); + Chain = False.getValue(1); + SDValue Sel = + DAG.getSelect(dl, True.getValueType(), SetCC, True, False); + return DAG.getMergeValues({Sel, Chain}, dl); + } else { + SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Tmp); + True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True); + True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, + DAG.getConstant(0x80000000, dl, MVT::i32)); + SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src); + return DAG.getSelectCC(dl, Src, Tmp, True, False, ISD::SETGE); + } } } diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll @@ -403,47 +403,39 @@ define signext i32 @ppcq_to_i32(ppc_fp128 %m) #0 { ; P8-LABEL: ppcq_to_i32: ; P8: # %bb.0: # %entry -; P8-NEXT: mflr r0 -; P8-NEXT: std r0, 16(r1) -; P8-NEXT: stdu r1, -112(r1) -; P8-NEXT: .cfi_def_cfa_offset 112 -; P8-NEXT: .cfi_offset lr, 16 -; P8-NEXT: bl __gcc_qtou -; P8-NEXT: nop +; P8-NEXT: mffs f0 +; P8-NEXT: mtfsb1 31 +; P8-NEXT: mtfsb0 30 +; P8-NEXT: fadd f1, f2, f1 +; P8-NEXT: mtfsf 1, f0 +; P8-NEXT: xscvdpsxws f0, f1 +; P8-NEXT: mffprwz r3, f0 ; P8-NEXT: extsw r3, r3 -; P8-NEXT: addi r1, r1, 112 -; P8-NEXT: ld r0, 16(r1) -; P8-NEXT: mtlr r0 ; P8-NEXT: blr ; ; P9-LABEL: ppcq_to_i32: ; P9: # %bb.0: # %entry -; P9-NEXT: mflr r0 -; P9-NEXT: std r0, 16(r1) -; P9-NEXT: stdu r1, -32(r1) -; P9-NEXT: .cfi_def_cfa_offset 32 -; P9-NEXT: .cfi_offset lr, 16 -; P9-NEXT: bl __gcc_qtou -; P9-NEXT: nop +; P9-NEXT: mffs f0 +; P9-NEXT: mtfsb1 31 +; P9-NEXT: mtfsb0 30 +; P9-NEXT: fadd f1, f2, f1 +; P9-NEXT: mtfsf 1, f0 +; P9-NEXT: xscvdpsxws f0, f1 +; P9-NEXT: mffprwz r3, f0 ; P9-NEXT: extsw r3, r3 -; P9-NEXT: addi r1, r1, 32 -; P9-NEXT: ld r0, 16(r1) -; P9-NEXT: mtlr r0 ; P9-NEXT: blr ; ; NOVSX-LABEL: ppcq_to_i32: ; NOVSX: # %bb.0: # %entry -; NOVSX-NEXT: mflr r0 -; NOVSX-NEXT: std r0, 16(r1) -; NOVSX-NEXT: stdu r1, -32(r1) -; NOVSX-NEXT: .cfi_def_cfa_offset 32 -; NOVSX-NEXT: .cfi_offset lr, 16 -; NOVSX-NEXT: bl __gcc_qtou -; NOVSX-NEXT: nop -; NOVSX-NEXT: extsw r3, r3 -; NOVSX-NEXT: addi r1, r1, 32 -; NOVSX-NEXT: ld r0, 16(r1) -; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: mffs f0 +; NOVSX-NEXT: mtfsb1 31 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: mtfsb0 30 +; NOVSX-NEXT: fadd f1, f2, f1 +; NOVSX-NEXT: mtfsf 1, f0 +; NOVSX-NEXT: fctiwz f0, f1 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwa r3, -4(r1) ; NOVSX-NEXT: blr entry: %conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 @@ -547,43 +539,168 @@ define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 { ; P8-LABEL: ppcq_to_u32: ; P8: # %bb.0: # %entry +; P8-NEXT: mfcr r12 ; P8-NEXT: mflr r0 ; P8-NEXT: std r0, 16(r1) -; P8-NEXT: stdu r1, -112(r1) -; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: stw r12, 8(r1) +; P8-NEXT: stdu r1, -128(r1) +; P8-NEXT: .cfi_def_cfa_offset 128 ; P8-NEXT: .cfi_offset lr, 16 -; P8-NEXT: bl __fixunstfsi +; P8-NEXT: .cfi_offset f30, -16 +; P8-NEXT: .cfi_offset f31, -8 +; P8-NEXT: .cfi_offset cr2, 8 +; P8-NEXT: addis r3, r2, .LCPI11_0@toc@ha +; P8-NEXT: xxlxor f0, f0, f0 +; P8-NEXT: stfd f30, 112(r1) # 8-byte Folded Spill +; P8-NEXT: xxlxor f4, f4, f4 +; P8-NEXT: stfd f31, 120(r1) # 8-byte Folded Spill +; P8-NEXT: lfs f3, .LCPI11_0@toc@l(r3) +; P8-NEXT: fcmpu cr0, f2, f0 +; P8-NEXT: fmr f31, f2 +; P8-NEXT: fmr f30, f1 +; P8-NEXT: fcmpu cr1, f1, f3 +; P8-NEXT: crandc 4*cr5+lt, 4*cr1+eq, lt +; P8-NEXT: crnor 4*cr5+gt, 4*cr1+lt, 4*cr1+eq +; P8-NEXT: cror 4*cr2+lt, 4*cr5+gt, 4*cr5+lt +; P8-NEXT: bl __gcc_qsub ; P8-NEXT: nop -; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: mffs f0 +; P8-NEXT: mtfsb1 31 +; P8-NEXT: mtfsb0 30 +; P8-NEXT: fadd f1, f2, f1 +; P8-NEXT: mtfsf 1, f0 +; P8-NEXT: xscvdpsxws f0, f1 +; P8-NEXT: mffprwz r3, f0 +; P8-NEXT: addis r3, r3, -32768 +; P8-NEXT: mffs f0 +; P8-NEXT: mtfsb1 31 +; P8-NEXT: mtfsb0 30 +; P8-NEXT: fadd f1, f31, f30 +; P8-NEXT: mtfsf 1, f0 +; P8-NEXT: lfd f31, 120(r1) # 8-byte Folded Reload +; P8-NEXT: lfd f30, 112(r1) # 8-byte Folded Reload +; P8-NEXT: xscvdpsxws f0, f1 +; P8-NEXT: mffprwz r4, f0 +; P8-NEXT: isel r3, r3, r4, 4*cr2+lt +; P8-NEXT: clrldi r3, r3, 32 +; P8-NEXT: addi r1, r1, 128 ; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: lwz r12, 8(r1) +; P8-NEXT: mtocrf 32, r12 ; P8-NEXT: mtlr r0 ; P8-NEXT: blr ; ; P9-LABEL: ppcq_to_u32: ; P9: # %bb.0: # %entry ; P9-NEXT: mflr r0 +; P9-NEXT: mfocrf r12, 32 ; P9-NEXT: std r0, 16(r1) -; P9-NEXT: stdu r1, -32(r1) -; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: stw r12, 8(r1) +; P9-NEXT: stdu r1, -48(r1) +; P9-NEXT: .cfi_def_cfa_offset 48 ; P9-NEXT: .cfi_offset lr, 16 -; P9-NEXT: bl __fixunstfsi +; P9-NEXT: .cfi_offset f30, -16 +; P9-NEXT: .cfi_offset f31, -8 +; P9-NEXT: .cfi_offset cr2, 8 +; P9-NEXT: addis r3, r2, .LCPI11_0@toc@ha +; P9-NEXT: xxlxor f0, f0, f0 +; P9-NEXT: xxlxor f4, f4, f4 +; P9-NEXT: stfd f30, 32(r1) # 8-byte Folded Spill +; P9-NEXT: stfd f31, 40(r1) # 8-byte Folded Spill +; P9-NEXT: fmr f31, f2 +; P9-NEXT: fmr f30, f1 +; P9-NEXT: fcmpu cr0, f2, f0 +; P9-NEXT: lfs f3, .LCPI11_0@toc@l(r3) +; P9-NEXT: fcmpu cr1, f1, f3 +; P9-NEXT: crandc 4*cr5+lt, 4*cr1+eq, lt +; P9-NEXT: crnor 4*cr5+gt, 4*cr1+lt, 4*cr1+eq +; P9-NEXT: cror 4*cr2+lt, 4*cr5+gt, 4*cr5+lt +; P9-NEXT: bl __gcc_qsub ; P9-NEXT: nop -; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: mffs f0 +; P9-NEXT: mtfsb1 31 +; P9-NEXT: mtfsb0 30 +; P9-NEXT: fadd f1, f2, f1 +; P9-NEXT: mtfsf 1, f0 +; P9-NEXT: xscvdpsxws f0, f1 +; P9-NEXT: mffprwz r3, f0 +; P9-NEXT: addis r3, r3, -32768 +; P9-NEXT: mffs f0 +; P9-NEXT: mtfsb1 31 +; P9-NEXT: mtfsb0 30 +; P9-NEXT: fadd f1, f31, f30 +; P9-NEXT: mtfsf 1, f0 +; P9-NEXT: xscvdpsxws f0, f1 +; P9-NEXT: lfd f31, 40(r1) # 8-byte Folded Reload +; P9-NEXT: lfd f30, 32(r1) # 8-byte Folded Reload +; P9-NEXT: mffprwz r4, f0 +; P9-NEXT: isel r3, r3, r4, 4*cr2+lt +; P9-NEXT: clrldi r3, r3, 32 +; P9-NEXT: addi r1, r1, 48 ; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: lwz r12, 8(r1) ; P9-NEXT: mtlr r0 +; P9-NEXT: mtocrf 32, r12 ; P9-NEXT: blr ; ; NOVSX-LABEL: ppcq_to_u32: ; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mfocrf r12, 32 ; NOVSX-NEXT: mflr r0 ; NOVSX-NEXT: std r0, 16(r1) -; NOVSX-NEXT: stdu r1, -32(r1) -; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: stw r12, 8(r1) +; NOVSX-NEXT: stdu r1, -64(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 64 ; NOVSX-NEXT: .cfi_offset lr, 16 -; NOVSX-NEXT: bl __fixunstfsi +; NOVSX-NEXT: .cfi_offset f29, -24 +; NOVSX-NEXT: .cfi_offset f30, -16 +; NOVSX-NEXT: .cfi_offset f31, -8 +; NOVSX-NEXT: .cfi_offset cr2, 8 +; NOVSX-NEXT: addis r3, r2, .LCPI11_0@toc@ha +; NOVSX-NEXT: stfd f29, 40(r1) # 8-byte Folded Spill +; NOVSX-NEXT: stfd f30, 48(r1) # 8-byte Folded Spill +; NOVSX-NEXT: lfs f3, .LCPI11_0@toc@l(r3) +; NOVSX-NEXT: addis r3, r2, .LCPI11_1@toc@ha +; NOVSX-NEXT: stfd f31, 56(r1) # 8-byte Folded Spill +; NOVSX-NEXT: fmr f31, f1 +; NOVSX-NEXT: lfs f29, .LCPI11_1@toc@l(r3) +; NOVSX-NEXT: fmr f30, f2 +; NOVSX-NEXT: fcmpu cr2, f1, f3 +; NOVSX-NEXT: fmr f4, f29 +; NOVSX-NEXT: bl __gcc_qsub ; NOVSX-NEXT: nop -; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: mffs f0 +; NOVSX-NEXT: mtfsb1 31 +; NOVSX-NEXT: addi r3, r1, 32 +; NOVSX-NEXT: mtfsb0 30 +; NOVSX-NEXT: fadd f1, f2, f1 +; NOVSX-NEXT: mtfsf 1, f0 +; NOVSX-NEXT: fctiwz f0, f1 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: mffs f0 +; NOVSX-NEXT: fcmpu cr0, f30, f29 +; NOVSX-NEXT: mtfsb1 31 +; NOVSX-NEXT: crnor 4*cr5+gt, 4*cr2+lt, 4*cr2+eq +; NOVSX-NEXT: addi r3, r1, 36 +; NOVSX-NEXT: mtfsb0 30 +; NOVSX-NEXT: fadd f1, f30, f31 +; NOVSX-NEXT: mtfsf 1, f0 +; NOVSX-NEXT: lwz r4, 32(r1) +; NOVSX-NEXT: addis r4, r4, -32768 +; NOVSX-NEXT: fctiwz f0, f1 +; NOVSX-NEXT: crandc 4*cr5+lt, 4*cr2+eq, lt +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: cror 4*cr5+lt, 4*cr5+gt, 4*cr5+lt +; NOVSX-NEXT: lwz r3, 36(r1) +; NOVSX-NEXT: lfd f31, 56(r1) # 8-byte Folded Reload +; NOVSX-NEXT: lfd f30, 48(r1) # 8-byte Folded Reload +; NOVSX-NEXT: lfd f29, 40(r1) # 8-byte Folded Reload +; NOVSX-NEXT: isel r3, r4, r3, 4*cr5+lt +; NOVSX-NEXT: clrldi r3, r3, 32 +; NOVSX-NEXT: addi r1, r1, 64 ; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: lwz r12, 8(r1) +; NOVSX-NEXT: mtocrf 32, r12 ; NOVSX-NEXT: mtlr r0 ; NOVSX-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll @@ -1202,38 +1202,36 @@ define i32 @test_fptosi_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 { ; PC64LE-LABEL: test_fptosi_ppc_i32_ppc_fp128: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -32(1) -; PC64LE-NEXT: bl __gcc_qtou -; PC64LE-NEXT: nop -; PC64LE-NEXT: addi 1, 1, 32 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: mffs 0 +; PC64LE-NEXT: mtfsb1 31 +; PC64LE-NEXT: mtfsb0 30 +; PC64LE-NEXT: fadd 1, 2, 1 +; PC64LE-NEXT: mtfsf 1, 0 +; PC64LE-NEXT: xscvdpsxws 0, 1 +; PC64LE-NEXT: mffprwz 3, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: test_fptosi_ppc_i32_ppc_fp128: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -32(1) -; PC64LE9-NEXT: bl __gcc_qtou -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addi 1, 1, 32 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: mffs 0 +; PC64LE9-NEXT: mtfsb1 31 +; PC64LE9-NEXT: mtfsb0 30 +; PC64LE9-NEXT: fadd 1, 2, 1 +; PC64LE9-NEXT: mtfsf 1, 0 +; PC64LE9-NEXT: xscvdpsxws 0, 1 +; PC64LE9-NEXT: mffprwz 3, 0 ; PC64LE9-NEXT: blr ; ; PC64-LABEL: test_fptosi_ppc_i32_ppc_fp128: ; PC64: # %bb.0: # %entry -; PC64-NEXT: mflr 0 -; PC64-NEXT: std 0, 16(1) -; PC64-NEXT: stdu 1, -112(1) -; PC64-NEXT: bl __gcc_qtou -; PC64-NEXT: nop -; PC64-NEXT: addi 1, 1, 112 -; PC64-NEXT: ld 0, 16(1) -; PC64-NEXT: mtlr 0 +; PC64-NEXT: mffs 0 +; PC64-NEXT: mtfsb1 31 +; PC64-NEXT: mtfsb0 30 +; PC64-NEXT: fadd 1, 2, 1 +; PC64-NEXT: mtfsf 1, 0 +; PC64-NEXT: fctiwz 0, 1 +; PC64-NEXT: stfd 0, -8(1) +; PC64-NEXT: lwz 3, -4(1) ; PC64-NEXT: blr entry: %fpext = call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128( @@ -1288,38 +1286,152 @@ define i32 @test_fptoui_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 { ; PC64LE-LABEL: test_fptoui_ppc_i32_ppc_fp128: ; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT: mfocrf 12, 32 ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -32(1) -; PC64LE-NEXT: bl __fixunstfsi +; PC64LE-NEXT: stw 12, 8(1) +; PC64LE-NEXT: stdu 1, -48(1) +; PC64LE-NEXT: addis 3, 2, .LCPI31_0@toc@ha +; PC64LE-NEXT: xxlxor 0, 0, 0 +; PC64LE-NEXT: stfd 30, 32(1) # 8-byte Folded Spill +; PC64LE-NEXT: xxlxor 4, 4, 4 +; PC64LE-NEXT: stfd 31, 40(1) # 8-byte Folded Spill +; PC64LE-NEXT: lfs 3, .LCPI31_0@toc@l(3) +; PC64LE-NEXT: fcmpu 0, 2, 0 +; PC64LE-NEXT: fmr 31, 2 +; PC64LE-NEXT: fmr 30, 1 +; PC64LE-NEXT: fcmpu 1, 1, 3 +; PC64LE-NEXT: crandc 20, 6, 0 +; PC64LE-NEXT: crnor 21, 4, 6 +; PC64LE-NEXT: cror 8, 21, 20 +; PC64LE-NEXT: bl __gcc_qsub ; PC64LE-NEXT: nop -; PC64LE-NEXT: addi 1, 1, 32 +; PC64LE-NEXT: mffs 0 +; PC64LE-NEXT: mtfsb1 31 +; PC64LE-NEXT: mtfsb0 30 +; PC64LE-NEXT: fadd 1, 2, 1 +; PC64LE-NEXT: mtfsf 1, 0 +; PC64LE-NEXT: xscvdpsxws 0, 1 +; PC64LE-NEXT: mffprwz 3, 0 +; PC64LE-NEXT: addis 3, 3, -32768 +; PC64LE-NEXT: mffs 0 +; PC64LE-NEXT: mtfsb1 31 +; PC64LE-NEXT: mtfsb0 30 +; PC64LE-NEXT: fadd 1, 31, 30 +; PC64LE-NEXT: mtfsf 1, 0 +; PC64LE-NEXT: lfd 31, 40(1) # 8-byte Folded Reload +; PC64LE-NEXT: lfd 30, 32(1) # 8-byte Folded Reload +; PC64LE-NEXT: xscvdpsxws 0, 1 +; PC64LE-NEXT: mffprwz 4, 0 +; PC64LE-NEXT: isel 3, 3, 4, 8 +; PC64LE-NEXT: addi 1, 1, 48 ; PC64LE-NEXT: ld 0, 16(1) +; PC64LE-NEXT: lwz 12, 8(1) +; PC64LE-NEXT: mtocrf 32, 12 ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: test_fptoui_ppc_i32_ppc_fp128: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 +; PC64LE9-NEXT: mfocrf 12, 32 ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -32(1) -; PC64LE9-NEXT: bl __fixunstfsi +; PC64LE9-NEXT: stw 12, 8(1) +; PC64LE9-NEXT: stdu 1, -48(1) +; PC64LE9-NEXT: addis 3, 2, .LCPI31_0@toc@ha +; PC64LE9-NEXT: xxlxor 0, 0, 0 +; PC64LE9-NEXT: xxlxor 4, 4, 4 +; PC64LE9-NEXT: stfd 30, 32(1) # 8-byte Folded Spill +; PC64LE9-NEXT: stfd 31, 40(1) # 8-byte Folded Spill +; PC64LE9-NEXT: fmr 31, 2 +; PC64LE9-NEXT: fmr 30, 1 +; PC64LE9-NEXT: fcmpu 0, 2, 0 +; PC64LE9-NEXT: lfs 3, .LCPI31_0@toc@l(3) +; PC64LE9-NEXT: fcmpu 1, 1, 3 +; PC64LE9-NEXT: crandc 20, 6, 0 +; PC64LE9-NEXT: crnor 21, 4, 6 +; PC64LE9-NEXT: cror 8, 21, 20 +; PC64LE9-NEXT: bl __gcc_qsub ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addi 1, 1, 32 +; PC64LE9-NEXT: mffs 0 +; PC64LE9-NEXT: mtfsb1 31 +; PC64LE9-NEXT: mtfsb0 30 +; PC64LE9-NEXT: fadd 1, 2, 1 +; PC64LE9-NEXT: mtfsf 1, 0 +; PC64LE9-NEXT: xscvdpsxws 0, 1 +; PC64LE9-NEXT: mffprwz 3, 0 +; PC64LE9-NEXT: addis 3, 3, -32768 +; PC64LE9-NEXT: mffs 0 +; PC64LE9-NEXT: mtfsb1 31 +; PC64LE9-NEXT: mtfsb0 30 +; PC64LE9-NEXT: fadd 1, 31, 30 +; PC64LE9-NEXT: mtfsf 1, 0 +; PC64LE9-NEXT: xscvdpsxws 0, 1 +; PC64LE9-NEXT: lfd 31, 40(1) # 8-byte Folded Reload +; PC64LE9-NEXT: lfd 30, 32(1) # 8-byte Folded Reload +; PC64LE9-NEXT: mffprwz 4, 0 +; PC64LE9-NEXT: isel 3, 3, 4, 8 +; PC64LE9-NEXT: addi 1, 1, 48 ; PC64LE9-NEXT: ld 0, 16(1) +; PC64LE9-NEXT: lwz 12, 8(1) ; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: mtocrf 32, 12 ; PC64LE9-NEXT: blr ; ; PC64-LABEL: test_fptoui_ppc_i32_ppc_fp128: ; PC64: # %bb.0: # %entry ; PC64-NEXT: mflr 0 ; PC64-NEXT: std 0, 16(1) -; PC64-NEXT: stdu 1, -112(1) -; PC64-NEXT: bl __fixunstfsi +; PC64-NEXT: mfcr 12 +; PC64-NEXT: stw 12, 8(1) +; PC64-NEXT: stdu 1, -160(1) +; PC64-NEXT: addis 3, 2, .LCPI31_0@toc@ha +; PC64-NEXT: stfd 29, 136(1) # 8-byte Folded Spill +; PC64-NEXT: lfs 3, .LCPI31_0@toc@l(3) +; PC64-NEXT: addis 3, 2, .LCPI31_1@toc@ha +; PC64-NEXT: lfs 29, .LCPI31_1@toc@l(3) +; PC64-NEXT: stfd 30, 144(1) # 8-byte Folded Spill +; PC64-NEXT: fmr 30, 1 +; PC64-NEXT: fmr 4, 29 +; PC64-NEXT: stfd 31, 152(1) # 8-byte Folded Spill +; PC64-NEXT: fmr 31, 2 +; PC64-NEXT: fcmpu 2, 1, 3 +; PC64-NEXT: bl __gcc_qsub ; PC64-NEXT: nop -; PC64-NEXT: addi 1, 1, 112 +; PC64-NEXT: mffs 0 +; PC64-NEXT: mtfsb1 31 +; PC64-NEXT: mtfsb0 30 +; PC64-NEXT: fadd 1, 2, 1 +; PC64-NEXT: mtfsf 1, 0 +; PC64-NEXT: fctiwz 0, 1 +; PC64-NEXT: stfd 0, 120(1) +; PC64-NEXT: mffs 0 +; PC64-NEXT: mtfsb1 31 +; PC64-NEXT: crnor 21, 8, 10 +; PC64-NEXT: mtfsb0 30 +; PC64-NEXT: fadd 1, 31, 30 +; PC64-NEXT: mtfsf 1, 0 +; PC64-NEXT: lwz 3, 124(1) +; PC64-NEXT: fctiwz 0, 1 +; PC64-NEXT: stfd 0, 128(1) +; PC64-NEXT: fcmpu 0, 31, 29 +; PC64-NEXT: lwz 4, 132(1) +; PC64-NEXT: crandc 20, 10, 0 +; PC64-NEXT: cror 20, 21, 20 +; PC64-NEXT: addis 3, 3, -32768 +; PC64-NEXT: bc 12, 20, .LBB31_2 +; PC64-NEXT: # %bb.1: # %entry +; PC64-NEXT: ori 3, 4, 0 +; PC64-NEXT: b .LBB31_2 +; PC64-NEXT: .LBB31_2: # %entry +; PC64-NEXT: lfd 31, 152(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 30, 144(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 29, 136(1) # 8-byte Folded Reload +; PC64-NEXT: addi 1, 1, 160 ; PC64-NEXT: ld 0, 16(1) +; PC64-NEXT: lwz 12, 8(1) ; PC64-NEXT: mtlr 0 +; PC64-NEXT: mtcrf 32, 12 # cr2 ; PC64-NEXT: blr entry: %fpext = call i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(