diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -431,6 +431,9 @@ /// PLD. MAT_PCREL_ADDR, + /// Constrained direct move from VSR instruction. + STRICT_MFVSR = ISD::FIRST_TARGET_STRICTFP_OPCODE, + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -423,11 +423,13 @@ if (Subtarget.hasSPE()) { // SPE has built-in conversions + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); } else { // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); // PowerPC does not have [U|S]INT_TO_FP @@ -560,33 +562,43 @@ if (Subtarget.has64BitSupport()) { // They also have instructions for converting between i64 and fp. + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); // This is just the low 32 bits of a (signed) fp->i64 conversion. // We cannot do this with Promote because i64 is not a legal type. + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); } else { // PowerPC does not have FP_TO_UINT on 32-bit implementations. - if (Subtarget.hasSPE()) + if (Subtarget.hasSPE()) { + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); - else + } else { + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + } } // With the instructions enabled under FPCVT, we can do everything. 
if (Subtarget.hasFPCVT()) { if (Subtarget.has64BitSupport()) { + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); } + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); @@ -1568,6 +1580,7 @@ case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR"; case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; case PPCISD::FNMSUB: return "PPCISD::FNMSUB"; + case PPCISD::STRICT_MFVSR: return "PPCISD::STRICT_MFVSR"; } return nullptr; } @@ -8128,14 +8141,47 @@ return Op; } +static SDValue getFPNode(unsigned Opc, EVT VT, SDValue Op, SDValue Chain, + SelectionDAG &DAG, bool Strict) { + SDLoc dl(Op); + if (!Strict) + return DAG.getNode(Opc, dl, VT, Op); + + // Try to generate a STRICT node version + assert(Chain && "Missing chain for creating strict nodes"); + unsigned NewOpc = ISD::DELETED_NODE; + switch (Opc) { + default: + llvm_unreachable("getFPNode called with unexpected opcode!"); + case PPCISD::MFVSR: + NewOpc = PPCISD::STRICT_MFVSR; + break; +#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::DAGN: \ + NewOpc = ISD::STRICT_##DAGN; \ + break; +#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) +#include "llvm/IR/ConstrainedOps.def" +#undef DAG_INSTRUCTION +#undef CMP_INSTRUCTION + } + return DAG.getNode(NewOpc, dl, {VT, MVT::Other}, {Chain, Op}); +} + static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) { SDLoc dl(Op); - bool Signed = Op.getOpcode() == ISD::FP_TO_SINT; - SDValue Src = Op.getOperand(0); + bool Strict = Op->isStrictFPOpcode(); + bool Signed = Op.getOpcode() == ISD::FP_TO_SINT || + Op.getOpcode() == ISD::STRICT_FP_TO_SINT; + // For strict nodes, source is the second operand. + SDValue Src = Op.getOperand(Strict ? 1 : 0); assert(Src.getValueType().isFloatingPoint()); + SDValue FPChain; + if (Strict) + FPChain = Op.getOperand(0); if (Src.getValueType() == MVT::f32) - Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); + Src = getFPNode(ISD::FP_EXTEND, MVT::f64, Src, FPChain, DAG, Strict); SDValue Conv; switch (Op.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); @@ -8157,10 +8203,11 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, SelectionDAG &DAG, const SDLoc &dl) const { - SDValue Src = Op.getOperand(0); + SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0); assert(Src.getValueType().isFloatingPoint()); SDValue Tmp = convertFPToInt(Op, DAG, Subtarget); - bool Signed = Op.getOpcode() == ISD::FP_TO_SINT; + bool Signed = Op.getOpcode() == ISD::FP_TO_SINT || + Op.getOpcode() == ISD::STRICT_FP_TO_SINT; // Convert the FP value to an int value through memory. bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && @@ -8204,15 +8251,16 @@ SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { - SDValue Src = Op.getOperand(0); + SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 
1 : 0); assert(Src.getValueType().isFloatingPoint()); - return DAG.getNode(PPCISD::MFVSR, dl, Op.getSimpleValueType().SimpleTy, - convertFPToInt(Op, DAG, Subtarget)); + return getFPNode(PPCISD::MFVSR, Op.getValueType(), + convertFPToInt(Op, DAG, Subtarget), Op.getOperand(0), DAG, + Op->isStrictFPOpcode()); } SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { - SDValue Src = Op.getOperand(0); + SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0); // FP to INT conversions are legal for f128. if (EnableQuadPrecision && (Src.getValueType() == MVT::f128)) return Op; @@ -10878,6 +10926,8 @@ case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op)); case ISD::UINT_TO_FP: @@ -10968,10 +11018,13 @@ } return; } + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: // LowerFP_TO_INT() can only handle f32 and f64. - if (N->getOperand(0).getValueType() == MVT::ppcf128) + if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() == + MVT::ppcf128) return; Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); return; diff --git a/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/llvm/lib/Target/PowerPC/PPCInstrSPE.td --- a/llvm/lib/Target/PowerPC/PPCInstrSPE.td +++ b/llvm/lib/Target/PowerPC/PPCInstrSPE.td @@ -197,7 +197,7 @@ def EFDCTSIZ : EFXForm_2a<762, (outs gprc:$RT), (ins sperc:$RB), "efdctsiz $RT, $RB", IIC_FPDGeneral, - [(set i32:$RT, (fp_to_sint f64:$RB))]>; + [(set i32:$RT, (any_fp_to_sint f64:$RB))]>; def EFDCTUF : EFXForm_2a<758, (outs sperc:$RT), (ins spe4rc:$RB), "efdctuf $RT, $RB", IIC_FPDGeneral, []>; @@ -212,7 +212,7 @@ def EFDCTUIZ : EFXForm_2a<760, (outs gprc:$RT), (ins sperc:$RB), "efdctuiz $RT, $RB", IIC_FPDGeneral, - [(set i32:$RT, (fp_to_uint f64:$RB))]>; + [(set i32:$RT, (any_fp_to_uint f64:$RB))]>; def EFDDIV : EFXForm_1<745, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), "efddiv $RT, $RA, $RB", IIC_FPDivD, @@ -288,7 +288,7 @@ def EFSCTSIZ : EFXForm_2a<730, (outs gprc:$RT), (ins spe4rc:$RB), "efsctsiz $RT, $RB", IIC_FPSGeneral, - [(set i32:$RT, (fp_to_sint f32:$RB))]>; + [(set i32:$RT, (any_fp_to_sint f32:$RB))]>; def EFSCTUF : EFXForm_2a<726, (outs sperc:$RT), (ins spe4rc:$RB), "efsctuf $RT, $RB", IIC_FPSGeneral, []>; @@ -299,7 +299,7 @@ def EFSCTUIZ : EFXForm_2a<728, (outs gprc:$RT), (ins spe4rc:$RB), "efsctuiz $RT, $RB", IIC_FPSGeneral, - [(set i32:$RT, (fp_to_uint f32:$RB))]>; + [(set i32:$RT, (any_fp_to_uint f32:$RB))]>; def EFSDIV : EFXForm_1<713, (outs spe4rc:$RT), (ins spe4rc:$RA, spe4rc:$RB), "efsdiv $RT, $RA, $RB", IIC_FPDivD, diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -139,6 +139,8 @@ def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def PPCstrict_mfvsr : SDNode<"PPCISD::STRICT_MFVSR", SDTUnaryOp, [SDNPHasChain]>; + //-------------------------- Predicate definitions ---------------------------// def HasVSX : Predicate<"PPCSubTarget->hasVSX()">; def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">; @@ -3273,6 +3275,8 @@ // (move to GPR, nothing else needed) def : Pat<(i64 (bitconvert f64:$S)), (i64 (MFVSRD $S))>; +def : 
Pat<(i64 (PPCstrict_mfvsr f64:$A)), + (i64 (MFVSRD f64:$A))>; // bitconvert i64 -> f64 // (move to FPR, nothing else needed) @@ -3296,6 +3300,8 @@ (i64 (MFVSRD (FCTID (XSRDPI $S))))>; def : Pat<(i64 (llround f32:$S)), (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>; +def : Pat<(i32 (PPCstrict_mfvsr f64:$A)), + (i32 (MFVSRWZ f64:$A))>; // Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead // of f64 @@ -3718,11 +3724,11 @@ (f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>; // Truncate & Convert QP -> (Un)Signed (D)Word. -def : Pat<(i64 (fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>; -def : Pat<(i64 (fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>; -def : Pat<(i32 (fp_to_sint f128:$src)), +def : Pat<(i64 (any_fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>; +def : Pat<(i64 (any_fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>; +def : Pat<(i32 (any_fp_to_sint f128:$src)), (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC)))>; -def : Pat<(i32 (fp_to_uint f128:$src)), +def : Pat<(i32 (any_fp_to_uint f128:$src)), (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>; // Instructions for store(fptosi). diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll @@ -0,0 +1,845 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64-unknown-linux -enable-ppc-quad-precision \ +; RUN: -mcpu=pwr8 | FileCheck %s -check-prefix=P8 +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux -enable-ppc-quad-precision \ +; RUN: -mcpu=pwr9 | FileCheck %s -check-prefix=P9 +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux -enable-ppc-quad-precision \ +; RUN: -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=NOVSX +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names < %s -mcpu=e500 \ +; RUN: -mtriple=powerpc-unknown-linux-gnu -mattr=spe | FileCheck %s \ +; RUN: -check-prefix=SPE + +declare i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128, metadata) + +declare i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.ppcf128(ppc_fp128, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.ppcf128(ppc_fp128, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(ppc_fp128, metadata) + +declare i128 @llvm.experimental.constrained.fptosi.i128.ppcf128(ppc_fp128, metadata) +declare i128 @llvm.experimental.constrained.fptoui.i128.ppcf128(ppc_fp128, metadata) +declare i128 @llvm.experimental.constrained.fptosi.i128.f128(fp128, metadata) +declare i128 @llvm.experimental.constrained.fptoui.i128.f128(fp128, metadata) + +define i128 @q_to_i128(fp128 %m) #0 { +; P8-LABEL: q_to_i128: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixtfti +; P8-NEXT: nop +; 
P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_i128: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixtfti +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: q_to_i128: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixtfti +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: q_to_i128: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __fixtfti +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call i128 @llvm.experimental.constrained.fptosi.i128.f128(fp128 %m, metadata !"fpexcept.strict") #0 + ret i128 %conv +} + +define i128 @q_to_u128(fp128 %m) #0 { +; P8-LABEL: q_to_u128: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunstfti +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_u128: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixunstfti +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: q_to_u128: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunstfti +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: q_to_u128: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __fixunstfti +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call i128 @llvm.experimental.constrained.fptoui.i128.f128(fp128 %m, metadata !"fpexcept.strict") #0 + ret i128 %conv +} + +define i128 @ppcq_to_i128(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_i128: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixtfti +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_i128: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixtfti +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_i128: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr 
r0
+; NOVSX-NEXT: std r0, 16(r1)
+; NOVSX-NEXT: stdu r1, -32(r1)
+; NOVSX-NEXT: .cfi_def_cfa_offset 32
+; NOVSX-NEXT: .cfi_offset lr, 16
+; NOVSX-NEXT: bl __fixtfti
+; NOVSX-NEXT: nop
+; NOVSX-NEXT: addi r1, r1, 32
+; NOVSX-NEXT: ld r0, 16(r1)
+; NOVSX-NEXT: mtlr r0
+; NOVSX-NEXT: blr
+;
+; SPE-LABEL: ppcq_to_i128:
+; SPE: # %bb.0: # %entry
+; SPE-NEXT: mflr r0
+; SPE-NEXT: stw r0, 4(r1)
+; SPE-NEXT: stwu r1, -48(r1)
+; SPE-NEXT: .cfi_def_cfa_offset 48
+; SPE-NEXT: .cfi_offset lr, 4
+; SPE-NEXT: evmergelo r5, r5, r6
+; SPE-NEXT: evmergelo r3, r3, r4
+; SPE-NEXT: evstdd r5, 16(r1)
+; SPE-NEXT: evstdd r3, 24(r1)
+; SPE-NEXT: lwz r3, 28(r1)
+; SPE-NEXT: stw r3, 44(r1)
+; SPE-NEXT: lwz r3, 24(r1)
+; SPE-NEXT: stw r3, 40(r1)
+; SPE-NEXT: lwz r3, 20(r1)
+; SPE-NEXT: stw r3, 36(r1)
+; SPE-NEXT: lwz r3, 16(r1)
+; SPE-NEXT: stw r3, 32(r1)
+; SPE-NEXT: evldd r4, 40(r1)
+; SPE-NEXT: evldd r6, 32(r1)
+; SPE-NEXT: evmergehi r3, r4, r4
+; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
+; SPE-NEXT: evmergehi r5, r6, r6
+; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6
+; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
+; SPE-NEXT: bl __fixtfti
+; SPE-NEXT: lwz r0, 52(r1)
+; SPE-NEXT: addi r1, r1, 48
+; SPE-NEXT: mtlr r0
+; SPE-NEXT: blr
+entry:
+ %conv = tail call i128 @llvm.experimental.constrained.fptosi.i128.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0
+ ret i128 %conv
+}
+
+define i128 @ppcq_to_u128(ppc_fp128 %m) #0 {
+; P8-LABEL: ppcq_to_u128:
+; P8: # %bb.0: # %entry
+; P8-NEXT: mflr r0
+; P8-NEXT: std r0, 16(r1)
+; P8-NEXT: stdu r1, -112(r1)
+; P8-NEXT: .cfi_def_cfa_offset 112
+; P8-NEXT: .cfi_offset lr, 16
+; P8-NEXT: bl __fixunstfti
+; P8-NEXT: nop
+; P8-NEXT: addi r1, r1, 112
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; P9-LABEL: ppcq_to_u128:
+; P9: # %bb.0: # %entry
+; P9-NEXT: mflr r0
+; P9-NEXT: std r0, 16(r1)
+; P9-NEXT: stdu r1, -32(r1)
+; P9-NEXT: .cfi_def_cfa_offset 32
+; P9-NEXT: .cfi_offset lr, 16
+; P9-NEXT: bl __fixunstfti
+; P9-NEXT: nop
+; P9-NEXT: addi r1, r1, 32
+; P9-NEXT: ld r0, 16(r1)
+; P9-NEXT: mtlr r0
+; P9-NEXT: blr
+;
+; NOVSX-LABEL: ppcq_to_u128:
+; NOVSX: # %bb.0: # %entry
+; NOVSX-NEXT: mflr r0
+; NOVSX-NEXT: std r0, 16(r1)
+; NOVSX-NEXT: stdu r1, -32(r1)
+; NOVSX-NEXT: .cfi_def_cfa_offset 32
+; NOVSX-NEXT: .cfi_offset lr, 16
+; NOVSX-NEXT: bl __fixunstfti
+; NOVSX-NEXT: nop
+; NOVSX-NEXT: addi r1, r1, 32
+; NOVSX-NEXT: ld r0, 16(r1)
+; NOVSX-NEXT: mtlr r0
+; NOVSX-NEXT: blr
+;
+; SPE-LABEL: ppcq_to_u128:
+; SPE: # %bb.0: # %entry
+; SPE-NEXT: mflr r0
+; SPE-NEXT: stw r0, 4(r1)
+; SPE-NEXT: stwu r1, -48(r1)
+; SPE-NEXT: .cfi_def_cfa_offset 48
+; SPE-NEXT: .cfi_offset lr, 4
+; SPE-NEXT: evmergelo r5, r5, r6
+; SPE-NEXT: evmergelo r3, r3, r4
+; SPE-NEXT: evstdd r5, 16(r1)
+; SPE-NEXT: evstdd r3, 24(r1)
+; SPE-NEXT: lwz r3, 28(r1)
+; SPE-NEXT: stw r3, 44(r1)
+; SPE-NEXT: lwz r3, 24(r1)
+; SPE-NEXT: stw r3, 40(r1)
+; SPE-NEXT: lwz r3, 20(r1)
+; SPE-NEXT: stw r3, 36(r1)
+; SPE-NEXT: lwz r3, 16(r1)
+; SPE-NEXT: stw r3, 32(r1)
+; SPE-NEXT: evldd r4, 40(r1)
+; SPE-NEXT: evldd r6, 32(r1)
+; SPE-NEXT: evmergehi r3, r4, r4
+; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
+; SPE-NEXT: evmergehi r5, r6, r6
+; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6
+; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
+; SPE-NEXT: bl __fixunstfti
+; SPE-NEXT: lwz r0, 52(r1)
+; SPE-NEXT: addi r1, r1, 48
+; SPE-NEXT: mtlr r0
+; SPE-NEXT: blr
+entry:
+ %conv = tail call i128 @llvm.experimental.constrained.fptoui.i128.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0
+ ret i128 %conv
+}
+
+define signext i32 @q_to_i32(fp128 %m) #0 {
+; P8-LABEL: q_to_i32:
+; P8: # %bb.0: # %entry
+; P8-NEXT: mflr r0
+; P8-NEXT: std r0, 16(r1)
+; P8-NEXT: stdu r1, -112(r1)
+; P8-NEXT: .cfi_def_cfa_offset 112
+; P8-NEXT: .cfi_offset lr, 16
+; P8-NEXT: bl __fixkfsi
+; P8-NEXT: nop
+; P8-NEXT: extsw r3, r3
+; P8-NEXT: addi r1, r1, 112
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; P9-LABEL: q_to_i32:
+; P9: # %bb.0: # %entry
+; P9-NEXT: xscvqpswz v2, v2
+; P9-NEXT: mfvsrwz r3, v2
+; P9-NEXT: extsw r3, r3
+; P9-NEXT: blr
+;
+; NOVSX-LABEL: q_to_i32:
+; NOVSX: # %bb.0: # %entry
+; NOVSX-NEXT: mflr r0
+; NOVSX-NEXT: std r0, 16(r1)
+; NOVSX-NEXT: stdu r1, -32(r1)
+; NOVSX-NEXT: .cfi_def_cfa_offset 32
+; NOVSX-NEXT: .cfi_offset lr, 16
+; NOVSX-NEXT: bl __fixkfsi
+; NOVSX-NEXT: nop
+; NOVSX-NEXT: extsw r3, r3
+; NOVSX-NEXT: addi r1, r1, 32
+; NOVSX-NEXT: ld r0, 16(r1)
+; NOVSX-NEXT: mtlr r0
+; NOVSX-NEXT: blr
+;
+; SPE-LABEL: q_to_i32:
+; SPE: # %bb.0: # %entry
+; SPE-NEXT: mflr r0
+; SPE-NEXT: stw r0, 4(r1)
+; SPE-NEXT: stwu r1, -16(r1)
+; SPE-NEXT: .cfi_def_cfa_offset 16
+; SPE-NEXT: .cfi_offset lr, 4
+; SPE-NEXT: bl __fixkfsi
+; SPE-NEXT: lwz r0, 20(r1)
+; SPE-NEXT: addi r1, r1, 16
+; SPE-NEXT: mtlr r0
+; SPE-NEXT: blr
+entry:
+ %conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %m, metadata !"fpexcept.strict") #0
+ ret i32 %conv
+}
+
+define i64 @q_to_i64(fp128 %m) #0 {
+; P8-LABEL: q_to_i64:
+; P8: # %bb.0: # %entry
+; P8-NEXT: mflr r0
+; P8-NEXT: std r0, 16(r1)
+; P8-NEXT: stdu r1, -112(r1)
+; P8-NEXT: .cfi_def_cfa_offset 112
+; P8-NEXT: .cfi_offset lr, 16
+; P8-NEXT: bl __fixkfdi
+; P8-NEXT: nop
+; P8-NEXT: addi r1, r1, 112
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; P9-LABEL: q_to_i64:
+; P9: # %bb.0: # %entry
+; P9-NEXT: xscvqpsdz v2, v2
+; P9-NEXT: mfvsrd r3, v2
+; P9-NEXT: blr
+;
+; NOVSX-LABEL: q_to_i64:
+; NOVSX: # %bb.0: # %entry
+; NOVSX-NEXT: mflr r0
+; NOVSX-NEXT: std r0, 16(r1)
+; NOVSX-NEXT: stdu r1, -32(r1)
+; NOVSX-NEXT: .cfi_def_cfa_offset 32
+; NOVSX-NEXT: .cfi_offset lr, 16
+; NOVSX-NEXT: bl __fixkfdi
+; NOVSX-NEXT: nop
+; NOVSX-NEXT: addi r1, r1, 32
+; NOVSX-NEXT: ld r0, 16(r1)
+; NOVSX-NEXT: mtlr r0
+; NOVSX-NEXT: blr
+;
+; SPE-LABEL: q_to_i64:
+; SPE: # %bb.0: # %entry
+; SPE-NEXT: mflr r0
+; SPE-NEXT: stw r0, 4(r1)
+; SPE-NEXT: stwu r1, -16(r1)
+; SPE-NEXT: .cfi_def_cfa_offset 16
+; SPE-NEXT: .cfi_offset lr, 4
+; SPE-NEXT: bl __fixkfdi
+; SPE-NEXT: lwz r0, 20(r1)
+; SPE-NEXT: addi r1, r1, 16
+; SPE-NEXT: mtlr r0
+; SPE-NEXT: blr
+entry:
+ %conv = tail call i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128 %m, metadata !"fpexcept.strict") #0
+ ret i64 %conv
+}
+
+define i64 @q_to_u64(fp128 %m) #0 {
+; P8-LABEL: q_to_u64:
+; P8: # %bb.0: # %entry
+; P8-NEXT: mflr r0
+; P8-NEXT: std r0, 16(r1)
+; P8-NEXT: stdu r1, -112(r1)
+; P8-NEXT: .cfi_def_cfa_offset 112
+; P8-NEXT: .cfi_offset lr, 16
+; P8-NEXT: bl __fixunskfdi
+; P8-NEXT: nop
+; P8-NEXT: addi r1, r1, 112
+; P8-NEXT: ld r0, 16(r1)
+; P8-NEXT: mtlr r0
+; P8-NEXT: blr
+;
+; P9-LABEL: q_to_u64:
+; P9: # %bb.0: # %entry
+; P9-NEXT: xscvqpudz v2, v2
+; P9-NEXT: mfvsrd r3, v2
+; P9-NEXT: blr
+;
+; NOVSX-LABEL: q_to_u64:
+; NOVSX: # %bb.0: # %entry
+; NOVSX-NEXT: mflr r0
+; NOVSX-NEXT: std r0, 16(r1)
+; NOVSX-NEXT: stdu r1, -32(r1)
+; NOVSX-NEXT: .cfi_def_cfa_offset 32
+; NOVSX-NEXT: .cfi_offset lr, 16
+; NOVSX-NEXT: bl
__fixunskfdi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: q_to_u64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __fixunskfdi +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define zeroext i32 @q_to_u32(fp128 %m) #0 { +; P8-LABEL: q_to_u32: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunskfsi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_u32: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvqpuwz v2, v2 +; P9-NEXT: mfvsrwz r3, v2 +; P9-NEXT: clrldi r3, r3, 32 +; P9-NEXT: blr +; +; NOVSX-LABEL: q_to_u32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunskfsi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: q_to_u32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __fixunskfsi +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128 %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define signext i32 @ppcq_to_i32(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_i32: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __gcc_qtou +; P8-NEXT: nop +; P8-NEXT: extsw r3, r3 +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_i32: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __gcc_qtou +; P9-NEXT: nop +; P9-NEXT: extsw r3, r3 +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_i32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __gcc_qtou +; NOVSX-NEXT: nop +; NOVSX-NEXT: extsw r3, r3 +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: ppcq_to_i32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -48(r1) +; SPE-NEXT: .cfi_def_cfa_offset 48 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: evstdd r5, 16(r1) +; SPE-NEXT: evstdd r3, 24(r1) +; SPE-NEXT: lwz r3, 28(r1) +; SPE-NEXT: stw r3, 44(r1) +; SPE-NEXT: lwz r3, 24(r1) +; SPE-NEXT: stw r3, 40(r1) +; 
SPE-NEXT: lwz r3, 20(r1) +; SPE-NEXT: stw r3, 36(r1) +; SPE-NEXT: lwz r3, 16(r1) +; SPE-NEXT: stw r3, 32(r1) +; SPE-NEXT: evldd r4, 40(r1) +; SPE-NEXT: evldd r6, 32(r1) +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: evmergehi r5, r6, r6 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: bl __gcc_qtou +; SPE-NEXT: lwz r0, 52(r1) +; SPE-NEXT: addi r1, r1, 48 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define i64 @ppcq_to_i64(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_i64: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixtfdi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_i64: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixtfdi +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_i64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixtfdi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: ppcq_to_i64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -48(r1) +; SPE-NEXT: .cfi_def_cfa_offset 48 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: evstdd r5, 16(r1) +; SPE-NEXT: evstdd r3, 24(r1) +; SPE-NEXT: lwz r3, 28(r1) +; SPE-NEXT: stw r3, 44(r1) +; SPE-NEXT: lwz r3, 24(r1) +; SPE-NEXT: stw r3, 40(r1) +; SPE-NEXT: lwz r3, 20(r1) +; SPE-NEXT: stw r3, 36(r1) +; SPE-NEXT: lwz r3, 16(r1) +; SPE-NEXT: stw r3, 32(r1) +; SPE-NEXT: evldd r4, 40(r1) +; SPE-NEXT: evldd r6, 32(r1) +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: evmergehi r5, r6, r6 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: bl __fixtfdi +; SPE-NEXT: lwz r0, 52(r1) +; SPE-NEXT: addi r1, r1, 48 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call i64 @llvm.experimental.constrained.fptosi.i64.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define i64 @ppcq_to_u64(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_u64: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunstfdi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_u64: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixunstfdi +; P9-NEXT: nop +; 
P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_u64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunstfdi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: ppcq_to_u64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -48(r1) +; SPE-NEXT: .cfi_def_cfa_offset 48 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: evstdd r5, 16(r1) +; SPE-NEXT: evstdd r3, 24(r1) +; SPE-NEXT: lwz r3, 28(r1) +; SPE-NEXT: stw r3, 44(r1) +; SPE-NEXT: lwz r3, 24(r1) +; SPE-NEXT: stw r3, 40(r1) +; SPE-NEXT: lwz r3, 20(r1) +; SPE-NEXT: stw r3, 36(r1) +; SPE-NEXT: lwz r3, 16(r1) +; SPE-NEXT: stw r3, 32(r1) +; SPE-NEXT: evldd r4, 40(r1) +; SPE-NEXT: evldd r6, 32(r1) +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: evmergehi r5, r6, r6 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: bl __fixunstfdi +; SPE-NEXT: lwz r0, 52(r1) +; SPE-NEXT: addi r1, r1, 48 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call i64 @llvm.experimental.constrained.fptoui.i64.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_u32: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunstfsi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_u32: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixunstfsi +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_u32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunstfsi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: ppcq_to_u32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -48(r1) +; SPE-NEXT: .cfi_def_cfa_offset 48 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: evstdd r5, 16(r1) +; SPE-NEXT: evstdd r3, 24(r1) +; SPE-NEXT: lwz r3, 28(r1) +; SPE-NEXT: stw r3, 44(r1) +; SPE-NEXT: lwz r3, 24(r1) +; SPE-NEXT: stw r3, 40(r1) +; SPE-NEXT: lwz r3, 20(r1) +; SPE-NEXT: stw r3, 36(r1) +; SPE-NEXT: lwz r3, 16(r1) +; SPE-NEXT: stw r3, 32(r1) +; SPE-NEXT: evldd r4, 40(r1) +; SPE-NEXT: evldd r6, 32(r1) +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: evmergehi r5, r6, r6 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6 +; SPE-NEXT: # kill: 
def $r5 killed $r5 killed $s5 +; SPE-NEXT: bl __fixunstfsi +; SPE-NEXT: lwz r0, 52(r1) +; SPE-NEXT: addi r1, r1, 48 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll @@ -0,0 +1,256 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | \ +; RUN: FileCheck %s -check-prefix=NOVSX +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names < %s -mcpu=e500 \ +; RUN: -mtriple=powerpc-unknown-linux-gnu -mattr=spe | FileCheck %s \ +; RUN: -check-prefix=SPE + +declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) + +declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f32(float, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f32(float, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata) + +define i32 @d_to_i32(double %m) #0 { +; CHECK-LABEL: d_to_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpsxws f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: d_to_i32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiwz f0, f1 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwz r3, -4(r1) +; NOVSX-NEXT: blr +; +; SPE-LABEL: d_to_i32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: efdctsiz r3, r3 +; SPE-NEXT: blr +entry: + %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define i64 @d_to_i64(double %m) #0 { +; CHECK-LABEL: d_to_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpsxds f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: d_to_i64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctidz f0, f1 +; NOVSX-NEXT: stfd f0, -8(r1) +; NOVSX-NEXT: ld r3, -8(r1) +; NOVSX-NEXT: blr +; +; SPE-LABEL: d_to_i64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r4, r3, r4 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: bl __fixdfdi +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define i64 @d_to_u64(double %m) #0 { 
+; CHECK-LABEL: d_to_u64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpuxds f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: d_to_u64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiduz f0, f1 +; NOVSX-NEXT: stfd f0, -8(r1) +; NOVSX-NEXT: ld r3, -8(r1) +; NOVSX-NEXT: blr +; +; SPE-LABEL: d_to_u64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r4, r3, r4 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: bl __fixunsdfdi +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define zeroext i32 @d_to_u32(double %m) #0 { +; CHECK-LABEL: d_to_u32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpuxws f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: d_to_u32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiwuz f0, f1 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwz r3, -4(r1) +; NOVSX-NEXT: blr +; +; SPE-LABEL: d_to_u32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: efdctuiz r3, r3 +; SPE-NEXT: blr +entry: + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define signext i32 @f_to_i32(float %m) #0 { +; CHECK-LABEL: f_to_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpsxws f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: f_to_i32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiwz f0, f1 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwa r3, -4(r1) +; NOVSX-NEXT: blr +; +; SPE-LABEL: f_to_i32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efsctsiz r3, r3 +; SPE-NEXT: blr +entry: + %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define i64 @f_to_i64(float %m) #0 { +; CHECK-LABEL: f_to_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpsxds f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: f_to_i64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctidz f0, f1 +; NOVSX-NEXT: stfd f0, -8(r1) +; NOVSX-NEXT: ld r3, -8(r1) +; NOVSX-NEXT: blr +; +; SPE-LABEL: f_to_i64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __fixsfdi +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define i64 @f_to_u64(float %m) #0 { +; CHECK-LABEL: f_to_u64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpuxds f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: f_to_u64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiduz f0, f1 +; NOVSX-NEXT: stfd f0, -8(r1) +; NOVSX-NEXT: ld r3, -8(r1) +; NOVSX-NEXT: blr +; +; SPE-LABEL: f_to_u64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 
+; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __fixunssfdi +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define zeroext i32 @f_to_u32(float %m) #0 { +; CHECK-LABEL: f_to_u32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpuxws f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: f_to_u32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiwuz f0, f1 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwz r3, -4(r1) +; NOVSX-NEXT: blr +; +; SPE-LABEL: f_to_u32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efsctuiz r3, r3 +; SPE-NEXT: blr +entry: + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +attributes #0 = { strictfp }
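
Note: the IR below is an illustrative sketch, not part of the patch. The function name is made up; the intrinsic, metadata, and attributes mirror the d_to_i32 test above. With this change, a VSX target (e.g. -mcpu=pwr8) should select the constrained conversion to xscvdpsxws plus a chained direct move (PPCISD::STRICT_MFVSR -> mffprwz) instead of leaving it to generic expansion.

; Hypothetical usage sketch (not a test): strict f64 -> i32 conversion.
declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)

define i32 @strict_conv_example(double %x) strictfp {
entry:
  ; "fpexcept.strict" means the conversion may raise FP exceptions, so it
  ; must not be speculated, constant-folded, or reordered past code that
  ; observes FP status flags; the chain on STRICT_MFVSR preserves that
  ; ordering through instruction selection.
  %r = tail call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict") strictfp
  ret i32 %r
}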