diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -431,6 +431,9 @@ /// PLD. MAT_PCREL_ADDR, + /// Constrained direct move from VSR instruction. + STRICT_MFVSR = ISD::FIRST_TARGET_STRICTFP_OPCODE, + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -423,12 +423,14 @@ if (Subtarget.hasSPE()) { // SPE has built-in conversions + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); } else { // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); // PowerPC does not have [U|S]INT_TO_FP setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); @@ -560,33 +562,43 @@ if (Subtarget.has64BitSupport()) { // They also have instructions for converting between i64 and fp. + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); // This is just the low 32 bits of a (signed) fp->i64 conversion. // We cannot do this with Promote because i64 is not a legal type. 
+    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 
     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
   } else {
     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
-    if (Subtarget.hasSPE())
+    if (Subtarget.hasSPE()) {
+      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
-    else
+    } else {
+      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+    }
   }
 
   // With the instructions enabled under FPCVT, we can do everything.
   if (Subtarget.hasFPCVT()) {
     if (Subtarget.has64BitSupport()) {
+      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
+      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
     }
 
+    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
+    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
@@ -1568,6 +1580,7 @@
   case PPCISD::MAT_PCREL_ADDR:  return "PPCISD::MAT_PCREL_ADDR";
   case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
   case PPCISD::FNMSUB:          return "PPCISD::FNMSUB";
+  case PPCISD::STRICT_MFVSR:    return "PPCISD::STRICT_MFVSR";
   }
   return nullptr;
 }
@@ -8128,36 +8141,92 @@
   return Op;
 }
 
-void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
-                                               SelectionDAG &DAG,
-                                               const SDLoc &dl) const {
-  assert(Op.getOperand(0).getValueType().isFloatingPoint());
-  SDValue Src = Op.getOperand(0);
-  if (Src.getValueType() == MVT::f32)
-    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+/// Build the unary FP node \p Opc applied to \p Op with result type \p VT.
+///
+/// If \p Strict is false this is a plain DAG.getNode() and \p Chain is unused.
+/// Otherwise the constrained counterpart of \p Opc is created instead, taking
+/// {Chain, Op} as operands and producing {VT, MVT::Other}: PPCISD::MFVSR maps
+/// to PPCISD::STRICT_MFVSR, and each generic opcode that ConstrainedOps.def
+/// expands through DAG_INSTRUCTION maps to its ISD::STRICT_* form. Any other
+/// opcode is a programming error.
+static SDValue getFPNode(unsigned Opc, EVT VT, SDValue Op, SDValue Chain,
+                         SelectionDAG &DAG, bool Strict) {
+  SDLoc dl(Op);
+  if (!Strict)
+    return DAG.getNode(Opc, dl, VT, Op);
+  // From here on a strict node is being built, so a chain is mandatory.
+  assert(Chain && "Missing chain for creating strict nodes");
+  unsigned NewOpc = ISD::DELETED_NODE;
+  switch (Opc) {
+  default:
+    llvm_unreachable("getFPNode called with unexpected opcode!");
+  case PPCISD::MFVSR:
+    NewOpc = PPCISD::STRICT_MFVSR;
+    break;
+  // Generic opcodes: one case per DAG_INSTRUCTION entry of ConstrainedOps.def.
+  // CMP_INSTRUCTION is defined empty, so compare entries add no case here.
+#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
+  case ISD::DAGN:                                                              \
+    NewOpc = ISD::STRICT_##DAGN;                                               \
+    break;
+#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)
+#include "llvm/IR/ConstrainedOps.def"
+#undef DAG_INSTRUCTION
+#undef CMP_INSTRUCTION
+  }
+  return DAG.getNode(NewOpc, dl, {VT, MVT::Other}, {Chain, Op});
+}
+
+/// Emit the PPCISD FCTI* conversion node for \p Op, which is one of
+/// [STRICT_]FP_TO_SINT / [STRICT_]FP_TO_UINT with an i32 or i64 result.
+///
+/// An f32 source is extended to f64 first (as a strict node, on the incoming
+/// chain, when \p Op is strict). The value returned is the f64-typed FCTI*
+/// node; callers still have to transfer it to an integer value.
+/// NOTE(review): the FCTI* node is built without a chain even when \p Op is
+/// strict - confirm that is sufficient for constrained FP semantics.
+static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
+                              const PPCSubtarget &Subtarget) {
+  SDLoc dl(Op);
+  bool Strict = Op->isStrictFPOpcode();
+  bool Signed = Op.getOpcode() == ISD::FP_TO_SINT ||
+                Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
+  // Strict nodes carry the chain as operand 0 and the FP source as operand 1.
+  SDValue Src = Op.getOperand(Strict ? 1 : 0);
+  SDValue FPChain = Strict ? Op.getOperand(0) : SDValue();
+  assert(Src.getValueType().isFloatingPoint());
+  if (Src.getValueType() == MVT::f32)
+    Src = getFPNode(ISD::FP_EXTEND, MVT::f64, Src, FPChain, DAG, Strict);
 
   SDValue Tmp;
   switch (Op.getSimpleValueType().SimpleTy) {
   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
   case MVT::i32:
     Tmp = DAG.getNode(
-        Op.getOpcode() == ISD::FP_TO_SINT
-            ? PPCISD::FCTIWZ
-            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
+        Signed ? PPCISD::FCTIWZ
+               : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
         dl, MVT::f64, Src);
     break;
   case MVT::i64:
-    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
+    assert((Signed || Subtarget.hasFPCVT()) &&
            "i64 FP_TO_UINT is supported only with FPCVT");
-    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
-                                                        PPCISD::FCTIDUZ,
-                      dl, MVT::f64, Src);
+    Tmp = DAG.getNode(Signed ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ, dl, MVT::f64,
+                      Src);
     break;
   }
+  return Tmp;
+}
+
+void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
+                                               SelectionDAG &DAG,
+                                               const SDLoc &dl) const {
+  SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
+  bool Signed = Op.getOpcode() == ISD::FP_TO_SINT ||
+                Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
 
   // Convert the FP value to an int value through memory.
   bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
-    (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
+                  (Signed || Subtarget.hasFPCVT());
   SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
   int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
   MachinePointerInfo MPI =
@@ -8197,45 +8266,26 @@
 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
                                                     SelectionDAG &DAG,
                                                     const SDLoc &dl) const {
-  assert(Op.getOperand(0).getValueType().isFloatingPoint());
-  SDValue Src = Op.getOperand(0);
-
-  if (Src.getValueType() == MVT::f32)
-    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
-
-  SDValue Tmp;
-  switch (Op.getSimpleValueType().SimpleTy) {
-  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
-  case MVT::i32:
-    Tmp = DAG.getNode(
-        Op.getOpcode() == ISD::FP_TO_SINT
-            ? PPCISD::FCTIWZ
-            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
-        dl, MVT::f64, Src);
-    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
-    break;
-  case MVT::i64:
-    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
-           "i64 FP_TO_UINT is supported only with FPCVT");
-    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
-                                                        PPCISD::FCTIDUZ,
-                      dl, MVT::f64, Src);
-    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
-    break;
-  }
-  return Tmp;
+  // Only strict nodes have a chain (operand 0); for ordinary nodes operand 0
+  // is the FP source, so hand getFPNode an empty chain rather than that value.
+  bool Strict = Op->isStrictFPOpcode();
+  SDValue Chain = Strict ? Op.getOperand(0) : SDValue();
+  SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
+  return getFPNode(PPCISD::MFVSR, Op.getSimpleValueType().SimpleTy, Tmp, Chain,
+                   DAG, Strict);
 }
 
 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                           const SDLoc &dl) const {
-
+  bool Strict = Op->isStrictFPOpcode();
+  SDValue Src = Op.getOperand(Strict ? 1 : 0);
   // FP to INT conversions are legal for f128.
-  if (EnableQuadPrecision && (Op->getOperand(0).getValueType() == MVT::f128))
+  if (EnableQuadPrecision && (Src.getValueType() == MVT::f128))
     return Op;
 
   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
   // PPC (the libcall is not available).
-  if (Op.getOperand(0).getValueType() == MVT::ppcf128) {
+  if (Src.getValueType() == MVT::ppcf128) {
     if (Op.getValueType() == MVT::i32) {
       if (Op.getOpcode() == ISD::FP_TO_SINT) {
         SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
@@ -10908,6 +10958,8 @@
   case ISD::STORE:              return LowerSTORE(Op, DAG);
   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
+  case ISD::STRICT_FP_TO_UINT:
+  case ISD::STRICT_FP_TO_SINT:
   case ISD::FP_TO_UINT:
   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
   case ISD::UINT_TO_FP:
@@ -10998,10 +11050,13 @@
     }
     return;
   }
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT:
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
     // LowerFP_TO_INT() can only handle f32 and f64.
- if (N->getOperand(0).getValueType() == MVT::ppcf128) + if (N->getOperand((int)N->isStrictFPOpcode()).getValueType() == + MVT::ppcf128) return; Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); return; diff --git a/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/llvm/lib/Target/PowerPC/PPCInstrSPE.td --- a/llvm/lib/Target/PowerPC/PPCInstrSPE.td +++ b/llvm/lib/Target/PowerPC/PPCInstrSPE.td @@ -197,7 +197,7 @@ def EFDCTSIZ : EFXForm_2a<762, (outs gprc:$RT), (ins sperc:$RB), "efdctsiz $RT, $RB", IIC_FPDGeneral, - [(set i32:$RT, (fp_to_sint f64:$RB))]>; + [(set i32:$RT, (any_fp_to_sint f64:$RB))]>; def EFDCTUF : EFXForm_2a<758, (outs sperc:$RT), (ins spe4rc:$RB), "efdctuf $RT, $RB", IIC_FPDGeneral, []>; @@ -212,7 +212,7 @@ def EFDCTUIZ : EFXForm_2a<760, (outs gprc:$RT), (ins sperc:$RB), "efdctuiz $RT, $RB", IIC_FPDGeneral, - [(set i32:$RT, (fp_to_uint f64:$RB))]>; + [(set i32:$RT, (any_fp_to_uint f64:$RB))]>; def EFDDIV : EFXForm_1<745, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), "efddiv $RT, $RA, $RB", IIC_FPDivD, @@ -288,7 +288,7 @@ def EFSCTSIZ : EFXForm_2a<730, (outs gprc:$RT), (ins spe4rc:$RB), "efsctsiz $RT, $RB", IIC_FPSGeneral, - [(set i32:$RT, (fp_to_sint f32:$RB))]>; + [(set i32:$RT, (any_fp_to_sint f32:$RB))]>; def EFSCTUF : EFXForm_2a<726, (outs sperc:$RT), (ins spe4rc:$RB), "efsctuf $RT, $RB", IIC_FPSGeneral, []>; @@ -299,7 +299,7 @@ def EFSCTUIZ : EFXForm_2a<728, (outs gprc:$RT), (ins spe4rc:$RB), "efsctuiz $RT, $RB", IIC_FPSGeneral, - [(set i32:$RT, (fp_to_uint f32:$RB))]>; + [(set i32:$RT, (any_fp_to_uint f32:$RB))]>; def EFSDIV : EFXForm_1<713, (outs spe4rc:$RT), (ins spe4rc:$RA, spe4rc:$RB), "efsdiv $RT, $RA, $RB", IIC_FPDivD, diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -139,6 +139,8 @@ def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def 
PPCstrict_mfvsr : SDNode<"PPCISD::STRICT_MFVSR", SDTUnaryOp, [SDNPHasChain]>; + //-------------------------- Predicate definitions ---------------------------// def HasVSX : Predicate<"PPCSubTarget->hasVSX()">; def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">; @@ -3273,6 +3275,8 @@ // (move to GPR, nothing else needed) def : Pat<(i64 (bitconvert f64:$S)), (i64 (MFVSRD $S))>; +def : Pat<(i64 (PPCstrict_mfvsr f64:$A)), + (i64 (MFVSRD f64:$A))>; // bitconvert i64 -> f64 // (move to FPR, nothing else needed) @@ -3296,6 +3300,8 @@ (i64 (MFVSRD (FCTID (XSRDPI $S))))>; def : Pat<(i64 (llround f32:$S)), (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>; +def : Pat<(i32 (PPCstrict_mfvsr f64:$A)), + (i32 (MFVSRWZ f64:$A))>; // Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead // of f64 diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll @@ -0,0 +1,263 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s \ +; RUN: -check-prefix=P8 +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s \ +; RUN: -check-prefix=P9 +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | \ +; RUN: FileCheck %s -check-prefix=NOVSX +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names < %s -mcpu=e500 \ +; RUN: -mtriple=powerpc-unknown-linux-gnu -mattr=spe | FileCheck %s \ +; RUN: -check-prefix=SPE + +declare i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128, metadata) +declare i64 
@llvm.experimental.constrained.fptosi.i64.f128(fp128, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128, metadata) + +define signext i32 @q_to_i32(fp128 %m) #0 { +; P8-LABEL: q_to_i32: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixkfsi +; P8-NEXT: nop +; P8-NEXT: extsw r3, r3 +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_i32: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixkfsi +; P9-NEXT: nop +; P9-NEXT: extsw r3, r3 +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: q_to_i32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixkfsi +; NOVSX-NEXT: nop +; NOVSX-NEXT: extsw r3, r3 +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: q_to_i32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __fixkfsi +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %m, metadata !"fpexcept.ignore") #0 + ret i32 %conv +} + +define i64 @q_to_i64(fp128 %m) #0 { +; P8-LABEL: q_to_i64: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: 
.cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixkfdi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_i64: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixkfdi +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: q_to_i64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixkfdi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: q_to_i64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __fixkfdi +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128 %m, metadata !"fpexcept.ignore") #0 + ret i64 %conv +} + +define i64 @q_to_u64(fp128 %m) #0 { +; P8-LABEL: q_to_u64: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunskfdi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_u64: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixunskfdi +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; 
P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: q_to_u64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunskfdi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: q_to_u64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __fixunskfdi +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %m, metadata !"fpexcept.ignore") #0 + ret i64 %conv +} + +define zeroext i32 @q_to_u32(fp128 %m) #0 { +; P8-LABEL: q_to_u32: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunskfsi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_u32: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixunskfsi +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: q_to_u32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunskfsi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: q_to_u32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr 
r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __fixunskfsi +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128 %m, metadata !"fpexcept.ignore") #0 + ret i32 %conv +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll @@ -0,0 +1,261 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | \ +; RUN: FileCheck %s -check-prefix=NOVSX +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names < %s -mcpu=e500 \ +; RUN: -mtriple=powerpc-unknown-linux-gnu -mattr=spe | FileCheck %s \ +; RUN: -check-prefix=SPE + +declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) + +declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f32(float, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f32(float, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata) 
+ +declare i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128, metadata) + +define i32 @d_to_i32(double %m) #0 { +; CHECK-LABEL: d_to_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpsxws f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: d_to_i32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiwz f0, f1 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwz r3, -4(r1) +; NOVSX-NEXT: blr +; +; SPE-LABEL: d_to_i32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: efdctsiz r3, r3 +; SPE-NEXT: blr +entry: + %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %m, metadata !"fpexcept.ignore") #0 + ret i32 %conv +} + +define i64 @d_to_i64(double %m) #0 { +; CHECK-LABEL: d_to_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpsxds f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: d_to_i64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctidz f0, f1 +; NOVSX-NEXT: stfd f0, -8(r1) +; NOVSX-NEXT: ld r3, -8(r1) +; NOVSX-NEXT: blr +; +; SPE-LABEL: d_to_i64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r4, r3, r4 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: bl __fixdfdi +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %m, metadata !"fpexcept.ignore") #0 + ret i64 %conv +} + +define i64 @d_to_u64(double %m) #0 { +; CHECK-LABEL: d_to_u64: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: xscvdpuxds f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: d_to_u64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiduz f0, f1 +; NOVSX-NEXT: stfd f0, -8(r1) +; NOVSX-NEXT: ld r3, -8(r1) +; NOVSX-NEXT: blr +; +; SPE-LABEL: d_to_u64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r4, r3, r4 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: bl __fixunsdfdi +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %m, metadata !"fpexcept.ignore") #0 + ret i64 %conv +} + +define zeroext i32 @d_to_u32(double %m) #0 { +; CHECK-LABEL: d_to_u32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpuxws f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: d_to_u32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiwuz f0, f1 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwz r3, -4(r1) +; NOVSX-NEXT: blr +; +; SPE-LABEL: d_to_u32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: efdctuiz r3, r3 +; SPE-NEXT: blr +entry: + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %m, metadata !"fpexcept.ignore") #0 + ret i32 %conv +} + +define signext i32 @f_to_i32(float %m) #0 { +; CHECK-LABEL: f_to_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpsxws f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: f_to_i32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiwz f0, f1 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwa r3, -4(r1) +; NOVSX-NEXT: blr +; +; SPE-LABEL: 
f_to_i32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efsctsiz r3, r3 +; SPE-NEXT: blr +entry: + %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %m, metadata !"fpexcept.ignore") #0 + ret i32 %conv +} + +define i64 @f_to_i64(float %m) #0 { +; CHECK-LABEL: f_to_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpsxds f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: f_to_i64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctidz f0, f1 +; NOVSX-NEXT: stfd f0, -8(r1) +; NOVSX-NEXT: ld r3, -8(r1) +; NOVSX-NEXT: blr +; +; SPE-LABEL: f_to_i64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __fixsfdi +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %m, metadata !"fpexcept.ignore") #0 + ret i64 %conv +} + +define i64 @f_to_u64(float %m) #0 { +; CHECK-LABEL: f_to_u64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpuxds f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: f_to_u64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiduz f0, f1 +; NOVSX-NEXT: stfd f0, -8(r1) +; NOVSX-NEXT: ld r3, -8(r1) +; NOVSX-NEXT: blr +; +; SPE-LABEL: f_to_u64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __fixunssfdi +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %m, metadata !"fpexcept.ignore") #0 + ret i64 %conv +} + +define zeroext i32 @f_to_u32(float %m) #0 { +; CHECK-LABEL: f_to_u32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpuxws f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrldi r3, r3, 
32 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: f_to_u32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiwuz f0, f1 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwz r3, -4(r1) +; NOVSX-NEXT: blr +; +; SPE-LABEL: f_to_u32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efsctuiz r3, r3 +; SPE-NEXT: blr +entry: + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %m, metadata !"fpexcept.ignore") #0 + ret i32 %conv +} + +attributes #0 = { strictfp }