diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -434,6 +434,12 @@ /// Constrained direct move from VSR instruction. STRICT_MFVSR = ISD::FIRST_TARGET_STRICTFP_OPCODE, + /// Constrained integer-to-floating-point conversion instructions. + STRICT_FCFID, + STRICT_FCFIDU, + STRICT_FCFIDS, + STRICT_FCFIDUS, + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -216,6 +216,13 @@ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (isPPC64 || Subtarget.hasFPCVT()) { + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote); + AddPromotedToType (ISD::STRICT_SINT_TO_FP, MVT::i1, + isPPC64 ? MVT::i64 : MVT::i32); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote); + AddPromotedToType (ISD::STRICT_UINT_TO_FP, MVT::i1, + isPPC64 ? MVT::i64 : MVT::i32); + setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); @@ -223,6 +230,9 @@ AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, isPPC64 ? 
MVT::i64 : MVT::i32); } else { + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); } @@ -424,6 +434,8 @@ if (Subtarget.hasSPE()) { // SPE has built-in conversions setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); @@ -435,6 +447,8 @@ // PowerPC does not have [U|S]INT_TO_FP setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand); } if (Subtarget.hasDirectMove() && isPPC64) { @@ -564,6 +578,8 @@ // They also have instructions for converting between i64 and fp. setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); @@ -573,8 +589,10 @@ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) + if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) { + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + } } else { // PowerPC does not have FP_TO_UINT on 32-bit implementations. 
if (Subtarget.hasSPE()) { @@ -591,6 +609,8 @@ if (Subtarget.has64BitSupport()) { setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); @@ -599,6 +619,8 @@ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); @@ -1581,6 +1603,10 @@ case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; case PPCISD::FNMSUB: return "PPCISD::FNMSUB"; case PPCISD::STRICT_MFVSR: return "PPCISD::STRICT_MFVSR"; + case PPCISD::STRICT_FCFID: return "PPCISD::STRICT_FCFID"; + case PPCISD::STRICT_FCFIDU: return "PPCISD::STRICT_FCFIDU"; + case PPCISD::STRICT_FCFIDS: return "PPCISD::STRICT_FCFIDS"; + case PPCISD::STRICT_FCFIDUS: return "PPCISD::STRICT_FCFIDUS"; } return nullptr; } @@ -8141,14 +8167,10 @@ return Op; } -static SDValue getFPNode(unsigned Opc, EVT VT, SDValue Op, SDValue Chain, - SelectionDAG &DAG, bool Strict) { - SDLoc dl(Op); - if (!Strict) - return DAG.getNode(Opc, dl, VT, Op); - +static SDValue getStrictFPNode(unsigned Opc, EVT VT, ArrayRef Ops, + SelectionDAG &DAG) { + assert(Ops.size() >= 2 && Ops[0] && "Missing chain for strict nodes!"); // Try to generate a STRICT node version - assert(Chain && "Missing chain for creating strict nodes"); unsigned NewOpc = ISD::DELETED_NODE; switch (Opc) { default: @@ -8156,6 +8178,18 @@ case PPCISD::MFVSR: NewOpc = 
PPCISD::STRICT_MFVSR; break; + case PPCISD::FCFID: + NewOpc = PPCISD::STRICT_FCFID; + break; + case PPCISD::FCFIDU: + NewOpc = PPCISD::STRICT_FCFIDU; + break; + case PPCISD::FCFIDS: + NewOpc = PPCISD::STRICT_FCFIDS; + break; + case PPCISD::FCFIDUS: + NewOpc = PPCISD::STRICT_FCFIDUS; + break; #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::DAGN: \ NewOpc = ISD::STRICT_##DAGN; \ @@ -8165,7 +8199,21 @@ #undef DAG_INSTRUCTION #undef CMP_INSTRUCTION } - return DAG.getNode(NewOpc, dl, {VT, MVT::Other}, {Chain, Op}); + return DAG.getNode(NewOpc, SDLoc(Ops[1]), {VT, MVT::Other}, Ops); +} + +static SDValue getFPNode(unsigned Opc, EVT VT, SDValue Op, SDValue Chain, + SelectionDAG& DAG, bool Strict) { + if (!Strict) + return DAG.getNode(Opc, SDLoc(Op), VT, Op); + return getStrictFPNode(Opc, VT, { Chain, Op }, DAG); +} + +static SDValue getFPNode(unsigned Opc, EVT VT, SDValue Op1, SDValue Op2, + SDValue Chain, SelectionDAG& DAG, bool Strict) { + if (!Strict) + return DAG.getNode(Opc, SDLoc(Op1), VT, Op1, Op2); + return getStrictFPNode(Opc, VT, { Chain, Op1, Op2 }, DAG); } static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, @@ -8408,13 +8456,31 @@ continue; if (UI->getOpcode() != ISD::SINT_TO_FP && - UI->getOpcode() != ISD::UINT_TO_FP) + UI->getOpcode() != ISD::UINT_TO_FP && + UI->getOpcode() != ISD::STRICT_SINT_TO_FP && + UI->getOpcode() != ISD::STRICT_UINT_TO_FP) return true; } return false; } +SDValue convertIntToFP(SDValue Op, SDValue Src, + SelectionDAG &DAG, + const PPCSubtarget& Subtarget) { + bool Strict = Op->isStrictFPOpcode(); + bool Signed = (Op.getOpcode() == ISD::SINT_TO_FP || + Op.getOpcode() == ISD::STRICT_SINT_TO_FP); + // If we have FCFIDS, then use it when converting to single-precision. + // Otherwise, convert to double-precision and then round. + bool Single = (Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT()); + unsigned ConvOpc = Single + ? (Signed ? PPCISD::FCFIDS : PPCISD::FCFIDUS) + : (Signed ? 
PPCISD::FCFID : PPCISD::FCFIDU); + return getFPNode(ConvOpc, Single ? MVT::f32 : MVT::f64, Src, + Op.getOperand(0), DAG, Strict); +} + /// Custom lowers integer to floating point conversions to use /// the direct move instructions available in ISA 2.07 to avoid the /// need for load/store combinations. @@ -8426,25 +8492,12 @@ "Invalid floating point type as target of conversion"); assert(Subtarget.hasFPCVT() && "Int to FP conversions with direct moves require FPCVT"); - SDValue FP; - SDValue Src = Op.getOperand(0); - bool SinglePrec = Op.getValueType() == MVT::f32; + SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0); bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32; bool Signed = Op.getOpcode() == ISD::SINT_TO_FP; - unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) : - (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU); - - if (WordInt) { - FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ, - dl, MVT::f64, Src); - FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); - } - else { - FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src); - FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); - } - - return FP; + unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA; + SDValue FP = DAG.getNode(MovOpc, dl, MVT::f64, Src); + return convertIntToFP(Op, FP, DAG, Subtarget); } static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) { @@ -8520,8 +8573,12 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); + bool Strict = Op->isStrictFPOpcode(); + bool Signed = (Op.getOpcode() == ISD::SINT_TO_FP || + Op.getOpcode() == ISD::STRICT_SINT_TO_FP); + int SrcIdx = Strict ? 
1 : 0; - EVT InVT = Op.getOperand(0).getValueType(); + EVT InVT = Op.getOperand(SrcIdx).getValueType(); EVT OutVT = Op.getValueType(); if (OutVT.isVector() && OutVT.isFloatingPoint() && isOperationCustom(Op.getOpcode(), InVT)) @@ -8531,11 +8588,12 @@ if (EnableQuadPrecision && (Op.getValueType() == MVT::f128)) return Op; - if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) { + if (Subtarget.hasQPX() && Op.getOperand(SrcIdx).getValueType() == MVT::v4i1) { + assert(!Strict && "Strict int to fp not supported on QPX!"); if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64) return SDValue(); - SDValue Value = Op.getOperand(0); + SDValue Value = Op.getOperand(SrcIdx); // The values are now known to be -1 (false) or 1 (true). To convert this // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 @@ -8556,8 +8614,8 @@ if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); - if (Op.getOperand(0).getValueType() == MVT::i1) - return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), + if (Op.getOperand(SrcIdx).getValueType() == MVT::i1) + return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(SrcIdx), DAG.getConstantFP(1.0, dl, Op.getValueType()), DAG.getConstantFP(0.0, dl, Op.getValueType())); @@ -8567,22 +8625,11 @@ Subtarget.isPPC64() && Subtarget.hasFPCVT()) return LowerINT_TO_FPDirectMove(Op, DAG, dl); - assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && + assert((Signed || Subtarget.hasFPCVT()) && "UINT_TO_FP is supported only with FPCVT"); - // If we have FCFIDS, then use it when converting to single-precision. - // Otherwise, convert to double-precision and then round. - unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) - ? (Op.getOpcode() == ISD::UINT_TO_FP ? 
PPCISD::FCFIDUS - : PPCISD::FCFIDS) - : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU - : PPCISD::FCFID); - MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) - ? MVT::f32 - : MVT::f64; - - if (Op.getOperand(0).getValueType() == MVT::i64) { - SDValue SINT = Op.getOperand(0); + if (Op.getOperand(SrcIdx).getValueType() == MVT::i64) { + SDValue SINT = Op.getOperand(SrcIdx); // When converting to single-precision, we actually need to convert // to double-precision first and then round to single-precision. // To avoid double-rounding effects during that operation, we have @@ -8695,15 +8742,15 @@ } else Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); - SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); + SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget); if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) - FP = DAG.getNode(ISD::FP_ROUND, dl, - MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); + FP = getFPNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0, dl), + Op.getOperand(0), DAG, Strict); return FP; } - assert(Op.getOperand(0).getValueType() == MVT::i32 && + assert(Op.getOperand(SrcIdx).getValueType() == MVT::i32 && "Unhandled INT_TO_FP type in custom expander!"); // Since we only generate this in 64-bit mode, we can take advantage of // 64-bit registers. 
In particular, sign extend the input value into the @@ -8717,13 +8764,13 @@ if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { ReuseLoadInfo RLI; bool ReusingLoad; - if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI, + if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(SrcIdx), MVT::i32, RLI, DAG))) { int FrameIdx = MFI.CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue Store = - DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, + DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(SrcIdx), FIdx, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FrameIdx)); @@ -8755,7 +8802,7 @@ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, - Op.getOperand(0)); + Op.getOperand(SrcIdx)); // STD the extended value into the stack slot. SDValue Store = DAG.getStore( @@ -8769,10 +8816,10 @@ } // FCFID it and return it. - SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); + SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget); if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) - FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, - DAG.getIntPtrConstant(0, dl)); + FP = getFPNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0, dl), + Op.getOperand(0), DAG, Strict); return FP; } @@ -10942,6 +10989,8 @@ case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op)); + case ISD::STRICT_UINT_TO_FP: + case ISD::STRICT_SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -1496,6 +1496,15 @@ def : Pat<(unaligned4store i64:$rS, xoaddr:$dst), (STDX $rS, xoaddr:$dst)>; +def : Pat<(f64 (PPCstrict_fcfid 
// Chained counterparts of the FCFID* integer-to-floating-point conversion
// nodes. They correspond to the PPCISD::STRICT_FCFID* opcodes, which the
// lowering code emits for constrained (strict) int-to-fp conversions.
// SDNPHasChain gives every node a chain operand and a chain result.
//
// The double-precision forms are plain unary FP operations (SDTFPUnaryOp).
// The single-precision forms use SDTFPRoundOp because they produce an f32
// result from an f64-typed operand.
def PPCstrict_fcfid : SDNode<"PPCISD::STRICT_FCFID",
                             SDTFPUnaryOp, [SDNPHasChain]>;
def PPCstrict_fcfidu : SDNode<"PPCISD::STRICT_FCFIDU",
                              SDTFPUnaryOp, [SDNPHasChain]>;
def PPCstrict_fcfids : SDNode<"PPCISD::STRICT_FCFIDS",
                              SDTFPRoundOp, [SDNPHasChain]>;
def PPCstrict_fcfidus : SDNode<"PPCISD::STRICT_FCFIDUS",
                               SDTFPRoundOp, [SDNPHasChain]>;
// Select the chained STRICT_FCFID* nodes to the VSX scalar integer-to-FP
// conversion instructions. The operand is typed f64 because the lowering code
// places the integer bits in an f64-typed value (bitcast / direct move / load)
// before creating the conversion node.
//
// NOTE(review): XSCVSXDSP and XSCVUXDSP are the single-precision forms;
// presumably they need a stricter predicate than the double-precision ones.
// Confirm that the enclosing Predicates block guarantees they are available.
// NOTE(review): PPCInstr64Bit.td maps the same four nodes to FCFID*; which
// pattern is chosen depends on the predicates / AddedComplexity of the
// enclosing scopes, which are not visible here - TODO confirm.
def : Pat<(f64 (PPCstrict_fcfid f64:$A)),
          (f64 (XSCVSXDDP f64:$A))>;
def : Pat<(f64 (PPCstrict_fcfidu f64:$A)),
          (f64 (XSCVUXDDP f64:$A))>;
def : Pat<(f32 (PPCstrict_fcfids f64:$A)),
          (f32 (XSCVSXDSP f64:$A))>;
def : Pat<(f32 (PPCstrict_fcfidus f64:$A)),
          (f32 (XSCVUXDSP f64:$A))>;
@llvm.experimental.constrained.fptosi.i32.f128(fp128, metadata) declare i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128, metadata) declare i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128, metadata) declare i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128, metadata) +declare fp128 @llvm.experimental.constrained.sitofp.f128.i32(i32, metadata, metadata) +declare fp128 @llvm.experimental.constrained.sitofp.f128.i64(i64, metadata, metadata) +declare fp128 @llvm.experimental.constrained.uitofp.f128.i32(i32, metadata, metadata) +declare fp128 @llvm.experimental.constrained.uitofp.f128.i64(i64, metadata, metadata) + +declare ppc_fp128 @llvm.experimental.constrained.sitofp.ppcf128.i32(i32, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.sitofp.ppcf128.i64(i64, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.uitofp.ppcf128.i32(i32, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.uitofp.ppcf128.i64(i64, metadata, metadata) + +define signext i32 @ppcq_to_i32(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_i32: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __gcc_qtou +; P8-NEXT: nop +; P8-NEXT: extsw r3, r3 +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_i32: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __gcc_qtou +; P9-NEXT: nop +; P9-NEXT: extsw r3, r3 +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_i32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; 
NOVSX-NEXT: bl __gcc_qtou +; NOVSX-NEXT: nop +; NOVSX-NEXT: extsw r3, r3 +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: ppcq_to_i32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -48(r1) +; SPE-NEXT: .cfi_def_cfa_offset 48 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: evstdd r5, 16(r1) +; SPE-NEXT: evstdd r3, 24(r1) +; SPE-NEXT: lwz r3, 28(r1) +; SPE-NEXT: stw r3, 44(r1) +; SPE-NEXT: lwz r3, 24(r1) +; SPE-NEXT: stw r3, 40(r1) +; SPE-NEXT: lwz r3, 20(r1) +; SPE-NEXT: stw r3, 36(r1) +; SPE-NEXT: lwz r3, 16(r1) +; SPE-NEXT: stw r3, 32(r1) +; SPE-NEXT: evldd r4, 40(r1) +; SPE-NEXT: evldd r6, 32(r1) +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: evmergehi r5, r6, r6 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: bl __gcc_qtou +; SPE-NEXT: lwz r0, 52(r1) +; SPE-NEXT: addi r1, r1, 48 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define i64 @ppcq_to_i64(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_i64: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixtfdi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_i64: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixtfdi +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; 
P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_i64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixtfdi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: ppcq_to_i64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -48(r1) +; SPE-NEXT: .cfi_def_cfa_offset 48 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: evstdd r5, 16(r1) +; SPE-NEXT: evstdd r3, 24(r1) +; SPE-NEXT: lwz r3, 28(r1) +; SPE-NEXT: stw r3, 44(r1) +; SPE-NEXT: lwz r3, 24(r1) +; SPE-NEXT: stw r3, 40(r1) +; SPE-NEXT: lwz r3, 20(r1) +; SPE-NEXT: stw r3, 36(r1) +; SPE-NEXT: lwz r3, 16(r1) +; SPE-NEXT: stw r3, 32(r1) +; SPE-NEXT: evldd r4, 40(r1) +; SPE-NEXT: evldd r6, 32(r1) +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: evmergehi r5, r6, r6 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: bl __fixtfdi +; SPE-NEXT: lwz r0, 52(r1) +; SPE-NEXT: addi r1, r1, 48 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call i64 @llvm.experimental.constrained.fptosi.i64.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define i64 @ppcq_to_u64(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_u64: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunstfdi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_u64: +; P9: # %bb.0: # %entry +; P9-NEXT: 
mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixunstfdi +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_u64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunstfdi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: ppcq_to_u64: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -48(r1) +; SPE-NEXT: .cfi_def_cfa_offset 48 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: evstdd r5, 16(r1) +; SPE-NEXT: evstdd r3, 24(r1) +; SPE-NEXT: lwz r3, 28(r1) +; SPE-NEXT: stw r3, 44(r1) +; SPE-NEXT: lwz r3, 24(r1) +; SPE-NEXT: stw r3, 40(r1) +; SPE-NEXT: lwz r3, 20(r1) +; SPE-NEXT: stw r3, 36(r1) +; SPE-NEXT: lwz r3, 16(r1) +; SPE-NEXT: stw r3, 32(r1) +; SPE-NEXT: evldd r4, 40(r1) +; SPE-NEXT: evldd r6, 32(r1) +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: evmergehi r5, r6, r6 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: bl __fixunstfdi +; SPE-NEXT: lwz r0, 52(r1) +; SPE-NEXT: addi r1, r1, 48 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call i64 @llvm.experimental.constrained.fptoui.i64.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_u32: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: 
.cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunstfsi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_u32: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixunstfsi +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_u32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunstfsi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: ppcq_to_u32: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -48(r1) +; SPE-NEXT: .cfi_def_cfa_offset 48 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: evmergelo r5, r5, r6 +; SPE-NEXT: evmergelo r3, r3, r4 +; SPE-NEXT: evstdd r5, 16(r1) +; SPE-NEXT: evstdd r3, 24(r1) +; SPE-NEXT: lwz r3, 28(r1) +; SPE-NEXT: stw r3, 44(r1) +; SPE-NEXT: lwz r3, 24(r1) +; SPE-NEXT: stw r3, 40(r1) +; SPE-NEXT: lwz r3, 20(r1) +; SPE-NEXT: stw r3, 36(r1) +; SPE-NEXT: lwz r3, 16(r1) +; SPE-NEXT: stw r3, 32(r1) +; SPE-NEXT: evldd r4, 40(r1) +; SPE-NEXT: evldd r6, 32(r1) +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: evmergehi r5, r6, r6 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6 +; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 +; SPE-NEXT: bl __fixunstfsi +; SPE-NEXT: lwz r0, 52(r1) +; SPE-NEXT: addi r1, r1, 48 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(ppc_fp128 %m, 
metadata !"fpexcept.strict") #0 + ret i32 %conv +} + define signext i32 @q_to_i32(fp128 %m) #0 { ; P8-LABEL: q_to_i32: ; P8: # %bb.0: # %entry @@ -76,7 +414,7 @@ ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr entry: - %conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %m, metadata !"fpexcept.ignore") #0 + %conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %m, metadata !"fpexcept.strict") #0 ret i32 %conv } @@ -136,7 +474,7 @@ ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr entry: - %conv = tail call i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128 %m, metadata !"fpexcept.ignore") #0 + %conv = tail call i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128 %m, metadata !"fpexcept.strict") #0 ret i64 %conv } @@ -196,7 +534,7 @@ ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr entry: - %conv = tail call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %m, metadata !"fpexcept.ignore") #0 + %conv = tail call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %m, metadata !"fpexcept.strict") #0 ret i64 %conv } @@ -256,8 +594,248 @@ ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr entry: - %conv = tail call i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128 %m, metadata !"fpexcept.ignore") #0 + %conv = tail call i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128 %m, metadata !"fpexcept.strict") #0 ret i32 %conv } +define fp128 @i32_to_q(i32 signext %m) #0 { +; P8-LABEL: i32_to_q: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __floatsikf +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: i32_to_q: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __floatsikf +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 
+; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: i32_to_q: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __floatsikf +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: i32_to_q: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __floatsikf +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call fp128 @llvm.experimental.constrained.sitofp.f128.i32(i32 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret fp128 %conv +} + +define fp128 @i64_to_q(i64 %m) #0 { +; P8-LABEL: i64_to_q: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __floatdikf +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: i64_to_q: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __floatdikf +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: i64_to_q: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __floatdikf +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: i64_to_q: 
+; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __floatdikf +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call fp128 @llvm.experimental.constrained.sitofp.f128.i64(i64 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret fp128 %conv +} + +define fp128 @u32_to_q(i32 zeroext %m) #0 { +; P8-LABEL: u32_to_q: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __floatunsikf +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: u32_to_q: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __floatunsikf +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: u32_to_q: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __floatunsikf +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: u32_to_q: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __floatunsikf +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call fp128 @llvm.experimental.constrained.uitofp.f128.i32(i32 %m, metadata !"round.dynamic", metadata 
!"fpexcept.strict") #0 + ret fp128 %conv +} + +define fp128 @u64_to_q(i64 %m) #0 { +; P8-LABEL: u64_to_q: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __floatundikf +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: u64_to_q: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __floatundikf +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: u64_to_q: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __floatundikf +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: u64_to_q: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __floatundikf +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call fp128 @llvm.experimental.constrained.uitofp.f128.i64(i64 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret fp128 %conv +} + attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll @@ -25,6 +25,16 @@ declare i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128, metadata) declare i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128, 
metadata) +declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata) +declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata) +declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata) +declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata) + +declare float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata) +declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata) +declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata) +declare float @llvm.experimental.constrained.uitofp.f32.i64(i64, metadata, metadata) + define i32 @d_to_i32(double %m) #0 { ; CHECK-LABEL: d_to_i32: ; CHECK: # %bb.0: # %entry @@ -46,7 +56,7 @@ ; SPE-NEXT: efdctsiz r3, r3 ; SPE-NEXT: blr entry: - %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %m, metadata !"fpexcept.ignore") #0 + %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %m, metadata !"fpexcept.strict") #0 ret i32 %conv } @@ -81,7 +91,7 @@ ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr entry: - %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %m, metadata !"fpexcept.ignore") #0 + %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %m, metadata !"fpexcept.strict") #0 ret i64 %conv } @@ -116,7 +126,7 @@ ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr entry: - %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %m, metadata !"fpexcept.ignore") #0 + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %m, metadata !"fpexcept.strict") #0 ret i64 %conv } @@ -142,7 +152,7 @@ ; SPE-NEXT: efdctuiz r3, r3 ; SPE-NEXT: blr entry: - %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %m, metadata !"fpexcept.ignore") #0 + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %m, metadata !"fpexcept.strict") #0 ret i32 %conv } @@ -167,7 
+177,7 @@ ; SPE-NEXT: efsctsiz r3, r3 ; SPE-NEXT: blr entry: - %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %m, metadata !"fpexcept.ignore") #0 + %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %m, metadata !"fpexcept.strict") #0 ret i32 %conv } @@ -198,7 +208,7 @@ ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr entry: - %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %m, metadata !"fpexcept.ignore") #0 + %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %m, metadata !"fpexcept.strict") #0 ret i64 %conv } @@ -229,7 +239,7 @@ ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr entry: - %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %m, metadata !"fpexcept.ignore") #0 + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %m, metadata !"fpexcept.strict") #0 ret i64 %conv } @@ -254,8 +264,246 @@ ; SPE-NEXT: efsctuiz r3, r3 ; SPE-NEXT: blr entry: - %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %m, metadata !"fpexcept.ignore") #0 + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %m, metadata !"fpexcept.strict") #0 ret i32 %conv } +; A signed i32 source has to be sign-extended into the FPR before the signed convert, so the direct move is mtfprwa (mtfprwz would zero the upper word and misconvert negative inputs). +define double @i32_to_d(i32 signext %m) #0 { +; CHECK-LABEL: i32_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mtfprwa f0, r3 +; CHECK-NEXT: xscvsxddp f1, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: i32_to_d: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: addi r4, r1, -4 +; NOVSX-NEXT: stw r3, -4(r1) +; NOVSX-NEXT: lfiwax f0, 0, r4 +; NOVSX-NEXT: fcfid f1, f0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: i32_to_d: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efdcfsi r4, r3 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +entry: + %conv = tail call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret double %conv +} + +define double @i64_to_d(i64 %m) #0
{ +; CHECK-LABEL: i64_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xscvsxddp f1, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: i64_to_d: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: std r3, -8(r1) +; NOVSX-NEXT: lfd f0, -8(r1) +; NOVSX-NEXT: fcfid f1, f0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: i64_to_d: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __floatdidf +; SPE-NEXT: evmergelo r4, r3, r4 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret double %conv +} + +; An unsigned i32 source has to be zero-extended when it is reloaded through the stack, so the load is lfiwzx (lfiwax would sign-extend inputs that have bit 31 set). +define double @u32_to_d(i32 zeroext %m) #0 { +; CHECK-LABEL: u32_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvuxddp f1, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: u32_to_d: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: addi r4, r1, -4 +; NOVSX-NEXT: stw r3, -4(r1) +; NOVSX-NEXT: lfiwzx f0, 0, r4 +; NOVSX-NEXT: fcfidu f1, f0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: u32_to_d: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efdcfui r4, r3 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +entry: + %conv = tail call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret double %conv +} + +define double @u64_to_d(i64 %m) #0 { +; CHECK-LABEL: u64_to_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xscvuxddp f1, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: u64_to_d: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: std r3,
-8(r1) +; NOVSX-NEXT: lfd f0, -8(r1) +; NOVSX-NEXT: fcfidu f1, f0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: u64_to_d: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __floatundidf +; SPE-NEXT: evmergelo r4, r3, r4 +; SPE-NEXT: evmergehi r3, r4, r4 +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret double %conv +} + +; A signed i32 source has to be sign-extended into the FPR before the signed convert, so the direct move is mtfprwa (mtfprwz would zero the upper word and misconvert negative inputs). +define float @i32_to_f(i32 signext %m) #0 { +; CHECK-LABEL: i32_to_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mtfprwa f0, r3 +; CHECK-NEXT: xscvsxdsp f1, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: i32_to_f: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: addi r4, r1, -4 +; NOVSX-NEXT: stw r3, -4(r1) +; NOVSX-NEXT: lfiwax f0, 0, r4 +; NOVSX-NEXT: fcfids f1, f0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: i32_to_f: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efscfsi r3, r3 +; SPE-NEXT: blr +entry: + %conv = tail call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret float %conv +} + +define float @i64_to_f(i64 %m) #0 { +; CHECK-LABEL: i64_to_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xscvsxdsp f1, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: i64_to_f: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: std r3, -8(r1) +; NOVSX-NEXT: lfd f0, -8(r1) +; NOVSX-NEXT: fcfids f1, f0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: i64_to_f: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __floatdisf +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT:
addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret float %conv +} + +; An unsigned i32 source has to be zero-extended when it is reloaded through the stack, so the load is lfiwzx (lfiwax would sign-extend inputs that have bit 31 set). +define float @u32_to_f(i32 zeroext %m) #0 { +; CHECK-LABEL: u32_to_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvuxdsp f1, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: u32_to_f: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: addi r4, r1, -4 +; NOVSX-NEXT: stw r3, -4(r1) +; NOVSX-NEXT: lfiwzx f0, 0, r4 +; NOVSX-NEXT: fcfidus f1, f0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: u32_to_f: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efscfui r3, r3 +; SPE-NEXT: blr +entry: + %conv = tail call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret float %conv +} + +define float @u64_to_f(i64 %m) #0 { +; CHECK-LABEL: u64_to_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xscvuxdsp f1, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: u64_to_f: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: std r3, -8(r1) +; NOVSX-NEXT: lfd f0, -8(r1) +; NOVSX-NEXT: fcfidus f1, f0 +; NOVSX-NEXT: blr +; +; SPE-LABEL: u64_to_f: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr r0 +; SPE-NEXT: stw r0, 4(r1) +; SPE-NEXT: stwu r1, -16(r1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: bl __floatundisf +; SPE-NEXT: lwz r0, 20(r1) +; SPE-NEXT: addi r1, r1, 16 +; SPE-NEXT: mtlr r0 +; SPE-NEXT: blr +entry: + %conv = tail call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret float %conv +} + attributes #0 = { strictfp }