Index: llvm/trunk/docs/LangRef.rst =================================================================== --- llvm/trunk/docs/LangRef.rst +++ llvm/trunk/docs/LangRef.rst @@ -14823,6 +14823,77 @@ operand computed with infinite precision, and then rounded to the target precision. +'``llvm.experimental.constrained.fptrunc``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.fptrunc( , + metadata , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.fptrunc``' intrinsic truncates ``value`` +to type ``ty2``. + +Arguments: +"""""""""" + +The first argument to the '``llvm.experimental.constrained.fptrunc``' +intrinsic must be :ref:`floating point ` or :ref:`vector +` of floating point values. This argument must be larger in size +than the result. + +The second and third arguments specify the rounding mode and exception +behavior as described above. + +Semantics: +"""""""""" + +The result produced is a floating point value truncated to be smaller in size +than the operand. + +'``llvm.experimental.constrained.fpext``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.fpext( , + metadata ) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.fpext``' intrinsic extends a +floating-point ``value`` to a larger floating-point value. + +Arguments: +"""""""""" + +The first argument to the '``llvm.experimental.constrained.fpext``' +intrinsic must be :ref:`floating point ` or :ref:`vector +` of floating point values. This argument must be smaller in size +than the result. + +The second argument specifies the exception behavior as described above. + +Semantics: +"""""""""" + +The result produced is a floating point value extended to be larger in size +than the operand. All restrictions that apply to the fpext instruction also +apply to this intrinsic. + Constrained libm-equivalent Intrinsics -------------------------------------- Index: llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h +++ llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h @@ -297,6 +297,26 @@ STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC, + /// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating + /// point type down to the precision of the destination VT. TRUNC is a + /// flag, which is always an integer that is zero or one. If TRUNC is 0, + /// this is a normal rounding, if it is 1, this FP_ROUND is known to not + /// change the value of Y. + /// + /// The TRUNC = 1 case is used in cases where we know that the value will + /// not be modified by the node, because Y is not using any of the extra + /// precision of source type. This allows certain transformations like + /// STRICT_FP_EXTEND(STRICT_FP_ROUND(X,1)) -> X which are not safe for + /// STRICT_FP_EXTEND(STRICT_FP_ROUND(X,0)) because the extra bits aren't + /// removed. + /// It is used to limit optimizations while the DAG is being optimized. + STRICT_FP_ROUND, + + /// X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP + /// type. + /// It is used to limit optimizations while the DAG is being optimized. + STRICT_FP_EXTEND, + /// FMA - Perform a * b + c with no intermediate rounding step. FMA, Index: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h +++ llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h @@ -691,6 +691,8 @@ case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_EXTEND: return true; } } Index: llvm/trunk/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/TargetLowering.h +++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h @@ -891,6 +891,8 @@ case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break; case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break; case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break; + case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break; + case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break; } auto Action = getOperationAction(EqOpc, VT); Index: llvm/trunk/include/llvm/IR/IntrinsicInst.h =================================================================== --- llvm/trunk/include/llvm/IR/IntrinsicInst.h +++ llvm/trunk/include/llvm/IR/IntrinsicInst.h @@ -238,6 +238,8 @@ case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: case Intrinsic::experimental_constrained_pow: case Intrinsic::experimental_constrained_powi: Index: llvm/trunk/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/trunk/include/llvm/IR/Intrinsics.td +++ llvm/trunk/include/llvm/IR/Intrinsics.td @@ -607,6 +607,15 @@ llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty, + llvm_metadata_ty ]>; + + def int_experimental_constrained_fpext : Intrinsic<[ llvm_anyfloat_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; + // These intrinsics are sensitive to the rounding mode so we need constrained // versions of each of them. When strict rounding and exception control are // not required the non-constrained versions of these intrinsics should be @@ -688,9 +697,7 @@ llvm_metadata_ty, llvm_metadata_ty ]>; } -// FIXME: Add intrinsics for fcmp, fptrunc, fpext, fptoui and fptosi. -// FIXME: Add intrinsics for fabs and copysign? - +// FIXME: Add intrinsics for fcmp, fptoui and fptosi. //===------------------------- Expect Intrinsics --------------------------===// // Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -154,6 +154,8 @@ SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl); + SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, + const SDLoc &dl, SDValue ChainIn); SDValue ExpandBUILD_VECTOR(SDNode *Node); SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, @@ -1115,6 +1117,8 @@ case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_EXTEND: // These pseudo-ops get legalized as if they were their non-strict // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT // is also legal, but if ISD::FSQRT requires expansion then so does @@ -1741,6 +1745,12 @@ /// The resultant code need not be legal. SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl) { + return EmitStackConvert(SrcOp, SlotVT, DestVT, dl, DAG.getEntryNode()); +} + +SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, + EVT DestVT, const SDLoc &dl, + SDValue Chain) { // Create the stack frame object. unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment( SrcOp.getValueType().getTypeForEVT(*DAG.getContext())); @@ -1761,19 +1771,19 @@ // later than DestVT. SDValue Store; - if (SrcSize > SlotSize) - Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, + if (SrcSize > SlotSize) + Store = DAG.getTruncStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SlotVT, SrcAlign); else { assert(SrcSize == SlotSize && "Invalid store"); Store = - DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, SrcAlign); + DAG.getStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SrcAlign); } // Result is a load from the stack slot. if (SlotSize == DestSize) return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign); - + assert(SlotSize < DestSize && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT, DestAlign); @@ -2777,12 +2787,27 @@ } break; } + case ISD::STRICT_FP_ROUND: + Tmp1 = EmitStackConvert(Node->getOperand(1), + Node->getValueType(0), + Node->getValueType(0), dl, Node->getOperand(0)); + ReplaceNode(Node, Tmp1.getNode()); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_ROUND node\n"); + return true; case ISD::FP_ROUND: case ISD::BITCAST: - Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), + Tmp1 = EmitStackConvert(Node->getOperand(0), + Node->getValueType(0), Node->getValueType(0), dl); Results.push_back(Tmp1); break; + case ISD::STRICT_FP_EXTEND: + Tmp1 = EmitStackConvert(Node->getOperand(1), + Node->getOperand(1).getValueType(), + Node->getValueType(0), dl, Node->getOperand(0)); + ReplaceNode(Node, Tmp1.getNode()); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_EXTEND node\n"); + return true; case ISD::FP_EXTEND: Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getOperand(0).getValueType(), Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -687,6 +687,7 @@ SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); + SDValue ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N); SDValue ScalarizeVecRes_FPOWI(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); @@ -710,6 +711,7 @@ SDValue ScalarizeVecOp_VSETCC(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_VECREDUCE(SDNode *N); //===--------------------------------------------------------------------===// @@ -820,6 +822,7 @@ SDValue WidenVecRes_StrictFP(SDNode *N); SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo); SDValue WidenVecRes_Convert(SDNode *N); + SDValue WidenVecRes_Convert_StrictFP(SDNode *N); SDValue WidenVecRes_FCOPYSIGN(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -331,6 +331,8 @@ case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_EXTEND: // These pseudo-ops get legalized as if they were their non-strict // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT // is also legal, but if ISD::FSQRT requires expansion then so does @@ -1301,7 +1303,7 @@ if (OperVT.isVector()) Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - EltVT, Oper, Idx); + OperVT.getVectorElementType(), Oper, Idx); Opers.push_back(Oper); } Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -50,6 +50,7 @@ case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; @@ -170,6 +171,7 @@ case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_EXTEND: R = ScalarizeVecRes_StrictFPOp(N); break; case ISD::UADDO: @@ -321,6 +323,18 @@ NewVT, Op, N->getOperand(1)); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N) { + EVT NewVT = N->getValueType(0).getVectorElementType(); + SDValue Op = GetScalarizedVector(N->getOperand(1)); + SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N), + { NewVT, MVT::Other }, + { N->getOperand(0), Op, N->getOperand(2) }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { SDValue Op = GetScalarizedVector(N->getOperand(0)); return DAG.getNode(ISD::FPOWI, SDLoc(N), @@ -604,6 +618,9 @@ case ISD::STORE: Res = ScalarizeVecOp_STORE(cast(N), OpNo); break; + case ISD::STRICT_FP_ROUND: + Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo); + break; case ISD::FP_ROUND: Res = ScalarizeVecOp_FP_ROUND(N, OpNo); break; @@ -752,6 +769,20 @@ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); } +SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, + unsigned OpNo) { + assert(OpNo == 1 && "Wrong operand for scalarization!"); + SDValue Elt = GetScalarizedVector(N->getOperand(1)); + SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N), + { N->getValueType(0).getVectorElementType(), + MVT::Other }, + { N->getOperand(0), Elt, N->getOperand(2) }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); +} + SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) { SDValue Res = GetScalarizedVector(N->getOperand(0)); // Result type may be wider than element type. @@ -844,7 +875,9 @@ case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FP_EXTEND: + case ISD::STRICT_FP_EXTEND: case ISD::FP_ROUND: + case ISD::STRICT_FP_ROUND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: @@ -1615,15 +1648,34 @@ // If the input also splits, handle it directly for a compile time speedup. // Otherwise split it by hand. - EVT InVT = N->getOperand(0).getValueType(); + unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; + EVT InVT = N->getOperand(OpNo).getValueType(); if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) - GetSplitVector(N->getOperand(0), Lo, Hi); + GetSplitVector(N->getOperand(OpNo), Lo, Hi); else - std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); + std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo); if (N->getOpcode() == ISD::FP_ROUND) { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); + } else if (N->getOpcode() == ISD::STRICT_FP_ROUND) { + Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other }, + { N->getOperand(0), Lo, N->getOperand(2) }); + Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other }, + { N->getOperand(0), Hi, N->getOperand(2) }); + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Lo.getValue(1), Hi.getValue(1)); + ReplaceValueWith(SDValue(N, 1), NewChain); + } else if (N->isStrictFPOpcode()) { + Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other }, + { N->getOperand(0), Lo }); + Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other }, + { N->getOperand(0), Hi }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Lo.getValue(1), Hi.getValue(1)); + ReplaceValueWith(SDValue(N, 1), NewChain); } else { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); @@ -1824,6 +1876,7 @@ case ISD::TRUNCATE: Res = SplitVecOp_TruncateHelper(N); break; + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break; case ISD::STORE: @@ -1853,6 +1906,7 @@ case ISD::CTTZ: case ISD::CTLZ: case ISD::CTPOP: + case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: @@ -1894,7 +1948,11 @@ if (Res.getNode() == N) return true; - assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + if (N->isStrictFPOpcode()) + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 && + "Invalid operand expansion"); + else + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && "Invalid operand expansion"); ReplaceValueWith(SDValue(N, 0), Res); @@ -1982,14 +2040,30 @@ EVT ResVT = N->getValueType(0); SDValue Lo, Hi; SDLoc dl(N); - GetSplitVector(N->getOperand(0), Lo, Hi); + GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi); EVT InVT = Lo.getValueType(); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), InVT.getVectorNumElements()); - Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); - Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi); + if (N->isStrictFPOpcode()) { + Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other }, + { N->getOperand(0), Lo }); + Hi = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other }, + { N->getOperand(0), Hi }); + + // Build a factor node to remember that this operation is independent + // of the other one. + SDValue Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Ch); + } else { + Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi); + } return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } @@ -2461,14 +2535,26 @@ EVT ResVT = N->getValueType(0); SDValue Lo, Hi; SDLoc DL(N); - GetSplitVector(N->getOperand(0), Lo, Hi); + GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi); EVT InVT = Lo.getValueType(); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), InVT.getVectorNumElements()); - Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); - Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); + if (N->isStrictFPOpcode()) { + Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other }, + { N->getOperand(0), Lo, N->getOperand(2) }); + Hi = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other }, + { N->getOperand(0), Hi, N->getOperand(2) }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + Lo.getValue(1), Hi.getValue(1)); + ReplaceValueWith(SDValue(N, 1), NewChain); + } else { + Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); + Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); + } return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); } @@ -2632,6 +2718,11 @@ Res = WidenVecRes_Convert(N); break; + case ISD::STRICT_FP_EXTEND: + case ISD::STRICT_FP_ROUND: + Res = WidenVecRes_Convert_StrictFP(N); + break; + case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: @@ -3109,6 +3200,43 @@ return DAG.getBuildVector(WidenVT, DL, Ops); } +SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) { + SDValue InOp = N->getOperand(1); + SDLoc DL(N); + SmallVector NewOps(N->op_begin(), N->op_end()); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SmallVector WidenVTs = { WidenVT, MVT::Other }; + + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); + + unsigned Opcode = N->getOpcode(); + + // FIXME: Optimizations need to be implemented here. + + // Otherwise unroll into some nasty scalar code and rebuild the vector. + EVT EltVT = WidenVT.getVectorElementType(); + SmallVector EltVTs = { EltVT, MVT::Other }; + SmallVector Ops(WidenNumElts, DAG.getUNDEF(EltVT)); + SmallVector OpChains; + // Use the original element count so we don't do more scalar opts than + // necessary. + unsigned MinElts = N->getValueType(0).getVectorNumElements(); + for (unsigned i=0; i < MinElts; ++i) { + NewOps[1] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Ops[i] = DAG.getNode(Opcode, DL, EltVTs, NewOps); + OpChains.push_back(Ops[i].getValue(1)); + } + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OpChains); + ReplaceValueWith(SDValue(N, 1), NewChain); + + return DAG.getBuildVector(WidenVT, DL, Ops); +} + SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) { unsigned Opcode = N->getOpcode(); SDValue InOp = N->getOperand(0); @@ -3895,6 +4023,7 @@ break; case ISD::FP_EXTEND: + case ISD::STRICT_FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: @@ -3929,8 +4058,12 @@ return true; - assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && - "Invalid operand expansion"); + if (N->isStrictFPOpcode()) + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 && + "Invalid operand expansion"); + else + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); ReplaceValueWith(SDValue(N, 0), Res); return false; @@ -4010,7 +4143,7 @@ EVT EltVT = VT.getVectorElementType(); SDLoc dl(N); unsigned NumElts = VT.getVectorNumElements(); - SDValue InOp = N->getOperand(0); + SDValue InOp = N->getOperand(N->isStrictFPOpcode() ? 1 : 0); assert(getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector && "Unexpected type action"); @@ -4019,10 +4152,19 @@ unsigned Opcode = N->getOpcode(); // See if a widened result type would be legal, if so widen the node. + // FIXME: This isn't safe for StrictFP. Other optimization here is needed. EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, InVT.getVectorNumElements()); - if (TLI.isTypeLegal(WideVT)) { - SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp); + if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) { + SDValue Res; + if (N->isStrictFPOpcode()) { + Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other }, + { N->getOperand(0), InOp }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + } else + Res = DAG.getNode(Opcode, dl, WideVT, InOp); return DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, VT, Res, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); @@ -4032,12 +4174,26 @@ // Unroll the convert into some scalar code and create a nasty build vector. SmallVector Ops(NumElts); - for (unsigned i=0; i < NumElts; ++i) - Ops[i] = DAG.getNode( - Opcode, dl, EltVT, - DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); + if (N->isStrictFPOpcode()) { + SmallVector NewOps(N->op_begin(), N->op_end()); + SmallVector OpChains; + for (unsigned i=0; i < NumElts; ++i) { + NewOps[1] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Ops[i] = DAG.getNode(Opcode, dl, { EltVT, MVT::Other }, NewOps); + OpChains.push_back(Ops[i].getValue(1)); + } + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains); + ReplaceValueWith(SDValue(N, 1), NewChain); + } else { + for (unsigned i = 0; i < NumElts; ++i) + Ops[i] = DAG.getNode( + Opcode, dl, EltVT, + DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); + } return DAG.getBuildVector(VT, dl, Ops); } Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7611,6 +7611,10 @@ case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; IsUnary = true; break; case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; IsUnary = true; break; case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; IsUnary = true; break; + // STRICT_FP_ROUND takes an extra argument describing whether or not + // the value will be changed by this node. See ISDOpcodes.h for details. + case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break; + case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; IsUnary = true; break; } // We're taking this node out of the chain, so we need to re-link things. @@ -7618,8 +7622,19 @@ SDValue OutputChain = SDValue(Node, 1); ReplaceAllUsesOfValueWith(OutputChain, InputChain); - SDVTList VTs = getVTList(Node->getOperand(1).getValueType()); + SDVTList VTs; SDNode *Res = nullptr; + + switch (OrigOpc) { + default: + VTs = getVTList(Node->getOperand(1).getValueType()); + break; + case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_EXTEND: + VTs = getVTList(Node->getValueType(0)); + break; + } + if (IsUnary) Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) }); else if (IsTernary) Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6078,6 +6078,8 @@ case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: case Intrinsic::experimental_constrained_pow: case Intrinsic::experimental_constrained_powi: @@ -6834,6 +6836,12 @@ case Intrinsic::experimental_constrained_fma: Opcode = ISD::STRICT_FMA; break; + case Intrinsic::experimental_constrained_fptrunc: + Opcode = ISD::STRICT_FP_ROUND; + break; + case Intrinsic::experimental_constrained_fpext: + Opcode = ISD::STRICT_FP_EXTEND; + break; case Intrinsic::experimental_constrained_sqrt: Opcode = ISD::STRICT_FSQRT; break; @@ -6897,7 +6905,12 @@ SDVTList VTs = DAG.getVTList(ValueVTs); SDValue Result; - if (FPI.isUnaryOp()) + if (Opcode == ISD::STRICT_FP_ROUND) + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)), + DAG.getTargetConstant(0, sdl, + TLI.getPointerTy(DAG.getDataLayout())) }); + else if (FPI.isUnaryOp()) Result = DAG.getNode(Opcode, sdl, VTs, { Chain, getValue(FPI.getArgOperand(0)) }); else if (FPI.isTernaryOp()) Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -313,9 +313,11 @@ case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg"; case ISD::TRUNCATE: return "truncate"; case ISD::FP_ROUND: return "fp_round"; + case ISD::STRICT_FP_ROUND: return "strict_fp_round"; case ISD::FLT_ROUNDS_: return "flt_rounds"; case ISD::FP_ROUND_INREG: return "fp_round_inreg"; case ISD::FP_EXTEND: return "fp_extend"; + case ISD::STRICT_FP_EXTEND: return "strict_fp_extend"; case ISD::SINT_TO_FP: return "sint_to_fp"; case ISD::UINT_TO_FP: return "uint_to_fp"; Index: llvm/trunk/lib/IR/IntrinsicInst.cpp =================================================================== --- llvm/trunk/lib/IR/IntrinsicInst.cpp +++ llvm/trunk/lib/IR/IntrinsicInst.cpp @@ -142,6 +142,8 @@ switch (getIntrinsicID()) { default: return false; + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: case Intrinsic::experimental_constrained_sin: case Intrinsic::experimental_constrained_cos: Index: llvm/trunk/lib/IR/Verifier.cpp =================================================================== --- llvm/trunk/lib/IR/Verifier.cpp +++ llvm/trunk/lib/IR/Verifier.cpp @@ -4209,6 +4209,8 @@ case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: case Intrinsic::experimental_constrained_pow: case Intrinsic::experimental_constrained_powi: @@ -4687,6 +4689,47 @@ HasRoundingMD = true; break; + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: { + if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) { + Assert((NumOperands == 3), + "invalid arguments for constrained FP intrinsic", &FPI); + HasRoundingMD = true; + } else { + Assert((NumOperands == 2), + "invalid arguments for constrained FP intrinsic", &FPI); + } + HasExceptionMD = true; + + Value *Operand = FPI.getArgOperand(0); + Type *OperandTy = Operand->getType(); + Value *Result = &FPI; + Type *ResultTy = Result->getType(); + Assert(OperandTy->isFPOrFPVectorTy(), + "Intrinsic first argument must be FP or FP vector", &FPI); + Assert(ResultTy->isFPOrFPVectorTy(), + "Intrinsic result must be FP or FP vector", &FPI); + Assert(OperandTy->isVectorTy() == ResultTy->isVectorTy(), + "Intrinsic first argument and result disagree on vector use", &FPI); + if (OperandTy->isVectorTy()) { + auto *OperandVecTy = cast(OperandTy); + auto *ResultVecTy = cast(ResultTy); + Assert(OperandVecTy->getNumElements() == ResultVecTy->getNumElements(), + "Intrinsic first argument and result vector lengths must be equal", + &FPI); + } + if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) { + Assert(OperandTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits(), + "Intrinsic first argument's type must be larger than result type", + &FPI); + } else { + Assert(OperandTy->getScalarSizeInBits() < ResultTy->getScalarSizeInBits(), + "Intrinsic first argument's type must be smaller than result type", + &FPI); + } + } + break; + default: llvm_unreachable("Invalid constrained FP intrinsic!"); } Index: llvm/trunk/test/CodeGen/X86/fp-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fp-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/fp-intrinsics.ll @@ -286,6 +286,29 @@ ret double %rem } +; Verify that round(42.1) isn't simplified when the rounding mode is +; unknown. +; Verify that no gross errors happen. +; CHECK-LABEL: @f21 +; COMMON: cvtsd2ss +define float @f21() { +entry: + %result = call float @llvm.experimental.constrained.fptrunc.f32.f64( + double 42.1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %result +} + +; CHECK-LABEL: @f22 +; COMMON: cvtss2sd +define double @f22(float %x) { +entry: + %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %x, + metadata !"fpexcept.strict") + ret double %result +} + @llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata" declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata) @@ -306,3 +329,6 @@ declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata) +declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) + Index: llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -3831,6 +3831,217 @@ ret <4 x double> %min } +define <1 x float> @constrained_vector_fptrunc_v1f64() { +; CHECK-LABEL: constrained_vector_fptrunc_v1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptrunc_v1f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64( + <1 x double>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <1 x float> %result +} + +define <2 x float> @constrained_vector_fptrunc_v2f64() { +; CHECK-LABEL: constrained_vector_fptrunc_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: cvtsd2ss %xmm0, %xmm1 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0 +; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptrunc_v2f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; AVX-NEXT: retq +entry: + %result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64( + <2 x double>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x float> %result +} + +define <3 x float> @constrained_vector_fptrunc_v3f64() { +; CHECK-LABEL: constrained_vector_fptrunc_v3f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: cvtsd2ss %xmm0, %xmm1 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0 +; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: cvtsd2ss %xmm1, %xmm1 +; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptrunc_v3f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] +; AVX-NEXT: retq +entry: + %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64( + <3 x double>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <3 x float> %result +} + +define <4 x float> @constrained_vector_fptrunc_v4f64() { +; CHECK-LABEL: constrained_vector_fptrunc_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0 +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: cvtsd2ss %xmm1, %xmm1 +; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: cvtsd2ss %xmm0, %xmm2 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0 +; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fptrunc_v4f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvtpd2psy {{.*}}(%rip), %xmm0 +; AVX-NEXT: retq +entry: + %result = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64( + <4 x double>, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <4 x float> %result +} + +define <1 x double> @constrained_vector_fpext_v1f32() { +; CHECK-LABEL: constrained_vector_fpext_v1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: cvtss2sd %xmm0, %xmm0 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fpext_v1f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32( + <1 x float>, + metadata !"fpexcept.strict") + ret <1 x double> %result +} + +define <2 x double> @constrained_vector_fpext_v2f32() { +; CHECK-LABEL: constrained_vector_fpext_v2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: cvtss2sd %xmm0, %xmm1 +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: cvtss2sd %xmm0, %xmm0 +; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fpext_v2f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: retq +entry: + %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32( + <2 x float>, + metadata !"fpexcept.strict") + ret <2 x double> %result +} + +define <3 x double> @constrained_vector_fpext_v3f32() { +; CHECK-LABEL: constrained_vector_fpext_v3f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: cvtss2sd %xmm0, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: cvtss2sd %xmm1, %xmm1 +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: cvtss2sd %xmm2, %xmm2 +; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fpext_v3f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX-NEXT: retq +entry: + %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32( + <3 x float>, + metadata !"fpexcept.strict") + ret <3 x double> %result +} + +define <4 x double> @constrained_vector_fpext_v4f32() { +; CHECK-LABEL: constrained_vector_fpext_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: cvtss2sd %xmm0, %xmm1 +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: cvtss2sd %xmm0, %xmm0 +; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: cvtss2sd %xmm1, %xmm2 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: cvtss2sd %xmm1, %xmm1 +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_fpext_v4f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvtps2pd {{.*}}(%rip), %ymm0 +; AVX-NEXT: retq +entry: + %result = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32( + <4 x float>, + metadata !"fpexcept.strict") + ret <4 x double> %result +} + define <1 x float> @constrained_vector_ceil_v1f32() { ; CHECK-LABEL: constrained_vector_ceil_v1f32: ; CHECK: # %bb.0: # %entry @@ -4413,6 +4624,8 @@ declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata) declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata) @@ -4438,6 +4651,8 @@ declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata, metadata) +declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata) +declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float>, metadata) declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata, metadata) declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata, metadata) @@ -4482,6 +4697,8 @@ declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata, metadata) declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata, metadata) +declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata) +declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata) declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata, metadata) declare <3 x double> @llvm.experimental.constrained.ceil.v3f64(<3 x double>, metadata, metadata) declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, metadata, metadata) @@ -4511,6 +4728,8 @@ declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata) declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata, metadata) Index: llvm/trunk/test/Feature/fp-intrinsics.ll =================================================================== --- llvm/trunk/test/Feature/fp-intrinsics.ll +++ llvm/trunk/test/Feature/fp-intrinsics.ll @@ -242,6 +242,30 @@ ret double %result } +; Verify that fptrunc(42.1) isn't simplified when the rounding mode is +; unknown. +; CHECK-LABEL: f20 +; CHECK: call float @llvm.experimental.constrained.fptrunc +define float @f20() { +entry: + %result = call float @llvm.experimental.constrained.fptrunc.f32.f64( + double 42.1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret float %result +} + +; Verify that fpext(42.1) isn't simplified when the rounding mode is +; unknown. +; CHECK-LABEL: f21 +; CHECK: call double @llvm.experimental.constrained.fpext +define double @f21() { +entry: + %result = call double @llvm.experimental.constrained.fpext.f64.f32(float 42.0, + metadata !"fpexcept.strict") + ret double %result +} + @llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata" declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata) @@ -260,3 +284,5 @@ declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata) +declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)