Index: docs/AddingConstrainedIntrinsics.rst =================================================================== --- docs/AddingConstrainedIntrinsics.rst +++ docs/AddingConstrainedIntrinsics.rst @@ -0,0 +1,90 @@ +================================================== +How To Add A Constrained Floating-Point Intrinsic +================================================== + +.. contents:: + :local: + +.. warning:: + This is a work in progress. + +Add the intrinsic +================= + +Multiple files need to be updated when adding a new constrained intrinsic. + +Add the new intrinsic to the table of intrinsics:: + + include/llvm/IR/Intrinsics.td + +Update class ConstrainedFPIntrinsic to know about the intrinsics:: + + include/llvm/IR/IntrinsicInst.h + +Functions like ConstrainedFPIntrinsic::isUnaryOp() or +ConstrainedFPIntrinsic::isTernaryOp() may need to know about the new +intrinsic:: + + lib/IR/IntrinsicInst.cpp + +Update the IR verifier:: + + lib/IR/Verifier.cpp + +Add SelectionDAG node types +=========================== + +Add the new STRICT version of the node type to the ISD::NodeType enum:: + + include/llvm/CodeGen/ISDOpcodes.h + +In class SDNode update isStrictFPOpcode():: + + include/llvm/CodeGen/SelectionDAGNodes.h + +A mapping from the STRICT SDNode type to the non-STRICT is done in +TargetLoweringBase::getStrictFPOperationAction(). This allows STRICT +nodes to be legalized similarly to the non-STRICT node type:: + + include/llvm/CodeGen/TargetLowering.h + +Building the SelectionDAG +------------------------- + +The switch statement in SelectionDAGBuilder::visitIntrinsicCall() needs +to be updated to call SelectionDAGBuilder::visitConstrainedFPIntrinsic(). +That function, in turn, needs to be updated to know how to create the +SDNode for the intrinsic. The new STRICT node will eventually be converted +to the matching non-STRICT node. 
For this reason it *must* have the same +operands and values as the non-STRICT version in case the non-STRICT +version's default lowering is used. This means that if the non-STRICT +version of the node does not use the chain then the STRICT node cannot +either:: + + lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp + +Most of the STRICT nodes get legalized the same as their matching non-STRICT +counterparts. A new STRICT node with this property must get added to the +switch in SelectionDAGLegalize::LegalizeOp():: + + lib/CodeGen/SelectionDAG/LegalizeDAG.cpp + +The code to do the conversion or mutation of the STRICT node to a non-STRICT +version of the node happens in SelectionDAG::mutateStrictFPToFP(). Be +careful updating this function since some nodes are always chained and +some are not. Some nodes have the same return type as their input operand, +but some are different. Both of these points must be properly handled:: + + lib/CodeGen/SelectionDAG/SelectionDAG.cpp + +To make debug logs readable it is helpful to update the SelectionDAG's +debug logger:: + + lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp + +Add documentation and tests +=========================== + +:: + + docs/LangRef.rst Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -13950,6 +13950,141 @@ operand computed with infinite precision, and then rounded to the target precision. +'``llvm.experimental.constrained.fptoui``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <ty2> + @llvm.experimental.constrained.fptoui(<type> <value>, + metadata <exception behavior>) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.fptoui``' intrinsic returns the result of a +conversion of a floating point operand to an unsigned integer. 
+ +Arguments: +"""""""""" + +The first argument to the '``llvm.experimental.constrained.fptoui``' +intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector +<t_vector>` of floating point values. + +The second argument specifies the exception behavior as described above. + +Semantics: +"""""""""" + +The result produced is an unsigned integer converted from the floating +point operand. The value is truncated, so it is rounded towards zero. + +'``llvm.experimental.constrained.fptosi``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <ty2> + @llvm.experimental.constrained.fptosi(<type> <value>, + metadata <exception behavior>) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.fptosi``' intrinsic returns the result of a +conversion of a floating point operand to a signed integer. + +Arguments: +"""""""""" + +The first argument to the '``llvm.experimental.constrained.fptosi``' +intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector +<t_vector>` of floating point values. + +The second argument specifies the exception behavior as described above. + +Semantics: +"""""""""" + +The result produced is a signed integer converted from the floating +point operand. The value is truncated, so it is rounded towards zero. + +'``llvm.experimental.constrained.fptrunc``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <ty2> + @llvm.experimental.constrained.fptrunc(<type> <value>, + metadata <exception behavior>) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.fptrunc``' intrinsic returns the result of +truncating a floating point operand to a smaller floating point type. + +Arguments: +"""""""""" + +The first argument to the '``llvm.experimental.constrained.fptrunc``' +intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector +<t_vector>` of floating point values. This argument must be larger in size +than the result. + +The second argument specifies the exception behavior as described above. 
+ +Semantics: +"""""""""" + +The result produced is a floating point value truncated to be smaller in size +than the operand. + +'``llvm.experimental.constrained.fpext``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare <ty2> + @llvm.experimental.constrained.fpext(<type> <value>, + metadata <exception behavior>) + +Overview: +""""""""" + +The '``llvm.experimental.constrained.fpext``' intrinsic returns the result of +extending a floating point operand to a larger floating point type. + +Arguments: +"""""""""" + +The first argument to the '``llvm.experimental.constrained.fpext``' +intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector +<t_vector>` of floating point values. This argument must be smaller in size +than the result. + +The second argument specifies the exception behavior as described above. + +Semantics: +"""""""""" + +The result produced is a floating point value extended to be larger in size +than the operand. All restrictions that apply to the fpext instruction also +apply to this intrinsic. + Constrained libm-equivalent Intrinsics -------------------------------------- Index: docs/index.rst =================================================================== --- docs/index.rst +++ docs/index.rst @@ -187,6 +187,7 @@ CommandLine CompilerWriterInfo ExtendingLLVM + AddingConstrainedIntrinsics HowToSetUpLLVMStyleRTTI ProgrammersManual Extensions @@ -223,6 +224,10 @@ :doc:`ExtendingLLVM` Look here to see how to add instructions and intrinsics to LLVM. +:doc:`AddingConstrainedIntrinsics` + Gives the steps necessary when adding a new constrained math intrinsic + to LLVM. + `Doxygen generated documentation `_ (`classes `_) Index: include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- include/llvm/CodeGen/ISDOpcodes.h +++ include/llvm/CodeGen/ISDOpcodes.h @@ -525,6 +525,11 @@ /// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type. 
FP_EXTEND, + STRICT_FP_TO_SINT, + STRICT_FP_TO_UINT, + STRICT_FP_ROUND, + STRICT_FP_EXTEND, + /// BITCAST - This operator converts between integer, vector and FP /// values, as if the value was stored to memory with one type and loaded /// from the same address with the other type (or equivalently for vector Index: include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- include/llvm/CodeGen/SelectionDAGNodes.h +++ include/llvm/CodeGen/SelectionDAGNodes.h @@ -672,6 +672,10 @@ case ISD::STRICT_FLOG2: case ISD::STRICT_FRINT: case ISD::STRICT_FNEARBYINT: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_EXTEND: return true; } } Index: include/llvm/CodeGen/TargetLowering.h =================================================================== --- include/llvm/CodeGen/TargetLowering.h +++ include/llvm/CodeGen/TargetLowering.h @@ -818,6 +818,10 @@ case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break; case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break; case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break; + case ISD::STRICT_FP_TO_SINT: EqOpc = ISD::FP_TO_SINT; break; + case ISD::STRICT_FP_TO_UINT: EqOpc = ISD::FP_TO_UINT; break; + case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break; + case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break; } auto Action = getOperationAction(EqOpc, VT); Index: include/llvm/IR/IntrinsicInst.h =================================================================== --- include/llvm/IR/IntrinsicInst.h +++ include/llvm/IR/IntrinsicInst.h @@ -239,6 +239,10 @@ case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: case 
Intrinsic::experimental_constrained_sqrt: case Intrinsic::experimental_constrained_pow: case Intrinsic::experimental_constrained_powi: Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -503,6 +503,22 @@ llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_fptosi : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; + + def int_experimental_constrained_fptoui : Intrinsic<[ llvm_anyint_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; + + def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; + + def int_experimental_constrained_fpext : Intrinsic<[ llvm_anyfloat_ty ], + [ llvm_anyfloat_ty, + llvm_metadata_ty ]>; + // These intrinsics are sensitive to the rounding mode so we need constrained // versions of each of them. When strict rounding and exception control are // not required the non-constrained versions of these intrinsics should be @@ -558,7 +574,7 @@ llvm_metadata_ty, llvm_metadata_ty ]>; } -// FIXME: Add intrinsics for fcmp, fptrunc, fpext, fptoui and fptosi. +// FIXME: Add intrinsic for fcmp // FIXME: Add intrinsics for fabs, copysign, floor, ceil, trunc and round? Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1108,6 +1108,10 @@ case ISD::STRICT_FLOG2: case ISD::STRICT_FRINT: case ISD::STRICT_FNEARBYINT: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_EXTEND: // These pseudo-ops get legalized as if they were their non-strict // equivalent. 
For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT // is also legal, but if ISD::FSQRT requires expansion then so does @@ -2978,12 +2982,14 @@ break; } case ISD::FP_ROUND: + case ISD::STRICT_FP_ROUND: case ISD::BITCAST: Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), Node->getValueType(0), dl); Results.push_back(Tmp1); break; case ISD::FP_EXTEND: + case ISD::STRICT_FP_EXTEND: Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getOperand(0).getValueType(), Node->getValueType(0), dl); @@ -3041,9 +3047,11 @@ Results.push_back(Tmp1); break; case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_SINT: if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) Results.push_back(Tmp1); break; + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_UINT: { SDValue True, False; EVT VT = Node->getOperand(0).getValueType(); Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -113,6 +113,8 @@ case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break; @@ -417,6 +419,11 @@ TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) NewOpc = ISD::FP_TO_SINT; + if (N->getOpcode() == ISD::STRICT_FP_TO_UINT && + !TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) && + TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT)) + NewOpc = ISD::STRICT_FP_TO_SINT; + SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); // Assert that the converted value fits in the original type. If it doesn't @@ -426,7 +433,8 @@ // NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example: // before legalization: fp-to-uint16, 65534. -> 0xfffe // after legalization: fp-to-sint32, 65534. 
-> 0x0000fffe - return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? + return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT || + N->getOpcode() == ISD::STRICT_FP_TO_UINT) ? ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, DAG.getValueType(N->getValueType(0).getScalarType())); } Index: lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -312,6 +312,10 @@ case ISD::STRICT_FLOG2: case ISD::STRICT_FRINT: case ISD::STRICT_FNEARBYINT: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_EXTEND: // These pseudo-ops get legalized as if they were their non-strict // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT // is also legal, but if ISD::FSQRT requires expansion then so does Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -51,6 +51,7 @@ case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; @@ -88,6 +89,7 @@ case ISD::FLOG2: case ISD::FNEARBYINT: case ISD::FNEG: + case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: @@ -534,6 +536,7 @@ case ISD::STORE: Res = ScalarizeVecOp_STORE(cast(N), OpNo); break; + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = ScalarizeVecOp_FP_ROUND(N, OpNo); break; @@ -1654,6 +1657,7 @@ case ISD::TRUNCATE: Res = 
SplitVecOp_TruncateHelper(N); break; + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break; case ISD::STORE: @@ -1688,6 +1692,7 @@ case ISD::CTTZ: case ISD::CTLZ: case ISD::CTPOP: + case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: @@ -2414,9 +2419,13 @@ case ISD::ANY_EXTEND: case ISD::FP_EXTEND: + case ISD::STRICT_FP_EXTEND: case ISD::FP_ROUND: + case ISD::STRICT_FP_ROUND: case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_UINT: case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: case ISD::TRUNCATE: @@ -3644,6 +3653,7 @@ Res = WidenVecOp_EXTEND(N); break; + case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7311,16 +7311,49 @@ NewOpc = ISD::FNEARBYINT; IsUnary = true; break; + case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break; + case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break; + case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; IsUnary = true; break; + case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; IsUnary = true; break; } + bool IsChained = true; + switch (OrigOpc) { + default: + break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_EXTEND: + IsChained = false; + break; + } + // We're taking this node out of the chain, so we need to re-link things. 
- SDValue InputChain = Node->getOperand(0); - SDValue OutputChain = SDValue(Node, 1); - ReplaceAllUsesOfValueWith(OutputChain, InputChain); + if (IsChained) { + SDValue InputChain = Node->getOperand(0); + SDValue OutputChain = SDValue(Node, 1); + ReplaceAllUsesOfValueWith(OutputChain, InputChain); + } - SDVTList VTs = getVTList(Node->getOperand(1).getValueType()); + SDVTList VTs; SDNode *Res = nullptr; - if (IsUnary) + + switch (OrigOpc) { + default: + VTs = getVTList(Node->getOperand(1).getValueType()); + break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_EXTEND: + VTs = getVTList(Node->ValueList[0]); + break; + } + + if (!IsChained) + Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(0) }); + else if (IsUnary) Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) }); else if (IsTernary) Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1), Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5607,6 +5607,10 @@ case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: case Intrinsic::experimental_constrained_pow: case Intrinsic::experimental_constrained_powi: @@ -6286,6 +6290,7 @@ const ConstrainedFPIntrinsic &FPI) { SDLoc sdl = getCurSDLoc(); unsigned Opcode; + bool IsChained = true; switch (FPI.getIntrinsicID()) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
case Intrinsic::experimental_constrained_fadd: @@ -6306,6 +6311,22 @@ case Intrinsic::experimental_constrained_fma: Opcode = ISD::STRICT_FMA; break; + case Intrinsic::experimental_constrained_fptosi: + Opcode = ISD::STRICT_FP_TO_SINT; + IsChained = false; + break; + case Intrinsic::experimental_constrained_fptoui: + Opcode = ISD::STRICT_FP_TO_UINT; + IsChained = false; + break; + case Intrinsic::experimental_constrained_fptrunc: + Opcode = ISD::STRICT_FP_ROUND; + IsChained = false; + break; + case Intrinsic::experimental_constrained_fpext: + Opcode = ISD::STRICT_FP_EXTEND; + IsChained = false; + break; case Intrinsic::experimental_constrained_sqrt: Opcode = ISD::STRICT_FSQRT; break; @@ -6347,12 +6368,21 @@ SDValue Chain = getRoot(); SmallVector ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs); - ValueVTs.push_back(MVT::Other); // Out chain + if (IsChained) + ValueVTs.push_back(MVT::Other); // Out chain SDVTList VTs = DAG.getVTList(ValueVTs); SDValue Result; - if (FPI.isUnaryOp()) + if (Opcode == ISD::STRICT_FP_ROUND || Opcode == ISD::STRICT_FP_EXTEND) + Result = DAG.getNode(Opcode, sdl, VTs, + { getValue(FPI.getArgOperand(0)), + DAG.getTargetConstant(0, sdl, + TLI.getPointerTy(DAG.getDataLayout())) }); + else if (Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) Result = DAG.getNode(Opcode, sdl, VTs, + { getValue(FPI.getArgOperand(0)) }); + else if (FPI.isUnaryOp()) + Result = DAG.getNode(Opcode, sdl, VTs, { Chain, getValue(FPI.getArgOperand(0)) }); else if (FPI.isTernaryOp()) Result = DAG.getNode(Opcode, sdl, VTs, @@ -6364,9 +6394,11 @@ { Chain, getValue(FPI.getArgOperand(0)), getValue(FPI.getArgOperand(1)) }); - assert(Result.getNode()->getNumValues() == 2); - SDValue OutChain = Result.getValue(1); - DAG.setRoot(OutChain); + if (IsChained) { + assert(Result.getNode()->getNumValues() == 2); + SDValue OutChain = Result.getValue(1); + DAG.setRoot(OutChain); + } SDValue FPResult = Result.getValue(0); setValue(&FPI, 
FPResult); } Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -292,14 +292,18 @@ case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg"; case ISD::TRUNCATE: return "truncate"; case ISD::FP_ROUND: return "fp_round"; + case ISD::STRICT_FP_ROUND: return "strict_fp_round"; case ISD::FLT_ROUNDS_: return "flt_rounds"; case ISD::FP_ROUND_INREG: return "fp_round_inreg"; case ISD::FP_EXTEND: return "fp_extend"; + case ISD::STRICT_FP_EXTEND: return "strict_fp_extend"; case ISD::SINT_TO_FP: return "sint_to_fp"; case ISD::UINT_TO_FP: return "uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::STRICT_FP_TO_UINT: return "strict_fp_to_uint"; case ISD::BITCAST: return "bitcast"; case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP: return "fp16_to_fp"; Index: lib/IR/IntrinsicInst.cpp =================================================================== --- lib/IR/IntrinsicInst.cpp +++ lib/IR/IntrinsicInst.cpp @@ -142,6 +142,10 @@ switch (getIntrinsicID()) { default: return false; + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: case Intrinsic::experimental_constrained_sin: case Intrinsic::experimental_constrained_cos: Index: lib/IR/Verifier.cpp =================================================================== --- lib/IR/Verifier.cpp +++ lib/IR/Verifier.cpp @@ -4079,6 +4079,10 @@ case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case 
Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: case Intrinsic::experimental_constrained_pow: case Intrinsic::experimental_constrained_powi: @@ -4485,17 +4489,143 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { unsigned NumOperands = FPI.getNumArgOperands(); - Assert(((NumOperands == 5 && FPI.isTernaryOp()) || - (NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)), + bool HasExceptionMD = false; + bool HasRoundingMD = false; + switch (FPI.getIntrinsicID()) + { + case Intrinsic::experimental_constrained_sqrt: + case Intrinsic::experimental_constrained_sin: + case Intrinsic::experimental_constrained_cos: + case Intrinsic::experimental_constrained_exp: + case Intrinsic::experimental_constrained_exp2: + case Intrinsic::experimental_constrained_log: + case Intrinsic::experimental_constrained_log10: + case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_rint: + case Intrinsic::experimental_constrained_nearbyint: + Assert((NumOperands == 3), "invalid arguments for constrained FP intrinsic", &FPI); - Assert(isa(FPI.getArgOperand(NumOperands-1)), - "invalid exception behavior argument", &FPI); - Assert(isa(FPI.getArgOperand(NumOperands-2)), - "invalid rounding mode argument", &FPI); - Assert(FPI.getRoundingMode() != ConstrainedFPIntrinsic::rmInvalid, - "invalid rounding mode argument", &FPI); - Assert(FPI.getExceptionBehavior() != ConstrainedFPIntrinsic::ebInvalid, - "invalid exception behavior argument", &FPI); + HasExceptionMD = true; + HasRoundingMD = true; + break; + + case Intrinsic::experimental_constrained_fma: + Assert((NumOperands == 5), + "invalid arguments for constrained FP intrinsic", &FPI); + HasExceptionMD = true; + HasRoundingMD = true; + break; + + case 
Intrinsic::experimental_constrained_fadd: + case Intrinsic::experimental_constrained_fsub: + case Intrinsic::experimental_constrained_fmul: + case Intrinsic::experimental_constrained_fdiv: + case Intrinsic::experimental_constrained_frem: + case Intrinsic::experimental_constrained_pow: + case Intrinsic::experimental_constrained_powi: + Assert((NumOperands == 4), + "invalid arguments for constrained FP intrinsic", &FPI); + HasExceptionMD = true; + HasRoundingMD = true; + break; + + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: { + Assert((NumOperands == 2), + "invalid arguments for constrained FP intrinsic", &FPI); + HasExceptionMD = true; + + Value *Operand = FPI.getArgOperand(0); + uint64_t NumSrcElem = 0; + if (Operand->getType()->isVectorTy()) { + auto *OperandT = cast(Operand->getType()); + NumSrcElem = OperandT->getNumElements(); + Assert(OperandT->getVectorElementType()->isFloatingPointTy(), + "Intrinsic first argument vector must be floating point", + &FPI); + } + else + Assert(Operand->getType()->isFloatingPointTy(), + "Intrinsic first argument must be floating point", + &FPI); + + Operand = &FPI; + Assert((NumSrcElem > 0) == Operand->getType()->isVectorTy(), + "Intrinsic first argument and result disagree on vector use", + &FPI); + if (Operand->getType()->isVectorTy()) { + auto *OperandT = cast(Operand->getType()); + Assert(NumSrcElem == OperandT->getNumElements(), + "Intrinsic first argument and result vector lengths must be equal", + &FPI); + Assert(OperandT->getVectorElementType()->isIntegerTy(), + "Intrinsic result vector must be integer", + &FPI); + } + else + Assert(Operand->getType()->isIntegerTy(), + "Intrinsic result must be an integer", + &FPI); + } + break; + + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: { + Assert((NumOperands == 2), + "invalid arguments for constrained FP intrinsic", &FPI); + HasExceptionMD = true; + + Value *Operand = 
FPI.getArgOperand(0); + uint64_t NumSrcElem = 0; + if (Operand->getType()->isVectorTy()) { + auto *OperandT = cast(Operand->getType()); + NumSrcElem = OperandT->getNumElements(); + Assert(OperandT->getVectorElementType()->isFloatingPointTy(), + "Intrinsic first argument vector must be floating point", + &FPI); + } + else + Assert(Operand->getType()->isFloatingPointTy(), + "Intrinsic first argument must be floating point", + &FPI); + + Operand = &FPI; + Assert((NumSrcElem > 0) == Operand->getType()->isVectorTy(), + "Intrinsic first argument and result disagree on vector use", + &FPI); + if (Operand->getType()->isVectorTy()) { + auto *OperandT = cast(Operand->getType()); + Assert(NumSrcElem == OperandT->getNumElements(), + "Intrinsic first argument and result vector lengths must be equal", + &FPI); + Assert(OperandT->getVectorElementType()->isFloatingPointTy(), + "Intrinsic result vector must be floating point", + &FPI); + } + else + Assert(Operand->getType()->isFloatingPointTy(), + "Intrinsic result must be an floating point", + &FPI); + } + break; + + default: + llvm_unreachable("Invalid constrained FP intrinsic!"); + } + + if (HasExceptionMD) { + Assert(isa(FPI.getArgOperand(NumOperands-1)), + "invalid exception behavior argument", &FPI); + Assert(FPI.getExceptionBehavior() != ConstrainedFPIntrinsic::ebInvalid, + "invalid exception behavior argument", &FPI); + } + if (HasRoundingMD) { + int RoundingIdx = (HasExceptionMD ? 
NumOperands - 2 : NumOperands - 1); + Assert(isa(FPI.getArgOperand(RoundingIdx)), + "invalid rounding mode argument", &FPI); + Assert(FPI.getRoundingMode() != ConstrainedFPIntrinsic::rmInvalid, + "invalid rounding mode argument", &FPI); + } } void Verifier::visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII) { Index: test/CodeGen/X86/fp-intrinsics.ll =================================================================== --- test/CodeGen/X86/fp-intrinsics.ll +++ test/CodeGen/X86/fp-intrinsics.ll @@ -286,6 +286,43 @@ ret double %rem } +; Verify that fptosi(42.1) isn't simplified when the rounding mode is +; unknown. +; Verify that no gross errors happen. +; CHECK-LABEL: @f20 +; COMMON: cvttsd2si +define i32 @f20() { +entry: + %result = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double 42.1, + metadata !"fpexcept.strict") + ret i32 %result +} + +; Verify that round(42.1) isn't simplified when the rounding mode is +; unknown. +; Verify that no gross errors happen. +; CHECK-LABEL: @f21 +; COMMON: cvtsd2ss +define float @f21() { +entry: + %result = call float @llvm.experimental.constrained.fptrunc.f32.f64( + double 42.1, + metadata !"fpexcept.strict") + ret float %result +} + +; Verify that fpext(42.1) isn't simplified when the rounding mode is +; unknown. +; Verify that no gross errors happen. 
+; CHECK-LABEL: @f22 +; COMMON: cvtss2sd +define double @f22(float %x) { +entry: + %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %x, + metadata !"fpexcept.strict") + ret double %result +} + @llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata" declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata) @@ -306,3 +343,7 @@ declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata) +declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) +declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata) +declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) + Index: test/CodeGen/X86/vector-constrained-fp-intrinsics.ll =================================================================== --- test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -3668,6 +3668,75 @@ ret <4 x double> %nearby } +define <2 x i32> @constrained_vector_fptosi() { +; NO-FMA-LABEL: constrained_vector_fptosi: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: cvttsd2si {{.*}}(%rip), %rax +; NO-FMA-NEXT: movq %rax, %xmm1 +; NO-FMA-NEXT: cvttsd2si {{.*}}(%rip), %rax +; NO-FMA-NEXT: movq %rax, %xmm0 +; NO-FMA-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_fptosi: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; HAS-FMA-NEXT: vmovq %rax, %xmm0 +; HAS-FMA-NEXT: vcvttsd2si {{.*}}(%rip), %rax +; HAS-FMA-NEXT: vmovq %rax, %xmm1 +; HAS-FMA-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; HAS-FMA-NEXT: retq +entry: + 
%result = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64( + <2 x double>, + metadata !"fpexcept.strict") + ret <2 x i32> %result +} + +define <4 x float> @constrained_vector_fptrunc(<4 x double> %D) { +; NO-FMA-LABEL: constrained_vector_fptrunc: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: cvtpd2ps %xmm1, %xmm1 +; NO-FMA-NEXT: cvtpd2ps %xmm0, %xmm0 +; NO-FMA-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_fptrunc: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: vcvtpd2ps %ymm0, %xmm0 +; HAS-FMA-NEXT: vzeroupper +; HAS-FMA-NEXT: retq +entry: + %result = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64( + <4 x double> %D, + metadata !"fpexcept.strict") + ret <4 x float> %result +} + +define <2 x double> @constrained_vector_fpext(<2 x float> %D) { +; NO-FMA-LABEL: constrained_vector_fpext: +; NO-FMA: # %bb.0: # %entry +; NO-FMA-NEXT: cvtss2sd %xmm0, %xmm1 +; NO-FMA-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] +; NO-FMA-NEXT: cvtss2sd %xmm0, %xmm0 +; NO-FMA-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; NO-FMA-NEXT: movaps %xmm1, %xmm0 +; NO-FMA-NEXT: retq +; +; HAS-FMA-LABEL: constrained_vector_fpext: +; HAS-FMA: # %bb.0: # %entry +; HAS-FMA-NEXT: vcvtps2pd %xmm0, %ymm0 +; HAS-FMA-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; HAS-FMA-NEXT: vzeroupper +; HAS-FMA-NEXT: retq +entry: + %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32( + <2 x float> %D, + metadata !"fpexcept.strict") + ret <2 x double> %result +} + + ; Single width declarations declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) @@ -3767,3 +3836,7 @@ declare <4 x double> @llvm.experimental.constrained.log2.v4f64(<4 x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 
x double>, metadata, metadata) declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata) + +declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double>, metadata) +declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata) +declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata) Index: test/Feature/fp-intrinsics.ll =================================================================== --- test/Feature/fp-intrinsics.ll +++ test/Feature/fp-intrinsics.ll @@ -242,6 +242,51 @@ ret double %result } +; Verify that fptoui(42.1) isn't simplified when the rounding mode is +; unknown. NOTE(review): the call below passes only !"fpexcept.strict" and no rounding-mode operand; confirm this wording. +; CHECK-LABEL: f18 +; CHECK: call zeroext i32 @llvm.experimental.constrained.fptoui +define zeroext i32 @f18() { +entry: + %result = call zeroext i32 @llvm.experimental.constrained.fptoui.f64( + double 42.1, + metadata !"fpexcept.strict") + ret i32 %result +} + +; Verify that fptosi(42.1) isn't simplified when the rounding mode is +; unknown. NOTE(review): the call below passes only !"fpexcept.strict" and no rounding-mode operand; confirm this wording. +; CHECK-LABEL: f19 +; CHECK: call i32 @llvm.experimental.constrained.fptosi +define i32 @f19() { +entry: + %result = call i32 @llvm.experimental.constrained.fptosi.f64(double 42.1, + metadata !"fpexcept.strict") + ret i32 %result +} + +; Verify that fptrunc(42.1) isn't simplified when the rounding mode is +; unknown. +; CHECK-LABEL: f20 +; CHECK: call float @llvm.experimental.constrained.fptrunc +define float @f20() { +entry: + %result = call float @llvm.experimental.constrained.fptrunc.f32(double 42.1, + metadata !"fpexcept.strict") + ret float %result +} + +; Verify that fpext(42.1) isn't simplified when the rounding mode is +; unknown.
+; CHECK-LABEL: f21 +; CHECK: call double @llvm.experimental.constrained.fpext +define double @f21() { +entry: + %result = call double @llvm.experimental.constrained.fpext.f64.f32(float 0x40450CCCC0000000, ; 42.1 rounded to float: fpext needs a source narrower than the double result + metadata !"fpexcept.strict") + ret double %result +} + @llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata" declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata) @@ -260,3 +305,7 @@ declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata) +declare zeroext i32 @llvm.experimental.constrained.fptoui.f64(double, metadata) +declare i32 @llvm.experimental.constrained.fptosi.f64(double, metadata) +declare float @llvm.experimental.constrained.fptrunc.f32(double, metadata) +declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)