Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -13323,6 +13323,119 @@ %a = load i16, i16* @x, align 2 %res = call float @llvm.convert.from.fp16(i16 %a) +Saturating floating-point to integer conversions +------------------------------------------------ + +The ``fptoui`` and ``fptosi`` instructions return a +:ref:`poison value <poisonvalues>` if the rounded-towards-zero value is not +representable by the result type. These intrinsics provide an alternative +conversion, which will saturate towards the smallest and largest representable +integer values instead. + +'``llvm.fptoui.sat.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.fptoui.sat`` on any +floating-point argument type and any integer result type, or vectors thereof. +Not all targets may support all types, however. + +:: + + declare i32 @llvm.fptoui.sat.i32.f32(float %f) + declare i19 @llvm.fptoui.sat.i19.f64(double %f) + declare <4 x i100> @llvm.fptoui.sat.v4i100.v4f128(<4 x fp128> %f) + +Overview: +""""""""" + +This intrinsic converts the argument into an unsigned integer using saturating +semantics. + +Arguments: +"""""""""" + +The argument may be any floating-point or vector of floating-point type. The +return value may be any integer or vector of integer type. The number of vector +elements in argument and return must be the same. + +Semantics: +"""""""""" + +The conversion to integer is performed subject to the following rules: + +- If the argument is any NaN, zero is returned. +- If the argument is smaller than zero, zero is returned. +- If the argument is larger than the largest representable integer of the + result type (this includes positive infinity), the largest representable + integer is returned. +- Otherwise, the result of rounding the argument towards zero is returned. + +Example: +"""""""" + +.. 
code-block:: text + + %a = call i8 @llvm.fptoui.sat.i8.f32(float 123.9) ; yields i8: 123 + %b = call i8 @llvm.fptoui.sat.i8.f32(float -5.7) ; yields i8: 0 + %c = call i8 @llvm.fptoui.sat.i8.f32(float 377.0) ; yields i8: 255 + %d = call i8 @llvm.fptoui.sat.i8.f32(float 0xFFF8000000000000) ; yields i8: 0 + +'``llvm.fptosi.sat.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.fptosi.sat`` on any +floating-point argument type and any integer result type, or vectors thereof. +Not all targets may support all types, however. + +:: + + declare i32 @llvm.fptosi.sat.i32.f32(float %f) + declare i19 @llvm.fptosi.sat.i19.f64(double %f) + declare <4 x i100> @llvm.fptosi.sat.v4i100.v4f128(<4 x fp128> %f) + +Overview: +""""""""" + +This intrinsic converts the argument into a signed integer using saturating +semantics. + +Arguments: +"""""""""" + +The argument may be any floating-point or vector of floating-point type. The +return value may be any integer or vector of integer type. The number of vector +elements in argument and return must be the same. + +Semantics: +"""""""""" + +The conversion to integer is performed subject to the following rules: + +- If the argument is any NaN, zero is returned. +- If the argument is smaller than the smallest representable integer of the + result type (this includes negative infinity), the smallest representable + integer is returned. +- If the argument is larger than the largest representable integer of the + result type (this includes positive infinity), the largest representable + integer is returned. +- Otherwise, the result of rounding the argument towards zero is returned. + +Example: +"""""""" + +.. 
code-block:: text + + %a = call i8 @llvm.fptosi.sat.i8.f32(float 23.9) ; yields i8: 23 + %b = call i8 @llvm.fptosi.sat.i8.f32(float -130.8) ; yields i8: -128 + %c = call i8 @llvm.fptosi.sat.i8.f32(float 999.0) ; yields i8: 127 + %d = call i8 @llvm.fptosi.sat.i8.f32(float 0xFFF8000000000000) ; yields i8: 0 + .. _dbg_intrinsics: Debugger Intrinsics Index: include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- include/llvm/CodeGen/ISDOpcodes.h +++ include/llvm/CodeGen/ISDOpcodes.h @@ -513,6 +513,17 @@ FP_TO_SINT, FP_TO_UINT, + /// FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a + /// signed or unsigned integer type given in operand 1. If the FP value cannot + /// fit in the integer type, then if the FP value is NaN return 0, otherwise + /// return the largest/smallest integer value, if the FP value is + /// larger/smaller (or +INF/-INF) than the largest/smallest integer value. + /// + /// The type in operand 1 may be smaller than the result type as a result of + /// integer type legalization. + FP_TO_SINT_SAT, + FP_TO_UINT_SAT, + /// X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type /// down to the precision of the destination VT. TRUNC is a flag, which is /// always an integer that is zero or one. If TRUNC is 0, this is a Index: include/llvm/CodeGen/TargetLowering.h =================================================================== --- include/llvm/CodeGen/TargetLowering.h +++ include/llvm/CodeGen/TargetLowering.h @@ -3694,6 +3694,11 @@ /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs. SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const; + /// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max. + /// \param N Node to expand + /// \returns The expansion result + SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const; + /// Expand CTPOP nodes. 
Expands vector/scalar CTPOP nodes, /// vector nodes can only succeed if all operations are legal/custom. /// \param N Node to expand Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -899,6 +899,13 @@ def int_convert_from_fp16 : Intrinsic<[llvm_anyfloat_ty], [llvm_i16_ty]>; } +// Saturating floating point to integer intrinsics +def int_fptoui_sat : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], + [IntrNoMem, IntrSpeculatable]>; + +def int_fptosi_sat : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], + [IntrNoMem, IntrSpeculatable]>; + // Clear cache intrinsic, default to ignore (ie. emit nothing) // maps to void __clear_cache() on supporting platforms def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], Index: include/llvm/Target/TargetSelectionDAG.td =================================================================== --- include/llvm/Target/TargetSelectionDAG.td +++ include/llvm/Target/TargetSelectionDAG.td @@ -159,6 +159,9 @@ def SDTFPToIntOp : SDTypeProfile<1, 1, [ // fp_to_[su]int SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1> ]>; +def SDTFPToIntSatOp : SDTypeProfile<1, 2, [ // fp_to_[su]int_sat + SDTCisInt<0>, SDTCisFP<1>, SDTCisVT<2, OtherVT>, SDTCisSameNumEltsAs<0, 1> +]>; def SDTExtInreg : SDTypeProfile<1, 2, [ // sext_inreg SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisVT<2, OtherVT>, SDTCisVTSmallerThanOp<2, 1> @@ -446,6 +449,8 @@ def uint_to_fp : SDNode<"ISD::UINT_TO_FP" , SDTIntToFPOp>; def fp_to_sint : SDNode<"ISD::FP_TO_SINT" , SDTFPToIntOp>; def fp_to_uint : SDNode<"ISD::FP_TO_UINT" , SDTFPToIntOp>; +def fp_to_sint_sat : SDNode<"ISD::FP_TO_SINT_SAT" , SDTFPToIntSatOp>; +def fp_to_uint_sat : SDNode<"ISD::FP_TO_UINT_SAT" , SDTFPToIntSatOp>; def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>; def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>; Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 
=================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -173,6 +173,7 @@ const SDLoc &dl); SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, const SDLoc &dl); + SDValue PromoteLegalFP_TO_INT_SAT(SDNode *Node, const SDLoc &dl); SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl); SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl); @@ -1124,10 +1125,11 @@ case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: - case ISD::USUBSAT: { + case ISD::USUBSAT: + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; - } case ISD::MSCATTER: Action = TLI.getOperationAction(Node->getOpcode(), cast<MaskedScatterSDNode>(Node)->getValue().getValueType()); @@ -2509,6 +2511,30 @@ return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); } +/// Promote FP_TO_*INT_SAT operation to a larger result type. At this point +/// the result and operand types are legal and there must be a legal +/// FP_TO_*INT_SAT operation for a larger result type. +SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT_SAT(SDNode *Node, + const SDLoc &dl) { + unsigned Opcode = Node->getOpcode(); + + // Scan for the appropriate larger type to use. + EVT NewOutTy = Node->getValueType(0); + while (true) { + NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1); + assert(NewOutTy.isInteger() && "Ran out of possibilities!"); + + if (TLI.isOperationLegalOrCustom(Opcode, NewOutTy)) + break; + } + + // Saturation width is determined by second operand, so we don't have to + // perform any fixup and can directly truncate the result. + SDValue Result = DAG.getNode(Opcode, dl, NewOutTy, + Node->getOperand(0), Node->getOperand(1)); + return DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Result); +} + /// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts. 
SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); @@ -2863,6 +2889,10 @@ if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG)) Results.push_back(Tmp1); break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + Results.push_back(TLI.expandFP_TO_INT_SAT(Node, DAG)); + break; case ISD::VAARG: Results.push_back(DAG.expandVAArg(Node)); Results.push_back(Results[0].getValue(1)); @@ -4180,6 +4210,10 @@ Node->getOpcode() == ISD::FP_TO_SINT, dl); Results.push_back(Tmp1); break; + case ISD::FP_TO_UINT_SAT: + case ISD::FP_TO_SINT_SAT: + Results.push_back(PromoteLegalFP_TO_INT_SAT(Node, dl)); + break; case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0), Index: lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -763,6 +763,8 @@ case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: Res = SoftenFloatOp_FP_TO_XINT_SAT(N); break; case ISD::SELECT: Res = SoftenFloatOp_SELECT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; @@ -955,6 +957,11 @@ return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); } +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT_SAT(SDNode *N) { + llvm_unreachable("fp_to_xint_sat soften float op not implemented yet"); + return SDValue(); +} + SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT(SDNode *N) { SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); @@ -1768,6 +1775,9 @@ case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, 
OpNo); break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + R = PromoteFloatOp_FP_TO_XINT_SAT(N, OpNo); break; case ISD::FP_EXTEND: R = PromoteFloatOp_FP_EXTEND(N, OpNo); break; case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break; case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break; @@ -1811,6 +1821,12 @@ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op); } +SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo) { + SDValue Op = GetPromotedFloat(N->getOperand(0)); + return DAG.getNode( + N->getOpcode(), SDLoc(N), N->getValueType(0), Op, N->getOperand(1)); +} + SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) { SDValue Op = GetPromotedFloat(N->getOperand(0)); EVT VT = N->getValueType(0); Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -116,6 +116,10 @@ case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + Res = PromoteIntRes_FP_TO_XINT_SAT(N); break; + case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break; case ISD::AND: @@ -468,6 +472,14 @@ DAG.getValueType(N->getValueType(0).getScalarType())); } +SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT_SAT(SDNode *N) { + // Promote the result type, while keeping the original type in Op1. 
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + return DAG.getNode( + N->getOpcode(), dl, NVT, N->getOperand(0), N->getOperand(1)); +} + SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); @@ -1454,6 +1466,8 @@ case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break; case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: ExpandIntRes_FP_TO_XINT_SAT(N, Lo, Hi); break; case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break; @@ -2289,6 +2303,11 @@ Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + llvm_unreachable("fp_to_xint_sat expand int res not implemented yet"); +} + void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi) { if (ISD::isNormalLoad(N)) { Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -306,6 +306,7 @@ SDValue PromoteIntRes_CTTZ(SDNode *N); SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); + SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N); SDValue PromoteIntRes_FP_TO_FP16(SDNode *N); SDValue PromoteIntRes_INT_EXTEND(SDNode *N); SDValue PromoteIntRes_LOAD(LoadSDNode *N); @@ -395,6 +396,7 @@ void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_XINT_SAT (SDNode *N, SDValue &Lo, SDValue 
&Hi); void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -528,6 +530,7 @@ SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); + SDValue SoftenFloatOp_FP_TO_XINT_SAT(SDNode *N); SDValue SoftenFloatOp_SELECT(SDNode *N); SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); @@ -627,6 +630,7 @@ SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo); @@ -670,6 +674,7 @@ SDValue ScalarizeVecRes_SETCC(SDNode *N); SDValue ScalarizeVecRes_UNDEF(SDNode *N); SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); + SDValue ScalarizeVecRes_FP_TO_XINT_SAT(SDNode *N); // Vector Operand Scalarization: <1 x ty> -> ty. bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); @@ -681,6 +686,7 @@ SDValue ScalarizeVecOp_VSETCC(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_FP_TO_XINT_SAT(SDNode *N); //===--------------------------------------------------------------------===// // Vector Splitting Support: LegalizeVectorTypes.cpp @@ -721,6 +727,7 @@ void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, SDValue &Hi); // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. 
bool SplitVectorOperand(SDNode *N, unsigned OpNo); @@ -741,6 +748,7 @@ SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); SDValue SplitVecOp_FCOPYSIGN(SDNode *N); + SDValue SplitVecOp_FP_TO_XINT_SAT(SDNode *N); //===--------------------------------------------------------------------===// // Vector Widening Support: LegalizeVectorTypes.cpp @@ -785,6 +793,7 @@ SDValue WidenVecRes_BinaryCanTrap(SDNode *N); SDValue WidenVecRes_StrictFP(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); + SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N); SDValue WidenVecRes_FCOPYSIGN(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); @@ -805,6 +814,7 @@ SDValue WidenVecOp_SETCC(SDNode* N); SDValue WidenVecOp_Convert(SDNode *N); + SDValue WidenVecOp_FP_TO_XINT_SAT(SDNode *N); SDValue WidenVecOp_FCOPYSIGN(SDNode *N); //===--------------------------------------------------------------------===// Index: lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -409,6 +409,8 @@ case ISD::UADDSAT: case ISD::SSUBSAT: case ISD::USUBSAT: + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; case ISD::FP_ROUND_INREG: Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -172,6 +172,11 @@ case ISD::STRICT_FTRUNC: R = ScalarizeVecRes_StrictFPOp(N); break; + + case ISD::FP_TO_UINT_SAT: + case ISD::FP_TO_SINT_SAT: + R = ScalarizeVecRes_FP_TO_XINT_SAT(N); + break; } // If R is null, the sub-method took care of registering the result. 
@@ -499,6 +504,11 @@ return DAG.getNode(ExtendCode, DL, NVT, Res); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_TO_XINT_SAT(SDNode *N) { + llvm_unreachable("fp_to_xint_sat scalarize vec res not implemented yet"); + return SDValue(); +} + //===----------------------------------------------------------------------===// // Operand Vector Scalarization <1 x ty> -> ty. @@ -532,6 +542,10 @@ case ISD::UINT_TO_FP: Res = ScalarizeVecOp_UnaryOp(N); break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + Res = ScalarizeVecOp_FP_TO_XINT_SAT(N); + break; case ISD::CONCAT_VECTORS: Res = ScalarizeVecOp_CONCAT_VECTORS(N); break; @@ -680,6 +694,11 @@ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); } +SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_TO_XINT_SAT(SDNode *N) { + llvm_unreachable("fp_to_xint_sat scalarize vec op not implemented yet"); + return SDValue(); +} + //===----------------------------------------------------------------------===// // Result Vector Splitting //===----------------------------------------------------------------------===// @@ -848,6 +867,10 @@ case ISD::STRICT_FTRUNC: SplitVecRes_StrictFPOp(N, Lo, Hi); break; + case ISD::FP_TO_UINT_SAT: + case ISD::FP_TO_SINT_SAT: + SplitVecRes_FP_TO_XINT_SAT(N, Lo, Hi); + break; } // If Lo/Hi is null, the sub-method took care of registering results etc. 
@@ -1641,6 +1664,11 @@ } } +void DAGTypeLegalizer::SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + llvm_unreachable("fp_to_xint_sat split vec res not implemented yet"); +} + //===----------------------------------------------------------------------===// // Operand Vector Splitting @@ -1714,6 +1742,10 @@ case ISD::FCANONICALIZE: Res = SplitVecOp_UnaryOp(N); break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + Res = SplitVecOp_FP_TO_XINT_SAT(N); + break; case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: @@ -2333,6 +2365,10 @@ return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements()); } +SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) { + llvm_unreachable("fp_to_xint_sat split vec op not implemented yet"); + return SDValue(); +} //===----------------------------------------------------------------------===// // Result Vector Widening @@ -2473,6 +2509,11 @@ Res = WidenVecRes_Convert(N); break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + Res = WidenVecRes_FP_TO_XINT_SAT(N); + break; + case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: @@ -2897,6 +2938,11 @@ return DAG.getBuildVector(WidenVT, DL, Ops); } +SDValue DAGTypeLegalizer::WidenVecRes_FP_TO_XINT_SAT(SDNode *N) { + llvm_unreachable("fp_to_xint_sat widen vec res not implemented yet"); + return SDValue(); +} + SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) { unsigned Opcode = N->getOpcode(); SDValue InOp = N->getOperand(0); @@ -3682,6 +3728,11 @@ case ISD::TRUNCATE: Res = WidenVecOp_Convert(N); break; + + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + Res = WidenVecOp_FP_TO_XINT_SAT(N); + break; } // If Res is null, the sub-method took care of registering the result. 
@@ -3806,6 +3857,11 @@ return DAG.getBuildVector(VT, dl, Ops); } +SDValue DAGTypeLegalizer::WidenVecOp_FP_TO_XINT_SAT(SDNode *N) { + llvm_unreachable("fp_to_xint_sat widen vec op not implemented yet"); + return SDValue(); +} + SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { EVT VT = N->getValueType(0); SDValue InOp = GetWidenedVector(N->getOperand(0)); Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8713,7 +8713,9 @@ Operands[1]))); break; case ISD::SIGN_EXTEND_INREG: - case ISD::FP_ROUND_INREG: { + case ISD::FP_ROUND_INREG: + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: { EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType(); Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5679,6 +5679,20 @@ DAG.getNode(ISD::BITCAST, sdl, MVT::f16, getValue(I.getArgOperand(0))))); return nullptr; + case Intrinsic::fptosi_sat: { + EVT Type = TLI.getValueType(DAG.getDataLayout(), I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_SINT_SAT, sdl, Type, + getValue(I.getArgOperand(0)), + DAG.getValueType(Type))); + return nullptr; + } + case Intrinsic::fptoui_sat: { + EVT Type = TLI.getValueType(DAG.getDataLayout(), I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_UINT_SAT, sdl, Type, + getValue(I.getArgOperand(0)), + DAG.getValueType(Type))); + return nullptr; + } case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- 
lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -314,6 +314,8 @@ case ISD::UINT_TO_FP: return "uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::FP_TO_SINT_SAT: return "fp_to_sint_sat"; + case ISD::FP_TO_UINT_SAT: return "fp_to_uint_sat"; case ISD::BITCAST: return "bitcast"; case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP: return "fp16_to_fp"; Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5090,3 +5090,100 @@ return DAG.getSelect(dl, ResultType, Overflow, Result, SumDiff); } } + +SDValue TargetLowering::expandFP_TO_INT_SAT( + SDNode *Node, SelectionDAG &DAG) const { + bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT; + SDLoc dl(SDValue(Node, 0)); + SDValue Src = Node->getOperand(0); + + // DstVT is the result type, while SatVT is the size to which we saturate + EVT SrcVT = Src.getValueType(); + EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + EVT DstVT = Node->getValueType(0); + + unsigned SatWidth = SatVT.getScalarSizeInBits(); + unsigned DstWidth = DstVT.getScalarSizeInBits(); + assert(SatWidth <= DstWidth && + "Expected saturation width smaller than result width"); + + // Determine minimum and maximum integer values and their corresponding + // floating-point values. 
+ APInt MinInt, MaxInt; + if (IsSigned) { + MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth); + MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth); + } else { + MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth); + MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth); + } + + APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT)); + APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT)); + + APFloat::opStatus MinStatus = MinFloat.convertFromAPInt( + MinInt, IsSigned, APFloat::rmTowardZero); + APFloat::opStatus MaxStatus = MaxFloat.convertFromAPInt( + MaxInt, IsSigned, APFloat::rmTowardZero); + bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) && + !(MaxStatus & APFloat::opStatus::opInexact); + + SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT); + SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT); + + // If the integer bounds are exactly representable as floats and min/max are + // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence + // of comparisons and selects. + bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) && + isOperationLegal(ISD::FMAXNUM, SrcVT); + if (AreExactFloatBounds && MinMaxLegal) { + SDValue Clamped = Src; + + // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat. + Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode); + // Clamp by MaxFloat from above. NaN cannot occur. + Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode); + // Convert clamped value to integer. + SDValue FpToInt = DAG.getNode( + IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Clamped); + + // In the unsigned case we're done, because we mapped NaN to MinFloat, + // which will cast to zero. + if (!IsSigned) + return FpToInt; + + // Otherwise, select 0 if Src is NaN. 
+ SDValue ZeroInt = DAG.getConstant(0, dl, DstVT); + return DAG.getSelectCC( + dl, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO); + } + + SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT); + SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT); + + // Result of direct conversion. The assumption here is that the operation is + // non-trapping and it's fine to apply it to an out-of-range value if we + // select it away later. + SDValue FpToInt = DAG.getNode( + IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src); + + SDValue Select = FpToInt; + + // If Src ULT MinFloat, select MinInt. In particular, this also selects + // MinInt if Src is NaN. + Select = DAG.getSelectCC( + dl, Src, MinFloatNode, MinIntNode, Select, ISD::CondCode::SETULT); + // If Src OGT MaxFloat, select MaxInt. + Select = DAG.getSelectCC( + dl, Src, MaxFloatNode, MaxIntNode, Select, ISD::CondCode::SETOGT); + + // In the unsigned case we are done, because we mapped NaN to MinInt, which + // is already zero. + if (!IsSigned) + return Select; + + // Otherwise, select 0 if Src is NaN. 
+ SDValue ZeroInt = DAG.getConstant(0, dl, DstVT); + return DAG.getSelectCC( + dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO); +} Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -614,6 +614,8 @@ setOperationAction(ISD::UADDSAT, VT, Expand); setOperationAction(ISD::SSUBSAT, VT, Expand); setOperationAction(ISD::USUBSAT, VT, Expand); + setOperationAction(ISD::FP_TO_SINT_SAT, VT, Expand); + setOperationAction(ISD::FP_TO_UINT_SAT, VT, Expand); // Overflow operations default to expand setOperationAction(ISD::SADDO, VT, Expand); Index: test/CodeGen/AArch64/fptoi-sat-scalar.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/fptoi-sat-scalar.ll @@ -0,0 +1,972 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64 < %s | FileCheck %s + +; +; 32-bit float to signed integer +; + +declare i1 @llvm.fptosi.sat.i1.f32 (float) +declare i8 @llvm.fptosi.sat.i8.f32 (float) +declare i13 @llvm.fptosi.sat.i13.f32 (float) +declare i16 @llvm.fptosi.sat.i16.f32 (float) +declare i19 @llvm.fptosi.sat.i19.f32 (float) +declare i32 @llvm.fptosi.sat.i32.f32 (float) +declare i50 @llvm.fptosi.sat.i50.f32 (float) +declare i64 @llvm.fptosi.sat.i64.f32 (float) +declare i100 @llvm.fptosi.sat.i100.f32(float) +declare i128 @llvm.fptosi.sat.i128.f32(float) + +define i1 @test_signed_i1_f32(float %f) { +; CHECK-LABEL: test_signed_i1_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s1, #-1.00000000 +; CHECK-NEXT: fmov s2, wzr +; CHECK-NEXT: fmaxnm s1, s0, s1 +; CHECK-NEXT: fminnm s1, s1, s2 +; CHECK-NEXT: fcvtzs w8, s1 +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: csel w8, wzr, w8, vs +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %x = call i1 @llvm.fptosi.sat.i1.f32(float %f) + ret i1 %x +} + +define i8 @test_signed_i8_f32(float %f) { +; CHECK-LABEL: 
test_signed_i8_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: adrp x9, .LCPI1_1 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: ldr s2, [x9, :lo12:.LCPI1_1] +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: fmaxnm s1, s0, s1 +; CHECK-NEXT: fminnm s1, s1, s2 +; CHECK-NEXT: fcvtzs w8, s1 +; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: ret + %x = call i8 @llvm.fptosi.sat.i8.f32(float %f) + ret i8 %x +} + +define i13 @test_signed_i13_f32(float %f) { +; CHECK-LABEL: test_signed_i13_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI2_0 +; CHECK-NEXT: adrp x9, .LCPI2_1 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI2_0] +; CHECK-NEXT: ldr s2, [x9, :lo12:.LCPI2_1] +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: fmaxnm s1, s0, s1 +; CHECK-NEXT: fminnm s1, s1, s2 +; CHECK-NEXT: fcvtzs w8, s1 +; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: ret + %x = call i13 @llvm.fptosi.sat.i13.f32(float %f) + ret i13 %x +} + +define i16 @test_signed_i16_f32(float %f) { +; CHECK-LABEL: test_signed_i16_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI3_0 +; CHECK-NEXT: adrp x9, .LCPI3_1 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: ldr s2, [x9, :lo12:.LCPI3_1] +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: fmaxnm s1, s0, s1 +; CHECK-NEXT: fminnm s1, s1, s2 +; CHECK-NEXT: fcvtzs w8, s1 +; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: ret + %x = call i16 @llvm.fptosi.sat.i16.f32(float %f) + ret i16 %x +} + +define i19 @test_signed_i19_f32(float %f) { +; CHECK-LABEL: test_signed_i19_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: adrp x9, .LCPI4_1 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: ldr s2, [x9, :lo12:.LCPI4_1] +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: fmaxnm s1, s0, s1 +; CHECK-NEXT: fminnm s1, s1, s2 +; CHECK-NEXT: fcvtzs w8, s1 +; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: ret + %x = call i19 @llvm.fptosi.sat.i19.f32(float %f) + ret i19 %x +} + +define i32 @test_signed_i32_f32(float %f) { +; 
CHECK-LABEL: test_signed_i32_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, .LCPI5_0 +; CHECK-NEXT: ldr s1, [x9, :lo12:.LCPI5_0] +; CHECK-NEXT: adrp x9, .LCPI5_1 +; CHECK-NEXT: ldr s2, [x9, :lo12:.LCPI5_1] +; CHECK-NEXT: fcvtzs w8, s0 +; CHECK-NEXT: orr w10, wzr, #0x80000000 +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: orr w9, wzr, #0x7fffffff +; CHECK-NEXT: csel w8, w10, w8, lt +; CHECK-NEXT: fcmp s0, s2 +; CHECK-NEXT: csel w8, w9, w8, gt +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: ret + %x = call i32 @llvm.fptosi.sat.i32.f32(float %f) + ret i32 %x +} + +define i50 @test_signed_i50_f32(float %f) { +; CHECK-LABEL: test_signed_i50_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, .LCPI6_0 +; CHECK-NEXT: ldr s1, [x9, :lo12:.LCPI6_0] +; CHECK-NEXT: adrp x9, .LCPI6_1 +; CHECK-NEXT: ldr s2, [x9, :lo12:.LCPI6_1] +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: orr x10, xzr, #0xfffe000000000000 +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: orr x9, xzr, #0x1ffffffffffff +; CHECK-NEXT: csel x8, x10, x8, lt +; CHECK-NEXT: fcmp s0, s2 +; CHECK-NEXT: csel x8, x9, x8, gt +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: csel x0, xzr, x8, vs +; CHECK-NEXT: ret + %x = call i50 @llvm.fptosi.sat.i50.f32(float %f) + ret i50 %x +} + +define i64 @test_signed_i64_f32(float %f) { +; CHECK-LABEL: test_signed_i64_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, .LCPI7_0 +; CHECK-NEXT: ldr s1, [x9, :lo12:.LCPI7_0] +; CHECK-NEXT: adrp x9, .LCPI7_1 +; CHECK-NEXT: ldr s2, [x9, :lo12:.LCPI7_1] +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: orr x10, xzr, #0x8000000000000000 +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: orr x9, xzr, #0x7fffffffffffffff +; CHECK-NEXT: csel x8, x10, x8, lt +; CHECK-NEXT: fcmp s0, s2 +; CHECK-NEXT: csel x8, x9, x8, gt +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: csel x0, xzr, x8, vs +; CHECK-NEXT: ret + %x = call i64 @llvm.fptosi.sat.i64.f32(float %f) + ret i64 %x +} + +;define i100 @test_signed_i100_f32(float %f) { +; %x = call i100 
@llvm.fptosi.sat.i100.f32(float %f) +; ret i100 %x +;} +; +;define i128 @test_signed_i128_f32(float %f) { +; %x = call i128 @llvm.fptosi.sat.i128.f32(float %f) +; ret i128 %x +;} + +; +; 32-bit float to unsigned integer +; + +declare i1 @llvm.fptoui.sat.i1.f32 (float) +declare i8 @llvm.fptoui.sat.i8.f32 (float) +declare i13 @llvm.fptoui.sat.i13.f32 (float) +declare i16 @llvm.fptoui.sat.i16.f32 (float) +declare i19 @llvm.fptoui.sat.i19.f32 (float) +declare i32 @llvm.fptoui.sat.i32.f32 (float) +declare i50 @llvm.fptoui.sat.i50.f32 (float) +declare i64 @llvm.fptoui.sat.i64.f32 (float) +declare i100 @llvm.fptoui.sat.i100.f32(float) +declare i128 @llvm.fptoui.sat.i128.f32(float) + +define i1 @test_unsigned_i1_f32(float %f) { +; CHECK-LABEL: test_unsigned_i1_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s1, wzr +; CHECK-NEXT: fmaxnm s0, s0, s1 +; CHECK-NEXT: fmov s1, #1.00000000 +; CHECK-NEXT: fminnm s0, s0, s1 +; CHECK-NEXT: fcvtzu w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %x = call i1 @llvm.fptoui.sat.i1.f32(float %f) + ret i1 %x +} + +define i8 @test_unsigned_i8_f32(float %f) { +; CHECK-LABEL: test_unsigned_i8_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI9_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI9_0] +; CHECK-NEXT: fmov s2, wzr +; CHECK-NEXT: fmaxnm s0, s0, s2 +; CHECK-NEXT: fminnm s0, s0, s1 +; CHECK-NEXT: fcvtzu w0, s0 +; CHECK-NEXT: ret + %x = call i8 @llvm.fptoui.sat.i8.f32(float %f) + ret i8 %x +} + +define i13 @test_unsigned_i13_f32(float %f) { +; CHECK-LABEL: test_unsigned_i13_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI10_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI10_0] +; CHECK-NEXT: fmov s2, wzr +; CHECK-NEXT: fmaxnm s0, s0, s2 +; CHECK-NEXT: fminnm s0, s0, s1 +; CHECK-NEXT: fcvtzu w0, s0 +; CHECK-NEXT: ret + %x = call i13 @llvm.fptoui.sat.i13.f32(float %f) + ret i13 %x +} + +define i16 @test_unsigned_i16_f32(float %f) { +; CHECK-LABEL: test_unsigned_i16_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI11_0 +; 
CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI11_0] +; CHECK-NEXT: fmov s2, wzr +; CHECK-NEXT: fmaxnm s0, s0, s2 +; CHECK-NEXT: fminnm s0, s0, s1 +; CHECK-NEXT: fcvtzu w0, s0 +; CHECK-NEXT: ret + %x = call i16 @llvm.fptoui.sat.i16.f32(float %f) + ret i16 %x +} + +define i19 @test_unsigned_i19_f32(float %f) { +; CHECK-LABEL: test_unsigned_i19_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI12_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI12_0] +; CHECK-NEXT: fmov s2, wzr +; CHECK-NEXT: fmaxnm s0, s0, s2 +; CHECK-NEXT: fminnm s0, s0, s1 +; CHECK-NEXT: fcvtzu w0, s0 +; CHECK-NEXT: ret + %x = call i19 @llvm.fptoui.sat.i19.f32(float %f) + ret i19 %x +} + +define i32 @test_unsigned_i32_f32(float %f) { +; CHECK-LABEL: test_unsigned_i32_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI13_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI13_0] +; CHECK-NEXT: fcvtzu w8, s0 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: csel w8, wzr, w8, lt +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: csinv w0, w8, wzr, le +; CHECK-NEXT: ret + %x = call i32 @llvm.fptoui.sat.i32.f32(float %f) + ret i32 %x +} + +define i50 @test_unsigned_i50_f32(float %f) { +; CHECK-LABEL: test_unsigned_i50_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI14_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI14_0] +; CHECK-NEXT: fcvtzu x8, s0 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: csel x8, xzr, x8, lt +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: orr x9, xzr, #0x3ffffffffffff +; CHECK-NEXT: csel x0, x9, x8, gt +; CHECK-NEXT: ret + %x = call i50 @llvm.fptoui.sat.i50.f32(float %f) + ret i50 %x +} + +define i64 @test_unsigned_i64_f32(float %f) { +; CHECK-LABEL: test_unsigned_i64_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI15_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI15_0] +; CHECK-NEXT: fcvtzu x8, s0 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: csel x8, xzr, x8, lt +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: csinv x0, x8, xzr, le +; CHECK-NEXT: ret + %x = call i64 @llvm.fptoui.sat.i64.f32(float %f) + ret i64 %x +} + +;define i100 
@test_unsigned_i100_f32(float %f) { +; %x = call i100 @llvm.fptoui.sat.i100.f32(float %f) +; ret i100 %x +;} +; +;define i128 @test_unsigned_i128_f32(float %f) { +; %x = call i128 @llvm.fptoui.sat.i128.f32(float %f) +; ret i128 %x +;} + +; +; 64-bit float to signed integer +; + +declare i1 @llvm.fptosi.sat.i1.f64 (double) +declare i8 @llvm.fptosi.sat.i8.f64 (double) +declare i13 @llvm.fptosi.sat.i13.f64 (double) +declare i16 @llvm.fptosi.sat.i16.f64 (double) +declare i19 @llvm.fptosi.sat.i19.f64 (double) +declare i32 @llvm.fptosi.sat.i32.f64 (double) +declare i50 @llvm.fptosi.sat.i50.f64 (double) +declare i64 @llvm.fptosi.sat.i64.f64 (double) +declare i100 @llvm.fptosi.sat.i100.f64(double) +declare i128 @llvm.fptosi.sat.i128.f64(double) + +define i1 @test_signed_i1_f64(double %f) { +; CHECK-LABEL: test_signed_i1_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov d1, #-1.00000000 +; CHECK-NEXT: fmov d2, xzr +; CHECK-NEXT: fmaxnm d1, d0, d1 +; CHECK-NEXT: fminnm d1, d1, d2 +; CHECK-NEXT: fcvtzs w8, d1 +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: csel w8, wzr, w8, vs +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %x = call i1 @llvm.fptosi.sat.i1.f64(double %f) + ret i1 %x +} + +define i8 @test_signed_i8_f64(double %f) { +; CHECK-LABEL: test_signed_i8_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI17_0 +; CHECK-NEXT: adrp x9, .LCPI17_1 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI17_0] +; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI17_1] +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: fmaxnm d1, d0, d1 +; CHECK-NEXT: fminnm d1, d1, d2 +; CHECK-NEXT: fcvtzs w8, d1 +; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: ret + %x = call i8 @llvm.fptosi.sat.i8.f64(double %f) + ret i8 %x +} + +define i13 @test_signed_i13_f64(double %f) { +; CHECK-LABEL: test_signed_i13_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI18_0 +; CHECK-NEXT: adrp x9, .LCPI18_1 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI18_0] +; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI18_1] +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: 
fmaxnm d1, d0, d1 +; CHECK-NEXT: fminnm d1, d1, d2 +; CHECK-NEXT: fcvtzs w8, d1 +; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: ret + %x = call i13 @llvm.fptosi.sat.i13.f64(double %f) + ret i13 %x +} + +define i16 @test_signed_i16_f64(double %f) { +; CHECK-LABEL: test_signed_i16_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI19_0 +; CHECK-NEXT: adrp x9, .LCPI19_1 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI19_0] +; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI19_1] +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: fmaxnm d1, d0, d1 +; CHECK-NEXT: fminnm d1, d1, d2 +; CHECK-NEXT: fcvtzs w8, d1 +; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: ret + %x = call i16 @llvm.fptosi.sat.i16.f64(double %f) + ret i16 %x +} + +define i19 @test_signed_i19_f64(double %f) { +; CHECK-LABEL: test_signed_i19_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI20_0 +; CHECK-NEXT: adrp x9, .LCPI20_1 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI20_0] +; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI20_1] +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: fmaxnm d1, d0, d1 +; CHECK-NEXT: fminnm d1, d1, d2 +; CHECK-NEXT: fcvtzs w8, d1 +; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: ret + %x = call i19 @llvm.fptosi.sat.i19.f64(double %f) + ret i19 %x +} + +define i32 @test_signed_i32_f64(double %f) { +; CHECK-LABEL: test_signed_i32_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI21_0 +; CHECK-NEXT: adrp x9, .LCPI21_1 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI21_0] +; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI21_1] +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: fmaxnm d1, d0, d1 +; CHECK-NEXT: fminnm d1, d1, d2 +; CHECK-NEXT: fcvtzs w8, d1 +; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: ret + %x = call i32 @llvm.fptosi.sat.i32.f64(double %f) + ret i32 %x +} + +define i50 @test_signed_i50_f64(double %f) { +; CHECK-LABEL: test_signed_i50_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI22_0 +; CHECK-NEXT: adrp x9, .LCPI22_1 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI22_0] +; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI22_1] +; 
CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: fmaxnm d1, d0, d1 +; CHECK-NEXT: fminnm d1, d1, d2 +; CHECK-NEXT: fcvtzs x8, d1 +; CHECK-NEXT: csel x0, xzr, x8, vs +; CHECK-NEXT: ret + %x = call i50 @llvm.fptosi.sat.i50.f64(double %f) + ret i50 %x +} + +define i64 @test_signed_i64_f64(double %f) { +; CHECK-LABEL: test_signed_i64_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x9, .LCPI23_0 +; CHECK-NEXT: ldr d1, [x9, :lo12:.LCPI23_0] +; CHECK-NEXT: adrp x9, .LCPI23_1 +; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI23_1] +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: orr x10, xzr, #0x8000000000000000 +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: orr x9, xzr, #0x7fffffffffffffff +; CHECK-NEXT: csel x8, x10, x8, lt +; CHECK-NEXT: fcmp d0, d2 +; CHECK-NEXT: csel x8, x9, x8, gt +; CHECK-NEXT: fcmp d0, d0 +; CHECK-NEXT: csel x0, xzr, x8, vs +; CHECK-NEXT: ret + %x = call i64 @llvm.fptosi.sat.i64.f64(double %f) + ret i64 %x +} + +;define i100 @test_signed_i100_f64(double %f) { +; %x = call i100 @llvm.fptosi.sat.i100.f64(double %f) +; ret i100 %x +;} +; +;define i128 @test_signed_i128_f64(double %f) { +; %x = call i128 @llvm.fptosi.sat.i128.f64(double %f) +; ret i128 %x +;} + +; +; 64-bit float to unsigned integer +; + +declare i1 @llvm.fptoui.sat.i1.f64 (double) +declare i8 @llvm.fptoui.sat.i8.f64 (double) +declare i13 @llvm.fptoui.sat.i13.f64 (double) +declare i16 @llvm.fptoui.sat.i16.f64 (double) +declare i19 @llvm.fptoui.sat.i19.f64 (double) +declare i32 @llvm.fptoui.sat.i32.f64 (double) +declare i50 @llvm.fptoui.sat.i50.f64 (double) +declare i64 @llvm.fptoui.sat.i64.f64 (double) +declare i100 @llvm.fptoui.sat.i100.f64(double) +declare i128 @llvm.fptoui.sat.i128.f64(double) + +define i1 @test_unsigned_i1_f64(double %f) { +; CHECK-LABEL: test_unsigned_i1_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: fmaxnm d0, d0, d1 +; CHECK-NEXT: fmov d1, #1.00000000 +; CHECK-NEXT: fminnm d0, d0, d1 +; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %x = call 
i1 @llvm.fptoui.sat.i1.f64(double %f) + ret i1 %x +} + +define i8 @test_unsigned_i8_f64(double %f) { +; CHECK-LABEL: test_unsigned_i8_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI25_0 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI25_0] +; CHECK-NEXT: fmov d2, xzr +; CHECK-NEXT: fmaxnm d0, d0, d2 +; CHECK-NEXT: fminnm d0, d0, d1 +; CHECK-NEXT: fcvtzu w0, d0 +; CHECK-NEXT: ret + %x = call i8 @llvm.fptoui.sat.i8.f64(double %f) + ret i8 %x +} + +define i13 @test_unsigned_i13_f64(double %f) { +; CHECK-LABEL: test_unsigned_i13_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI26_0 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI26_0] +; CHECK-NEXT: fmov d2, xzr +; CHECK-NEXT: fmaxnm d0, d0, d2 +; CHECK-NEXT: fminnm d0, d0, d1 +; CHECK-NEXT: fcvtzu w0, d0 +; CHECK-NEXT: ret + %x = call i13 @llvm.fptoui.sat.i13.f64(double %f) + ret i13 %x +} + +define i16 @test_unsigned_i16_f64(double %f) { +; CHECK-LABEL: test_unsigned_i16_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI27_0 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI27_0] +; CHECK-NEXT: fmov d2, xzr +; CHECK-NEXT: fmaxnm d0, d0, d2 +; CHECK-NEXT: fminnm d0, d0, d1 +; CHECK-NEXT: fcvtzu w0, d0 +; CHECK-NEXT: ret + %x = call i16 @llvm.fptoui.sat.i16.f64(double %f) + ret i16 %x +} + +define i19 @test_unsigned_i19_f64(double %f) { +; CHECK-LABEL: test_unsigned_i19_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI28_0 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI28_0] +; CHECK-NEXT: fmov d2, xzr +; CHECK-NEXT: fmaxnm d0, d0, d2 +; CHECK-NEXT: fminnm d0, d0, d1 +; CHECK-NEXT: fcvtzu w0, d0 +; CHECK-NEXT: ret + %x = call i19 @llvm.fptoui.sat.i19.f64(double %f) + ret i19 %x +} + +define i32 @test_unsigned_i32_f64(double %f) { +; CHECK-LABEL: test_unsigned_i32_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI29_0 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI29_0] +; CHECK-NEXT: fmov d2, xzr +; CHECK-NEXT: fmaxnm d0, d0, d2 +; CHECK-NEXT: fminnm d0, d0, d1 +; CHECK-NEXT: fcvtzu w0, d0 +; CHECK-NEXT: ret + %x = call i32 
@llvm.fptoui.sat.i32.f64(double %f) + ret i32 %x +} + +define i50 @test_unsigned_i50_f64(double %f) { +; CHECK-LABEL: test_unsigned_i50_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI30_0 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI30_0] +; CHECK-NEXT: fmov d2, xzr +; CHECK-NEXT: fmaxnm d0, d0, d2 +; CHECK-NEXT: fminnm d0, d0, d1 +; CHECK-NEXT: fcvtzu x0, d0 +; CHECK-NEXT: ret + %x = call i50 @llvm.fptoui.sat.i50.f64(double %f) + ret i50 %x +} + +define i64 @test_unsigned_i64_f64(double %f) { +; CHECK-LABEL: test_unsigned_i64_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI31_0 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI31_0] +; CHECK-NEXT: fcvtzu x8, d0 +; CHECK-NEXT: fcmp d0, #0.0 +; CHECK-NEXT: csel x8, xzr, x8, lt +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: csinv x0, x8, xzr, le +; CHECK-NEXT: ret + %x = call i64 @llvm.fptoui.sat.i64.f64(double %f) + ret i64 %x +} + +;define i100 @test_unsigned_i100_f64(double %f) { +; %x = call i100 @llvm.fptoui.sat.i100.f64(double %f) +; ret i100 %x +;} +; +;define i128 @test_unsigned_i128_f64(double %f) { +; %x = call i128 @llvm.fptoui.sat.i128.f64(double %f) +; ret i128 %x +;} + +; +; 16-bit float to signed integer +; + +declare i1 @llvm.fptosi.sat.i1.f16 (half) +declare i8 @llvm.fptosi.sat.i8.f16 (half) +declare i13 @llvm.fptosi.sat.i13.f16 (half) +declare i16 @llvm.fptosi.sat.i16.f16 (half) +declare i19 @llvm.fptosi.sat.i19.f16 (half) +declare i32 @llvm.fptosi.sat.i32.f16 (half) +declare i50 @llvm.fptosi.sat.i50.f16 (half) +declare i64 @llvm.fptosi.sat.i64.f16 (half) +declare i100 @llvm.fptosi.sat.i100.f16(half) +declare i128 @llvm.fptosi.sat.i128.f16(half) + +define i1 @test_signed_i1_f16(half %f) { +; CHECK-LABEL: test_signed_i1_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fmov s1, #-1.00000000 +; CHECK-NEXT: fcvtzs w8, s0 +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: csinv w8, w8, wzr, ge +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: csel w8, wzr, w8, gt +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: csel 
w8, wzr, w8, vs +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %x = call i1 @llvm.fptosi.sat.i1.f16(half %f) + ret i1 %x +} + +define i8 @test_signed_i8_f16(half %f) { +; CHECK-LABEL: test_signed_i8_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI33_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI33_0] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: adrp x9, .LCPI33_1 +; CHECK-NEXT: orr w8, wzr, #0xffffff80 +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: ldr s1, [x9, :lo12:.LCPI33_1] +; CHECK-NEXT: fcvtzs w9, s0 +; CHECK-NEXT: csel w8, w8, w9, lt +; CHECK-NEXT: orr w9, wzr, #0x7f +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: csel w8, w9, w8, gt +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: ret + %x = call i8 @llvm.fptosi.sat.i8.f16(half %f) + ret i8 %x +} + +define i13 @test_signed_i13_f16(half %f) { +; CHECK-LABEL: test_signed_i13_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI34_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI34_0] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: adrp x9, .LCPI34_1 +; CHECK-NEXT: orr w8, wzr, #0xfffff000 +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: ldr s1, [x9, :lo12:.LCPI34_1] +; CHECK-NEXT: fcvtzs w9, s0 +; CHECK-NEXT: csel w8, w8, w9, lt +; CHECK-NEXT: orr w9, wzr, #0xfff +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: csel w8, w9, w8, gt +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: ret + %x = call i13 @llvm.fptosi.sat.i13.f16(half %f) + ret i13 %x +} + +define i16 @test_signed_i16_f16(half %f) { +; CHECK-LABEL: test_signed_i16_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI35_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI35_0] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: adrp x9, .LCPI35_1 +; CHECK-NEXT: orr w8, wzr, #0xffff8000 +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: ldr s1, [x9, :lo12:.LCPI35_1] +; CHECK-NEXT: fcvtzs w9, s0 +; CHECK-NEXT: csel w8, w8, w9, lt +; CHECK-NEXT: orr w9, wzr, #0x7fff +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: csel w8, w9, w8, gt +; CHECK-NEXT: fcmp s0, s0 +; 
CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: ret + %x = call i16 @llvm.fptosi.sat.i16.f16(half %f) + ret i16 %x +} + +define i19 @test_signed_i19_f16(half %f) { +; CHECK-LABEL: test_signed_i19_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI36_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI36_0] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: adrp x9, .LCPI36_1 +; CHECK-NEXT: orr w8, wzr, #0xfffc0000 +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: ldr s1, [x9, :lo12:.LCPI36_1] +; CHECK-NEXT: fcvtzs w9, s0 +; CHECK-NEXT: csel w8, w8, w9, lt +; CHECK-NEXT: orr w9, wzr, #0x3ffff +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: csel w8, w9, w8, gt +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: ret + %x = call i19 @llvm.fptosi.sat.i19.f16(half %f) + ret i19 %x +} + +define i32 @test_signed_i32_f16(half %f) { +; CHECK-LABEL: test_signed_i32_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI37_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI37_0] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: adrp x9, .LCPI37_1 +; CHECK-NEXT: orr w8, wzr, #0x80000000 +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: ldr s1, [x9, :lo12:.LCPI37_1] +; CHECK-NEXT: fcvtzs w9, s0 +; CHECK-NEXT: csel w8, w8, w9, lt +; CHECK-NEXT: orr w9, wzr, #0x7fffffff +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: csel w8, w9, w8, gt +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: csel w0, wzr, w8, vs +; CHECK-NEXT: ret + %x = call i32 @llvm.fptosi.sat.i32.f16(half %f) + ret i32 %x +} + +define i50 @test_signed_i50_f16(half %f) { +; CHECK-LABEL: test_signed_i50_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI38_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI38_0] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: adrp x9, .LCPI38_1 +; CHECK-NEXT: orr x8, xzr, #0xfffe000000000000 +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: ldr s1, [x9, :lo12:.LCPI38_1] +; CHECK-NEXT: fcvtzs x9, s0 +; CHECK-NEXT: csel x8, x8, x9, lt +; CHECK-NEXT: orr x9, xzr, #0x1ffffffffffff +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: csel x8, x9, x8, gt +; 
CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: csel x0, xzr, x8, vs +; CHECK-NEXT: ret + %x = call i50 @llvm.fptosi.sat.i50.f16(half %f) + ret i50 %x +} + +define i64 @test_signed_i64_f16(half %f) { +; CHECK-LABEL: test_signed_i64_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI39_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI39_0] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: adrp x9, .LCPI39_1 +; CHECK-NEXT: orr x8, xzr, #0x8000000000000000 +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: ldr s1, [x9, :lo12:.LCPI39_1] +; CHECK-NEXT: fcvtzs x9, s0 +; CHECK-NEXT: csel x8, x8, x9, lt +; CHECK-NEXT: orr x9, xzr, #0x7fffffffffffffff +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: csel x8, x9, x8, gt +; CHECK-NEXT: fcmp s0, s0 +; CHECK-NEXT: csel x0, xzr, x8, vs +; CHECK-NEXT: ret + %x = call i64 @llvm.fptosi.sat.i64.f16(half %f) + ret i64 %x +} + +;define i100 @test_signed_i100_f16(half %f) { +; %x = call i100 @llvm.fptosi.sat.i100.f16(half %f) +; ret i100 %x +;} +; +;define i128 @test_signed_i128_f16(half %f) { +; %x = call i128 @llvm.fptosi.sat.i128.f16(half %f) +; ret i128 %x +;} + +; +; 16-bit float to unsigned integer +; + +declare i1 @llvm.fptoui.sat.i1.f16 (half) +declare i8 @llvm.fptoui.sat.i8.f16 (half) +declare i13 @llvm.fptoui.sat.i13.f16 (half) +declare i16 @llvm.fptoui.sat.i16.f16 (half) +declare i19 @llvm.fptoui.sat.i19.f16 (half) +declare i32 @llvm.fptoui.sat.i32.f16 (half) +declare i50 @llvm.fptoui.sat.i50.f16 (half) +declare i64 @llvm.fptoui.sat.i64.f16 (half) +declare i100 @llvm.fptoui.sat.i100.f16(half) +declare i128 @llvm.fptoui.sat.i128.f16(half) + +define i1 @test_unsigned_i1_f16(half %f) { +; CHECK-LABEL: test_unsigned_i1_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fmov s1, #1.00000000 +; CHECK-NEXT: fcvtzu w8, s0 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: csel w8, wzr, w8, lt +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: csinc w8, w8, wzr, le +; CHECK-NEXT: and w0, w8, #0x1 +; CHECK-NEXT: ret + %x = call i1 @llvm.fptoui.sat.i1.f16(half %f) + ret i1 
%x +} + +define i8 @test_unsigned_i8_f16(half %f) { +; CHECK-LABEL: test_unsigned_i8_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI41_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI41_0] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvtzu w8, s0 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: csel w8, wzr, w8, lt +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: orr w9, wzr, #0xff +; CHECK-NEXT: csel w0, w9, w8, gt +; CHECK-NEXT: ret + %x = call i8 @llvm.fptoui.sat.i8.f16(half %f) + ret i8 %x +} + +define i13 @test_unsigned_i13_f16(half %f) { +; CHECK-LABEL: test_unsigned_i13_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI42_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI42_0] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvtzu w8, s0 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: csel w8, wzr, w8, lt +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: orr w9, wzr, #0x1fff +; CHECK-NEXT: csel w0, w9, w8, gt +; CHECK-NEXT: ret + %x = call i13 @llvm.fptoui.sat.i13.f16(half %f) + ret i13 %x +} + +define i16 @test_unsigned_i16_f16(half %f) { +; CHECK-LABEL: test_unsigned_i16_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI43_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI43_0] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvtzu w8, s0 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: csel w8, wzr, w8, lt +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: orr w9, wzr, #0xffff +; CHECK-NEXT: csel w0, w9, w8, gt +; CHECK-NEXT: ret + %x = call i16 @llvm.fptoui.sat.i16.f16(half %f) + ret i16 %x +} + +define i19 @test_unsigned_i19_f16(half %f) { +; CHECK-LABEL: test_unsigned_i19_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI44_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI44_0] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvtzu w8, s0 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: csel w8, wzr, w8, lt +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: orr w9, wzr, #0x7ffff +; CHECK-NEXT: csel w0, w9, w8, gt +; CHECK-NEXT: ret + %x = call i19 @llvm.fptoui.sat.i19.f16(half %f) + ret i19 %x +} + +define i32 
@test_unsigned_i32_f16(half %f) { +; CHECK-LABEL: test_unsigned_i32_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI45_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI45_0] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvtzu w8, s0 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: csel w8, wzr, w8, lt +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: csinv w0, w8, wzr, le +; CHECK-NEXT: ret + %x = call i32 @llvm.fptoui.sat.i32.f16(half %f) + ret i32 %x +} + +define i50 @test_unsigned_i50_f16(half %f) { +; CHECK-LABEL: test_unsigned_i50_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI46_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI46_0] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvtzu x8, s0 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: csel x8, xzr, x8, lt +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: orr x9, xzr, #0x3ffffffffffff +; CHECK-NEXT: csel x0, x9, x8, gt +; CHECK-NEXT: ret + %x = call i50 @llvm.fptoui.sat.i50.f16(half %f) + ret i50 %x +} + +define i64 @test_unsigned_i64_f16(half %f) { +; CHECK-LABEL: test_unsigned_i64_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI47_0 +; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI47_0] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvtzu x8, s0 +; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: csel x8, xzr, x8, lt +; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: csinv x0, x8, xzr, le +; CHECK-NEXT: ret + %x = call i64 @llvm.fptoui.sat.i64.f16(half %f) + ret i64 %x +} + +;define i100 @test_unsigned_i100_f16(half %f) { +; %x = call i100 @llvm.fptoui.sat.i100.f16(half %f) +; ret i100 %x +;} +; +;define i128 @test_unsigned_i128_f16(half %f) { +; %x = call i128 @llvm.fptoui.sat.i128.f16(half %f) +; ret i128 %x +;}