diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -282,6 +282,11 @@ // the value read before the modification and the new chain pointer. SWAP_CSR, + // FP to 32 bit int conversions for RV64. These are used to keep track of the + // result being sign extended to 64 bit. These saturate out of range inputs. + STRICT_FCVT_W_RTZ_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE, + STRICT_FCVT_WU_RTZ_RV64, + // Memory opcodes start here. VLE_VL = ISD::FIRST_TARGET_MEMORY_OPCODE, VSE_VL, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -335,6 +335,8 @@ setOperationAction(ISD::STRICT_FSUB, MVT::f16, Legal); setOperationAction(ISD::STRICT_FMUL, MVT::f16, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::f16, Legal); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Legal); for (auto CC : FPCCToExpand) setCondCodeAction(CC, MVT::f16, Expand); @@ -405,6 +407,8 @@ setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal); setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal); for (auto CC : FPCCToExpand) setCondCodeAction(CC, MVT::f64, Expand); @@ -430,6 +434,11 @@ setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom); setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, XLenVT, Legal); + setOperationAction(ISD::STRICT_FP_TO_SINT, XLenVT, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, XLenVT, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, XLenVT, Legal); + setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom); setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); } @@ -5727,11 +5736,17 @@ SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0); if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != TargetLowering::TypeSoftenFloat) { - // FIXME: Support strict FP. - if (IsStrict) - return; if (!isTypeLegal(Op0.getValueType())) return; + if (IsStrict) { + unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RTZ_RV64 + : RISCVISD::STRICT_FCVT_WU_RTZ_RV64; + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); + SDValue Res = DAG.getNode(Opc, DL, VTs, N->getOperand(0), Op0); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); + Results.push_back(Res.getValue(1)); + return; + } unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RTZ_RV64 : RISCVISD::FCVT_WU_RTZ_RV64; SDValue Res = DAG.getNode(Opc, DL, MVT::i64, Op0); @@ -7152,11 +7167,23 @@ // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during // type legalization. This is safe because fp_to_uint produces poison if // it overflows. 
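+    // The STRICT_FP_TO_UINT case below performs the same fold, but the strict
+    // node also carries a chain, so the combine has to replace the chain
+    // result as well (via CombineTo/ReplaceAllUsesOfValueWith) instead of
+    // simply returning a new value.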
- if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit() && - N->getOperand(0).getOpcode() == ISD::FP_TO_UINT && - isTypeLegal(N->getOperand(0).getOperand(0).getValueType())) - return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64, - N->getOperand(0).getOperand(0)); + if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) { + SDValue Src = N->getOperand(0); + if (Src.getOpcode() == ISD::FP_TO_UINT && + isTypeLegal(Src.getOperand(0).getValueType())) + return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64, + Src.getOperand(0)); + if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() && + isTypeLegal(Src.getOperand(1).getValueType())) { + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); + SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs, + Src.getOperand(0), Src.getOperand(1)); + DCI.CombineTo(N, Res); + DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1)); + DCI.recursivelyDeleteUnusedNodes(Src.getNode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked. + } + } return SDValue(); case RISCVISD::SELECT_CC: { // Transform @@ -7717,6 +7744,8 @@ case RISCVISD::BDECOMPRESSW: case RISCVISD::FCVT_W_RTZ_RV64: case RISCVISD::FCVT_WU_RTZ_RV64: + case RISCVISD::STRICT_FCVT_W_RTZ_RV64: + case RISCVISD::STRICT_FCVT_WU_RTZ_RV64: // TODO: As the result is sign-extended, this is conservatively correct. A // more precise answer could be calculated for SRAW depending on known // bits in the shift amount. @@ -9448,6 +9477,8 @@ NODE_NAME_CASE(FCVT_XU_RTZ) NODE_NAME_CASE(FCVT_W_RTZ_RV64) NODE_NAME_CASE(FCVT_WU_RTZ_RV64) + NODE_NAME_CASE(STRICT_FCVT_W_RTZ_RV64) + NODE_NAME_CASE(STRICT_FCVT_WU_RTZ_RV64) NODE_NAME_CASE(READ_CYCLE_WIDE) NODE_NAME_CASE(GREV) NODE_NAME_CASE(GREVW) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -184,8 +184,8 @@ /// Float conversion operations // f64 -> f32, f32 -> f64 -def : Pat<(fpround FPR64:$rs1), (FCVT_S_D FPR64:$rs1, 0b111)>; -def : Pat<(fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>; +def : Pat<(any_fpround FPR64:$rs1), (FCVT_S_D FPR64:$rs1, 0b111)>; +def : Pat<(any_fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>; // [u]int<->double conversion patterns must be gated on IsRV32 or IsRV64, so // are defined later. @@ -271,8 +271,8 @@ def : Pat<(f64 (fpimm0)), (FCVT_D_W (i32 X0))>; // double->[u]int. Round-to-zero must be used. -def : Pat<(i32 (fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, 0b001)>; -def : Pat<(i32 (fp_to_uint FPR64:$rs1)), (FCVT_WU_D FPR64:$rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_uint FPR64:$rs1)), (FCVT_WU_D FPR64:$rs1, 0b001)>; // Saturating double->[u]int32. def : Pat<(i32 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_W_D $rs1, 0b001)>; @@ -285,8 +285,8 @@ def : Pat<(i32 (lround FPR64:$rs1)), (FCVT_W_D $rs1, 0b100)>; // [u]int->double. -def : Pat<(sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1)>; -def : Pat<(uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1)>; +def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1)>; +def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1)>; } // Predicates = [HasStdExtD, IsRV32] let Predicates = [HasStdExtD, IsRV64] in { @@ -301,20 +301,20 @@ // Use target specific isd nodes to help us remember the result is sign // extended. 
Matching sext_inreg+fptoui/fptosi may cause the conversion to be // duplicated if it has another user that didn't need the sign_extend. -def : Pat<(riscv_fcvt_w_rtz_rv64 FPR64:$rs1), (FCVT_W_D $rs1, 0b001)>; -def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR64:$rs1), (FCVT_WU_D $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_w_rtz_rv64 FPR64:$rs1), (FCVT_W_D $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_wu_rtz_rv64 FPR64:$rs1), (FCVT_WU_D $rs1, 0b001)>; // [u]int32->fp -def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>; -def : Pat<(uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1)>; +def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>; +def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1)>; // Saturating double->[u]int64. def : Pat<(i64 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_L_D $rs1, 0b001)>; def : Pat<(i64 (riscv_fcvt_xu_rtz FPR64:$rs1)), (FCVT_LU_D $rs1, 0b001)>; // double->[u]int64. Round-to-zero must be used. -def : Pat<(i64 (fp_to_sint FPR64:$rs1)), (FCVT_L_D FPR64:$rs1, 0b001)>; -def : Pat<(i64 (fp_to_uint FPR64:$rs1)), (FCVT_LU_D FPR64:$rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_sint FPR64:$rs1)), (FCVT_L_D FPR64:$rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_uint FPR64:$rs1)), (FCVT_LU_D FPR64:$rs1, 0b001)>; // double->int64 with current rounding mode. def : Pat<(i64 (lrint FPR64:$rs1)), (FCVT_L_D $rs1, 0b111)>; @@ -325,6 +325,6 @@ def : Pat<(i64 (llround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>; // [u]int64->fp. Match GCC and default to using dynamic rounding mode. -def : Pat<(sint_to_fp (i64 GPR:$rs1)), (FCVT_D_L GPR:$rs1, 0b111)>; -def : Pat<(uint_to_fp (i64 GPR:$rs1)), (FCVT_D_LU GPR:$rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_D_L GPR:$rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_D_LU GPR:$rs1, 0b111)>; } // Predicates = [HasStdExtD, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -19,9 +19,9 @@ : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>; def SDT_RISCVFMV_X_ANYEXTW_RV64 : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>; -def STD_RISCVFCVT_W_RV64 +def SDT_RISCVFCVT_W_RV64 : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisFP<1>]>; -def STD_RISCVFCVT_X +def SDT_RISCVFCVT_X : SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisFP<1>]>; def riscv_fmv_w_x_rv64 @@ -29,13 +29,27 @@ def riscv_fmv_x_anyextw_rv64 : SDNode<"RISCVISD::FMV_X_ANYEXTW_RV64", SDT_RISCVFMV_X_ANYEXTW_RV64>; def riscv_fcvt_w_rtz_rv64 - : SDNode<"RISCVISD::FCVT_W_RTZ_RV64", STD_RISCVFCVT_W_RV64>; + : SDNode<"RISCVISD::FCVT_W_RTZ_RV64", SDT_RISCVFCVT_W_RV64>; def riscv_fcvt_wu_rtz_rv64 - : SDNode<"RISCVISD::FCVT_WU_RTZ_RV64", STD_RISCVFCVT_W_RV64>; + : SDNode<"RISCVISD::FCVT_WU_RTZ_RV64", SDT_RISCVFCVT_W_RV64>; def riscv_fcvt_x_rtz - : SDNode<"RISCVISD::FCVT_X_RTZ", STD_RISCVFCVT_X>; + : SDNode<"RISCVISD::FCVT_X_RTZ", SDT_RISCVFCVT_X>; def riscv_fcvt_xu_rtz - : SDNode<"RISCVISD::FCVT_XU_RTZ", STD_RISCVFCVT_X>; + : SDNode<"RISCVISD::FCVT_XU_RTZ", SDT_RISCVFCVT_X>; + +def riscv_strict_fcvt_w_rtz_rv64 + : SDNode<"RISCVISD::STRICT_FCVT_W_RTZ_RV64", SDT_RISCVFCVT_W_RV64, + [SDNPHasChain]>; +def riscv_strict_fcvt_wu_rtz_rv64 + : SDNode<"RISCVISD::STRICT_FCVT_WU_RTZ_RV64", SDT_RISCVFCVT_W_RV64, + [SDNPHasChain]>; + +def riscv_any_fcvt_w_rtz_rv64 : PatFrags<(ops node:$src), + [(riscv_strict_fcvt_w_rtz_rv64 node:$src), + (riscv_fcvt_w_rtz_rv64 node:$src)]>; +def 
riscv_any_fcvt_wu_rtz_rv64 : PatFrags<(ops node:$src), + [(riscv_strict_fcvt_wu_rtz_rv64 node:$src), + (riscv_fcvt_wu_rtz_rv64 node:$src)]>; //===----------------------------------------------------------------------===// // Operand and SDNode transformation definitions. @@ -375,8 +389,8 @@ def : Pat<(i32 (bitconvert FPR32:$rs1)), (FMV_X_W FPR32:$rs1)>; // float->[u]int. Round-to-zero must be used. -def : Pat<(i32 (fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>; -def : Pat<(i32 (fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>; // Saturating float->[u]int32. def : Pat<(i32 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>; @@ -389,8 +403,8 @@ def : Pat<(i32 (lround FPR32:$rs1)), (FCVT_W_S $rs1, 0b100)>; // [u]int->float. Match GCC and default to using dynamic rounding mode. -def : Pat<(sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, 0b111)>; -def : Pat<(uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU $rs1, 0b111)>; } // Predicates = [HasStdExtF, IsRV32] let Predicates = [HasStdExtF, IsRV64] in { @@ -403,12 +417,12 @@ // Use target specific isd nodes to help us remember the result is sign // extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be // duplicated if it has another user that didn't need the sign_extend. -def : Pat<(riscv_fcvt_w_rtz_rv64 FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>; -def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_w_rtz_rv64 FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_wu_rtz_rv64 FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>; // float->[u]int64. Round-to-zero must be used. -def : Pat<(i64 (fp_to_sint FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>; -def : Pat<(i64 (fp_to_uint FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_sint FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_uint FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>; // Saturating float->[u]int64. def : Pat<(i64 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>; @@ -423,8 +437,8 @@ def : Pat<(i64 (llround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>; // [u]int->fp. Match GCC and default to using dynamic rounding mode. 
-def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_S_W $rs1, 0b111)>; -def : Pat<(uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_S_WU $rs1, 0b111)>; -def : Pat<(sint_to_fp (i64 GPR:$rs1)), (FCVT_S_L $rs1, 0b111)>; -def : Pat<(uint_to_fp (i64 GPR:$rs1)), (FCVT_S_LU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_S_W $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_S_WU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_S_L $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_S_LU $rs1, 0b111)>; } // Predicates = [HasStdExtF, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -277,8 +277,8 @@ /// Float conversion operations // f32 -> f16, f16 -> f32 -def : Pat<(fpround FPR32:$rs1), (FCVT_H_S FPR32:$rs1, 0b111)>; -def : Pat<(fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>; +def : Pat<(any_fpround FPR32:$rs1), (FCVT_H_S FPR32:$rs1, 0b111)>; +def : Pat<(any_fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>; // Moves (no conversion) def : Pat<(riscv_fmv_h_x GPR:$src), (FMV_H_X GPR:$src)>; @@ -287,8 +287,8 @@ let Predicates = [HasStdExtZfh, IsRV32] in { // half->[u]int. Round-to-zero must be used. -def : Pat<(i32 (fp_to_sint FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>; -def : Pat<(i32 (fp_to_uint FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_sint FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_uint FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>; // Saturating float->[u]int32. def : Pat<(i32 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>; @@ -301,20 +301,20 @@ def : Pat<(i32 (lround FPR16:$rs1)), (FCVT_W_H $rs1, 0b100)>; // [u]int->half. Match GCC and default to using dynamic rounding mode. -def : Pat<(sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W $rs1, 0b111)>; -def : Pat<(uint_to_fp (i32 GPR:$rs1)), (FCVT_H_WU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_WU $rs1, 0b111)>; } // Predicates = [HasStdExtZfh, IsRV32] let Predicates = [HasStdExtZfh, IsRV64] in { // Use target specific isd nodes to help us remember the result is sign // extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be // duplicated if it has another user that didn't need the sign_extend. -def : Pat<(riscv_fcvt_w_rtz_rv64 FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>; -def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_w_rtz_rv64 FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_wu_rtz_rv64 FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>; // half->[u]int64. Round-to-zero must be used. -def : Pat<(i64 (fp_to_sint FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>; -def : Pat<(i64 (fp_to_uint FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_sint FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_uint FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>; // Saturating float->[u]int64. def : Pat<(i64 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>; @@ -329,17 +329,17 @@ def : Pat<(i64 (llround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>; // [u]int->fp. Match GCC and default to using dynamic rounding mode. 
-def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_H_W $rs1, 0b111)>; -def : Pat<(uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_H_WU $rs1, 0b111)>; -def : Pat<(sint_to_fp (i64 GPR:$rs1)), (FCVT_H_L $rs1, 0b111)>; -def : Pat<(uint_to_fp (i64 GPR:$rs1)), (FCVT_H_LU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_H_W $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_H_WU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_H_L $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_H_LU $rs1, 0b111)>; } // Predicates = [HasStdExtZfh, IsRV64] let Predicates = [HasStdExtZfhmin, HasStdExtD] in { /// Float conversion operations // f64 -> f16, f16 -> f64 -def : Pat<(fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>; -def : Pat<(fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>; +def : Pat<(any_fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>; +def : Pat<(any_fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>; /// Float arithmetic operations def : Pat<(fcopysign FPR16:$rs1, FPR64:$rs2), diff --git a/llvm/test/CodeGen/RISCV/double-convert-strict.ll b/llvm/test/CodeGen/RISCV/double-convert-strict.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/double-convert-strict.ll @@ -0,0 +1,848 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV32IFD %s +; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV64IFD %s +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV64I %s + +define float @fcvt_s_d(double %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_s_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fcvt.s.d ft0, ft0 +; RV32IFD-NEXT: fmv.x.w a0, ft0 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_s_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.s.d ft0, ft0 +; RV64IFD-NEXT: fmv.x.w a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_s_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __truncdfsf2@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __truncdfsf2@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret float %1 +} +declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) + +define double @fcvt_d_s(float %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_s: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fmv.w.x ft0, a0 +; RV32IFD-NEXT: fcvt.d.s ft0, ft0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: 
addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_s: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.w.x ft0, a0 +; RV64IFD-NEXT: fcvt.d.s ft0, ft0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __extendsfdf2@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __extendsfdf2@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.fpext.f64.f32(float %a, metadata !"fpexcept.strict") + ret double %1 +} +declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) + +; For RV64D, fcvt.l.d is semantically equivalent to fcvt.w.d in this case +; because fptosi will produce poison if the result doesn't fit into an i32. +define i32 @fcvt_w_d(double %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_w_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_w_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.w.d a0, ft0, rtz +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_w_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixdfsi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_w_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixdfsi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) + +; For RV64D, fcvt.lu.d is semantically equivalent to fcvt.wu.d in this case +; because fptosi will produce poison if the result doesn't fit into an i32. 
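+; (The conversion tested below is fptoui rather than fptosi.) On RV64 it is
+; lowered through the STRICT_FCVT_WU_RTZ_RV64 node added by this patch, so the
+; RV64IFD checks expect a single fcvt.wu.d and no separate sign extension of
+; the i32 result.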
+define i32 @fcvt_wu_d(double %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_wu_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_wu_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.wu.d a0, ft0, rtz +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunsdfsi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunsdfsi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) + +; Test where the fptoui has multiple uses, one of which causes a sext to be +; inserted on RV64. +define i32 @fcvt_wu_d_multiple_use(double %x, i32* %y) { +; RV32IFD-LABEL: fcvt_wu_d_multiple_use: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: .cfi_def_cfa_offset 16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fcvt.wu.d a1, ft0, rtz +; RV32IFD-NEXT: li a0, 1 +; RV32IFD-NEXT: beqz a1, .LBB4_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: mv a0, a1 +; RV32IFD-NEXT: .LBB4_2: +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_wu_d_multiple_use: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.wu.d a1, ft0, rtz +; RV64IFD-NEXT: li a0, 1 +; RV64IFD-NEXT: beqz a1, .LBB4_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: mv a0, a1 +; RV64IFD-NEXT: .LBB4_2: +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_d_multiple_use: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: call __fixunsdfsi@plt +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: beqz a1, .LBB4_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: .LBB4_2: +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_d_multiple_use: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: call __fixunsdfsi@plt +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: beqz a1, .LBB4_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB4_2: +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict") strictfp + %b = icmp eq i32 %a, 0 + %c = select i1 %b, i32 1, i32 %a + ret i32 %c +} + +define double @fcvt_d_w(i32 %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_w: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fcvt.d.w ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; 
RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_w: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.w ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: call __floatsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata) + +define double @fcvt_d_w_load(i32* %p) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_w_load: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: lw a0, 0(a0) +; RV32IFD-NEXT: fcvt.d.w ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_w_load: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lw a0, 0(a0) +; RV64IFD-NEXT: fcvt.d.w ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w_load: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: call __floatsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w_load: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: call __floatsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = load i32, i32* %p + %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +define double @fcvt_d_wu(i32 %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_wu: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fcvt.d.wu ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_wu: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.wu ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: call __floatunsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + 
ret double %1 +} +declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata) + +define double @fcvt_d_wu_load(i32* %p) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_wu_load: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: lw a0, 0(a0) +; RV32IFD-NEXT: fcvt.d.wu ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_wu_load: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lwu a0, 0(a0) +; RV64IFD-NEXT: fcvt.d.wu ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu_load: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: call __floatunsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu_load: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: call __floatunsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = load i32, i32* %p + %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +define i64 @fcvt_l_d(double %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_l_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call __fixdfdi@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_l_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_l_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixdfdi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_l_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixdfdi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} +declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata) + +define i64 @fcvt_lu_d(double %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_lu_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call __fixunsdfdi@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_lu_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.lu.d a0, ft0, rtz +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_lu_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunsdfdi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_lu_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: 
call __fixunsdfdi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} +declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata) + +define double @fcvt_d_l(i64 %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_l: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call __floatdidf@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_l: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.l ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_l: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatdidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_l: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatdidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata) + +define double @fcvt_d_lu(i64 %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_lu: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call __floatundidf@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_lu: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.lu ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_lu: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatundidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_lu: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatundidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata) + +define double @fcvt_d_w_i8(i8 signext %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_w_i8: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fcvt.d.w ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_w_i8: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.w ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; 
RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i8(i8 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.sitofp.f64.i8(i8, metadata, metadata) + +define double @fcvt_d_wu_i8(i8 zeroext %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_wu_i8: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fcvt.d.wu ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_wu_i8: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.wu ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatunsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i8(i8 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.uitofp.f64.i8(i8, metadata, metadata) + +define double @fcvt_d_w_i16(i16 signext %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_w_i16: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fcvt.d.w ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_w_i16: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.w ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.sitofp.f64.i16(i16, metadata, metadata) + +define double @fcvt_d_wu_i16(i16 zeroext %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_wu_i16: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fcvt.d.wu ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_wu_i16: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.wu ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 
+; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatunsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.uitofp.f64.i16(i16, metadata, metadata) + +; Make sure we select W version of addi on RV64. +define signext i32 @fcvt_d_w_demanded_bits(i32 signext %0, double* %1) { +; RV32IFD-LABEL: fcvt_d_w_demanded_bits: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi a0, a0, 1 +; RV32IFD-NEXT: fcvt.d.w ft0, a0 +; RV32IFD-NEXT: fsd ft0, 0(a1) +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_w_demanded_bits: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addiw a0, a0, 1 +; RV64IFD-NEXT: fcvt.d.w ft0, a0 +; RV64IFD-NEXT: fsd ft0, 0(a1) +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w_demanded_bits: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: addi s1, a0, 1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __floatsidf@plt +; RV32I-NEXT: sw a1, 4(s0) +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w_demanded_bits: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: addiw s1, a0, 1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __floatsidf@plt +; RV64I-NEXT: sd a0, 0(s0) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %3 = add i32 %0, 1 + %4 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + store double %4, double* %1, align 8 + ret i32 %3 +} + +; Make sure we select W version of addi on RV64. 
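+; The add result feeds both the unsigned conversion and the sign-extended i32
+; return value, so the RV64IFD checks below require addiw rather than addi.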
+define signext i32 @fcvt_d_wu_demanded_bits(i32 signext %0, double* %1) { +; RV32IFD-LABEL: fcvt_d_wu_demanded_bits: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi a0, a0, 1 +; RV32IFD-NEXT: fcvt.d.wu ft0, a0 +; RV32IFD-NEXT: fsd ft0, 0(a1) +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_wu_demanded_bits: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addiw a0, a0, 1 +; RV64IFD-NEXT: fcvt.d.wu ft0, a0 +; RV64IFD-NEXT: fsd ft0, 0(a1) +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu_demanded_bits: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: addi s1, a0, 1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __floatunsidf@plt +; RV32I-NEXT: sw a1, 4(s0) +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu_demanded_bits: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: addiw s1, a0, 1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __floatunsidf@plt +; RV64I-NEXT: sd a0, 0(s0) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %3 = add i32 %0, 1 + %4 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + store double %4, double* %1, align 8 + ret i32 %3 +} diff --git a/llvm/test/CodeGen/RISCV/float-convert-strict.ll b/llvm/test/CodeGen/RISCV/float-convert-strict.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/float-convert-strict.ll @@ -0,0 +1,717 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV32IF %s +; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV64IF %s +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV64I %s + +; For RV64F, fcvt.l.s is semantically equivalent to fcvt.w.s in this case +; because fptosi will produce poison if the result doesn't fit into an i32. 
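+; The RV64IF checks below keep the w form: on RV64 the conversion is lowered
+; to the STRICT_FCVT_W_RTZ_RV64 node introduced by this patch, which records
+; that the i32 result is already sign extended.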
+define i32 @fcvt_w_s(float %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_w_s: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_w_s: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_w_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixsfsi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_w_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixsfsi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata) + +define i32 @fcvt_wu_s(float %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_wu_s: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_wu_s: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunssfsi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunssfsi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata) + +; Test where the fptoui has multiple uses, one of which causes a sext to be +; inserted on RV64. 
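+; Because the strict conversion node reports a sign-extended result (via the
+; sign-bit computation hook updated in this patch), the RV64IF checks below
+; contain no sext.w even though the value feeds both the compare and the
+; return.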
+define i32 @fcvt_wu_s_multiple_use(float %x, i32* %y) { +; RV32IF-LABEL: fcvt_wu_s_multiple_use: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fcvt.wu.s a1, ft0, rtz +; RV32IF-NEXT: li a0, 1 +; RV32IF-NEXT: beqz a1, .LBB2_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: mv a0, a1 +; RV32IF-NEXT: .LBB2_2: +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_wu_s_multiple_use: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.wu.s a1, ft0, rtz +; RV64IF-NEXT: li a0, 1 +; RV64IF-NEXT: beqz a1, .LBB2_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: mv a0, a1 +; RV64IF-NEXT: .LBB2_2: +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_s_multiple_use: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: call __fixunssfsi@plt +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: beqz a1, .LBB2_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: .LBB2_2: +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_s_multiple_use: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: call __fixunssfsi@plt +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: beqz a1, .LBB2_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB2_2: +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") strictfp + %b = icmp eq i32 %a, 0 + %c = select i1 %b, i32 1, i32 %a + ret i32 %c +} + +define float @fcvt_s_w(i32 %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_w: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_w: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: call __floatsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata) + +define float @fcvt_s_w_load(i32* %p) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_w_load: +; RV32IF: # %bb.0: +; RV32IF-NEXT: lw a0, 0(a0) +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_w_load: +; RV64IF: # %bb.0: +; RV64IF-NEXT: lw a0, 0(a0) +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w_load: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: call 
__floatsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w_load: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: call __floatsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = load i32, i32* %p + %1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +define float @fcvt_s_wu(i32 %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_wu: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fcvt.s.wu ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_wu: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: call __floatunsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata, metadata) + +define float @fcvt_s_wu_load(i32* %p) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_wu_load: +; RV32IF: # %bb.0: +; RV32IF-NEXT: lw a0, 0(a0) +; RV32IF-NEXT: fcvt.s.wu ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_wu_load: +; RV64IF: # %bb.0: +; RV64IF-NEXT: lwu a0, 0(a0) +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu_load: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: call __floatunsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu_load: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: call __floatunsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = load i32, i32* %p + %1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +define i64 @fcvt_l_s(float %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_l_s: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call __fixsfdi@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_l_s: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_l_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call 
__fixsfdi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_l_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixsfdi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} +declare i64 @llvm.experimental.constrained.fptosi.i64.f32(float, metadata) + +define i64 @fcvt_lu_s(float %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_lu_s: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call __fixunssfdi@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_lu_s: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_lu_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunssfdi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_lu_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunssfdi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} +declare i64 @llvm.experimental.constrained.fptoui.i64.f32(float, metadata) + +define float @fcvt_s_l(i64 %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_l: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call __floatdisf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_l: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.l ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_l: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatdisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_l: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatdisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata) + +define float @fcvt_s_lu(i64 %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_lu: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call __floatundisf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_lu: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.lu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_lu: +; RV32I: # 
%bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatundisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_lu: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatundisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.uitofp.f32.i64(i64, metadata, metadata) + +define float @fcvt_s_w_i8(i8 signext %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_w_i8: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_w_i8: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i8(i8 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.sitofp.f32.i8(i8, metadata, metadata) + +define float @fcvt_s_wu_i8(i8 zeroext %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_wu_i8: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fcvt.s.wu ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_wu_i8: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatunsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i8(i8 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret float %1 +} +declare float @llvm.experimental.constrained.uitofp.f32.i8(i8, metadata, metadata) + +define float @fcvt_s_w_i16(i16 signext %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_w_i16: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_w_i16: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: 
addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.sitofp.f32.i16(i16, metadata, metadata) + +define float @fcvt_s_wu_i16(i16 zeroext %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_wu_i16: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fcvt.s.wu ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_wu_i16: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatunsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.uitofp.f32.i16(i16, metadata, metadata) + +; Make sure we select W version of addi on RV64. +define signext i32 @fcvt_s_w_demanded_bits(i32 signext %0, float* %1) { +; RV32IF-LABEL: fcvt_s_w_demanded_bits: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi a0, a0, 1 +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fsw ft0, 0(a1) +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_w_demanded_bits: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addiw a0, a0, 1 +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fsw ft0, 0(a1) +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w_demanded_bits: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: addi s1, a0, 1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __floatsisf@plt +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w_demanded_bits: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: addiw s1, a0, 1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __floatsisf@plt +; RV64I-NEXT: sw a0, 0(s0) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld 
s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %3 = add i32 %0, 1 + %4 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + store float %4, float* %1, align 4 + ret i32 %3 +} + +; Make sure we select W version of addi on RV64. +define signext i32 @fcvt_s_wu_demanded_bits(i32 signext %0, float* %1) { +; RV32IF-LABEL: fcvt_s_wu_demanded_bits: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi a0, a0, 1 +; RV32IF-NEXT: fcvt.s.wu ft0, a0 +; RV32IF-NEXT: fsw ft0, 0(a1) +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_wu_demanded_bits: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addiw a0, a0, 1 +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fsw ft0, 0(a1) +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu_demanded_bits: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: addi s1, a0, 1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __floatunsisf@plt +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu_demanded_bits: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: addiw s1, a0, 1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __floatunsisf@plt +; RV64I-NEXT: sw a0, 0(s0) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %3 = add i32 %0, 1 + %4 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + store float %4, float* %1, align 4 + ret i32 %3 +} diff --git a/llvm/test/CodeGen/RISCV/half-convert-strict.ll b/llvm/test/CodeGen/RISCV/half-convert-strict.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/half-convert-strict.ll @@ -0,0 +1,708 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi ilp32f -disable-strictnode-mutation < %s \ +; RUN: | FileCheck -check-prefix=RV32IZFH %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64f -disable-strictnode-mutation < %s \ +; RUN: | FileCheck -check-prefix=RV64IZFH %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi ilp32d -disable-strictnode-mutation < %s \ +; RUN: | FileCheck -check-prefix=RV32IDZFH %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64d 
-disable-strictnode-mutation < %s \ +; RUN: | FileCheck -check-prefix=RV64IDZFH %s + +define i16 @fcvt_si_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_si_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_si_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_si_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_si_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = call i16 @llvm.experimental.constrained.fptosi.i16.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i16 %1 +} +declare i16 @llvm.experimental.constrained.fptosi.i16.f16(half, metadata) + +define i16 @fcvt_ui_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_ui_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_ui_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_ui_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_ui_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = call i16 @llvm.experimental.constrained.fptoui.i16.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i16 %1 +} +declare i16 @llvm.experimental.constrained.fptoui.i16.f16(half, metadata) + +define i32 @fcvt_w_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_w_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_w_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_w_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_w_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata) + +define i32 @fcvt_wu_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_wu_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_wu_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_wu_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_wu_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata) + +; Test where the fptoui has multiple uses, one of which causes a sext to be +; inserted on RV64. +; FIXME: We should not have an fcvt.wu.h and an fcvt.lu.h. 
+define i32 @fcvt_wu_h_multiple_use(half %x, i32* %y) { +; RV32IZFH-LABEL: fcvt_wu_h_multiple_use: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.wu.h a1, fa0, rtz +; RV32IZFH-NEXT: li a0, 1 +; RV32IZFH-NEXT: beqz a1, .LBB4_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: mv a0, a1 +; RV32IZFH-NEXT: .LBB4_2: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_wu_h_multiple_use: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.wu.h a1, fa0, rtz +; RV64IZFH-NEXT: li a0, 1 +; RV64IZFH-NEXT: beqz a1, .LBB4_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: mv a0, a1 +; RV64IZFH-NEXT: .LBB4_2: +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_wu_h_multiple_use: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.wu.h a1, fa0, rtz +; RV32IDZFH-NEXT: li a0, 1 +; RV32IDZFH-NEXT: beqz a1, .LBB4_2 +; RV32IDZFH-NEXT: # %bb.1: +; RV32IDZFH-NEXT: mv a0, a1 +; RV32IDZFH-NEXT: .LBB4_2: +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_wu_h_multiple_use: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.wu.h a1, fa0, rtz +; RV64IDZFH-NEXT: li a0, 1 +; RV64IDZFH-NEXT: beqz a1, .LBB4_2 +; RV64IDZFH-NEXT: # %bb.1: +; RV64IDZFH-NEXT: mv a0, a1 +; RV64IDZFH-NEXT: .LBB4_2: +; RV64IDZFH-NEXT: ret + %a = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") strictfp + %b = icmp eq i32 %a, 0 + %c = select i1 %b, i32 1, i32 %a + ret i32 %c +} + +define i64 @fcvt_l_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_l_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: call __fixhfdi@plt +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_l_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_l_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: call __fixhfdi@plt +; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_l_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} +declare i64 @llvm.experimental.constrained.fptosi.i64.f16(half, metadata) + +define i64 @fcvt_lu_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_lu_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: call __fixunshfdi@plt +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_lu_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_lu_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: call __fixunshfdi@plt +; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_lu_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} +declare i64 @llvm.experimental.constrained.fptoui.i64.f16(half, metadata) + +define half @fcvt_h_si(i16 %a) 
nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_si: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: slli a0, a0, 16 +; RV32IZFH-NEXT: srai a0, a0, 16 +; RV32IZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_si: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: slli a0, a0, 48 +; RV64IZFH-NEXT: srai a0, a0, 48 +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_si: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: slli a0, a0, 16 +; RV32IDZFH-NEXT: srai a0, a0, 16 +; RV32IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_si: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: slli a0, a0, 48 +; RV64IDZFH-NEXT: srai a0, a0, 48 +; RV64IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.sitofp.f16.i16(i16, metadata, metadata) + +define half @fcvt_h_si_signext(i16 signext %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_si_signext: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_si_signext: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_si_signext: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_si_signext: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @fcvt_h_ui(i16 %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_ui: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: lui a1, 16 +; RV32IZFH-NEXT: addi a1, a1, -1 +; RV32IZFH-NEXT: and a0, a0, a1 +; RV32IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_ui: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: lui a1, 16 +; RV64IZFH-NEXT: addiw a1, a1, -1 +; RV64IZFH-NEXT: and a0, a0, a1 +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_ui: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: lui a1, 16 +; RV32IDZFH-NEXT: addi a1, a1, -1 +; RV32IDZFH-NEXT: and a0, a0, a1 +; RV32IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_ui: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: lui a1, 16 +; RV64IDZFH-NEXT: addiw a1, a1, -1 +; RV64IDZFH-NEXT: and a0, a0, a1 +; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.uitofp.f16.i16(i16, metadata, metadata) + +define half @fcvt_h_ui_zeroext(i16 zeroext %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_ui_zeroext: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_ui_zeroext: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_ui_zeroext: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_ui_zeroext: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + 
+define half @fcvt_h_w(i32 %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_w: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_w: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_w: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_w: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata) + +define half @fcvt_h_w_load(i32* %p) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_w_load: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: lw a0, 0(a0) +; RV32IZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_w_load: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: lw a0, 0(a0) +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_w_load: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: lw a0, 0(a0) +; RV32IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_w_load: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: lw a0, 0(a0) +; RV64IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IDZFH-NEXT: ret + %a = load i32, i32* %p + %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @fcvt_h_wu(i32 %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_wu: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_wu: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_wu: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_wu: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata) + +define half @fcvt_h_wu_load(i32* %p) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_wu_load: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: lw a0, 0(a0) +; RV32IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_wu_load: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: lwu a0, 0(a0) +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_wu_load: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: lw a0, 0(a0) +; RV32IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_wu_load: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: lwu a0, 0(a0) +; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IDZFH-NEXT: ret + %a = load i32, i32* %p + %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @fcvt_h_l(i64 %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_l: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: call __floatdihf@plt +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_l: +; RV64IZFH: # 
%bb.0: +; RV64IZFH-NEXT: fcvt.h.l fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_l: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: call __floatdihf@plt +; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_l: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.l fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata) + +define half @fcvt_h_lu(i64 %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_lu: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: call __floatundihf@plt +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_lu: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.lu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_lu: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: call __floatundihf@plt +; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_lu: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.lu fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata) + +define half @fcvt_h_s(float %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_s: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_s: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.s fa0, fa0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret half %1 +} +declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) + +define float @fcvt_s_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_s_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_s_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_s_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_s_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.s.h fa0, fa0 +; RV64IDZFH-NEXT: ret + %1 = call float @llvm.experimental.constrained.fpext.f32.f16(half %a, metadata !"fpexcept.strict") + ret float %1 +} +declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) + +define half @fcvt_h_d(double %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_d: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: call __truncdfhf2@plt +; RV32IZFH-NEXT: lw ra, 12(sp) # 
4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_d: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFH-NEXT: call __truncdfhf2@plt +; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_d: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.d fa0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_d: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.d fa0, fa0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.fptrunc.f16.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret half %1 +} +declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata) + +define double @fcvt_d_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_d_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: call __extendsfdf2@plt +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_d_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFH-NEXT: call __extendsfdf2@plt +; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_d_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.d.h fa0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_d_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.d.h fa0, fa0 +; RV64IDZFH-NEXT: ret + %1 = call double @llvm.experimental.constrained.fpext.f64.f16(half %a, metadata !"fpexcept.strict") + ret double %1 +} +declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata) + +; Make sure we select W version of addi on RV64. +define signext i32 @fcvt_h_w_demanded_bits(i32 signext %0, half* %1) { +; RV32IZFH-LABEL: fcvt_h_w_demanded_bits: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi a0, a0, 1 +; RV32IZFH-NEXT: fcvt.h.w ft0, a0 +; RV32IZFH-NEXT: fsh ft0, 0(a1) +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_w_demanded_bits: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addiw a0, a0, 1 +; RV64IZFH-NEXT: fcvt.h.w ft0, a0 +; RV64IZFH-NEXT: fsh ft0, 0(a1) +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_w_demanded_bits: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi a0, a0, 1 +; RV32IDZFH-NEXT: fcvt.h.w ft0, a0 +; RV32IDZFH-NEXT: fsh ft0, 0(a1) +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_w_demanded_bits: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: addiw a0, a0, 1 +; RV64IDZFH-NEXT: fcvt.h.w ft0, a0 +; RV64IDZFH-NEXT: fsh ft0, 0(a1) +; RV64IDZFH-NEXT: ret + %3 = add i32 %0, 1 + %4 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + store half %4, half* %1, align 2 + ret i32 %3 +} + +; Make sure we select W version of addi on RV64. 
+define signext i32 @fcvt_h_wu_demanded_bits(i32 signext %0, half* %1) { +; RV32IZFH-LABEL: fcvt_h_wu_demanded_bits: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi a0, a0, 1 +; RV32IZFH-NEXT: fcvt.h.wu ft0, a0 +; RV32IZFH-NEXT: fsh ft0, 0(a1) +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_wu_demanded_bits: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addiw a0, a0, 1 +; RV64IZFH-NEXT: fcvt.h.wu ft0, a0 +; RV64IZFH-NEXT: fsh ft0, 0(a1) +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_wu_demanded_bits: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi a0, a0, 1 +; RV32IDZFH-NEXT: fcvt.h.wu ft0, a0 +; RV32IDZFH-NEXT: fsh ft0, 0(a1) +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_wu_demanded_bits: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: addiw a0, a0, 1 +; RV64IDZFH-NEXT: fcvt.h.wu ft0, a0 +; RV64IDZFH-NEXT: fsh ft0, 0(a1) +; RV64IDZFH-NEXT: ret + %3 = add i32 %0, 1 + %4 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + store half %4, half* %1, align 2 + ret i32 %3 +} diff --git a/llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll b/llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck %s -check-prefix=RV64ID + +; This file exhaustively checks double<->i32 conversions. In general, +; fcvt.l[u].d can be selected instead of fcvt.w[u].d because poison is +; generated for an fpto[s|u]i conversion if the result doesn't fit in the +; target type. + +define i32 @aext_fptosi(double %a) nounwind strictfp { +; RV64ID-LABEL: aext_fptosi: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fmv.d.x ft0, a0 +; RV64ID-NEXT: fcvt.w.d a0, ft0, rtz +; RV64ID-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) + +define signext i32 @sext_fptosi(double %a) nounwind strictfp { +; RV64ID-LABEL: sext_fptosi: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fmv.d.x ft0, a0 +; RV64ID-NEXT: fcvt.w.d a0, ft0, rtz +; RV64ID-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptosi(double %a) nounwind strictfp { +; RV64ID-LABEL: zext_fptosi: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fmv.d.x ft0, a0 +; RV64ID-NEXT: fcvt.w.d a0, ft0, rtz +; RV64ID-NEXT: slli a0, a0, 32 +; RV64ID-NEXT: srli a0, a0, 32 +; RV64ID-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define i32 @aext_fptoui(double %a) nounwind strictfp { +; RV64ID-LABEL: aext_fptoui: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fmv.d.x ft0, a0 +; RV64ID-NEXT: fcvt.wu.d a0, ft0, rtz +; RV64ID-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) + +define signext i32 @sext_fptoui(double %a) nounwind strictfp { +; RV64ID-LABEL: sext_fptoui: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fmv.d.x ft0, a0 +; RV64ID-NEXT: fcvt.wu.d a0, ft0, rtz +; RV64ID-NEXT: ret + %1 = call i32 
@llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptoui(double %a) nounwind strictfp { +; RV64ID-LABEL: zext_fptoui: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fmv.d.x ft0, a0 +; RV64ID-NEXT: fcvt.lu.d a0, ft0, rtz +; RV64ID-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define double @uitofp_aext_i32_to_f64(i32 %a) nounwind strictfp { +; RV64ID-LABEL: uitofp_aext_i32_to_f64: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fcvt.d.wu ft0, a0 +; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata, metadata) + +define double @uitofp_sext_i32_to_f64(i32 signext %a) nounwind strictfp { +; RV64ID-LABEL: uitofp_sext_i32_to_f64: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fcvt.d.wu ft0, a0 +; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +define double @uitofp_zext_i32_to_f64(i32 zeroext %a) nounwind strictfp { +; RV64ID-LABEL: uitofp_zext_i32_to_f64: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fcvt.d.wu ft0, a0 +; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +define double @sitofp_aext_i32_to_f64(i32 %a) nounwind strictfp { +; RV64ID-LABEL: sitofp_aext_i32_to_f64: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fcvt.d.w ft0, a0 +; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata, metadata) + +define double @sitofp_sext_i32_to_f64(i32 signext %a) nounwind strictfp { +; RV64ID-LABEL: sitofp_sext_i32_to_f64: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fcvt.d.w ft0, a0 +; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +define double @sitofp_zext_i32_to_f64(i32 zeroext %a) nounwind strictfp { +; RV64ID-LABEL: sitofp_zext_i32_to_f64: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fcvt.d.w ft0, a0 +; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} diff --git a/llvm/test/CodeGen/RISCV/rv64f-float-convert-strict.ll b/llvm/test/CodeGen/RISCV/rv64f-float-convert-strict.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64f-float-convert-strict.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck %s -check-prefix=RV64IF + +; This file exhaustively checks float<->i32 conversions. 
In general, +; fcvt.l[u].s can be selected instead of fcvt.w[u].s because poison is +; generated for an fpto[s|u]i conversion if the result doesn't fit in the +; target type. + +define i32 @aext_fptosi(float %a) nounwind strictfp { +; RV64IF-LABEL: aext_fptosi: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IF-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata) + +define signext i32 @sext_fptosi(float %a) nounwind strictfp { +; RV64IF-LABEL: sext_fptosi: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IF-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptosi(float %a) nounwind strictfp { +; RV64IF-LABEL: zext_fptosi: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IF-NEXT: slli a0, a0, 32 +; RV64IF-NEXT: srli a0, a0, 32 +; RV64IF-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define i32 @aext_fptoui(float %a) nounwind strictfp { +; RV64IF-LABEL: aext_fptoui: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IF-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata) + +define signext i32 @sext_fptoui(float %a) nounwind strictfp { +; RV64IF-LABEL: sext_fptoui: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IF-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptoui(float %a) nounwind strictfp { +; RV64IF-LABEL: zext_fptoui: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IF-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define float @uitofp_aext_i32_to_f32(i32 %a) nounwind strictfp { +; RV64IF-LABEL: uitofp_aext_i32_to_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata, metadata) + +define float @uitofp_sext_i32_to_f32(i32 signext %a) nounwind strictfp { +; RV64IF-LABEL: uitofp_sext_i32_to_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +define float @uitofp_zext_i32_to_f32(i32 zeroext %a) nounwind strictfp { +; RV64IF-LABEL: uitofp_zext_i32_to_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") 
strictfp + ret float %1 +} + +define float @sitofp_aext_i32_to_f32(i32 %a) nounwind strictfp { +; RV64IF-LABEL: sitofp_aext_i32_to_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata) + +define float @sitofp_sext_i32_to_f32(i32 signext %a) nounwind strictfp { +; RV64IF-LABEL: sitofp_sext_i32_to_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +define float @sitofp_zext_i32_to_f32(i32 zeroext %a) nounwind strictfp { +; RV64IF-LABEL: sitofp_zext_i32_to_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} diff --git a/llvm/test/CodeGen/RISCV/rv64f-half-convert-strict.ll b/llvm/test/CodeGen/RISCV/rv64f-half-convert-strict.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64f-half-convert-strict.ll @@ -0,0 +1,123 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64f -disable-strictnode-mutation < %s | \ +; RUN: FileCheck %s -check-prefix=RV64IZFH + +; This file exhaustively checks half<->i32 conversions. In general, +; fcvt.l[u].h can be selected instead of fcvt.w[u].h because poison is +; generated for an fpto[s|u]i conversion if the result doesn't fit in the +; target type. 
+ +define i32 @aext_fptosi(half %a) nounwind { +; RV64IZFH-LABEL: aext_fptosi: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata) + +define signext i32 @sext_fptosi(half %a) nounwind { +; RV64IZFH-LABEL: sext_fptosi: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptosi(half %a) nounwind { +; RV64IZFH-LABEL: zext_fptosi: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV64IZFH-NEXT: slli a0, a0, 32 +; RV64IZFH-NEXT: srli a0, a0, 32 +; RV64IZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define i32 @aext_fptoui(half %a) nounwind { +; RV64IZFH-LABEL: aext_fptoui: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata) + +define signext i32 @sext_fptoui(half %a) nounwind { +; RV64IZFH-LABEL: sext_fptoui: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptoui(half %a) nounwind { +; RV64IZFH-LABEL: zext_fptoui: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define half @uitofp_aext_i32_to_f16(i32 %a) nounwind { +; RV64IZFH-LABEL: uitofp_aext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata, metadata) + +define half @uitofp_sext_i32_to_f16(i32 signext %a) nounwind { +; RV64IZFH-LABEL: uitofp_sext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @uitofp_zext_i32_to_f16(i32 zeroext %a) nounwind { +; RV64IZFH-LABEL: uitofp_zext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @sitofp_aext_i32_to_f16(i32 %a) nounwind { +; RV64IZFH-LABEL: sitofp_aext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata, metadata) + +define half @sitofp_sext_i32_to_f16(i32 
signext %a) nounwind { +; RV64IZFH-LABEL: sitofp_sext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @sitofp_zext_i32_to_f16(i32 zeroext %a) nounwind { +; RV64IZFH-LABEL: sitofp_zext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +}
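A minimal standalone reproducer for the strict double->i32 conversion covered above, distilled from the rv64d tests (this is an illustrative sketch, not part of the patch; the file name reproduce.ll and the function name @reproduce are placeholders). Per the aext_fptosi checks, the expected RV64ID output is an fmv.d.x followed by fcvt.w.d with the rtz rounding mode:

; Same llc invocation as the rv64d RUN line above, assuming the IR below is in reproduce.ll:
;   llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs -disable-strictnode-mutation < reproduce.ll
;
; Expected body (per the aext_fptosi checks above):
;   fmv.d.x  ft0, a0
;   fcvt.w.d a0, ft0, rtz
;   ret
define i32 @reproduce(double %a) nounwind strictfp {
  %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict") strictfp
  ret i32 %1
}
declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)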