diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -84,6 +84,10 @@ FMV_X_ANYEXTH, FMV_W_X_RV64, FMV_X_ANYEXTW_RV64, + // FP to 32 bit int conversions for RV64. These are used to keep track of the + // result being sign extended to 64 bit. + FCVT_W_RV64, + FCVT_WU_RV64, // READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target // (returns (Lo, Hi)). It takes a chain operand. READ_CYCLE_WIDE, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -849,6 +849,7 @@ setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::XOR); setTargetDAGCombine(ISD::ANY_EXTEND); + setTargetDAGCombine(ISD::ZERO_EXTEND); if (Subtarget.hasStdExtV()) { setTargetDAGCombine(ISD::FCOPYSIGN); setTargetDAGCombine(ISD::MGATHER); @@ -4765,20 +4766,30 @@ case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: { - bool IsStrict = N->isStrictFPOpcode(); assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); + bool IsStrict = N->isStrictFPOpcode(); + bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT || + N->getOpcode() == ISD::STRICT_FP_TO_SINT; SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0); // If the FP type needs to be softened, emit a library call using the 'si' // version. If we left it to default legalization we'd end up with 'di'. If // the FP type doesn't need to be softened just let generic type // legalization promote the result type. if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != - TargetLowering::TypeSoftenFloat) + TargetLowering::TypeSoftenFloat) { + // FIXME: Support strict FP. + if (IsStrict) + return; + if (!isTypeLegal(Op0.getValueType())) + return; + unsigned Opc = IsSigned ? 
RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64; + SDValue Res = DAG.getNode(Opc, DL, MVT::i64, Op0); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); return; + } RTLIB::Libcall LC; - if (N->getOpcode() == ISD::FP_TO_SINT || - N->getOpcode() == ISD::STRICT_FP_TO_SINT) + if (IsSigned) LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0)); else LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0)); @@ -5962,6 +5973,16 @@ return performXORCombine(N, DCI, Subtarget); case ISD::ANY_EXTEND: return performANY_EXTENDCombine(N, DCI, Subtarget); + case ISD::ZERO_EXTEND: + // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during + // type legalization. This is safe because fp_to_uint produces poison if + // it overflows. + if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit() && + N->getOperand(0).getOpcode() == ISD::FP_TO_UINT && + isTypeLegal(N->getOperand(0).getOperand(0).getValueType())) + return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64, + N->getOperand(0).getOperand(0)); + return SDValue(); case RISCVISD::SELECT_CC: { // Transform SDValue LHS = N->getOperand(0); @@ -6490,6 +6511,8 @@ case RISCVISD::UNSHFLW: case RISCVISD::BCOMPRESSW: case RISCVISD::BDECOMPRESSW: + case RISCVISD::FCVT_W_RV64: + case RISCVISD::FCVT_WU_RV64: // TODO: As the result is sign-extended, this is conservatively correct. A // more precise answer could be calculated for SRAW depending on known // bits in the shift amount. 
@@ -8201,6 +8224,8 @@ NODE_NAME_CASE(FMV_X_ANYEXTH) NODE_NAME_CASE(FMV_W_X_RV64) NODE_NAME_CASE(FMV_X_ANYEXTW_RV64) + NODE_NAME_CASE(FCVT_W_RV64) + NODE_NAME_CASE(FCVT_WU_RV64) NODE_NAME_CASE(READ_CYCLE_WIDE) NODE_NAME_CASE(GREV) NODE_NAME_CASE(GREVW) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -351,12 +351,11 @@ def : Pat<(bitconvert (i64 GPR:$rs1)), (FMV_D_X GPR:$rs1)>; def : Pat<(i64 (bitconvert FPR64:$rs1)), (FMV_X_D FPR64:$rs1)>; -// FP->[u]int32 is mostly handled by the FP->[u]int64 patterns. This is safe -// because fpto[u|s]i produce poison if the value can't fit into the target. -// We match the single case below because fcvt.wu.d sign-extends its result so -// is cheaper than fcvt.lu.d+sext.w. -def : Pat<(sext_inreg (assertzexti32 (fp_to_uint FPR64:$rs1)), i32), - (FCVT_WU_D $rs1, 0b001)>; +// Use target specific isd nodes to help us remember the result is sign +// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be +// duplicated if it has another user that didn't need the sign_extend. 
+def : Pat<(riscv_fcvt_w_rv64 FPR64:$rs1), (FCVT_W_D $rs1, 0b001)>; +def : Pat<(riscv_fcvt_wu_rv64 FPR64:$rs1), (FCVT_WU_D $rs1, 0b001)>; // [u]int32->fp def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -19,11 +19,17 @@ : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>; def SDT_RISCVFMV_X_ANYEXTW_RV64 : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>; +def SDT_RISCVFCVT_W_RV64 + : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisFP<1>]>; def riscv_fmv_w_x_rv64 : SDNode<"RISCVISD::FMV_W_X_RV64", SDT_RISCVFMV_W_X_RV64>; def riscv_fmv_x_anyextw_rv64 : SDNode<"RISCVISD::FMV_X_ANYEXTW_RV64", SDT_RISCVFMV_X_ANYEXTW_RV64>; +def riscv_fcvt_w_rv64 + : SDNode<"RISCVISD::FCVT_W_RV64", SDT_RISCVFCVT_W_RV64>; +def riscv_fcvt_wu_rv64 + : SDNode<"RISCVISD::FCVT_WU_RV64", SDT_RISCVFCVT_W_RV64>; //===----------------------------------------------------------------------===// // Operand and SDNode transformation definitions. @@ -391,12 +397,11 @@ def : Pat<(sext_inreg (riscv_fmv_x_anyextw_rv64 FPR32:$src), i32), (FMV_X_W FPR32:$src)>; -// FP->[u]int32 is mostly handled by the FP->[u]int64 patterns. This is safe -// because fpto[u|s]i produces poison if the value can't fit into the target. -// We match the single case below because fcvt.wu.s sign-extends its result so -// is cheaper than fcvt.lu.s+sext.w. -def : Pat<(sext_inreg (assertzexti32 (fp_to_uint FPR32:$rs1)), i32), - (FCVT_WU_S $rs1, 0b001)>; +// Use target specific isd nodes to help us remember the result is sign +// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be +// duplicated if it has another user that didn't need the sign_extend. 
+def : Pat<(riscv_fcvt_w_rv64 FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>; +def : Pat<(riscv_fcvt_wu_rv64 FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>; // float->[u]int64. Round-to-zero must be used. def : Pat<(i64 (fp_to_sint FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -350,12 +350,11 @@ } // Predicates = [HasStdExtZfh, IsRV32] let Predicates = [HasStdExtZfh, IsRV64] in { -// FP->[u]int32 is mostly handled by the FP->[u]int64 patterns. This is safe -// because fpto[u|s]i produces poison if the value can't fit into the target. -// We match the single case below because fcvt.wu.s sign-extends its result so -// is cheaper than fcvt.lu.h+sext.w. -def : Pat<(sext_inreg (assertzexti32 (fp_to_uint FPR16:$rs1)), i32), - (FCVT_WU_H $rs1, 0b001)>; +// Use target specific isd nodes to help us remember the result is sign +// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be +// duplicated if it has another user that didn't need the sign_extend. +def : Pat<(riscv_fcvt_w_rv64 FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>; +def : Pat<(riscv_fcvt_wu_rv64 FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>; // half->[u]int64. Round-to-zero must be used. 
def : Pat<(i64 (fp_to_sint FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>; diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -64,7 +64,7 @@ ; RV64IFD-LABEL: fcvt_w_d: ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz +; RV64IFD-NEXT: fcvt.w.d a0, ft0, rtz ; RV64IFD-NEXT: ret %1 = fptosi double %a to i32 ret i32 %1 @@ -133,7 +133,7 @@ ; RV64IFD-LABEL: fcvt_wu_d: ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: fcvt.lu.d a0, ft0, rtz +; RV64IFD-NEXT: fcvt.wu.d a0, ft0, rtz ; RV64IFD-NEXT: ret %1 = fptoui double %a to i32 ret i32 %1 @@ -166,7 +166,7 @@ ; RV64IFD-NEXT: addi a0, zero, 1 ; RV64IFD-NEXT: beqz a1, .LBB5_2 ; RV64IFD-NEXT: # %bb.1: -; RV64IFD-NEXT: fcvt.lu.d a0, ft0, rtz +; RV64IFD-NEXT: mv a0, a1 ; RV64IFD-NEXT: .LBB5_2: ; RV64IFD-NEXT: ret %a = fptoui double %x to i32 diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -16,7 +16,7 @@ ; RV64IF-LABEL: fcvt_w_s: ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz ; RV64IF-NEXT: ret %1 = fptosi float %a to i32 ret i32 %1 @@ -102,7 +102,7 @@ ; RV64IF-LABEL: fcvt_wu_s: ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IF-NEXT: fcvt.wu.s a0, ft0, rtz ; RV64IF-NEXT: ret %1 = fptoui float %a to i32 ret i32 %1 @@ -130,7 +130,7 @@ ; RV64IF-NEXT: addi a0, zero, 1 ; RV64IF-NEXT: beqz a1, .LBB3_2 ; RV64IF-NEXT: # %bb.1: -; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IF-NEXT: mv a0, a1 ; RV64IF-NEXT: .LBB3_2: ; RV64IF-NEXT: ret %a = fptoui float %x to i32 diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll --- 
a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -164,7 +164,7 @@ ; RV64IZFH-NEXT: addi a0, zero, 1 ; RV64IZFH-NEXT: beqz a1, .LBB3_2 ; RV64IZFH-NEXT: # %bb.1: -; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: mv a0, a1 ; RV64IZFH-NEXT: .LBB3_2: ; RV64IZFH-NEXT: ret ; @@ -174,7 +174,7 @@ ; RV64IDZFH-NEXT: addi a0, zero, 1 ; RV64IDZFH-NEXT: beqz a1, .LBB3_2 ; RV64IDZFH-NEXT: # %bb.1: -; RV64IDZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IDZFH-NEXT: mv a0, a1 ; RV64IDZFH-NEXT: .LBB3_2: ; RV64IDZFH-NEXT: ret %a = fptoui half %x to i32 @@ -246,12 +246,12 @@ ; ; RV64IZFH-LABEL: fcvt_w_h: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz ; RV64IZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_w_h: ; RV64IDZFH: # %bb.0: -; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IDZFH-NEXT: fcvt.w.h a0, fa0, rtz ; RV64IDZFH-NEXT: ret %1 = fptosi half %a to i32 ret i32 %1 @@ -400,12 +400,12 @@ ; ; RV64IZFH-LABEL: fcvt_wu_h: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz ; RV64IZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_wu_h: ; RV64IDZFH: # %bb.0: -; RV64IDZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz ; RV64IDZFH-NEXT: ret %1 = fptoui half %a to i32 ret i32 %1 diff --git a/llvm/test/CodeGen/RISCV/rv64d-double-convert.ll b/llvm/test/CodeGen/RISCV/rv64d-double-convert.ll --- a/llvm/test/CodeGen/RISCV/rv64d-double-convert.ll +++ b/llvm/test/CodeGen/RISCV/rv64d-double-convert.ll @@ -11,7 +11,7 @@ ; RV64ID-LABEL: aext_fptosi: ; RV64ID: # %bb.0: ; RV64ID-NEXT: fmv.d.x ft0, a0 -; RV64ID-NEXT: fcvt.l.d a0, ft0, rtz +; RV64ID-NEXT: fcvt.w.d a0, ft0, rtz ; RV64ID-NEXT: ret %1 = fptosi double %a to i32 ret i32 %1 @@ -21,7 +21,7 @@ ; RV64ID-LABEL: sext_fptosi: ; RV64ID: # %bb.0: ; RV64ID-NEXT: fmv.d.x ft0, a0 -; RV64ID-NEXT: fcvt.l.d a0, ft0, rtz +; RV64ID-NEXT: fcvt.w.d a0, ft0, rtz ; RV64ID-NEXT: ret %1 = fptosi double %a to i32 
ret i32 %1 @@ -31,7 +31,7 @@ ; RV64ID-LABEL: zext_fptosi: ; RV64ID: # %bb.0: ; RV64ID-NEXT: fmv.d.x ft0, a0 -; RV64ID-NEXT: fcvt.l.d a0, ft0, rtz +; RV64ID-NEXT: fcvt.w.d a0, ft0, rtz ; RV64ID-NEXT: slli a0, a0, 32 ; RV64ID-NEXT: srli a0, a0, 32 ; RV64ID-NEXT: ret @@ -43,7 +43,7 @@ ; RV64ID-LABEL: aext_fptoui: ; RV64ID: # %bb.0: ; RV64ID-NEXT: fmv.d.x ft0, a0 -; RV64ID-NEXT: fcvt.lu.d a0, ft0, rtz +; RV64ID-NEXT: fcvt.wu.d a0, ft0, rtz ; RV64ID-NEXT: ret %1 = fptoui double %a to i32 ret i32 %1 diff --git a/llvm/test/CodeGen/RISCV/rv64f-float-convert.ll b/llvm/test/CodeGen/RISCV/rv64f-float-convert.ll --- a/llvm/test/CodeGen/RISCV/rv64f-float-convert.ll +++ b/llvm/test/CodeGen/RISCV/rv64f-float-convert.ll @@ -11,7 +11,7 @@ ; RV64IF-LABEL: aext_fptosi: ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz ; RV64IF-NEXT: ret %1 = fptosi float %a to i32 ret i32 %1 @@ -21,7 +21,7 @@ ; RV64IF-LABEL: sext_fptosi: ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz ; RV64IF-NEXT: ret %1 = fptosi float %a to i32 ret i32 %1 @@ -31,7 +31,7 @@ ; RV64IF-LABEL: zext_fptosi: ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz ; RV64IF-NEXT: slli a0, a0, 32 ; RV64IF-NEXT: srli a0, a0, 32 ; RV64IF-NEXT: ret @@ -43,7 +43,7 @@ ; RV64IF-LABEL: aext_fptoui: ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IF-NEXT: fcvt.wu.s a0, ft0, rtz ; RV64IF-NEXT: ret %1 = fptoui float %a to i32 ret i32 %1 diff --git a/llvm/test/CodeGen/RISCV/rv64f-half-convert.ll b/llvm/test/CodeGen/RISCV/rv64f-half-convert.ll --- a/llvm/test/CodeGen/RISCV/rv64f-half-convert.ll +++ b/llvm/test/CodeGen/RISCV/rv64f-half-convert.ll @@ -10,7 +10,7 @@ define i32 @aext_fptosi(half %a) nounwind { ; RV64IZFH-LABEL: aext_fptosi: ; RV64IZFH: # %bb.0: -; 
RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz ; RV64IZFH-NEXT: ret %1 = fptosi half %a to i32 ret i32 %1 @@ -19,7 +19,7 @@ define signext i32 @sext_fptosi(half %a) nounwind { ; RV64IZFH-LABEL: sext_fptosi: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz ; RV64IZFH-NEXT: ret %1 = fptosi half %a to i32 ret i32 %1 @@ -28,7 +28,7 @@ define zeroext i32 @zext_fptosi(half %a) nounwind { ; RV64IZFH-LABEL: zext_fptosi: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz ; RV64IZFH-NEXT: slli a0, a0, 32 ; RV64IZFH-NEXT: srli a0, a0, 32 ; RV64IZFH-NEXT: ret @@ -39,7 +39,7 @@ define i32 @aext_fptoui(half %a) nounwind { ; RV64IZFH-LABEL: aext_fptoui: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz ; RV64IZFH-NEXT: ret %1 = fptoui half %a to i32 ret i32 %1