diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13462,6 +13462,33 @@
   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
     return N0.getOperand(0);
 
+  // fold (fp_round ({u,s}int_to_fp x)) -> ({u,s}int_to_fp x)
+  // but only when the ({u,s}int_to_fp x) remains precise
+  if (N0.getOpcode() == ISD::SINT_TO_FP || N0.getOpcode() == ISD::UINT_TO_FP) {
+    SDValue IntNode = N0->getOperand(0);
+    EVT IntTy = IntNode.getValueType();
+
+    bool IsIntSigned = N0.getOpcode() == ISD::SINT_TO_FP;
+    unsigned IntSize = (int)IntTy.getScalarSizeInBits() - IsIntSigned;
+    const fltSemantics &IntrSem = DAG.EVTToAPFloatSemantics(N0.getValueType());
+
+    // The intuition behind this check is that the original DAG took an integer,
+    // and converted it to the resulting float, but via a more precise float.
+    // After the transform, the integer is converted directly to the resulting
+    // float, without the intermediate precise float.
+    //
+    // Because the intermediate float is rounded to the resulting float, we know
+    // the resulting float is less precise than the intermediate float.
+    // Therefore, the relative precision of the int to the resulting float does
+    // not matter as long as we can fully represent the int in the intermediate
+    // float value. This avoids double-rounding issues.
+    if (APFloat::semanticsPrecision(IntrSem) >= IntSize &&
+        (!LegalTypes || hasOperation(N0.getOpcode(), IntTy))) {
+      SDLoc DL(N);
+      return DAG.getNode(N0.getOpcode(), DL, VT, IntNode);
+    }
+  }
+
   // fold (fp_round (fp_round x)) -> (fp_round x)
   if (N0.getOpcode() == ISD::FP_ROUND) {
     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -392,12 +392,6 @@
 // [u]int->float. Match GCC and default to using dynamic rounding mode.
 def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W $rs1, 0b111)>;
 def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>;
-
-// [u]int->double->float
-def : Pat<(fpround (f64 (sint_to_fp GPR:$rs1))),
-          (FCVT_S_W GPR:$rs1, 0b111)>;
-def : Pat<(fpround (f64 (uint_to_fp GPR:$rs1))),
-          (FCVT_S_WU GPR:$rs1, 0b111)>;
 } // Predicates = [HasStdExtF, IsRV32]
 
 let Predicates = [HasStdExtF, IsRV64] in {
diff --git a/llvm/test/CodeGen/RISCV/fp-convert-indirect.ll b/llvm/test/CodeGen/RISCV/fp-convert-indirect.ll
--- a/llvm/test/CodeGen/RISCV/fp-convert-indirect.ll
+++ b/llvm/test/CodeGen/RISCV/fp-convert-indirect.ll
@@ -11,19 +11,15 @@
 ;; These testcases check that we merge sequences of `fcvt.d.wu; fcvt.s.d` into
 ;; `fcvt.s.wu`.
 ;;
-;; TODO: Unfortunately, though this only uses 32-bit FP instructions, we cannot
-;; do this optimisation without the D extension as we need 64-bit FP values to
-;; be legal to get the right operands to match.
+;; These folds are implemented in the DAGCombiner because, without the D
+;; extension, the intermediate `double` would otherwise be legalised away and
+;; the conversions turned into libcalls.
 
 define float @fcvt_s_w_via_d(i32 %a) nounwind {
 ; RV32IF-LABEL: fcvt_s_w_via_d:
 ; RV32IF:       # %bb.0:
-; RV32IF-NEXT:    addi sp, sp, -16
-; RV32IF-NEXT:    sw ra, 12(sp)
-; RV32IF-NEXT:    call __floatsidf
-; RV32IF-NEXT:    call __truncdfsf2
-; RV32IF-NEXT:    lw ra, 12(sp)
-; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    fcvt.s.w ft0, a0
+; RV32IF-NEXT:    fmv.x.w a0, ft0
 ; RV32IF-NEXT:    ret
 ;
 ; RV32IFD-LABEL: fcvt_s_w_via_d:
@@ -34,13 +30,8 @@
 ;
 ; RV64IF-LABEL: fcvt_s_w_via_d:
 ; RV64IF:       # %bb.0:
-; RV64IF-NEXT:    addi sp, sp, -16
-; RV64IF-NEXT:    sd ra, 8(sp)
-; RV64IF-NEXT:    sext.w a0, a0
-; RV64IF-NEXT:    call __floatsidf
-; RV64IF-NEXT:    call __truncdfsf2
-; RV64IF-NEXT:    ld ra, 8(sp)
-; RV64IF-NEXT:    addi sp, sp, 16
+; RV64IF-NEXT:    fcvt.s.w ft0, a0
+; RV64IF-NEXT:    fmv.x.w a0, ft0
 ; RV64IF-NEXT:    ret
 ;
 ; RV64IFD-LABEL: fcvt_s_w_via_d:
@@ -56,12 +47,8 @@
 define float @fcvt_s_wu_via_d(i32 %a) nounwind {
 ; RV32IF-LABEL: fcvt_s_wu_via_d:
 ; RV32IF:       # %bb.0:
-; RV32IF-NEXT:    addi sp, sp, -16
-; RV32IF-NEXT:    sw ra, 12(sp)
-; RV32IF-NEXT:    call __floatunsidf
-; RV32IF-NEXT:    call __truncdfsf2
-; RV32IF-NEXT:    lw ra, 12(sp)
-; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    fcvt.s.wu ft0, a0
+; RV32IF-NEXT:    fmv.x.w a0, ft0
 ; RV32IF-NEXT:    ret
 ;
 ; RV32IFD-LABEL: fcvt_s_wu_via_d:
@@ -72,14 +59,8 @@
 ;
 ; RV64IF-LABEL: fcvt_s_wu_via_d:
 ; RV64IF:       # %bb.0:
-; RV64IF-NEXT:    addi sp, sp, -16
-; RV64IF-NEXT:    sd ra, 8(sp)
-; RV64IF-NEXT:    slli a0, a0, 32
-; RV64IF-NEXT:    srli a0, a0, 32
-; RV64IF-NEXT:    call __floatunsidf
-; RV64IF-NEXT:    call __truncdfsf2
-; RV64IF-NEXT:    ld ra, 8(sp)
-; RV64IF-NEXT:    addi sp, sp, 16
+; RV64IF-NEXT:    fcvt.s.wu ft0, a0
+; RV64IF-NEXT:    fmv.x.w a0, ft0
 ; RV64IF-NEXT:    ret
 ;
 ; RV64IFD-LABEL: fcvt_s_wu_via_d:
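
Note (illustration, not part of the patch): the `APFloat::semanticsPrecision(IntrSem) >= IntSize` check exists because the fold is only safe when the integer is exactly representable in the intermediate float type. An i32 always survives the trip through `double` exactly (53 bits of precision >= 32), so `(float)(double)x == (float)x` and the combine may skip the intermediate step; a wider integer may not, and the two roundings can then disagree with a single direct rounding. Below is a minimal standalone C++ sketch of that double-rounding hazard, assuming an IEEE-754 target with the default round-to-nearest mode; the value is chosen purely for illustration.

#include <cstdint>
#include <cstdio>

int main() {
  // 2^62 + 2^38 + 1 needs 63 significant bits, so it is exactly representable
  // in neither double (53-bit significand) nor float (24-bit significand).
  int64_t N = (INT64_C(1) << 62) + (INT64_C(1) << 38) + 1;

  float Direct = static_cast<float>(N);                         // rounded once
  float ViaDouble = static_cast<float>(static_cast<double>(N)); // rounded twice

  // The two results differ: rounding to double first drops the trailing +1,
  // leaving an exact halfway case that then ties to even when rounded to float.
  std::printf("direct:     %.10e\nvia double: %.10e\n", Direct, ViaDouble);
  return 0;
}

This is why the combine fires for the i32 -> f64 -> f32 sequences in the tests above but leaves an i64 -> f64 -> f32 sequence alone: 64 (or 63 for the signed case) exceeds the 53-bit precision of the intermediate `double`.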